"""
[课 题]: python爬虫实战-淘宝商品数据
[必备资料]: stealth.min.js
[开发环境]:
python 3.8
pycharm 专业版
selenium 模块 3.141.0 操作浏览器
Chromedriver.exe
Chrome浏览器
"""
import csv
import time
from urllib.parse import quote

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
# Search keyword typed by the user; it is interpolated into the login
# redirect URL below and read by the pagination code at the end of the file.
content = input("请输入你要搜索内容:")

# Output file is opened in append mode so rows from earlier runs are kept.
# The handle gets its own name so it is not clobbered by later assignments.
csv_file = open('淘宝.csv', mode='a', newline='', encoding='utf-8')
csv_writer = csv.writer(csv_file)
# An append-mode stream starts positioned at end-of-file, so position 0 means
# the file is empty: only then write the header, otherwise every run would
# insert another header row in the middle of the data.
if csv_file.tell() == 0:
    csv_writer.writerow(['title', 'price', 'shopName', 'salesVolume', 'location', 'link'])

# Read the stealth script before launching the browser, so a missing file
# fails fast instead of leaving a browser window behind.
with open('stealth.min.js', mode='r', encoding='utf-8') as stealth_file:
    stealth_js = stealth_file.read()

# Browser configuration object.
options = webdriver.ChromeOptions()
# Disable the automation infobar.
options.add_experimental_option('excludeSwitches', ['enable-automation'])
# Suppress the "save password" prompt.
prefs = {'credentials_enable_service': False, 'profile.password_manager_enabled': False}
options.add_experimental_option('prefs', prefs)
# Anti-bot-detection tweak: turn off the AutomationControlled blink feature.
options.add_argument('--disable-blink-features=AutomationControlled')

# 1. Open the browser.
driver = webdriver.Chrome(options=options)
# Register the stealth script to be evaluated on every new document, to strip
# the crawler fingerprints Selenium would otherwise expose.
driver.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {'source': stealth_js})

# 2. Log in to Taobao.
# NOTE(review): the keyword is inserted verbatim into an already
# percent-encoded redirect parameter; left unchanged because the exact
# encoding depth the site expects cannot be confirmed from this file.
login_url = f'https://login.taobao.com/member/login.jhtml?redirectURL=https%3a%2f%2fs.taobao.com:443/search%2F_____tmd_____%2Fpage%2Flogin_jump%3Frand%3DS3WxGHAgAt756EpznwfNzJq2AFA2qBNla3j6EINUS8We9dazM_iKElp8DwVSHZUevpC41Bx7RzivXIj9RnZgdg%26_lgt_%3D18226bb91deded8cffaff7888beed94f___215918___cfebbaa67a39cf15a90ac3bc9d90d370___837b211a0c5c4d0311617da5fff37e25001413704de625b860e2518faad0f03625c577351a3a8791603261bef29a779dbdd4a89e6b9b9cca582985dcf430d08c74006c7ccabb817b7e28145df69ebceaf0e031931b9132ad85013eb3be5f87fdfda1ffe6d0052b26010cff24765b937333be60e83f16310c86c494a16dd5d1b16c7f2de620667f2e22d717e3d5c8f4f1ef8b8415db5cc8cb7b25c3032c6b30b5%26x5referer%3Dhttps%253A%252F%252Fs.taobao.com%252Fsearch%253Fq%253D%252B{content}%2526commend%253Dall%2526ssid%253Ds5-e%2526search_type%253Ditem%2526sourceId%253Dtb.index%2526spm%253Da21bo.jianhua.201856-taobao-item.2%2526ie%253Dutf8%2526initiative_id%253Dtbindexz_20170306&uuid=18226bb91deded8cffaff7888beed94f'
# 2.1 Open the login page.
driver.get(url=login_url)
# 2.2 Locate the input boxes and type the account name and password.
# The generic find_element(By..., ...) form is used because the
# find_element_by_* shortcuts no longer exist in Selenium 4.3+.
# NOTE(review): credentials are hard-coded placeholders; replace them locally
# and never commit real values.
# Account name
driver.find_element(By.CSS_SELECTOR, '#fm-login-id').send_keys("这里输入自己的淘宝号")
# Password
driver.find_element(By.CSS_SELECTOR, '#fm-login-password').send_keys("这里输入自己的密码")
# Click the login button.
driver.find_element(By.CSS_SELECTOR, '#login-form > div.fm-btn > button').click()
# Fixed pause after submitting the form; presumably gives the post-login
# redirect time to finish -- TODO confirm 3 s is enough in practice.
time.sleep(3)
def _first_match(card, selector):
    """Return the first descendant of *card* matching *selector*, or None."""
    try:
        return card.find_element(By.CSS_SELECTOR, selector)
    except NoSuchElementException:
        return None


def _text_of(card, selector):
    """Return the text of the first match for *selector*, or '' if absent."""
    node = _first_match(card, selector)
    return node.text if node is not None else ''


def get_next():
    """Scrape every product card on the page currently loaded in ``driver``.

    For each card the title, price, shop name, sales volume, location and
    link are printed and appended as one row via the module-level
    ``csv_writer``. A card that lacks one of the sub-elements gets an empty
    value for that column instead of aborting the whole run with
    ``NoSuchElementException``.

    Takes no arguments and returns ``None``; it depends on the module-level
    ``driver`` and ``csv_writer`` objects.
    """
    # 3. Collect the product information.
    # 3.1 Locate all product cards on the page.
    cards = driver.find_elements(By.CSS_SELECTOR, '.grid.g-clearfix .items .item')
    # 3.2 Extract the details of each product.
    for card in cards:
        # Second-level extraction inside the card.
        price = _text_of(card, '.price')
        salesVolume = _text_of(card, '.deal-cnt')
        title = _text_of(card, '.row.row-2.title')
        link_node = _first_match(card, '.J_ClickStat')
        link = link_node.get_attribute('href') if link_node is not None else ''
        shopName = _text_of(card, '.shopname')
        location = _text_of(card, '.location')
        print(title, price, shopName, salesVolume, location, link)
        # 4. Save the data.
        csv_writer.writerow([title, price, shopName, salesVolume, location, link])
# 5. Walk the search result pages and scrape each one.
PAGE_COUNT = 100      # number of result pages requested
ITEMS_PER_PAGE = 44   # step of the `s` offset parameter between consecutive pages

# Percent-encode the keyword once, so characters such as '&', '#' or spaces
# cannot corrupt the query string.
encoded_query = quote(content, safe='')

try:
    for page in range(PAGE_COUNT):
        # Load the page first and scrape it afterwards. Scraping before
        # navigating read whatever page the login step left open and never
        # scraped the last page that was loaded.
        driver.get(f'https://s.taobao.com/search?q={encoded_query}&commend=all&ssid=s5-e&search_type=item&sourceId=tb.index&spm=a21bo.jianhua.201856-taobao-item.2&ie=utf8&initiative_id=tbindexz_20170306&bcoffset=1&ntoffset=1&p4ppushleft=2%2C48&s={page * ITEMS_PER_PAGE}')
        get_next()
finally:
    # Always shut the browser and its driver process down, even if scraping fails.
    driver.quit()