from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import requests
import csv
# Selenium-driven Taobao scraper: searches for '手机', waits for a manual
# QR-code login, then scrapes three result pages into taobao.csv.
# NOTE(review): this script is inherently interactive (live browser +
# human QR scan); the XPaths are tied to Taobao's current page layout.

# XPath selectors for the fields scraped from each result card.
_PRICE_XPATH = '//*[@id="mainsrp-itemlist"]/div/div/div[1]/div/div[2]/div[1]/div[1]/strong'
_NAME_XPATH = '//*[@id="mainsrp-itemlist"]/div/div/div[1]/div/div[2]/div[2]'
_SALES_XPATH = '//*[@id="mainsrp-itemlist"]/div/div/div[1]/div/div[2]/div[1]/div[2]'
_SHOP_XPATH = '//*[@id="mainsrp-itemlist"]/div/div/div[1]/div/div[2]/div[3]/div[1]/a/span[2]'


def _scrape_page(driver):
    """Scroll the current results page to the bottom and return one
    [price, name, sales, shop] row (all strings) per item found."""
    # Scroll to the bottom so lazily-loaded items are rendered.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
    time.sleep(1)
    prices = driver.find_elements(By.XPATH, _PRICE_XPATH)
    names = driver.find_elements(By.XPATH, _NAME_XPATH)
    sales = driver.find_elements(By.XPATH, _SALES_XPATH)
    shops = driver.find_elements(By.XPATH, _SHOP_XPATH)
    # zip() stops at the shortest list, so a partially rendered card can
    # no longer raise IndexError the way the parallel index loop could.
    return [
        [p.text, n.text, s.text, d.text]
        for p, n, s, d in zip(prices, names, sales, shops)
    ]


def _save_rows(rows):
    """Append the scraped rows to taobao.csv.

    The with-block closes the file; the explicit f.close() the original
    script called inside the with-block was redundant.
    """
    with open('taobao.csv', 'a', encoding='utf-8', newline='') as f:
        csv.writer(f).writerows(rows)


def main():
    """Drive the browser: open Taobao, search, log in via QR, scrape 3 pages."""
    # Open the browser and load the Taobao landing page.
    driver = webdriver.Chrome()
    driver.get('https://www.taobao.com/')
    wait = WebDriverWait(driver, 10)
    # Wait until the landing page has rendered before touching the DOM.
    wait.until(EC.presence_of_element_located(
        (By.XPATH, '/html/body/div[6]/div/div/div/div')))
    time.sleep(1)
    # Type the query into the search box.
    driver.find_element(By.XPATH, '//*[@id="q"]').send_keys('手机')
    time.sleep(1)
    # Click the search button.
    driver.find_element(
        By.XPATH, '//*[@id="J_TSearchForm"]/div[1]/button').click()
    time.sleep(1)
    # Switch the login form to QR-code mode; the 10-second sleep gives the
    # user time to scan the code with the mobile app.
    driver.find_element(By.XPATH, '//*[@id="login"]/div[1]/i').click()
    time.sleep(10)
    # First results page.
    _save_rows(_scrape_page(driver))
    time.sleep(3)
    # Subsequent pages: the original loop iterated range(3, 5), i.e. two
    # more pages; keep that exact page count.
    for _ in range(3, 5):
        time.sleep(2)
        next_btn = driver.find_element(
            By.XPATH, '//*[@id="mainsrp-pager"]/div/div/div/div[2]/span[3]')
        # JS click avoids "element not interactable" errors on the pager.
        driver.execute_script("arguments[0].click();", next_btn)
        time.sleep(1)
        _save_rows(_scrape_page(driver))
    # NOTE(review): as in the original, the browser is intentionally left
    # open for inspection (no driver.quit()).
    print('爬取成功')


if __name__ == "__main__":
    main()