import ImgView
import Message
import pymysql
import requests
import sys
import time
import DownLoadImg
import DetailMessageView
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from requests import Response
from bs4 import BeautifulSoup
import urllib
# Module-level MySQL connection shared by every helper below.
# Keyword arguments are required: PyMySQL >= 1.0 removed positional args.
# utf8mb4 so Chinese product/brand names round-trip safely.
# NOTE(review): credentials are hard-coded — move to config/env in production.
con = pymysql.connect(host='127.0.0.1',
                      user='root',
                      password='yb19961215000000',
                      database='jingdong',
                      charset='utf8mb4')
cur = con.cursor()
def DeleteTable(table):  # drop a table
    """Drop the given table if it exists.

    Args:
        table: table name (internal, trusted value — it is interpolated into
            the SQL text because identifiers cannot be bound as parameters).
    """
    # IF EXISTS keeps the very first run (no table created yet) from raising.
    sql = "DROP TABLE IF EXISTS " + table
    cur.execute(sql)
def CreatTable(cur):  # create the brand table
    """Create the Brand table (auto-increment id + brand name) if missing."""
    sql = ("CREATE TABLE IF NOT EXISTS Brand("
           "Id INT PRIMARY KEY AUTO_INCREMENT,"
           "BrandName VARCHAR(50))")
    cur.execute(sql)
def CreatCommodityTable(cur):  # create the commodity table
    """Create the Commodity table (id, name, price, store, comment count,
    promo icons, detail-page URL) if it does not already exist."""
    sql = ("CREATE TABLE IF NOT EXISTS Commodity("
           "CId INT PRIMARY KEY AUTO_INCREMENT,"
           "CommodityName VARCHAR(300),"
           "Price VARCHAR(20),"
           "StoreName VARCHAR(40),"
           "Comment VARCHAR(20),"
           "goodicon VARCHAR(100),"
           "DetaileUrl VARCHAR(100))")
    cur.execute(sql)
def InsertBrand(name):  # insert one row into the brand table
    """Insert a single brand name into the Brand table.

    Security fix: the original concatenated `name` (scraped, untrusted text)
    directly into the SQL string; use a parameterized query instead.

    Args:
        name: brand name string scraped from the page.
    """
    sql = "insert into Brand(BrandName) values(%s)"
    print("生成SQL语句》》》》》》》" + sql)
    try:
        print("语句执行")
        cur.execute(sql, (name,))
        print("插入成功")
        # commit the insert
        con.commit()
        print("已经提交")
    except Exception:
        # roll back on any failure so the connection stays usable
        con.rollback()
def InsertCommotity(CommodityName, Price, StoreName, Comment, goodicon, DetaileUrl):
    """Insert one product row into the Commodity table.

    Security fix: all six values are scraped (untrusted) text and were
    concatenated into the SQL string; a single quote in any field broke the
    statement and allowed injection. Use a parameterized query.
    """
    sql = ("insert into Commodity"
           "(CommodityName,Price,StoreName,Comment,goodicon,DetaileUrl) "
           "values(%s,%s,%s,%s,%s,%s)")
    try:
        cur.execute(sql, (CommodityName, Price, StoreName, Comment,
                          goodicon, DetaileUrl))
        # commit the insert
        con.commit()
        print("已经提交")
    except Exception:
        # roll back on any failure so the connection stays usable
        con.rollback()
def ClearTable(table):
    """Delete every row from `table` (the table itself is kept).

    Bug fix: the original had `finally: con.close()`, which closed the shared
    module-level connection after a single call and broke every subsequent
    database operation in the process. The connection is now left open.

    Args:
        table: table name (internal, trusted — identifiers cannot be bound
            as query parameters).
    """
    sql = "DELETE FROM " + table + " WHERE id > 0"
    try:
        # execute and commit the bulk delete
        cur.execute(sql)
        con.commit()
    except Exception:
        # roll back on failure so the connection stays usable
        con.rollback()
def getSoup(url):  # fetch a page and return its soup
    """Fetch `url` and return a BeautifulSoup of the page, or '' on failure.

    Improvements: a request timeout so a hung server cannot block forever,
    and a narrowed except clause (the original bare `except:` swallowed
    everything, including KeyboardInterrupt).
    """
    try:
        r = requests.get(url, timeout=10)
        r.encoding = "UTF-8"  # JD search pages are UTF-8
        r.raise_for_status()  # any non-2xx status raises HTTPError
        return BeautifulSoup(r.text, "html.parser")
    except requests.RequestException:
        # keep the original '' sentinel on any network/HTTP failure
        return ''
def downLoadImage(img_url, dest_dir='F:\\python\\img\\'):  # save an image locally
    """Download the image at `img_url` into `dest_dir`.

    Bug fix: the file only does `import urllib`, which does not make
    `urllib.request` available in Python 3 — import the submodule explicitly.
    Generalization: the destination directory is now a parameter whose
    default preserves the original hard-coded path.

    Args:
        img_url: direct URL of the image; the last path segment is used as
            the local file name.
        dest_dir: directory (with trailing separator) to save into.
    """
    import urllib.request  # submodule must be imported explicitly
    filename = img_url.split('/')[-1]
    urllib.request.urlretrieve(img_url, dest_dir + filename)
def findAllBrand(soup):  # find and store all brands on the page
    """Extract every brand name from the search page's brand filter list and
    insert each into the Brand table.

    Returns:
        True  if the page has no brand filter block (nothing inserted),
        False if brands were found and inserted.
    """
    assert isinstance(soup, BeautifulSoup)
    data = soup.find('ul', {"class": "J_valueList v-fixed"})
    if data is None:  # idiom fix: `is None`, not `== None`
        return True
    for li in data.find_all('li'):
        # the brand URL (li.a['href']) was read but never used — dropped
        InsertBrand(li.a.attrs['title'])
        print("插入成功***")
    return False
def getPages(url):
    """Open `url` in Chrome and return the total page count JD displays
    (text of the bottom pager's first <b> element).

    Fix: `find_element_by_xpath` was removed in Selenium 4 — use
    `find_element(By.XPATH, ...)` (By is already imported at file top).
    """
    browser = webdriver.Chrome()
    try:
        browser.get(url)
        pages = browser.find_element(
            By.XPATH, "//*[@id='J_bottomPage']/span[2]/em[1]/b").text
    finally:
        # always release the browser, even if the element is not found
        browser.close()
    return pages
def findAllGoods(soup):
    """Parse every product card on a JD search page and insert each into the
    Commodity table; prints the count of inserted products at the end.

    Cards with a missing or empty store name are skipped. Idiom fixes only:
    `is None` comparisons and removal of the no-op `+ ""` concatenations —
    the inserted values are unchanged.
    """
    assert isinstance(soup, BeautifulSoup)
    data = soup.find('ul', {"class": "gl-warp clearfix"})
    cnt = 0
    for li in data.find_all("li"):
        store = li.find('div', {"class": "p-shop"})
        if store is None:
            continue  # card without a shop block — skip
        if store.text == '' or store.text is None:
            continue  # empty shop name — skip
        print(store.text)
        cnt += 1
        # product name: strip surrounding whitespace, drop inner spaces
        name = li.find('div', {"class": "p-name p-name-type-2"}).text
        name = name.strip().replace(" ", "")
        comment = li.find('div', {"class": "p-commit"}).strong.text
        price = li.find('div', {"class": "p-price"}).strong.text
        icon = li.find('div', {"class": "p-icons"}).text.strip()
        detail_url = li.find('div', {"class": "p-img"}).a.attrs['href'].strip()
        InsertCommotity(str(name), str(price), str(store.text), str(comment),
                        str(icon), str(detail_url))
        print("商品数据插入数据库成功")
    print(cnt)
def seletBrand():
    """Return every Brand row as a list of [Id, BrandName] pairs, or '' if
    the query fails."""
    sql = "select *from Brand"
    try:
        cur.execute(sql)
        # first two columns of each row: [Id, BrandName]
        return [[row[0], row[1]] for row in cur.fetchall()]
    except Exception:
        # roll back any partial transaction state, keep the '' sentinel
        con.rollback()
        return ''
def selectGoods():
    """Return commodity rows as lists of their first six columns
    (CId, CommodityName, Price, StoreName, Comment, goodicon) — the
    detail URL column is deliberately not included, matching the
    original behavior. Returns '' if the query fails."""
    sql = "select *from commodity"
    try:
        cur.execute(sql)
        return [list(row[:6]) for row in cur.fetchall()]
    except Exception:
        # roll back any partial transaction state, keep the '' sentinel
        con.rollback()
        return ''
def selectUrl(Id):
    """Return the (DetaileUrl,) row for commodity `Id`, or '' on error.

    Security/robustness fix: the original concatenated `Id` into the SQL
    text (injection-prone, and required `Id` to already be a str). The
    parameterized form is safe and accepts int or str ids — backward
    compatible with existing callers.
    """
    sql = "select `DetaileUrl` FROM commodity WHERE CId=%s"
    try:
        cur.execute(sql, (Id,))
        return cur.fetchone()
    except Exception:
        # roll back on failure so the connection stays usable
        con.rollback()
        return ''
def addGoodsToCart(url):
    """Open a JD product page in Chrome and click its add-to-cart button.

    Fix: `find_element_by_id` was removed in Selenium 4 — use
    `find_element(By.ID, ...)`. The browser window is left open, as in the
    original, so the user can see the resulting cart page.
    """
    browser = webdriver.Chrome()
    browser.get(url)
    browser.find_element(By.ID, "InitCartUrl").click()
def main():
DeleteTable("brand")
DeleteTable("commodity")
CreatTable(cur)
CreatCommodityTable(cur)
inputCommodity = input("请输入你想要查询的商品")
url = "https://search.jd.com/Search?keyword=" + inputCommodity + "&enc=utf-8&wq=" + inputCommodity
soup = getSoup(url)
flag = findAllBrand(soup)
if flag == False:
YesOrNo=input("您是否想要选择您想要的品牌,如果想要选择请输入yes,不想要选择请输入no>>>>>>")
if YesOrNo=="yes":
brandlist = seletBrand()
BrandMessage=[]
for singleBrand in brandlist:
print("编号>>>>>"+str(singleBrand[0]), end='')
print("品牌>>>>>>"+singleBrand[1])
BrandMessage.append("编号>>>>>"+str(singleBrand[0])+" "+"品牌>>>>>>"+singleBrand[1])
Message.main(BrandMessage)
idNumber=input("请输入你想要选择的品牌编号>>>>")
print(brandlist[eval(idNumber)-brandlist[0][0]][1])
url=url+"&ev=exbrand_"+brandlist[eval(idNumber)-brandlist[0][0]][1]
print(url)
soup=getSoup(url)
pages=getPages(url)#得到总共的页数
for i in range(1,2):
print("这是第"+str(i)+"页"+"@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
a = time.time()
b = '%.5f' % a
url=url+"&page="+str(i*2-1)+"&s=58&click=0"
soupfirst=getSoup(url)
# NOTE(review): the lines below are web-page residue (CSDN download-page
# metadata) accidentally fused onto the source during extraction. They are
# not Python and broke parsing; preserved here as a comment.
# 京东爬虫一站式爬虫的相关爬虫文件以及代码
# 需积分: 36  93 浏览量
# 2018-12-20 11:53:47 上传
# 评论 3
# 收藏 1.82MB RAR 举报
# 刹风
# - 粉丝: 0
# - 资源: 1