# -*- coding:utf-8 -*-
import sys
from imp import reload
import json
import jsonpath as jsonpath
# print (u'系统默认编码为',sys.getdefaultencoding())
# Python 2 compatibility shim: force UTF-8 as the interpreter-wide default
# encoding. Python 3 always reports 'utf-8', so the branch is skipped there.
default_encoding = 'utf-8'
if default_encoding != sys.getdefaultencoding():
    # On Python 2, site.py removes sys.setdefaultencoding at startup;
    # reloading the sys module makes it available again.
    reload(sys)
    sys.setdefaultencoding(default_encoding)
# print (u'系统默认编码为',sys.getdefaultencoding())
import requests
from bs4 import BeautifulSoup
import traceback
import re
import xlwt
# def getURLDATA(r):
# html = BeautifulSoup(r.content.decode(), 'html.parser')
# Press the green button in the gutter to run the script.
# Endpoints of the CNNVD web API used by this script.
LIST_URL = 'https://www.cnnvd.org.cn/web/homePage/cnnvdVulList'
DETAIL_URL = 'https://www.cnnvd.org.cn/web/cnnvdVul/getCnnnvdDetailOnDatasource'

# The same headers are sent with the list request and every detail request.
REQUEST_HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.80 Safari/537.36',
    'Connection': 'keep-alive',
    'Content-Type': 'application/json;charset=UTF-8',
}

# Header row of the sheet, in column order: vulnerability name, CNNVD id,
# CVE id, description, risk/type, publish time, update time, reference URLs,
# patch.
COLUMN_TITLES = (
    "漏洞名称",
    "CNNVD编号",
    "CVE编号",
    "漏洞描述",
    "风险描述",
    "发布时间",
    "更新时间",
    "参考网址",
    "漏洞补丁",
)


def build_detail_payload(record):
    """Return the JSON body of the detail request for one list record.

    Every field is read from ``record`` itself. The previous code looked the
    ``vulType`` up by index in a list that was being inserted into inside the
    same loop, so every record after the first was queried with the first
    record's ``vulType``.

    :param record: one entry of ``data.records`` from the list endpoint; must
        provide the keys ``id``, ``vulType`` and ``cnnvdCode``.
    :return: dict with those three values converted to ``str``.
    :raises KeyError: if one of the three keys is missing.
    """
    return {
        "id": str(record["id"]),
        "vulType": str(record["vulType"]),
        "cnnvdCode": str(record["cnnvdCode"]),
    }


def build_sheet_row(record, detail):
    """Return the cell values of one sheet row, ordered like COLUMN_TITLES.

    The CNNVD id comes from the list record; every other column comes from
    the detail response.

    :param record: list-endpoint entry (provides ``cnnvdCode``).
    :param detail: the ``cnnvdDetail`` object of the detail response.
    :return: tuple of nine cell values.
    :raises KeyError: if an expected key is missing.
    """
    return (
        detail["vulName"],
        record["cnnvdCode"],
        detail["cveCode"],
        detail["vulDesc"],
        detail["vulType"],
        detail["publishTime"],
        detail["updateTime"],
        detail["referUrl"],
        detail["patch"],
    )


def post_json(url, payload):
    """POST ``payload`` as JSON to ``url`` and return the decoded JSON reply.

    :raises requests.HTTPError: on a 4xx/5xx status, so that a failed request
        is reported as such instead of as a confusing JSON/KeyError later on.
    """
    response = requests.post(url, headers=REQUEST_HEADERS, json=payload, timeout=30)
    response.raise_for_status()
    return response.json()


if __name__ == '__main__':
    # Page range to download (both bounds inclusive) and records per page.
    start = 1
    last = 2
    pagesize = 10

    workbook = xlwt.Workbook()
    sheet1 = workbook.add_sheet(u'sheet1', cell_overwrite_ok=True)
    for column, title in enumerate(COLUMN_TITLES):
        sheet1.write(0, column, title)

    for page in range(start, last + 1):
        print("page" + str(page))
        print("pageSize" + str(pagesize))
        listing = post_json(LIST_URL, {
            "pageIndex": page,
            "pageSize": pagesize,
            "keyword": "",
            "hazardLevel": "",
            "vulType": "",
            "vendor": "",
            "product": "",
            "dateType": "",
        })
        records = listing["data"]["records"]

        # Row 0 holds the titles; page p starts at row pagesize * (p - 1) + 1,
        # the same layout the script has always produced.
        first_row = pagesize * (page - 1) + 1
        for offset, record in enumerate(records):
            reply = post_json(DETAIL_URL, build_detail_payload(record))
            detail = reply["data"]["cnnvdDetail"]
            for column, value in enumerate(build_sheet_row(record, detail)):
                sheet1.write(first_row + offset, column, value)

    # Written once, after all pages were fetched; named "<start>-<last>.xls".
    workbook.save(str(start) + "-" + str(last) + ".xls")
# r.raise_for_status()抛出异常
# print(r.text)
# html = BeautifulSoup(r.content.decode(), 'html.parser')
# html = BeautifulSoup(r.content, 'html.parser')
# link = html.find_all('id')
# for i in html:
# ##print (i.text.lstrip())
# try:
# list1.append(i.data.records.id())
# ##print ("http://www.cnnvd.org.cn"+i.attrs['href'])
# k = str(i.attrs['href'])
# list2.append("http://www.cnnvd.org.cn" + k)
# list3.append(k[28:])
# # print("http://www.cnnvd.org.cn"+k)
# getURLDATA("http://www.cnnvd.org.cn" + k)
# except:
# print("http://www.cnnvd.org.cn" + k)
# break
# See PyCharm help at https://www.jetbrains.com/help/pycharm/