# -*- coding:utf-8 -*-
import requests
from lxml import etree
import random
import re
# import HTMLParser
from html.parser import HTMLParser
import time
# Pool of proxy endpoints, each written as a full "scheme://host:port" URL.
# NOTE(review): how an entry is picked and handed to `requests` is not
# visible in this part of the file -- confirm against the calling code.
proxy = [
    "http://112.83.86.88:2589",
    "https://117.92.128.239:2444",
    "https://117.94.120.55:4734",
    "https://116.149.201.121:6436",
    "https://111.72.104.133:4184",
    "https://113.103.151.180:4217",
    "https://60.189.139.208:4241",
    "https://222.191.171.98:4263",
    "https://182.108.168.108:4234",
    "https://115.209.194.193:4270",
]
# Table of browser User-Agent header values, grouped by the browser family
# named inside each string.
# NOTE(review): presumably sampled to vary the request headers; the code that
# consumes this list is not visible in this part of the file -- confirm.
USER_AGENTS = [
    # Strings identifying as MSIE (Internet Explorer and shells built on it)
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
    "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
    "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
    "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
    # Arora (AppleWebKit)
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
    "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
    # Gecko-based: K-Ninja, Kapiko, Kazehakase
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
    "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
    "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
    # Chrome
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
    # Opera (Presto)
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
]
cookie = [
#'cloud_token=bc34c50c90c2446c83aed5cb1be47d45; Hm_lpvt_e92c8d65d92d534b0fc290df538b4758=1544282809; RTYCID=74927dd837fb4732a031f393165e04eb; TYCID=f81d6a20af4d11e88c773753f20cd2b6; _gid=GA1.2.1641349744.1544194803; undefined=f81d6a20af4d11e88c773753f20cd2b6; CT_TYCID=dd01fb472ae5479ba38a69ae86aeb2f7; ssuid=4039911408; _ga=GA1.2.176006030.1535961067; Hm_lvt_e92c8d65d92d534b0fc290df538b4758=1544203531,1544208878,1544228671,1544282750; tyc-user-info=%257B%2522myQuestionCount%2522%253A%25220%2522%252C%2522integrity%2522%253A%25220%2525%2522%252C%2522state%2522%253A%25220%2522%252C%2522vipManager%2522%253A%25220%2522%252C%2522onum%2522%253A%25220%2522%252C%2522monitorUnreadCount%2522%253A%25221%2522%252C%2522discussCommendCount%2522%253A%25221%2522%252C%2522token%2522%253A%2522eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxNjYxOTc3NzQzNiIsImlhdCI6MTU0NDI4Mjc1OSwiZXhwIjoxNTU5ODM0NzU5fQ.ZaElaRIl4I-UTb78LCUl3FREMdI19fbD58hFV8rs6JJoDuiLAxfKqn41yLJmDf8RpYBzAw8sG9nyvg6G9zMelQ%2522%252C%2522redPoint%2522%253A%25220%2522%252C%2522pleaseAnswerCount%2522%253A%25221%2522%252C%2522vnum%2522%253A%25220%2522%252C%2522bizCardUnread%2522%253A%25220%2522%252C%2522mobile%2522%253A%252216619777436%2522%257D; auth_token=eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxNjYxOTc3NzQzNiIsImlhdCI6MTU0NDI4Mjc1OSwiZXhwIjoxNTU5ODM0NzU5fQ.ZaElaRIl4I-UTb78LCUl3FREMdI19fbD58hFV8rs6JJoDuiLAxfKqn41yLJmDf8RpYBzAw8sG9nyvg6G9zMelQ; _gat_gtag_UA_123487620_1=1; aliyungf_tc=AQAAAP3ZjDwU1g4Atqirc9QaiHisIOU/; csrfToken=7gGwERRS-FP8JQzmif8q07j0',
#"aliyungf_tc=AQAAAMwIX1VNRgAAUhVFeRNLGKWzKDfZ; csrfToken=X48pVLAHE61Kepl8V8_utX4N; TYCID=34e5cdc0faf411e88d5a635382706bce; undefined=34e5cdc0faf411e88d5a635382706bce; ssuid=1308960292; Hm_lvt_e92c8d65d92d534b0fc290df538b4758=1544278750; _ga=GA1.2.1258357000.1544278750; _gid=GA1.2.243951796.1544278750; token=db623234589f45998cf6d6268788b2c3; _utm=fe7630aebe56447688616fbc70c42dab; tyc-user-info=%257B%2522myQuestionCount%2522%253A%25220%2522%252C%2522integrity%2522%253A%25220%2525%2522%252C%2522state%2522%253A%25220%2522%252C%2522vipManager%2522%253A%25220%2522%252C%2522onum%2522%253A%25220%2522%252C%2522monitorUnreadCount%2522%253A%25221%2522%252C%2522discussCommendCount%2522%253A%25221%2522%252C%2522token%2522%253A%2522eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxNjYxOTc3NzQzNiIsImlhdCI6MTU0NDI3ODc4MCwiZXhwIjoxNTU5ODMwNzgwfQ.IdPpEcWpRgVdEEawKP1qIjg3U_x5_K6E16Dpg0kaHYxsfuD03Yn3vRtMGk5sVa8RaAvBOVgKkP4YXZ6BTFZZeQ%2522%252C%2522redPoint%2522%253A%25220%2522%252C%2522pleaseAnswerCount%2522%253A%25221%2522%252C%2522vnum%2522%253A%25220%2522%252C%2522bizCardUnread%2522%253A%25220%2522%252C%2522mobile%2522%253A%252216619777436%2522%257D; auth_token=eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxNjYxOTc3NzQzNiIsImlhdCI6MTU0NDI3ODc4MCwiZXhwIjoxNTU5ODMwNzgwfQ.IdPpEcWpRgVdEEawKP1qIjg3U_x5_K6E16Dpg0kaHYxsfuD03Yn3vRtMGk5sVa8RaAvBOVgKkP4YXZ6BTFZZeQ; Hm_lpvt_e92c8d65d92d534b0fc290df538b4758=1544278786",
#'aliyungf_tc=AQAAAMwIX1VNRgAAUhVFeRNLGKWzKDfZ; csrfToken=X48pVLAHE61Kepl8V8_utX4N; TYCID=34e5cdc0faf411e88d5a635382706bce; undefined=34e5cdc0faf411e88d5a635382706bce; ssuid=1308960292; Hm_lvt_e92c8d65d92d534b0fc290df538b4758=1544278750; _ga=GA1.2.1258357000.1544278750; _gid=GA1.2.243951796.1544278750; token=db623234589f45998cf6d6268788b2c3; _utm=fe7630aebe56447688616fbc70c42dab; RTYCID=f1acc84a6142405ebb406208812ca57e; CT_TYCID=12bfe0aaa4854b3fbbc3b1e6a8e1772f; cloud_token=f67c730341da4819ad72051ebd9aa270; tyc-user-info=%257B%2522myQuestionCount%2522%253A%25220%2522%252C%2522integrity%2522%253A%25220%2525%2522%252C%2522state%2522%253A%25220%2522%252C%2522vipManager%2522%253A%25220%2522%252C%2522onum%2522%253A%25220%2522%252C%2522monitorUnreadCount%2522%253A%25221%2522%252C%2522discussCommendCount%2522%253A%25221%2522%252C%2522token%2522%253A%2522eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxNjYxOTc3NzQzNiIsImlhdCI6MTU0NDI4NTU4NywiZXhwIjoxNTU5ODM3NTg3fQ.55n1rChdGWXW2OUrYE9QX1ETnn4Yhzg5_-gajD8F7yp0bW2b6G2VYeZKNtJT24F9QsiSkYeM5IpsANgyRD-U5w%2522%252C%2522redPoint%2522%253A%25220%2522%252C%2522pleaseAnswerCount%2522%253A%25221%2522%252C%2522vnum%2522%253A%25220%2522%252C%2522bizCardUnread%2522%253A%25220%2522%252C%2522mobile%2522%253A%252216619777436%2522%257D; auth_token=eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxNjYxOTc3NzQzNiIsImlhdCI6MTU0NDI4NTU4NywiZXhwIjoxNTU5ODM3NTg3fQ.55n1rChdGWXW2OUrYE9QX1ETnn4Yhzg5_-gajD8F7yp0bW2b6G2VYeZKNtJT24F9QsiSkYeM5IpsANgyRD-U5w; Hm_lpvt_e92c8d65d92d534b0fc290df538b4758=1544285733; _gat_gtag_UA_123487620_1=1'
#'cloud_token=bc34c50c90c2446c83aed5cb1be47d45; Hm_lpvt_e92c8d65d92d534b0fc290df538b4758=1544286129; RTYCID=74927dd837fb4732a031f393165e04eb; TYCID=f81d6a20af4d11e88c773753f20cd2b6; _gid=GA1.2.1641349744.1544194803; undefined=f81d6a20af4d11e88c773753f20cd2b6; CT_TYCID=dd01fb472ae5479ba38a69ae86aeb2f7; ssuid=4039911408; _ga=GA1.2.176006030.1535961067; Hm_lvt_e92c8d65d92d534b0fc290df538b4758=1544208878,1544228671,1544282750,1544286017; tyc-user-info=%257B%2522myQuestionCount%2522%253A%25220%2522%252C%2522integrity%2522%253A%25220%2525%2522%252C%2522state%2522%253A%25220%2522%252C%2522vipManager%2522%253A%25220%2522%252C%2522onum%2522%253A%25220%2522%252C%2522monitorUnreadCount%2522%253A%25220%2522%252C%2522discussCommendCount%2522%253A%25220%2522%252C%2522token%2522%253A%2522eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxNzgzOTE5MTY5MSIsImlhdCI6MTU0NDI4NjEyMCwiZXhwIjoxNTU5ODM4MTIwfQ.vJDqZCjey7bEslU-cXFA37Vm3fTieNYfm3mDSKGTsu1RIez1tcnRsLfEkMSuoJKekAQdv54BQcg5cMdFlGNXdw%2522%252C%2522redPoint%2522%253A%25220%2522%252C%2522pleaseAnswerCount%2522%253A%25220%2522%252C%2522vnum%2522%253A%25220%2522%252C%2522bizCardUnrea
天眼查爬虫学习demo
需积分: 50 182 浏览量
2019-01-22
10:33:14
上传
评论 1
收藏 1.55MB RAR 举报
servethepeople
- 粉丝: 57
- 资源: 14
最新资源
- 高分课程设计-基于C++实现的民航飞行与地图简易管理系统-南京航空航天大学
- 航天器遥测数据故障检测系统python源码+文档说明+数据库(课程设计)
- 北京航空航天大学操作系统课设+ppt+实验报告
- 基于Vue+Echarts实现风力发电机中传感器的数据展示监控可视化系统+源代码+文档说明(高分课程设计)
- 基于单片机的风力发电机转速控制源码
- 基于C++实现的风力发电气动平衡监测系统+源代码+测量数据(高分课程设计)
- 毕业设计- 基于STM32F103C8T6 单片机,物联网技术的太阳能发电装置+源代码+文档说明+架构图+界面截图
- 基于 LSTM(长短期记忆)(即改进的循环神经网络)预测风力发电厂中风力涡轮机产生的功率+源代码+文档说明
- 基于stm32f103+空心杯电机+oled按键+运动算法
- 《CKA/CKAD应试指南/从docker到kubernetes 完全攻略》学习笔记 第1章docker基础(1.1-1.4)
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈