import json
import requests
import random
import urllib.request
import re
requests.adapters.DEFAULT_RETRIES =5
from datetime import datetime, timedelta
#------------------------------------------下列实例Start------------------------------------------
class 实例():
    """Example download jobs that pull JD (sz.jd.com) data for stored accounts.

    The functions take no ``self`` parameter; the commented-out calls after
    each one show how they are toggled on. Both read account rows from the
    ``账号信息`` table and issue requests through ``Gk.get_request``.

    NOTE(review): database host, user and password are hard-coded in the
    ``pymssql.connect`` calls below; consider moving them to configuration.
    """

    def 从数据库账号批量下载实例加去重():
        """Download category feature data per shop and date, skipping known pairs.

        For every account row and every date from ``Gk.get_date_list(-7, -1)``
        (presumably the seven days ending yesterday; confirm against ``Gk``),
        the request is skipped when ``(date, shopid)`` already appears in
        ``[Test].[dbo].[CategorySales_JD]``; otherwise the response is fetched
        and handed to ``Gk.Guid2DB``.
        """
        import pymssql

        # Date list to collect.
        datelist = Gk.get_date_list(-7, -1)

        db = pymssql.connect('192.168.1.202', 'ai', 'ai123456', 'Aidata', charset='utf8')
        try:
            cursor = db.cursor()
            # Shop ids (and cookies) to crawl: running JD accounts whose id starts with a digit.
            cursor.execute("select 平台店铺id,cookie from [AiData].[dbo].[账号信息] where 平台='京东' and 状态='RUN'and left(平台店铺id,1) like '%[0-9]%'")
            # Single-shop variant kept for debugging:
            # cursor.execute("select 平台店铺id,cookie from [AiData].[dbo].[账号信息] where 平台='京东' and 状态='RUN'and 平台店铺id='12745356'")
            account_sql = cursor.fetchall()
            # (date, shopId) pairs that are already stored.
            cursor.execute("select distinct date,shopId from [Test].[dbo].[CategorySales_JD]")
            record_sql = cursor.fetchall()
            db.commit()
            cursor.close()
        finally:
            # Release the connection even when a query fails.
            db.close()

        # A set gives O(1) membership tests instead of scanning the row list
        # once per (shop, date) combination.
        collected = set(record_sql)

        for rec in account_sql:
            shopid = rec[0]
            cookie = rec[1]
            print(shopid)
            for date in datelist:
                if (date, shopid) in collected:
                    print(f'店铺id{shopid},日期{date}已采集')
                    continue

                print(f'需采集的店铺id是{shopid},日期是{date}')
                _headers_dict = {
                    "Sec-Ch-Ua": "\"Not.A/Brand\";v=\"8\", \"Chromium\";v=\"114\", \"Google Chrome\";v=\"114\"",
                    "Sec-Ch-Ua-Mobile": "?0",
                    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
                    "Accept": "application/json, text/plain, */*",
                    "User-Mup": "1689141894121",
                    "P-Pin": "%E7%8C%AB%E4%BA%BA%E9%92%88%E7%BA%BA-%E5%85%B3",
                    "Uuid": "98836b6fdb7aa9e34617-18948b3dbe9",
                    "X-Requested-With": "XMLHttpRequest",
                    "User-Mnp": "d9dba01ce39fca777f9950ed12175ec6",
                    "Sec-Ch-Ua-Platform": "\"Windows\"",
                    "Sec-Fetch-Site": "same-origin",
                    "Sec-Fetch-Mode": "cors",
                    "Sec-Fetch-Dest": "empty",
                    "Referer": "https://sz.jd.com/sz/view/dealAnalysis/dealFeatures.html",
                    "Accept-Language": "zh-CN,zh;q=0.9",
                    "Cookie": f"{cookie}",
                    "Host": "sz.jd.com",
                    "Connection": "Close"
                }
                # The date and shop id are interpolated into the query string.
                _url = f'https://sz.jd.com/sz/api/trade/getCategoryFeatureData.ajax?channel=99&date={date}&endDate={date}&startDate={date}&shopId={shopid}'
                print(_url)
                _post_data_string = ''
                response = Gk.get_request(_url, _headers_dict, True)
                # print(response)
                Gk.Guid2DB(_url, _headers_dict, _post_data_string, response)
    #从数据库账号批量下载实例加去重()

    def 从数据库账号批量下载实例():
        """Request the realtime-data endpoint once per running JD account and print it."""
        import pymssql

        db = pymssql.connect('192.168.1.202', 'ai', 'ai123456', 'AiData', charset='utf8')
        try:
            cursor = db.cursor()
            cursor.execute("SELECT [平台店铺ID],[请求头],[Cookie] FROM [账号信息] where 平台='京东' and 状态='RUN' order by 1 desc")
            result_login = cursor.fetchall()
            # print(result_login)
            db.commit()
            cursor.close()
        finally:
            # Release the connection even when the query fails.
            db.close()

        # The URL does not depend on the account, so build it once.
        _url = 'https://sz.jd.com/sz/api/realtime/getRealtimeData.ajax?'
        for rec in result_login:
            cookie = rec[2]
            # Replace the headers below with the ones the real request needs.
            _headers_dict = {
                "Host": "sz.jd.com",
                "Connection": "keep-alive",
                "Cookie": f"{cookie}",
            }
            # Only used by the commented-out Guid2DB call below.
            _post_data_string = ''
            response = Gk.get_request(_url, _headers_dict, True)
            print(response)
            #Gk.Guid2DB( _url, _headers_dict, _post_data_string, response)
    #从数据库账号批量下载实例()
#--------------------------------------------END------------------------------------------------
def add_months(date, months):
    """Return *date* shifted by *months* calendar months.

    Args:
        date: A ``datetime`` (or ``date``) value; any time-of-day part is kept.
        months: Number of months to add; may be zero or negative.

    Returns:
        A value of the same type in the target month. When the target month
        is shorter than ``date.day`` the day is clamped to that month's last
        day (e.g. Jan 31 + 1 month -> Feb 28/29).
    """
    import calendar  # local import keeps this block self-contained

    # Working with a zero-based month index turns the year carry into a plain
    # floor division, which is also correct for negative offsets.
    year_carry, month_index = divmod(date.month - 1 + months, 12)
    year = date.year + year_carry
    month = month_index + 1

    # Clamp against the length of the *target* month. The previous code used
    # the last day of the month before it, which raised ValueError for inputs
    # such as Jan 31 + 1 and truncated days for inputs such as Mar 31 + 2.
    last_day = calendar.monthrange(year, month)[1]
    return date.replace(year=year, month=month, day=min(date.day, last_day))
#获取月份列表
def get_month_list(start, end):
    """Return ``"YYYY-MM"`` strings for a range of months relative to now.

    Args:
        start: Month offset from the current month for the first entry.
        end: Month offset from the current month for the last entry.

    Returns:
        A list of month strings, stepping one month at a time from the start
        offset while the stepped date is not later than the end date. The
        list is empty when the start date is already past the end date.
    """
    today = datetime.now()
    first_month = add_months(today, start)
    final_month = add_months(today, end)

    def _walk(position, limit):
        # Yield successive month positions until the limit is passed.
        while position <= limit:
            yield position
            position = add_months(position, 1)

    return [moment.strftime("%Y-%m") for moment in _walk(first_month, final_month)]
#获取日期列表
def get_date_list(start, end):
    """Return ``"YYYY-MM-DD"`` strings for day offsets ``start``..``end`` from now.

    Args:
        start: Day offset (relative to the current time) of the first entry.
        end: Day offset of the last entry, inclusive.

    Returns:
        A list of date strings. Offsets ascend when ``start <= end`` and
        descend otherwise; both ends are included in either direction.
    """
    import datetime as _dt  # local alias; avoids shadowing any module-level ``datetime`` name

    step = -1 if start > end else 1
    now_time = _dt.datetime.now()
    # ``end + step`` keeps the end offset inclusive in both directions. The
    # previous ``end + 1`` stop dropped the last two days of a descending range.
    return [
        (now_time + _dt.timedelta(days=offset)).strftime('%Y-%m-%d')
        for offset in range(start, end + step, step)
    ]
# Convert a Chrome request-header string into a dict
def headers_str_to_dict(headers):
    """Parse a newline-separated header string into a dict.

    Only lines containing ``':'`` are used. Each such line is stripped and
    split once at its first ``':'`` or ``'='``; both halves are stripped and
    stored as key and value. A repeated key keeps the value from its last line.

    Args:
        headers: Raw header text, one header per line.

    Returns:
        A dict mapping header names to header values.
    """
    split_lines = (
        re.split(':|=', raw_line.strip(), maxsplit=1)
        for raw_line in headers.split('\n')
        if ':' in raw_line
    )
    return {name.strip(): content.strip() for name, content in split_lines}
#Get请求
def get_request(url, headers, useFiddlerProxy):
    """Send a GET request and return the response body.

    Args:
        url: Target URL.
        headers: Dict of request headers.
        useFiddlerProxy: When truthy, the request goes through the proxies
            reported by ``urllib.request.getproxies()``. If those are exactly
            the local proxy at 127.0.0.1:8888 with an ``https://`` scheme for
            HTTPS, the HTTPS entry is rewritten to use ``http://``.

    Returns:
        ``bytes`` when the response Content-Type contains ``"image"``,
        otherwise the decoded text.

    NOTE(review): ``verify=False`` disables TLS certificate verification,
    presumably so the intercepting local proxy can be used; confirm this is
    also intended for the non-proxy path.
    """
    request_kwargs = {'headers': headers, 'verify': False, 'timeout': 5}

    if useFiddlerProxy:
        # The earlier hard-coded proxy pick was overwritten immediately by the
        # system proxy lookup, so only the lookup is kept.
        proxies = urllib.request.getproxies()
        if proxies == {'http': 'http://127.0.0.1:8888', 'https': 'https://127.0.0.1:8888'}:
            proxies = {
                'http': 'http://127.0.0.1:8888',
                'https': 'http://127.0.0.1:8888'  # https -> http
            }
        request_kwargs['proxies'] = proxies

    res = requests.get(url, **request_kwargs)

    # A response without a Content-Type header is treated as text instead of
    # raising KeyError.
    content_type = res.headers.get('Content-Type', '')
    if 'image' in content_type:
        # Image payload: return the raw bytes.
        return res.content
    # Anything else: return the decoded text.
    return res.text
#POST请求
def post_request(url, headers, data,useFiddlerProxy):
if useFiddlerProxy:
# 假设 IP 池已经包含了多个代理 IP 地址
proxy_list = ['http://127.0.0.1:8888']
# 随机选择一个代理 IP 地址进行请求
proxies = {"http": random.choice(proxy_list)}
proxies = urllib.request.getproxies()
if proxies == {'http': 'http://127.0.0.1:8888', 'https': 'https://127.0.0.1:8888'}:
proxies = {
'http': 'http://127.0.0.1:8888',
'https': 'http://127.0.0.1:8888' # https -> http
}
res = requests.post(url, headers=headers, data=data, proxies=proxies, verify=False, timeout=5)
else:
res = requests.post(url, headers=headers, data=data, verify=False, timeout=5)
content_type = res.headers['Content-Type']
request_header_str = str
没有合适的资源?快使用搜索试试~ 我知道了~
攻克Data-数据采集与存储-适用于各个平台数据爬虫
共117个文件
dll:99个
py:6个
exe:4个
需积分: 0 9 下载量 27 浏览量
2023-07-24
11:39:31
上传
评论
收藏 21.68MB ZIP 举报
温馨提示
欢迎试用
资源推荐
资源详情
资源评论
收起资源包目录
攻克Data-数据采集与存储-适用于各个平台数据爬虫 (117个子文件)
update.bat 216B
GKData.exe.config 6KB
request_model.cs 913B
test.db 216KB
WebDriver.dll 7.49MB
System.Web.dll 2.57MB
BouncyCastle.Crypto.dll 2.41MB
OMCS.dll 2.21MB
SunnyUI.dll 2.15MB
System.Windows.Forms.dll 1.8MB
System.dll 1.41MB
MySql.Data.dll 1.36MB
System.Web.Extensions.dll 1.33MB
PresentationCore.dll 1.3MB
EPPlus.dll 1.24MB
System.Data.dll 1.22MB
SQLite.Interop.dll 1.17MB
ESFramework.dll 1.08MB
CSkin.dll 982KB
System.Xml.dll 904KB
ImageProcessor.Plugins.WebP.dll 894KB
AngleSharp.dll 868KB
Selenium.WebDriverBackedSelenium.dll 764KB
System.Text.Encoding.CodePages.dll 744KB
FiddlerCore45.dll 715KB
MetadataExtractor.dll 708KB
Svg.dll 657KB
System.Text.Json.dll 569KB
System.Deployment.dll 545KB
Newtonsoft.Json.dll 528KB
FiddlerCore4.dll 506KB
BCMakeCert.dll 474KB
System.Reflection.Metadata.dll 452KB
libwebp.dll 444KB
AutoUpdater.NET.dll 424KB
ESBasic.dll 413KB
DataRabbit.dll 382KB
Google.Protobuf.dll 380KB
System.Data.SQLite.dll 352KB
JustLib.dll 343KB
ExCSS.dll 321KB
protobuf-net.Core.dll 281KB
protobuf-net.dll 270KB
log4net.dll 264KB
libwebpdecoder.dll 251KB
ICSharpCode.SharpZipLib.dll 248KB
SunnyUI.Common.dll 221KB
System.Collections.Immutable.dll 194KB
System.Drawing.dll 185KB
System.Data.SQLite.Linq.dll 183KB
System.Data.SQLite.EF6.dll 183KB
ImageProcessor.dll 181KB
HtmlAgilityPack.dll 165KB
Microsoft.Data.Sqlite.dll 165KB
XmpCore.dll 141KB
System.Memory.dll 139KB
winRar.dll 129KB
System.Numerics.Vectors.dll 113KB
AipSdk.dll 111KB
BasicFormatsForCore.dll 103KB
System.Configuration.dll 94KB
System.Net.Http.dll 85KB
libwebpdemux.dll 81KB
System.Text.Encodings.Web.dll 77KB
System.Security.dll 72KB
CXT.HTTP.dll 70KB
System.ComponentModel.DataAnnotations.dll 65KB
System.Web.Entity.dll 65KB
Microsoft.Extensions.Logging.Abstractions.dll 63KB
System.Management.dll 60KB
K4os.Compression.LZ4.dll 59KB
Ubiety.Dns.Core.dll 55KB
SQLitePCLRaw.core.dll 50KB
Microsoft.CSharp.dll 47KB
System.Transactions.dll 46KB
CertMaker.dll 45KB
System.Xml.Linq.dll 45KB
System.Configuration.Install.dll 44KB
System.ComponentModel.Annotations.dll 42KB
System.Numerics.dll 42KB
Interop.TaskScheduler.dll 39KB
Microsoft.IO.RecyclableMemoryStream.dll 37KB
Interop.IWshRuntimeLibrary.dll 37KB
WebDriver.Support.dll 35KB
Fizzler.dll 34KB
System.Security.AccessControl.dll 33KB
K4os.Compression.LZ4.Streams.dll 33KB
WebDriverManager.dll 32KB
System.IO.Compression.dll 29KB
ThoughtWorks.Selenium.Core.dll 29KB
ZstdNet.dll 28KB
System.Data.DataSetExtensions.dll 27KB
Microsoft.Bcl.AsyncInterfaces.dll 26KB
Microsoft.Win32.Registry.dll 26KB
System.Threading.Tasks.Extensions.dll 25KB
System.ValueTuple.dll 25KB
System.Threading.Tasks.dll 21KB
System.Buffers.dll 20KB
System.ComponentModel.dll 20KB
Imazen.WebP.dll 19KB
共 117 条
- 1
- 2
资源评论
攻克oo0
- 粉丝: 9
- 资源: 1
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功