# -*- coding: utf-8 -*-
import os,time,requests,hashlib
# Number of retries the shared session should attempt on connection failures.
# NOTE: requests itself never reads a package-level ``DEFAULT_RETRIES``, so this
# assignment alone does not enable retries; it is kept for backward
# compatibility and reused below as the single source of the retry count.
requests.DEFAULT_RETRIES = 5

# One shared session for every download in this script.
session = requests.session()

# NOTE: ``keep_alive`` is not an attribute that requests consults either; the
# assignment is kept so existing readers of it still work. Connection reuse is
# actually discouraged by the ``Connection: close`` header sent in download().
session.keep_alive = False

# Retries are configured per transport adapter, so mount adapters that carry
# the retry count for both schemes. This only retries failed DNS lookups,
# socket connections and connection timeouts.
for _scheme in ("http://", "https://"):
    session.mount(
        _scheme,
        requests.adapters.HTTPAdapter(max_retries=requests.DEFAULT_RETRIES),
    )

# Directory (relative to the current working directory) that receives the
# downloaded files.
PATH_IMGS = "images"
def download(url):
    """Fetch *url* and store the response body as a file under ``PATH_IMGS``.

    The file name is the MD5 hex digest of the UTF-8 encoded URL followed by
    ``.jpg``, so a given URL always maps to the same file and a repeated
    download overwrites it.

    Args:
        url: Address of the resource to download.

    Raises:
        requests.RequestException: If the request fails or the server answers
            with a 4xx/5xx status (so error pages are not saved as images).
        OSError: If the output directory or file cannot be written.
    """
    ua = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    )
    headers = {
        "user-agent": ua,
        "accept": (
            "text/html,application/xhtml+xml,application/xml;q=0.9,"
            "image/avif,image/webp,image/apng,*/*;q=0.8,"
            "application/signed-exchange;v=b3;q=0.9"
        ),
        "proxy-connection": "keep-alive",
        # NOTE(review): advertising "br" presumably relies on a Brotli decoder
        # being installed next to requests; otherwise a br-encoded body would
        # be written to disk still compressed. Confirm or drop "br".
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "close",
    }
    name = hashlib.md5(url.encode("utf-8")).hexdigest() + ".jpg"

    # No proxy is configured; set ``proxy`` to a proxy URL to route through one.
    proxy = None
    proxies = {
        "http": proxy,
        "https": proxy,
    }

    # SECURITY: verify=False turns off TLS certificate verification. It is
    # kept to preserve existing behaviour, but it exposes the download to
    # man-in-the-middle tampering and should be revisited.
    r = session.get(
        url, headers=headers, timeout=20, proxies=proxies, verify=False
    )
    # Fail loudly on HTTP errors instead of saving an error page as "*.jpg".
    r.raise_for_status()

    # The target directory is not guaranteed to exist on a fresh checkout.
    os.makedirs(PATH_IMGS, exist_ok=True)
    with open(os.path.join(PATH_IMGS, name), "wb") as f:
        f.write(r.content)
def main(url_file="C:/Users/18332/Desktop/url列表.txt", delay=0.5):
    """Download every distinct URL listed in *url_file*.

    The file is read as UTF-8 text with one URL per line; surrounding
    whitespace is stripped and blank lines are ignored. Duplicates are removed
    while keeping first-seen order, so runs are reproducible. A failing URL is
    reported and skipped; the remaining URLs are still processed.

    Args:
        url_file: Path of the text file holding the URLs. Defaults to the
            path that was previously hard-coded.
        delay: Seconds to sleep after each URL, successful or not.

    Raises:
        OSError: If *url_file* cannot be opened.
    """
    # "utf-8-sig" reads plain UTF-8 unchanged but also drops a leading BOM
    # (written by some Windows editors) that would otherwise corrupt the
    # first URL.
    with open(url_file, encoding="utf-8-sig") as fh:
        stripped = (line.strip() for line in fh)
        # dict.fromkeys de-duplicates while preserving insertion order.
        urls = list(dict.fromkeys(s for s in stripped if s))

    total = len(urls)
    print(f"待爬取url数目={total}")
    for i, url in enumerate(urls):
        # Progress is printed as the fraction of URLs already handled.
        print(f"{i/total} {url}")
        # noinspection PyBroadException
        try:
            download(url)
        except Exception as exc:
            # Best-effort crawl: report why this URL failed and carry on.
            # Catching Exception (not a bare except) keeps Ctrl-C working.
            print(f"skip! {exc!r}")
        time.sleep(delay)
# Start the crawl only when this file is executed as a script, so importing
# the module does not trigger any downloads.
if __name__ == "__main__":
    main()

才华横溢caozy
- 粉丝: 3036
最新资源
- 2014年度福建省施工企业“三类人员”网络继续教育培训班测试题.doc
- CAD图层、块和剖面线.ppt
- 施工组织设平坏准锹100429A1GCWG施工组织设计交底aspanclass=.docx
- 2019上半年软件设计师下午真题及答案.doc
- 船舶通信导航设备开航前安全检查表.docx
- 计算机未来发展趋势.ppt
- 解决创维E750A连接网络更精彩方法--创维8A07-软件升级说明.doc
- 操作系统第三章总复习题答案.doc
- 高中数学知识点网络图-PPT.pptx
- CAD填充图案及使用方法.doc
- 1-云计算复习题.pdf
- 第2章-中望CAD设置.doc
- 建设工程项目管理二级建造师.ppt
- 让U盘自带杀毒软件-保护系统安全.ppt
- 通信业务合作协议范本.doc
- 高级办公软件实验报告格式.doc
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈


