# -*- coding:utf-8 -*-
import urllib.request as req
import urllib
import os
import re
# Directory the downloaded images are written to (trailing separator kept).
write_file_path = "C:\\Users\\testing\\Spider\\write_file\\"
# Directory whose top-level files are scanned for image URLs.
file_root_path = "C:\\Users\\testing\\Spider\\root_folder\\"

# Non-greedy match from "http://bbs" up to the first following "jpg".
# re.S is kept from the original so a URL broken across lines still matches.
image_url_pattern = re.compile(r'http://bbs.*?jpg', re.S)


def extract_image_urls(file_content):
    """Return every substring of *file_content* matching the image URL pattern.

    Matches are returned in order of appearance; an empty list means no
    URL was found.
    """
    return image_url_pattern.findall(file_content)


def image_file_name(url):
    """Return the last '/'-separated component of *url* (the file name)."""
    return url.split('/')[-1]


def download_image(url, dest_dir, timeout=30):
    """Download *url* and store it in *dest_dir* under its own file name.

    The image is fetched completely before the destination file is opened,
    so a failed download never leaves an empty file behind.

    Args:
        url: Address of the image to fetch.
        dest_dir: Directory the image is written to; created if missing.
        timeout: Seconds to wait on the network before giving up.

    Returns:
        True if the image was saved, False if the download or the write
        failed (the failure is printed, never raised).
    """
    try:
        with req.urlopen(url, None, timeout) as response:
            image_detail = response.read()
    except urllib.error.HTTPError as e:
        print(e.code)
        print(url)
        return False
    except OSError as e:
        # URLError (DNS failure, refused connection) and socket timeouts are
        # OSError subclasses; one bad URL must not abort the whole run.
        print(e)
        print(url)
        return False

    writed_file_name = os.path.join(dest_dir, image_file_name(url))
    try:
        os.makedirs(dest_dir, exist_ok=True)
        with open(writed_file_name, 'wb') as file:
            file.write(image_detail)
    except OSError as e:
        # e.g. the URL's last component is not a valid file name.
        print(e)
        print(writed_file_name)
        return False
    return True


file_list = tuple(os.walk(file_root_path))
if not file_list:
    # os.walk yields nothing when the root folder is missing or unreadable.
    print("No files found under " + file_root_path)

# Only the files directly inside the root folder are processed.
for file_name in (file_list[0][2] if file_list else ()):
    print(file_name)
    file_path = os.path.join(file_root_path, file_name)
    # URLs are plain ASCII, so undecodable bytes elsewhere in the file can be
    # replaced safely instead of aborting the scan.
    with open(file_path, errors="replace") as file_object:
        file_content = file_object.read()
    for url in extract_image_urls(file_content):
        download_image(url, write_file_path)
# Page residue from the original post ("评论0" = "Comments: 0"); not part of the script.