import os, time
import random
import pip
# Third-party dependencies are installed on demand when they are missing.
# pip.main() was removed from pip's importable API in pip 10, so the install
# is run as "python -m pip" in a subprocess of the current interpreter.
import importlib
import subprocess
import sys

try:
    import requests
except ModuleNotFoundError:
    print('》----正在安装requests库----《')
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'requests'])
    # The failed import above may have cached directory listings; refresh them
    # so the freshly installed package is visible to this process.
    importlib.invalidate_caches()
    import requests
try:
    from lxml import html
except ModuleNotFoundError:
    print('》----正在安装lxml库----《')
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'lxml'])
    importlib.invalidate_caches()
    from lxml import html
def 获取headers():
    """Build HTTP request headers with a randomly selected User-Agent.

    Returns:
        dict: two entries, in this order -- ``'User-Agent'``, one string
        picked with ``random.choice`` from a fixed pool of 18 Chrome
        user-agent strings, and ``'Referer'``, always
        ``'http://www.mzitu.com/'``.
    """
    agent_pool = (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
        "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
        "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    )
    # A new agent is drawn on every call, so each request made with these
    # headers may present a different browser identity.
    return {
        'User-Agent': random.choice(agent_pool),
        'Referer': 'http://www.mzitu.com/',
    }
# Seconds to wait for the server before giving up on a single request;
# without a timeout, requests.get() can block indefinitely.
_REQUEST_TIMEOUT = 30

# Characters that Windows rejects in file and directory names, mapped to
# their full-width look-alikes.  Written as escapes so the table cannot be
# silently collapsed into no-op replacements if this file is re-encoded.
_RESERVED_CHAR_TABLE = str.maketrans({
    '\\': '\uff3c',
    '/': '\uff0f',
    ':': '\uff1a',
    '*': '\uff0a',
    '?': '\uff1f',
    '"': '\uff02',
    '<': '\uff1c',
    '>': '\uff1e',
    '|': '\uff5c',
})


def _safe_dirname(title):
    """Return *title* made usable as a Windows directory name."""
    # Windows also rejects names that end in a space or a dot.
    cleaned = title.translate(_RESERVED_CHAR_TABLE).strip().rstrip('. ')
    return cleaned or '_'


def 开始下载(下载页):
    """Download every image of every album listed on one index page.

    The index page ``http://www.mzitu.com/page/<下载页>/`` is fetched and each
    album link under ``//ul[@id="pins"]`` is visited.  For every album a
    directory ``h:/美女图/<album title>`` is created (if missing) and the
    image of each album page ``<album url>/<i>`` is saved there as
    ``<i>.jpg``.  Progress is printed for every image.

    Args:
        下载页: Index page number to download; it is interpolated into the
            index URL and the progress message.

    Raises:
        IndexError: If an expected element (album title, page count or image
            source) is not found in a fetched page.
        ValueError: If the text taken as the album's page count is not an
            integer.
        requests.exceptions.RequestException: If a request fails or exceeds
            the timeout.
    """
    index_url = 'http://www.mzitu.com/page/{}/'.format(下载页)
    index_page = html.fromstring(
        requests.get(index_url, headers=获取headers(), timeout=_REQUEST_TIMEOUT).content)
    for album_url in index_page.xpath('//ul[@id="pins"]/li/a/@href'):
        album_home = html.fromstring(
            requests.get(album_url, headers=获取headers(), timeout=_REQUEST_TIMEOUT).content)
        album_title = _safe_dirname(
            album_home.xpath('//div/h2[@class="main-title"]/text()')[0])
        # The second-to-last matching span text is used as the page count.
        page_count = int(album_home.xpath('//div/a/span/text()')[-2])
        album_dir = 'h:/美女图/{}'.format(album_title)
        # Creates the parent folder too and tolerates an existing directory,
        # so re-running the download does not fail here.
        os.makedirs(album_dir, exist_ok=True)
        for i in range(1, page_count + 1):
            image_page = html.fromstring(
                requests.get(album_url + '/{}'.format(i), headers=获取headers(),
                             timeout=_REQUEST_TIMEOUT).content)
            image_url = image_page.xpath('//div[@class="main-image"]/p/a/img/@src')[0]
            print('正在下载第{}页{}相册的第{}张美图'.format(下载页, album_title, i))
            # Fetch before opening the file so a failed download does not
            # leave an empty .jpg behind.
            image_bytes = requests.get(
                image_url, headers=获取headers(), timeout=_REQUEST_TIMEOUT).content
            with open('{}/{}.jpg'.format(album_dir, i), 'wb') as f:
                f.write(image_bytes)
# Script driver: read the total page count from the site's home page, ask
# the user for an inclusive page range, then download each page in turn.
首页内容 = html.fromstring(requests.get('http://www.mzitu.com', headers=获取headers()).content)
# The second-to-last link text under //nav/div is shown to the user as the
# total page count (presumably the last page number -- unverified).
总页数 = 首页内容.xpath("//nav/div/a/text()")[-2]
开始下载页 = int(input(''.join(('妹子图当天总页数为:', 总页数, '页,请输入从第几页开始下载。'))))
结束下载页 = int(input(''.join(('妹子图当天总页数为:', 总页数, '页,请输入下载至第几页。'))))
# Make sure the root output folder exists before any album folder is created.
if not os.path.exists('h:/美女图'):
    os.mkdir(r'h:/美女图')
for 下载页 in range(开始下载页, 结束下载页 + 1):
    开始下载(下载页)
print('下载已完成')