# -*- coding: UTF-8 -*-
import re
import urllib
import os
# Fetch a web page by URL
def getHtml(url):
    """Open *url* and return everything read from the response.

    Args:
        url: Address handed unchanged to ``urllib.urlopen``.

    Returns:
        The result of ``read()`` on the opened response.
    """
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        # Close the response even if read() raises, so the handle is not leaked.
        page.close()
# Use regular expressions to find image addresses and download them locally
def _absolute_url(link, base='http://www.zbjuran.com'):
    """Return *link* as a URL with a scheme, resolving bare paths against *base*."""
    if link.startswith(('http://', 'https://')):
        return link
    if link.startswith('//'):
        # Scheme-relative link: only the scheme is missing.
        return 'http:' + link
    if link.startswith('www.'):
        # Host is present but the scheme is not; without one the opener
        # would not treat the string as a web address.
        return 'http://' + link
    if not link.startswith('/'):
        link = '/' + link
    return base + link


def getImg(html):
    """Download the images of every gallery found in a listing page.

    Each match in *html* provides a thumbnail address, an alt text and a link
    to a detail page. For every match a directory named after the alt text is
    created under the current working directory, the detail page is fetched
    with ``getHtml``, and each ``.jpg`` found in a ``src`` attribute of that
    page is saved into the directory as ``1.jpg``, ``2.jpg``, ... using
    ``urllib.urlretrieve``.

    Args:
        html: Markup of the listing page.

    Returns:
        None. The results are the files written to disk and the progress
        lines printed to stdout.
    """
    # ``[^"]*`` keeps each capture inside its own quoted attribute value; a
    # ``.*`` here could run on to a later quote on the same line.
    entry_re = re.compile(
        r'data-original="([^"]*\.jpg)"\s+alt="([^"]*)"'
        r'.*?\s+.*?href="([^"]*\.html)"'
    )
    # Compiled once here rather than once per gallery.
    picture_re = re.compile(r'src="([^"]*\.jpg)"')
    base_dir = os.getcwd()

    for thumb_url, title, page_link in entry_re.findall(html):
        print('%s %s %s' % (thumb_url, title, page_link))

        # NOTE(review): the alt text comes from the remote page and is used
        # verbatim as a directory name; it is not sanitised here.
        target_dir = os.path.join(base_dir, title)
        if not os.path.isdir(target_dir):
            # The mode must be octal: decimal 777 is a different bit pattern.
            os.mkdir(target_dir, 0o777)

        page_url = _absolute_url(page_link)
        print('img2 is :' + page_url)

        picture_links = picture_re.findall(getHtml(page_url))
        print(picture_links)
        print('1231312312')

        for index, picture_link in enumerate(picture_links, 1):
            picture_url = _absolute_url(picture_link)
            print('img is :' + picture_url)
            urllib.urlretrieve(
                picture_url, os.path.join(target_dir, '%d.jpg' % index))
            print('123123fsfs')
            # Reports the number the next image will receive.
            print('j is ' + str(index + 1))
# Script body: fetch the listing page, download every gallery it links to,
# then report completion.
index_url = 'http://www.zbjuran.com/mei/xinggan/'
html = getHtml(index_url)
getImg(html)
print('success')