# -*- coding: utf-8 -*-
import urllib2,re,argparse,json,time
import MySQLdb as mdb
import metautils,traceback,Queue,socket
# MySQL connection settings.
DB_HOST = '127.0.0.1'
DB_PORT = '3306'
DB_USER = 'root'
# MySQL password
DB_PASS = ''
# Database name
DB_NAME = 'pan'

# Not referenced in the visible part of the file; presumably the pause
# between crawl rounds -- confirm against the caller.
SPIDER_INTERVAL = 1

# Status codes stored in BaiduPanSpider.errno:
#   ERR_NO     -- normal
#   ERR_REFUSE -- crawling too fast, the request was refused
#   ERR_EX     -- unknown error
ERR_NO, ERR_REFUSE, ERR_EX = 0, 1, 2
def getHtml(url, ref=None, reget=5):
    """Download *url* and return the raw response body.

    Arguments:
        url   -- address to fetch.
        ref   -- optional value for the ``Referer`` request header.
        reget -- how many times to retry after a failed attempt.

    A fixed desktop-browser ``User-Agent`` header is always sent and each
    attempt uses a 10 second timeout.  After a failed attempt the function
    sleeps 2 seconds and tries again, up to *reget* retries.

    Raises:
        SystemExit -- when the initial attempt and all retries failed (this
                      keeps the original "terminate the script" behaviour).
    """
    attempt = 0
    while True:
        try:
            request = urllib2.Request(url)
            request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36')
            if ref:
                request.add_header('Referer', ref)
            page = urllib2.urlopen(request, timeout=10)
            try:
                return page.read()
            finally:
                # Release the socket/file handle even if read() fails.
                page.close()
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that Ctrl-C
            # (KeyboardInterrupt) stops the crawler instead of triggering
            # another retry.
            if attempt >= reget:
                print('request url:' + url)
                print('failed to fetch html')
                # Equivalent to the ``exit()`` builtin, without relying on
                # the ``site`` module having installed it.
                raise SystemExit
            attempt += 1
            print('getHtml error,reget...%d' % attempt)
            time.sleep(2)
class Db(object):
    """Thin wrapper around one MySQLdb connection and a single cursor.

    The connection is opened lazily by the first ``execute`` call and has
    autocommit switched off, so callers have to ``commit`` or ``rollback``
    themselves.
    """

    def __init__(self):
        # Both stay None until conn() is called.
        self.dbconn = None
        self.dbcurr = None

    def check_conn(self):
        """Return True when ``dbconn.ping()`` succeeds, False otherwise."""
        try:
            self.dbconn.ping()
        except Exception:
            # Also reached when dbconn is None (never connected or closed).
            return False
        return True

    def conn(self):
        """Open a connection from the module-level DB_* settings.

        Autocommit is disabled and a fresh cursor is stored in ``dbcurr``.
        """
        # DB_PORT is kept as a string in the settings; MySQLdb wants an int.
        self.dbconn = mdb.connect(DB_HOST, DB_USER, DB_PASS, DB_NAME,
                                  port=int(DB_PORT), charset='utf8')
        self.dbconn.autocommit(False)
        self.dbcurr = self.dbconn.cursor()

    def fetchone(self):
        """Return the next row of the cursor's current result set."""
        return self.dbcurr.fetchone()

    def fetchall(self):
        """Return all remaining rows of the cursor's current result set."""
        return self.dbcurr.fetchall()

    def _run(self, sql, args):
        # Pass ``args`` only when it is non-empty, as the original code did.
        if args:
            return self.dbcurr.execute(sql, args)
        return self.dbcurr.execute(sql)

    def execute(self, sql, args=None, falg=False):
        """Run *sql* on the cursor and return the cursor's result.

        Arguments:
            sql  -- statement text.
            args -- optional parameters handed to the cursor.
            falg -- unused; kept for call compatibility.

        If the statement fails while the connection is still alive, the
        traceback is printed and None is returned.  If the connection is
        gone, it is re-opened and the statement is run once more; an error
        from that second run propagates to the caller.
        """
        if not self.dbconn:
            # First use: connect to the database.
            self.conn()
        try:
            return self._run(sql, args)
        except Exception:
            if self.check_conn():
                print('execute error')
                traceback.print_exc()
                return None
            print('reconnect mysql')
            self.conn()
            return self._run(sql, args)

    def commit(self):
        """Commit the current transaction."""
        self.dbconn.commit()

    def rollback(self):
        """Roll back the current transaction."""
        self.dbconn.rollback()

    def close(self):
        """Close the cursor, then the connection; safe to call repeatedly.

        The cursor is closed first because it belongs to the connection;
        both attributes are reset to None so a later ``execute`` reconnects.
        """
        cursor, connection = self.dbcurr, self.dbconn
        self.dbcurr = self.dbconn = None
        try:
            if cursor is not None:
                cursor.close()
        finally:
            # Close the connection even if closing the cursor failed.
            if connection is not None:
                connection.close()

    def last_row_id(self):
        """Return the cursor's ``lastrowid`` attribute."""
        return self.dbcurr.lastrowid
class BaiduPanSpider(object):
def __init__(self):
    """Set up the database wrapper, counters, work queue and status flags."""
    # Crawl state: starts stopped, with no error recorded.
    self.status = 'stop'
    self.errno = ERR_NO

    # Progress counters, all starting at zero.
    self.got_files_count = 0
    self.got_follow_count = 0
    self.while_count = 0

    # Storage and work buffers.
    self.db = Db()
    self.files = []
    self.spider_queue = Queue.Queue(maxsize=20)

    # Numeric code for each file category name.
    self.file_type_t = {
        'video': 0,
        'image': 1,
        'document': 2,
        'music': 3,
        'package': 4,
        'software': 5,
        'torrent': 6,
        'other': -1,
    }
def getShareUser(self, uk):
    """Fetch the share/fans/follow/album counters of user *uk*.

    Returns a dict with the keys ``pubshare_cnt``, ``fans``, ``follow`` and
    ``album``.  When the response's ``errno`` is non-zero, returns False
    and sets ``self.errno`` to ERR_REFUSE (API errno -55) or ERR_EX.
    """
    url = 'http://yun.baidu.com/share/count?uk=%d&channel=chunlei&clienttype=0&web=1' % uk
    # NOTE(review): the bare uk is passed as getHtml's ``ref`` argument, so
    # it ends up as the Referer header; other methods pass a page URL.
    follows_json = json.loads(getHtml(url, uk))
    api_errno = follows_json['errno']
    if api_errno != 0:
        self.errno = ERR_REFUSE if api_errno == -55 else ERR_EX
        return False

    # NOTE(review): the original read follows_json['follows_json'], which
    # looks like a copy-paste slip and raises KeyError unless the response
    # really has such a field.  The actual field name is not visible in
    # this file, so try the original key and the likely spellings, and fall
    # back to None -- confirm against a live response.
    album = None
    for key in ('follows_json', 'album', 'album_cnt', 'album_count'):
        if key in follows_json:
            album = follows_json[key]
            break

    return {
        'pubshare_cnt': follows_json['pubshare_cnt'],
        'fans': follows_json['fans'],
        'follow': follows_json['follow'],
        'album': album,
    }
def getHotUser(self):
    """Fetch the hot-user list (the URL asks for up to 24 entries).

    Returns a list with one dict per user (``hot_uname``, ``hot_uk``,
    ``avatar_url``, ``intro`` and the four ``*_count`` fields); the text
    fields are UTF-8 encoded.  Returns False when the response's ``errno``
    is non-zero or the list is empty.
    """
    url = 'http://yun.baidu.com/pcloud/friend/gethotuserlist?type=1&from=feed&start=0&limit=24&channel=chunlei&clienttype=0&web=1'
    payload = json.loads(getHtml(url))
    if payload['errno'] != 0:
        print(u'failed to fetch hot users')
        return False

    hot_users = [
        {
            'hot_uname': entry['hot_uname'].encode('utf-8'),
            'hot_uk': entry['hot_uk'],
            'avatar_url': entry['avatar_url'].encode('utf-8'),
            'intro': entry['intro'].encode('utf-8'),
            'follow_count': entry['follow_count'],
            'fans_count': entry['fans_count'],
            'pubshare_count': entry['pubshare_count'],
            'album_count': entry['album_count'],
        }
        for entry in payload['hotuser_list']
    ]

    if not hot_users:
        print("got no hot users")
        return False
    print("success to fetched hot users: %d" % len(hot_users))
    return hot_users
def getFans(self, uk, start=0, limit=24):
    """Fetch one page of the fans list of user *uk*.

    Arguments:
        uk    -- user id, sent as ``query_uk``.
        start -- sent as the ``start`` query parameter (described as the
                 current page in the original comment).
        limit -- maximum number of entries per page.

    Returns False when the response's ``errno`` is non-zero, otherwise the
    tuple ``(total_count, count, fans)`` where ``total_count`` comes from
    the response, ``count`` is the number of entries on this page and
    ``fans`` is a list of dicts with UTF-8 encoded text fields.
    """
    follows_url = 'http://yun.baidu.com/pcloud/friend/getfanslist?query_uk=%d&limit=%d&start=%d' % (uk, limit, start)
    # The raw uk is what gets passed as getHtml's ``ref`` argument here.
    payload = json.loads(getHtml(follows_url, uk))
    if payload['errno'] != 0:
        print(u'failed to fetch fens')
        return False

    total_count = payload['total_count']
    fans = [
        {
            'fans_uname': entry['fans_uname'].encode('utf-8'),
            'fans_uk': entry['fans_uk'],
            'avatar_url': entry['avatar_url'].encode('utf-8'),
            'intro': entry['intro'].encode('utf-8'),
            'follow_count': entry['follow_count'],
            'fans_count': entry['fans_count'],
            'pubshare_count': entry['pubshare_count'],
            'album_count': entry['album_count'],
        }
        for entry in payload['fans_list']
    ]
    return (total_count, len(fans), fans)
def getFollows(self, uk, start=0, limit=24):
    """Fetch one page of the users that *uk* follows.

    Arguments:
        uk    -- user id, sent as ``query_uk``.
        start -- sent as the ``start`` query parameter.
        limit -- sent as the ``limit`` query parameter.

    Returns the tuple ``(total_count, count, follows)``; ``follows`` is
    only filled when ``total_count`` is positive.  When the response's
    ``errno`` is non-zero the error and URL are printed, ``self.errno`` is
    set to ERR_REFUSE (API errno -55) or ERR_EX, and False is returned.
    """
    follows_url = 'http://yun.baidu.com/pcloud/friend/getfollowlist?query_uk=%d&limit=%d&start=%d&bdstoken=d82467db8b1f5741daf1d965d1509181&channel=chunlei&clienttype=0&web=1' % (uk, limit, start)
    ref = 'http://yun.baidu.com/pcloud/friendpage?type=follow&uk=%d&self=1' % uk
    payload = json.loads(getHtml(follows_url, ref))

    api_errno = payload['errno']
    if api_errno != 0:
        print('getFollows errno:%d' % api_errno)
        print('request_url:' + follows_url)
        self.errno = ERR_REFUSE if api_errno == -55 else ERR_EX
        return False

    total_count = payload['total_count']
    # The list is only read when the API reports at least one entry.
    entries = payload['follow_list'] if total_count > 0 else []
    follows = [
        {
            'follow_uname': entry['follow_uname'].encode('utf-8'),
            'follow_uk': entry['follow_uk'],
            'avatar_url': entry['avatar_url'].encode('utf-8'),
            'intro': entry['intro'].encode('utf-8'),
            'follow_count': entry['follow_count'],
            'fans_count': entry['fans_count'],
            'pubshare_count': entry['pubshare_count'],
            'album_count': entry['album_count'],
        }
        for entry in entries
    ]
    return (total_count, len(follows), follows)
def getShareLists(self, uk, start=0, limit=60):
    """Fetch one page of the public share feed of user *uk*.

    Arguments:
        uk    -- user id, sent as ``query_uk``.
        start -- sent as the ``start`` query parameter.
        limit -- sent as the ``limit`` query parameter.

    Returns the tuple ``(total_count, count, shares)``.  ``count`` is the
    number of records on the page; ``shares`` only contains records whose
    ``feed_type`` is ``'share'`` or ``'album'``.  ``isdir`` is taken from
    the file for a single-file share, 1 for a multi-file share and 2 for
    an album.  When the response's ``errno`` is non-zero the error and URL
    are printed, ``self.errno`` is set to ERR_REFUSE (API errno -55) or
    ERR_EX, and False is returned.
    """
    sharelists_url = 'http://yun.baidu.com/pcloud/feed/getsharelist?category=0&auth_type=1&request_location=share_home&start=%d&limit=%d&query_uk=%d&channel=chunlei&clienttype=0&web=1' % (start, limit, uk)
    ref = 'http://yun.baidu.com/share/home?uk=%d&view=share' % uk
    payload = json.loads(getHtml(sharelists_url, ref))

    api_errno = payload['errno']
    if api_errno != 0:
        print('getShareLists errno:%d' % api_errno)
        print('request_url:' + sharelists_url)
        self.errno = ERR_REFUSE if api_errno == -55 else ERR_EX
        return False

    total_count = payload['total_count']
    # The record list is only read when the API reports at least one entry.
    records = payload['records'] if total_count > 0 else []

    shares = []
    count = 0
    for record in records:
        count += 1
        feed_type = record['feed_type']
        if feed_type not in ('share', 'album'):
            # Other feed types are counted but not collected.
            continue

        isdir, size, md5 = 0, 0, ''
        if feed_type == 'share':
            if record['filecount'] == 1:
                only_file = record['filelist'][0]
                isdir = only_file['isdir']
                size = only_file['size']
                md5 = only_file['md5']
            else:
                isdir = 1
        else:
            # Album: the id is read (a missing key still raises) but it is
            # not part of the returned record.
            album_id = record['album_id']
            isdir = 2

        shorturl = record.get('shorturl', '')
        shares.append({
            'title': record['title'].encode('utf-8'),
            'shorturl': shorturl,
            'shareid': record['source_id'],
            # Share time; presumably milliseconds reduced to seconds.
            'feed_time': record['feed_time'] // 1000,
            'dCnt': record['dCnt'],
            'isdir': isdir,
            'size': size,
            'md5': md5,
            'uk': uk,
            'feed_type': feed_type,
        })
    return (total_count, count, shares)
def getAlbum(self,uk,start=0,limit=60):
url='http://yun.baidu.com/pcloud/album/getlist?start=%d&limit=%d&query_uk=%d&channel=chunlei&clienttype=0&web=1&bdstoken=d82467db8b1f5741daf1d965d1509181'%(start,limit,uk)
album_json=json.loa
没有合适的资源?快使用搜索试试~ 我知道了~
基于PHP的百度云网盘搜索引擎PHP版源码.zip
共323个文件
php:231个
html:47个
css:8个
1.该资源内容由用户上传,如若侵权请联系客服进行举报
2.虚拟产品一经售出概不退款(资源遇到问题,请及时私信上传者)
版权申诉
0 下载量 68 浏览量
2023-09-03
23:39:28
上传
评论 1
收藏 1.13MB ZIP 举报
温馨提示
基于PHP的百度云网盘搜索引擎PHP版源码.zip
资源推荐
资源详情
资源评论
收起资源包目录
基于PHP的百度云网盘搜索引擎PHP版源码.zip (323个子文件)
bootstrap.css 139KB
bootstrap.css 127KB
bootstrap.min.css 115KB
bootstrap-ie6.css 115KB
style.css 2KB
ie.css 1KB
style.css 694B
zzsc.css 0B
glyphicons-halflings-regular.eot 20KB
.htaccess 127B
.htaccess 123B
.htaccess 123B
.htaccess 117B
index.html 142B
index.html 142B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
index.html 131B
favicon.ico 1KB
search.tpl.in 7KB
search.php.in 4KB
suggest.php.in 746B
pan.ini 442B
demo.ini 183B
logo.jpg 4KB
pix.jpg 1KB
bootstrap.js 68KB
bootstrap.min.js 35KB
base64.js 7KB
zzsc.js 5KB
base64.min.js 4KB
login_verify.js 2KB
How-to-Install.md 3KB
README.md 1KB
XS.php 80KB
DB_query_builder.php 61KB
Simple_html_dom.php 57KB
Email.php 49KB
DB_driver.php 43KB
Image_lib.php 42KB
Xmlrpc.php 40KB
Form_validation.php 36KB
Loader.php 35KB
Http_proxy.php 34KB
parser.php 34KB
Upload.php 30KB
Security.php 27KB
Jquery.php 25KB
XSDataSource.class.php 24KB
DB_forge.php 23KB
Encryption.php 23KB
form_helper.php 22KB
Input.php 22KB
Common.php 21KB
Javascript.php 20KB
Session.php 20KB
Profiler.php 20KB
Output.php 20KB
共 323 条
- 1
- 2
- 3
- 4
资源评论
易小侠
- 粉丝: 6454
- 资源: 9万+
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功