没有合适的资源?快使用搜索试试~ 我知道了~
爬虫代码：先爬取数据，再进行数据清洗、可视化以及数据挖掘操作。
资源推荐
资源详情
资源评论
# coding:utf-8
import urllib
import urllib2
import re
import sys
from bs4 import BeautifulSoup
# Python 2 idiom: re-load the sys module so that setdefaultencoding is
# callable again, then make UTF-8 the process-wide default encoding used
# for implicit str/unicode conversions. The two statements must stay in
# this order. Neither `reload` as a builtin nor sys.setdefaultencoding
# exists in Python 3.
reload(sys)
sys.setdefaultencoding("utf-8")
# URL prefix of the listing pages; it ends in "pn" and startscrapy appends
# str(page) to the url it receives (presumably this value -- the call site
# is not visible in this excerpt).
url1 = 'http://cs.58.com/ershoufang/pn'
# File path on the local machine; presumably the output file handed to
# startscrapy as `addr` -- TODO confirm, its use is not visible here.
addr1="/home/hadoop/58fang-ml-line1.txt"
def startscrapy(page, url, addr):
url=url + str(page)
user_agent='Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20131029 Firefox/17.0'
headers={'User-Agent':user_agent}
try:
request = urllib2.Request(url,headers=headers)
response = urllib2.urlopen(request)
content = response.read().decode('utf-8')
soup = BeautifulSoup(content,"html.parser")
house_list = soup.select('.house-list-wrap > li ')
for house in house_list:
room = house.select('.list-info > .baseinfo > span ')[0].string.encode('utf-8').replace(' ', '')
area = house.select('.list-info > .baseinfo > span ')[1].string.encode('utf-8')
if (len(house.select('.list-info > .baseinfo > span '))>3):
floors = house.select('.list-info > .baseinfo > span ')[3].string.encode('utf-8')
else:
continue
import urllib
import urllib2
import re
import sys
from bs4 import BeautifulSoup
# Python 2 idiom: re-load the sys module so that setdefaultencoding is
# callable again, then make UTF-8 the process-wide default encoding used
# for implicit str/unicode conversions. The two statements must stay in
# this order. Neither `reload` as a builtin nor sys.setdefaultencoding
# exists in Python 3.
reload(sys)
sys.setdefaultencoding("utf-8")
# URL prefix of the listing pages; it ends in "pn" and startscrapy appends
# str(page) to the url it receives (presumably this value -- the call site
# is not visible in this excerpt).
url1 = 'http://cs.58.com/ershoufang/pn'
# File path on the local machine; presumably the output file handed to
# startscrapy as `addr` -- TODO confirm, its use is not visible here.
addr1="/home/hadoop/58fang-ml-line1.txt"
def startscrapy(page, url, addr):
url=url + str(page)
user_agent='Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20131029 Firefox/17.0'
headers={'User-Agent':user_agent}
try:
request = urllib2.Request(url,headers=headers)
response = urllib2.urlopen(request)
content = response.read().decode('utf-8')
soup = BeautifulSoup(content,"html.parser")
house_list = soup.select('.house-list-wrap > li ')
for house in house_list:
room = house.select('.list-info > .baseinfo > span ')[0].string.encode('utf-8').replace(' ', '')
area = house.select('.list-info > .baseinfo > span ')[1].string.encode('utf-8')
if (len(house.select('.list-info > .baseinfo > span '))>3):
floors = house.select('.list-info > .baseinfo > span ')[3].string.encode('utf-8')
else:
continue
资源评论
tist.cug
- 粉丝: 0
- 资源: 1
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功