基于Python的广州租房信息爬虫和数据可视化源码+全部资料（毕业设计）.zip资源-CSDN文库

共23个文件

html：7个

xml：5个

py：3个

版权申诉

python

期末大作业

课程设计

73 浏览量 2024-05-18 14:53:04 上传评论收藏 56.3MB ZIP 举报

资源推荐

资源详情

资源评论

收起资源包目录

基于Python的广州租房信息爬虫和数据可视化源码+全部资料（毕业设计）.zip （23个子文件）

graduation-design-beatifulsoup-requests--master

spiderResult1

广州房源分布统计.html 706KB

广州各区每平每月房租单价统计.html 705KB

租房面积统计.html 705KB

广州各区每平每日房租单价统计.html 706KB

户型统计.html 707KB

租房信息词云.html 832KB

样本数量统计.html 706KB

rentHouseSpider-master

learning.py 498B

.idea

rentHouseSpider.iml 431B

workspace.xml 20KB

misc.xml 185B

modules.xml 282B

mongoSettings.xml 578B

encodings.xml 135B

main

houseSpyder.spec 883B

kh.ttf 826KB

租房信息词云.jpg 53KB

analycis.py 12KB

docker.jpg 72KB

houseSpyder.py 6KB

__pycache__

analycis.cpython-36.pyc 9KB

houseSpyder.cpython-36.pyc 5KB

dist

houseSpyder.exe 54.87MB

from os import path
from wordcloud import WordCloud, ImageColorGenerator
import jieba.analyse
import matplotlib.pyplot as plt
from scipy.misc import imread

baseUrl = "http://gz.zu.fang.com"
import time
from pymongo import MongoClient


class Analycis:
    """Query the ``gzzf2`` MongoDB database and chart the results.

    The database is reached through a local MongoDB server
    (``mongodb://localhost:27017/``). Per-district collections are named
    after the district's pinyin (see ``pinyinDir``); the ``rent``
    collection is used for the citywide queries.
    """

    def __init__(self):
        self.client = MongoClient('mongodb://localhost:27017/')
        self.gzzf2 = self.client.gzzf2

    # Maps each district name to its pinyin, which the crawler uses as an
    # index; here it doubles as the MongoDB collection name.
    pinyinDir = {
        "不限": "rent",
        "天河": "tianhe",
        "番禺": "panyu",
        "海珠": "haizhu",
        "白云": "baiyun",
        "越秀": "yuexiu",
        "花都": "huadu",
        "增城": "zengcheng",
        "荔湾": "liwan",
        "黄埔": "huangpu",
        "南沙": "nansha",
        "从化": "conghua",
    }

    # (label, exclusive lower bound, inclusive upper bound) of each area
    # bucket reported by getAcreage. The last bucket is capped at 10000.
    _ACREAGE_BUCKETS = (
        ("0-30平方米", 0, 30),
        ("30-60平方米", 30, 60),
        ("60-90平方米", 60, 90),
        ("90-120平方米", 90, 120),
        ("120-200平方米", 120, 200),
        ("200-300平方米", 200, 300),
        ("300-400平方米", 300, 400),
        ("400+平方米", 400, 10000),
    )

    def getAreaList(self):
        """Return the district names that the statistics iterate over."""
        return [
            # The "不限" entry of pinyinDir is left out of this list.
            "天河",
            "番禺",
            "海珠",
            "白云",
            "越秀",
            "花都",
            "增城",
            "荔湾",
            "黄埔",
            "南沙",
            "从化"
        ]

    def getPinyin(self, region):
        """Return the pinyin of a district.

        Raises:
            KeyError: if ``region`` is not a key of ``pinyinDir``. The
                original fell through to ``return pinyin`` with the name
                unbound and failed with ``UnboundLocalError`` instead.
        """
        try:
            return self.pinyinDir[region]
        except KeyError:
            print("no such region pinyin")
            raise

    def getAvgPrice(self, region):
        """Return a district's average price per square metre.

        The result is the summed ``price`` divided by the summed ``area``
        of the first ``$region`` group in the district's collection.

        Raises:
            IndexError: if the collection yields no group.
            ZeroDivisionError: if the summed area is 0.
        """
        areaPinYin = self.getPinyin(region=region)
        collection = self.gzzf2[areaPinYin]
        print(region)
        # One pipeline for both sums, so price and area are guaranteed to
        # come from the same group.
        cursor = collection.aggregate([
            {'$group': {
                '_id': '$region',
                'total_price': {'$sum': '$price'},
                'total_area': {'$sum': '$area'},
            }}
        ])
        totals = list(cursor)[0]
        print(totals["total_price"])
        return totals["total_price"] / totals["total_area"]

    def getTotalAvgPrice(self):
        """Return each district's monthly price per square metre.

        Returns:
            A list of ``{"value": price, "name": "<district> <price>"}``
            dicts, with the price rounded to 3 decimals.
        """
        areas = self.getAreaList()
        print(areas)
        totalAvgPriceDirList = []
        for region in areas:
            rounded = round(self.getAvgPrice(region), 3)
            totalAvgPriceDirList.append(
                {"value": rounded, "name": region + " " + str(rounded)}
            )
        return totalAvgPriceDirList

    def getTotalAvgPricePerDay(self):
        """Return each district's daily price per square metre.

        The monthly figure is divided by 30 and rounded to 3 decimals.

        Returns:
            A ``(district names, daily prices)`` tuple of parallel lists.
        """
        areas = self.getAreaList()
        totalAvgPriceList = [
            round(self.getAvgPrice(region) / 30, 3) for region in areas
        ]
        return (areas, totalAvgPriceList)

    def getAnalycisNum(self):
        """Return the number of records stored for each district.

        Returns:
            A ``(district names, record counts)`` tuple of parallel lists.
            An empty collection counts as 0 (the original raised
            ``IndexError`` there).
        """
        areas = self.getAreaList()
        analycisList = []
        for region in areas:
            collection = self.gzzf2[self.pinyinDir[region]]
            print(region)
            cursor = collection.aggregate(
                [{'$group': {'_id': '', 'total_num': {'$sum': 1}}}]
            )
            rows = list(cursor)
            # $group emits no document at all for an empty collection.
            analycisList.append(rows[0]["total_num"] if rows else 0)
        # NOTE(review): original indentation was lost; this debug print is
        # assumed to run once after the loop - confirm.
        print(list(analycisList))
        return (areas, analycisList)

    def getAreaWeight(self):
        """Return the number of listings per district from ``rent``.

        Only groups whose ``region`` is in ``getAreaList()`` are kept.

        Returns:
            An ``(area names, listing counts)`` tuple of parallel lists.
        """
        result = self.gzzf2.rent.aggregate(
            [{'$group': {'_id': '$region', 'weight': {'$sum': 1}}}]
        )
        areas = self.getAreaList()  # built once instead of once per row
        areaName = []
        areaWeight = []
        for item in result:
            if item["_id"] in areas:
                areaWeight.append(item["weight"])
                areaName.append(item["_id"])
                # NOTE(review): original indentation was lost; these debug
                # prints are assumed to sit inside the filter - confirm.
                print(item["_id"])
                print(item["weight"])
        return (areaName, areaWeight)

    def getTitle(self):
        """Return the concatenated titles used to build the word cloud.

        Reads the ``title`` field of at most 1000 ``rent`` documents and
        joins them without a separator.
        """
        collection = self.gzzf2["rent"]
        queryArgs = {}
        # The projection is given as a dict: keep ``title``, drop ``_id``.
        projectionFields = {'_id': False, 'title': True}
        searchRes = collection.find(
            queryArgs, projection=projectionFields
        ).limit(1000)
        titles = []
        for result in searchRes:
            print(result["title"])
            titles.append(result["title"])
        return ''.join(titles)

    def getRooms(self):
        """Return the listing count per layout (e.g. 3 rooms, 2 halls).

        Returns:
            A ``(layouts, counts)`` tuple of parallel lists, one entry per
            distinct ``rooms`` value in ``rent``.
        """
        results = self.gzzf2.rent.aggregate(
            [{'$group': {'_id': '$rooms', 'weight': {'$sum': 1}}}]
        )
        roomList = []
        weightList = []
        for result in results:
            roomList.append(result["_id"])
            weightList.append(result["weight"])
        return (roomList, weightList)

    def getAcreage(self):
        """Return the number of ``rent`` listings in each area bucket.

        Returns:
            A ``(labels, counts)`` tuple of parallel lists following
            ``_ACREAGE_BUCKETS``. A bucket with no listings counts as 0
            (the original raised ``IndexError`` there).
        """
        attr = []
        value = []
        for label, lower, upper in self._ACREAGE_BUCKETS:
            cursor = self.gzzf2.rent.aggregate([
                {'$match': {'area': {'$gt': lower, '$lte': upper}}},
                {'$group': {'_id': '', 'count': {'$sum': 1}}}
            ])
            rows = list(cursor)
            # No matching document means $group emits nothing at all.
            value.append(rows[0]["count"] if rows else 0)
            attr.append(label)
        return (attr, value)

    def _renderPie(self, title, attr, value, output_path):
        """Render a labelled "房源分布" pie chart to ``output_path``."""
        from pyecharts import Pie
        pie = Pie(title)
        pie.add("房源分布", attr, value, is_label_show=True)
        pie.render(output_path)

    def showPie1(self, title, attr, value, output_path=None):
        """Render the listing-distribution pie chart.

        Args:
            title: chart title.
            attr: slice labels.
            value: slice values, parallel to ``attr``.
            output_path: HTML file to write; defaults to the original
                hard-coded location.
        """
        if output_path is None:
            output_path = r"c:\Users\Administrator\graduation design\spiderResult1\广州房源分布统计.html"
        self._renderPie(title, attr, value, output_path)

    def showPie2(self, title, attr, value, output_path=None):
        """Render the rental-area pie chart.

        Args:
            title: chart title.
            attr: slice labels.
            value: slice values, parallel to ``attr``.
            output_path: HTML file to write; defaults to the original
                hard-coded location.
        """
        if output_path is None:
            output_path = r"c:\Users\Administrator\graduation design\spiderResult1\租房面积统计.html"
        self._renderPie(title, attr, value, output_path)

    # Show a treemap.
    def showTreeMap(self, title, data):
        from pyecharts import TreeMap
        data = data
        treemap = TreeMap(title, width=1200, height=600)
        # NOTE(review): SOURCE is truncated in the middle of the next call,
        # so the rest of this method is not visible. The fragment is kept
        # verbatim below instead of being guessed at:
        # treemap.add("广州", data, is_label_show=True, label_pos='inside', l

评论收藏

内容反馈

版权申诉