# Scrapy settings for beike project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
# https://docs.scrapy.org/en/latest/topics/settings.html
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
BOT_NAME = 'beike'
SPIDER_MODULES = ['beike.spiders']
NEWSPIDER_MODULE = 'beike.spiders'
# Crawl responsibly by identifying yourself (and your website) on the user-agent
# USER_AGENT = 'beike (+http://www.yourdomain.com)'
# Obey robots.txt rules
ROBOTSTXT_OBEY = False
# Configure maximum concurrent requests performed by Scrapy (default: 16)
# CONCURRENT_REQUESTS = 32
# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
# DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
# CONCURRENT_REQUESTS_PER_DOMAIN = 16
# CONCURRENT_REQUESTS_PER_IP = 16
# Disable cookies (enabled by default)
COOKIES_ENABLED = True
# Disable Telnet Console (enabled by default)
# TELNETCONSOLE_ENABLED = False
# Override the default request headers:
DEFAULT_REQUEST_HEADERS = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language': 'en',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
'Cookie': 'digv_extends=%7B%22utmTrackId%22%3A%2280418643%22%7D; lianjia_uuid=3d726c57-6d3f-4f6c-95a2-8b7abc9faeac; select_city=110000; lianjia_ssid=4473e3e6-43e7-4181-bb6a-ebb23ef4ec07; sajssdk_2015_cross_new_user=1; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%221769ce5aab93e7-0c28ca07c23265-59442e11-1327104-1769ce5aaba695%22%2C%22%24device_id%22%3A%221769ce5aab93e7-0c28ca07c23265-59442e11-1327104-1769ce5aaba695%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E4%BB%98%E8%B4%B9%E5%B9%BF%E5%91%8A%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22https%3A%2F%2Fwww.baidu.com%2Fother.php%22%2C%22%24latest_referrer_host%22%3A%22www.baidu.com%22%2C%22%24latest_search_keyword%22%3A%22%E8%B4%9D%E5%A3%B3%22%2C%22%24latest_utm_source%22%3A%22baidu%22%2C%22%24latest_utm_medium%22%3A%22pinzhuan%22%2C%22%24latest_utm_campaign%22%3A%22wymoren%22%2C%22%24latest_utm_content%22%3A%22biaotimiaoshu%22%2C%22%24latest_utm_term%22%3A%22biaoti%22%7D%7D; crosSdkDT2019DeviceId=sgaxf7--ohhq7q-mq2s3hm3qk16atd-otg4hxhsr; _ga=GA1.2.999642397.1608950071; _gid=GA1.2.1417890696.1608950071; __xsptplusUT_788=1; __xsptplus788=788.1.1608950072.1608950072.1%234%7C%7C%7C%7C%7C%23%23duwbmR1LtYCy9OIqePHhHWS1htLXHyiz%23; Hm_lvt_9152f8221cb6243a53c83b956842be8a=1608950065,1608950073,1608950215; Hm_lpvt_9152f8221cb6243a53c83b956842be8a=1608950261; srcid=eyJ0Ijoie1wiZGF0YVwiOlwiM2M1NmJhYjliNzhmYzhhYzYzYWUyZGVjOWZmZWJjMjQwYzJhZmFlYmRjZTk4YWU2M2E3MDU4MjY3MDFlNDc5MThlNzkwMDI1NWM4NzNkYTA5YmQyZjBkZDFjZGIxZDg1YmJkMDlmODlmYzFkZGQxOTNiNGI3ZGU5MTU5ZmZlYWVlNWJlMjIzNTFkNzk2NDJkOTI4ZDYzYWEzNjkwYTVlNGU3MDRhMDcxYzQ5NDhmN2RiMzdjMGZiZGExZGY3NzdlZjYyMWZkOGMwMTAzMGNlZmUxNWZmYzAyMjlkODA0MTczZjE1MGRmOTFiYjZjZTgzNDEyY2JlOThjNDMwYzI1YjU2NGI2M2Q4ZTUxZjA5ZmM5MTgyMGVjZWY2OTA2ZDhkN2JiYWYxMzFkZDkxZjU3YjUxZWZhNTZjM2EyNzczMGI4ODgxNGFhNGViNjA5YjlhMjMxYmI0OWZiNzEyNzBhNFwiLFwia2V5X2lkXCI6XCIxXCIsXCJzaWduXCI6XCJjNDBjMDg1ZVwifSIsInIiOiJodHRwczovL2JqLmtlLmNvbS9lcnNob3VmYW5nL3BnMi8iLCJvcyI6IndlYiIsInYiOiIwLjEifQ==; login_ucid=2000000074667028; lianjia_token=2.0015e8780f68fe41f70445513e50d1f7b5; lianjia_token_secure=2.0015e8780f68fe41f70445513e50d1f7b5; security_ticket=WyjQtDuz1ImoP8myKzaHDGUewY7FuWIViEWxA+VfVYPS1kh3NigeIWicj7EQoTgPFJUTK6nPMHlbU+pvTlI4XRKfiyiRoeyEjIqFkcidofJneE75XwFlyXW1/eb85/AktQwvEFK2zqJHTb5owtGQiVxFGh2l/UFVDVJMjHsN4Ec='
}
# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
# SPIDER_MIDDLEWARES = {
# 'beike.middlewares.BeikeSpiderMiddleware': 543,
# }
# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
# DOWNLOADER_MIDDLEWARES = {
# 'beike.middlewares.BeikeDownloaderMiddleware': 543,
# }
# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
# EXTENSIONS = {
# 'scrapy.extensions.telnet.TelnetConsole': None,
# }
# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
'beike.pipelines.BeikePipeline': 300,
}
# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
# AUTOTHROTTLE_ENABLED = True
# The initial download delay
AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
AUTOTHROTTLE_DEBUG = False
# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
# HTTPCACHE_ENABLED = True
# HTTPCACHE_EXPIRATION_SECS = 0
# HTTPCACHE_DIR = 'httpcache'
# HTTPCACHE_IGNORE_HTTP_CODES = []
# HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
没有合适的资源?快使用搜索试试~ 我知道了~
资源推荐
资源详情
资源评论
收起资源包目录
贝壳.zip (44个子文件)
py
p4.py 1KB
户型玫瑰图.html 4KB
p6.py 678B
价格柱状图.html 5KB
p1.py 999B
价格饼图.html 4KB
p2.py 989B
面积堆叠图.html 6KB
词云.html 287B
p3.py 971B
价格轴交换图.html 8KB
p5.py 1KB
词云字体.ttf 9.32MB
main.py 512B
网页截图
柱状图.png 24KB
玫瑰图.png 32KB
堆叠图.png 18KB
轴交换图.png 26KB
饼图.png 35KB
贝壳词云.png 274KB
网页背景.png 504KB
.idea
misc.xml 209B
贝壳.iml 466B
workspace.xml 10KB
inspectionProfiles
profiles_settings.xml 174B
modules.xml 271B
.gitignore 244B
beike.sql 261KB
最终呈现网页.html 2KB
beike
scrapy.cfg 253B
start.py 82B
beike
settings.py 5KB
pipelines.py 1KB
middlewares.py 4KB
__init__.py 0B
items.py 455B
__pycache__
items.cpython-37.pyc 458B
settings.cpython-37.pyc 3KB
pipelines.cpython-37.pyc 1KB
__init__.cpython-37.pyc 144B
spiders
__init__.py 161B
__pycache__
__init__.cpython-37.pyc 152B
beikebeijing.cpython-37.pyc 2KB
beikebeijing.py 3KB
共 44 条
- 1
资源评论
蔡霸霸i
- 粉丝: 1291
- 资源: 7
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功