# Scrapy settings for scrapy_douban_music project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
# https://docs.scrapy.org/en/latest/topics/settings.html
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
BOT_NAME = 'scrapy_douban_music'
SPIDER_MODULES = ['scrapy_douban_music.spiders']
NEWSPIDER_MODULE = 'scrapy_douban_music.spiders'
# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'scrapy_douban_music (+http://www.yourdomain.com)'
# Obey robots.txt rules
ROBOTSTXT_OBEY = False
# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32
# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
DOWNLOAD_DELAY = 0.5
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16
# Disable cookies (enabled by default)
COOKIES_ENABLED = False
# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False
# Override the default request headers:
DEFAULT_REQUEST_HEADERS = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language': 'en',
'Accept-Encoding': 'gzip, deflate, br',
'Connection': 'keep-alive'
}
# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
# 'scrapy_douban_music.middlewares.ScrapyDoubanMusicSpiderMiddleware': 543,
#}
# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
# 'scrapy_douban_music.middlewares.ScrapyDoubanMusicDownloaderMiddleware': 543,
#}
# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
# 'scrapy.extensions.telnet.TelnetConsole': None,
#}
# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
'scrapy_douban_music.pipelines.ScrapyDoubanMusicPipeline': 300,
'scrapy_douban_music.pipelines.DoubanMusicDownloadPiepline': 301,
}
# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False
# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
# 是否启用重试
RETRY_ENABLED = True
# 重试次数
RETRY_TIMES = 3
# 重试状态码
RETRY_HTTP_CODES = [500, 502, 503, 504, 400, 408]
# 下载超时时间
DOWNLOAD_TIMEOUT = 30
# 日志等级
LOG_LEVEL = 'INFO'
python爬虫-爬取豆瓣音乐
需积分: 0 107 浏览量
2023-05-30
23:18:45
上传
评论 5
收藏 16KB ZIP 举报
Gwak557
- 粉丝: 13
- 资源: 1
最新资源
- Screenshot_20240430_144340_com.ss.android.ugc.live.jpg
- 回到山沟沟.mp3
- 111111111111111111
- 基于matlab实现关于语音信号声源定位DOA估计所用的一些传统算法.rar
- 基于ultralytics-yolov8, 将其检测/分类/分割/姿态等任务移植到rk3588上
- Screenshot_2024-04-30-21-47-24-26.jpg
- 基于matlab实现波束形成,包括线阵、平面阵和圆阵
- Python自动生成excel周期报告源码
- 基于matlab实现DOA 估计和自适应波束形成.rar
- 一个基于yolov8的火灾检测部署
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈