从零学Python,python-Day03.rar资源-CSDN文库

共4个文件

md：3个

py：1个

python

需积分: 5 42 浏览量 2024-03-26 07:06:09 上传评论收藏 3KB RAR 举报

资源推荐

资源详情

资源评论

收起资源包目录

python-Day03.rar （4个子文件）

Day03

book.py 2KB

爬取小说相关指南.md 1010B

if __name__ == '__main__' 是个什么鬼？.md 1KB

python3codecs.open和open的区别.md 217B

"""Download every chapter of a novel from xbiquge.la into per-chapter text files."""
import codecs
import importlib
import os
import re
import sys
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# NOTE(review): this looks like a leftover of the Python 2
# ``reload(sys); sys.setdefaultencoding(...)`` idiom; kept only so that
# importing this module behaves as before -- confirm it can be dropped.
importlib.reload(sys)

# Request headers that imitate a desktop Chrome browser.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'}

# Site root used to resolve the chapter links found on the index page.
server = 'http://www.xbiquge.la/'
# Table-of-contents page of the novel to download.
book = 'http://www.xbiquge.la/5/5623/'

# Directory that receives one .txt file per chapter.
save_path = 'G:/星辰变'
os.makedirs(save_path, exist_ok=True)

# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 30

# Characters that Windows does not allow in file names.
_ILLEGAL_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')


def _safe_filename(title):
    """Return *title* without characters that are illegal in Windows file names."""
    return _ILLEGAL_FILENAME_CHARS.sub('', title).strip()


def get_contents(chapter):
    """Fetch one chapter page and return its body text.

    Args:
        chapter: URL of the chapter page.

    Returns:
        The text of the page's ``<div id="content">`` element, with every run
        of four non-breaking spaces (``\\xa0``) replaced by a newline.

    Raises:
        requests.RequestException: the request failed, timed out, or the
            server answered with an HTTP error status.
        IndexError: the page contains no ``<div id="content">`` element.
    """
    # Send the same browser-like headers as the index request in main().
    req = requests.get(url=chapter, headers=headers, timeout=REQUEST_TIMEOUT)
    req.raise_for_status()
    html_doc = str(req.content, 'utf8')
    bf = BeautifulSoup(html_doc, 'html.parser')
    node = bf.find('div', id='content')
    if node is None:
        # Same exception type the former ``texts[0]`` lookup raised, but with
        # a message that says which page was at fault.
        raise IndexError('no <div id="content"> found at %s' % chapter)
    return node.text.replace('\xa0' * 4, '\n')


def write_txt(chapter, content, code):
    """Append *content* to the file at path *chapter*.

    Args:
        chapter: Path of the file to write; created if it does not exist.
        content: Text to append.
        code: Name of the text encoding used for the file.
    """
    with codecs.open(chapter, 'a', encoding=code) as f:
        f.write(content)


def main():
    """Download each chapter linked from the index page into ``save_path``.

    A failure on a single chapter is printed and the loop moves on to the
    next one; a failure to load or parse the index page aborts the run.
    """
    res = requests.get(book, headers=headers, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()
    html_doc = str(res.content, 'utf8')
    # Parse with the html.parser that ships with Python.
    soup = BeautifulSoup(html_doc, 'html.parser')
    listing = soup.find('div', id='list')
    if listing is None:
        raise RuntimeError('no <div id="list"> found at %s' % book)
    # Every anchor inside the list div is treated as one chapter.
    links = listing.find_all('a')
    print('总章节数: %d ' % len(links))
    for each in links:
        href = each.get('href')
        if not href:
            # An anchor without a target cannot be downloaded.
            continue
        try:
            # urljoin copes with relative, root-relative and absolute hrefs,
            # where plain concatenation would produce a malformed URL.
            chapter_url = urljoin(server, href)
            content = get_contents(chapter_url)
            # get_text() also works when the anchor wraps nested tags,
            # where ``each.string`` would be None.
            file_name = _safe_filename(each.get_text()) + '.txt'
            write_txt(os.path.join(save_path, file_name), content, 'utf8')
        except Exception as e:
            # Best effort: report the problem and carry on with the next chapter.
            print(e)


if __name__ == '__main__':
    main()

评论收藏

内容反馈