Python爬虫源码：微信公众号单页多音频MP3批量采集提取保存音频文件资源-CSDN文库

共2个文件

png：1个

py：1个

python

爬虫

微信

需积分: 50 171 浏览量 2022-03-20 10:46:58 上传评论 2 收藏 71KB ZIP 举报

资源详情

资源评论

资源推荐

收起资源包目录

MP.zip （2个子文件）

mpVoiceDownload.py 2KB

运行效果.png 75KB

# Simple Python scraping exercise: batch-download the audio clips embedded in
# a WeChat Official Account article, creating the output directory on demand.
# Author: QQ33732186 (takes on custom Python data-collection scripts)
import os
import time

import requests
from bs4 import BeautifulSoup

######## Parameters
# URL of the article page that contains the MP3 clips
subpage = 'https://mp.weixin.qq.com/s/c_cwAc32MM7etO7VfcxauQ'
# Output directory, with a trailing "/" slash
basedir = 'c:/temp/'
################

# Seconds to wait for the server before giving up on a request; without a
# timeout a stalled connection would block the script forever.
REQUEST_TIMEOUT = 30
# Endpoint that serves a clip; the clip's file id is appended to it.
VOICE_URL = 'https://res.wx.qq.com/voice/getvoice?mediaid='
# Characters that are not allowed in Windows file/directory names (plus
# control whitespace), each mapped to "_".
_UNSAFE_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|\r\n\t'})


def _safe_name(text):
    """Return *text* made safe for use as a single path component."""
    cleaned = text.strip().translate(_UNSAFE_CHARS)
    # Windows silently drops trailing dots/spaces, which would make the
    # "already downloaded" check look at a different path than the one written.
    return cleaned.strip().rstrip('. ')


########### Download function
def getaudio(res, nub=0):
    """Download every ``<mpvoice>`` clip of one article page.

    The clips are saved as ``<basedir>/<article title>/<clip name>.mp3``.
    Files that already exist are skipped, so the function can be re-run to
    resume an interrupted download.

    Args:
        res: URL of the article page.
        nub: Sequence number of the page, used only in the final log line.

    Raises:
        requests.RequestException: If the article page cannot be fetched.
        ValueError: If the page has no usable ``h1.rich_media_title`` title.
    """
    # Fetch the article page and fail loudly on HTTP errors.
    response = requests.get(res, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Hand the raw bytes to the parser so it can detect the page encoding
    # itself instead of relying on the HTTP header guess.
    soup = BeautifulSoup(response.content, 'lxml')

    heading = soup.find('h1', attrs={'class': 'rich_media_title'})
    # get_text() also works when the <h1> contains nested tags, where
    # ``.string`` would be None.
    pagesubject = _safe_name(heading.get_text()) if heading is not None else ''
    if not pagesubject:
        raise ValueError('未找到文章标题：%s' % res)
    savedir = os.path.join(basedir, pagesubject)

    # Collect all audio tags of the page.
    mpvoices = soup.find_all('mpvoice')

    # Create the directory once, and only when there is something to save.
    if mpvoices and not os.path.isdir(savedir):
        print('创建目录：%s' % savedir)
        os.makedirs(savedir, exist_ok=True)

    for n, mid in enumerate(mpvoices, start=1):
        # The name attribute may already end in ".mp3"; drop it so the
        # extension is not doubled. Fall back to a numbered name when the
        # attribute is missing or empty.
        rawname = (mid.get('name') or '').replace('.mp3', '')
        mp3name = _safe_name(rawname) or 'save_%s' % n
        filesavepath = os.path.join(savedir, mp3name + '.mp3')

        if os.path.exists(filesavepath):
            print('---跳过文件：%s.mp3' % mp3name)
            continue

        fileid = mid.get('voice_encode_fileid')
        if not fileid:
            print('---缺少音频ID，跳过：%s.mp3' % mp3name)
            continue

        print('正在下载：%s.mp3' % mp3name)
        try:
            req = requests.get(VOICE_URL + fileid, timeout=REQUEST_TIMEOUT)
            # Without this an error page would be stored as an .mp3 and then
            # skipped as "already downloaded" on every later run.
            req.raise_for_status()
        except requests.RequestException as exc:
            print('---下载失败：%s.mp3 (%s)' % (mp3name, exc))
            continue

        # Write to a temporary name first so an interrupted write never
        # leaves a truncated file under the final name.
        partpath = filesavepath + '.part'
        with open(partpath, 'wb') as f:
            f.write(req.content)
        os.replace(partpath, filesavepath)

        # Be polite to the server between downloads.
        time.sleep(1)

    print("第%d个主题【%s】完成" % (nub, pagesubject))
############## End of function definitions


if __name__ == '__main__':
    getaudio(subpage, 1)
    print('\n下载任务完成-------------')