Python 爬取豆瓣电影评论,并进行词云展示
# -*-coding:utf-8-*-
import urllib.request
from bs4 import BeautifulSoup
def getHtml(url):
    """Fetch the page at ``url`` and return its body decoded as UTF-8.

    The request is sent with a desktop-browser User-Agent header instead
    of urllib's default one.

    Args:
        url: Address of the page to download.

    Returns:
        The response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; WOW64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/62.0.3202.94 Safari/537.36'
        ),
    }
    request = urllib.request.Request(url, headers=headers)
    # The context manager closes the response even if read/decode raises.
    with urllib.request.urlopen(request) as response:
        return response.read().decode('utf-8')
def getComment(url):
    """Return the text of every short comment on the page at ``url``.

    The page is downloaded with ``getHtml`` and parsed with BeautifulSoup's
    ``html.parser``. A comment is any ``<span>`` element whose CSS class is
    ``short``.

    Args:
        url: Address of the comment page.

    Returns:
        A list with the text of each matching span, in document order.
        The list is empty when the page has no matching span.
    """
    html = getHtml(url)
    soup = BeautifulSoup(html, 'html.parser')
    # find_all/get_text are the current names of the legacy
    # findAll/getText aliases; class_ matches on the CSS class.
    spans = soup.find_all('span', class_='short')
    return [span.get_text() for span in spans]
- 1
- 2
前往页