from weibopy import WeiboOauth2, WeiboClient
import re
import webbrowser
from collections import defaultdict
import time
import snownlp
from pyecharts.charts import Map
from pyecharts import options as opts
from pyecharts.globals import ThemeType
# --- Step 1: obtain an OAuth2 access token ---------------------------------
# NOTE(review): the app key and secret are hard-coded in this file; consider
# loading them from outside the source before sharing or committing it.
APP_KEY = '3038336820'  # app key
APP_SECRET = '3eb719b2f157ec56509ea00a422abc35'  # app secret
REDIRECT_URL = 'https://api.weibo.com/oauth2/default.html'

auth = WeiboOauth2(APP_KEY, APP_SECRET, REDIRECT_URL)

# Open the authorization URL in a browser so the authorization code can be
# obtained, then read that code from the prompt below.
webbrowser.open_new(auth.authorize_url)

# Exchange the entered code for a token and echo the token to stdout.
token = auth.auth_access(input('输入 code:'))
print(token)
# --- Step 2: fetch one page of comments as a quick check -------------------
# The client is built from the access token obtained in the previous step.
client = WeiboClient(token['access_token'])

# `suffix` names the API endpoint and `params` carries its arguments (they are
# described in the API documentation).  A post id is the trailing number of
# the post URL, e.g. https://m.weibo.cn/detail/4321877356979717.
sample_params = {'id': 4318237070487349, 'count': 200, 'page': 1}
result = client.get(suffix='comments/show.json', params=sample_params)

# If nothing went wrong, this is a dictionary holding up to the first 200
# comments of post 4318237070487349.
print(result)
# --- Step 3: collect the target post's comments, grouped by province -------
# Comment text contains a "回复...:" reply prefix that is noise for sentiment
# analysis; it is replaced with an empty string before the text is stored.
province_list = defaultdict(list)  # province code -> cleaned comment texts
comment_text_list = []  # every cleaned comment text, in fetch order

# Fetch the "自杀式单身" comment list: at most 10 pages, up to 200 comments each.
for page in range(1, 11):
    page_params = {'id': 4322140368509204, 'count': 200, 'page': page}
    page_comments = client.get(suffix='comments/show.json', params=page_params)['comments']
    if len(page_comments) == 0:
        break  # an empty page ends the crawl
    for item in page_comments:
        cleaned = re.sub('回复.*?:', '', str(item['text']))
        province_list[item['user']['province']].append(cleaned)
        comment_text_list.append(cleaned)
    # Report the running total, then pause one second before the next request.
    print('已抓取评论 {} 条'.format(len(comment_text_list)))
    time.sleep(1)
# --- Step 4: build the province lookup table -------------------------------
# `provinces` is later probed with '0010'-prefixed province codes and yields
# the matching province name.
provinces = {}
province_entries = client.get(suffix='common/get_province.json', params={'country': '001'})
for entry in province_entries:
    # Each entry exposes `.items()`; presumably a small {code: name} mapping.
    provinces.update(entry.items())
print(provinces)
# --- Step 5: average sentiment per province --------------------------------
# `positives` maps a province name to the truncated mean sentiment score of
# its comments, expressed on a 0-100 scale.
positives = {}
for raw_code, texts in province_list.items():
    # Score every comment of this province with SnowNLP.
    scores = [snownlp.SnowNLP(body).sentiments for body in texts]
    mean_percent = sum(scores) / len(scores) * 100

    # Province keys in `provinces` carry the country prefix '0010'.
    full_code = '0010' + str(raw_code)
    if full_code not in provinces:
        continue  # codes missing from the province table are skipped
    positives[provinces[full_code]] = int(mean_percent)
# --- Step 6: render the per-province sentiment map -------------------------
# Build [province name, score] pairs from `positives`, keeping at most the
# first 34 entries.
# NOTE(review): 34 presumably matches the number of province-level regions on
# the China map — confirm.
data = [[name, score] for name, score in list(positives.items())[:34]]

# Bound to `sentiment_map` rather than `map` so the builtin `map` is not
# shadowed for the rest of the script.
sentiment_map = (
    Map(init_opts=opts.InitOpts(bg_color="#FFFAFA", theme=ThemeType.ESSOS, width="1000px", height="600px"))
    .add("情感指数", data)
    .set_global_opts(
        title_opts=opts.TitleOpts(title="自杀式单身 情感分析地域图"),
        visualmap_opts=opts.VisualMapOpts(
            is_piecewise=True,  # display the visual map as discrete segments
            # Custom range, label and colour for each segment.
            # NOTE(review): scores below 31 fall outside every segment.
            pieces=[
                # No "max" given: the segment is open-ended (max is Infinity).
                {"min": 67, "label": '70%', "color": "#eb2f06"},
                {"min": 60, "max": 67, "label": '60%', "color": "#FF3030"},
                {"min": 53, "max": 60, "label": '50%', "color": "#FF4500"},
                {"min": 46, "max": 53, "label": '40%', "color": "#FF7F50"},
                {"min": 39, "max": 46, "label": '30%', "color": "#FFA500"},
                {"min": 31, "max": 39, "label": '20%', "color": "#FFDEAD"},
            ],
            # Text shown at the two ends of the legend, e.g. ['High', 'Low'].
            range_text=['高', '低'],
        ),
    )
)
sentiment_map.render(path="单身热评分布.html")
'''
#map.add("积极情感", keys, values, visual_range=[0, 100], maptype='china', is_visualmap=True, is_label_show=True,
# visual_text_color='#000')
# maptype='china' 只显示全国直辖市和省级
# 数据只能是省名和直辖市的名称
from collections import Counter
# 获取评论中出现的表情
emoji_list = []
for comment in comment_text_list:
emojis = re.findall(re.compile(u'(\[.*?\])', re.S), comment)
if emojis:
for emoji in emojis:
emoji_list.append(emoji)
emoji_dict = Counter(emoji_list)
print(emoji_dict)
'''
# 评论7  (i.e. "Comments: 7" — appears to be stray page text carried over with the script; not code)