python电影数据分析+云地图+词云全代码_python数据对比分析代码资源-CSDN文库

共2个文件

html：1个

py：1个

python

数据分析

5星 · 超过95%的资源 | 需积分: 50 | 174 浏览量 | 2022-07-20 18:56:24 上传 | 评论 2 | 收藏 | 4KB | ZIP | 举报

资源详情

资源评论

资源推荐

收起资源包目录

test.zip （2个子文件）

test.html 7KB

电影数据分析.py 5KB

"""Movie review analysis: score/time charts, a province map and a word cloud.

The script reads a review spreadsheet, then writes five charts (``1.jpg`` to
``5.jpg``), an interactive province map (``test.html``) and finally shows a
word cloud of the review text.  Input paths are hard-coded below.
"""
import collections

import jieba
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import wordcloud
from PIL import Image
from pyecharts import options as opts
from pyecharts.charts import Map
from pyecharts.globals import ThemeType, SymbolType, ChartType

# Show DataFrames in full when printed (no column/row truncation, 2 decimals).
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.float_format', lambda value: '%.2f' % value)

# Plot style.  The font settings must come after sns.set(), which resets
# matplotlib's rcParams.
sns.set()
colors = sns.color_palette()
plt.rcParams['font.sans-serif'] = 'SimHei'  # font able to render Chinese labels
plt.rcParams['axes.unicode_minus'] = False  # keep the minus sign renderable

zxy = Image.open(r'D:\pyproject\excel\84期录播课配套资料\day08-电影舆情分析\代码\zxy.jfif')
df = pd.read_excel(r'D:\pyproject\excel\白蛇(含省份)数据.xlsx')

# dropna() returns a new frame; the result has to be kept, otherwise rows with
# missing values survive and break the string handling further down.
df = df.dropna(how='any', axis=0).reset_index(drop=True)

# --- Score distribution: bar chart plus a pie chart in the right third ---
df_scores = df['评分'].value_counts().sort_index(ascending=False)
x = np.arange(df_scores.size)
figure = plt.figure(figsize=(15, 5))
plt.bar(x, df_scores.values)
plt.xticks(x, df_scores.index.map(lambda score: f'{score}分'))
son_figure = figure.add_subplot(1, 3, 3)
son_figure.pie(df_scores.values, labels=df_scores.index, autopct='%.1f%%')
plt.savefig('./5.jpg')

# --- Comments per day ---
# NOTE(review): resample() needs a datetime index, so this assumes the
# '评论时间' column was parsed as datetimes by read_excel - confirm.
df = df.set_index('评论时间')
comment_count = df.resample('D')['评论'].count()
days = comment_count.index.day.tolist()
plt.figure(figsize=(12, 5), alpha=1)
plt.plot(days, comment_count.values, color='green', marker='o')
plt.title('8月每日评论量', fontsize=12, color='green')
plt.fill_between(days, comment_count.values, color='green', alpha=0.6)
for day, count in zip(days, comment_count.values):
    # Place the value label slightly above its data point.
    plt.text(day, count * 1.05, str(count))
plt.xticks(days)
plt.savefig('./4.jpg')

# --- Comments per hour of day ---
df.reset_index(inplace=True)
df['小时'] = df['评论时间'].dt.hour
time_count = df.groupby('小时')['评论'].count()
plt.figure(figsize=(12, 5))
plt.title('每小时评论量', fontsize=12, color='red')
plt.bar(time_count.index, time_count.values)
plt.xticks(time_count.index, time_count.index.map(lambda hour: f'{hour}时'))
for hour, count in zip(time_count.index, time_count.values):
    plt.text(hour, count, str(count))
plt.xlabel('时间')
plt.ylabel('评论次数')
plt.savefig('./3.jpg')

# --- Comments per weekday (1 = Monday ... 7 = Sunday) ---
df['星期'] = df['评论时间'].dt.dayofweek + 1
week_count = df.groupby('星期')['评论'].count()
dic = {
    1: '星期一',
    2: '星期二',
    3: '星期三',
    4: '星期四',
    5: '星期五',
    6: '星期六',
    7: '星期日',
}
plt.figure(figsize=(12, 5))
plt.plot(week_count.index, week_count.values, color=colors[3])
plt.bar(week_count.index, week_count.values, color=colors[5])
plt.xticks(week_count.index, week_count.index.map(dic))
plt.ylim(0, 5000)
plt.title('一周每天评论量分析')
plt.savefig('./2.jpg')

# --- Character popularity: how often each name occurs in all review text ---
name = ['小青', '小白', '蒙面人', '司马', '牛头帮主', '宝青坊主', '法海', '蜘蛛精']
comments = ''.join(df['评论'].values)
list1 = [(item, comments.count(item)) for item in name]
person_count = pd.DataFrame(data=list1, columns=('名字', '数量'))
person_count.sort_values('数量', inplace=True, ascending=False)
plt.figure(figsize=(12, 5))
plt.bar(person_count['名字'], person_count['数量'])
plt.savefig('./1.jpg')

# --- Province map of comment counts ---
data = df['省份'].value_counts()
region_map = (
    Map(init_opts=opts.InitOpts())
    .add("", [list(z) for z in zip(data.index.tolist(), data.values.tolist())], "china")
    .set_global_opts(
        title_opts=opts.TitleOpts(title="评论数量的地域分布"),
        # NOTE(review): the colour-scale bounds are hard-coded; presumably the
        # min/max province counts of the original dataset - confirm.
        visualmap_opts=opts.VisualMapOpts(max_=2209, min_=53, range_color=['white', 'green']),
    )
)
region_map.render_notebook()
region_map.render("./test.html")

# --- Word cloud of the review text ---
# Lines read from the file keep their trailing newline; str.strip() returns a
# new string, so the stripped value is what must be stored.  A set gives
# constant-time membership tests in the filtering loop below.
with open(r'D:\pyproject\excel\84期录播课配套资料\day08-电影舆情分析\代码\stopwords.txt', 'r', encoding='utf-8') as f:
    stop_words = {line.strip('\n') for line in f}

comment_word = []
for item in df['评论']:
    comment_word.extend(word for word in jieba.lcut(item) if word not in stop_words)
word_freq = collections.Counter(comment_word)

plt.figure(figsize=(12, 5))
wc = wordcloud.WordCloud(
    font_path=r'D:\pyproject\excel\84期录播课配套资料\day08-电影舆情分析\代码\蒙纳超刚黑简.ttf',
    width=500,
    height=300,
    background_color='white',
    max_font_size=200,
    min_font_size=5,
    max_words=2000,
    mask=np.array(zxy),
)
image = wc.generate_from_frequencies(word_freq)
plt.axis('off')
plt.imshow(image)
plt.show()