#爬取数据
import re
import requests #发送请求
import csv
# with open('rank.csv', encoding='utf-8', newline='', mode='a') as f:
# csv_writer = csv.writer(f)
# csv_writer.writerow(['university','region','country','score','rank','year'])
# url='https://www.qschina.cn/sites/default/files/qs-rankings-data/cn/2174878_indicators.txt'
# response = requests.get(url) #<Response [200]>: 请求成功
# json_date=response.json() #python字典
# def replace(str_):
# str_=re.findall('<div class="td-wrap"><div class="td-wrap-in">(.*?)</div></div>',str_)[0]
# return str_
# for i in json_date['data']:
# year="2023"
# country= i['location']
# rank= i['overall_rank']
# region= i['region']
# score=replace(i['overall'])
# university=i['uni']
# university=re.findall('<div class="td-wrap"><div class="td-wrap-in"><a href=".*?" class="uni-link">(.*?)</a></div></div>',university)[0]
# print(university,region,country,score,rank,year)
# with open('rank.csv',encoding='utf-8',newline='',mode='a')as f:
# csv_writer=csv.writer(f)
# csv_writer.writerow([university,region,country,score,rank,year])
from pyecharts.charts import *
from pyecharts import options as opts
import pandas as pd
# Show every column when a DataFrame is printed (no column truncation).
pd.set_option('display.max_columns', None)

# Load the ranking table from the working directory.
df = pd.read_csv('rank.csv')

# Hong Kong, Macau, Taiwan and mainland China are separate entries in the
# ranking; fold them all into 'China'. The original label is kept in the
# 'loc' column before the values are rewritten.
df['loc'] = df['country']
# Assign the result back rather than calling replace(..., inplace=True) on
# df['country']: the in-place form mutates a temporary Series selected from
# the frame, which pandas warns about and which leaves `df` untouched once
# Copy-on-Write is enabled.
df['country'] = df['country'].replace(
    ['China (Mainland)', 'Hong Kong SAR', 'Taiwan', 'Macau SAR'], 'China')
# HTML template for a bar's tooltip. It is filled through str.format with
# four positional values: a heading placed inside the bordered <div>, then
# the rank (排名), the country/region (国家地区) and the weighted overall
# score (加权总分).
tool_js = """
<div style="border-bottom: 1px solid rgba(255,255,255,.3); font-size: 18px;padding-bottom: 7px;margin-bottom: 7px">
{}
</div>
排名:{} <br>
国家地区:{} <br>
加权总分:{} <br>
"""
# NOTE(review): the literal below is a bare string expression -- it is not
# assigned to anything, so it has no effect at runtime. It looks like a
# disabled extension of the template above with six more placeholders
# (international students, international faculty, faculty/student ratio,
# academic reputation, employer reputation, citations per faculty) --
# confirm before removing.
"""
国际学生:{} <br>
国际教师:{} <br>
师生比例:{} <br>
学术声誉:{} <br>
雇主声誉:{} <br>
教员引用率:{} <br>
"""
# Coerce rank to a number; values that cannot be parsed become NaN, and NaN
# fails the `<= 100` comparison below.
df['rank'] = pd.to_numeric(df['rank'], errors='coerce')
# print(df.info())

# Keep the 2023 rows ranked 100 or better. The year is compared numerically:
# read_csv infers an integer column for values such as 2023, and an integer
# column never equals the string '2023' (the mask would be all False).
# to_numeric also copes with a year column that was read as text.
is_2023 = pd.to_numeric(df['year'], errors='coerce') == 2023
# .copy() so the score column can be rewritten without touching `df`.
t_data = df[is_2023 & (df['rank'] <= 100)].copy()
# print(t_data.head())

# Sort on numeric scores. If the column was read as text the ordering would
# be lexicographic ('100' before '60.5'); unparseable scores become NaN.
t_data['score'] = pd.to_numeric(t_data['score'], errors='coerce')
t_data = t_data.sort_values(by="score", ascending=True)
print(t_data.head())
# Matches a bracketed part of a name, in ASCII or full-width brackets,
# together with the whitespace in front of it, e.g. the " (FU)" in
# "Foo University (FU)".
# The brackets must be literal characters: written as '(.*?)' the pattern is
# a capture group that matches at every position, and on Python 3.7+ re.sub
# then removes every character of the name.
_PARENTHETICAL = re.compile(r'\s*[（(].*?[）)]')

# Build, in row order, the category labels (`university`) and the matching
# bar items (`score`), each item carrying its own HTML tooltip.
university, score = [], []
for idx, row in t_data.iterrows():
    # Tooltip text: full name, rank, country and score of this row.
    tjs = tool_js.format(row['university'], row['rank'], row['country'], row['score'])
    # Axis label: the name without its bracketed part.
    label = _PARENTHETICAL.sub('', row['university'])
    if row['country'] == 'China':
        # Mark Chinese universities with a flag prefix.
        label = '🇨🇳 {}'.format(label)
    university.append(label)
    score.append(opts.BarItem(name='', value=row['score'],
                              tooltip_opts=opts.TooltipOpts(formatter=tjs)))
# ---- Bar chart of the collected scores, drawn with swapped axes ----

# Centered chart title in a 20pt font.
title_cfg = opts.TitleOpts(
    title="2023年世界大学排名(QS) TOP 100",
    pos_left="center",
    title_textstyle_opts=opts.TextStyleOpts(font_size=20),
)
# Vertical zoom control whose initial window is the 70%-100% range.
zoom_cfg = opts.DataZoomOpts(range_start=70, range_end=100, orient='vertical')
# Hidden visual map colouring dimension 0 over the 60-100 range.
color_map_cfg = opts.VisualMapOpts(
    is_show=False,
    max_=100,
    min_=60,
    dimension=0,
    range_color=['#00FFFF', '#FF7F50'],
)
legend_cfg = opts.LegendOpts(is_show=False)
# Options passed as xaxis_opts: axis not shown, scaled to the data.
x_axis_cfg = opts.AxisOpts(is_show=False, is_scale=True)
# Options passed as yaxis_opts: no ticks, no axis line, 12pt labels.
y_axis_cfg = opts.AxisOpts(
    axistick_opts=opts.AxisTickOpts(is_show=False),
    axisline_opts=opts.AxisLineOpts(is_show=False),
    axislabel_opts=opts.LabelOpts(font_size=12),
)
# Value labels shown to the right of each bar, in italics.
bar_label_cfg = opts.LabelOpts(is_show=True, position='right', font_style='italic')
# Rounded bar corners with a soft drop shadow.
bar_item_style = {
    "normal": {
        "barBorderRadius": [30, 30, 30, 30],
        'shadowBlur': 10,
        'shadowColor': 'rgba(120, 36, 50, 0.5)',
        'shadowOffsetY': 5,
    }
}

bar = Bar()
bar.add_xaxis(university)
bar.add_yaxis('', score, category_gap='30%')
bar.set_global_opts(
    title_opts=title_cfg,
    datazoom_opts=zoom_cfg,
    visualmap_opts=color_map_cfg,
    legend_opts=legend_cfg,
    xaxis_opts=x_axis_cfg,
    yaxis_opts=y_axis_cfg,
)
bar.set_series_opts(label_opts=bar_label_cfg, itemstyle_opts=bar_item_style)
# Swap the x and y axes.
bar.reversal_axis()
# Put the bar chart on a 1000px x 1200px 'purple-passion' themed canvas,
# positioned 20% from the left edge and 10% from the right, then write the
# result to 3.html.
canvas_cfg = opts.InitOpts(theme='purple-passion', width='1000px', height='1200px')
grid = Grid(init_opts=canvas_cfg)
grid.add(bar, grid_opts=opts.GridOpts(pos_right='10%', pos_left='20%'))
grid.render('3.html')