import requests
from bs4 import BeautifulSoup as soup
import json
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl
from matplotlib import pyplot
# Request the shot data page; Cristiano Ronaldo's Understat player ID is 2371.
url = 'https://understat.com/player/2371'
# A timeout keeps the script from hanging forever on a stalled connection, and
# raise_for_status() stops on an HTTP error instead of parsing an error page.
html = requests.get(url, timeout=30)
html.raise_for_status()
# Parse the page and collect every <script> tag.
parse_soup = soup(html.content, 'lxml')
scripts = parse_soup.find_all('script')
# NOTE(review): the fourth <script> tag is assumed to carry the shot payload;
# this depends on the site's page layout - confirm if parsing starts failing.
strings = scripts[3].string
# The JSON text is whatever sits between the first "('" and the first "')".
ind_start = strings.index("('") + 2
ind_end = strings.index("')")
json_data = strings[ind_start:ind_end]
# Turn backslash escape sequences in the extracted text into real characters
# so that the result can be parsed as JSON.
json_data = json_data.encode('utf8').decode('unicode_escape')
data = json.loads(json_data)
# Pull shot location, expected goals, shot outcome and season out of every
# shot record. dict.get() keeps the five lists the same length even when a
# record lacks a key (the gap becomes a missing value in that row) instead of
# letting the lists drift out of step with each other.
x = [shot.get('X') for shot in data]
y = [shot.get('Y') for shot in data]
xg = [shot.get('xG') for shot in data]
result = [shot.get('result') for shot in data]
season = [shot.get('season') for shot in data]
columns = ['X', 'Y', 'xG', 'Result', 'Season']
df_understat = pd.DataFrame(dict(zip(columns, (x, y, xg, result, season))))
# Convert each column to numbers where possible. This replaces
# pd.to_numeric(..., errors='ignore'), which newer pandas releases deprecate.
for column in df_understat.columns:
    try:
        df_understat[column] = pd.to_numeric(df_understat[column])
    except (ValueError, TypeError):
        # Text columns such as 'Result' are left untouched.
        pass
# Scale the coordinates by 100.
# NOTE(review): presumably Understat reports positions as 0-1 fractions while
# the Opta pitch drawn later in this script uses a 0-100 scale - confirm.
df_understat['X'] = df_understat['X'] * 100
df_understat['Y'] = df_understat['Y'] * 100
# Disable HTTPS certificate verification globally.
# NOTE(review): this swaps the ssl module's default HTTPS context factory for
# the unverified one for the whole process, which removes protection against
# man-in-the-middle attacks. Presumably it is here so that the pandas.read_html
# call below can download the page on a machine with a broken certificate
# store - confirm, and prefer fixing the certificate store instead.
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
def readfromhtml(filepath):
    """Read the first HTML table found at *filepath* and return it cleaned up.

    Args:
        filepath: Anything ``pandas.read_html`` accepts (URL, path or
            file-like object).

    Returns:
        A DataFrame indexed by the ``Rk`` column, with repeated header rows
        removed, missing values replaced by the string ``'0'`` and every
        column that can be parsed as numbers converted to a numeric dtype.

    Raises:
        KeyError: If the table has no ``Player`` or ``Rk`` column.
        ImportError: If pandas has no HTML parser backend available.
    """
    # Index 0 is the first table pandas finds (the original comment said
    # "second table", which did not match the code).
    df = pd.read_html(filepath)[0]
    # A two-row header yields (group, name) column tuples; keep only the name.
    # Single-row headers are already plain names and are left alone.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = [column[1] for column in df.columns]
    # Drop the header rows that are repeated inside the table body.
    df = df[df['Player'] != 'Player']
    df = df.fillna('0')
    df = df.set_index('Rk', drop=True)
    try:
        # Drop the first whitespace-separated token of the competition name.
        df['Comp'] = df['Comp'].apply(lambda value: ' '.join(value.split()[1:]))
        # Keep only the last whitespace-separated token of the nation field.
        df['Nation'] = df['Nation'].astype(str).apply(lambda value: value.split()[-1])
    except (KeyError, AttributeError, IndexError):
        # Best effort: a table without these columns (or with non-text cells)
        # is still returned, only without this tidy-up.
        print(f'Error in uploading file:{filepath}')
    return df.apply(_to_numeric_if_possible)


def _to_numeric_if_possible(column):
    """Return *column* converted to numbers, or unchanged if it is not numeric."""
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column
# Fetch the player shooting table for the Big-5 European leagues from FBref.
# NOTE(review): the original comment says this is the 2020-2021 season, but the
# URL names no season, so it presumably returns whichever season FBref
# currently lists - confirm.
df_fbref = readfromhtml('https://fbref.com/en/comps/Big5/shooting/players/Big-5-European-Leagues-Stats')
# print(df_fbref)
# Plotting helpers: Pillow for the title overlay, mplsoccer for the pitch.
from PIL import Image, ImageDraw, ImageFont
import mplsoccer

# Colours shared by every panel of the figure.
background = '#D6DBD9'
text_color = 'black'

# Apply all matplotlib defaults in a single call: tick/text colours, a font
# able to render the Chinese labels ('Hiragino Sans' is a possible
# alternative), ASCII minus signs and the legend font size.
mpl.rcParams.update({
    'xtick.color': text_color,
    'ytick.color': text_color,
    'text.color': text_color,
    'font.sans-serif': 'Songti SC',
    'axes.unicode_minus': False,
    'legend.fontsize': 12,
})
def _draw_season_shot_map(fig, pitch, shots, rect, season_label, facecolor):
    """Draw one season's shots on a new half-pitch axes and return the axes.

    Args:
        fig: Figure that receives the new axes.
        pitch: mplsoccer pitch object used to draw the pitch and the shots.
        shots: DataFrame with 'X', 'Y', 'xG' and 'Result' columns.
        rect: (left, bottom, width, height) of the axes in figure fractions.
        season_label: Caption written on the panel.
        facecolor: Background colour of the axes patch.
    """
    ax_season = fig.add_axes(rect)
    ax_season.patch.set_facecolor(facecolor)
    pitch.draw(ax=ax_season)

    # Split the shots once instead of re-filtering for every argument.
    is_goal = shots['Result'] == 'Goal'
    misses = shots[~is_goal]
    goals = shots[is_goal]

    # Marker area grows with the square root of xG. Misses are drawn first in
    # translucent grey so that the goals sit on top of them.
    pitch.scatter(misses['X'], misses['Y'],
                  s=np.sqrt(misses['xG']) * 100, marker='o', alpha=0.6,
                  edgecolor='black', facecolor='grey', ax=ax_season)
    pitch.scatter(goals['X'], goals['Y'],
                  s=np.sqrt(goals['xG']) * 100, marker='o', alpha=0.9,
                  edgecolor='black', facecolor='#6778d0', ax=ax_season, label='Goal')

    # Legend (only the goal series carries a label) and the summary figures.
    ax_season.legend(loc='lower right').get_texts()[0].set_color("black")
    ax_season.text(30, 61, '得分次数 : ' + str(len(goals)), weight='bold', size=11)
    ax_season.text(30, 64, f"预期进球 : {round(float(shots['xG'].sum()), 2)}", weight='bold', size=11)
    ax_season.text(30, 58, '射门次数 : ' + str(len(shots)), weight='bold', size=11)
    ax_season.text(90, 60, season_label, weight='bold', size=14)
    return ax_season


# Create the canvas, hide the default axes and paint the background.
fig, ax = plt.subplots(figsize=(10, 8))
ax.axis('off')
fig.set_facecolor(background)
# Vertical half pitch in Opta coordinates.
pitch = mplsoccer.VerticalPitch(half=True, pitch_type='opta', line_zorder=3, pitch_color='grass')
# Bottom-left panel: shots of the 2019-20 season.
ax_opta1 = _draw_season_shot_map(
    fig, pitch, df_understat.loc[df_understat['Season'] == 2019],
    (0.05, 0.06, 0.45, 0.4), '2019-20赛季', background)
# Bottom-right panel: shots of the 2020-21 season.
ax_opta2 = _draw_season_shot_map(
    fig, pitch, df_understat.loc[df_understat['Season'] == 2020],
    (0.50, 0.06, 0.45, 0.4), '2020-21赛季', background)
# Top-right panel: expected goals vs. goals per 90 minutes for forwards.
ax_scatter = fig.add_axes([0.52, 0.57, 0.4, 0.35])
ax_scatter.patch.set_facecolor(background)
# Keep only players whose '90s' value (minutes played divided by 90) reaches
# the threshold.
no_90s = 10
df_fil = df_fbref[df_fbref['90s'] >= no_90s]
# Keep forwards, including players listed as both forward and midfielder.
df_fil = df_fil[df_fil['Pos'].isin(['FW', 'MF,FW', 'FW,MF'])]
# Expected goals and goals, each divided by the number of 90s played.
x, y = (df_fil['xG']/df_fil['90s']).to_list(), (df_fil['Gls']/df_fil['90s']).to_list()
# Every qualifying forward as a translucent orange dot.
ax_scatter.scatter(x, y, alpha=0.3, c='#EF8804')
# Cristiano Ronaldo highlighted in blue on top.
df_player = df_fil[df_fil['Player'] == 'Cristiano Ronaldo']
ax_scatter.scatter(df_player['xG']/df_player['90s'], df_player['Gls']/df_player['90s'], c='blue')
# Grid lines and axis labels. The labels used to read "per 90 seconds"; the
# data is per 90 minutes, so the unit in the labels is corrected here.
ax_scatter.grid(visible=True, color='grey',
                linestyle='-.', linewidth=0.5,
                alpha=0.4)
ax_scatter.set_xlabel('每90分钟的预期进球', fontdict={'fontsize': 12, 'weight': 'bold', 'color': text_color})
ax_scatter.set_ylabel('每90分钟得分', fontdict=dict(fontsize=12, weight='bold', color=text_color))
# Axes for the player portrait, near the top-left of the figure.
ax_player = fig.add_axes([0.03, 0.53, 0.25, 0.45])
ax_player.set_axis_off()
# Axes for the club crest, to the right of the portrait.
ax_team = fig.add_axes([0.27, 0.55, 0.15, 0.15])
ax_team.set_axis_off()
# Load both images from the working directory and show them on their axes.
ax_player.imshow(plt.imread('ronaldo.png'))
im = plt.imread('FCJ.png')
ax_team.imshow(im)
# Write the composed figure to disk with the same background colour.
plt.savefig('ronaldo_viz.png', dpi=300, facecolor=background)
# Re-open the saved figure with Pillow and draw the title lines onto it.
img = Image.open('ronaldo_viz.png')
draw = ImageDraw.Draw(img)
# Headline in the larger font.
title_font = ImageFont.truetype(font='simsun.ttc', size=19)
draw.text(xy=(3, 94), text='<克里斯蒂亚诺·罗纳尔多(C罗)> 赛季数据', font=title_font)
# Position and age lines share the smaller font.
font = ImageFont.truetype(font='simsun.ttc', size=15)
for xy, text in [((25, 75), '位置: <边锋>'), ((25, 56), '年龄: <36>')]:
    draw.text(xy=xy, text=text, font=font)
# Overwrite the figure file with the annotated image.
img.save('ronaldo_viz.png')
# 添加俱乐部logo
# 添加备注
plt.figtext(0.07, 0.03, '制作者:张新宸 数据来源:Fbref.com、Understat.com',
size=12, weigh
- 1
- 2
- 3
- 4
前往页