# 导入包
import pandas as pd
import plotly.graph_objs as go
#* 定义类
class EDAnalysis:
    """Build Plotly figures that summarise the columns of one DataFrame.

    Every ``draw_*`` method reads ``self.data`` and returns a new
    ``go.Figure``; none of them shows, saves, or mutates anything.

    Attributes:
        data: The DataFrame the charts are drawn from.
        id_col: Name of an identifier column. Stored only; no method in
            this class reads it.
        target: Name of the target column. Required by
            ``draw_bar_stack_cat``, ``draw_bar_stack_num`` and
            ``draw_scatter_matrix``.
        cate_list: Names of categorical columns. Stored only; no method in
            this class reads it.
        num_list: Names of the columns plotted by ``draw_scatter_matrix``.
    """

    #! Bar colours for ``draw_bar``, applied in order and repeated when a
    #! column has more distinct values than there are colours.
    _BAR_COLORS = ("blue", "red", "green", "indianred", "darkgrey")

    def __init__(self,
                 data: pd.DataFrame = None,  # type: ignore
                 id_col: str = None,  # type: ignore
                 target: str = None,  # type: ignore
                 cate_list: list = None,  # type: ignore
                 num_list: list = None,  # type: ignore
                 ):
        """Store the DataFrame and the column names the charts refer to.

        All arguments are optional and are stored unchanged.
        """
        self.data = data
        self.id_col = id_col
        self.target = target
        self.num_list = num_list
        self.cate_list = cate_list

    @classmethod
    def _bar_colors(cls, count: int) -> list:
        """Return ``count`` colours, cycling through ``_BAR_COLORS``."""
        palette = cls._BAR_COLORS
        return [palette[i % len(palette)] for i in range(count)]

    def _target_share_table(self, row_values) -> pd.DataFrame:
        """Cross-tabulate ``row_values`` against the target as row percentages.

        Each row of the result is normalised to sum to 100 (before rounding)
        and rounded to two decimals; columns are the target's distinct values.
        """
        shares = pd.crosstab(row_values, self.data[self.target], normalize='index')
        return (shares * 100).round(2)

    def draw_bar(self, col_name: str) -> "go.Figure":
        """Return a bar chart of how often each value of ``col_name`` occurs.

        Args:
            col_name: Column of ``self.data`` to count.

        Returns:
            A figure with one bar per distinct value, labelled with its count.
        """
        counts = self.data[col_name].value_counts()
        heights = counts.values.tolist()
        #! Bar chart
        bars = go.Bar(
            x=counts.index.tolist(),
            y=heights,
            text=heights,
            textposition='auto',
            #! One colour per bar: a fixed five-entry list would leave the
            #! sixth and later bars without a palette colour.
            marker=dict(color=self._bar_colors(len(heights)), opacity=0.5),
        )
        layout = go.Layout(
            title=f'Distribution_num of {col_name}',
            bargap=0.4,
            height=600,
            xaxis={'title': col_name},
        )
        return go.Figure(data=[bars], layout=layout)

    def draw_pie(self, col_name: str) -> "go.Figure":
        """Return a donut chart of the value counts of ``col_name``.

        Args:
            col_name: Column of ``self.data`` to count.

        Returns:
            A figure with one slice per distinct value.
        """
        counts = self.data[col_name].value_counts()
        #! Pie chart (hole=.5 renders it as a donut)
        donut = go.Pie(
            labels=counts.index.tolist(),
            values=counts.values.tolist(),
            hole=.5,
            marker=dict(line=dict(color='white', width=1.3)),
        )
        layout = go.Layout(title=f'Distribution_ratio of {col_name}', height=600)
        return go.Figure(data=[donut], layout=layout)

    def draw_bar_stack_cat(self, col_name: str) -> "go.Figure":
        """Return horizontal stacked bars of the target split per category.

        Args:
            col_name: Column of ``self.data`` whose distinct values become
                the bars.

        Returns:
            A figure with one trace per target value; each bar's segments
            are the percentage of that category's rows with that target value.
        """
        #! Cross table (row percentages)
        table = self._target_share_table(self.data[col_name])
        categories = table.index.tolist()
        #! Trace list: one horizontal bar series per target value
        traces = [
            go.Bar(
                x=table[level].values.tolist(),
                y=categories,
                name=str(level),
                orientation='h',
                marker={'opacity': 0.8},
            )
            for level in table.columns.tolist()
        ]
        #! Layout
        layout = go.Layout(
            title=f'Relationship Between {table.index.name} and {table.columns.name}',
            bargap=0.4,
            barmode='stack',
            height=600,
            xaxis={'title': '百分比'},
            yaxis={'title': col_name},
        )
        #! Figure
        return go.Figure(data=traces, layout=layout)

    def draw_histogram(self, col_name: str) -> "go.Figure":
        """Return a histogram of ``col_name`` drawn with ``histnorm='probability'``.

        Args:
            col_name: Column of ``self.data`` to plot.
        """
        bars = go.Histogram(x=self.data[col_name], histnorm='probability', opacity=0.8)
        layout = go.Layout(
            title=f'Histogram of {col_name}',
            height=600,
            xaxis={'title': col_name},
        )
        return go.Figure(data=[bars], layout=layout)

    def draw_bar_stack_num(self, col_name: str, bins_num: int = 25) -> "go.Figure":
        """Return vertical stacked bars of the target split per value bin.

        Args:
            col_name: Column of ``self.data`` to bin with ``pd.cut``.
            bins_num: Passed to ``pd.cut`` as ``bins``.

        Returns:
            A figure with one trace per target value; each bar's segments
            are the percentage of that bin's rows with that target value.
        """
        #! Cross table (row percentages) over the binned column
        binned = pd.cut(self.data[col_name], bins=bins_num)
        table = self._target_share_table(binned)
        #! Interval labels are converted to text for the x axis
        bin_labels = table.index.astype('str').tolist()
        #! Trace list: one vertical bar series per target value
        traces = [
            go.Bar(
                x=bin_labels,
                y=table[level].values.tolist(),
                name=str(level),
                orientation='v',
                marker={'opacity': 0.8},
            )
            for level in table.columns.tolist()
        ]
        #! Layout (bargap=0 so neighbouring bins touch)
        layout = go.Layout(
            title=f'Relationship Between {table.index.name} and {table.columns.name}',
            bargap=0,
            barmode='stack',
            height=600,
            xaxis={'title': col_name},
            yaxis={'title': '百分比'},
        )
        #! Figure
        return go.Figure(data=traces, layout=layout)

    def draw_scatter_matrix(self) -> "go.Figure":
        """Return a scatter-plot matrix of the ``num_list`` columns.

        Points are coloured by the integer category code of the target
        column and carry the target value as hover text. When ``num_list``
        is ``None`` the matrix has no dimensions instead of raising
        ``TypeError``.
        """
        #! Target
        target_values = self.data[self.target]
        class_codes = target_values.astype('category').cat.codes
        columns = self.num_list if self.num_list is not None else []
        dimensions = [dict(label=name, values=self.data[name]) for name in columns]
        matrix = go.Splom(
            dimensions=dimensions,
            text=target_values,
            marker=dict(
                color=class_codes,
                showscale=False,
                line_color='white',
                line_width=0.5,
            ),
        )
        layout = go.Layout(title='ScatterPlot Matrix Between numeric Attributes', height=600)
        return go.Figure(data=[matrix], layout=layout)
R语言数据分析案例,基于python实现的保险客户价值分析(聚类分析)
需积分: 1 156 浏览量
2024-01-09
17:21:16
上传
评论
收藏 5.2MB ZIP 举报
五轮车
- 粉丝: 1096
- 资源: 280
最新资源
- 基于java开发的密码解锁APP,包含手势密码解锁+指纹密码解锁+源码+项目界面展示(毕业设计&课程设计&项目开发)
- 基于单片机的智能窗帘设计
- 基于STM32F103C8T6+LCD1602+MCP6S28的8通道模拟可编程增益放大器Proteus仿真
- 5G RAN NR TDD单用户峰值吞吐率计算过程.pptx
- 基于Objective-C开发的手势密码解锁和面容解锁(代码简洁高效通俗易懂)+源码+开发文档+源码解析(毕业设计&课程设计)
- LangChain学习之 Question And Answer的操作附带数据文件
- linux常用命令大全.zip
- CATIA入门操作案例-正八边异形带孔凸台绘制,等距点绘制正多边形,凸台绘制
- 大型代码语言模型的项目级提示生成pdf
- 纸片战争The War of paper.sb3
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈