import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import glob
import os
import re
import datetime as dt
import chardet
import warnings
warnings.filterwarnings('ignore')
# Set the font so that Chinese text is rendered instead of garbled glyphs
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
# Fix the garbled rendering of the '-' minus sign in figures
plt.rcParams['axes.unicode_minus'] = False
# Directory that contains this script file. os.getcwd() (used previously) is
# the process working directory, which only equals the script directory when
# the script happens to be launched from its own folder.
try:
    path_current = os.path.dirname(os.path.abspath(__file__))
except NameError:
    # __file__ is not defined in interactive sessions; fall back to the
    # working directory, which is what the code did before.
    path_current = os.getcwd()
# Parent of the script directory; Input/, config/ and split/ are looked up here
path_pardir = os.path.dirname(path_current)
print(path_pardir)
# glob returns matches in arbitrary order; sorting makes the order in which
# the files are processed reproducible between runs and machines.
file_lists_Input = sorted(glob.glob(path_pardir + '/Input/*.xlsx'))
print(file_lists_Input)
file_lists_config = sorted(glob.glob(path_pardir + '/config/*.csv'))
print(file_lists_config)
file_lists_split = sorted(glob.glob(path_pardir + '/split/*.csv'))
print(file_lists_split)
def get_encoding(file):
    """Detect the text encoding of a file.

    The complete file content is read as bytes and passed to
    ``chardet.detect``.

    Args:
        file: Path of the file to inspect.

    Returns:
        The value stored under the ``'encoding'`` key of the chardet result.
    """
    with open(file, 'rb') as stream:
        raw_bytes = stream.read()
    detection = chardet.detect(raw_bytes)
    return detection['encoding']
def data_format_exchange(data_list):
    """Convert every string entry of ``data_list`` to ``float``, in place.

    Entries that are not ``str`` are left untouched. Surrounding whitespace
    is ignored. A string ending in ``%`` has the percent sign stripped and
    keeps its percent value (``'12.5%'`` becomes ``12.5``, not ``0.125``).

    Args:
        data_list: Mutable list whose string entries are replaced by floats.

    Returns:
        None. ``data_list`` is modified in place.

    Raises:
        ValueError: If a string entry is not a valid number.
    """
    # enumerate() yields the position directly; the previous list.index()
    # lookup rescanned the list for every converted element.
    for position, value in enumerate(data_list):
        if not isinstance(value, str):
            continue
        text = value.strip()
        # A suffix check also accepts signed percentages such as '-7%',
        # which the previous digits-only pattern rejected.
        if text.endswith('%'):
            text = text[:-1]
        data_list[position] = float(text)
def get_config_data(file_list_config):
    """Load the chart configuration CSV into a nested dictionary.

    Each CSV must provide the column pairs ``Sort_<X>`` / ``<X>_Item`` for
    ``X`` in ``Gar``, ``Ban``, ``Para``, ``Arg`` and ``Argc``. For every pair
    the rows are grouped: the ``Sort_<X>`` value is the key and the matching
    ``<X>_Item`` values are collected, in row order, into a list. Rows whose
    ``Sort_<X>`` cell is missing (NaN) are skipped.

    Args:
        file_list_config: Paths of the configuration CSV files.

    Returns:
        A dict of the form ``{'Sort_Gar': {key: [items, ...]}, ...}`` with one
        entry per category, built from the last file in ``file_list_config``.

    Raises:
        ValueError: If ``file_list_config`` is empty.
    """
    # Fail with a clear message instead of an UnboundLocalError at the return.
    if not file_list_config:
        raise ValueError('get_config_data: no config CSV file was given')

    categories = ('Gar', 'Ban', 'Para', 'Arg', 'Argc')
    # NOTE(review): the source indentation was lost; the return is assumed to
    # sit after the loop, so every file is read and the last one wins.
    for files in file_list_config:
        encoding = get_encoding(files)
        print('$$$encoding:', encoding)
        pd.options.display.max_columns = None
        df = pd.read_csv(files, encoding=encoding)

        dict_config = {}
        for category in categories:
            sort_column = 'Sort_' + category
            item_column = category + '_Item'
            grouped = dict_config.setdefault(sort_column, {})
            pairs = zip(df[sort_column].to_numpy(), df[item_column].to_numpy())
            for key, item in pairs:
                # The columns have different lengths in the sheet, so the
                # shorter ones are padded with NaN; those rows carry no key.
                # presumably that is why NaN keys occur - verify against the CSV.
                if pd.isna(key):
                    continue
                grouped.setdefault(key, []).append(item)
    return dict_config
def get_split_data(file_list_split):
    """Load the split CSV into a DataFrame.

    Every file is read with its detected encoding and rows in which all
    cells are empty are dropped.

    Args:
        file_list_split: Paths of the split CSV files.

    Returns:
        The cleaned DataFrame of the last file in ``file_list_split``.

    Raises:
        ValueError: If ``file_list_split`` is empty.
    """
    # Fail with a clear message instead of an UnboundLocalError at the return.
    if not file_list_split:
        raise ValueError('get_split_data: no split CSV file was given')

    # NOTE(review): the source indentation was lost; the return is assumed to
    # sit after the loop, so every file is read and the last one wins.
    for files in file_list_split:
        encoding = get_encoding(files)
        pd.options.display.max_columns = None
        df_split = pd.read_csv(files, encoding=encoding)
        # Drop the rows that are empty in every column
        df_split = df_split.dropna(axis=0, how='all')
    return df_split
def get_Input_data(file_list_Input):
    """Load every sheet of the input Excel workbook.

    Args:
        file_list_Input: Paths of the input ``.xlsx`` workbooks.

    Returns:
        A dict mapping each sheet name to its DataFrame, taken from the last
        workbook in ``file_list_Input``.

    Raises:
        ValueError: If ``file_list_Input`` is empty.
    """
    # Fail with a clear message instead of an UnboundLocalError at the return.
    if not file_list_Input:
        raise ValueError('get_Input_data: no Input workbook was given')

    # NOTE(review): the source indentation was lost; the return is assumed to
    # sit after the loop, so every workbook is read and the last one wins.
    for files in file_list_Input:
        # sheet_name=None makes read_excel() load all sheets of the workbook
        df_input_dict = pd.read_excel(files, sheet_name=None)
    return df_input_dict
class BarchartFather:
def __init__(self, Barchart_byGroup_xlabels, Barchart_byGroup_data, Barchart_byGroup_legend, title_name, picture_name, number, Barchart_byGroup_ylabels):
self.local_path = path_pardir + '/output/' + str(Type_ID) + '_' + Nowtime + '/'
self.Barchart_byGroup_xlabels = Barchart_byGroup_xlabels
self.Barchart_byGroup_data = Barchart_byGroup_data
self.Barchart_byGroup_legend = Barchart_byGroup_legend
self.title_name = title_name
self.picture_name = picture_name
self.number = number
self.Barchart_byGroup_ylabels = Barchart_byGroup_ylabels
self.fig = plt.figure(figsize=(8, 6), facecolor='#B0C4DE')
self.ax = self.fig.add_subplot(facecolor='white')
################################################################################################################
# self.color_list = [深橙色, 纯蓝色, 纯绿色, 洋红色, 青色, 橙红色, 淡蓝色, 紫色, 纯红色, 酸橙色, 棕色, 淡珊瑚色,\
# 靛青色, 橄榄色, 水鸭色, 暗淡的灰色, 热情的粉红色, 兰花的紫色, 黑色, 深粉色, 淡钢蓝色, 橄榄土褐色,\
# 深紫罗兰色, 适中的紫色, 道奇蓝色]
################################################################################################################
self.color_list = ['#FF8C00', '#0000FF', '#008000', '#FF00FF', '#00FFFF', '#FF4500', '#87CEFA', '#800080',
'#FF0000', '#00FF00', '#A52A2A', '#F08080', '#4B0082', '#808000', '#008080', '#696969',
'#FF69B4', '#DA70D6', '#000000', '#FF1493', '#B0C4DE', '#556B2F', '#9400D3', '#9370DB', '#1E90FF']
self.length = len(self.Barchart_byGroup_xlabels)
self.x_locs = np.arange(self.length)
self.total_width = 0.8
self.box_total_width = self.total_width * 0.85
self.interval_total_width = self.total_width * 0.15
self.width = self.total_width / self.number
self.box_width = self.box_total_width / self.number
###################################################
if self.number == 1:
self.interval_width = self.interval_total_width
else:
self.interval_width = self.interval_total_width / (self.number - 1)
########################
没有合适的资源?快使用搜索试试~ 我知道了~
利用Python中的pandas、matplotlib对xlsx和csv文件批量绘图
共4个文件
csv:2个
py:1个
xlsx:1个
5星 · 超过95%的资源 需积分: 43 12 下载量 127 浏览量
2022-10-22
18:38:41
上传
评论 1
收藏 27KB ZIP 举报
温馨提示
此资源是我的CSDN中一篇博文的文件格式的附件,该博文的地址如下:https://blog.csdn.net/Mr_Dragon66/article/details/127462562?spm=1001.2014.3001.5501 。本文的主要内容是利用**Python**中的**pandas**库读取**xlsx文件**和**csv文件**中的数据,并存储为**DataFrame**结构的数据;然后进行一系列的**数据清洗**、**格式转换**以及**数据分组归类**;接着利用**matplotlib**模块**按照指定的格式绘制【分组柱状图】、【双轴分组柱状图】、【分组箱线图】、【分组散点图和分组箱线图叠加图】**等。该程序可以批量绘制图片,**此例子中**的数据量可以绘制**4000+张.jpg或.png图片**,一般配置的电脑绘制**5000~10000+**张图片的体量是没有问题的。**可以灵活运用在自动化办公领域!**该博文中涉及到的具体文件在此资源中都有,下载下来保证可以正常运行!!!
资源详情
资源评论
资源推荐
收起资源包目录
analysis by split_csdn.zip (4个子文件)
analysis by split_csdn
config
BarChart_Config.csv 2KB
py_script
scripts_final.py 87KB
output
split
split.csv 759B
Input
Dataset.xlsx 18KB
共 4 条
- 1
北极熊在南极_whl
- 粉丝: 66
- 资源: 1
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功
评论10