# 开发者:董书雄
# 开发时间:2022-11-13 23:33
# 开发内容:{}
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd
import joblib
from sklearn import metrics, ensemble
from catboost import CatBoostRegressor # 调用模型
import matplotlib.pyplot as plt
import seaborn as sns
pd.set_option('display.max_columns', None)  # show every column when printing
pd.set_option('display.max_rows', None)  # show every row when printing

# Alternative data sources, left commented out:
# 1: training results with TGAN-synthesised data
# data1 = pd.read_csv('traindata928.csv')
# data2 = pd.read_csv('testdata.csv')
# 2: training results with the 232 original samples only
# data2 = pd.read_csv('NSGA专用CAT.csv')
# data1, data2 = train_test_split(data2, test_size=0.3)
data1 = pd.read_csv('CATtraindata.csv')
data2 = pd.read_csv('CATtestdata.csv')
print('traindata=', data1)
print('testdata=', data2)
# print(data1,type(data1))

# Columns 0-8 are used as features and column 9 as the regression target.
x_train = data1.iloc[:, 0:9]
y_train = data1.iloc[:, 9]
x_test = data2.iloc[:, 0:9]
y_test = data2.iloc[:, 9]

# The training and test files must share the same feature header row.
# pd.concat aligns on column labels, so differing headers would silently
# produce a NaN-padded X and meaningless "all" metrics; fail loudly instead.
if list(x_train.columns) != list(x_test.columns):
    raise ValueError(
        'Feature columns of the training and test data differ: '
        f'{list(x_train.columns)} vs {list(x_test.columns)}'
    )

# Combined (train + test) features and targets, used for the "all" metrics.
X = pd.concat([x_train, x_test])
Y = pd.concat([y_train, y_test])
col = data1.columns[:-1]  # every column label except the last one
# Train the model.
# NOTE: the names below carry an `rf` prefix, but the estimator is a
# CatBoostRegressor.
rfmodel = CatBoostRegressor(
    n_estimators=200,  # commented-out alternative in the original: n_estimators=120
    max_depth=5,
    learning_rate=0.3,  # learning rate
)
rfmodel.fit(x_train, y_train)

# Save the fitted model to disk and load it back; all predictions below are
# made with the reloaded object.
joblib.dump(rfmodel, 'catboost.model')
rfmodel = joblib.load('catboost.model')

# Model predictions on the training split, the test split and both combined.
rf_predict_train = rfmodel.predict(x_train)
rf_predict_test = rfmodel.predict(x_test)
rf_predict_all = rfmodel.predict(X)

# Signed error on the test split (prediction minus actual value).
rf_error = rf_predict_test - y_test
# Compute the performance metrics (R^2, RMSE, MAPE) on the training split,
# the test split and the two combined.
# (actual, predicted) pairs in the order: train, test, all.
_eval_pairs = (
    (y_train, rf_predict_train),
    (y_test, rf_predict_test),
    (Y, rf_predict_all),
)
rf_R2_rtrain, rf_R2_rtest, rf_R2_rall = (
    metrics.r2_score(actual, predicted)
    for actual, predicted in _eval_pairs
)
# RMSE is taken as the square root of the mean squared error.
rf_RMSE_train, rf_RMSE_test, rf_RMSE_all = (
    metrics.mean_squared_error(actual, predicted) ** 0.5
    for actual, predicted in _eval_pairs
)
rf_MAPE_train, rf_MAPE_test, rf_MAPE_all = (
    metrics.mean_absolute_percentage_error(actual, predicted)
    for actual, predicted in _eval_pairs
)
# All nine values bundled together: R^2, then RMSE, then MAPE.
rf_performance = (
    rf_R2_rtrain, rf_R2_rtest, rf_R2_rall,
    rf_RMSE_train, rf_RMSE_test, rf_RMSE_all,
    rf_MAPE_train, rf_MAPE_test, rf_MAPE_all,
)
# Data for printing and plotting: turn the prediction arrays into plain lists.
rf_predict_test = np.array(rf_predict_test).tolist()
data_array = np.array(rf_predict_all)  # name kept at module level, as before
rf_predict_all = data_array.tolist()

# Dump the actual values next to the predictions.
print('y_test=', y_test)
print('rf_predict_test=', rf_predict_test, type(rf_predict_test))
print('Y=', Y)
print('rf_predict_all=', rf_predict_all)

# Report every metric, one per line, in the order R^2, RMSE, MAPE.
for _label, _value in (
    ('rf_R2_rtrain=', rf_R2_rtrain),
    ('rf_R2_rtest=', rf_R2_rtest),
    ('rf_R2_rall=', rf_R2_rall),
    ('rf_RMSE_train=', rf_RMSE_train),
    ('rf_RMSE_test=', rf_RMSE_test),
    ('rf_RMSE_all=', rf_RMSE_all),
    ('rf_MAPE_train=', rf_MAPE_train),
    ('rf_MAPE_test=', rf_MAPE_test),
    ('rf_MAPE_all=', rf_MAPE_all),
):
    print(_label, _value)

# Scatter plot of actual (x axis) against predicted (y axis) values for the
# combined data.
plt.scatter(Y, rf_predict_all)
plt.show()
机器学习模型+CatBoost(CGB)+GridSearchCV+源码
需积分: 5 109 浏览量
2024-03-15
15:27:02
上传
评论
收藏 3KB ZIP 举报
智能提桶工程师
- 粉丝: 228
- 资源: 5
最新资源
- (2005-2014期间)中国环境统计年鉴
- 2015高中信息技术excel操作题及素材(精品文档).xls
- SW3518S全协议快充USB Type-c接口电源模块硬件参考设计评估版硬件(原理图 +pcb)+封装库文件.zip
- 基于深度强化学习算法实现多星对区域目标观测的规划python源码+数据集+模型+超详细注释.zip
- RT1052+SDRAM(IS42S16160) +SIM7600CE(PCIE接口封装)控制板硬件(原理图+PCB)+封装库
- 2017大学英语四级词汇-excel-列表版(精品文档).xls
- 2017版国家医保药品目录(excel版)完整版.xls
- 基于STM32F103单片机设计的无刷电机控制板硬件(原理图+PCB+BOM)+MCU软件控制源码+文档资料.zip
- 肺结节检测数据集VOC+YOLO格式1186张1类别.zip
- Faster-RCNN基于知识蒸馏的目标检测模型增量深度学习方法python源码+项目运行说明.zip
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈