# coding: utf-8
# In[ ]:
import numpy as np
import scipy.io as scio
import xgboost as xgb
from xgboost.sklearn import XGBClassifier
from xgboost.sklearn import XGBRegressor
from sklearn import metrics
from sklearn.model_selection import GridSearchCV,train_test_split
import joblib  # sklearn.externals.joblib was removed in newer scikit-learn; use the standalone joblib package
np.set_printoptions(suppress= True)
# In[ ]:
# Load the data
cloudy = scio.loadmat(r'data\data_cloud.mat')['cloudy']
clear = scio.loadmat(r'data\data_cloud.mat')['clear']
# Concatenate the cloudy and clear samples into one array
data = np.concatenate((cloudy,clear),axis = 0)
# Extract the labels and the features
classification = data[:,2].copy()                   # column 2 holds the label
classification = np.where(classification > 0,1,0)   # binarize: positive -> 1, otherwise 0
features = np.delete(data,[2,3],axis=1)              # drop columns 2 and 3, keep only the feature columns
# Train/test split (the grid searches below cross-validate within the training set)
x_train,x_test,y_train,y_test = train_test_split(features,classification,test_size = 0.25,random_state = 10)
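# Optional sanity check, not in the original notebook: the searches below score on
# accuracy, so it helps to confirm the 0/1 classes are not badly imbalanced first.
print("train class counts:", np.bincount(y_train))
print("test class counts:", np.bincount(y_test))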
# In[ ]:
# Tune the number of boosting rounds (n_estimators) and the learning rate
param_test1 = {'n_estimators': list(range(100,201,50)),'learning_rate': np.linspace(0.05,0.3,6)}
gsearch1 = GridSearchCV(estimator = XGBClassifier(max_depth= 5,
# learning_rate= 0.1,
# n_estimators= 150,
n_jobs= -1,
gamma= 0,
reg_alpha= 0,
reg_lambda= 1,
seed= 10),
param_grid = param_test1, scoring='accuracy', n_jobs = -1, return_train_score = True)  # 'iid' dropped: the parameter was removed in newer scikit-learn releases
gsearch1.fit(x_train,y_train)
gsearch1.cv_results_, gsearch1.best_score_, gsearch1.best_params_
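# In[ ]:
# Optional, not in the original notebook: a minimal sketch for ranking the candidates
# from the search above as a table (assumes pandas is available in the environment).
import pandas as pd
cv_table = pd.DataFrame(gsearch1.cv_results_)
cols = ['param_n_estimators', 'param_learning_rate', 'mean_test_score', 'std_test_score']
print(cv_table[cols].sort_values('mean_test_score', ascending=False).head())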
# In[ ]:
# max_depth
param_test2 = {'max_depth': np.arange(5,11)}
gsearch2 = GridSearchCV(estimator = XGBClassifier(
# max_depth= 5,
learning_rate= 0.3,
n_estimators= 200,
n_jobs= -1,
gamma= 0,
reg_alpha= 0,
reg_lambda= 1,
seed= 10),
param_grid = param_test2, scoring='accuracy', n_jobs = -1, return_train_score = True)
gsearch2.fit(x_train,y_train)
gsearch2.cv_results_, gsearch2.best_score_, gsearch2.best_params_
# In[ ]:
# min_child_weight
param_test3 = {'min_child_weight': np.arange(1,1002,200)}
gsearch3 = GridSearchCV(estimator = XGBClassifier(
max_depth= 10,
learning_rate= 0.3,
n_estimators= 200,
n_jobs= -1,
gamma= 0,
reg_alpha= 0,
reg_lambda= 1,
# min_child_weight = 300,
seed= 10),
param_grid = param_test3, scoring='accuracy', n_jobs = -1, return_train_score = True)
gsearch3.fit(x_train,y_train)
gsearch3.cv_results_, gsearch3.best_score_, gsearch3.best_params_
# In[ ]:
# gamma
param_test4 = {'gamma': np.linspace(0,0.5,5)}
gsearch4 = GridSearchCV(estimator = XGBClassifier(
max_depth= 10,
learning_rate= 0.3,
n_estimators= 200,
n_jobs= -1,
# gamma= 0,
reg_alpha= 0,
reg_lambda= 1,
min_child_weight = 200,
seed= 10),
param_grid = param_test4, scoring='accuracy', n_jobs = -1, return_train_score = True)
gsearch4.fit(x_train,y_train)
gsearch4.cv_results_, gsearch4.best_score_, gsearch4.best_params_
# In[ ]:
# subsample
param_test5 = {'subsample': [0.6,0.7,0.8,0.9,1.0]}
gsearch5 = GridSearchCV(estimator = XGBClassifier(
max_depth= 10,
learning_rate= 0.3,
n_estimators= 200,
n_jobs= -1,
gamma= 0.25,
reg_alpha= 0,
reg_lambda= 1,
min_child_weight = 200,
seed= 10),
param_grid = param_test5, scoring='accuracy', n_jobs = -1, return_train_score = True)
gsearch5.fit(x_train,y_train)
gsearch5.cv_results_, gsearch5.best_score_, gsearch5.best_params_
# In[ ]:
# colsample_bytree
param_test6 = {'colsample_bytree': [0.6,0.7,0.8,0.9,1.0]}
gsearch6 = GridSearchCV(estimator = XGBClassifier(
max_depth= 10,
learning_rate= 0.3,
n_estimators= 200,
n_jobs= -1,
gamma= 0.25,
reg_alpha= 0,
reg_lambda= 1,
min_child_weight = 200,
subsample = 1.0,
seed= 10),
param_grid = param_test6, scoring='accuracy', n_jobs = -1, return_train_score = True)
gsearch6.fit(x_train,y_train)
gsearch6.cv_results_, gsearch6.best_score_, gsearch6.best_params_
# In[ ]:
# Train the model with the tuned parameters
xgb_skl = XGBClassifier(max_depth= 10,
learning_rate= 0.1,
n_estimators= 600,
n_jobs= -1,
gamma= 0.25,
reg_alpha= 0,
reg_lambda= 1,
min_child_weight = 200,
subsample = 1.0,
colsample_bytree = 1.0,
seed= 10)
xgb_skl.fit(x_train,y_train)
# Predict on the test set
y_pred = xgb_skl.predict(x_test)
accuracy_test = metrics.accuracy_score(y_test, y_pred)
print ("Accuracy(test) : %.4f" %accuracy_test)
# Predict on the training set
y_pred = xgb_skl.predict(x_train)
accuracy_train = metrics.accuracy_score(y_train, y_pred)
print ("Accuracy(train) : %.4f" %accuracy_train)
# In[ ]:
# Fine-tuned training with manually adjusted parameters
xgb_skl = XGBClassifier(max_depth= 13,
learning_rate= 0.1,
n_estimators= 150,
n_jobs= -1,
gamma= 0.11,
reg_alpha= 0,
reg_lambda= 3,
min_child_weight = 1,
subsample = 1,
colsample_bytree = 0.9,
seed= 10)
xgb_skl.fit(x_train,y_train)
# Predict on the test set
y_pred = xgb_skl.predict(x_test)
accuracy_test = metrics.accuracy_score(y_test, y_pred)
print ("Accuracy(test) : %.4f" %accuracy_test)
# Predict on the training set
y_pred = xgb_skl.predict(x_train)
accuracy_train = metrics.accuracy_score(y_train, y_pred)
print ("Accuracy(train) : %.4f" %accuracy_train)
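# In[ ]:
# joblib is imported at the top of the script but never used; a minimal sketch of
# persisting the fine-tuned model (the file name 'xgb_cloud_model.joblib' is only an example).
joblib.dump(xgb_skl, 'xgb_cloud_model.joblib')
# xgb_skl = joblib.load('xgb_cloud_model.joblib')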