import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn import model_selection, metrics  # Additional scikit-learn functions
from sklearn.model_selection import GridSearchCV  # Performing grid search
from collections import Counter
import matplotlib.pyplot as plt
from xgboost import plot_importance
#matplotlib inline
from matplotlib.pylab import rcParams
import time
# Default size for any matplotlib figure created by this script.
rcParams['figure.figsize'] = 12, 4

# Wall-clock reference point. It is taken before the CSV files are read, so
# any elapsed time computed from it includes loading and preprocessing.
start = time.time()

test = pd.read_csv('corrected1.csv')
X_train = pd.read_csv('X_train2.4.csv')
y_train = pd.read_csv('y_train2.4.csv')

# Split features and labels by column position.
# `DataFrame.ix` was deprecated in pandas 0.20 and removed in pandas 1.0;
# `.iloc` is the purely positional indexer and selects the same columns that
# `.ix` selected here (integer keys on string-labelled columns).
train = X_train.iloc[:, 0:40]   # first 40 columns of the training features
target = y_train.iloc[:, 0]     # first column of the training labels
X_test = test.iloc[:, 0:40]     # first 40 columns of the test file
y_test = test.iloc[:, 41]       # column at position 41 is used as the label
# NOTE(review): column 40 of the test file is used neither as a feature nor as
# the label -- confirm this is intentional.

# Sanity checks: dataset shapes and the class distribution of the training labels.
print(train.shape)
print(target.shape)
print(X_test.shape)
print(y_test.shape)
print(sorted(Counter(target).items()))
from sklearn.preprocessing import StandardScaler

# Standardize the feature data for both training and testing. The scaler is
# fitted on the training features only and the same fitted scaler is then
# applied to the test features.
# (MinMaxScaler could be substituted here following the same
# fit-on-train / transform-test pattern.)
ss = StandardScaler().fit(train)
train = ss.transform(train)
X_test = ss.transform(X_test)
from sklearn.ensemble import GradientBoostingClassifier

# Disabled XGBoost alternative, kept for reference:
#   XGBClassifier(reg_alpha=0.005, learning_rate=0.1, n_estimators=2000,
#                 max_depth=8, min_child_weight=9.5, gamma=0.1, subsample=0.9,
#                 colsample_bytree=0.6, objective='binary:logistic', nthread=4,
#                 scale_pos_weight=1, seed=27)
#   plot_importance(xgb1); plt.show()

# Fit a gradient-boosting classifier; only random_state is set explicitly,
# every other hyperparameter is left at the library default.
gbc = GradientBoostingClassifier(random_state=10).fit(train, target)
print(gbc)

# Predict labels for the (already scaled) test features.
y_predict = gbc.predict(X_test)

# NOTE: `start` is taken at the top of the script, so this figure covers data
# loading, scaling, training and prediction -- not model fitting alone.
end = time.time()
print('GradientBoostingClassifier needs times:', end - start)
# Evaluation metrics are imported from sklearn.metrics.
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Accuracy is computed from the predictions already stored in y_predict.
# This yields the same value as gbc.score(X_test, y_test) without running the
# model over the whole test set a second time.
print('The Accuracy of GradientBoostingClassifier is', accuracy_score(y_test, y_predict))

# Per-class precision / recall / F1, printed with five decimal places.
# target_names must contain one entry per class present in the data.
print (classification_report(y_test, y_predict, target_names=['1','2','3','4','5'],digits=5))

# Confusion matrix (rows: true labels, columns: predicted labels); computed
# once and printed.
print(confusion_matrix(y_test,y_predict))