#!/usr/bin/env python
# coding: utf-8
from sklearn import tree
from sklearn.datasets import load_wine
import pandas as pda
# --- Load and preprocess the training data ---
# Raw string avoids the invalid "\m" escape sequence, which raises a
# SyntaxWarning on modern Python and would silently corrupt the path if
# the letter after the backslash ever became a real escape.
fname = r"E:\model.csv"
dataf = pda.read_csv(fname, encoding="gbk")
# Fill missing values in the feature columns with each column's median.
dataf.iloc[:, 2:201] = dataf.iloc[:, 2:201].fillna(dataf.iloc[:, 2:201].median())
# Drop rows that still contain NaN (e.g. a missing label).
dataf.dropna(axis=0, inplace=True)
# DataFrame.as_matrix() was removed in pandas 1.0 -- use to_numpy().
x = dataf.iloc[:, 2:201].to_numpy()  # feature columns 2..200
y = dataf.iloc[:, 0].to_numpy()      # label: first column
xf = pda.DataFrame(x)
yf = pda.DataFrame(y)
from sklearn.model_selection import train_test_split
# Hold out 10% of rows as the test set.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(xf, yf, test_size=0.1)
# Pruning / hyper-parameter tuning: the exhaustive grid search below was
# run once to pick min_samples_leaf / min_samples_split / max_leaf_nodes
# (best combination kept in the classifier further down) and is now
# disabled by wrapping it in a module-level string literal.
'''
import matplotlib.pyplot as plt
test=[]
Max=0
I=J=K=0
for i in range(1,30):
for j in range(2,30):
for k in range(2,30):
clf=tree.DecisionTreeClassifier(
min_samples_leaf=i
,min_samples_split=j
,max_leaf_nodes=k
,criterion="entropy"
,random_state=30
,splitter="random"
)
clf=clf.fit(Xtrain,Ytrain)
score=clf.score(Xtest,Ytest)
if score>Max:
Max=score
I=i
J=j
K=k
#test.append(score)
#plt.plot(range(1,30),test,color="red",label="max_depth")
#plt.legend()
#plt.show()
'''
# --- Final (pruned) tree with the tuned hyper-parameters ---
clf = tree.DecisionTreeClassifier(criterion="entropy"
                                  ,min_samples_leaf=1
                                  ,min_samples_split=2
                                  ,max_leaf_nodes=27
                                  ,random_state=30
                                  #,max_depth=6
                                  ,splitter="random"
                                  )
clf = clf.fit(Xtrain, Ytrain)
# Mean accuracy on the held-out test set.
score = clf.score(Xtest, Ytest)
#score=clf.score(Xtrain,Ytrain)
# A bare expression displays in a notebook but does nothing in a script --
# print it so the accuracy is actually reported.
print(score)
# --- Visualize the decision tree as a PDF ---
import graphviz
import pydotplus
# Generate the 199 feature names 'x_001'..'x_199' instead of a hand-typed
# literal list: identical values, no room for a typo or a missing entry.
feature_name = ['x_%03d' % i for i in range(1, 200)]
dot_data = tree.export_graphviz(clf
                                ,feature_names=feature_name
                                ,class_names=['0', '1']
                                ,filled=True
                                ,rounded=True)
#graph=graphviz.Source(dot_data)
graph = pydotplus.graph_from_dot_data(dot_data)
graph.write_pdf("tree.pdf")
#[*zip(feature_name,clf.feature_importances_)]
# --- Evaluate predictions on the held-out split ---
anwser = clf.predict(Xtest)
# as_matrix() was removed in pandas 1.0 -- use to_numpy().
y2 = Ytest.to_numpy().astype(int)
# Count mismatches over the actual split size. The original hard-coded
# range(0, 1002) raises IndexError (or undercounts) whenever the
# train/test split produces a different number of rows.
count = 0
for i in range(len(y2)):
    if y2[i] != anwser[i]:
        count += 1
# Fraction of correct predictions (manual re-check of clf.score).
print(1 - count / len(Xtest))
#pda.DataFrame(anwser)
# --- Predict the unlabeled test file and export to Excel ---
fname1 = "test.csv"
dataf1 = pda.read_csv(fname1, encoding="gbk")
# Same preprocessing as the training data: median-fill, then drop rows
# that still contain NaN. (test.csv has no label column, hence 1:200.)
dataf1.iloc[:, 1:200] = dataf1.iloc[:, 1:200].fillna(dataf1.iloc[:, 1:200].median())
dataf1.dropna(axis=0, inplace=True)
# as_matrix() was removed in pandas 1.0 -- use to_numpy().
test = dataf1.iloc[:, 1:200].to_numpy()
anwser1 = clf.predict(test)
# One DataFrame wrap is enough (the original wrapped it twice).
data = pda.DataFrame(anwser1)
# BUG FIX: in 'E:\result.xlsx' the '\r' is a carriage-return escape,
# yielding an invalid path -- a raw string keeps the backslash literal.
# The context manager saves and closes the workbook on exit
# (ExcelWriter.save() was removed in pandas 2.0).
with pda.ExcelWriter(r'E:\result.xlsx') as writer:
    data.to_excel(writer, sheet_name='page_1', float_format='%.5f')