% Data preparation for the BP-network classifier: load the labelled samples,
% split them into a training and a test set, one-hot encode the training
% labels and normalise inputs/targets to [-1, 1].
clc;
clear; % empties the workspace; a following "clear all" would be redundant
% Load the data set: one sample per row, class label in the last column
data_all=xlsread('shuizhifenlei_data.xls');
num_samples=size(data_all,1);
% Training set: the first 17/20 (85%) of the rows, rounded to a whole row.
% NOTE(review): the split is positional, not random - confirm the rows of the
% spreadsheet are not ordered by class.
train_row=round(num_samples*17/20);
train_label=data_all(1:train_row,end);          % training class labels
train_class_all=data_all(1:train_row,1:end-1);  % training features
% Test set: all remaining rows
text_data=data_all(train_row+1:end,1:end-1);    % test features
text_label=data_all(train_row+1:end,end);       % true class labels of the test samples
% The toolbox functions used below expect one sample per column, whereas the
% spreadsheet stores one sample per row, hence the transposes.
p=train_class_all';  % training features
P_test=text_data';   % test features
t1=text_label';      % true test labels, used later to score the predictions
% Build the target matrix: one row per class, one column per training sample,
% with a 1 in the row of the sample's class (one-hot encoding).
num_class=5; % number of classes = number of output neurons of the network
s=numel(train_label);
% Reject labels that cannot be used as a row index of the target matrix.
% The comparison with round() also catches NaN labels.
if any(train_label<1 | train_label>num_class | train_label~=round(train_label))
    error('Training labels must be integers between 1 and %d.',num_class);
end
t=zeros(num_class,s);
for i = 1 : s
    t(train_label(i),i) = 1;
end
[pn,minp,maxp,tn,mint,maxt] = premnmx(p,t); % normalise training inputs and targets to [-1, 1]
p2= tramnmx(P_test,minp,maxp); % normalise the test inputs with the training min/max
% Search for the best size of the second hidden layer: train one network per
% candidate size and record the fraction of test samples it classifies
% correctly in Pre(hn).
% NOTE(review): the candidates are scored on the test samples (p2/t1), so the
% same data drives both model selection and the reported accuracy.
hidcount=5; % largest candidate size to try
Pre=zeros(1,hidcount);
for hn=1:hidcount
    % Layers: 9 tansig units, hn tansig units, 5 purelin outputs;
    % training function traingdm, learning function learngdm, performance mse
    net=newff(minmax(pn),[9,hn,5],{'tansig','tansig','purelin'},'traingdm','learngdm','mse');
    % Training parameters - adjust as required
    net.trainparam.show=40;
    net.trainparam.mc=0.9;
    net.trainparam.lr=0.05;
    net.trainparam.epochs=1000;
    net.trainparam.goal=0.01;
    net=init(net);          % initialise the network
    net=train(net,pn,tn);   % train on the normalised training set
    % Simulate the test inputs and undo the target normalisation
    scores=postmnmx(sim(net,p2),mint,maxt);
    % Predicted class of each test sample = row index of its largest output
    [top_score,predicted]=max(scores,[],1);
    % Recognition rate of this candidate on the test set
    Pre(hn)=sum(predicted==t1)/length(t1);
end
% Line chart of the recognition rate for each candidate node count
figure(1);
subplot(2,1,1);
hold on;
plot(1:hidcount,Pre,'b--o');
% Select the node count with the highest recognition rate (the smallest such
% count when several tie); the network is retrained with this size afterwards
[best_rate,hn]=max(Pre);
% Rebuild the network with the selected hidden-layer size hn and train it again.
% NOTE(review): init() re-initialises the network, so this is a fresh training
% run; its accuracy may differ from the value recorded during the search.
net=newff(minmax(pn),[9,hn,5],{'tansig','tansig','purelin'},'traingdm','learngdm','mse');
net.trainparam.show=50;
net.trainparam.mc=0.9;
net.trainparam.lr=0.05;
net.trainparam.epochs=1000;
net.trainparam.goal=0.01;
net=init(net);          % initialise the network
net=train(net,pn,tn);   % train on the normalised training set
PN=sim(net,p2);         % simulate the normalised test inputs
t2=postmnmx(PN,mint,maxt); % undo the target normalisation
% Predicted class of each test sample = row index of its largest output
[top_score,winner]=max(t2,[],1);
Class_sort=winner';     % column vector: one predicted label per test sample
% Overall recognition rate on the test set
hitNum=sum(winner==t1);
Precision={'正确率',hitNum/length(t1)};
% Scatter plot of the predicted class (red *) and the true class (blue o) of
% every test sample, drawn in the lower half of figure 1.
figure(1);
subplot(2,1,2); % select the lower axes before plotting
m2=length(Class_sort); % number of test samples
X=1:m2;
plot(X,Class_sort','r*',X,t1,'bo');
% One x tick per sample index; the range grows with the test set so that
% sets with more than 50 samples are still fully labelled
set(gca,'xtick',0:1:max(50,m2));
set(gca,'ytick',1:1:5); % one y tick per class label
xlabel('测试样本序号');
ylabel('样本所属类别');
title('o为真实值,*为预测值');
% Per-class recognition statistics on the test set
start=zeros(5,1);   % start(k):  test samples whose true label is k
result=zeros(5,1);  % result(k): test samples predicted as class k
hits=zeros(5,1);    % hits(k):   samples of class k that were predicted as k
for k=1:5
    is_true_k=(text_label==k);
    is_pred_k=(Class_sort==k);
    start(k)=sum(is_true_k);
    result(k)=sum(is_pred_k);
    hits(k)=sum(is_true_k & is_pred_k);
end
% Display the true class counts and the predicted class counts
Star=[{'原数据'},num2cell(start')]
Res=[{'测试结果'},num2cell(result')]
count=length(t1);   % number of test samples
num_sum=sum(hits);  % total number of correctly classified test samples
% Display the recognition rate of each class followed by the overall rate;
% a class with no test samples yields 0/0 = NaN
Precision=[{'正确率'},num2cell((hits./start)'),{num_sum/count}]