function [err,perfm,avg_div]=iris_rs_boosting_NNE(hidden_num,routin_num,N)
%IRIS_RS_BOOSTING_NNE Boosting-style neural network ensemble built on attribute reducts.
%
%   [err,perfm,avg_div] = iris_rs_boosting_NNE(hidden_num,routin_num)
%   [err,perfm,avg_div] = iris_rs_boosting_NNE(hidden_num,routin_num,N)
%
%   Inputs
%     hidden_num  number of hidden-layer neurons of every member network
%     routin_num  number of resampling (boosting) rounds
%     N           optional; when given, only the N members with the lowest
%                 test error are combined (unweighted vote). When omitted,
%                 all trained members are combined with their boosting weights.
%
%   Outputs
%     err      fraction of misclassified rows of iris_test.txt
%     perfm    1 - err
%     avg_div  value returned by iris_div_measure (defined elsewhere;
%              presumably an ensemble diversity measure -- confirm)
%
%   Files read : iris_train.txt, iris_test.txt, iris_bagging_reduct.txt
%   Files written: iris_bagging_train.ros, iris_attr.mat, net<k>.mat
%   Each round also runs the external Rosetta command-line tool.

org_train=load('iris_train.txt');
[h_n,l_n]=size(org_train);                 % last column holds the class label
% Labels are matched against 1..cls_n below, so the class count is the
% largest label (the last row's label is only correct for sorted data).
cls_n=max(org_train(:,l_n));

% Test data does not change between rounds, so load it once.
% Columns l_n .. l_n+cls_n-1 of the test file are read as the target matrix.
test=load('iris_test.txt');
test_target=test(:,l_n:(l_n+cls_n-1))';
testtarget=(test_target==repmat(max(test_target),cls_n,1));

% Winner-take-all targets of the ORIGINAL training set (round independent);
% used to measure each member's weighted training error.
org_train_target=ones(h_n,cls_n)/10;
for c=1:cls_n
    org_train_target(org_train(:,l_n)==c,c)=0.9;
end
org_train_target=org_train_target';
org_train_target=(org_train_target==repmat(max(org_train_target),cls_n,1));

w_sample=ones(1,h_n)/h_n;                        % sample weights, uniform at start
w_component=ones(1,routin_num)/routin_num;       % member (network) weights
net_num=routin_num;                              % members actually available

rand('state',sum(100*clock));                    % seed once from the clock

for k=1:routin_num
    % ---- draw a weighted bootstrap sample and export it for Rosetta ----
    fid=fopen('iris_bagging_train.ros','w+');
    if fid==-1
        error('iris_rs_boosting_NNE:fileOpen','Cannot open %s for writing.','iris_bagging_train.ros');
    end
    fprintf(fid,'A1 A2 A3 A4 DEC\n');
    fprintf(fid,'float(2) float(2) float(2) float(2) integer\n');
    sample_Distrb=cumsum(w_sample);              % cumulative sampling distribution
    boosting_train=zeros(h_n,l_n);
    for j=1:h_n
        hit=find(sample_Distrb>rand);
        if isempty(hit)
            id=h_n;    % round-off left the last cumulative weight just below the draw
        else
            id=hit(1);
        end
        boosting_train(j,:)=org_train(id,:);     % resampled training subset
        fprintf(fid,'%4.2f,%4.2f,%4.2f,%4.2f,%2d\n',org_train(id,:));
    end
    fclose(fid);

    % Run Rosetta (per the original author's note: discretisation and reduct
    % computation; iris_cmds.txt is the Rosetta command script).
    !D:\applog\rosetta\clrosetta SerialExecutor "OUTPUT=0; FILENAME.COMMANDS = iris_cmds.txt; FILENAME.LOG= log.txt"

    % ---- read the reduct: the line that follows the first empty line ----
    fid=fopen('iris_bagging_reduct.txt','r');
    if fid==-1
        error('iris_rs_boosting_NNE:fileOpen','Cannot open %s for reading.','iris_bagging_reduct.txt');
    end
    skipped=fgetl(fid);
    while ischar(skipped) && ~isempty(skipped)   % fgetl returns -1 at end of file
        skipped=fgetl(fid);
    end
    reduct_line=fgetl(fid);
    fclose(fid);
    if ~ischar(reduct_line)
        error('iris_rs_boosting_NNE:reductFormat','No reduct line found in %s.','iris_bagging_reduct.txt');
    end

    % Keep the text up to the first '}', split on commas and take the digits
    % of every token as an attribute (column) index, e.g. "{A1, A3}" -> [1 3].
    brace=find(reduct_line=='}');
    if isempty(brace)
        remain=reduct_line;
    else
        remain=reduct_line(1:brace(1)-1);
    end
    attr_id=[];
    while ~isempty(remain)
        [tok,remain]=strtok(remain,',');
        digits=tok(tok>='0' & tok<='9');
        if ~isempty(digits)
            attr_id(end+1)=str2double(digits);
        end
    end
    if isempty(attr_id) || any(attr_id<1) || any(attr_id>=l_n)
        error('iris_rs_boosting_NNE:reductFormat','Invalid reduct "%s".',reduct_line);
    end
    attr{k}=attr_id;
    save iris_attr attr;

    % ---- build training inputs/targets restricted to the reduct ----
    v2.P=test(:,attr_id)';                       % validation set passed to train()
    v2.T=test_target;
    train_input=boosting_train(:,attr_id);
    train_target=ones(size(train_input,1),cls_n)/10;   % 0.1 / 0.9 soft targets
    for c=1:cls_n
        train_target(boosting_train(:,l_n)==c,c)=0.9;
    end
    org_train_input=org_train(:,attr_id);

    % ---- train the member network ----
    net=newff(minmax(train_input'),[hidden_num cls_n],{'tansig' 'purelin'},'trainlm');
    net=init(net);                               % init returns the re-initialised network
    net.trainParam.epochs=1000;
    net.trainParam.goal=0.0;
    net=train(net,train_input',train_target',[],[],v2);

    % save the member network as net<k>.mat
    netfile=sprintf('net%d',k);
    save(netfile,'net');

    % ---- weighted error on the original training set ----
    output=sim(net,org_train_input');
    output=(output==repmat(max(output),cls_n,1));
    missed=any(xor(output,org_train_target),1);  % true where the sample is misclassified
    w_err=sum(w_sample(missed));
    if w_err>0.5
        % NOTE(review): the failing member k stays in the ensemble with its
        % initial weight 1/routin_num (original behaviour); AdaBoost.M1 would
        % discard it -- confirm intent.
        net_num=k;
        break
    end
    w_err=max(w_err,eps);                        % a perfect member would otherwise get weight Inf
    w_component(k)=log((1-w_err)/w_err);

    % ---- re-weight samples: raise misclassified, lower correct ----
    w_sample(missed)=w_sample(missed)/(2*w_err);
    w_sample(~missed)=w_sample(~missed)/(2*(1-w_err));
    w_sample=w_sample/sum(w_sample);
end

enoutput=zeros(cls_n,size(test,1));
if nargin<3
    % combine all members, weighted by w_component
    fprintf('all network ensemble result:');
    for k=1:net_num
        stored=load(sprintf('net%d',k));
        output=sim(stored.net,test(:,attr{k})');
        output=(output==repmat(max(output),cls_n,1));
        enoutput=enoutput+w_component(k)*output;
    end
    enoutput=(enoutput==repmat(max(enoutput),cls_n,1));
    err=sum(any(xor(enoutput,testtarget),1))/size(test,1);
    perfm=1-err;
    avg_div=iris_div_measure(net_num);
else
    % combine only the N members with the lowest individual test error
    % NOTE(review): members are ranked on the test set itself.
    N=min(N,net_num);                            % cannot pick more members than were trained
    fprintf('the best %d network ensemble result:',N);
    err_member=zeros(1,net_num);
    member_out=cell(1,net_num);                  % cache so each member is simulated once
    for k=1:net_num
        stored=load(sprintf('net%d',k));
        output=sim(stored.net,test(:,attr{k})');
        member_out{k}=(output==repmat(max(output),cls_n,1));
        err_member(k)=sum(any(xor(member_out{k},testtarget),1))/size(test,1);
    end
    [sorted_err,id]=sort(err_member);            % ascending error; sorted_err itself is unused
    id_min=id(1:N);
    for k=1:size(id_min,2)
        enoutput=enoutput+member_out{id_min(k)}; % unweighted vote
    end
    enoutput=(enoutput==repmat(max(enoutput),cls_n,1));
    err=sum(any(xor(enoutput,testtarget),1))/size(test,1);
    perfm=1-err;
    avg_div=iris_div_measure(net_num,id_min);
end