% Start from an empty workspace and a clean command window.
clear all;
clc;
%num=700;

% ---- Tree-building parameters ----
tree_level     = 4;    % remaining depth budget; decremented after each layer is processed
attribute_num  = 6;    % number of candidate attributes scanned at every node
class_split    = 0.9;  % minimum purity for a class split (a node is split while its score is below this)
element_num    = 2;    % minimum number of elements a node needs before it may be split
attribute_test = 5;    % variable under test: column of the label table used as the class

% ---- Attribute bookkeeping ----
% A 1 in row i marks attribute i as already used for a split; the _0 / _1
% copies track the left and right first-layer children separately.
attribute_num_use = attribute_num;
attribute_use     = zeros(attribute_num_use, 1);
attribute_use_0   = attribute_use;
attribute_use_1   = attribute_use;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Placeholder for every node of the binary tree, named by its path from the
% root (0 = left child, 1 = right child). Each one starts as the scalar 0;
% the workspace was cleared at the top of the script, so none of these names
% exists yet. Presumably this keeps every node variable defined even when
% its parent is never split - TODO confirm against the code that reads them.

% Layer 1
class_0 = 0;
class_1 = 0;

% Layer 2
class_0_0 = 0;
class_0_1 = 0;
class_1_0 = 0;
class_1_1 = 0;

% Layer 3
class_0_0_0 = 0;
class_0_0_1 = 0;
class_0_1_0 = 0;
class_0_1_1 = 0;
class_1_0_0 = 0;
class_1_0_1 = 0;
class_1_1_0 = 0;
class_1_1_1 = 0;

% Layer 4
class_0_0_0_0 = 0;
class_0_0_0_1 = 0;
class_0_0_1_0 = 0;
class_0_0_1_1 = 0;
class_0_1_0_0 = 0;
class_0_1_0_1 = 0;
class_0_1_1_0 = 0;
class_0_1_1_1 = 0;
class_1_0_0_0 = 0;
class_1_0_0_1 = 0;
class_1_0_1_0 = 0;
class_1_0_1_1 = 0;
class_1_1_0_0 = 0;
class_1_1_0_1 = 0;
class_1_1_1_0 = 0;
class_1_1_1_1 = 0;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Load the data from the spreadsheet: datarow holds the numeric columns I..N
% (the attributes to split on), data_ret holds columns A..H (the table the
% class label column is later taken from).
datarow=xlsread('1.xls','血常规','i2:n994');
%load datarow;
data_ret=xlsread('1.xls','血常规','a2:h994');
% Normalization
% min-max normalization: rescale every column of datarow to [0,1].
% The dimensions are taken from the data that was actually read instead of
% being hard-coded, so a sheet with fewer rows/columns than the requested
% range no longer causes an out-of-range index error.
[row_count,col_count]=size(datarow);
data_min_max=zeros(row_count,col_count);
for i=1:col_count
    minA=min(datarow(:,i));
    maxA=max(datarow(:,i));
    rangeA=maxA-minA;
    % A constant column has zero range; dividing would give 0/0 = NaN for
    % every row, so such a column is left at 0 instead.
    if rangeA~=0
        data_min_max(:,i)=(datarow(:,i)-minA)/rangeA;
    end
end
%z-score normalization
% data_z_score=zeros(993,6);
% for i=1:6
% meanA=mean(datarow(:,i));
% sum=0;
% for j=1:993
% sum=sum+(datarow(j,i)-meanA)^2;
% meanB=sum/(993-1);
% xigmad=sqrt(meanB);
% data_z_score(j,i)=(datarow(j,i)-meanA)/xigmad;
% end
% end
% normalization by decimal scaling
% data_decimal=zeros(993,6);
% for i=1:6
% maxA=max(datarow(:,i));
% if maxA/10<1
% for j=1:993
% data_decimal(j,i)=datarow(j,i)/10;
% end
% else if maxA/100<1
% for j=1:993
% data_decimal(j,i)=datarow(j,i)/100;
% end
% else
% for j=1:993
% data_decimal(j,i)=datarow(j,i)/1000;
% end
% end
% end
% end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Randomly partition the normalized rows into a training set (data /
% class_ret) and a test set (data_test / class_test).
rand_a=0.25;   % a row goes to the test set when its uniform draw is <= rand_a
[m_temp,n_temp]=size(data_min_max);
% One uniform draw per row, taken in row order. Selecting with a logical
% mask avoids growing the four arrays row by row and leaves every one of
% them defined (possibly with zero rows) even if a partition ends up empty.
is_train=rand(m_temp,1)>rand_a;
data=data_min_max(is_train,:);
class_ret=data_ret(is_train,:);
data_test=data_min_max(~is_train,:);
class_test=data_ret(~is_train,:);
num11=size(data,1);        % number of training rows
num22=m_temp-num11;        % number of test rows
clear is_train;
num=num11;
% NOTE(review): "save" with only a file name writes the ENTIRE workspace to
% class_test.mat, not just the variable class_test - confirm this is what
% the consumer of that file expects.
save class_test;
% Training label table: column 1 is the class taken from column
% attribute_test of the label table, column 2 is a column of ones.
class_train=[class_ret(:,attribute_test),ones(num,1)];
% Split the root node
%class=class_train(:,1);
% split() scores the labels of the node (presumably a purity measure -
% confirm in split.m); the node is only split while that score is below
% the class_split threshold.
class_split_a=split(class_train(:,1),num);
if class_split_a<class_split
    classsplit=1;
else
    classsplit=0;
end
% The product is non-zero only if the node is impure enough AND unused
% attributes remain AND the depth budget is not exhausted.
classificate_condition=classsplit*attribute_num_use*tree_level;
if classificate_condition~=0
    flag=1;
    % Score every attribute that has not been used yet. At the root no
    % attribute is used, so position k in all_gini corresponds to attribute k.
    for i=1:attribute_num
        if attribute_use(i,1)==0
            all_gini(1,flag)=mingini(num,i,class_train,data(:,i));
            flag=flag+1;
        end
    end
    % Pick the attribute with the smallest score. The minimum value goes
    % into best_gini rather than a variable called "min", which would shadow
    % the builtin min function.
    [best_gini,attribute]=min(all_gini);
    attribute_use(attribute,1)=1;
    % Both children start from the same "already used" bookkeeping.
    attribute_use_0=attribute_use;
    attribute_use_1=attribute_use;
    cla_point=attribute;
    disp('................................................the root !!')
    [class_num ,class_value_root,class_0,class_1]=classificate_root(num,attribute,class_train,data(:,attribute));
    clear best_gini all_gini ;
else
    % Everything after this point needs attribute and class_num, which are
    % only produced by a root split; fail here with a clear message instead
    % of an "undefined variable" error further down.
    error('decisiontree:rootNotSplit', ...
        'The root node does not satisfy the split condition; no tree can be built.');
end
tree_level=tree_level-1;
% Split the first-layer left node
%data_1=zeros(num,12);
% chongpai_1 presumably rearranges the samples according to the root split
% (confirm in chongpai_1.m). Below, rows 1:class_num of data_1 are passed as
% the left child's samples and column 2*i-1 as the values of attribute i.
data_1=chongpai_1(attribute,num,data);
%clear data;
attribute_num_use=attribute_num_use-1;
% Results reported when this node is NOT split (too few elements, pure
% enough, or no attributes / depth left); overwritten below on a split.
class_value_0=0;
cla_point_0=0;
class_num_0_0=0;
if class_num>=element_num
    class_split_a=split(class_0(:,1),class_num);
    if class_split_a<class_split
        classsplit=1;
    else
        classsplit=0;
    end
    classificate_condition=classsplit*attribute_num_use*tree_level;
    if classificate_condition~=0
        flag=1;
        % Score every attribute not consumed by the root split; attri maps a
        % position in all_gini back to the attribute number.
        for i=1:attribute_num
            if attribute_use(i,1)==0
                all_gini(1,flag)=mingini(class_num,i,class_0,data_1(1:class_num,(2*i-1)));
                attri(1,flag)=i;
                flag=flag+1;
            end
        end
        % best_gini receives the minimum value; a variable named "min" would
        % shadow the builtin min function.
        [best_gini,attribute_0]=min(all_gini);
        cla_point_0=attri(1,attribute_0);
        attribute_use_0(cla_point_0,1)=1;
        disp('................................................the classification 0 !!')
        [class_num_0_0 ,class_value_0,class_0_0,class_0_1]=classificate(class_num,cla_point_0,class_0,data_1(1:class_num,(cla_point_0*2-1)));
        clear all_gini best_gini flag attri;
        % Both grandchildren inherit this node's "already used" bookkeeping.
        attribute_use_0_0=attribute_use_0;
        attribute_use_0_1=attribute_use_0;
    end
end
% Split the first-layer right node
% The right child's samples are passed as rows class_num+1:num of data_1,
% i.e. num-class_num rows.
% Results reported when this node is NOT split (too few elements, pure
% enough, or no attributes / depth left); overwritten below on a split.
class_value_1=0;
cla_point_1=0;
class_num_1_0=0;
if num-class_num>=element_num
    class_split_a=split(class_1(:,1),num-class_num);
    if class_split_a<class_split
        classsplit=1;
    else
        classsplit=0;
    end
    classificate_condition=classsplit*attribute_num_use*tree_level;
    if classificate_condition~=0
        flag=1;
        % Score every attribute not consumed by the root split; attri maps a
        % position in all_gini back to the attribute number.
        for i=1:attribute_num
            if attribute_use(i,1)==0
                all_gini(1,flag)=mingini(num-class_num,i,class_1,data_1((class_num+1):num,(2*i-1)));
                attri(1,flag)=i;
                flag=flag+1;
            end
        end
        % best_gini receives the minimum value; a variable named "min" would
        % shadow the builtin min function.
        [best_gini,attribute_1]=min(all_gini);
        cla_point_1=attri(1,attribute_1);
        attribute_use_1(cla_point_1,1)=1;
        disp('................................................the classification 1 !!')
        [class_num_1_0 ,class_value_1,class_1_0,class_1_1]=classificate((num-class_num),cla_point_1,class_1,data_1((class_num+1):num,(cla_point_1*2-1)));
        % Both grandchildren inherit this node's "already used" bookkeeping.
        attribute_use_1_0=attribute_use_1;
        attribute_use_1_1=attribute_use_1;
    end
end
% Drop the per-node scratch variables before the next layer.
clear all_gini best_gini flag attri;
tree_level=tree_level-1;
% Split the left child of the second-layer left node
% Build the sample matrix for the second layer. chongpai_2 receives the
% sizes of the two first-layer children, the first-layer matrix, and the
% attribute each child was split on (cla_point_0 / cla_point_1 are 0 when
% that child was not split). Presumably it rearranges the rows by those
% splits - confirm in chongpai_2.m.
data_2=chongpai_2(class_num,(num-class_num),data_1,cla_point_0,cla_point_1);
% The first-layer matrix is no longer needed once data_2 exists.
clear data_1;
% One fewer attribute is counted as available for this layer.
attribute_num_use=attribute_num_use-1;
%all_gini=zeros(attribute_num_use,1);
if class_num_0_0~=0
if class_num_0_0>=element_num
class_split_a=split(class_0_0(:,1),class_num_0_0);
if class_split_a<class_split
classsplit=1;
else
classsplit=0;
end
classificate_condition=classsplit*attribute_num_use*tree_level;
if classificate_condition~=0
flag=1;
for i=1:attribute_num
if attribute_use_0(i,1)==0;
all_gini(1,flag)=mingini(class_num_0_0,i,class_0_0,data_2(1:class_num_0_0,(2*i-1)));
attri(1,flag)=i;
flag=flag+1;
end
end
[min,attribute_0_0]=min(all_gini);
cla_point_0_0=attri(1,attribute_0_0);
disp('................................................the classification 0_0 !!')
[class_num_0_0_0 ,class_value_0_0,class_0_0_0,class_0_0_1]=classificate(class_num_0_0,attri(1,attribute_0_0),class_0_0,data_2(1:class_num_0_0,(attri(1,attribute_0_0)*2-1)));
clear all_gini min flag attri;
attribute_use_0_0_0=attribute_use_0_0;
attribute_use_0_0_1=attribute_use_0_0;
else
class_value_0_0=0;
class_num_0_0_0=0;
cla_point_0_0=0;
end
else
class_value_0_0=0;
class_num_0_0_0=0;
cla_point_