function varargout = my_NaiveBayes(data,label)
%MY_NAIVEBAYES Gaussian naive Bayes on PCA-reduced data, swept over dimensionality.
%   my_NaiveBayes(data,label) shuffles the samples, uses the first 80% as
%   the training set and the remainder as the test set, projects all
%   samples onto the leading principal components of the training set and,
%   for every dimensionality t = 1..min(40, number of components), trains
%   and evaluates a Gaussian naive Bayes classifier on the first t
%   components. The test accuracy is plotted against t in figure 2.
%
%   accuracy = my_NaiveBayes(data,label) additionally returns the test
%   accuracy for each dimensionality as a column vector of fractions in
%   [0,1] (the plot shows the same values in percent).
%
%   Inputs:
%     data  - N-by-D numeric matrix, one sample per row.
%     label - N-element vector of class labels. The classifier models the
%             ten classes 1..10; a label of 0 is remapped to class 10.
%             Samples with any other label belong to no class and are
%             always counted as misclassified.
%
%   Errors:
%     my_NaiveBayes:sizeMismatch  - label count differs from row count.
%     my_NaiveBayes:tooFewSamples - the 80% training split would be empty.

numClasses=10;
maxComponents=40;
trainFraction=0.8;

%% Validate input
label=label(:);                     % accept row or column label vectors
numSamples=size(data,1);
if numel(label)~=numSamples
    error('my_NaiveBayes:sizeMismatch', ...
        'data has %d rows but label has %d elements.',numSamples,numel(label));
end
numTrain=floor(numSamples*trainFraction);
if numTrain<1
    error('my_NaiveBayes:tooFewSamples', ...
        'At least 2 samples are required to form a train/test split.');
end

%% Data shuffle and split
order=randperm(numSamples);
data=data(order,:);
label=label(order);
label(label==0)=numClasses;         % label 0 denotes class 10

trainIdx=1:numTrain;
testIdx=numTrain+1:numSamples;
trainlabel=label(trainIdx);
testlabel=label(testIdx);
numTest=numel(testlabel);
fprintf('The number of traindata is %d and the number of testdata is %d \n',numTrain,numTest)

%% PCA for dimension reduction
% The basis is fitted on the training rows only so that no information
% from the test samples leaks into the model. The projection is left
% uncentered: a constant offset shifts the class means and the test points
% equally and therefore does not affect the Gaussian likelihoods.
coeff=pca(data(trainIdx,:));
maxDim=min(maxComponents,size(coeff,2));   % pca may return fewer than 40 components
projected=data*coeff(:,1:maxDim);
trainX=projected(trainIdx,:);
testX=projected(testIdx,:);

%% Class priors and per-feature Gaussian parameters
% Column k of the projection is the same for every t >= k, so the
% statistics are computed once for all maxDim components.
prior=zeros(numClasses,1);
classMean=zeros(numClasses,maxDim);
classVar=ones(numClasses,maxDim);
% Variance floor: avoids division by zero when a class has a single
% sample or a constant feature.
varFloor=max(1e-9*max([var(trainX,0,1),0]),eps);
for c=1:numClasses
    members=trainX(trainlabel==c,:);
    prior(c)=size(members,1)/numTrain;
    if size(members,1)>0
        classMean(c,:)=mean(members,1);
        classVar(c,:)=max(var(members,0,1),varFloor);
    end
    % Empty classes keep mean 0 / variance 1; their log-prior is -Inf, so
    % they can never win the arg-max below.
end

%% Sweep over dimensionality
% Work with log-posteriors (sum of log-densities) instead of a product of
% densities, which underflows as the number of features grows. Adding the
% contribution of component t to the running score yields the score for
% the first t components.
logPost=repmat(log(prior).',numTest,1);
accuracy=zeros(maxDim,1);
for t=1:maxDim
    disp(['No. ',num2str(t),' starts !']);
    for c=1:numClasses
        residual=testX(:,t)-classMean(c,t);
        logPost(:,c)=logPost(:,c) ...
            -0.5*log(2*pi*classVar(c,t)) ...
            -0.5*residual.^2./classVar(c,t);
    end
    [~,predicted_label]=max(logPost,[],2);
    accuracy(t)=sum(predicted_label==testlabel)/numTest;
    disp(['No. ',num2str(t),' completes !']);
end

%% Plot accuracy (in percent, matching the axis label)
figure(2)
plot(1:maxDim,100*accuracy)
title('Naive Bayes Classification Accuracy')
ylabel('accuracy(%)')
xlabel('dimensionality of data')

if nargout>0
    varargout{1}=accuracy;
end
end
% Comments: 0