% This script implements the k-means algorithm. Run it first, then run
% statpatpca.m to perform the PCA.
% Feature labels, stored as a 4x1 cell array of character vectors.
data.names = {'Sepal Length'; 'Sepal Width'; 'Petal Length'; 'Petal Width'};
% Measurement table: 150 rows (one per sample) by 4 columns (one per
% feature).
% NOTE(review): the columns presumably follow the order of data.names
% (four names, four columns) -- confirm.
% NOTE(review): row i presumably pairs with data.classes{i} (both have 150
% entries) -- confirm.
data.samples = [
5.1 3.5 1.4 0.2;
4.9 3.0 1.4 0.2;
4.7 3.2 1.3 0.2;
4.6 3.1 1.5 0.2;
5.0 3.6 1.4 0.2;
5.4 3.9 1.7 0.4;
4.6 3.4 1.4 0.3;
5.0 3.4 1.5 0.2;
4.4 2.9 1.4 0.2;
4.9 3.1 1.5 0.1;
5.4 3.7 1.5 0.2;
4.8 3.4 1.6 0.2;
4.8 3.0 1.4 0.1;
4.3 3.0 1.1 0.1;
5.8 4.0 1.2 0.2;
5.7 4.4 1.5 0.4;
5.4 3.9 1.3 0.4;
5.1 3.5 1.4 0.3;
5.7 3.8 1.7 0.3;
5.1 3.8 1.5 0.3;
5.4 3.4 1.7 0.2;
5.1 3.7 1.5 0.4;
4.6 3.6 1.0 0.2;
5.1 3.3 1.7 0.5;
4.8 3.4 1.9 0.2;
5.0 3.0 1.6 0.2;
5.0 3.4 1.6 0.4;
5.2 3.5 1.5 0.2;
5.2 3.4 1.4 0.2;
4.7 3.2 1.6 0.2;
4.8 3.1 1.6 0.2;
5.4 3.4 1.5 0.4;
5.2 4.1 1.5 0.1;
5.5 4.2 1.4 0.2;
4.9 3.1 1.5 0.1;
5.0 3.2 1.2 0.2;
5.5 3.5 1.3 0.2;
4.9 3.1 1.5 0.1;
4.4 3.0 1.3 0.2;
5.1 3.4 1.5 0.2;
5.0 3.5 1.3 0.3;
4.5 2.3 1.3 0.3;
4.4 3.2 1.3 0.2;
5.0 3.5 1.6 0.6;
5.1 3.8 1.9 0.4;
4.8 3.0 1.4 0.3;
5.1 3.8 1.6 0.2;
4.6 3.2 1.4 0.2;
5.3 3.7 1.5 0.2;
5.0 3.3 1.4 0.2;
7.0 3.2 4.7 1.4;
6.4 3.2 4.5 1.5;
6.9 3.1 4.9 1.5;
5.5 2.3 4.0 1.3;
6.5 2.8 4.6 1.5;
5.7 2.8 4.5 1.3;
6.3 3.3 4.7 1.6;
4.9 2.4 3.3 1.0;
6.6 2.9 4.6 1.3;
5.2 2.7 3.9 1.4;
5.0 2.0 3.5 1.0;
5.9 3.0 4.2 1.5;
6.0 2.2 4.0 1.0;
6.1 2.9 4.7 1.4;
5.6 2.9 3.6 1.3;
6.7 3.1 4.4 1.4;
5.6 3.0 4.5 1.5;
5.8 2.7 4.1 1.0;
6.2 2.2 4.5 1.5;
5.6 2.5 3.9 1.1;
5.9 3.2 4.8 1.8;
6.1 2.8 4.0 1.3;
6.3 2.5 4.9 1.5;
6.1 2.8 4.7 1.2;
6.4 2.9 4.3 1.3;
6.6 3.0 4.4 1.4;
6.8 2.8 4.8 1.4;
6.7 3.0 5.0 1.7;
6.0 2.9 4.5 1.5;
5.7 2.6 3.5 1.0;
5.5 2.4 3.8 1.1;
5.5 2.4 3.7 1.0;
5.8 2.7 3.9 1.2;
6.0 2.7 5.1 1.6;
5.4 3.0 4.5 1.5;
6.0 3.4 4.5 1.6;
6.7 3.1 4.7 1.5;
6.3 2.3 4.4 1.3;
5.6 3.0 4.1 1.3;
5.5 2.5 4.0 1.3;
5.5 2.6 4.4 1.2;
6.1 3.0 4.6 1.4;
5.8 2.6 4.0 1.2;
5.0 2.3 3.3 1.0;
5.6 2.7 4.2 1.3;
5.7 3.0 4.2 1.2;
5.7 2.9 4.2 1.3;
6.2 2.9 4.3 1.3;
5.1 2.5 3.0 1.1;
5.7 2.8 4.1 1.3;
6.3 3.3 6.0 2.5;
5.8 2.7 5.1 1.9;
7.1 3.0 5.9 2.1;
6.3 2.9 5.6 1.8;
6.5 3.0 5.8 2.2;
7.6 3.0 6.6 2.1;
4.9 2.5 4.5 1.7;
7.3 2.9 6.3 1.8;
6.7 2.5 5.8 1.8;
7.2 3.6 6.1 2.5;
6.5 3.2 5.1 2.0;
6.4 2.7 5.3 1.9;
6.8 3.0 5.5 2.1;
5.7 2.5 5.0 2.0;
5.8 2.8 5.1 2.4;
6.4 3.2 5.3 2.3;
6.5 3.0 5.5 1.8;
7.7 3.8 6.7 2.2;
7.7 2.6 6.9 2.3;
6.0 2.2 5.0 1.5;
6.9 3.2 5.7 2.3;
5.6 2.8 4.9 2.0;
7.7 2.8 6.7 2.0;
6.3 2.7 4.9 1.8;
6.7 3.3 5.7 2.1;
7.2 3.2 6.0 1.8;
6.2 2.8 4.8 1.8;
6.1 3.0 4.9 1.8;
6.4 2.8 5.6 2.1;
7.2 3.0 5.8 1.6;
7.4 2.8 6.1 1.9;
7.9 3.8 6.4 2.0;
6.4 2.8 5.6 2.2;
6.3 2.8 5.1 1.5;
6.1 2.6 5.6 1.4;
7.7 3.0 6.1 2.3;
6.3 3.4 5.6 2.4;
6.4 3.1 5.5 1.8;
6.0 3.0 4.8 1.8;
6.9 3.1 5.4 2.1;
6.7 3.1 5.6 2.4;
6.9 3.1 5.1 2.3;
5.8 2.7 5.1 1.9;
6.8 3.2 5.9 2.3;
6.7 3.3 5.7 2.5;
6.7 3.0 5.2 2.3;
6.3 2.5 5.0 1.9;
6.5 3.0 5.2 2.0;
6.2 3.4 5.4 2.3;
5.9 3.0 5.1 1.8;
];
% Class labels as a 150x1 cell array of character vectors: 50 entries of
% 'Se', followed by 50 entries of 'Ve', followed by 50 entries of 'Vi'.
data.classes = [repmat({'Se'}, 50, 1); ...
                repmat({'Ve'}, 50, 1); ...
                repmat({'Vi'}, 50, 1)];
%Does k-means
% Seed the three cluster centres with randomly chosen data points: one row
% of data.samples is drawn uniformly from each block of 50 consecutive rows
% (1-50, 51-100, 101-150 -- the blocks that line up with the 'Se', 'Ve' and
% 'Vi' entries of data.classes). The full inclusive range of each block is
% used so that every sample, including the first and last row of a block,
% can be picked as an initial centre.
m{1}=data.samples(randi([1,50]),1:4);
m{2}=data.samples(randi([51,100]),1:4);
m{3}=data.samples(randi([101,150]),1:4);
% m{1}=[randi([0,10]),randi([0,10]),randi([0,10]),randi([0,10])]; %Initialisations of cluster centres random number between 0 and 10
% m{2}=[randi([0,10]),randi([0,10]),randi([0,10]),randi([0,10])];
% m{3}=[randi([0,10]),randi([0,10]),randi([0,10]),randi([0,10])];
% m{1}=[randi([round(min(min(data.samples))),round(max(max(data.samples)))]),randi([round(min(min(data.samples))),round(max(max(data.samples)))]),randi([round(min(min(data.samples))),round(max(max(data.samples)))]),randi([round(min(min(data.samples))),round(max(max(data.samples)))])];
% m{2}=[randi([round(min(min(data.samples))),round(max(max(data.samples)))]),randi([round(min(min(data.samples))),round(max(max(data.samples)))]),randi([round(min(min(data.samples))),round(max(max(data.samples)))]),randi([round(min(min(data.samples))),round(max(max(data.samples)))])];
% m{3}=[randi([round(min(min(data.samples))),round(max(max(data.samples)))]),randi([round(min(min(data.samples))),round(max(max(data.samples)))]),randi([round(min(min(data.samples))),round(max(max(data.samples)))]),randi([round(min(min(data.samples))),round(max(max(data.samples)))])];
% m{1}
% m{2}
% m{3}
% for i=1:3
% for j=1:4
% m{i}(1,j)=randi([round(min(min(data.samples))),round(max(max(data.samples)))]);
% % m2{i}(1,j)=randi([round(min(min(data.samples))),round(max(max(data.samples)))]);
% % m3{i}(1,j)=randi([round(min(min(data.samples))),round(max(max(data.samples)))]);
% end
% end
for z=1:100
for i=1:3
for j=1:150
D(i,j)=norm(m{i}-data.samples(j,1:4));
end
end
for i=1:150
mini=min(D(1:3,i));
count=find(D(1:3,i)==mini);
if length(count)>=2
count=randi(count);
else
count=count;
end
Clus(i)=count;
end
for i=1:150
for j=1:4
if Clus(i)==1
U1(i,j)=data.samples(i,j);
else U1(i,j)=0;
end
if Clus(i)==2
U2(i,j)=data.samples(i,j);
else U2(i,j)=0;
end
if Clus(i)==3
U3(i,j)=data.samples(i,j);
else U3(i,j)=0;
end
end
end
for j=1:4
m{1}(1,j)=mean(nonzeros(U1(1:end,j)));
m{2}(1,j)=m