function result = ClusteringMeasure(Y, predY)
if size(Y,2) ~= 1
Y = Y';
end;
if size(predY,2) ~= 1
predY = predY';
end;
n = length(Y);
uY = unique(Y);
nclass = length(uY);
Y0 = zeros(n,1);
if nclass ~= max(Y)
for i = 1:nclass
Y0(find(Y == uY(i))) = i;
end;
Y = Y0;
end;
uY = unique(predY);
nclass = length(uY);
predY0 = zeros(n,1);
if nclass ~= max(predY)
for i = 1:nclass
predY0(find(predY == uY(i))) = i;
end;
predY = predY0;
end;
Lidx = unique(Y); classnum = length(Lidx);
predLidx = unique(predY); pred_classnum = length(predLidx);
% purity
correnum = 0;
for ci = 1:pred_classnum
incluster = Y(find(predY == predLidx(ci)));
% cnub = unique(incluster);
% inclunub = 0;
% for cnubi = 1:length(cnub)
% inclunub(cnubi) = length(find(incluster == cnub(cnubi)));
% end;
inclunub = hist(incluster, 1:max(incluster)); if isempty(inclunub) inclunub=0;end;
correnum = correnum + max(inclunub);
end;
Purity = correnum/length(predY);
%if pred_classnum
res = bestMap(Y, predY);
% accuarcy
ACC = length(find(Y == res))/length(Y);
% NMI
MIhat = MutualInfo(Y,res);
result = [ACC MIhat Purity];
%%
function [newL2, c] = bestMap(L1,L2)
%bestmap: permute labels of L2 match L1 as good as possible
% [newL2] = bestMap(L1,L2);
%===========
L1 = L1(:);
L2 = L2(:);
if size(L1) ~= size(L2)
error('size(L1) must == size(L2)');
end
L1 = L1 - min(L1) + 1; % min (L1) <- 1;
L2 = L2 - min(L2) + 1; % min (L2) <- 1;
%=========== make bipartition graph ============
nClass = max(max(L1), max(L2));
G = zeros(nClass);
for i=1:nClass
for j=1:nClass
G(i,j) = length(find(L1 == i & L2 == j));
end
end
%=========== assign with hungarian method ======
[c,t] = hungarian(-G);
newL2 = zeros(nClass,1);
for i=1:nClass
newL2(L2 == i) = c(i);
end
%%
function MIhat = MutualInfo(L1,L2)
% mutual information
%===========
L1 = L1(:);
L2 = L2(:);
if size(L1) ~= size(L2)
error('size(L1) must == size(L2)');
end
L1 = L1 - min(L1) + 1; % min (L1) <- 1;
L2 = L2 - min(L2) + 1; % min (L2) <- 1;
%=========== make bipartition graph ============
nClass = max(max(L1), max(L2));
G = zeros(nClass);
for i=1:nClass
for j=1:nClass
G(i,j) = length(find(L1 == i & L2 == j))+eps;
end
end
sumG = sum(G(:));
%=========== calculate MIhat
P1 = sum(G,2); P1 = P1/sumG;
P2 = sum(G,1); P2 = P2/sumG;
H1 = sum(-P1.*log2(P1));
H2 = sum(-P2.*log2(P2));
P12 = G/sumG;
PPP = P12./repmat(P2,nClass,1)./repmat(P1,1,nClass);
PPP(abs(PPP) < 1e-12) = 1;
MI = sum(P12(:) .* log2(PPP(:)));
MIhat = MI / max(H1,H2);
%%%%%%%%%%%%% why complex ? %%%%%%%%
MIhat = real(MIhat);
%%
function [C,T]=hungarian(A)
%HUNGARIAN Solve the Assignment problem using the Hungarian method.
%
%[C,T]=hungarian(A)
%A - a square cost matrix.
%C - the optimal assignment.
%T - the cost of the optimal assignment.
%s.t. T = trace(A(C,:)) is minimized over all possible assignments.
% Adapted from the FORTRAN IV code in Carpaneto and Toth, "Algorithm 548:
% Solution of the assignment problem [H]", ACM Transactions on
% Mathematical Software, 6(1):104-111, 1980.
% v1.0 96-06-14. Niclas Borlin, niclas@cs.umu.se.
% Department of Computing Science, Ume� University,
% Sweden.
% All standard disclaimers apply.
% A substantial effort was put into this code. If you use it for a
% publication or otherwise, please include an acknowledgement or at least
% notify me by email. /Niclas
[m,n]=size(A);
if (m~=n)
error('HUNGARIAN: Cost matrix must be square!');
end
% Save original cost matrix.
orig=A;
% Reduce matrix.
A=hminired(A);
% Do an initial assignment.
[A,C,U]=hminiass(A);
% Repeat while we have unassigned rows.
while (U(n+1))
% Start with no path, no unchecked zeros, and no unexplored rows.
LR=zeros(1,n);
LC=zeros(1,n);
CH=zeros(1,n);
RH=[zeros(1,n) -1];
% No labelled columns.
SLC=[];
% Start path in first unassigned row.
r=U(n+1);
% Mark row with end-of-path label.
LR(r)=-1;
% Insert row first in labelled row set.
SLR=r;
% Repeat until we manage to find an assignable zero.
while (1)
% If there are free zeros in row r
if (A(r,n+1)~=0)
% ...get column of first free zero.
l=-A(r,n+1);
% If there are more free zeros in row r and row r in not
% yet marked as unexplored..
if (A(r,l)~=0 & RH(r)==0)
% Insert row r first in unexplored list.
RH(r)=RH(n+1);
RH(n+1)=r;
% Mark in which column the next unexplored zero in this row
% is.
CH(r)=-A(r,l);
end
else
% If all rows are explored..
if (RH(n+1)<=0)
% Reduce matrix.
[A,CH,RH]=hmreduce(A,CH,RH,LC,LR,SLC,SLR);
end
% Re-start with first unexplored row.
r=RH(n+1);
% Get column of next free zero in row r.
l=CH(r);
% Advance "column of next free zero".
CH(r)=-A(r,l);
% If this zero is last in the list..
if (A(r,l)==0)
% ...remove row r from unexplored list.
RH(n+1)=RH(r);
RH(r)=0;
end
end
% While the column l is labelled, i.e. in path.
while (LC(l)~=0)
% If row r is explored..
if (RH(r)==0)
% If all rows are explored..
if (RH(n+1)<=0)
% Reduce cost matrix.
[A,CH,RH]=hmreduce(A,CH,RH,LC,LR,SLC,SLR);
end
% Re-start with first unexplored row.
r=RH(n+1);
end
% Get column of next free zero in row r.
l=CH(r);
% Advance "column of next free zero".
CH(r)=-A(r,l);
% If this zero is last in list..
if(A(r,l)==0)
% ...remove row r from unexplored list.
RH(n+1)=RH(r);
RH(r)=0;
end
end
% If the column found is unassigned..
if (C(l)==0)
% Flip all zeros along the path in LR,LC.
[A,C,U]=hmflip(A,C,LC,LR,U,l,r);
% ...and exit to continue with next unassigned row.
break;
else
% ...else add zero to path.
% Label column l with row r.
LC(l)=r;
% Add l to the set of labelled columns.
SLC=[SLC l];
% Continue with the row assigned to column l.
r=C(l);
% Label row r with column l.
LR(r)=l;
% Add r to the set of labelled rows.
SLR=[SLR r];
end
end
end
% Calculate the total cost.
T=sum(orig(logical(sparse(C,1:size(orig,2),1))));
function A=hminired(A)
%HMINIRED Initial reduction of cost matrix for the Hungarian method.
%
%B=assredin(A)
%A - the unreduced cost matris.
%B - the reduced cost matrix with linked zeros in each row.
% v1.0 96-06-13. Niclas Borlin, niclas@cs.umu.se.
[m,n]=size(A);
% Subtract column-minimum values from each column.
colMin=min(A);
A=A-colMin(ones(n,1),:);
% Subtract row-minimum values from each row.
rowMin=min(A')';
A=A-rowMin(:,ones(1,n));
% Get positions of all zeros.
[i,j]=find(A==0);