function main()
%%% experimental networks used in the paper %%%%%%%%%%%%%%%%%%%%%%%%
W = load('05.gra');
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
W = sparse(W);
W = sym(W); % for the FEC, no need symmetry
close all;
% show the original adjacency matrix
% show_matrix(W,'The original adjacency matrix');
% disp('press Enter to continue...');
% pause;
% disp('mining the communities...');
% control error, the unique parameter required by this algorithm
err = 0.1^7;
%% initializing the hierarchy matrix
H = sparse(length(W),length(W));
id = 1;% id denotes the community number
%% Initializing the name array
for c = 1:length(W)
W_names(c) = c;
end
%% main body to mining communities
tic;
[new_W,new_W_names,H,id] = NCMA(W,W_names,H,id,err);
disp(['The computation took ' num2str(toc) ' seconds']);
% output results
for c = 1:length(H)
s = sum(abs(H(:,c)));
if s==0
break;
end
end
if c ==length(H)
H = H(:,1:c);
else
if c>1
H = H(:,1:c-1);
else
H = zeros(length(H),1);
end
end
for r = 1:length(new_W_names)
new_H(r,:) = H(new_W_names(r),:);
end
% n = length(new_W_names);
% for r = 1:n
% flection(new_W_names(r)) = r;
% end
% A = sparse(n,n);
% [i,j,V] = find(W);
% for r = 1:length(i)
% x = flection(i(r));
% y = flection(j(r));
% A(x,y) = V(r);
% end
%
%
% show_matrix(new_H,'The community hierarchy structure');
%
% WH = [new_W new_H];
% show_matrix(WH,'The cleaned transfered matrix with community hierarchy');
%
%
% WH = [A new_H];
% show_matrix(WH,'The transfered matrix with community hierarchy');
% indexing
output(new_W_names, new_H);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [new_W,new_W_names,H,id] = NCMA(W,W_names,H,id,err)
[pos, EP1,EP2,new_W, new_W_names]= PL(W,W_names,err);
if (EP1 < 0.5) && (EP2 < 0.5) % stop criterion
W_A = new_W(1:pos,1:pos);
W_A_names = new_W_names(1:pos);
H = makeH(H, id, W_A_names, 1, pos);
id = id +1;
if length(W_A) > 1
[W_A,W_A_names,H,id]= NCMA(W_A,W_A_names,H,id,err);
end
W_B = new_W(pos+1:length(new_W),pos+1:length(new_W));
W_B_names = new_W_names(pos+1:length(new_W));
H = makeH(H, id, W_B_names, pos+1, length(new_W));
id = id +1;
if length(W_B) > 1
[W_B,W_B_names,H,id] = NCMA(W_B,W_B_names,H,id,err);
end
new_W = zeros(length(new_W),length(new_W));
new_W(1:pos,1:pos) = W_A;
new_W(pos+1:length(new_W),pos+1:length(new_W)) = W_B;
new_W_names = [W_A_names W_B_names];
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function H = makeH(H, id, names, first, last)
for c = first:last
v = names(c-first+1);
col = find_first_col(H, v);
H(v,col) = id;
end
function col = find_first_col(H,v)
for col = 1:length(H)
if H(v,col) == 0
break;
end
end
function [pos, EP1, EP2, new_W, new_W_names]= PL(W,W_names,error)
n = length(W);
% compute the degree vector
[i,j,V] = find(W);
NNZ = length(i);
D = zeros(1,n);
for r = 1:NNZ
D(i(r)) = D(i(r)) + V(r);
end
% computer one-step transfer probability matrix
P = sparse(W);
for r = 1:NNZ
x = i(r);
y = j(r);
w = V(r);
if D(x)~= 0
P(x,y) = w/D(x);
end
end
% select the vertex with maximum degree as the sink
[s_D, IX]=sort(D);
sink = IX(n);
%computing the limit probability pi_sink
pi_sink = D(sink) / sum(D);
% R0 = rand(n,1);
R0 = zeros(n,1);
R0(sink) =1;
R1 = R0;
err = 1;
steps = 0;
while err> error && steps <1000
S = zeros(n,1);
for r = 1:NNZ
% check r-th element W(i,j) = v
S(i(r)) = S(i(r)) + R0(j(r))*V(r);
end
for r = 1:n
if D(r) ~= 0
R1(r) = S(r) / D(r);
else
R1(r) = 0;
end
end
err = sum(abs(R1 - R0));
steps = steps +1;
R0 = R1;
end
% sort the L-transfer probabilities
[SR, IX] = sort(R1);
% transfer original adjacency matrix
% compute the flection pos
for r = 1:n
flection(IX(r)) = r;
new_W_names(r) = W_names(IX(r));
end
new_W = sparse(n,n);
new_P = sparse(n,n);
[i1,j1,VP] = find(P);
for r = 1:NNZ
x = flection(i(r));
y = flection(j(r));
new_W(x,y) = V(r);
new_P(x,y) = VP(r);
end
% finding the pos satisfying with the pcut criterion
[pos, EP1, EP2] = FC(new_P);
function [pos, EP1, EP2] = FC(B)
n = length(B);
[i,j,V] = find(B);
NNZ = length(i);
if n<=1
pos = 1;
EP1 = 1;
EP2 = 1;
return;
end
% computing the trapping probability for each position by top-down
S1 = zeros(n,1);
T1 = zeros(n,1);
for r = 1:NNZ
x = i(r);
y = j(r);
w = V(r);
if y<=x
S1(x) = S1(x) + w;
else
S1(y) = S1(y) + w;
end
end
for r = 2:n
S1(r) = S1(r-1)+S1(r);
T1(r) = S1(r)/r;
end
T1 = T1(1:n-1);
% computing the trapping probability for each position by bottom-up
S2 = zeros(n,1);
T2 = zeros(n,1);
for r = NNZ:-1:1
x = i(r);
y = j(r);
w = V(r);
if y<=x
S2(y) = S2(y) + w;
else
S2(x) = S2(x) + w;
end
end
for r = n-1:-1:1
S2(r) = S2(r+1)+S2(r);
T2(r) = S2(r)/(n-r+1);
end
T2 = T2(2:n);
% computing the pcut for each positions
for pos = 1:n-1
E1(pos) = 1 - T1(pos);
E2(pos) = 1 - T2(pos);
end
Pcut = zeros(n-1,1);
for pos =1:n-1
Pcut(pos) = 2-T1(pos)-T2(pos);
end
% finding the minimum pcut
[pcut, pos] = min(Pcut);
EP1 = 1 - T1(pos);
EP2 = 1 - T2(pos);
function show_matrix(A,str)
figure(...
'Color',[1 1 1],...
'Name',str);
clf;
imagesc(A);
colorbar;
function W = sym(W)
for i=1:length(W)
W(i,i) = 0;
end
[i,j,V] = find(W);
for r = 1:length(i)
W(j(r),i(r)) = V(r);
end
function output(new_W_names, new_H)
n =length(new_W_names);
clusterID = 0;
Fid = fopen('result.txt', 'w');
fprintf(Fid, '%d: ', clusterID);
fprintf(Fid,'%d ',new_W_names(1)-1);
for r=2:n
if sum(abs(new_H(r,:)-new_H(r-1,:))) == 0
fprintf(Fid,'%d ',new_W_names(r)-1);
else
fprintf(Fid,'\r\n');
clusterID = clusterID +1;
fprintf(Fid, '%d: ', clusterID);
fprintf(Fid,'%d ',new_W_names(r)-1);
end
end
fclose(Fid);
评论0