#include "StdAfx.h"
#include "HierarchicalCluster.h"
HierarchicalCluster::HierarchicalCluster(void)
{
}
HierarchicalCluster::~HierarchicalCluster(void)
{
}
Mat HierarchicalCluster::cluster(Mat data, int k, int distance_type)
{
//初始化列表
for(int i = 0; i < data.rows; i++)
{
Node temp_node;
temp_node.nodeLabel = i;
temp_node.data = data.row(i);
Cluster temp_cluster;
temp_cluster.clusterLabel = i;
temp_cluster.data.push_back(temp_node);
clusters.push_back(temp_cluster);
}
//开始聚类
while(clusters.size() > k)
{
double min_distance = MAXNUM;
list<Cluster>::iterator cIdx1, cIdx2;
for(list<Cluster>::iterator itri = clusters.begin(); itri != clusters.end(); itri++)
{
list<Cluster>::iterator itrj = itri;
for(itrj++; itrj != clusters.end(); itrj++)
{
double distance = getDistance(*itri, *itrj, distance_type);
if(distance < min_distance)
{
min_distance = distance;
cIdx1 = itri;
cIdx2 = itrj;
}
}
}
merge(cIdx1, cIdx2);
}
//输出聚类结果
Mat result = Mat(data.rows, 1, CV_32S);
//调整聚类标签
int index = 0;
for(list<Cluster>::iterator itri = clusters.begin(); itri != clusters.end(); itri++, index++)
{
(*itri).clusterLabel = index;
}
//生成聚类结果
for(list<Cluster>::iterator itri = clusters.begin(); itri != clusters.end(); itri++)
{
for(list<Node>::iterator itrj = (*itri).data.begin(); itrj != (*itri).data.end(); itrj++)
{
result.at<int>((*itrj).nodeLabel) = (*itri).clusterLabel;
}
}
return result;
}
double HierarchicalCluster::getDistance(Cluster c1, Cluster c2, int type)
{
switch(type)
{
case CLUSTER_SINGLE_LINKAGE:
return distance_sl(c1, c2);
break;
case CLUSTER_COMPLETE_LINKAGE:
return distance_cl(c1, c2);
break;
case CLUSTER_AVERAGE_LINKAGE:
return distance_al(c1, c2);
break;
case CLUSTER_MEDIAN_LINKAGE:
return distance_ml(c1, c2);
break;
default:
return distance_ml(c1, c2);
break;
}
}
double HierarchicalCluster::distance_sl(Cluster c1, Cluster c2)
{
double min = MAXNUM;
for(list<Node>::iterator i = c1.data.begin(); i != c1.data.end(); i++)
{
for(list<Node>::iterator j = c2.data.begin(); j != c2.data.end(); j++)
{
Mat a = (*i).data;
Mat b = (*j).data;
double temp = cvNorm(&a.operator CvMat(), &b.operator CvMat(), CV_L1);
if(temp < min)
{
min = temp;
}
}
}
return min;
}
double HierarchicalCluster::distance_cl(Cluster c1, Cluster c2)
{
double max = MINNUM;
for(list<Node>::iterator i = c1.data.begin(); i != c1.data.end(); i++)
{
for(list<Node>::iterator j = c2.data.begin(); j != c2.data.end(); j++)
{
Mat a = (*i).data;
Mat b = (*j).data;
double temp = cvNorm(&a.operator CvMat(), &b.operator CvMat(), CV_L1);
if(temp > max)
{
max = temp;
}
}
}
return max;
}
double HierarchicalCluster::distance_al(Cluster c1, Cluster c2)
{
double distance = 0;
for(list<Node>::iterator i = c1.data.begin(); i != c1.data.end(); i++)
{
for(list<Node>::iterator j = c2.data.begin(); j != c2.data.end(); j++)
{
Mat a = (*i).data;
Mat b = (*j).data;
double temp = cvNorm(&a.operator CvMat(), &b.operator CvMat(), CV_L1);
distance += temp;
}
}
distance = distance / (c1.data.size() * c2.data.size());
return distance;
}
double HierarchicalCluster::distance_ml(Cluster c1, Cluster c2)
{
int n1 = c1.data.size();
int n2 = c2.data.size();
int n = n1 * n2;
double *data = (double *)malloc(sizeof(double) * n);
int k = 0, l =0;
for(list<Node>::iterator i = c1.data.begin(); i != c1.data.end(); i++, k++)
{
l = 0;
for(list<Node>::iterator j = c2.data.begin(); j != c2.data.end(); j++, l++)
{
Mat a = (*i).data;
Mat b = (*j).data;
double temp = cvNorm(&a.operator CvMat(), &b.operator CvMat(), CV_L1);
data[k * n2 + l] = temp;
}
}
sort(data, data + n);
if(n % 2 == 0)
{
return (data[n / 2 - 1] + data[n / 2]) / 2;
}
else
{
return data[n / 2];
}
}
void HierarchicalCluster::merge(list<Cluster>::iterator idx1, list<Cluster>::iterator idx2)
{
for(list<Node>::iterator itr = (*idx2).data.begin(); itr != (*idx2).data.end(); itr++)
{
(*idx1).data.push_back((*itr));
}
clusters.erase(idx2);
}