/*kmeans算法实现(此处只考虑元组只有两个属性的情况)
*@File:k_means.cpp
*@Author:Cai0538
*@Create:2011-12-10
*@Last Modified:2011-12-10
*/
#include <iostream>
#include <fstream>
#include <vector>
#include <math.h>
#define k 5
using namespace std;
//A data point (tuple) described by exactly two numeric attributes.
//Used both for the input samples and for the cluster centroids.
struct Tuple{
    float attr1;  //first attribute (treated as the x coordinate)
    float attr2;  //second attribute (treated as the y coordinate)
};
//Euclidean distance between two tuples, treating (attr1, attr2) as
//coordinates in the plane.
//  t1, t2 : the two points to compare (taken by value)
//  returns: sqrt(dx*dx + dy*dy), where dx/dy are the per-attribute differences
float getDistXY(Tuple t1, Tuple t2)
{
    float deltaX = t1.attr1 - t2.attr1;
    float deltaY = t1.attr2 - t2.attr2;
    return sqrt(deltaX * deltaX + deltaY * deltaY);
}
//Decides which cluster a tuple belongs to by picking the nearest centroid.
//  means  : array of centroids; entries 0 .. k-1 are read
//  tuple  : the point to classify
//  returns: index of the centroid with the smallest getDistXY distance to
//           tuple; on a tie the lowest index wins (strict '<' comparison)
int clusterOfTuple(Tuple means[],Tuple tuple){
    int nearest = 0;                                  //best centroid found so far
    float nearestDist = getDistXY(means[0], tuple);   //its distance to tuple
    for (int c = 1; c < k; c++) {
        float candidate = getDistXY(means[c], tuple);
        if (!(candidate < nearestDist)) {
            continue;   //not strictly closer: keep the current winner
        }
        nearestDist = candidate;
        nearest = c;
    }
    return nearest;
}
//获得给定簇集的平方误差
float getVar(vector<Tuple> clusters[],Tuple means[]){
float var = 0;
for (int i = 0; i < k; i++)
{
vector<Tuple> t = clusters[i];
for (int j = 0; j< t.size(); j++)
{
var += getDistXY(t[j],means[i]);
}
}
//cout<<"sum:"<<sum<<endl;
return var;
}
//获得当前簇的均值(质心)
Tuple getMeans(vector<Tuple> cluster){
int num = cluster.size();
double meansX = 0, meansY = 0;
Tuple t;
for (int i = 0; i < num; i++)
{
meansX += cluster[i].attr1;
meansY += cluster[i].attr2;
}