#include <stdio.h>
#include <math.h>
#include <time.h>
/////////////////////////////////////////////////////////
#define TS_SIZE 603219 //training set size
#define CB_SIZE 128 //code book size
#define VEC_DEM 4 //vector dimension
#define MAX_ROUND 2000
#define BIG_NUMBER (1e+10)
#define SMALL_NUMBER (1e-20)
#define TS_FILE "ts.txt"
#define CB_FILE "cb.txt"
#define CB_FILE_2 "cb2.txt"
///////////////////////////////////////////////////////////
typedef struct _tTSVector { //training set vector
double data[VEC_DEM]; //vector data
int cluster; //cluster belong to
}tTSVector;
typedef struct _tCBVector { //code book vector
double data[VEC_DEM]; //vector data
int num; //number of vectors in this cluster
double sum[VEC_DEM]; //sum of vectors in this cluster, to calculate cluster center
}tCBVector;
tTSVector TrainingSet[TS_SIZE];
tCBVector CodeBook[CB_SIZE];
typedef enum {
SQUARE_D,
ABS_D,
CORRELATION_D
} tDistortionType;
tDistortionType DistType = SQUARE_D;
double g_dist_threshold = 0.0001;
/////////////////////////////////////////////////////////////
//read training set from file
int ReadTS()
{
FILE *fp;
//open file
if ((fp = fopen(TS_FILE, "rt")) == NULL) {
perror("Open TS_FILE filed");
return -1;
}
//read training set
for (int i=0; i<TS_SIZE; i++)
for (int j=0; j<VEC_DEM; j++)
fscanf(fp, "%lf", &(TrainingSet[i].data[j]));
fclose(fp);
return 0;
}
//init code book, using some vectors from training set
void InitCB()
{
for (int i=0; i<CB_SIZE; i++) {
for (int j=0; j<VEC_DEM; j++) {
CodeBook[i].data[j] = TrainingSet[i * TS_SIZE / CB_SIZE].data[j];
CodeBook[i].sum[j] = 0;
}
CodeBook[i].num = 0;
}
}
//update code book, using new cluster center
void UpdateCB()
{
for (int i=0; i<CB_SIZE; i++) {
//if CodeBook[i] means a empty cluster
//split cluster with max members, give a half of its members to CodeBook[i]
if (CodeBook[i].num == 0) {
//find cluster with max members
int MaxCluster=0;
for (int j=0; j<CB_SIZE; j++)
MaxCluster = CodeBook[j].num > CodeBook[MaxCluster].num ? j: MaxCluster;
//number of the half of members
int Num = CodeBook[MaxCluster].num/2;
//clear this biggest cluster to refill
for (j=0; j<VEC_DEM; j++ )
CodeBook[MaxCluster].sum[j] = 0;
CodeBook[MaxCluster].num = 0;
//find all member training vector belonged to the original biggest cluster
//assign first half to CodeBook[i], and later half to the original biggest cluster
for (j=0; j<TS_SIZE; j++) {
if (TrainingSet[j].cluster != MaxCluster)
continue;
if (CodeBook[i].num < Num) {
TrainingSet[j].cluster = i;
for (int m=0; m<VEC_DEM; m++)
CodeBook[i].sum[m] += TrainingSet[j].data[m];
CodeBook[i].num++;
}
else {
for (int m=0; m<VEC_DEM; m++)
CodeBook[MaxCluster].sum[m] += TrainingSet[j].data[m];
CodeBook[MaxCluster].num++;
}
}
//recalculate the original biggest cluster center if necessary
if (MaxCluster < i) {
for( j=0; j<VEC_DEM; j++ ) {
CodeBook[MaxCluster].data[j] = CodeBook[MaxCluster].sum[j] / CodeBook[MaxCluster].num;
CodeBook[MaxCluster].sum[j] = 0;
}
CodeBook[MaxCluster].num = 0;
}
}
//calculate new cluster center, clear sum and num for next round
for (int j=0; j<VEC_DEM; j++) {
CodeBook[i].data[j] = CodeBook[i].sum[j] / CodeBook[i].num; //calculate cluster center
CodeBook[i].sum[j] = 0;
}
}
//clear num at last
for (i=0; i<CB_SIZE; i++)
CodeBook[i].num = 0;
}
//calculate distortion of two vector, using different methods.
double CalDistortion(double *X, double *Y, tDistortionType method)
{
int i;
double sum = 0;
switch (method) {
case SQUARE_D:
for (i=0; i<VEC_DEM; i++)
sum += (X[i] - Y[i]) * (X[i] - Y[i]);
break;
case ABS_D:
for (i=0; i<VEC_DEM; i++)
sum += fabs(X[i] - Y[i]);
break;
case CORRELATION_D:
{
double cor=0, DX=0, DY=0;
for (i=0; i<VEC_DEM; i++) {
cor += X[i] * Y[i];
DX += X[i] * X[i];
DY += Y[i] * Y[i];
}
if (!DX || !DY)
cor = 0;
else
cor = cor/sqrt(DX * DY);
sum = 1 - cor; //big correlation is good, so define distortion as (1-cor)
break;
}
}
return sum;
}
//write code book to file
int WriteCB(char *fname, double dist)
{
char fname2[128];
sprintf(fname2, "2%s", fname);
FILE *fp1 = fopen(fname, "wt");
FILE *fp2 = fopen(fname2, "wt");
if (!fp1 || !fp2) {
perror("Create code book file error!");
return -1;
}
fprintf(fp2, "distortion: %.21lf\n", dist);
for (int i=0; i<CB_SIZE; i++) {
fprintf(fp2, "Center%02d: ", i);
for (int j=0; j<VEC_DEM; j++ ) {
fprintf(fp1, "%0.5lf ", CodeBook[i].data[j]);
fprintf(fp2, "%0.5lf ", CodeBook[i].data[j]);
}
fprintf( fp1, "\n");
fprintf( fp2, "Num=%03d\n", CodeBook[i].num);
}
fclose(fp1);
fclose(fp2);
return 0;
}
//
void DoCluster()
{
double d = BIG_NUMBER; //big number, distortion of all clusters
double d_bak; //bak of distortion of all clusters
double e = SMALL_NUMBER; //small number, threshold of training
double rd = 1; //relative distortion
double v_dist; //distortion between ts vector and cb vector
double temp_v_dist; //temp distortion
char fname[128];
long nRound = 0;
do {
d_bak = d;
d = 0; //as accumulator of distortion
if (nRound++)
UpdateCB();
else
InitCB();
//clustering
for (int i=0; i<TS_SIZE; i++) { //cluster every ts vector
//which cb vector should a ts vector belong to
v_dist = BIG_NUMBER;
for (int j=0; j<CB_SIZE; j++) {
temp_v_dist = CalDistortion(TrainingSet[i].data, CodeBook[j].data, DistType);
if (temp_v_dist < v_dist) {
v_dist = temp_v_dist;
TrainingSet[i].cluster = j; //belong to this cb vector
}
}
//this cluster member increase
CodeBook[ TrainingSet[i].cluster ].num ++;
//sum up ts vector to calculate cluster center in UpdateCB
for (j=0; j<VEC_DEM; j++)
CodeBook[ TrainingSet[i].cluster ].sum[j] += TrainingSet[i].data[j];
//add up distortion
d += v_dist;
}
d /= TS_SIZE;
rd = fabs((d_bak-d)/d);
printf("==Finished the %3dth round training, rd = %.21lf\n", nRound, rd);
if(rd < g_dist_threshold)
{
sprintf(fname, "cb(%.021lf).txt", g_dist_threshold);
WriteCB(fname, rd);
g_dist_threshold = rd / 2;
}
}
while (rd > e && nRound < MAX_ROUND);
}
void ChoiceDistType()
{
char c;
printf("Please choose distortion measuring type\n");
printf("0 - Square.\n");
printf("1 - Absolute.\n");
printf("2 - Correlation.\n");
printf("Please input (default 0):");
scanf("%c", &c);
switch (c)
{
case '1':
printf("You choosed \"1 - Absolute.\"\n\n");
DistType = ABS_D;
break;
case '2':
printf("You choosed \"2 - Correlation.\"\n\n");
DistType = CORRELATION_D;
break;
default:
printf("You choosed \"0 - Square.\"\n\n");
DistType = SQUARE_D;
break;
}
}
int main()
{
ChoiceDistType();
printf("Reading training set...\n");
if (ReadTS() < 0)
return -1;
printf("Training...\n");
time_t t1 = time(NULL);
DoCluster();
time_t t2 = time(NULL);
printf("Training time: %ld seconds\n", t2 - t1);
/*
printf("Writing code book...\n");
if (WriteCB() < 0)
return -1;
*/
printf("Done.\n");
return 0;
}
- 1
- 2
- 3
- 4
前往页