//Knn.cpp
#include <iostream>
#include "Knn.h"
#include <cmath>
#include <map>
#include <stdexcept>
using namespace std;
/**
 * Builds a k-nearest-neighbour classifier over an m x n training table.
 *
 * The table pointer is stored, not copied, and the feature columns of the
 * caller's table are rescaled in place as part of construction.
 *
 * @param trainingDataset m row pointers; each row has n entries and the last
 *                        entry of a row is its class label.
 * @param m               number of training samples (rows).
 * @param n               number of entries per row, label column included.
 */
Knn::Knn(double** trainingDataset, int m, int n)
    : trainingDataset(trainingDataset), m(m), n(n)
{
    // Order matters: the deviation needs the mean, and rescaling needs both.
    ComputeArithmeticMean();
    ComputeStandardDeviation();
    RescaleTrainingDataset();
}
void Knn::ComputeArithmeticMean()
{
arithmeticMean = new double[n - 1];//只开辟n-1大小,因为最后一维是类别号
double sum;
for(int i = 0; i < n - 1; i++)//因为最后一维是类别号,不参与计算
{
sum = 0;
for(int j = 0; j < m; j++)
{
sum += trainingDataset[j][i];
}
// arithmeticMean[i] = sum / n;//这里除以n吗?除以m吧
arithmeticMean[i] = sum / m;
}
}
void Knn::ComputeStandardDeviation()
{
standardDeviation = new double[n - 1];
double sum, temp;
for(int i = 0; i < n - 1; i++)//因为最后一维是类别号,不参与计算
{
sum = 0;
for(int j = 0; j < m; j++)
{
temp = trainingDataset[j][i] - arithmeticMean[i];
sum += temp * temp;
}
// standardDeviation[i] = sqrt(sum / n);//这里除以n吗?除以m吧
standardDeviation[i] = sqrt(sum / m);
}
}
/**
 * Standardises the n - 1 feature entries of one sample in place: each entry
 * has the training mean subtracted and is divided by the training standard
 * deviation. The last entry (class label) is left untouched.
 *
 * A feature that is constant across the training set has a standard deviation
 * of zero. Dividing by it would turn the entry into NaN or infinity, which in
 * turn makes every distance NaN. Such a feature is therefore only centred,
 * which is equivalent to using a scale of 1.
 *
 * @param row sample with at least n - 1 feature entries; modified in place.
 */
void Knn::RescaleDistance(double* row)
{
    for(int i = 0; i < n - 1; i++)
    {
        const double centered = row[i] - arithmeticMean[i];
        if(standardDeviation[i] > 0)
            row[i] = centered / standardDeviation[i];
        else
            row[i] = centered; // zero-variance feature: skip the division
    }
}
void Knn::RescaleTrainingDataset()
{
for(int i = 0; i < m; i++)
{
RescaleDistance(trainingDataset[i]);
}
}
/**
 * Releases the two per-feature statistics arrays owned by this object.
 * trainingDataset is not freed here.
 */
Knn::~Knn()
{
    delete[] standardDeviation;
    delete[] arithmeticMean;
}
/**
 * Euclidean distance between two samples, taken over the first n - 1 entries.
 * The last entry is the class label and does not contribute.
 *
 * @param x first sample.
 * @param y second sample.
 * @return square root of the summed squared per-feature differences.
 */
double Knn::Distance(double* x, double* y)
{
    const int featureCount = n - 1;
    double squaredSum = 0;
    for(int d = 0; d < featureCount; ++d)
    {
        const double delta = x[d] - y[d];
        squaredSum += delta * delta;
    }
    return sqrt(squaredSum);
}
/**
 * Classifies a test sample by unweighted majority vote among its k nearest
 * training samples.
 *
 * Side effects:
 *  - the feature entries of test are rescaled in place through
 *    RescaleDistance(), so passing the same buffer twice rescales it twice;
 *  - each selected neighbour is printed to stdout as "<index> <distance>";
 *  - the predicted label is written to test[n - 1].
 *
 * @param test sample with n entries; the last entry receives the prediction.
 * @param k    number of neighbours to consult. If k exceeds the number of
 *             training samples, all of them vote.
 * @return the predicted class label. When several labels share the highest
 *         vote count, the numerically smallest label wins.
 * @throws std::invalid_argument if k <= 0 or the training set is empty; test
 *         is left unmodified in that case.
 */
double Knn::Vote(double* test, int k)
{
    // Without at least one neighbour there is nothing to vote on; the loops
    // below would otherwise dereference end() iterators.
    if(k <= 0 || m <= 0)
        throw std::invalid_argument("Knn::Vote requires k > 0 and a non-empty training set");

    typedef std::map<int, double> NeighbourMap; // training index -> distance to test
    typedef std::map<double, int> VoteMap;      // class label -> number of votes

    // Bring the test sample onto the same scale as the training data.
    RescaleDistance(test);

    // Select the k nearest training samples.
    NeighbourMap nearest;
    const NeighbourMap::size_type wanted = static_cast<NeighbourMap::size_type>(k);
    for(int i = 0; i < m; ++i)
    {
        const double distance = Distance(test, trainingDataset[i]);
        if(nearest.size() < wanted)
        {
            // Still filling up: the first k samples are accepted unconditionally.
            nearest.insert(NeighbourMap::value_type(i, distance));
            continue;
        }

        // Find the current farthest neighbour (lowest index on equal distance).
        NeighbourMap::iterator farthest = nearest.begin();
        for(NeighbourMap::iterator scan = nearest.begin(); scan != nearest.end(); ++scan)
        {
            if(scan->second > farthest->second)
                farthest = scan;
        }
        // Replace it only when the new sample is strictly closer.
        if(distance < farthest->second)
        {
            nearest.erase(farthest);
            nearest.insert(NeighbourMap::value_type(i, distance));
        }
    }

    // Print the selected neighbours in ascending index order.
    for(NeighbourMap::const_iterator shown = nearest.begin(); shown != nearest.end(); ++shown)
    {
        std::cout<<shown->first<<" "<<shown->second<<std::endl;
    }

    // Unweighted vote: the last entry of each training row is its class label.
    VoteMap votes;
    for(NeighbourMap::const_iterator voter = nearest.begin(); voter != nearest.end(); ++voter)
    {
        // operator[] inserts a zero count the first time a label is seen.
        ++votes[trainingDataset[voter->first][n - 1]];
    }

    // The label with the most votes becomes the prediction.
    VoteMap::iterator winner = votes.begin();
    for(VoteMap::iterator candidate = votes.begin(); candidate != votes.end(); ++candidate)
    {
        if(candidate->second > winner->second)
            winner = candidate;
    }

    test[n - 1] = winner->first; // store the prediction in the label slot
    return winner->first;
}