#include "bayes.h"
#include <iostream> // Best suited to classification where attribute values are discrete
#include <fstream>
#include <sstream>
#include <map>
using namespace std;
// Constructs a classifier with the default record counts, a zeroed
// correct-prediction counter and zeroed class priors.
bayes::bayes()
{
    // Number of records DataRead() reads from the training / test file.
    TrainNum = 130;
    TestNum = 48;

    // Incremented by houyan() each time a prediction matches the record's label.
    m = 0;

    // Prior probability of each of the three classes; filled in by do_bayes().
    int cls = 0;
    while (cls < 3)
    {
        A[cls] = 0;
        ++cls;
    }
}
void bayes::showVector(vector<OriginalData> data)
{
for(int i=0;i<data.size();i++)
cout<<i+1<<":"<<data[i].A1<<" "<<data[i].A2<<" "<<data[i].A3<<" "<<data[i].A4<<" "<<data[i].A5<<" "<<data[i].A6<<" "<<data[i].A7<<" "<<data[i].A8<<" "<<data[i].A9<<" "<<data[i].A10<<" "<<data[i].A11<<" "<<data[i].A12<<" "<<data[i].A13<<" "<<data[i].A14<<endl;
}
//从文件中读取数值
void bayes::DataRead(const char* fileName)
{
f.open(fileName);
int num;
if (fileName[1] == 'r')
num = TrainNum;
else
num = TestNum;
string line;
OriginalData wine;
vector<OriginalData> data;
for (int i = 0; i < num; i++)
{
f >> line;//一行字符
while (line.find(',') > 0 && line.find(',') < line.length())
{
line[line.find(',')] = ' ';
}
istringstream stream(line);//将stream绑定到读取的行
stream >> wine.A1 >> wine.A2 >> wine.A3 >> wine.A4 >> wine.A5 >> wine.A6 >> wine.A7 >> wine.A8 >>
wine.A9 >> wine.A10 >> wine.A11 >> wine.A12 >> wine.A13 >> wine.A14;//读取数据传给wine
data.push_back(wine);
}
if(num==TrainNum)
trainData=data;
else
testData=data;
//showVector(data);
f.close();
}
void bayes::do_bayes()
{
int count1 = 0, count2 = 0, count3 = 0;
int i;
//cout<<TrainNum<<endl;
//showVector(trainData);
for(i = 0; i < TrainNum ; i++)
{
if(trainData[i].A1 == 1)
{
count1 ++;
}
if(trainData[i].A1 == 2)
{
count2 ++;
}
if(trainData[i].A1 == 3)
{
count3 ++;
}//统计三类数据,各自求和
}
A[0] = (double)count1/(double)TrainNum; //求先验概率 p(c)
A[1] = (double)count2/(double)TrainNum;
A[2] = (double)count3/(double)TrainNum;
cout<<"A:"<<A[0]<<" "<<A[1]<<" "<<A[2]<<endl;
map<double, double>::iterator pipei;
for(i = 0 ; i < TrainNum; i++)
{
if(trainData[i].A1 == 1) //求P(Xk|C1) 中Xk的个数
{
int j=0;
for(;j< 13 ;j++)
{
double temp = *(&trainData[i].A2+j);
pipei = C1_map[j].find(temp);
if(pipei == C1_map[j].end())
{
C1_map[j].insert(map<double, double>::value_type(temp,1));
}
else
{
double j = pipei->second;
pipei->second = j + 1;
}
}
}
if(trainData[i].A1 == 2) //求P(Xk|C2) 中Xk的个数
{
int j = 0;
for(;j< 13 ;j++)
{
double temp = *(&trainData[i].A2+j);
pipei = C2_map[j].find(temp);
if(pipei == C2_map[j].end())
{
C2_map[j].insert(map<double, double>::value_type(temp,1));
}
else
{
double j = pipei->second;
pipei->second = j + 1;
}
}
}
if(trainData[i].A1 == 3) //求P(Xk|C3) 中Xk的个数
{
int j = 0;
for(;j< 13 ;j++)
{
double temp = *(&trainData[i].A2+j);
pipei = C3_map[j].find(temp);
if(pipei == C3_map[j].end())
{
C3_map[j].insert(map<double, double>::value_type(temp,1));
}
else
{
double j = pipei->second;
pipei->second = j + 1;
}
}
}
}
//概率 累加p(x|c)
for(i = 0; i < attributeNum; i++)
{
for(pipei=C1_map[i].begin(); pipei!=C1_map[i].end(); ++pipei)
{
double num = pipei->second;
pipei->second = (double)num/(double)count1; //p(x|c)
}
for(pipei=C2_map[i].begin(); pipei!=C2_map[i].end(); ++pipei)
{
double num = pipei->second;
pipei->second = (double)num/(double)count2;
}
for(pipei=C3_map[i].begin(); pipei!=C3_map[i].end(); ++pipei)
{
double num = pipei->second;
pipei->second = (double)num/(double)count3;
}
}
}
void bayes::houyan()//计算后验分布,找出最大值
{
//showVector(testData);
int i,j,k;
double p[3];
for(i = 0; i<TestNum; i++)
{
double pXC[3]={0,0,0};
for(j = 0; j < 3; j++)
{
map<double, double>::iterator pipei;
//计算p(X|C1)=p(c1)*累加(p(x|c))/p(X),因为p(X)都一样,所以只需要比较分子的大小
for(k = 0; k < attributeNum; k++)
{
pipei = C1_map[k].find(*(&testData[i].A2+k));
if(pipei != C1_map[k].end())
{
pXC[0] =pXC[0] + pipei->second;
}
}
p[0] = A[0] * pXC[0]; //p(c)*累加(p(x|c))
//计算p(X|C2)
for(k = 0; k < attributeNum; k++)
{
pipei = C2_map[k].find(*(&testData[i].A2+k));
if(pipei != C2_map[k].end())
{
pXC[1] =pXC[1] + pipei->second;
}
}
p[1] = A[1] * pXC[1];
//计算p(X|C3)
for(k = 0; k < attributeNum; k++)
{
pipei = C3_map[k].find(*(&testData[i].A2+k));
if(pipei != C3_map[k].end())
{
pXC[2] =pXC[2] + pipei->second;
}
}
p[2] = A[2] * pXC[2];
}
//找出最大值
if(p[0] > p[1] && p[0] >p[2])
{
cout<<i+1<<":"<<p[0]<<"\t "<<1<<endl;
cout<<"other classification's probability:"<<p[1]<<"\t\t"<<p[2]<<endl;
if(testData[i].A1==1)
{
m++;
cout<<"aaa"<<endl;
}
}
else
{
if(p[1] > p[2])
{
cout<<i+1<<":"<<p[1]<<"\t "<<2<<endl;
cout<<"other classification's probability:"<<p[0]<<"\t\t"<<p[2]<<endl;
if(testData[i].A1==2)
{
m++;
cout<<"bbb"<<endl;
}
}
else
{
cout<<i+1<<":"<<p[2]<<"\t "<<3<<endl;
cout<<"other classification's probability:"<<p[0]<<"\t\t"<<p[1]<<endl;
if(testData[i].A1==3)
{
m++;
cout<<"ccc"<<endl;
}
}
}
}
}