#include <iostream>
#include <fstream>
#include <sstream>
#include <cstdlib>
#include <vector>
#include <string>
#include <map>
using namespace std;
// Number of corpus tokens counted by structsv(); main() uses it as the
// denominator when converting counts into relative frequencies.
int TOTAL_NUM=0;
// Number of distinct part-of-speech tags; structsv() sets it to
// pos_index.size(). It is also the side length of A_STATE and A_table.
int STATE_NUM=0;
// Small constant; main() writes delta/TOTAL_NUM at the start of every row of
// a_table.txt (presumably a floor value for unseen transitions -- confirm).
double delta=0.0001;
// Tag string -> number of tokens carrying that tag, filled by structsv().
map <string,int> pos_index;
// STATE_NUM x STATE_NUM table of tag-to-tag transition counts, allocated and
// filled in main().
int **A_STATE;
// STATE_NUM x STATE_NUM table holding A_STATE[i][j]/TOTAL_NUM, allocated and
// filled in main().
double **A_table;
void structsv ()
{
ifstream fin;
fin.open("199801.txt");
if(!fin)
{
cerr<<"error 100 opening 199801.txt";
}
ofstream out2("ty.txt"); //字符集文件
if(!out2)
{
cerr<<"error 100 opening ty.txt";
}
ofstream out4("test.txt"); //加工好的可以用来测试的文件
if(!out4)
{
cerr<<"error 100 opening test.txt";
}
ofstream out5("pos.txt");
if(!out5)
{
cerr<<"error 100 opening pos.txt";
}
string s1,s_pos,s_word,s,s2;
int index_pos=0;
while (getline(fin,s1))
{ string::size_type end;
istringstream stream(s1);
string word;
while (stream>>word)
{ if(word.size()==21&&word[20]=='m')
continue;
if(word[0]=='[')
word=word.substr(1);
if(int i=word.find(']',0))
word=word.substr(0,i);
TOTAL_NUM++;
out2<<word<<" ";
end=word.find_first_of('/',0);
s2=word.substr(end+1);
pair<map<string,int>::iterator,bool> ret=
pos_index.insert(make_pair(s2,1));
if (!ret.second)
{
++ret.first->second;
}
word=word.substr(0,end);
out4<<word<<" ";
}
out4<<endl;
out2<<endl;
}
cout<<TOTAL_NUM<<endl;
for (map<string,int>::iterator iter=pos_index.begin();iter!=pos_index.end();iter++)
{
out5<<iter->first<<" "<<iter->second<<endl;
}
STATE_NUM=pos_index.size();
cout<<STATE_NUM<<endl;
fin.close();
out2.close();
out4.close();
out5.close();
}
int main()
{
structsv ();
ifstream fin1;
fin1.open("ty.txt");
if(!fin1)
{
cerr<<"error 100 opening ty.txt";
}
ofstream out("b_table.txt");
if(!out)
{
cerr<<"error 100 opening b_table.txt";
}
ofstream out1("a_table.txt");
if(!out1)
{
cerr<<"error 100 opening a_table.txt";
}
string s1;
map<string,int> b_table,a_table;
while (getline(fin1,s1))
{
istringstream stream(s1);
string word;
while (stream>>word)
{
pair<map<string,int>::iterator,bool> ret=
b_table.insert(make_pair(word,1));
if (!ret.second)
{
++ret.first->second;
}
}
}
fin1.close();
string s2;
string::size_type end;
for (map<string,int>::iterator iter=b_table.begin();iter!=b_table.end();iter++)
{
s2=iter->first;
end=s2.find_first_of('/',0);
s2=s2.substr(end);
out<<iter->first<<" "<<iter->second<<" "<<s2<<"\t"<<(double)iter->second/double(TOTAL_NUM)<<endl;
}
A_STATE=new int*[STATE_NUM]; //存储状态转化表(次数)
for (int row=0;row!=STATE_NUM;row++)
{
A_STATE[row]=new int [STATE_NUM];
for(int i=0;i!=STATE_NUM;i++)
A_STATE[row][i]=0;
}
A_table=new double*[STATE_NUM]; //存储状态转化表(频率)
for (int row=0;row!=STATE_NUM;row++)
{
A_table[row]=new double [STATE_NUM];
for(int i=0;i!=STATE_NUM;i++)
A_table[row][i]=0;
}
ifstream fin2;
fin2.open("ty.txt");
if(!fin2)
{
cerr<<"error 100 opening ty.txt";
}
string stateF;//前词串
string stateB;//后词串
string endF;//前词词性
string endB;//后词词性
int ends=0;
int indexF=0;//前词词性在词性表的位置
int indexB=0;//后词词性在词性表的位置
string *CIXING=new string[STATE_NUM];
map<string,int>::iterator itea=pos_index.begin();
for (int i=0;i!=STATE_NUM;i++)
{
CIXING[i]=itea->first;
itea++;
}
while (getline(fin2,s1))
{
istringstream stream(s1);
stream>>stateF;
ends=stateF.find_first_of('/');
endF=stateF.substr(ends+1);
indexF=0;
for (int i=0;i!=STATE_NUM;i++)
{
if (CIXING[i]==endF)
{
break;
}
indexF++;
}
while (stream>>stateB)
{
ends=stateB.find_first_of('/');
endB=stateB.substr(ends+1);
indexB=0;
for (int i=0;i!=STATE_NUM;i++)
{
if (CIXING[i]==endB)
{
break;
}
indexB++;
}
A_STATE[indexF][indexB]++;
indexF=indexB;
}
}
double *Un_Reg=new double[STATE_NUM];
for (int i=0;i!=STATE_NUM;i++)
{
Un_Reg[i]=delta/(double) TOTAL_NUM;
out1<<Un_Reg[i]<<" ----";
for (int j=0;j!=STATE_NUM;j++)
{
A_table[i][j]=(double)A_STATE[i][j] /(double) TOTAL_NUM;
out1<<" "<<A_table[i][j];
}
out1<<endl;
}
out1.close();
out.close();
fin2.close();
return 0;
}
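/* Disabled code: two unfinished drafts of a driver that would read sentences to tag; never compiled.
int main()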
{
ifstream fin1;
fin1.open("test.txt");
if(!fin1)
{
cerr<<"error 100 opening test.txt";
}
ofstream out("result.txt");
if(!out)
{
cerr<<"error 100 opening result.txt";
}
vector<string> sentence; // one sentence waiting to be tagged
string s1;
int end;
vector<int>::size_type n;
while (getline(fin1,s1))
{
istringstream stream(s1);
string word;
while (stream>>word)
{
if (word[word.end()-1]!='。'||word[word.end()-1]!='?'||word[word.end()-1]!='!'||
word[word.end()-1]!=':'||word[word.end()-1]!=';')
{
sentence.push_back(word);
continue;
}
sentence.push_back(word);
n=sentence.size();
}
}
fin1.close();
out.close();
return 0;
}
int main()
{
string sentence1="我们 即将 以 丰收 的 喜悦 送 走 牛年 , 以 昂扬 的 斗志 迎来 虎年 。";
string result;
vector<string> sentence;
string word;
istringstream stream(sentence1);
while (stream>>word)
{
sentence.push_back(word);
}
while ()
{
}
}*/