#include "Apriori.h"
// Creates a miner with no dataset attached: both dimensions are zero, the data
// pointer is NULL, and the minimum support holds -1, the value Run() treats as
// "not configured yet".
Apriori::Apriori()
    : m_nRowCount(0),
      m_nColCount(0),
      m_pData(NULL),
      m_minSupport(-1)
{
}
// Forgets the dataset pointer. The pointer is only reset here; this destructor
// does not free the buffer it referred to.
Apriori::~Apriori()
{
    m_pData = NULL;
}
// Attaches a dataset to the miner.
//
// pData     - first element of a row-major table holding nRowCount * nColCount
//             strings. Only the pointer is stored (the strings are not
//             copied), so the buffer has to stay valid while this object
//             reads from it.
// nRowCount - number of rows in the table.
// nColCount - number of cells in each row.
//
// Returns false, leaving the previously stored state untouched, when pData is
// NULL or either dimension is negative. Returns true once all three values
// have been stored.
bool Apriori::SetData(string *pData, int nRowCount, int nColCount)
{
    // A negative dimension cannot describe a table. Worse, two negative
    // dimensions multiply into a positive cell count in Run(), which would
    // then read past the buffer, so such input is refused up front.
    if (pData == NULL || nRowCount < 0 || nColCount < 0)
    {
        return false;
    }

    m_pData = pData;
    m_nRowCount = nRowCount;
    m_nColCount = nColCount;
    return true;
}
// Counts the support of every candidate itemset in contentLst.
//
// For each candidate, every dataset row is examined and the candidate's nCount
// is increased by one when the row contains ALL of the candidate's first nCol
// items. Counts are added to whatever nCount already holds.
//
// Rows are scanned left to right and an empty cell is treated as the end of
// the row, so cells located after the first empty cell are never matched.
// Candidates whose nCol is not positive are skipped and keep their count.
//
// contentLst - candidates to count; updated in place.
// Always returns true.
bool Apriori::Statistics(vector<ITEM> &contentLst)
{
    const int nCandidates = static_cast<int>(contentLst.size());
    for (int c = 0; c < nCandidates; c++)
    {
        // Work on the stored element directly instead of copying it.
        ITEM &candidate = contentLst[c];
        if (candidate.nCol <= 0)
        {
            continue;
        }

        for (int row = 0; row < m_nRowCount; row++)
        {
            const string *pRow = m_pData + row * m_nColCount;

            // Every item has to be located in this row. A single miss
            // disqualifies the row, whether the scan stopped at an empty cell
            // or ran through a completely filled row.
            bool bAllFound = true;
            for (int k = 0; k < candidate.nCol && bAllFound; k++)
            {
                const string &wanted = candidate.ContentLst.at(k);
                bool bFound = false;
                for (int col = 0; col < m_nColCount; col++)
                {
                    const string &cell = pRow[col];
                    if (cell.empty())
                    {
                        break; // end-of-row marker
                    }
                    if (cell == wanted)
                    {
                        bFound = true;
                        break;
                    }
                }
                bAllFound = bFound;
            }

            if (bAllFound)
            {
                candidate.nCount++;
            }
        }
    }
    return true;
}
// Removes every itemset whose count is below a threshold.
//
// contentLst - itemsets to filter; replaced by the surviving entries, in their
//              original order, when at least one entry survives.
// nCount     - minimum nCount an entry needs in order to be kept.
//
// Returns true when at least one entry survived. When nothing survives the
// function returns false and contentLst is deliberately left unchanged.
bool Apriori::Pruning(vector<ITEM> &contentLst, int nCount)
{
    vector<ITEM> kept;
    const int nLength = static_cast<int>(contentLst.size());
    for (int i = 0; i < nLength; i++)
    {
        // Inspect the element in place; it is copied only if it is kept.
        const ITEM &item = contentLst.at(i);
        if (item.nCount >= nCount)
        {
            kept.push_back(item);
        }
    }

    if (kept.empty())
    {
        return false;
    }

    // Hand the survivors over without copying them a second time.
    contentLst.swap(kept);
    return true;
}
// Joins two itemsets of equal length into one that is a single item longer.
//
// The join succeeds only when both inputs have the same nCol and agree on
// every item except the last one. The result then holds all items of *pItem1
// followed by the last item of *pItem2, with nCol increased by one and nCount
// reset to 0.
//
// pItem1, pItem2 - itemsets to join; neither is modified unless itemResult
//                  refers to the same object.
// itemResult     - receives the joined itemset; written only on success.
//
// Returns false when either pointer is NULL, when the lengths differ, when
// the itemsets are empty (nCol < 1), or when the leading items do not match.
bool Apriori::Link(ITEM *pItem1, ITEM *pItem2, ITEM &itemResult)
{
    if (pItem1 == NULL || pItem2 == NULL)
    {
        return false;
    }
    // An empty itemset has no "last item"; without this check the index
    // computed below would be -1.
    if (pItem1->nCol != pItem2->nCol || pItem1->nCol < 1)
    {
        return false;
    }

    const int nLength = pItem1->nCol - 1;
    for (int i = 0; i < nLength; i++)
    {
        if (pItem1->ContentLst[i] != pItem2->ContentLst[i])
        {
            return false;
        }
    }

    // Read everything needed from the inputs before writing the output, so
    // the call stays correct even if itemResult is one of the two inputs.
    const string strLast = pItem2->ContentLst[nLength];
    const int nNewCol = pItem1->nCol + 1;

    itemResult.ContentLst = pItem1->ContentLst;
    itemResult.nCol = nNewCol;
    itemResult.nCount = 0;
    itemResult.ContentLst.push_back(strLast);
    return true;
}
// Replaces contentLst with the itemsets obtained by joining its entries
// pairwise.
//
// Every pair (first, second) with first positioned before second is offered to
// the three-argument Link(); each successful join is collected in the order
// the pairs are visited. The collected list, which may be empty, then
// overwrites contentLst.
//
// Always returns true.
bool Apriori::Link(vector<ITEM> &contentLst)
{
    vector<ITEM> joined;
    ITEM candidate;
    const int nItems = contentLst.size();

    for (int first = 0; first < nItems; ++first)
    {
        ITEM *pFirst = &contentLst[first];
        for (int second = first + 1; second < nItems; ++second)
        {
            ITEM *pSecond = &contentLst[second];
            if (!Link(pFirst, pSecond, candidate))
            {
                continue; // this pair cannot be joined
            }
            joined.push_back(candidate);
        }
    }

    contentLst = joined;
    return true;
}
void Apriori::SetMinSupport(int nSupport)
{
m_minSupport = nSupport;
}
bool Apriori::Run()
{
if (-1 == m_minSupport)
{
cout << "请设置一个最小支持度" <<endl;
return false;
}
//生成一项集
int nAllContent = m_nRowCount * m_nColCount;
vector<ITEM> vecTmp;
int nsize = 0;
for (int i=0; i<nAllContent; i++)
{
int j;
string str(*(m_pData + i));
if (str.empty())
{
continue;
}
for (j=0; j<nsize; j++)
{
if (str == vecTmp[j].ContentLst[0])
{
vecTmp[j].nCount++;
break;
}
}
if (j == nsize)
{
ITEM itemTmp;
itemTmp.ContentLst.push_back(str);
itemTmp.nCol = 1;
itemTmp.nCount = 1;
vecTmp.push_back(itemTmp);
nsize++;
}
}
//对一项集进行剪枝y
Pruning(vecTmp, m_minSupport);
//迭代生成频繁集
for (;Pruning(vecTmp, m_minSupport);)
{
m_Result = vecTmp;
Link(vecTmp);
Statistics(vecTmp);
}
return true;
}
void Apriori::PrintResult()
{
int nLength = m_Result.size();
for (int i=0; i<nLength; i++)
{
int nSize = m_Result[i].ContentLst.size();
cout << "item" << i << ": ";
for (int j=0; j<nSize; j++)
{
cout << m_Result[i].ContentLst[j] << ",";
}
cout << " Support: " << m_Result[i].nCount << endl;
}
}
- 1
- 2
前往页