/*----------------------------------------------------------------------
File : DIC.cpp
Contents : DIC algorithm for finding frequent sets
Author : Bart Goethals
Creation : 25/5/2004
----------------------------------------------------------------------*/
#include <time.h>
#include <algorithm>
#include <iostream>
#include <time.h>
using namespace std;
#include "DIC.h"
DIC::DIC()
{
data=0;
minsup=0;
trie = new Item(0);
M=0;
}
DIC::~DIC()
{
if(data) delete data;
if(trie) {
trie->deleteChildren();
delete trie;
}
}
void DIC::setData(char *fn, int type)
{
data = new Data(fn, type);
}
int DIC::setOutputSets(char *fn)
{
setsout.open(fn);
if(!setsout.is_open()) {
cerr << "error: could not open " << fn << endl;
return -1;
}
return 0;
}
int DIC::generateSets()
{
int total=0, pass=0, generated=0;
tnr=0;
trie->makeChildren();
counting=1;
frequent=0;
clock_t start = clock();
while(counting) {
generated += countCandidates();
generated += generateCandidates();
if(tnr==0 || counting==0) {
total += generated;
cout << ++pass;
if(tnr != 0) cout << "(" << tnr << ")";
cout << ": " << " " << generated << " " << total << " " << frequent
<< " [" << (clock()-start)/double(CLOCKS_PER_SEC) << "s] " << endl << flush;
start = clock();
generated=0;
}
}
cout << endl;
return frequent;
}
int DIC::generateCandidates()
{
if(trie->Counting() && tnr==0) {
trie->setFlag('d');
counting--;
}
int *tmp = new int[trie->getChildren()->size()];
int generated = generateCandidates(trie->getChildren(), 1, tmp);
delete [] tmp;
return generated;
}
int DIC::generateCandidates(set<Item> *items, int depth, int *current)
{
if(items == 0) return 0;
int generated = 0;
set<Item>::reverse_iterator runner;
for(runner = items->rbegin(); runner != items->rend(); runner++) {
current[depth-1] = runner->getId();
if(runner->Counting()) {
if(runner->getStart() == tnr) {
if(runner->getSupport() >= minsup) {
if(!runner->Frequent()) frequent++;
if(setsout.is_open()) printSet(*runner, current, depth);
runner->setFlag('d');
}
else runner->setFlag('c');
counting--;
}
else {
if(runner->getSupport() >= minsup) {
if(!runner->Frequent()) frequent++;
runner->setFlag('b');
}
}
}
if(runner->Frequent()) {
generated += generateCandidates(runner->getChildren(), depth+1, current);
set<Item> *children = runner->makeChildren();
for(set<Item>::reverse_iterator it = items->rbegin(); it != runner; it++) {
current[depth] = it->getId();
if(it->Frequent() && children->find(Item(current[depth])) == children->end()) {
if(depth==1 || checkSubsets(depth+1, current, trie->getChildren(), 0, 1)) {
if(children->insert(Item(it->getId(),tnr)).second) {
counting++;
generated++;
}
}
}
}
}
}
return generated;
}
bool DIC::checkSubsets(int sl, int *iset, set<Item> *items, int spos, int depth)
{
if(items==0) return 0;
bool ok = true;
set<Item>::iterator runner;
int loper = spos;
spos = depth+1;
while(ok && (--spos >= loper)) {
runner = items->find(Item(iset[spos]));
if(runner != items->end() && runner->Frequent()) {
if(depth<sl-1) ok = checkSubsets(sl, iset, runner->getChildren(), spos+1, depth+1);
}
else ok=false;
}
return ok;
}
int DIC::countCandidates()
{
int generated=0;
Transaction *t=0;
while((t = data->getNext())) {
tnr++;
if(trie->Counting()) {
trie->Increment();
set<Item> *children = trie->getChildren();
for(int i=0; i<t->length; i++) {
set<Item>::iterator it = children->find(Item(t->t[i]));
if(it == children->end()) {
it = children->insert(Item(t->t[i])).first;
counting++;
generated++;
}
}
}
processTransaction(t, trie->getChildren(), 0);
delete t;
if(tnr%M == 0) break;
}
if(tnr%M != 0) tnr = 0;
return generated;
}
int DIC::processTransaction(Transaction *t, set<Item> *items, int spos)
{
if(items == 0) return 0;
for(set<Item>::iterator it = items->begin(); it!=items->end(); it++) {
while((spos<t->length) && t->t[spos] < it->getId()) spos++;
if((spos<t->length) && t->t[spos]==it->getId()) {
if(it->Counting()) it->Increment();
if(spos+1<t->length) processTransaction(t,it->getChildren(),spos+1);
}
}
return 0;
}
void DIC::printSet(const Item& item, int *itemset, int length)
{
set<int> outset;
for(int j=0; j<length; j++) outset.insert(itemset[j]);
for(set<int>::iterator k=outset.begin(); k!=outset.end(); k++) setsout << *k << " ";
setsout << "(" << item.getSupport() << ")" << endl << flush;
}
dic1.rar_Apriori_Incremental apriori_cfr_dic_关联规则
版权申诉
191 浏览量
2022-09-14
14:41:52
上传
评论
收藏 6KB RAR 举报
alvarocfc
- 粉丝: 105
- 资源: 1万+
最新资源
- #P0015. 全排列 超级简单
- pta题库答案c语言之排序4统计工龄.zip
- pta题库答案c语言之树结构7堆中的路径.zip
- pta题库答案c语言之树结构3TreeTraversalsAgain.zip
- pta题库答案c语言之树结构2ListLeaves.zip
- pta题库答案c语言之树结构1树的同构.zip
- 基于C++实现民航飞行与地图简易管理系统可执行程序+说明+详细注释.zip
- pta题库答案c语言之复杂度1最大子列和问题.zip
- 三维装箱问题(Three-Dimensional Bin Packing Problem,3D-BPP)是一个经典的组合优化问题
- 以下是一些关于Linux线程同步的基本概念和方法.txt
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈