#include "gen.h"
#include <assert.h>
#include <string.h>
#include <math.h>
#include <iomanip.h>
//------------------------------- Parameters -------------------------------
void PatternPar::write(ostream &fp)
{
fp << "\tNumber of patterns = " << npats << endl;
fp << "\tAverage length of pattern = " << patlen << endl;
fp << "\tCorrelation between consecutive patterns = " << corr << endl;
fp << "\tAverage confidence in a rule = " << conf << endl;
fp << "\tVariation in the confidence = " << conf_var << endl;
}
void TransPar::write(ostream &fp)
{
fp << "Number of transactions in database = " << ntrans << endl;
fp << "Average transaction length = " << tlen << endl;
fp << "Number of items = " << nitems << endl;
fp << "Large Itemsets:" << endl;
lits.write(fp);
fp << endl;
}
// calculate the number of roots, given the number of levels
void TaxPar::calc_values(void)
{
LINT nset;
nset = 0;
nset += (nlevels != 0);
nset += (fanout != 0);
nset += (nroots != 0);
switch (nset)
{
case 0: // fill in defaults
nroots = 250;
fanout = 5;
return;
case 1: // need to fill in 1 value
assert (nlevels == 0);
if (fanout == 0)
fanout = 5;
else if (nroots == 0)
nroots = 250;
return;
case 2:
if (nlevels == 0) // all set!
return;
if (fanout != 0) { // calculate nroots
nroots = nitems / (1 + pow(DOUBLE(fanout), DOUBLE(nlevels-1)));
if (nroots < 1)
nroots = 1;
}
else if (nroots != 0) { // calculate fanout
FLOAT temp;
temp = (FLOAT)nitems / nroots - 1;
temp = log((DOUBLE)temp) / (nlevels - 1);
fanout = exp((DOUBLE)temp);
}
case 3: // all set!
return;
}
}
void TaxPar::write(ostream &fp)
{
fp << "Number of transactions in database = " << ntrans << endl;
fp << "Average transaction length = " << tlen << endl;
fp << "Number of items = " << nitems << endl;
fp << "Number of roots = " << nroots << endl;
fp << "Number of levels = " << nlevels << endl;
fp << "Average fanout = " << fanout << endl;
fp << "Large Itemsets:" << endl;
lits.write(fp);
fp << endl;
}
void SeqPar::write(ostream &fp)
{
fp << "Number of customers in database = " << ncust << endl;
fp << "Average sequence length = " << slen << endl;
fp << "Average transaction length = " << tlen << endl;
fp << "Number of items = " << nitems << endl;
fp << "Repetition-level = " << rept << endl;
fp << "Variation in repetition-level = " << rept_var << endl;
fp << "Large Itemsets:" << endl;
lits.write(fp);
fp << "Large Sequences:" << endl;
lseq.write(fp);
fp << endl;
}
//------------------------------ Taxonomy ------------------------------
const LINT Taxonomy::item_len = 7;
//
// Constructor: reads taxonomy file and builds taxonomy as a DAG
//
Taxonomy::Taxonomy(LINT num_items, // total number of items
LINT num_roots, // number of roots
FLOAT fanout, // average fanout
FLOAT depth_ratio // average ratio of ....
)
: nitems(num_items), nroots(num_roots), depth(depth_ratio)
{
LINT i, j;
LINT next_child;
PoissonDist nchildren(fanout-1); // string length
// allocate memory
par = new LINT [nitems];
child_start = new LINT [nitems];
child_end = new LINT [nitems];
next_child = nroots;
// initialize parents (or lack thereof) for roots
for (i = 0; i < nroots; i++)
par[i] = -1;
// set up all the interior nodes
for (i = 0, j = next_child; i < nitems && next_child < nitems; i++)
{
child_start[i] = next_child;
next_child += nchildren() + 1;
if (next_child > nitems)
next_child = nitems;
child_end[i] = next_child;
for (; j < next_child; j++)
par[j] = i;
}
// initialize children (or lack thereof) for all the leaves
for (; i < nitems; i++)
child_start[i] =
child_end[i] = -1;
}
Taxonomy::~Taxonomy(void)
{
LINT i;
delete [] par;
delete [] child_start;
delete [] child_end;
}
void Taxonomy::write(ofstream &fp)
{
for (LINT i = 0; i < nitems; i++)
if (par[i] >= 0) {
assert(i != par[i]);
fp.write((char *)&i, sizeof(LINT));
fp.write((char *)&par[i], sizeof(LINT));
}
}
void Taxonomy::write_asc(ofstream &fp)
{
for (LINT i = 0; i < nitems; i++)
if (par[i] >= 0) {
assert(i != par[i]);
fp << setw(item_len) << i << " " << setw(item_len) << par[i] << endl;
}
}
void Taxonomy::display(ofstream &fp)
{
fp << "Taxonomy: " << endl;
for (LINT i = 0; i < nitems && child_start[i] > 0; i++)
fp << i << " " << child_start[i] << " " << child_end[i]-1 << endl;
fp << endl;
}
//------------------------------- ItemSet -------------------------------
ItemSet::ItemSet(LINT num_items, // number of items
Taxonomy *ptax // taxonomy (optional)
)
: tax(ptax), nitems(num_items)
{
ExpDist freq;
LINT i, j;
cum_prob = new FLOAT [nitems];
if (tax)
tax_prob = new FLOAT [nitems];
else
tax_prob = NULL;
for (i = 0; i < nitems; i++)
cum_prob[i] = freq(); // prob. that this pattern will be picked
if (tax) { // weight(itm) += wieght(children)
// normalize probabilities for the roots and for children
normalize(cum_prob, 0, tax->num_roots()-1);
for (i = 0; i < nitems && tax->num_children(i) > 0; i++)
normalize(cum_prob, tax->first_child(i), tax->last_child(i));
// calulate cumulative probabilities for children
for (i = 0; i < nitems; i++)
tax_prob[i] = cum_prob[i];
for (i = 1; i < nitems; i++)
if (tax->num_children(i) > 0)
for (j = tax->first_child(i); j < tax->last_child(i); j++)
tax_prob[j+1] += tax_prob[j];
// set real probabilities
for (i = tax->num_roots(); i < nitems; i++)
cum_prob[i] *= cum_prob[ tax->parent(i) ] * tax->depth_ratio();
}
// normalize probabilites (why -- see get_pat)
normalize(cum_prob, 0, nitems-1);
for (i = 1; i < nitems; i++) // calulate cumulative probabilities
cum_prob[i] += cum_prob[i-1];
}
ItemSet::~ItemSet()
{
delete [] cum_prob;
}
// normalize probabilities between low and high
//
void ItemSet::normalize(FLOAT prob[], LINT low, LINT high)
{
FLOAT tot;
LINT i;
// normalize probabilites
tot = 0;
for (i = low; i <= high; i++)
tot += prob[i];
for (i = low; i <= high; i++)
prob[i] /= tot;
}
// returns a pattern chosen at random
//
Item ItemSet::get_item(void)
{
FLOAT r;
LINT i;
// find the desired pattern using cum_prob table
r = rand();
// want item i such that cum_prob[i-1] < r <= cum_prob[i];
i = r * nitems; // guess location of item
i += (r-cum_prob[i]) * nitems; // refine guess
if (i >= nitems) // check boundaries
i = nitems-1;
if (i < 0)
i = 0;
while ( i < (nitems-1) && r > cum_prob[i] ) // find item
i++;
while ( i > 0 && r <= cum_prob[i-1] )
i--;
return i;
};
// if no taxonomy, returns itm
//
Item ItemSet::specialize(Item itm)
{
FLOAT r;
LINT i, nchildren;
Item first, last;
if (!tax) // no taxonomy
return itm;
nchildren = tax->num_children(itm);
if (nchildren == 0) // no children
return itm;
first = tax->child(itm, 0);
last = tax->child(itm, nchildren-1);
// find the desired pattern using cum_prob table
r = rand();
i = first + r * nchildren;
if (i == last)
i--;
while ( i < last && r > tax_prob[i] )
i++;
while ( i > first && r < tax_prob[i-1] )
i--;
return specialize(i);
}
FLOAT ItemSet::weight(Item itm) // returns prob. of choosing item
{
if (itm == 0)
return cum_prob[itm];
else
return cum_prob[itm] - cum_prob[itm-1];
}
void ItemSet::display(ofstream &fp)
{
// if (tax != NULL)
// tax->display(fp);
fp << "Items:" << endl;
fp << setprecision(3);
if (tax != NULL) {
if (cum_prob[0] * nitems > 10)
fp << 0 << " " << cum_prob[0] * nitems << " "
<< tax->first_child(0) << " " << tax->last_child(0) << endl;
for (LINT i = 1; i < nitems; i++)
if ((cum_prob[i]-cum_prob[i-1]) * nitems > 10)
fp