#include<iostream>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<fstream>
#include<vector>
#include<algorithm>
#define SUP_NUM 0.01
using namespace std;
int array[30000], all_num = 0, SUP_SUM;
vector< vector <int> > table(90000);
vector< vector<int> > a(90000), b(90000);
//真 剪枝
bool jianzhi(int k, int line){//
vector <int> tmp;
for(int i = 0; i < b[i].size(); i++){
for(int j = 0; j < b[i].size(); j++){
if(i != j)
tmp.push_back(b[i][j]);
}
int count = 0;
for(int k = 0; k < line; k++){
for(int t = 0; t < a[k].size(); t++){
if(tmp[t] == a[k][t])
count++;
else
break;
}
if(count != k)
count = 0;
else
break;
}
if(count != k){//剪去
return true;
}
//else return false;
tmp.clear();
}
return false;
}
int zilianjie(int line, int tab_num, int k, int &flag)
{//k项集
//cout << "a" << endl;
int num = 0;
for(int i = 0; i < line; i++){
for(int j = i+1; j < line; j++){
for(int t = 0; t < k; t++){
if(a[i][t] != a[j][t]){
if(t != k-1){
break;//不连接
}
else{
//cout << "c" << endl;
flag = 1;
b[num].assign(a[i].begin(), a[i].end());//连接 end()是加一后的指针?
b[num].push_back(a[j][t]);//删不删呢?不删
if(jianzhi(k, line))
num++;
}
}
}
}
}
//cout << num << endl;
if(!flag)//没有一个能连
return 0;
/*for(int i = 0; i < num; i++){//给连接上的但是还没有计数的排序
sort(b[i].begin(), b[i].end());
}*///用不上吧
//支持度计数
int support[80000] = {0};
for(int i = 0; i < num; i++){
for(int j = 0; j < tab_num; j++){
int p = 0;
for(int t = 0; t < b[i].size(); t++, p++){
while(b[i][t] != table[j][p]){
p++;
if(p >= table[j].size())
break;
}
if(p == table[j].size())
break;
if((t == b[i].size()-1) && (p < table[j].size())){
support[i]++;
}
}
}
}
for(int i = 0; i < line; i++){
a[i].clear();
}
int q = 0;
for(int i = 0; i < num; i++){
if(support[i] >= SUP_SUM){
a[q].assign(b[i].begin(), b[i].end());
q++;
}
}
for(int i = 0; i < q; i++){
for(int j = 0; j < a[i].size(); j++){
printf("%d ", a[i][j]);
}
printf("\n");
}
all_num += q;
for(int i = 0; i < num; i++){
b[i].clear();
}
return q;
}
int main()
{
ifstream in("retail.dat");
string s;
char s1[3000];
int tab_num = 0, n, maxm = 0;
while(getline(in, s)){
strcpy(s1, s.c_str());
char *tmp = strtok(s1, " ");
while(tmp){
n = atoi(tmp);
array[n]++;
if(n > maxm){
maxm = n;
}
table[tab_num].push_back(n);
tmp = strtok(NULL, " ");
}
tab_num++;
}
SUP_SUM = SUP_NUM*tab_num;
int t = 0;
for(int i = 0; i <= maxm; i++){
if(array[i] >= SUP_SUM){
a[t].push_back(i);
t++;
}
}
//打印频繁一项集
for(int i = 0; i < t; i++){
for(int j = 0; j < a[i].size(); j++){
printf("%d\n", a[i][j]);
}
}
all_num += t;
int flag = 0;
for(int i = 1; i <= 5; i++){
t = zilianjie(t, tab_num, i, flag);
if(!flag)
break;
flag = 0;
}
//cout << all_num << endl;
return 0;
}