import sys
import time
def loadData(path=None):
    """Return the transaction database as a list of item lists.

    Args:
        path: Optional path to a text file holding one transaction per
            line, with items separated by whitespace. When omitted (the
            default) the built-in nine-transaction sample is returned.

    Returns:
        A new list of transactions. The built-in sample uses integer
        items; transactions read from a file hold the raw string tokens,
        and blank lines are skipped.
    """
    if path is None:
        return [[1, 2, 5], [2, 4], [2, 3], [1, 2, 4], [1, 3], [2, 3], [1, 3], [1, 2, 3, 5], [1, 2, 3]]
    with open(path) as source:
        # str.split() without an argument discards the trailing newline and
        # never yields empty tokens, so stray spaces cannot become items.
        rows = (line.split() for line in source)
        return [row for row in rows if row]
def find_frequent_1_itemsets(D, minsupport):
    """Return the frequent 1-itemsets of the transaction database.

    Args:
        D: Iterable of transactions, each an iterable of hashable,
            mutually comparable items.
        minsupport: Minimum number of transactions an item must occur in.

    Returns:
        A sorted list of single-element lists, one per frequent item
        (all itemsets of the first level).
    """
    # Tokens that are parsing residue rather than real items.
    blank_tokens = ('', ' ', '\n')
    support = {}
    for transaction in D:
        # De-duplicate first: support is the number of transactions that
        # contain the item, so a repeated item must only count once.
        for item in set(transaction):
            if item in blank_tokens:
                continue
            support[item] = support.get(item, 0) + 1
    return sorted([item] for item, count in support.items() if count >= minsupport)
def aproiri_gen(L, L1, D, minsupport):
    """Generate every frequent itemset of the next level.

    Each itemset in L is extended by one frequent single item, and the
    extension is kept when enough transactions contain all of its items.

    Args:
        L: Frequent itemsets of the current level; each is a non-empty
            list whose last element is its largest item.
        L1: Frequent 1-itemsets (single-element lists).
        D: Transaction database; it is scanned once per candidate, so it
            must be re-iterable (e.g. a list).
        minsupport: Minimum number of transactions that must contain a
            candidate for it to be kept.

    Returns:
        A list of frequent itemsets one item larger than those in L, each
        sorted in ascending order.
    """
    res = []
    for itemset in L:
        for single in L1:
            # Only extend with an item greater than the itemset's last item,
            # so every combination is generated exactly once.
            if itemset[-1] < single[0]:
                # Sets iterate in arbitrary order; sorting keeps the largest
                # item last, which the check above relies on at the next level.
                candidate = sorted(set(itemset).union(single))
                support = sum(
                    1
                    for transaction in D
                    if all(item in transaction for item in candidate)
                )
                if support >= minsupport:
                    res.append(candidate)
    return res
def compareList(l1, l2):
    """Return True when every element of l1 also occurs in l2."""
    return all(element in l2 for element in l1)
def Aproiri(D, minsupport):
    """Mine frequent itemsets level by level and pick out the maximal ones.

    Prints the total number of frequent itemsets found.

    Args:
        D: Transaction database (a list of item lists).
        minsupport: Minimum number of supporting transactions.

    Returns:
        A tuple (L, maximal). L[0] is an empty placeholder and L[k] holds
        the frequent k-itemsets (the last level may be empty). maximal
        holds the frequent itemsets that are not contained in any frequent
        itemset of the next level.
    """
    L1 = find_frequent_1_itemsets(D, minsupport)  # itemsets of the first level
    L = [[], L1]
    # An itemset can hold up to len(L1) items, so level len(L1) itself must
    # be built as well; range() excludes its stop value.
    for k in range(2, len(L1) + 1):
        if not L[k - 1]:
            break
        L.append(aproiri_gen(L[k - 1], L1, D, minsupport))

    # Start from every frequent itemset, then drop the non-maximal ones.
    maximal = [itemset for level in L for itemset in level]
    print("频繁项集个数为:", len(maximal))

    # An itemset is not maximal when some itemset of the next level contains it.
    for smaller, larger in zip(L[1:], L[2:]):
        for itemset in smaller:
            for superset in larger:
                if compareList(itemset, superset) and itemset in maximal:
                    maximal.remove(itemset)
    return L, maximal
if __name__ == '__main__':
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring elapsed time; the wall clock can jump while the script runs.
    start = time.perf_counter()
    D = loadData()
    L, maximal = Aproiri(D, 2)
    # First the number of maximal frequent itemsets, then the itemsets themselves.
    print("极大频繁项集个数为:", len(maximal))
    print("极大频繁项集为:", maximal)
    end = time.perf_counter()
    print(end - start, 's')