def createC1(dataSet):
    """Build the list of candidate 1-itemsets from a collection of transactions.

    Every distinct item found in any transaction of ``dataSet`` becomes a
    single-element ``frozenset``. The result is ordered by the natural sort
    order of the items, so the items must be mutually comparable.

    Args:
        dataSet: iterable of transactions, each an iterable of hashable items.

    Returns:
        A list of one-element frozensets, one per distinct item, sorted by item.
    """
    unique_items = set()
    for transaction in dataSet:
        unique_items.update(transaction)
    # frozenset is used so the itemsets can later serve as dictionary keys.
    return [frozenset((item,)) for item in sorted(unique_items)]
def scanD(D, Ck, minSupport):
    """Count candidate itemsets over the transactions and keep the frequent ones.

    A candidate's support is the fraction of transactions in ``D`` that
    contain it as a subset.

    Args:
        D: sized collection of transactions (``len(D)`` must work); each
            transaction is passed to ``issubset`` of the candidates.
        Ck: iterable of candidate itemsets; they must be hashable and provide
            ``issubset`` (e.g. ``frozenset``).
        minSupport: minimum support, as a fraction, for a candidate to be kept.

    Returns:
        A ``(Lk, supportData)`` tuple. ``Lk`` lists the candidates whose
        support is at least ``minSupport``, in the order they were first
        matched. ``supportData`` maps each candidate that occurred in at
        least one transaction to its support.
    """
    occurrences = {}
    for transaction in D:
        for candidate in Ck:
            if not candidate.issubset(transaction):
                continue
            occurrences[candidate] = occurrences.get(candidate, 0) + 1

    # Float divisor keeps the ratio fractional regardless of the count type.
    total = float(len(D))
    frequent = []
    supportData = {}
    for itemset, count in occurrences.items():
        ratio = count / total
        supportData[itemset] = ratio
        if ratio >= minSupport:
            frequent.append(itemset)
    return frequent, supportData
def aprioriGen(Lk_1, k):
Ck = []
lenLk = len(Lk_1)
for i in range(lenLk):
L1_list = list(Lk_1[i])
L1 = L1_list[:k - 2]
L1.sort()
for j in range(i + 1, lenLk):
L2_list = list(Lk_1[j])
L2 = list(Lk_1[j])[:k - 2]
L2.sort()
if L1 == L2:
Ck.append(Lk_1[i] | Lk_1[j])
return Ck
def apriori(dataSet, minSupport):
C1 = createC1(dataSet)