#!/usr/bin/env python
"""
A simple implementation of the Apriori algorithm in Python.
"""
import sys
import csv
import argparse
import json
import os
from collections import namedtuple
from itertools import combinations
from itertools import chain
################################################################################
# Data structures.
################################################################################
class TransactionManager(object):
    """
    Index a collection of transactions for support calculation.

    Every distinct item is mapped to the set of indexes of the transactions
    that contain it. The support of an item set is then the size of the
    intersection of those index sets divided by the number of transactions.
    """

    def __init__(self, transactions):
        """
        Initialize.

        Arguments:
            transactions -- A transaction iterable object
                            (eg. [['A', 'B'], ['B', 'C']]).
        """
        self.__num_transaction = 0
        self.__items = []
        self.__transaction_index_map = {}

        for transaction in transactions:
            self.add_transaction(transaction)

    def add_transaction(self, transaction):
        """
        Add a transaction.

        Arguments:
            transaction -- A transaction as an iterable object
                           (eg. ['A', 'B']).
        """
        for item in transaction:
            if item not in self.__transaction_index_map:
                # First occurrence of this item: register it.
                self.__items.append(item)
                self.__transaction_index_map[item] = set()
            # The current transaction count doubles as this transaction's
            # index; it is only incremented once all items are recorded.
            self.__transaction_index_map[item].add(self.__num_transaction)
        self.__num_transaction += 1

    def calc_support(self, items):
        """
        Return the support of the given items.

        The support is the fraction of stored transactions that contain
        every one of the items.

        Arguments:
            items -- Items as an iterable object (eg. ['A', 'B']).
        """
        # Empty item set: supported by definition.
        if not items:
            return 1.0

        # No transactions stored: nothing can be supported.
        if not self.num_transaction:
            return 0.0

        # Intersect the transaction indexes of all items.
        sum_indexes = None
        for item in items:
            indexes = self.__transaction_index_map.get(item)
            if indexes is None:
                # A set containing an unknown item has no support.
                return 0.0

            if sum_indexes is None:
                # First item: start from its index set. This is safe to
                # share because intersection() below returns a new set.
                sum_indexes = indexes
            else:
                sum_indexes = sum_indexes.intersection(indexes)

        # float() keeps this a true division on Python 2 as well.
        return float(len(sum_indexes)) / self.__num_transaction

    def initial_candidates(self):
        """
        Return the initial candidates: one single-item frozenset per item,
        in sorted item order.
        """
        return [frozenset([item]) for item in self.items]

    @property
    def num_transaction(self):
        """
        Return the number of transactions.
        """
        return self.__num_transaction

    @property
    def items(self):
        """
        Return the sorted list of distinct items seen in the transactions.
        """
        return sorted(self.__items)

    @staticmethod
    def create(transactions):
        """
        Create the TransactionManager with a transaction instance.
        If the given instance is a TransactionManager, this returns itself.
        """
        if isinstance(transactions, TransactionManager):
            return transactions
        return TransactionManager(transactions)
# Pylint's naming check (C0103) is silenced on these names because they are
# namedtuple classes.
SupportRecord = namedtuple(  # pylint: disable=C0103
    'SupportRecord',
    ['items', 'support'],
)
# SupportRecord's fields followed by 'ordered_statistics'.
RelationRecord = namedtuple(  # pylint: disable=C0103
    'RelationRecord',
    list(SupportRecord._fields) + ['ordered_statistics'],
)
OrderedStatistic = namedtuple(  # pylint: disable=C0103
    'OrderedStatistic',
    ['items_base', 'items_add', 'confidence', 'lift'],
)
################################################################################
# Internal functions.
################################################################################
def create_next_candidates(prev_candidates, length):
    """
    Return the apriori candidates of the next length as a list.

    Arguments:
        prev_candidates -- Previous candidates as a list.
        length -- The lengths of the next candidates.
    """
    # Normalize the previous candidates to a set of frozensets in a single
    # pass, collecting their items at the same time. The set gives a hash
    # lookup for the subset test below instead of a linear scan, and lets
    # candidates given as lists or tuples match as well.
    prev_candidate_set = set()
    item_set = set()
    for candidate in prev_candidates:
        candidate_items = frozenset(candidate)
        prev_candidate_set.add(candidate_items)
        item_set.update(candidate_items)
    items = sorted(item_set)

    # Create the temporary candidates. These will be filtered below.
    tmp_next_candidates = (frozenset(x) for x in combinations(items, length))

    # Return all the candidates if the length of the next candidates is 2
    # because their subsets are the same as items.
    if length < 3:
        return list(tmp_next_candidates)

    # Keep only the candidates whose every (length - 1)-item subset is one
    # of the previous candidates.
    return [
        candidate for candidate in tmp_next_candidates
        if all(
            frozenset(x) in prev_candidate_set
            for x in combinations(candidate, length - 1))
    ]
def gen_support_records(transaction_manager, min_support, **kwargs):
    """
    Return a generator of support records for the given transactions.

    Candidates are evaluated one length at a time, starting from
    single-item sets. Each candidate whose support is at least min_support
    is yielded as a SupportRecord and used to build the next candidates.

    Arguments:
        transaction_manager -- Transactions as a TransactionManager instance.
        min_support -- A minimum support (float).

    Keyword arguments:
        max_length -- The maximum length of relations (integer).
    """
    # Parse the arguments.
    max_length = kwargs.get('max_length')

    # For testing: allows the candidate generator to be replaced.
    _create_next_candidates = kwargs.get(
        '_create_next_candidates', create_next_candidates)

    # Process.
    candidates = transaction_manager.initial_candidates()
    length = 1
    while candidates:
        relations = set()
        for relation_candidate in candidates:
            support = transaction_manager.calc_support(relation_candidate)
            if support < min_support:
                continue
            candidate_set = frozenset(relation_candidate)
            relations.add(candidate_set)
            yield SupportRecord(candidate_set, support)
        length += 1
        # A missing or zero max_length means no length limit.
        if max_length and length > max_length:
            break
        candidates = _create_next_candidates(relations, length)
def gen_ordered_statistics(transaction_manager, record):
    """
    Return a generator of ordered statistics as OrderedStatistic instances.

    For each way of leaving exactly one item out of the record's items, the
    remaining items form items_base and the left-out item forms items_add.

    Arguments:
        transaction_manager -- Transactions as a TransactionManager instance.
        record -- A support record as a SupportRecord instance.
    """
    items = record.items
    # Sorting makes the order of the generated statistics deterministic.
    for combination_set in combinations(sorted(items), len(items) - 1):
        items_base = frozenset(combination_set)
        items_add = frozenset(items.difference(items_base))
        confidence = (
            record.support / transaction_manager.calc_support(items_base))
        lift = confidence / transaction_manager.calc_support(items_add)
        # Both sets are already frozensets; no need to wrap them again.
        yield OrderedStatistic(items_base, items_add, confidence, lift)
def filter_ordered_statistics(ordered_statistics, **kwargs):
    """
    Filter OrderedStatistic objects.

    Yields, in the original order, the statistics whose confidence and lift
    both reach the given minimums.

    Arguments:
        ordered_statistics -- A OrderedStatistic iterable object.

    Keyword arguments:
        min_confidence -- The minimum confidence of relations (float).
        min_lift -- The minimum lift of relations (float).
    """
    # Both thresholds default to 0.0 when not supplied.
    min_confidence = kwargs.get('min_confidence', 0.0)
    min_lift = kwargs.get('min_lift', 0.0)

    for ordered_statistic in ordered_statistics:
        if ordered_statistic.confidence < min_confidence:
            continue
        if ordered_statistic.lift < min_lift:
            continue
        yield ordered_statistic
################################################################################
# API function.
################################################################################
def apriori(transactions, **kwargs):
"""
执行Apriori算法
Arguments:
transactions -- A transaction iterable object
(eg. [['A', 'B'], ['B', 'C']]).
Keyword arguments:
min_support -- The minimum support of relations (float).
min_confidence -- The minimum confidence of relations (float).
min_lift -- The minimum lift of relations (float).
max_length -- The maximum length of the relation (integer).
"
没有合适的资源?快使用搜索试试~ 我知道了~
基于Apriori算法的商品推荐代码示例
共8个文件
xml:3个
py:2个
csv:2个
需积分: 0 43 下载量 107 浏览量
2020-09-02
10:10:02
上传
评论 4
收藏 137KB ZIP 举报
温馨提示
Apriori算法是一种挖掘关联规则的频繁项集算法,其核心思想是通过候选集生成和情节的向下封闭检测两个阶段来挖掘频繁项集。代码比较简单,直接使用python中的Aprio库调用Apriori算法。下载前请考虑好
资源推荐
资源详情
资源评论
收起资源包目录
基于Apriori算法的商品推荐系统.zip (8个子文件)
基于Apriori算法的商品推荐系统
apyori.py 13KB
groceries.csv 764KB
Market_Basket_Optimisation.csv 296KB
.idea
misc.xml 295B
modules.xml 332B
workspace.xml 8KB
inspectionProfiles
基于Apriori算法的商品推荐系统.iml 467B
apriori.py 2KB
共 8 条
- 1
资源评论
考古学家lx(李玺)
- 粉丝: 2w+
- 资源: 71
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功