# Decision tree classification algorithm
# Create the data set
def createdataset():
    # each row: [age, income, job, credit, class label]
    dataSet = [[0, 2, 0, 0, 'N'],
               [0, 2, 0, 1, 'N'],
               [1, 2, 0, 0, 'Y'],
               [2, 1, 0, 0, 'Y'],
               [2, 0, 1, 0, 'Y'],
               [2, 0, 1, 1, 'N'],
               [1, 0, 1, 1, 'Y'],
               [0, 1, 0, 0, 'N'],
               [0, 0, 1, 0, 'Y'],
               [2, 1, 1, 0, 'Y'],
               [0, 1, 1, 1, 'Y'],
               [1, 1, 0, 1, 'Y'],
               [1, 2, 1, 0, 'Y'],
               [2, 1, 0, 1, 'N']]
    labels = ['age', 'income', 'job', 'credit']
    return dataSet, labels

ds1, lab = createdataset()
print(ds1)
print(lab)
# Compute the Shannon entropy of a data set
from math import log

def calcShannonEnt(dataSet):
    numEntries = len(dataSet)
    labelCounts = {}
    # count how many samples belong to each class
    for featVec in dataSet:
        currentLabel = featVec[-1]
        if currentLabel not in labelCounts.keys():
            labelCounts[currentLabel] = 0
        labelCounts[currentLabel] += 1
    shannonEnt = 0.0
    for key in labelCounts:
        prob = float(labelCounts[key]) / numEntries
        shannonEnt -= prob * log(prob, 2)
    return shannonEnt

shan = calcShannonEnt(ds1)
print(shan)
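# Quick sanity check (my addition, not part of the original code): with 9 'Y' and
# 5 'N' samples the entropy should equal -(9/14)*log2(9/14) - (5/14)*log2(5/14),
# so the value printed below is expected to match the one above (about 0.940).
from collections import Counter

counts = Counter(row[-1] for row in ds1)
expected = -sum((c / float(len(ds1))) * log(c / float(len(ds1)), 2) for c in counts.values())
print('entropy check:', expected)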
# Split the data set on a given feature
# dataSet: the data set to split
# axis: index of the feature (column) to split on
# value: the feature value the returned samples must have
def splitDataSet(dataSet, axis, value):
    retDataSet = []
    for featVec in dataSet:
        if featVec[axis] == value:
            reducedFeatVec = featVec[:axis]  # keep the columns before the split feature
            reducedFeatVec.extend(featVec[axis + 1:])  # append the columns after the split feature
            retDataSet.append(reducedFeatVec)
    return retDataSet
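# Example usage (my addition): keep the samples whose 'age' column equals 0 and
# drop that column; with the data set above this should return 5 rows of 4 columns.
print(splitDataSet(ds1, 0, 0))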
# Choose the feature with the largest information gain
def chooseBestFeatureToSplit(dataSet):
    numFeatures = len(dataSet[0]) - 1  # the last column is the class label
    baseEntropy = calcShannonEnt(dataSet)
    bestInfoGain = 0.0
    bestFeature = -1
    for i in range(numFeatures):
        featList = [example[i] for example in dataSet]
        uniqueVals = set(featList)
        newEntropy = 0.0
        for value in uniqueVals:
            subDataSet = splitDataSet(dataSet, i, value)
            prob = len(subDataSet) / float(len(dataSet))
            newEntropy += prob * calcShannonEnt(subDataSet)
        infoGain = baseEntropy - newEntropy  # information gain of splitting on feature i
        if infoGain > bestInfoGain:
            bestInfoGain = infoGain
            bestFeature = i
    return bestFeature

col = chooseBestFeatureToSplit(ds1)
print(col)
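# For illustration (my addition): print the information gain of every feature so the
# choice of column 0 ('age') as the first split can be verified by eye.
base = calcShannonEnt(ds1)
for i, name in enumerate(lab):
    cond = 0.0
    for v in set(row[i] for row in ds1):
        sub = splitDataSet(ds1, i, v)
        cond += len(sub) / float(len(ds1)) * calcShannonEnt(sub)
    print(name, 'gain =', round(base - cond, 4))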
# Helper: when no features are left, label the leaf with the most frequent class
import operator

def majorityCnt(classList):
    classCount = {}
    for vote in classList:
        if vote not in classCount.keys():
            classCount[vote] = 0
        classCount[vote] += 1
    # sort the class counts in descending order and return the most frequent class
    sortedClassCount = sorted(classCount.items(), key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]
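# Quick check (my addition): majorityCnt should return 'Y' for a list that contains
# more 'Y' than 'N' entries.
print(majorityCnt(['Y', 'N', 'Y', 'Y', 'N']))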
# Recursively build the decision tree
def createTree(dataSet, labels):
    classList = [example[-1] for example in dataSet]
    if classList.count(classList[0]) == len(classList):
        return classList[0]  # all samples share one class: return it as a leaf
    if len(dataSet[0]) == 1:
        return majorityCnt(classList)  # no features left: use the majority class
    bestFeat = chooseBestFeatureToSplit(dataSet)
    bestFeatLabel = labels[bestFeat]
    myTree = {bestFeatLabel: {}}
    del labels[bestFeat]  # note: this mutates the labels list passed in
    featValues = [example[bestFeat] for example in dataSet]
    uniqueVals = set(featValues)
    for value in uniqueVals:
        subLabels = labels[:]
        myTree[bestFeatLabel][value] = createTree(splitDataSet(dataSet, bestFeat, value), subLabels)
    return myTree

# Build the decision tree from the training samples
Tree = createTree(ds1, lab)
print("Decision tree:")
print(Tree)
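# For readability (my addition): print the nested-dict tree as an indented outline.
# This is only a display helper and does not affect the model.
def printTree(tree, indent=''):
    if not isinstance(tree, dict):
        print(indent + '-> ' + str(tree))
        return
    feature = list(tree.keys())[0]
    for value, subtree in tree[feature].items():
        print(indent + str(feature) + ' = ' + str(value))
        printTree(subtree, indent + '    ')

printTree(Tree)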
# Plot the decision tree
import matplotlib.pyplot as plt

decisionNode = dict(boxstyle="sawtooth", fc="0.8")
leafNode = dict(boxstyle="round4", fc="0.8")
arrow_args = dict(arrowstyle="<-")

# Count the leaf nodes of the tree
def getNumLeafs(myTree):
    numLeafs = 0
    firstStr = list(myTree.keys())[0]
    secondDict = myTree[firstStr]
    for key in secondDict.keys():
        if type(secondDict[key]).__name__ == 'dict':  # a dict value is an internal node, otherwise a leaf
            numLeafs += getNumLeafs(secondDict[key])
        else:
            numLeafs += 1
    return numLeafs

# Compute the depth (number of levels) of the tree
def getTreeDepth(myTree):
    maxDepth = 0
    firstStr = list(myTree.keys())[0]
    secondDict = myTree[firstStr]
    for key in secondDict.keys():
        if type(secondDict[key]).__name__ == 'dict':  # a dict value is an internal node, otherwise a leaf
            thisDepth = 1 + getTreeDepth(secondDict[key])
        else:
            thisDepth = 1
        if thisDepth > maxDepth:
            maxDepth = thisDepth
    return maxDepth
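# Sanity check (my addition): for the tree built above these helpers should report
# 5 leaves (N/Y under job, Y for age=1, Y/N under credit) and a depth of 2.
print('leaves:', getNumLeafs(Tree), 'depth:', getTreeDepth(Tree))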
# Draw a single node as an annotation box with an arrow from its parent
def plotNode(nodeTxt, centerPt, parentPt, nodeType):
    createPlot.ax1.annotate(nodeTxt, xy=parentPt, xycoords='axes fraction',
                            xytext=centerPt, textcoords='axes fraction',
                            va="center", ha="center", bbox=nodeType, arrowprops=arrow_args)

# Write the split value on the edge between a parent node and a child node
def plotMidText(cntrPt, parentPt, txtString):
    xMid = (parentPt[0] - cntrPt[0]) / 2.0 + cntrPt[0]
    yMid = (parentPt[1] - cntrPt[1]) / 2.0 + cntrPt[1]
    createPlot.ax1.text(xMid, yMid, txtString, va="center", ha="center", rotation=30)

# Recursively draw the tree structure
def plotTree(myTree, parentPt, nodeTxt):
    numLeafs = getNumLeafs(myTree)  # the number of leaves determines the x-width of this subtree
    depth = getTreeDepth(myTree)
    firstStr = list(myTree.keys())[0]  # the feature name is the text label for this node
    cntrPt = (plotTree.xOff + (1.0 + float(numLeafs)) / 2.0 / plotTree.totalW, plotTree.yOff)
    plotMidText(cntrPt, parentPt, nodeTxt)
    plotNode(firstStr, cntrPt, parentPt, decisionNode)
    secondDict = myTree[firstStr]
    plotTree.yOff = plotTree.yOff - 1.0 / plotTree.totalD
    for key in secondDict.keys():
        if type(secondDict[key]).__name__ == 'dict':  # internal node: recurse
            plotTree(secondDict[key], cntrPt, str(key))
        else:  # leaf node: draw it directly
            plotTree.xOff = plotTree.xOff + 1.0 / plotTree.totalW
            plotNode(secondDict[key], (plotTree.xOff, plotTree.yOff), cntrPt, leafNode)
            plotMidText((plotTree.xOff, plotTree.yOff), cntrPt, str(key))
    plotTree.yOff = plotTree.yOff + 1.0 / plotTree.totalD

# Create the decision tree figure
def createPlot(inTree):
    fig = plt.figure(1, facecolor='white')
    fig.clf()
    axprops = dict(xticks=[], yticks=[])
    createPlot.ax1 = plt.subplot(111, frameon=False, **axprops)  # hide the ticks
    # createPlot.ax1 = plt.subplot(111, frameon=False)  # keep ticks, for demo purposes
    plotTree.totalW = float(getNumLeafs(inTree))
    plotTree.totalD = float(getTreeDepth(inTree))
    plotTree.xOff = -0.5 / plotTree.totalW
    plotTree.yOff = 1.0
    plotTree(inTree, (0.5, 1.0), '')
    plt.savefig('decision_tree.png', dpi=300, bbox_inches='tight')
    plt.show()

createPlot(Tree)
# Classify a test sample with the generated decision tree model
def classify(inputtree, featlabels, testvec):
    firststr = list(inputtree.keys())[0]
    seconddict = inputtree[firststr]
    featindex = featlabels.index(firststr)  # map the feature name back to its column index
    classlabel = None
    for key in seconddict.keys():
        if testvec[featindex] == key:
            if type(seconddict[key]).__name__ == 'dict':
                classlabel = classify(seconddict[key], featlabels, testvec)
            else:
                classlabel = seconddict[key]
    return classlabel

# createTree() removed entries from lab, so rebuild the full label list for classification
labels = ['age', 'income', 'job', 'credit']
tsvec = [0, 0, 1, 1]
print('result:', classify(Tree, labels, tsvec))
tsvec1 = [0, 2, 0, 1]
print('result1:', classify(Tree, labels, tsvec1))
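# Final check (my addition): run every training sample back through the tree; on
# this small, cleanly separable data set the predictions should match all 14 labels.
correct = sum(1 for row in ds1 if classify(Tree, labels, row[:-1]) == row[-1])
print('training accuracy: %d/%d' % (correct, len(ds1)))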