机器学习的作业.zip资源-CSDN文库

共6个文件

py：3个

md：2个

png：1个

需积分: 5 45 浏览量 2024-04-08 17:38:20 上传评论收藏 48KB ZIP 举报

资源推荐

资源详情

资源评论

收起资源包目录

机器学习的作业.zip （6个子文件）

content

Week2

Week2第二节_朴素贝叶斯.md 2KB

week2第二节_2.3使用朴素贝叶斯过滤垃圾邮件.py 4KB

Week2第一节_决策树.md 1017B

Week2_forecast_type_of_glass.py 378B

Week3

logRegres.py 4KB

Img

Week_homework2.png 42KB

# coding: utf-8 # # 从疝气病症预测病马的死亡率 # # # Logistic 回归梯度上升优化算法 # In[16]: import numpy as np # In[17]: def loadDataSet(): dataMat = []; labelMat = [] fr = open(r'C:\Users\MILI\Desktop\Machine learning\MachineLearningInAction-Camp\Week3\Reference Code\Ch05\testSet.txt') for line in fr.readlines(): lineArr = line.strip().split() dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])]) labelMat.append(int(lineArr[2])) return dataMat, labelMat def sigmoid(inX): return 1.0 / (1 + np.exp(-inX)) def gradAscent(dataMatIn, classLabels): dataMatrix = np.mat(dataMatIn) labelMat = np.mat(classLabels).transpose() m, n = np.shape(dataMatrix) alpha = 0.001 maxCycles = 500 weights = np.ones((n, 1)) for k in range(maxCycles): h = sigmoid(dataMatrix*weights) error = (labelMat - h) weights = weights + alpha * dataMatrix.transpose()* error return weights # # 画出数据集和Logistics回归最佳拟合直线的函数 # In[18]: def plotBestFit(weights): import matplotlib.pyplot as plt dataMat, labelMat = loadDataSet() dataArr = np.array(dataMat) n = np.shape(dataArr)[0] xcord1 = []; ycord1 = [] xcord2 = []; ycord2 = [] for i in range(n): if int(labelMat[i]) == 1: xcord1.append(dataArr[i, 1]); ycord1.append(dataArr[i, 2]) else: xcord2.append(dataArr[i, 1]); ycord2.append(dataArr[i, 2]) fig = plt.figure() ax = fig.add_subplot(111) ax.scatter(xcord1, ycord1, s=30, c='red', marker='s') ax.scatter(xcord2, ycord2, s=30, c='green') x = np.arange(-3.0, 3.0, 0.1) y = (-weights[0]-weights[1]*x)/weights[2] ax.plot(x, y) plt.xlabel('X1'); plt.ylabel('X2') plt.show() # # #随机梯度上升算法 # In[19]: def stocGradAscent0(dataMatrix, classLabels): m, n = np.shape(dataMatrix) alpha = 0.01 weights = np.ones(n) for i in range(m): h = sigmoid(sum(dataMatrix[i]*weights)) error = classLabels[i] - h weights = weights + alpha * error * dataMatrix[i] return weights # # # 改进的随机梯度上升算法 # In[20]: def stocGradAscent1(dataMatrix, classLabels, numIter=150): m, n = np.shape(dataMatrix) weights = np.ones(n) #initialize to all ones for j in range(numIter): dataIndex = list(range(m)) for i in range(m): alpha = 4/(1.0+j+i)+0.0001 randIndex = int(np.random.uniform(0, len(dataIndex))) h = sigmoid(sum(dataMatrix[randIndex]*weights)) error = classLabels[randIndex] - h weights = weights + alpha * error * dataMatrix[randIndex] del(dataIndex[randIndex]) return weights # # # Logistic回归分类函数 # In[21]: def classifyVector(inX, weights): prob = sigmoid(sum(inX*weights)) if prob > 0.5: return 1.0 else: return 0.0 def colicTest(): frTrain = open('horseColicTraining.txt'); frTest = open('horseColicTest.txt') trainingSet = []; trainingLabels = [] for line in frTrain.readlines(): currLine = line.strip().split('\t') lineArr = [] for i in range(21): lineArr.append(float(currLine[i])) trainingSet.append(lineArr) trainingLabels.append(float(currLine[21])) trainWeights = stocGradAscent1(np.array(trainingSet), trainingLabels, 1000) errorCount = 0; numTestVec = 0.0 for line in frTest.readlines(): numTestVec += 1.0 currLine = line.strip().split('\t') lineArr = [] for i in range(21): lineArr.append(float(currLine[i])) if int(classifyVector(np.array(lineArr), trainWeights)) != int(currLine[21]): errorCount += 1 errorRate = (float(errorCount)/numTestVec) print("the error rate of this test is: %f" % errorRate) return errorRate def multiTest(): numTests = 10; errorSum = 0.0 for k in range(numTests): errorSum += colicTest() print("after %d iterations the average error rate is: %f" % (numTests, errorSum/float(numTests))) # In[22]: reload(logRegres) # In[24]: logRegres.multiTest()

评论收藏

内容反馈