# -*- coding:UTF-8 -*-
import matplotlib.pyplot as plt
import numpy as np
import time
file_path = r"你的数据路径"
start = time.time()

# 01 Read the data file and preprocess it.
# Every non-blank line is expected to hold three whitespace-separated fields:
# feature x1, feature x2 and an integer class label.
dataMat = []   # one [1.0, x1, x2] row per sample; the constant 1.0 pairs with weights[0]
labelMat = []  # integer class label of each sample, in the same order as dataMat
with open(file_path) as file:
    # Iterate the file lazily instead of materialising every line first.
    for line in file:
        fields = line.split()
        if not fields:
            # Blank lines (e.g. a trailing newline at the end of the file)
            # used to crash on float(''); skip them instead.
            continue
        dataMat.append([1.0, float(fields[0]), float(fields[1])])
        labelMat.append(int(fields[2]))
# # 02 Plot the dataset (disabled preview of the raw data)
# n = len(dataMat)
# xcord1 = []; ycord1 = []
# xcord2 = []; ycord2 = []
# for i in range(n):
#     if int(labelMat[i]) == 1:
#         xcord1.append(float(dataMat[i][1])); ycord1.append(float(dataMat[i][2]))
#     else:
#         xcord2.append(float(dataMat[i][1])); ycord2.append(float(dataMat[i][2]))
# fig = plt.figure()
# ax = fig.add_subplot(111)
# ax.scatter(xcord1, ycord1, s = 20, c = 'red', marker = 's',alpha=.5)
# ax.scatter(xcord2, ycord2, s = 20, c = 'green',alpha=.5)
# plt.title('DataSet')
# plt.xlabel('x1'); plt.ylabel('x2')
# plt.show()
# 03 Build the regression model
# Sigmoid function
def sigmoid(Z):
    """Return the logistic sigmoid ``1 / (1 + exp(-Z))`` element-wise.

    The naive formula overflows in ``exp(-Z)`` for large negative ``Z`` and
    makes NumPy emit a RuntimeWarning.  Here the exponential is only ever
    taken of a non-positive number, so it stays within (0, 1]:

    * ``Z >= 0``:  ``1 / (1 + exp(-Z))``
    * ``Z <  0``:  ``exp(Z) / (1 + exp(Z))``

    Args:
        Z: A scalar or a NumPy array/matrix of real values.

    Returns:
        The sigmoid of ``Z``, with the same shape as ``Z``.
    """
    decay = np.exp(-np.abs(Z))  # exp of a value <= 0, so it cannot overflow
    # Numerator is 1 on the non-negative branch and exp(Z) on the negative one;
    # both branches share the denominator 1 + exp(-|Z|).
    numerator = np.where(np.asarray(Z) >= 0, 1.0, decay)
    return numerator / (1.0 + decay)
# Gradient ascent algorithm
def gradAscent(dataMatIn, classLabels, alpha=0.001, maxCycles=500):
    """Fit logistic-regression coefficients with batch gradient ascent.

    Plain ndarrays are used instead of ``np.mat``: that alias was removed in
    NumPy 2.0, and the previous ``weights.getA()`` call also failed when the
    loop ran zero times (``weights`` was then still an ndarray).

    Args:
        dataMatIn: Sample rows (m x n); a single flat row is treated as one
            sample.
        classLabels: Sequence of m class labels, one per sample row.
        alpha: Step size of each update (default 0.001).
        maxCycles: Number of full-batch update iterations (default 500).

    Returns:
        An ``(n, 1)`` ndarray with the fitted coefficients.

    Raises:
        ValueError: If the number of labels differs from the number of rows.
    """
    features = np.atleast_2d(np.asarray(dataMatIn, dtype=float))
    targets = np.asarray(classLabels, dtype=float).reshape(-1, 1)
    m, n = features.shape
    if targets.shape[0] != m:
        raise ValueError(
            "expected {} class labels, got {}".format(m, targets.shape[0])
        )

    features_t = features.T   # loop-invariant, hoisted out of the iterations
    weights = np.ones((n, 1))  # coefficients start at 1
    for _ in range(maxCycles):
        h = sigmoid(features.dot(weights))  # predicted probabilities, (m, 1)
        error = targets - h
        # Step along the gradient of the log-likelihood: X^T (y - h).
        weights = weights + alpha * features_t.dot(error)
    return weights
# 04 Plot the decision boundary
def plotBestFit(weights, dataSet=None, labels=None, xRange=(-3.0, 3.0)):
    """Scatter the samples and draw the fitted decision boundary.

    The boundary is the line ``w0 + w1 * x1 + w2 * x2 = 0``.

    Args:
        weights: Three coefficients ``(w0, w1, w2)``; any array shape that
            flattens to them (e.g. ``(3,)`` or ``(3, 1)``) is accepted.
        dataSet: Sample rows ``[1.0, x1, x2]``.  Defaults to the module-level
            ``dataMat`` so existing ``plotBestFit(weights)`` calls still work.
        labels: Class label per sample.  Defaults to the module-level
            ``labelMat``.
        xRange: ``(start, stop)`` of the x1 interval over which the boundary
            is drawn, sampled every 0.1 (default ``(-3.0, 3.0)``).
    """
    if dataSet is None:
        dataSet = dataMat
    if labels is None:
        labels = labelMat

    dataArr = np.asarray(dataSet, dtype=float)
    if dataArr.size == 0:
        # Keep an empty dataset plottable (two empty scatters).
        dataArr = dataArr.reshape(0, 3)
    # Labels equal to 1 after integer conversion form the first class; every
    # other label goes to the second class.
    isPositive = np.asarray(labels).astype(int) == 1
    w = np.ravel(weights)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(dataArr[isPositive, 1], dataArr[isPositive, 2],
               s=20, c='red', marker='s', alpha=.5)
    ax.scatter(dataArr[~isPositive, 1], dataArr[~isPositive, 2],
               s=20, c='green', alpha=.5)

    if w[2] != 0:
        x = np.arange(xRange[0], xRange[1], 0.1)
        y = (-w[0] - w[1] * x) / w[2]
        ax.plot(x, y)
    elif w[1] != 0:
        # w2 == 0 means the boundary is vertical (x1 = -w0 / w1); solving for
        # x2 would divide by zero, so draw the vertical line directly.
        ax.axvline(-w[0] / w[1])
    # If w1 and w2 are both zero there is no boundary line to draw.

    plt.title('BestFit')
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()
# Fit the regression coefficients on the samples loaded above.
weights = gradAscent(dataMat,labelMat)
# Show the samples together with the fitted decision boundary.
plotBestFit(weights)
# NOTE(review): `end` is taken only after plotBestFit() returns, so the reported
# time presumably includes however long plt.show() keeps the figure window
# open — confirm whether that is intended.
end = time.time()
# Print the seconds elapsed since `start` was recorded before the data was read.
print("运行时间:{}".format(end-start))