import numpy as np
import matplotlib.pyplot as plt
import random
def load_file(filename):
    """
    Load tab-separated training data from a text file.

    Every non-blank line holds one sample: all columns except the last are
    parsed as float features and the last column is parsed as the float label.

    :param filename: (string) path of the training data file
    :return: (feature_data, label_data) as numpy matrices; feature_data has
        one row per sample and label_data has one single-element row per sample
    :raises ValueError: if a column cannot be parsed as a float
    """
    feature_data = []
    label_data = []
    # The context manager closes the file even if parsing a line fails.
    with open(filename) as f:
        for line in f:
            # strip() drops the trailing newline; split on the tab separator.
            fields = line.strip().split('\t')
            if fields == ['']:
                # Blank (or whitespace-only) line, e.g. at the end of the
                # file: nothing to parse.
                continue
            feature_data.append([float(value) for value in fields[:-1]])
            label_data.append([float(fields[-1])])
    # np.asmatrix is what np.mat aliased; np.mat itself is gone in NumPy 2.0.
    return np.asmatrix(feature_data), np.asmatrix(label_data)
def save_model(filename, w):
    """
    Save the final model weights to a text file.

    The first-column entry of every row of ``w`` is converted with ``str`` and
    the values are written on a single line separated by tabs (no trailing
    newline). An existing file is overwritten.

    :param filename: (string) name of the file the model is written to
    :param w: (mat) weight matrix, indexed as ``w[i, 0]``
    :return: None
    """
    row_count = np.shape(w)[0]
    values = [str(w[i, 0]) for i in range(row_count)]
    # The context manager closes the file even if the write fails.
    with open(filename, 'w') as f_w:
        f_w.write('\t'.join(values))
def sign(x):
    """
    Decision function: map a raw prediction to a class label.

    :param x: (float) predicted value
    :return: 1 when ``x`` is strictly positive, otherwise -1 (zero maps to -1)
    """
    return 1 if x > 0 else -1
def training(feature_data, label_data, weights, biases, learning_rate, iter_num):
    """
    Train with one randomly chosen sample per iteration.

    On each iteration a row is picked at random, classified with ``sign``, and
    the weights and bias are updated only when the label times the predicted
    class is not positive. Every 10th iteration (starting with iteration 0)
    the value of ``objective_function`` on the whole data set is printed.

    NOTE(review): the arithmetic relies on ``*`` being a matrix product, so
    the inputs are presumably numpy matrices such as those returned by
    ``load_file`` -- confirm against the caller.

    :param feature_data: (mat) input features, one sample per row
    :param label_data: (mat) input labels, indexed by sample row
    :param weights: (mat) initial weight coefficients
    :param biases: (float) initial bias
    :param learning_rate: (float) learning rate
    :param iter_num: (int) number of iterations
    :return: (weights, biases) after the last iteration
    """
    for step in range(iter_num):
        # Pick one sample at random and compute its predicted class.
        row = random.choice(range(np.shape(feature_data)[0]))
        sample = feature_data[row, :]
        target = label_data[row]
        predicted_class = sign(sample * weights + biases)
        misclassified = target * predicted_class <= 0
        if misclassified:
            weights = weights + learning_rate * sample.T * target
            biases = biases + learning_rate * target
        if step % 10 != 0:
            continue
        # Periodic progress report using the objective on the full data set.
        loss = objective_function(feature_data, label_data, weights, biases)
        report = "".join((
            "\t-----------iter = ",
            str(step),
            " , train error rate = ",
            str(loss[0, 0]),
        ))
        print(report)
    return weights, biases
def initial():
    """
    Create the default starting model and hyper-parameters.

    :return: tuple ``(weights, biases, learning_rate, iter_num)`` where
        weights is a 2x1 matrix of ones, biases is 0, learning_rate is 1 and
        iter_num is 1000
    """
    # With a poorly chosen starting point the iteration count must be large.
    iter_num = 1000
    learning_rate = 1
    # np.asmatrix is what np.mat aliased; np.mat itself is gone in NumPy 2.0.
    weights = np.asmatrix(np.ones((2, 1)))
    biases = 0
    return weights, biases, learning_rate, iter_num
def plot_points_line(feature, label, weights, bias):
    """
    Draw a scatter plot of the samples and the separating line.

    The last two columns of ``feature`` are used as the x and y coordinates.
    Samples labelled 0, 1 and 2 are drawn in red, blue and yellow; every other
    label is drawn in green. The line ``weights[0] * x + weights[1] * y +
    bias = 0`` is drawn for x between -1 and 10, and the figure is shown.

    :param feature: (mat) sample features, one sample per row
    :param label: (mat) sample labels, indexed by sample row
    :param weights: (mat) weight coefficients; the first two entries are used
    :param bias: (float or 1x1 mat) offset of the line
    :return: None
    """
    # (label value, colour, collected [x, y] pairs) for the explicit classes.
    classes = [(0, 'r', []), (1, 'b', []), (2, 'y', [])]
    other_points = []  # samples whose label matches none of the above
    for index in range(np.shape(feature)[0]):
        # Columns -2 and -1: the second-to-last and last feature.
        xy = [feature[index, -2], feature[index, -1]]
        for value, _, points in classes:
            if label[index] == value:
                points.append(xy)
                break
        else:
            other_points.append(xy)

    # Scatter plots, in the order red, blue, green, yellow.
    draw_order = [classes[0], classes[1], (None, 'g', other_points), classes[2]]
    for _, colour, points in draw_order:
        # reshape(-1, 2) keeps an empty class as a valid 0x2 array.
        coords = np.asarray(points, dtype=float).reshape(-1, 2)
        plt.scatter(coords[:, 0], coords[:, 1], c=colour)

    # Separating line; flattening makes this independent of np.matrix's
    # special `*` semantics and of bias being a scalar or a 1x1 matrix.
    w = np.asarray(weights, dtype=float).ravel()
    b = np.asarray(bias, dtype=float).ravel()[0]
    line_x = np.linspace(-1, 10, 100)
    line_y = -(w[0] * line_x + b) / w[1]
    plt.plot(line_x, line_y)
    plt.show()
def objective_function(feature_data, label, weights, biases):
    """
    Compute the value of the objective function.

    For every sample the term ``-label * (feature * weights + biases)`` is
    evaluated, and only the strictly positive terms are summed.

    NOTE(review): the arithmetic relies on ``*`` being a matrix product, so
    the inputs are presumably numpy matrices -- confirm against the caller.

    :param feature_data: (mat) features, one sample per row
    :param label: (mat) labels, indexed by sample row
    :param weights: (mat) weights
    :param biases: (float) offset
    :return: (mat) 1x1 matrix holding the objective value
    """
    sample_count = np.shape(feature_data)[0]
    # np.asmatrix is what np.mat aliased; np.mat itself is gone in NumPy 2.0.
    obj_value = np.asmatrix(np.zeros((1, 1)))
    for index in range(sample_count):
        tmp_value = - label[index] * (feature_data[index] * weights + biases)
        if tmp_value > 0:
            obj_value += tmp_value
    return obj_value