from sklearn.datasets import load_boston
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import linear_model
dataset = load_boston()
x_data = dataset.data
y_data = dataset.target
name_data = dataset.feature_names
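# (Added for reference) print the 13 feature names once, so the loop below can be read against them.
print(name_data)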
for i in range(13):  # 13 is the number of entries in name_data; print name_data above to check
    plt.subplot(7, 2, i+1)  # grid of 7 rows x 2 columns, subplot i+1
    plt.scatter(x_data[:, i], y_data, s=10)  # feature on x, price on y, point size 10
    plt.title(name_data[i])
    print(name_data[i], np.corrcoef(x_data[:, i], y_data))
plt.show()
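# Optional sketch (not in the original script): rank the features by the absolute value of
# their correlation with the price, to justify keeping RM, PTRATIO and LSTAT later on.
corrs = np.array([abs(np.corrcoef(x_data[:, i], y_data)[0, 1]) for i in range(13)])
for idx in np.argsort(corrs)[::-1]:
    print(name_data[idx], round(float(corrs[idx]), 3))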
i_ = []
for i in range(len(y_data)):
    if y_data[i] == 50:
        i_.append(i)  # collect indices where the price is capped at 50 (treated as outliers)
x_data = np.delete(x_data, i_, axis=0)  # drop the outlier samples
y_data = np.delete(y_data, i_, axis=0)  # drop the matching labels
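# Sanity check (added for illustration): the full Boston set has 506 samples, so the count
# printed here should be smaller once the price-capped rows have been removed.
print("samples after outlier removal:", x_data.shape[0])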
me_data = dataset.feature_names
j_ = []
for i in range(13):
    if me_data[i] in ('RM', 'PTRATIO', 'LSTAT'):  # keep the three strongest features
        continue
    j_.append(i)  # collect indices of the remaining, weaker features
x_data = np.delete(x_data, j_, axis=1)  # drop the weaker features, keeping RM, PTRATIO, LSTAT
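# Illustrative check: only the three selected columns should remain after the deletion above.
print("kept features:", [n for n in me_data if n in ('RM', 'PTRATIO', 'LSTAT')])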
print(np.shape(y_data))
print(np.shape(x_data))
X_train, X_test, y_train, y_test = train_test_split(x_data, y_data, random_state = 0, test_size = 0.20)
'''
print(len(X_train))
print(len(X_test))
print(len(y_train))
print(len(y_test))
'''
from sklearn import preprocessing
# Fit the scalers on the training split only, then reuse them on the test split,
# so that no test-set statistics leak into preprocessing.
x_scaler = preprocessing.MinMaxScaler()
X_train = x_scaler.fit_transform(X_train)
X_test = x_scaler.transform(X_test)
# Why scale the labels as well? Empirically it improved the score by about 0.1.
y_scaler = preprocessing.MinMaxScaler()
y_train = y_scaler.fit_transform(y_train.reshape(-1, 1))  # reshape to a single column
y_test = y_scaler.transform(y_test.reshape(-1, 1))  # reshape to a single column
lr = linear_model.LinearRegression(fit_intercept=True)  # ordinary least squares
lr.fit(X_train, y_train)
lr_y_predict = lr.predict(X_test)
from sklearn.metrics import r2_score
score_lr = r2_score(y_test,lr_y_predict)
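# Sketch (added for illustration): because the labels were min-max scaled, predictions can be
# mapped back to the original price scale (thousands of dollars) with the fitted y_scaler.
lr_y_price = y_scaler.inverse_transform(np.asarray(lr_y_predict).reshape(-1, 1))
print("first 5 predicted prices:", lr_y_price[:5].ravel())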
from sklearn.linear_model import RidgeCV
rr = RidgeCV(alphas=np.array([.1, .2, .3, .4]))
rr.fit(X_train,y_train)
rr_y_predict = rr.predict(X_test)
score_rr = r2_score(y_test,rr_y_predict)
print(score_rr)
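# Added for illustration: RidgeCV stores the regularisation strength it selected by cross-validation.
print("alpha chosen by RidgeCV:", rr.alpha_)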
lassr = linear_model.Lasso(alpha=.0001)
lassr.fit(X_train,y_train)
lassr_y_predict=lassr.predict(X_test)
score_lassr = r2_score(y_test,lassr_y_predict)
print(score_lassr)
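# Added for illustration: Lasso can shrink coefficients to exactly zero; with an alpha this
# small all three features are likely to keep non-zero weights.
print("lasso coefficients:", lassr.coef_)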
from sklearn.svm import SVR
svr_rbf = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=.1)  # RBF (Gaussian) kernel
svr_lin = SVR(kernel='linear', C=100, gamma='auto')  # linear kernel
svr_poly = SVR(kernel='poly', C=100, gamma='auto', degree=3, epsilon=.1,
               coef0=1)  # polynomial kernel
svr_rbf_y_predict = svr_rbf.fit(X_train, y_train.ravel()).predict(X_test)  # SVR expects a 1-d target
score_svr_rbf = r2_score(y_test, svr_rbf_y_predict)
svr_lin_y_predict = svr_lin.fit(X_train, y_train.ravel()).predict(X_test)
score_svr_lin = r2_score(y_test, svr_lin_y_predict)
svr_poly_y_predict = svr_poly.fit(X_train, y_train.ravel()).predict(X_test)
score_svr_poly = r2_score(y_test, svr_poly_y_predict)
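# Optional sketch (not part of the original script): the SVR hyper-parameters above are
# hand-picked; a small grid search over C and gamma is one way to tune them instead.
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [1, 10, 100], 'gamma': [0.01, 0.1, 1]}
grid = GridSearchCV(SVR(kernel='rbf'), param_grid, scoring='r2', cv=5)
grid.fit(X_train, y_train.ravel())
print("best SVR params:", grid.best_params_, "best CV R2:", round(grid.best_score_, 3))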
# Plot predicted values against the ground truth for each model.
def draw_infer_result(ground_truths, infer_results):
    title = 'Boston'
    plt.title(title, fontsize=24)
    x = np.arange(-0.2, 2)  # reference line y = x: perfect predictions fall on it
    y = x
    plt.plot(x, y)
    plt.xlabel('ground truth', fontsize=14)
    plt.ylabel('infer result', fontsize=14)
    plt.scatter(ground_truths, infer_results, color='green', label='predictions')
    plt.grid()
    plt.show()
draw_infer_result(y_test,lr_y_predict)
draw_infer_result(y_test,rr_y_predict)
draw_infer_result(y_test,lassr_y_predict)
draw_infer_result(y_test,svr_rbf_y_predict)
draw_infer_result(y_test,svr_lin_y_predict)
draw_infer_result(y_test,svr_poly_y_predict)
print("score of lr:",score_lr)
print("score of rr:",score_rr)
print("score of lassr:",score_lassr)
print("score of svr_rbf:",score_svr_rbf)
print("score of svr_lin:",score_svr_lin)
print("score of svr_poly:",score_svr_poly)