import pandas as pd
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
import warnings
# Install a blanket "ignore" filter so warnings raised after this point are not shown.
# NOTE(review): this also hides any warnings the model fits below might raise
# (e.g. convergence problems) — consider narrowing or removing the filter.
warnings.filterwarnings("ignore")
# Load the data set: a whitespace-delimited text file with no header row.
# The separator is written as a raw string so the regex `\s+` reaches pandas
# unchanged; the non-raw '\s+' relies on an invalid escape sequence, which
# newer Python versions warn about at compile time.
data = pd.read_csv('./boston_housing.data', sep=r'\s+', header=None)
# Feature matrix X is every column except the last; target Y is the last column.
X = data.iloc[:, :-1]
Y = data.iloc[:, -1]
# Hold out 30% of the rows as a test set; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=10)
# Polynomial expansion: all terms up to degree 3, including the constant (bias) column.
poly = PolynomialFeatures(degree=3, interaction_only=False, include_bias=True)
# Fit the expansion on the training split only, then apply the same mapping
# to the test split so no test information is used while fitting.
x_train_poly = poly.fit_transform(x_train)
x_test_poly = poly.transform(x_test)
# --- Plain linear regression (no regularization) ---
# Build the estimator and train it on the expanded training features in one
# step; fit() returns the fitted estimator itself.
linear = LinearRegression().fit(x_train_poly, y_train)
# Predictions for the held-out test rows.
y_test_hat1 = linear.predict(x_test_poly)
# Evaluate with score() (R^2) on the training split first, then the test split.
# Values recorded by the original author: train 1.0, test -4922.138866671163.
for split_features, split_target in ((x_train_poly, y_train), (x_test_poly, y_test)):
    print(linear.score(split_features, split_target))
# --- Lasso regression (L1 penalty) ---
# Build the estimator with penalty strength alpha=10000 and train it in one
# step; fit() returns the fitted estimator itself.
lasso = Lasso(alpha=10000).fit(x_train_poly, y_train)
# Predictions for the held-out test rows.
y_test_hat2 = lasso.predict(x_test_poly)
# Evaluate with score() (R^2) on the training split first, then the test split.
# Values recorded by the original author:
# train 0.8612579413693275, test 0.8183003056515205.
for split_features, split_target in ((x_train_poly, y_train), (x_test_poly, y_test)):
    print(lasso.score(split_features, split_target))
# --- Ridge regression (L2 penalty) ---
# Build the estimator with penalty strength alpha=1000000000000 and train it
# in one step; fit() returns the fitted estimator itself.
ridge = Ridge(alpha=1000000000000).fit(x_train_poly, y_train)
# Predictions for the held-out test rows.
y_test_hat3 = ridge.predict(x_test_poly)
# Evaluate with score() (R^2) on the training split first, then the test split.
# Values recorded by the original author:
# train 0.8177143202090826, test 0.773014538352527.
for split_features, split_target in ((x_train_poly, y_train), (x_test_poly, y_test)):
    print(ridge.score(split_features, split_target))
# --- ElasticNet regression (combined L1 and L2 penalty) ---
# Build the estimator with penalty strength alpha=600 and train it in one
# step; fit() returns the fitted estimator itself.
elasticNet = ElasticNet(alpha=600).fit(x_train_poly, y_train)
# Predictions for the held-out test rows.
y_test_hat4 = elasticNet.predict(x_test_poly)
# Evaluate with score() (R^2) on the training split first, then the test split.
# Values recorded by the original author:
# train 0.9047004688116072, test 0.8246288062659108.
for split_features, split_target in ((x_train_poly, y_train), (x_test_poly, y_test)):
    print(elasticNet.score(split_features, split_target))
波士顿房价预测正则化代码实现
需积分: 0 190 浏览量
2024-02-26
21:04:22
上传
评论
收藏 13KB RAR 举报
ToBeCertain
- 粉丝: 669
- 资源: 23
最新资源
- #P0015. 全排列 超级简单
- pta题库答案c语言之排序4统计工龄.zip
- pta题库答案c语言之树结构7堆中的路径.zip
- pta题库答案c语言之树结构3TreeTraversalsAgain.zip
- pta题库答案c语言之树结构2ListLeaves.zip
- pta题库答案c语言之树结构1树的同构.zip
- 基于C++实现民航飞行与地图简易管理系统可执行程序+说明+详细注释.zip
- pta题库答案c语言之复杂度1最大子列和问题.zip
- 三维装箱问题(Three-Dimensional Bin Packing Problem,3D-BPP)是一个经典的组合优化问题
- 以下是一些关于Linux线程同步的基本概念和方法.txt
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈