import warnings
import numpy as np
import argparse
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2;
# selecting the 'boston' dataset therefore requires scikit-learn < 1.2.
from sklearn.datasets import load_boston
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler, StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression, SGDRegressor, Ridge, Lasso
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error, r2_score
from joblib import dump, load
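
# Command-line script that fits several scikit-learn regression models
# (ordinary least squares, SGD, Ridge, Lasso, polynomial regression) to the
# Boston or California housing data, with optional feature scaling, and
# reports R^2 or MSE on the train/test split.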
def get_arguments():
parser = argparse.ArgumentParser(description='LinearRegression')
parser.add_argument('--data_name', type=str, default='california', choices=('boston', 'california'),
help='choose datasets')
parser.add_argument('--test_size', type=float, default=0.33, help='the proportion of test data')
parser.add_argument('--random_state', type=int, default=42, help='the random seed of dataset split')
parser.add_argument('--normalization', type=int, default=3, choices=(0, 1, 2, 3),
help='select the type of data normalization,'
'0: no normalization,'
'1: rescale the data to [0, 1],'
'2: rescale the data to [-1, 1],'
'3: z-score normalization')
parser.add_argument('--Regression', type=int, default=2, choices=(1, 2, 3, 4, 5),
help='select the type of Regression,'
'1: normal equation of LinearRegression,'
'2: SGD LinearRegression,'
'3: Ridge Regression,'
'4: Lasso Regression,'
'5: Polynomial Regression')
parser.add_argument('--loss', type=int, default=1, choices=(1, 2),
help='select the type of loss,'
'1: R^2,'
'2: MSE')
    parser.add_argument('--max_iteration', type=int, default=1000, help='the maximum number of iterations for SGD')
    parser.add_argument('--eta0', type=float, default=0.01, help='the initial learning rate of SGD')
    parser.add_argument('--alpha', type=float, default=0.5,
                        help='regularization strength; must be a positive float')
parser.add_argument('--degree', type=int, default=2, help='the degree of PolynomialFeatures')
args = parser.parse_args()
return args
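
# Bundles the full workflow (load, split, scale, fit, evaluate), configured
# entirely by the parsed command-line arguments above.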
class MyLinearRegression:
def __init__(self, parser):
self.data_name = parser.data_name
self.test_size = parser.test_size
self.random_state = parser.random_state
self.normalization = parser.normalization
self.Regression = parser.Regression
self.loss = parser.loss
self.max_iter = parser.max_iteration
self.eta0 = parser.eta0
self.alpha = parser.alpha
self.degree = parser.degree
def load_dataset(self):
with warnings.catch_warnings():
warnings.filterwarnings("ignore")
            if self.data_name == 'boston':
                dataset = load_boston()
                print("The boston dataset is loaded successfully!")
            elif self.data_name == 'california':
                dataset = fetch_california_housing()
                print("The california dataset is loaded successfully!")
            else:
                raise ValueError("Please choose 'boston' or 'california'")
description = dataset.DESCR
feature_names = dataset.feature_names
datas = dataset.data
target = dataset.target
print("The description of datasets is: ", end="")
print(description)
print("The feature names of datasets is: ", end="")
print(*feature_names)
if self.data_name == 'california':
target_names = dataset.target_names
print("The target names of datasets is: ", end="")
print(*target_names)
print("The shape of dataset is: ", end="")
print(datas.shape)
return datas, target
def split_dataset(self, X, y):
        assert 0 < self.test_size < 1, "test_size must be strictly between 0 and 1"
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=self.test_size, random_state=self.random_state)
return X_train, X_test, y_train, y_test
def normalize_dataset(self, X_train, X_test):
        if self.normalization == 0:
            # no scaling
            X_train_normalization = X_train
            X_test_normalization = X_test
        elif self.normalization == 1:
            # rescale each feature to [0, 1]
            min_max_scaler = MinMaxScaler()
            X_train_normalization = min_max_scaler.fit_transform(X_train)
            # fit on the training data only, then reuse the fitted scaler on
            # the test data to avoid leaking test statistics into training
            X_test_normalization = min_max_scaler.transform(X_test)
        elif self.normalization == 2:
            # rescale each feature to [-1, 1] by its maximum absolute value
            max_abs_scaler = MaxAbsScaler()
            X_train_normalization = max_abs_scaler.fit_transform(X_train)
            X_test_normalization = max_abs_scaler.transform(X_test)
        elif self.normalization == 3:
            # z-score standardization (zero mean, unit variance)
            scaler = StandardScaler()
            X_train_normalization = scaler.fit_transform(X_train)
            X_test_normalization = scaler.transform(X_test)
        else:
            raise ValueError("Please choose a valid normalization type", self.normalization)
return X_train_normalization, X_test_normalization
def regression(self, X_train, y_train):
if self.Regression == 1:
reg = LinearRegression().fit(X_train, y_train)
print("The score of LinearRegression is: {}".format(reg.score(X_train, y_train)))
print("The coefficient of LinearRegression is: {}".format(reg.coef_))
print("The intercept of LinearRegression is: {}".format(reg.intercept_))
dump(reg, 'LinearRegression.joblib')
elif self.Regression == 2:
            # 'squared_loss' was renamed to 'squared_error' in scikit-learn 1.0
            # and removed in 1.2
            reg = SGDRegressor(loss='squared_error', fit_intercept=True, learning_rate='invscaling',
                               eta0=self.eta0, max_iter=self.max_iter)
reg.fit(X_train, y_train)
print("The coefficient of SGDRegressor is: {}".format(reg.coef_))
print("The intercept of SGDRegressor is: {}".format(reg.intercept_))
dump(reg, 'SGDRegressor.joblib')
elif self.Regression == 3:
            reg = Ridge(alpha=self.alpha)
reg.fit(X_train, y_train)
print("The coefficient of Ridge is: {}".format(reg.coef_))
print("The intercept of Ridge is: {}".format(reg.intercept_))
dump(reg, 'Ridge.joblib')
elif self.Regression == 4:
            reg = Lasso(alpha=self.alpha)
reg.fit(X_train, y_train)
print("The coefficient of Lasso is: {}".format(reg.coef_))
print("The intercept of Lasso is: {}".format(reg.intercept_))
dump(reg, 'Lasso.joblib')
        elif self.Regression == 5:
            # polynomial feature expansion followed by ordinary least squares
            reg = make_pipeline(PolynomialFeatures(self.degree), LinearRegression())
            reg.fit(X_train, y_train)
            dump(reg, 'PolynomialFeatures.joblib')
        else:
            raise ValueError('Please choose a valid regression model', self.Regression)
def evaluate(self, X_train, X_test, y_train, y_test):
        if self.Regression == 1:
            reg = load('LinearRegression.joblib')
            print("The pretrained model of LinearRegression is loaded successfully!")
        elif self.Regression == 2:
            reg = load('SGDRegressor.joblib')
            print("The pretrained model of SGDRegressor is loaded successfully!")
        elif self.Regression == 3:
            reg = load('Ridge.joblib')
            print("The pretrained model of Ridge is loaded successfully!")
        elif self.Regression == 4:
            reg = load('Lasso.joblib')
            print("The pretrained model of Lasso is loaded successfully!")
        elif self.Regression == 5:
            reg = load('PolynomialFeatures.joblib')
            print("The pretrained model of Polynomial Regression is loaded successfully!")
        else:
            raise ValueError('Please choose a valid regression model', self.Regression)
        y_train_pred = reg.predict(X_train)
        y_test_pred = reg.predict(X_test)
        if self.loss == 1:
            print("The R^2 on the train set is: {}".format(r2_score(y_train, y_train_pred)))
            print("The R^2 on the test set is: {}".format(r2_score(y_test, y_test_pred)))
        else:
            print("The MSE on the train set is: {}".format(mean_squared_error(y_train, y_train_pred)))
            print("The MSE on the test set is: {}".format(mean_squared_error(y_test, y_test_pred)))