import numpy as np
from .metrics import r2_score


class LinearRegression:

    def __init__(self):
        """Initialize the Linear Regression model."""
        self.coef_ = None        # feature weights, i.e. theta[1:]
        self.intercept_ = None   # bias term, i.e. theta[0]
        self._theta = None       # full parameter vector, intercept first
    def fit_normal(self, X_train, y_train):
        """Fit the Linear Regression model on X_train, y_train
        using the closed-form normal equation."""
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must be equal to the size of y_train"

        # Prepend a column of ones so the intercept is absorbed into theta,
        # then solve the normal equation: theta = (X_b^T X_b)^(-1) X_b^T y.
        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        self._theta = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y_train)

        self.intercept_ = self._theta[0]
        self.coef_ = self._theta[1:]
        return self
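    # Aside (not in the original code): np.linalg.inv fails when X_b^T X_b is
    # singular, e.g. with collinear features. A more robust drop-in for the
    # line above is the Moore-Penrose pseudo-inverse:
    #     self._theta = np.linalg.pinv(X_b).dot(y_train)
    # which returns the minimum-norm least-squares solution in that case.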
    def fit_bgd(self, X_train, y_train, eta=0.01, n_iters=int(1e4)):
        """Fit the Linear Regression model on X_train, y_train
        using batch gradient descent."""
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must be equal to the size of y_train"

        def J(theta, X_b, y):
            """MSE loss; returns inf if the computation overflows
            (e.g. when eta is too large and theta diverges)."""
            try:
                return np.sum((y - X_b.dot(theta)) ** 2) / len(y)
            except Exception:
                return float('inf')

        def dJ(theta, X_b, y):
            """Gradient of the MSE loss: (2/m) * X_b^T (X_b theta - y)."""
            return X_b.T.dot(X_b.dot(theta) - y) * 2. / len(y)

        def gradient_descent(X_b, y, initial_theta, eta, n_iters=int(1e4), epsilon=1e-8):
            theta = initial_theta
            cur_iter = 0
            while cur_iter < n_iters:
                gradient = dJ(theta, X_b, y)
                last_theta = theta
                theta = theta - eta * gradient
                # Stop early once the loss change falls below epsilon.
                if abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon:
                    break
                cur_iter += 1
            return theta

        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        initial_theta = np.zeros(X_b.shape[1])
        self._theta = gradient_descent(X_b, y_train, initial_theta, eta, n_iters)

        self.intercept_ = self._theta[0]
        self.coef_ = self._theta[1:]
        return self
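    # Aside (not in the original code): eta trades speed for stability. If the
    # loss J overflows to inf, the step size is too large for the data's scale
    # and eta should be reduced (or the features standardized first); too small
    # an eta simply exhausts n_iters before converging.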
    def fit_sgd(self, X_train, y_train, n_iters=50, t0=5, t1=50):
        """Fit the Linear Regression model on X_train, y_train using
        stochastic gradient descent; n_iters counts full passes (epochs)
        over the training set."""
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must be equal to the size of y_train"
        assert n_iters >= 1

        def dJ_sgd(theta, X_b_i, y_i):
            """Gradient of the squared error on a single sample."""
            return X_b_i * (X_b_i.dot(theta) - y_i) * 2.

        def sgd(X_b, y, initial_theta, n_iters=5, t0=5, t1=50):

            def learning_rate(t):
                # Annealing schedule: the step size t0 / (t + t1) decays
                # over time, so late updates perturb theta less and less.
                return t0 / (t + t1)

            theta = initial_theta
            m = len(X_b)
            for i_iter in range(n_iters):
                # Shuffle once per epoch, then visit every sample exactly once.
                indexes = np.random.permutation(m)
                X_b_new = X_b[indexes, :]
                y_new = y[indexes]
                for i in range(m):
                    gradient = dJ_sgd(theta, X_b_new[i], y_new[i])
                    theta = theta - learning_rate(i_iter * m + i) * gradient
            return theta

        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        initial_theta = np.random.randn(X_b.shape[1])
        self._theta = sgd(X_b, y_train, initial_theta, n_iters, t0, t1)

        self.intercept_ = self._theta[0]
        self.coef_ = self._theta[1:]
        return self
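    # Aside (not in the original code): as a rule of thumb, fit_normal is
    # exact but the matrix inverse costs roughly O(n^3) in the feature count;
    # fit_bgd touches all m samples per update; fit_sgd uses one sample per
    # update, which scales to large m at the price of noisier convergence.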
    def predict(self, X_predict):
        """Return the vector of predictions for the data set X_predict."""
        assert self.intercept_ is not None and self.coef_ is not None, \
            "must fit before predict!"
        assert X_predict.shape[1] == len(self.coef_), \
            "the feature number of X_predict must be equal to X_train"

        X_b = np.hstack([np.ones((len(X_predict), 1)), X_predict])
        return X_b.dot(self._theta)

    def score(self, X_test, y_test):
        """Return the R^2 score of the model on the test set X_test, y_test."""
        y_predict = self.predict(X_test)
        return r2_score(y_test, y_predict)

    def __repr__(self):
        return "LinearRegression()"