"""
assignment_utils.py
contains routines used by C2_W3 Assignments
"""
import copy
import math
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.patches import FancyArrowPatch
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from matplotlib.widgets import Button, CheckButtons
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_blobs
from ipywidgets import Output
# Print numpy floats with two digits of precision.
np.set_printoptions(precision=2)

# Colour palette used by the plotting helpers. Each colour is exposed both as
# a module-level name and as an entry of the `dlc` dictionary.
dlblue = '#0096ff'
dlorange = '#FF9300'
dldarkred = '#C00000'
dlmagenta = '#FF40FF'
dlpurple = '#7030A0'
dldarkblue = '#0D5BDC'
dlc = dict(
    dlblue=dlblue,
    dlorange=dlorange,
    dldarkred=dldarkred,
    dlmagenta=dlmagenta,
    dlpurple=dlpurple,
    dldarkblue=dldarkblue,
)
# Ordered colour list; note that dldarkblue is not part of it.
dlcolors = [dlblue, dlorange, dldarkred, dlmagenta, dlpurple]

# Apply the style sheet. The path is relative, so it is resolved against the
# current working directory at import time.
plt.style.use('./deeplearning.mplstyle')
# --- Assignment ----------------------------------------
def gen_data(m, seed=1, scale=0.7):
    """
    Generate a data set based on x^2 with added noise.

    Args:
      m     (int)   : number of samples, evenly spaced over [0, 49]
      seed  (int)   : value passed to np.random.seed before the noise is drawn
      scale (float) : noise amplitude, relative to the noise-free target

    Returns:
      x_train (ndarray (m,)) : sample locations
      y_train (ndarray (m,)) : noise-free targets plus multiplicative noise
      x_ideal (ndarray (m,)) : the same array object as x_train
      y_ideal (ndarray (m,)) : noise-free targets, x_train**2
    """
    offset = 0
    x_train = np.linspace(0, 49, m)
    y_ideal = x_train ** 2 + offset

    # Seeds the global numpy RNG, so repeated calls with the same arguments
    # return the same noise.
    np.random.seed(seed)
    noise = np.random.sample((m,)) - 0.5      # uniform in [-0.5, 0.5)
    y_train = y_ideal + scale * y_ideal * noise

    x_ideal = x_train  # for redraw when new data included in X
    return x_train, y_train, x_ideal, y_ideal
def gen_blobs():
    """
    Generate a fixed, reproducible 2-D blob data set with six clusters.

    All settings (800 samples, six fixed centers, cluster std 0.4,
    random_state 2) are hard-coded; the function takes no arguments.

    Returns:
      tuple (X, y, centers, classes, std):
        X, y    : the two values returned by make_blobs
        centers : ndarray (6, 2) of cluster centers handed to make_blobs
        classes : number of clusters (6)
        std     : cluster standard deviation handed to make_blobs (0.4)
    """
    classes = 6
    std = 0.4
    n_samples = 800
    centers = np.array([
        [-1, 0],
        [1, 0],
        [0, 1],
        [0, -1],
        [-2, 1],
        [-2, -1],
    ])
    X, y = make_blobs(
        n_samples=n_samples,
        centers=centers,
        cluster_std=std,
        random_state=2,
        n_features=2,
    )
    return (X, y, centers, classes, std)
class lin_model:
    """
    Single-feature polynomial regression pipeline.

    Inputs are flattened to one column, expanded with PolynomialFeatures,
    standardized with StandardScaler and passed to a linear regressor
    (LinearRegression, or Ridge when regularization is requested).
    """

    def __init__(self, degree, regularization = False, lambda_=0):
        """
        Args:
          degree         (int)   : polynomial degree passed to PolynomialFeatures
          regularization (bool)  : use Ridge when True, LinearRegression otherwise
          lambda_        (float) : Ridge `alpha`; unused when regularization is False
        """
        if regularization:
            self.linear_model = Ridge(alpha=lambda_)
        else:
            self.linear_model = LinearRegression()
        self.poly = PolynomialFeatures(degree, include_bias=False)
        self.scaler = StandardScaler()

    def fit(self, X_train, y_train):
        """
        Fit the polynomial mapping, the scaler and the regressor.

        Every call re-fits all three stages on the data it is given.

        Args:
          X_train (array-like) : training inputs; flattened to a single column
          y_train (array-like) : training targets
        """
        # np.asarray lets plain lists and other array-likes through; an
        # ndarray input is used as-is.
        X_column = np.asarray(X_train).reshape(-1, 1)
        X_train_mapped = self.poly.fit_transform(X_column)
        X_train_mapped_scaled = self.scaler.fit_transform(X_train_mapped)
        self.linear_model.fit(X_train_mapped_scaled, y_train)

    def predict(self, X):
        """
        Predict targets for X using the already-fitted mapping, scaler and regressor.

        Args:
          X (array-like) : inputs; flattened to a single column
        Returns:
          yhat : the value returned by the regressor's predict()
        """
        X_column = np.asarray(X).reshape(-1, 1)
        X_mapped = self.poly.transform(X_column)
        X_mapped_scaled = self.scaler.transform(X_mapped)
        yhat = self.linear_model.predict(X_mapped_scaled)
        return yhat

    def mse(self, y, yhat):
        """
        Return half of the mean squared error between y and yhat.

        Args:
          y    (array-like) : true targets
          yhat (array-like) : predicted targets
        """
        err = mean_squared_error(y, yhat) / 2  # sklearn's MSE does not divide by 2
        return err
def plt_train_test(X_train, y_train, X_test, y_test, x, y_pred, x_ideal, y_ideal, degree):
    """
    Plot train and test points together with the model prediction and the ideal curve.

    Args:
      X_train, y_train : training points, drawn as a red scatter
      X_test, y_test   : test points, drawn as a blue scatter
      x, y_pred        : model prediction, drawn as a thin line
      x_ideal, y_ideal : reference curve, drawn dashed
      degree           : shown in the legend entry of the prediction
    Returns:
      None; the figure is displayed with plt.show()
    """
    fig, axis = plt.subplots(1, 1, figsize=(4, 4))
    # NOTE(review): these look like interactive (ipympl-style) canvas options
    # that hide the toolbar, header and footer -- confirm against the backend.
    for canvas_flag in ("toolbar_visible", "header_visible", "footer_visible"):
        setattr(fig.canvas, canvas_flag, False)

    axis.set_title("Poor Performance on Test Data", fontsize = 12)
    axis.set_xlabel("x")
    axis.set_ylabel("y")

    axis.scatter(X_train, y_train, color = "red", label="train")
    axis.scatter(X_test, y_test, color = dlc["dlblue"], label="test")

    # Re-apply the limits chosen for the scatter points so the curves drawn
    # next do not rescale the axes.
    x_limits = axis.get_xlim()
    y_limits = axis.get_ylim()
    axis.set_xlim(x_limits)
    axis.set_ylim(y_limits)

    axis.plot(x, y_pred, lw=0.5, label=f"predicted, degree={degree}")
    axis.plot(x_ideal, y_ideal, "--", color = "orangered", label="y_ideal", lw=1)

    axis.legend(loc='upper left')
    plt.tight_layout()
    plt.show()
def plt_optimal_degree(X_train, y_train, X_cv, y_cv, x, y_pred, x_ideal, y_ideal, err_train, err_cv, optimal_degree, max_degree):
    """
    Draw a two-panel figure: the fit for every polynomial degree (left) and
    the train / cv error as a function of degree (right).

    Args:
      X_train, y_train  : training points, drawn as a red scatter
      X_cv, y_cv        : cross-validation points, drawn as an orange scatter
      x                 : x values for the prediction curves
      y_pred            : indexed as y_pred[:, i]; column i is drawn and
                          labelled as degree i+1
      x_ideal, y_ideal  : reference curve, drawn dashed
      err_train, err_cv : errors plotted against degrees 1..max_degree
      optimal_degree    : x position of the vertical marker and the annotation
      max_degree        : number of degrees (columns of y_pred) to draw
    Returns:
      None; the figure is displayed with plt.show()
    """
    fig, (ax_fit, ax_err) = plt.subplots(1, 2, figsize=(8, 4))
    # NOTE(review): these look like interactive (ipympl-style) canvas options
    # that hide the toolbar, header and footer -- confirm against the backend.
    for canvas_flag in ("toolbar_visible", "header_visible", "footer_visible"):
        setattr(fig.canvas, canvas_flag, False)

    # ---- left panel: data and one prediction curve per degree ----
    ax_fit.set_title("predictions vs data", fontsize = 12)
    ax_fit.set_xlabel("x")
    ax_fit.set_ylabel("y")

    ax_fit.plot(x_ideal, y_ideal, "--", color = "orangered", label="y_ideal", lw=1)
    ax_fit.scatter(X_train, y_train, color = "red", label="train")
    ax_fit.scatter(X_cv, y_cv, color = dlc["dlorange"], label="cv")
    # Re-apply the current limits so the prediction curves drawn next do not
    # rescale the axes.
    ax_fit.set_xlim(ax_fit.get_xlim())
    ax_fit.set_ylim(ax_fit.get_ylim())
    for deg in range(1, max_degree + 1):
        ax_fit.plot(x, y_pred[:, deg - 1], lw=0.5, label=f"{deg}")
    ax_fit.legend(loc='upper left')

    # ---- right panel: error versus degree ----
    ax_err.set_title("error vs degree", fontsize = 12)
    degrees = list(range(1, max_degree + 1))
    error_curves = (
        (err_train, "train error", "dlblue"),
        (err_cv, "cv error", "dlorange"),
    )
    for errors, curve_label, colour_key in error_curves:
        ax_err.plot(degrees, errors[0:], marker='o', label=curve_label, lw=2, color = dlc[colour_key])
    ax_err.set_ylim(*ax_err.get_ylim())
    ax_err.axvline(optimal_degree, lw=1, color = dlc["dlmagenta"])
    # The arrow tip sits at a fixed data height of 80000.
    arrow_style = dict(arrowstyle="->", connectionstyle="arc3",
                       color=dlc['dldarkred'], lw=1)
    ax_err.annotate("optimal degree", xy=(optimal_degree, 80000), xycoords='data',
                    xytext=(0.3, 0.8), textcoords='axes fraction', fontsize=10,
                    arrowprops=arrow_style)
    ax_err.set_xlabel("degree")
    ax_err.set_ylabel("error")
    ax_err.legend()

    fig.suptitle("Find Optimal Degree", fontsize = 12)
    plt.tight_layout()
    plt.show()
def plt_tune_regularization(X_train, y_train, X_cv, y_cv, x, y_pred, err_train, err_cv, optimal_reg_idx, lambda_range):
    """
    Draw a two-panel figure: fits for a few regularization strengths (left)
    and train / cv error versus lambda on a log axis (right).

    Args:
      X_train, y_train  : training points, drawn as a red scatter
      X_cv, y_cv        : cross-validation points, drawn as an orange scatter
      x                 : x values for the prediction curves
      y_pred            : 2-D array indexed as y_pred[:, i]; column i is the
                          prediction for lambda_range[i]
      err_train, err_cv : errors plotted against lambda_range
      optimal_reg_idx   : index into lambda_range of the lambda to highlight
      lambda_range      : sequence of regularization values
    Returns:
      None; the figure is displayed with plt.show()
    """
    fig, ax = plt.subplots(1,2,figsize=(8,4))
    # NOTE(review): these look like interactive (ipympl-style) canvas options
    # that hide the toolbar, header and footer -- confirm against the backend.
    fig.canvas.toolbar_visible = False
    fig.canvas.header_visible = False
    fig.canvas.footer_visible = False

    # ---- left panel: data and a subset of the prediction curves ----
    ax[0].set_title("predictions vs data",fontsize = 12)
    ax[0].set_xlabel("x")
    ax[0].set_ylabel("y")

    ax[0].scatter(X_train, y_train, color = "red", label="train")
    ax[0].scatter(X_cv, y_cv, color = dlc["dlorange"], label="cv")
    # Re-apply the current limits so the prediction curves drawn next do not
    # rescale the axes.
    ax[0].set_xlim(ax[0].get_xlim())
    ax[0].set_ylim(ax[0].get_ylim())

    # Only a fixed subset of curves is drawn. Indices beyond the available
    # columns / lambdas are skipped instead of raising IndexError.
    n_curves = min(len(lambda_range), y_pred.shape[1])
    for i in (0,3,7,9):
        if i >= n_curves:
            continue
        # Raw f-string: "\l" is not a valid escape in a normal string literal.
        ax[0].plot(x, y_pred[:,i], lw=0.5, label=rf"$\lambda =${lambda_range[i]}")
    ax[0].legend()

    # ---- right panel: error versus regularization ----
    ax[1].set_title("error vs regularization",fontsize = 12)
    ax[1].plot(lambda_range, err_train[:], label="train error", color = dlc["dlblue"])
    ax[1].plot(lambda_range, err_cv[:], label="cv error", color = dlc["dlorange"])
    ax[1].set_xscale('log')
    ax[1].set_ylim(*ax[1].get_ylim())
    opt_x = lambda_range[optimal_reg_idx]
    ax[1].vlines(opt_x, *ax[1].get_ylim(), color = "black", lw=1)
    # The arrow tip sits at a fixed data height of 150000.
    ax[1].annotate("optimal lambda", (opt_x,150000), xytext=(-80,10), textcoords="offset points",
                   arrowprops={'arrowstyle':'simple'})
    ax[1].set_xlabel("regularization (lambda)")
    ax[1].set_ylabel("error")
    fig.suptitle("Tuning Regularization",fontsize = 12)
    ax[1].text(0.05,0.44,"High\nVariance",fontsize=12, ha='left',transform=ax[1].transAxes,color = dlc["dlblue"])
    ax[1].text(0.95,0.44,"High\nBias", fontsize=12, ha='right',transform=ax[1].transAxes,color = dlc["dlblue"])
    ax[1].legend(loc='upper left')
    plt.tight_layout()
    plt.show()
def tune_m():
""" tune the number of examples to reduce overfitting """
m = 50
m_range = np.array(m*np.arange(1,16))
num_steps = m_range.shape[0]
degree = 16
err_train = np.zeros(num_steps)
err_cv = np.zeros(num_steps)
y_pred = np.zeros((100,num_steps))
for i in range(num_steps):
X, y, y_ideal, x_ideal = gen_data(m_range[i],5,0.7)
x = np.linspace(0,int(X.max()),100)
X_train, X_, y_train, y_ = train_test_split(X,y,test_size=0.40, random_state=1)
X_cv, X_test, y_cv, y_test = train_test_split(X_,y_,test_size=0.50, random_state=1)
lmodel = lin_model(degree) # no regularization
lmodel.fit(X_train, y_train)
yhat = lmodel.predict(X_train)
err_train[i] = lmodel.mse(y_train, yhat)
yhat = lmodel.predict(X_cv)
err_cv[i] = lmodel.mse(y_cv, yhat)
y_
哇哇哇哇池
- 粉丝: 109
- 资源: 15
最新资源
- HTML红色实用形式精通商务科技企业网站模板代码.zip
- HTML红色实用形式美食餐饮公司企业网站模板代码.zip
- HTML红色实用形式摩托车维修企业网站模板代码.zip
- HTML红色实用形式旅游新闻类企业网站模板代码.zip
- HTML红色实用形式汽车维修项目服务企业网站模板代码.zip
- HTML红色实用形式清新民宿企业网站模板代码.zip
- HTML红色实用形式商业解决方案企业网站模板代码.zip
- HTML红色实用形式室内健身运动企业网站模板代码.zip
- HTML红色实用形式室内家具商店企业网站模板代码.zip
- HTML红色实用形式时尚产品商城网页模板代码.zip
- HTML红色实用形式手提包背包企业网站模板代码.zip
- HTML红色实用形式手机数码产品企业网站模板代码.zip
- HTML红色实用形式图形公寓房地产企业网站模板代码.zip
- HTML红色实用形式图纸印刷企业网站模板代码.zip
- HTML红色实用形式研发产品动态企业网站模板代码.zip
- HTML红色实用形式线上鲜花店铺企业网站模板代码.zip
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈