"""Implementation of GradientBoostingRegressor in sklearn using the
Boston dataset, which is very popular for regression problems, to
predict house prices.
"""
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.datasets import load_boston
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
def main() -> None:
    """Fit a gradient-boosting regressor to the Boston data and report the fit.

    Loads the dataset, prints its keys, the first rows and the summary
    statistics, holds out 25% of the rows as a test set, fits a
    GradientBoostingRegressor on the rest, prints the train/test scores, the
    test mean squared error and R^2, and finally displays a scatter plot of
    actual versus predicted prices.

    NOTE(review): ``load_boston`` was deprecated in scikit-learn 1.0 and
    removed in 1.2 -- confirm the installed scikit-learn still provides it.
    """
    # Load the dataset bundled with scikit-learn and show its fields.
    boston = load_boston()
    print(boston.keys())

    # Build a data frame of the features and append the target as "Price".
    df_boston = pd.DataFrame(boston.data, columns=boston.feature_names)
    df_boston["Price"] = boston.target

    # First five rows, then the transposed summary statistics.
    print(df_boston.head())
    print(df_boston.describe().T)

    # Every column but the last is a feature; the last ("Price") is the target.
    x = df_boston.iloc[:, :-1]
    y = df_boston.iloc[:, -1]

    # 75% train / 25% test; the fixed seed keeps the split reproducible.
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, random_state=0, test_size=0.25
    )

    model = GradientBoostingRegressor(
        n_estimators=500, max_depth=5, min_samples_split=4, learning_rate=0.01
    )
    model.fit(x_train, y_train)

    # Use the built-in round(): score() is documented to return a float, and
    # a plain Python float has no .round() method (only NumPy scalars do).
    training_score = round(model.score(x_train, y_train), 3)
    test_score = round(model.score(x_test, y_test), 3)
    print("Training score of GradientBoosting is :", training_score)
    print("The test score of GradientBoosting is :", test_score)

    # Evaluate the predictions on the held-out rows.
    y_pred = model.predict(x_test)
    print(f"Mean squared error: {mean_squared_error(y_test, y_pred):.2f}")
    # r2_score is the coefficient of determination (R^2), not the explained
    # variance; 1 is a perfect prediction.
    print(f"Test Variance score: {r2_score(y_test, y_pred):.2f}")

    # Actual vs. predicted prices; the dashed diagonal marks a perfect match.
    _, ax = plt.subplots()
    ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0))
    lowest, highest = y_test.min(), y_test.max()
    ax.plot([lowest, highest], [lowest, highest], "k--", lw=4)
    ax.set_xlabel("Actual")
    ax.set_ylabel("Predicted")
    ax.set_title("Truth vs Predicted")
    # Display the figure.
    plt.show()
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
没有合适的资源?快使用搜索试试~ 我知道了~
资源推荐
资源详情
资源评论
收起资源包目录
python-machine_learning.rar (40个子文件)
machine_learning
__init__.py 0B
forecasting
__init__.py 0B
ex_data.csv 1KB
run.py 6KB
loss_functions.py 23KB
apriori_algorithm.py 4KB
sequential_minimum_optimization.py 20KB
random_forest_classifier.py.broken.txt 1KB
self_organizing_map.py 2KB
word_frequency_functions.py 5KB
astar.py 4KB
gradient_boosting_regressor.py.broken.txt 2KB
decision_tree.py 7KB
xgboost_classifier.py 3KB
support_vector_machines.py 6KB
frequent_pattern_growth.py 11KB
automatic_differentiation.py 10KB
lstm
__init__.py 0B
lstm_prediction.py 2KB
sample_data.csv 70KB
linear_discriminant_analysis.py 17KB
gaussian_naive_bayes.py.broken.txt 1KB
local_weighted_learning
__init__.py 0B
local_weighted_learning.md 3KB
local_weighted_learning.py 6KB
gradient_descent.py 4KB
data_transformations.py 3KB
polynomial_regression.py 8KB
multilayer_perceptron_classifier.py 488B
xgboost_regressor.py 2KB
scoring_functions.py 3KB
mfcc.py 14KB
random_forest_regressor.py.broken.txt 1KB
gradient_boosting_classifier.py 4KB
k_nearest_neighbours.py 3KB
linear_regression.py 4KB
similarity_search.py 5KB
logistic_regression.py 5KB
k_means_clust.py 12KB
dimensionality_reduction.py 7KB
共 40 条
- 1
资源评论
流华追梦
- 粉丝: 9801
- 资源: 3844
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
最新资源
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功