import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, make_scorer
import warnings
warnings.filterwarnings("ignore")
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
if __name__ == '__main__':
    # Script entry point: explore the housing data set, then fit and compare
    # a grid-searched random forest and a small dense neural network on the
    # MEDV regression target, reporting test-set MSE for both.

    # Local stdlib import so the output directory can be created on demand.
    import os

    # ---- Load the data -------------------------------------------------
    column_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
                    'MEDV']
    data = pd.read_csv('./dataset/housing.csv', header=None, delimiter=r'\s+', names=column_names)

    # ---- Quick exploration ---------------------------------------------
    print(data.head())
    # DataFrame.info() prints its report itself and returns None, so it is
    # called directly instead of being wrapped in print().
    data.info()
    print(data.shape)
    print(data.isnull().sum())
    print(data.duplicated().sum())
    corr = data.corr()
    print(corr)

    os.makedirs('./pic', exist_ok=True)

    # Each plot gets its own figure. Reusing one figure and only clearing the
    # current axes would leave the heatmap's colorbar axes in the bar chart.
    fig_hm, ax_hm = plt.subplots()
    sns.heatmap(corr, ax=ax_hm)
    fig_hm.savefig("./pic/heatmap", dpi=400)
    plt.close(fig_hm)

    fig_bar, ax_bar = plt.subplots()
    sns.barplot(x='CHAS', y='MEDV', data=data, ax=ax_bar)
    fig_bar.savefig("./pic/bar", dpi=400)
    plt.close(fig_bar)

    # The linear correlation between CHAS and MEDV looks weak, but the bar
    # plot shows that towns bordering the Charles River have a higher MEDV,
    # so CHAS is kept as a feature.

    # ---- Split into features and target --------------------------------
    X = data.drop('MEDV', axis=1)
    y = data['MEDV']

    # Split BEFORE scaling: the scaler must only see the training rows,
    # otherwise test-set statistics leak into the training data.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=1)
    ss = StandardScaler()
    X_train = ss.fit_transform(X_train)
    X_test = ss.transform(X_test)
    print(X_train.shape, X_test.shape)

    # ---- Random forest with grid search --------------------------------
    param_grid = {
        'n_estimators': [100, 200, 300],
        'max_depth': [None, 5, 10],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
    }
    grid = GridSearchCV(
        estimator=RandomForestRegressor(),
        param_grid=param_grid,
        # 'accuracy' is a classification metric and cannot score a continuous
        # target; select the model on (negated) mean squared error instead.
        scoring='neg_mean_squared_error',
        cv=5,
        n_jobs=-1,
    )
    grid.fit(X_train, y_train)
    print(f"随机森林模型 最佳参数: {grid.best_params_}")
    # best_score_ holds the negated MSE (higher is better); flip the sign so
    # the printed value is an actual MSE.
    print(f"随机森林模型 最佳MSE分数值: {-grid.best_score_}")
    y_pred = grid.predict(X_test)
    rf_mse = mean_squared_error(y_test, y_pred)
    print(f"随机森林模型 MSE: {rf_mse}")

    # ---- Neural network ------------------------------------------------
    model = tf.keras.Sequential([
        # Input / hidden layers
        tf.keras.layers.Dense(units=13, activation='relu', input_shape=(X_train.shape[1],)),
        tf.keras.layers.Dense(units=10, activation="relu"),
        # Output layer: a single linear unit for regression
        tf.keras.layers.Dense(units=1, activation='linear'),
    ])
    model.compile(optimizer='sgd', loss='mean_squared_error', metrics=['mse'])
    # summary() prints the table itself and returns None.
    model.summary()

    # Early stopping on the validation loss to limit overfitting.
    cb = EarlyStopping(
        monitor='val_loss',
        min_delta=0.00001,
        patience=20,
        verbose=1,
        mode='auto',
        baseline=None,
        restore_best_weights=False,
    )

    # Train the model; `callbacks` takes a list of callback instances.
    history = model.fit(X_train, y_train, validation_split=0.2, epochs=100, callbacks=[cb])

    # evaluate() returns [loss, *metrics]; with loss=MSE and metrics=['mse']
    # both returned values are mean squared errors.
    nn_loss, nn_mse = model.evaluate(X_test, y_test)
    print(f"神经网络模型 MSE: {nn_mse}")
housing.zip
需积分: 0 151 浏览量
2023-11-27
14:53:07
上传
评论
收藏 247KB ZIP 举报
尾巴988
- 粉丝: 0
- 资源: 1
最新资源
- 基于Javascript和Python的微商城项目设计源码 - MicroMall
- 基于Java的网上订餐系统设计源码 - online ordering system
- 基于Javascript的超级美眉网络资源管理应用模块设计源码
- 基于Typescript和PHP的编程知识储备库设计源码 - study-php
- Screenshot_2024-05-28-11-40-58-177_com.tencent.mm.jpg
- 基于Dart的Flutter小提琴调音器APP设计源码 - violinhelper
- 基于JavaScript和CSS的随寻订购网页设计源码 - web-order
- 基于MATLAB的声纹识别系统设计源码 - VoiceprintRecognition
- 基于Java的微服务插件集合设计源码 - wsy-plugins
- 基于Vue和微信小程序的监理日志系统设计源码 - supervisionLog
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈