from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression # 分类
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import StackingClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split
import time
import matplotlib.pyplot as plt
import matplotlib as mpl
# Use the SimHei font so the Chinese titles and legend labels in the plots render.
mpl.rcParams['font.sans-serif'] = [u'simHei']

# Load the iris data: X receives the feature matrix, y the class labels.
X, y = load_iris(return_X_y=True)

# First-layer (base) estimators of the stacking ensemble.
estimators = [
    ('rf', RandomForestClassifier(n_estimators=10, random_state=40)),
    # The iris features are already on a comparable scale, so standardization
    # is not strictly required here; it is kept as part of the SVC pipeline.
    ('svc', make_pipeline(StandardScaler(), LinearSVC(random_state=40))),
]

# Build the stacking model: the base estimators' predictions are fed to a
# logistic-regression final estimator.
stacking = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(),
)

# Baseline models to compare against the stacking ensemble.
# `multi_class` is intentionally not passed: it is deprecated since
# scikit-learn 1.5, and with the lbfgs solver a multiclass target is already
# fitted with the multinomial (softmax) loss.
softmax = LogisticRegression(C=0.1, solver='lbfgs', fit_intercept=False)
# random_state is pinned (like every other randomized step in this script) so
# that the comparison is reproducible from run to run.
gbdt = GradientBoostingClassifier(
    learning_rate=0.1, n_estimators=90, max_depth=4, random_state=40
)
rf = RandomForestClassifier(max_depth=5, n_estimators=100, random_state=40)

# stratify=y performs stratified sampling on the target vector so that the
# class proportions in the training and test sets match the full data set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=40
)
# Per-model results, appended in evaluation order so the four lists stay aligned.
scores_train = []  # accuracy on the training split
scores_test = []   # accuracy on the held-out test split
models = []        # model names, used later as x-axis tick labels
times = []         # fit duration in seconds

for clf, modelname in zip([softmax, gbdt, rf, stacking],
                          ['softmax', 'gbdt', 'rf', 'stacking']):
    print('start:%s' % (modelname))

    # Time only the fit. perf_counter() is a monotonic, high-resolution clock
    # intended for measuring short intervals; time.time() is an adjustable
    # wall clock whose resolution can be too coarse for fits this fast.
    start = time.perf_counter()
    clf.fit(X_train, y_train)
    end = time.perf_counter()

    score_train = clf.score(X_train, y_train)
    score_test = clf.score(X_test, y_test)

    scores_train.append(score_train)
    scores_test.append(score_test)
    models.append(modelname)
    times.append(end - start)

# Summary of the collected results once every model has been evaluated.
print('scores_train:', scores_train)
print('scores_test', scores_test)
print('models:', models)
# Plot the results.
# The x positions are derived from the number of evaluated models instead of a
# hard-coded [0, 1, 2, 3], so the figures stay valid if the model list changes.
positions = list(range(len(models)))

# Figure 1: training vs. test accuracy for each classifier.
plt.figure(num=1)
plt.plot(positions, scores_train, 'r', label=u'训练集')
plt.plot(positions, scores_test, 'b', label=u'测试集')
plt.title(u'鸢尾花数据不同分类器准确率比较', fontsize=16)
plt.xticks(positions, models, rotation=0)
plt.legend(loc='lower left')

# Figure 2: fit time for each classifier.
plt.figure(num=2)
plt.plot(positions, times)
plt.title(u'鸢尾花数据不同分类器训练时间比较', fontsize=16)
plt.xticks(positions, models, rotation=0)

plt.show()
Stacking思想.rar
需积分: 5 79 浏览量
2024-03-22
10:06:12
上传
评论
收藏 1KB RAR 举报
ToBeCertain
- 粉丝: 669
- 资源: 23
最新资源
- 使用 C 语言实现的计算非负整数的阶乘
- 2011-2021最新版本北京大学数字普惠金融指数(PKU-DFIIC).xlsx
- 县域数字乡村指数2018-2020(1).xlsx
- Docker容器配置进阶
- tensorflow-gpu-2.7.4-cp37-cp37m-manylinux2010-x86-64.whl
- 多段线、 圆、弧转多段线(仅我可见)
- tensorflow-2.7.2-cp38-cp38-manylinux2010-x86-64.whl
- 李慧琴C语言基础部分.zip
- yeyue-p8Yi4-ve4a83792.apk
- tensorflow-gpu-2.7.3-cp38-cp38-manylinux2010-x86-64.whl
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈