# -*- coding: utf-8 -*-
"""
Created on Wed Apr 25 16:42:42 2018
@author: zju
"""
def compute_class_weight(class_weight, classes, y):
    """Estimate class weights for unbalanced datasets.

    Parameters
    ----------
    class_weight : dict, 'balanced', 'auto' or None
        If 'balanced', weights are inversely proportional to class
        frequencies: ``n_samples / (n_classes * bincount(y))``.
        If 'auto' (deprecated), reciprocal class frequencies normalized
        so the mean weight is 1. If a dict, maps class label -> weight
        (unlisted classes get weight 1). If None or empty, uniform
        weights are returned.
    classes : array-like
        Sorted array of the classes occurring in the data.
    y : array-like
        Original class labels per sample.

    Returns
    -------
    weight : ndarray of shape (len(classes),)
        ``weight[i]`` is the weight for ``classes[i]``.

    Raises
    ------
    ValueError
        If ``y`` contains labels not in ``classes``, if a preset string
        branch finds a class absent from ``y``, or if ``class_weight``
        is neither a dict, a preset string, nor None.
    """
    import warnings

    import numpy as np

    if set(y) - set(classes):
        raise ValueError("classes should include all valid labels that can "
                         "be in y")
    if class_weight is None or len(class_weight) == 0:
        # Uniform class weights.
        weight = np.ones(len(classes), dtype=np.float64, order='C')
    elif class_weight in ['auto', 'balanced']:
        # Encode y into indices 0..n_classes-1. np.unique with
        # return_inverse reproduces sklearn's LabelEncoder (sorted unique
        # labels + inverse mapping) without the sklearn dependency.
        y_classes, y_ind = np.unique(y, return_inverse=True)
        if not all(np.in1d(classes, y_classes)):
            raise ValueError("classes should have valid labels that are in y")
        # Inversely proportional to the number of samples in the class.
        if class_weight == 'auto':
            recip_freq = 1. / np.bincount(y_ind)
            # searchsorted on the sorted unique labels == LabelEncoder.transform
            weight = (recip_freq[np.searchsorted(y_classes, classes)] /
                      np.mean(recip_freq))
            warnings.warn("The class_weight='auto' heuristic is deprecated in"
                          " 0.17 in favor of a new heuristic "
                          "class_weight='balanced'. 'auto' will be removed in"
                          " 0.19", DeprecationWarning)
        else:
            # 'balanced': n_samples / (n_classes * bincount(y)).
            recip_freq = len(y) / (len(y_classes) *
                                   np.bincount(y_ind).astype(np.float64))
            weight = recip_freq[np.searchsorted(y_classes, classes)]
    else:
        # User-defined dictionary of per-class weights.
        weight = np.ones(len(classes), dtype=np.float64, order='C')
        if not isinstance(class_weight, dict):
            raise ValueError("class_weight must be dict, 'balanced', or None,"
                             " got: %r" % class_weight)
        for c in class_weight:
            i = np.searchsorted(classes, c)
            if i >= len(classes) or classes[i] != c:
                # %r instead of the original %d so non-integer labels raise
                # the intended ValueError rather than a TypeError while
                # formatting the message.
                raise ValueError("Class label %r not present." % c)
            else:
                weight[i] = class_weight[c]
    return weight
def compute_sample_weight(class_weight, y, indices=None):
    """Estimate per-sample weights by class for unbalanced datasets.

    Parameters
    ----------
    class_weight : dict, list of dicts, 'balanced', 'auto', or None
        Weights associated with classes. For multi-output ``y``, a list
        with one dict per output column, or a single preset string.
    y : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Class labels per sample.
    indices : array-like of int, optional
        Indices of a subsample; only valid with a preset
        ('balanced'/'auto') ``class_weight``. Classes present in the
        full data but absent from the subsample get weight 0.

    Returns
    -------
    sample_weight : ndarray of shape (n_samples,)
        Per-sample weights (product over outputs for multi-output y).

    Raises
    ------
    ValueError
        On an unknown preset string, a non-string ``class_weight``
        combined with ``indices``, or a malformed multi-output spec.
    """
    import numpy as np

    y = np.atleast_1d(y)
    if y.ndim == 1:
        # Treat 1-D labels as a single-output column.
        y = np.reshape(y, (-1, 1))
    n_outputs = y.shape[1]

    # Native `str` check replaces the former `six.string_types`: this file
    # runs under Python 3 only, so the third-party `six` import is dropped.
    if isinstance(class_weight, str):
        if class_weight not in ['balanced', 'auto']:
            raise ValueError('The only valid preset for class_weight is '
                             '"balanced". Given "%s".' % class_weight)
    elif (indices is not None and
          not isinstance(class_weight, str)):
        raise ValueError('The only valid class_weight for subsampling is '
                         '"balanced". Given "%s".' % class_weight)
    elif n_outputs > 1:
        if (not hasattr(class_weight, "__iter__") or
                isinstance(class_weight, dict)):
            raise ValueError("For multi-output, class_weight should be a "
                             "list of dicts, or a valid string.")
        if len(class_weight) != n_outputs:
            raise ValueError("For multi-output, number of elements in "
                             "class_weight should match number of outputs.")

    expanded_class_weight = []
    for k in range(n_outputs):
        y_full = y[:, k]
        classes_full = np.unique(y_full)
        classes_missing = None

        if class_weight in ['balanced', 'auto'] or n_outputs == 1:
            class_weight_k = class_weight
        else:
            # One dict per output column.
            class_weight_k = class_weight[k]

        if indices is not None:
            # Get class weights for the subsample, covering all classes in
            # case some labels that were present in the original data are
            # missing from the sample.
            y_subsample = y[indices, k]
            classes_subsample = np.unique(y_subsample)
            weight_k = np.choose(np.searchsorted(classes_subsample,
                                                 classes_full),
                                 compute_class_weight(class_weight_k,
                                                      classes_subsample,
                                                      y_subsample),
                                 mode='clip')
            classes_missing = set(classes_full) - set(classes_subsample)
        else:
            weight_k = compute_class_weight(class_weight_k,
                                            classes_full,
                                            y_full)

        # Map per-class weights back onto the individual samples.
        weight_k = weight_k[np.searchsorted(classes_full, y_full)]

        if classes_missing:
            # Make missing classes' weight zero.
            weight_k[np.in1d(y_full, list(classes_missing))] = 0.

        expanded_class_weight.append(weight_k)

    expanded_class_weight = np.prod(expanded_class_weight,
                                    axis=0,
                                    dtype=np.float64)
    return expanded_class_weight
import pandas
import numpy as np
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from xgboost.sklearn import XGBClassifier

# Load the data set: first 23 columns are features, column 23 is the label.
f = pandas.read_csv('changqiao.csv', encoding='gbk')
X = f.iloc[:, 0:23]
y = f.iloc[:, 23]

# Convert once up front; the original re-ran np.array(X)/np.array(y)
# four times inside every fold.
X_arr = np.array(X)
y_arr = np.array(y)

skf = StratifiedKFold(n_splits=10)
skf.get_n_splits(X, y)
print(skf)

# NOTE(review): these accumulators are never updated or printed anywhere
# in this script -- the per-fold metric computation appears to be missing.
sumaccuracy = 0
sumprecision = 0
sumrecall = 0

for train_index, test_index in skf.split(X, y):
    X_train = X_arr[train_index]
    y_train = y_arr[train_index]
    X_test = X_arr[test_index]
    y_test = y_arr[test_index]

    # Hold out half of the training fold to fit the level-2 (stacking) model.
    X_train, X_train_meta, y_train, y_train_meta = train_test_split(
        X_train, y_train, test_size=0.5)

    # NOTE(review): these balanced sample weights are computed but never
    # passed to any fit() call below -- confirm whether they were meant to
    # be used as sample_weight.
    expanded_class_weight = compute_sample_weight('balanced', y_train,
                                                  indices=None)

    # Level-1 learner: AdaBoost over shallow decision trees.
    ada = AdaBoostClassifier(
        DecisionTreeClassifier(max_depth=2, min_samples_split=20,
                               min_samples_leaf=5),
        algorithm="SAMME",
        n_estimators=200, learning_rate=0.8)
    ada.fit(X_train, y_train)

    # Level-1 learner: XGBoost.
    xgb_clf = XGBClassifier(
        learning_rate=0.1,
        n_estimators=1000,
        max_depth=5,
        min_child_weight=1,
        gamma=0,
        subsample=0.8,
        colsample_bytree=0.8,
        objective='binary:logistic',
        nthread=4,
        scale_pos_weight=1,
        seed=27)
    xgb_clf.fit(X_train, y_train)

    # Level-1 learner: gradient boosting.
    gbdt = GradientBoostingClassifier(n_estimators=200)
    gbdt.fit(X_train, y_train)

    # Level-2 training features: rounded positive-class probabilities of
    # the three level-1 learners on the held-out half. Renamed from the
    # original `input`, which shadowed the builtin.
    meta_train = np.vstack((
        ada.predict_proba(X_train_meta)[:, 1].round(),
        xgb_clf.predict_proba(X_train_meta)[:, 1].round(),
        gbdt.predict_proba(X_train_meta)[:, 1].round())).T

    # Level-2 (stacking) model: logistic regression on level-1 outputs.
    stacker = LogisticRegression(penalty='l2')
    stacker.fit(meta_train, y_train_meta)

    # Apply the same pipeline to the test fold.
    meta_test = np.vstack((
        ada.predict_proba(X_test)[:, 1].round(),
        xgb_clf.predict_proba(X_test)[:, 1].round(),
        gbdt.predict_proba(X_test)[:, 1].round())).T

    # NOTE(review): y_pred is never compared against y_test; the fold's
    # metrics (and the sum* accumulators above) are left uncomputed.
    y_pred = stacker.predict_proba(meta_test)[:, 1]