import pandas as pd;
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rcParams["axes.unicode_minus"] = False
mpl.style.use('ggplot')
from sklearn import metrics
from sklearn.metrics import roc_curve, auc, make_scorer, f1_score, fbeta_score, precision_score, \
roc_auc_score, accuracy_score, precision_recall_curve
from sklearn.utils.multiclass import type_of_target
from tools import *
####################################################################################################
####################################################################################################
###################################### ######################################
###################################### 13. MODEL EVALUATION ######################################
###################################### ######################################
####################################################################################################
####################################################################################################
def get_mae(depvar_value, pred_label):
    """
    Return the mean absolute error between the real and predicted labels.

    : params depvar_value: real label
    : params pred_label: predicted label
    """
    return metrics.mean_absolute_error(depvar_value, pred_label)
def get_mse(depvar_value, pred_label):
    """
    Return the mean squared error between the real and predicted labels.

    : params depvar_value: real label
    : params pred_label: predicted label
    """
    return metrics.mean_squared_error(depvar_value, pred_label)
def get_rmse(depvar_value, pred_label):
    """
    Return the root mean squared error (sqrt of MSE) between the real
    and predicted labels.

    : params depvar_value: real label
    : params pred_label: predicted label
    """
    mse_value = metrics.mean_squared_error(depvar_value, pred_label)
    return np.sqrt(mse_value)
def get_msle(depvar_value, pred_label):
    """
    Return the mean squared log error between the real and predicted
    labels.

    : params depvar_value: real label
    : params pred_label: predicted label
    """
    # Local renamed from the original's misleading `rmse`.
    msle = metrics.mean_squared_log_error(depvar_value, pred_label)
    return msle
def get_precision(depvar_value, pred_label, pos_label=1):
    """
    Calculate precision rate for predicted dataset.

    : params depvar_value: real label
    : params pred_label: predicted label
    : params pos_label: positive label(0 or 1)
    """
    # Pass pos_label by keyword: the third positional parameter of
    # sklearn's precision_score is `labels`, not `pos_label`, so the
    # original positional call bound the value to the wrong parameter
    # (and raises TypeError under the keyword-only API of sklearn >= 1.0).
    precision = metrics.precision_score(depvar_value, pred_label,
                                        pos_label=pos_label)
    return precision
def get_false_neg_rate(depvar_value, pred_label, pos_label=1):
    """
    Return the share of false negatives among all PREDICTED negatives,
    i.e. fn / (fn + tn) — the false omission rate.

    NOTE(review): despite the name, this is NOT the standard false
    negative rate fn / (fn + tp), and `pos_label` is never used —
    confirm the intended metric with callers.

    : params depvar_value: real label
    : params pred_label: predicted label
    : params pos_label: positive label(0 or 1) — currently unused
    """
    tn, fp, fn, tp = metrics.confusion_matrix(depvar_value, pred_label).ravel()
    return fn / (fn + tn)
def get_auc_ks(depvar_value, pred_value, pos_label=1):
    """
    Compute ROC AUC and the KS statistic, and binarize the scores at the
    KS-optimal cut-off (the threshold maximizing tpr - fpr).

    : params depvar_value: real label
    : params pred_value: predicted value (score/probability)
    : params pos_label: positive label(0 or 1)
    : returns: pred_label, auc, ks, threshold, fpr, tpr
    """
    fpr, tpr, thresholds = metrics.roc_curve(depvar_value, pred_value,
                                             pos_label=pos_label)
    ks_curve = tpr - fpr
    cut = thresholds[np.argmax(ks_curve)]
    # NOTE(review): when pos_label == 0 the scores are flipped AFTER the
    # cut-off was chosen on the un-flipped scores — confirm the threshold
    # is meant to apply to the flipped values.
    if pos_label == 0:
        pred_value = 1 - pred_value
    pred_label = pd.Series(pred_value).map(lambda v: 1 if v > cut else 0)
    area = metrics.auc(fpr, tpr)
    ks = max(ks_curve)
    return pred_label, area, ks, cut, fpr, tpr
def get_pos_neg_rate(pred_label):
    """
    Return the fraction of predicted negatives (== 0) and predicted
    positives (== 1), in that order.

    : params pred_label: predicted label (array-like whose `==` yields
        an iterable of booleans, e.g. numpy array / pandas Series)
    """
    total = len(pred_label)
    counts = {label: sum(pred_label == label) for label in (0, 1)}
    return counts[0] / total, counts[1] / total
def get_f_beta(depvar_value, pred_label, pos_label, beta):
    """
    Calculate the F-beta score for predicted dataset.

    : params depvar_value: real label
    : params pred_label: predicted label
    : params pos_label: positive label(0 or 1)
    : params beta: beta weight for recall in the F-beta score
    """
    # Forward pos_label explicitly: the original accepted the parameter
    # but never passed it on, so scores were always computed against
    # sklearn's default pos_label=1 regardless of the argument.
    f_beta = metrics.fbeta_score(depvar_value, pred_label,
                                 beta=beta, pos_label=pos_label)
    return f_beta
def get_clf_statistics(pred_value, depvar_value, pos_label=1, beta=1, extra_stat=False, silent=True):
    """
    Calculate all statistics for predicted value if label is binary.

    Each metric is computed best-effort: any failure is printed and the
    corresponding output degrades to None instead of aborting the report.

    : params pred_value: predicted value (score/probability)
    : params depvar_value: real label
    : params pos_label: positive label(0 or 1)
    : params beta: beta for calculate f_beta (only used when extra_stat)
    : params extra_stat: if True, additionally compute recall, precision
        and f_beta and return a longer tuple
    : params silent: If True, restrict prints
    """
    # AUC/KS also yields the hard labels consumed by every metric below;
    # if this step fails, the downstream metrics fall back to None too.
    try:
        pred_label, ac, ks, thr_point, fpr, tpr = get_auc_ks(depvar_value, pred_value, pos_label)
    except Exception as e:
        pred_label, ac, ks, thr_point, fpr, tpr = None, None, None, None, None, None
        print(e)
    try:
        miss_neg_rate = get_false_neg_rate(depvar_value, pred_label, pos_label)
    except Exception as e:
        miss_neg_rate = None
        print(e)
    try:
        neg_rate, pos_rate = get_pos_neg_rate(pred_label)
    except Exception as e:
        neg_rate, pos_rate = None, None
        print(e)
    if extra_stat:
        # NOTE(review): get_recall is not defined in this file —
        # presumably provided by `from tools import *`; confirm.
        try:
            recall = get_recall(depvar_value, pred_label, pos_label)
        except Exception as e:
            recall = None
            print(e)
        try:
            precision = get_precision(depvar_value, pred_label, pos_label)
        except Exception as e:
            precision = None
            print(e)
        try:
            f_beta = get_f_beta(depvar_value, pred_label, pos_label, beta)
        except Exception as e:
            f_beta = None
            print(e)
        if not silent:
            print('outputs are auc, ks, threshold, negativerate, missing_negative_rate, recall rate, \
precision rate, f_beta, fpr, tpr')
        # NOTE(review): this branch returns pos_rate (4th item) although
        # the print above does not mention it, while the non-extra branch
        # omits pos_rate entirely — confirm callers unpack accordingly.
        return ac, ks, thr_point, pos_rate, neg_rate, miss_neg_rate, recall, precision, f_beta, fpr, tpr
    else:
        if not silent:
            print('outputs are auc, ks, threshold, negativerate, missing_negative_rate, fpr, tpr')
        return ac, ks, thr_point, neg_rate, miss_neg_rate, fpr, tpr
def get_reg_statistics(pred_value, depvar_value, silent=True):
    """
    Calculate all regression statistics (mae, mse, msle, rmse) for a
    predicted value against a continuous label.

    : params depvar_value: real label
    : params pred_value: predicted value
    : params silent: If True, restrict prints
    """
    def _best_effort(metric_fn):
        # Mirror the original per-metric try/except: print the failure
        # and substitute None rather than aborting the whole report.
        try:
            return metric_fn(depvar_value, pred_value)
        except Exception as e:
            print(e)
            return None

    mae = _best_effort(get_mae)
    mse = _best_effort(get_mse)
    msle = _best_effort(get_msle)
    rmse = _best_effort(get_rmse)
    if not silent:
        print('outputs are mae, mse, msle, rmse')
    return mae, mse, msle, rmse
def get_xgb_fi(model, method='interaction', alpha=0.7, top=20):
"""
Get feature importance from xgb model.
: params model: input model
: params method: method for choosing features
: params alpha: weight parameter for importance_type
(larger the alpha, larger the weight for importance_type = gain)
: params top: number of top features from each importance_type
"""
importance_by_weight = model.get_score(importance_type='weight')
importance_by_gain = model.get_score(importance_type='gain')
importance_by_weight = pd.DataFrame(importance_by_weight, index=range(1)).T
importance_by_weight.columns = {'weight'}
importance_by_weight = importance_by_weight.sort
# NOTE(review): web-page scrape artifacts ("评论0" / "最新资源") removed here;
# the file is truncated mid-statement inside get_xgb_fi above — recover the
# rest of that function from the original source.