# coding:utf-8
import datetime
import time
import warnings
import networkx as nx
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold
warnings.filterwarnings('ignore')
def gen_thres_new(df_train, oof_preds):
    """Search for the decision threshold that maximises macro-F1 on the training set.

    The search is centred on the quantile of the out-of-fold predictions that
    matches the observed positive rate, and scans +/- 0.2 around it in steps
    of 0.01.

    Args:
        df_train: training frame with a ``black_flag`` label column. A column
            named ``oof_preds`` is written to it in place.
        oof_preds: out-of-fold predictions, one per row of ``df_train``.

    Returns:
        The candidate threshold with the highest macro-F1 (first one on ties).
    """
    df_train['oof_preds'] = oof_preds
    positive_rate = df_train['black_flag'].mean()
    centre = df_train['oof_preds'].quantile(1 - positive_rate)

    labels = df_train['black_flag']
    # One row per candidate: [threshold, macro-F1 at that threshold].
    scored = np.array([
        [candidate,
         f1_score(labels, np.where(oof_preds > candidate, 1, 0), average='macro')]
        for candidate in np.arange(centre - 0.2, centre + 0.2, 0.01)
    ])
    top = scored[:, 1].argmax()
    best_thresh, best_f1 = scored[top]
    print("阈值: {}\n训练集的f1: {}".format(best_thresh, best_f1))
    return best_thresh
def lgb_f1_score(y_hat, data):
    """Custom evaluation metric: F1 after flagging the top 25% of scores as positive.

    Args:
        y_hat: predicted scores.
        data: object exposing ``get_label()`` that returns the true labels
            (presumably a LightGBM ``Dataset`` - confirm against the caller).

    Returns:
        A ``(name, value, is_higher_better)`` tuple: ``('f1', <score>, True)``.
    """
    y_true = data.get_label()
    ranked = pd.DataFrame()
    ranked['bz_predict'] = y_hat
    ranked['flag'] = y_true
    # Highest scores first; after the reset, the index is the rank.
    ranked = ranked.sort_values(['bz_predict'], ascending=False).reset_index(drop=True)
    cutoff = ranked.shape[0] * 0.25
    ranked['black_flag'] = np.where(ranked.index < cutoff, 1, 0)
    score = f1_score(ranked['black_flag'], ranked['flag'])
    return 'f1', score, True
def calc_max_coutinut_times(a, b=1):
    """Return the length of the longest run of consecutive elements equal to ``b``.

    Args:
        a: iterable of values to scan.
        b: the value whose consecutive occurrences are counted (default 1).

    Returns:
        The longest run length, never less than 1: the result is 1 even when
        ``a`` is empty or does not contain ``b`` at all.
    """
    # The floor of 1 is kept so existing callers see the same minimum value.
    longest = 1
    run = 0
    # Iterate over values rather than indexing a[k - 1], so label-indexed
    # sequences behave the same as plain lists.
    for value in a:
        if value == b:
            run += 1
            if run > longest:
                longest = run
        else:
            run = 0
    return longest
def gen_target_encoding_feats(train, test, encode_cols, target_col, n_fold=5):
    """Add target-mean encoding features to ``train`` and ``test``.

    For every column in ``encode_cols`` a new column ``<col>_mean_target`` is
    written to both frames in place. Training rows are encoded out-of-fold
    (means computed on the other folds of a stratified, shuffled split with
    ``random_state=42``); test rows are encoded with means from the whole
    training frame. Categories not seen in the fitting data map to NaN.

    Args:
        train: training frame containing ``encode_cols`` and ``target_col``.
        test: test frame containing ``encode_cols``.
        encode_cols: list of column names to encode.
        target_col: name of the target column in ``train``.
        n_fold: number of cross-validation folds.

    Returns:
        The same ``train`` and ``test`` objects, as a tuple.
    """
    # Out-of-fold encodings: one row per training row, one column per encoded column.
    oof_means = np.zeros((train.shape[0], len(encode_cols)))
    splitter = StratifiedKFold(n_splits=n_fold, random_state=42, shuffle=True)
    for fit_rows, holdout_rows in splitter.split(train[encode_cols], train[target_col]):
        fit_part = train.iloc[fit_rows]
        holdout_part = train.iloc[holdout_rows]
        for pos, col in enumerate(encode_cols):
            category_means = fit_part.groupby(col)[target_col].mean()
            oof_means[holdout_rows, pos] = holdout_part[col].map(category_means).values

    for pos, col in enumerate(encode_cols):
        train[f'{col}_mean_target'] = oof_means[:, pos]

    # Test rows use means computed from the full training frame.
    for col in encode_cols:
        category_means = train.groupby(col)[target_col].mean()
        test[f'{col}_mean_target'] = test[col].map(category_means)

    return train, test
def findMaxAverage(nums, k):
    """Return the largest mean over all contiguous windows of length ``k``.

    Args:
        nums: indexable sequence of numbers.
        k: window length.

    Returns:
        The maximum window average.

    Raises:
        ValueError: if ``nums`` has fewer than ``k`` elements, because no
            window exists and ``max`` receives an empty list.
    """
    window_means = []
    running_total = 0
    left = 0
    for right in range(len(nums)):
        running_total += nums[right]
        if right < k - 1:
            # The window is not full yet.
            continue
        window_means.append(running_total / k)
        # Slide the window: drop its leftmost element.
        running_total -= nums[left]
        left += 1
    return max(window_means)
def findMinAverage(nums, k):
    """Return the smallest mean over all contiguous windows of length ``k``.

    Args:
        nums: indexable sequence of numbers.
        k: window length.

    Returns:
        The minimum window average.

    Raises:
        ValueError: if ``nums`` has fewer than ``k`` elements, because no
            window exists and ``min`` receives an empty list.
    """
    window_means = []
    running_total = 0
    left = 0
    for right in range(len(nums)):
        running_total += nums[right]
        if right < k - 1:
            # The window is not full yet.
            continue
        window_means.append(running_total / k)
        # Slide the window: drop its leftmost element.
        running_total -= nums[left]
        left += 1
    return min(window_means)
# Load the raw input tables from the data directory.
path = '../data/'
static_info = pd.read_csv(path + '账户静态信息.csv')
time_info = pd.read_csv(path + '账户交易信息.csv')
train_label = pd.read_csv(path + '训练集标签.csv')
test_no_label = pd.read_csv(path + 'test_dataset.csv')
# Build one timestamp per row from 'jyrq' and 'jysj' (presumably transaction
# date and time - confirm), then order rows by 'zhdh' and that timestamp.
time_info['tt'] = time_info['jyrq'] + ' ' + time_info['jysj']
time_info['tt'] = pd.to_datetime(time_info['tt'])
time_info = time_info.sort_values(['zhdh','tt'])
# Sign factor: +1 where 'jdbj' equals 0, otherwise -1 (presumably a
# debit/credit flag - confirm which direction 0 denotes).
time_info['ttt'] = time_info['jdbj'].apply(lambda x:1 if x == 0 else -1)
# Signed amount, and 'zhye' plus that signed amount.
time_info['ttt_jyje'] = time_info['jyje'] * time_info['ttt']
time_info['tttt'] = time_info['zhye'] + time_info['ttt_jyje']
# Working subset used only to derive the per-'zhdh' gap statistics below.
a = time_info[['zhdh','jyje','zhye','tttt','ttt_jyje','ttt']]
# 'ttttt' is the next row's 'tttt' within the same 'zhdh'; the last row of
# each group has no successor and receives NaN.
a['ttttt'] = a.groupby(['zhdh'])['tttt'].shift(-1)
a['zhye'] = a['zhye'].astype('float')
a['ttttt'] = a['ttttt'].astype('float')
# Gap between this row's 'zhye' and the next row's 'tttt'.
a['ttttt_tttt'] = a['zhye'] - a['ttttt']
# Drops every row that has a NaN in any selected column, which includes the
# last row of each 'zhdh' group.
a = a.dropna()
# Integer cast discards the fractional part of the gap.
a['ttttt_tttt'] = a['ttttt_tttt'].astype(int)
# Per-'zhdh' mean and variance of the gap, flattened to plain column names.
ff1 = a.groupby(['zhdh']).agg({'ttttt_tttt':['mean','var']}).reset_index()
ff1.columns = ['zhdh','ttttt_tttt_mean','ttttt_tttt_var']
# Remove the helper columns from time_info now that the statistics are built.
del time_info['ttt']
del time_info['ttt_jyje']
del time_info['tt']
del time_info['tttt']
# March model: per-account predictions for the train and test sets.
add_train_3 = pd.read_csv('../submit/train_3_lgb.csv')
add_test_3 = pd.read_csv('../submit/test_3_lgb.csv')
add_train_3.columns = ['zhdh', 'black_flag_3', 'predict_3']
add_test_3.columns = ['zhdh', 'black_flag_3', 'predict_3']
# April model.
add_train_4 = pd.read_csv('../submit/train_4_lgb.csv')
add_test_4 = pd.read_csv('../submit/test_4_lgb.csv')
add_train_4.columns = ['zhdh', 'black_flag_4', 'predict_4']
add_test_4.columns = ['zhdh', 'black_flag_4', 'predict_4']
# May model.
add_train_5 = pd.read_csv('../submit/train_5_lgb.csv')
add_test_5 = pd.read_csv('../submit/test_5_lgb.csv')
add_train_5.columns = ['zhdh', 'black_flag_5', 'predict_5']
add_test_5.columns = ['zhdh', 'black_flag_5', 'predict_5']
# Join the three monthly prediction columns on the account key; the default
# inner join keeps only accounts present in all three files.
add_train = (
    add_train_3[['zhdh', 'predict_3']]
    .merge(add_train_4[['zhdh', 'predict_4']], on=['zhdh'])
    .merge(add_train_5[['zhdh', 'predict_5']], on=['zhdh'])
)
add_test = (
    add_test_3[['zhdh', 'predict_3']]
    .merge(add_test_4[['zhdh', 'predict_4']], on=['zhdh'])
    .merge(add_test_5[['zhdh', 'predict_5']], on=['zhdh'])
)
# # 3月模型
# add_train_xgb_3 = pd.read_csv('../submit/train_3_xgb.csv')
# add_train_xgb_3.columns = ['zhdh', 'black_flag_xgb_3', 'predict_xgb_3']
# add_test_xgb_3 = pd.read_csv('../submit/test_3_xgb.csv')
# add_test_xgb_3.columns = ['zhdh', 'black_flag_xgb_3', 'predict_xgb_3']
# # 4月模型
# add_train_xgb_4 = pd.read_csv('../submit/train_4_xgb.csv')
# add_train_xgb_4.columns = ['zhdh', 'black_flag_xgb_4', 'predict_xgb_4']
# add_test_xgb_4 = pd.read_csv('../submit/test_4_xgb.csv')
# add_test_xgb_4.columns = ['zhdh', 'black_flag_xgb_4', 'predict_xgb_4']
# # 5月模型
# add_train_xgb_5 = pd.read_csv('../submit/train_5_xgb.csv')
# add_train_xgb_5.columns = ['zhdh', 'black_flag_xgb_5', 'predict_xgb_5']
# add_test_xgb_5 = pd.read_csv('../submit/test_5_xgb.csv')
# add_test_xgb_5.columns = ['zhdh', 'black_flag_xgb_5', 'predict_xgb_5']
#
# add_train_xgb = pd.merge(add_train_xgb_3[['zhdh', 'predict_xgb_3']], add_train_xgb_4[['zhdh', 'predict_xgb_4']],
# on=['zhdh'])
# add_train_xgb = pd.merge(add_train_xgb, add_train_xgb_5[['zhdh', 'predict_xgb_5']], on=['zhdh'])
#
# add_test_xgb = pd.merge(add_test_xgb_3[['zhdh', 'predict_xgb_3']], add_test_xgb_4[['zhdh', 'predict_xgb_4']],
# on=['zhdh'])
# add_test_xgb = pd.merge(add_test_xgb, add_test_xgb_5[['zhdh', 'predict_xgb_5']], on=['zhdh'])
###########################
# # 3月模型
# add_train_cat_3 = pd.read_csv('../submit/train_cat_3.csv')
# add_train_cat_3.columns = ['zhdh', 'black_flag_cat_3', 'predict_cat_3']
# add_test_cat_3 = pd.read_csv('../submit/test_3_xgb.csv')
# add_test_cat_3.columns = ['zhdh', 'black_flag_cat_3', 'predict_cat_3']
# # 4月模型
# add_train_cat_4 = pd.read_csv('../submit/train_cat_4.csv')
# add_train_cat_4.columns = ['zhdh', 'black_flag_cat_4', 'predict_cat_4']
# add_test_cat_4 = pd.read_csv('../submit/test_cat_4.csv')
# add_test_cat_4.columns = ['zhdh', 'black_flag_cat_4', 'predict_cat_4']
# # 5月模型
# add_tra
# NOTE: the script is truncated at this point; the text that followed here was
# download-page residue (site footer and resource listing), not part of the code.