信用卡欺诈检测研究

共8个文件

xml：4个

csv：1个

gitignore：1个

网络

需积分: 4 | 64 浏览量 | 2023-11-17 12:53:25 上传 | 评论 | 收藏 | 62.92MB | RAR | 举报

资源推荐

资源详情

资源评论

收起资源包目录

基于时序卷积网络（TCN）的信用卡欺诈检测研究.rar （8个子文件）

main.py 6KB

.idea

workspace.xml 3KB

misc.xml 188B

inspectionProfiles

profiles_settings.xml 174B

modules.xml 385B

基于时序卷积网络（TCN）的信用卡欺诈检测研究.iml 291B

.gitignore 50B

data.csv 143.84MB

# Credit-card fraud detection with a temporal convolutional network (TCN).
#
# Pipeline: load data.csv, standardize the 'Amount' and 'Time' columns,
# build a subset made of 2000 shuffled normal rows plus every anomaly,
# split it 60/40, train a stack of dilated causal Conv1D layers, then report
# loss / accuracy / AUC / classification report and draw a confusion matrix.
import sys

import keras
from keras import regularizers, optimizers
from keras import losses
from keras.models import Sequential, Model, load_model
from keras.layers import Dense, Input, Dropout, Embedding, LSTM
from keras.preprocessing import sequence
from keras.layers import Conv1D, Flatten, Activation, SpatialDropout1D
from keras.callbacks import ModelCheckpoint, TensorBoard
from tensorflow.keras.utils import to_categorical
import sklearn
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, roc_auc_score
from sklearn.metrics import classification_report
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import tensorflow


class Visualization:
    """Matplotlib/seaborn plotting helpers for the fraud-detection results."""

    # Tick labels, in class-index order (0 = normal, 1 = anomaly).
    labels = ["Normal", "Anomaly"]

    def draw_confusion_matrix(self, y, ypred):
        """Show a heat map of ``confusion_matrix(y, ypred)``.

        Args:
            y: True class indices.
            ypred: Predicted class indices.
        """
        matrix = confusion_matrix(y, ypred)

        plt.figure(figsize=(10, 8))
        colors = ["orange", "green"]
        sns.heatmap(matrix, xticklabels=self.labels, yticklabels=self.labels,
                    cmap=colors, annot=True, fmt="d")
        plt.title("Confusion Matrix")
        plt.ylabel('Actual')
        plt.xlabel('Predicted')
        plt.show()

    def draw_anomaly(self, y, error, threshold):
        """Scatter per-sample errors grouped by true label, with a threshold.

        Args:
            y: True labels; samples whose label equals 1 are drawn as red
                crosses, all others as green circles.
            error: Per-sample error values, aligned with ``y``.
            threshold: Height of the horizontal threshold line.
        """
        groupsDF = pd.DataFrame({'error': error, 'true': y}).groupby('true')

        figure, axes = plt.subplots(figsize=(12, 8))
        for name, group in groupsDF:
            is_anomaly = name == 1
            axes.plot(group.index, group.error,
                      marker='x' if is_anomaly else 'o',
                      linestyle='',
                      color='r' if is_anomaly else 'g',
                      label="Anomaly" if is_anomaly else "Normal")

        # Drawn after the points so the line spans the final x-limits.
        axes.hlines(threshold, axes.get_xlim()[0], axes.get_xlim()[1],
                    colors="b", zorder=100, label='Threshold')
        axes.legend()

        plt.title("Anomalies")
        plt.ylabel("Error")
        plt.xlabel("Data")
        plt.show()

    def draw_error(self, error, threshold):
        """Plot every error value as a point plus a horizontal threshold line.

        Args:
            error: Sequence of error values.
            threshold: Height of the horizontal threshold line.
        """
        plt.plot(error, marker='o', ms=3.5, linestyle='', label='Point')
        plt.hlines(threshold, xmin=0, xmax=len(error) - 1, colors="b",
                   zorder=100, label='Threshold')
        plt.legend()
        plt.title("Reconstruction error")
        plt.ylabel("Error")
        plt.xlabel("Data")
        plt.show()


# --------------------------------------------------------------------------
# Data loading and preparation
# --------------------------------------------------------------------------
path = 'data.csv'
df = pd.read_csv(path, sep=",", index_col=None)

# Standardize the two raw-scale columns. fit_transform returns an (n, 1)
# array, so flatten it before assigning it back to a single column.
df['Amount'] = StandardScaler().fit_transform(
    df['Amount'].values.reshape(-1, 1)).ravel()
df['Time'] = StandardScaler().fit_transform(
    df['Time'].values.reshape(-1, 1)).ravel()
print(df.head())

anomalies = df[df["Class"] == 1]
normal = df[df["Class"] == 0]
print(anomalies.shape, normal.shape)

# Shuffle the normal rows by permuting positional indices (the underlying
# values are untouched). The original repeated this 20 times; a single
# uniform permutation yields the same distribution.
normal = normal.iloc[np.random.permutation(len(normal))]

# Undersample: 2000 shuffled normal rows plus every anomaly.
data_set = pd.concat([normal[:2000], anomalies])

# 60 % train / 40 % test; random_state fixes how data_set is partitioned.
x_train, x_test = train_test_split(data_set, test_size=0.4, random_state=42)

x_train = x_train.sort_values(by=['Time'])
x_test = x_test.sort_values(by=['Time'])

y_train = x_train["Class"]
y_test = x_test["Class"]

# Fix: remove the label from the features. The original reshaped the whole
# frame (its comments mention 31 columns), which fed "Class" to the network
# as an input and leaked the target.
x_train = x_train.drop(columns=["Class"])
x_test = x_test.drop(columns=["Class"])

# Conv1D expects (samples, steps, channels): each row becomes a sequence of
# its column values with a single channel.
x_train = np.array(x_train).reshape(x_train.shape[0], x_train.shape[1], 1)
x_test = np.array(x_test).reshape(x_test.shape[0], x_test.shape[1], 1)
input_shape = (x_train.shape[1], 1)

# One-hot encode the labels; 2 is the number of classes.
y_train = to_categorical(y_train, 2)
y_test = to_categorical(y_test, 2)
print(y_train)

print("Shapes:\nx_train:%s\ny_train:%s\n" % (x_train.shape, y_train.shape))
print("x_test:%s\ny_test:%s\n" % (x_test.shape, y_test.shape))
print("input_shape:{}\n".format(input_shape))

# --------------------------------------------------------------------------
# Model: causal Conv1D layers with dilation rates 1, 2, 4, 8, each followed
# by SpatialDropout1D, then Flatten and a 2-way softmax.
# --------------------------------------------------------------------------
input_layer = Input(shape=input_shape)

conv_1 = Conv1D(filters=128, kernel_size=2, dilation_rate=1,
                padding='causal', strides=1,
                kernel_regularizer=regularizers.l2(0.01),
                activation='relu')(input_layer)
drop_1 = SpatialDropout1D(0.05)(conv_1)

conv_2 = Conv1D(filters=128, kernel_size=2, dilation_rate=2,
                padding='causal', strides=1,
                kernel_regularizer=regularizers.l2(0.01),
                activation='relu')(drop_1)
drop_2 = SpatialDropout1D(0.05)(conv_2)

conv_3 = Conv1D(filters=128, kernel_size=2, dilation_rate=4,
                padding='causal', strides=1,
                kernel_regularizer=regularizers.l2(0.01),
                activation='relu')(drop_2)
drop_3 = SpatialDropout1D(0.05)(conv_3)

# Note: this last layer uses a stronger L2 penalty (0.05) than the others.
conv_4 = Conv1D(filters=128, kernel_size=2, dilation_rate=8,
                padding='causal', strides=1,
                kernel_regularizer=regularizers.l2(0.05),
                activation='relu')(drop_3)
drop_4 = SpatialDropout1D(0.05)(conv_4)

# Flatten to feed the dense output layer.
flat = Flatten()(drop_4)
output_layer = Dense(2, activation='softmax')(flat)

TCN = Model(inputs=input_layer, outputs=output_layer)

# NOTE(review): `optimizers.adam_v2.Adam(lr=...)` is tied to the Keras
# release this script was written for - confirm against the installed one.
TCN.compile(loss='mean_squared_error',
            optimizer=optimizers.adam_v2.Adam(lr=0.002),
            metrics=['mae', 'accuracy'])

checkpointer = ModelCheckpoint(filepath="model_TCN_creditcard.h5",
                               verbose=0,
                               save_best_only=True)

TCN.summary()

# Fix: the checkpoint callback was built but never handed to fit(), so no
# model file was ever saved.
TCN.fit(x_train, y_train,
        batch_size=128,
        epochs=50,
        verbose=1,
        validation_data=(x_test, y_test),
        callbacks=[checkpointer])

# --------------------------------------------------------------------------
# Evaluation
# --------------------------------------------------------------------------
# evaluate() returns the loss followed by the compiled metrics in order,
# i.e. [loss, mae, accuracy]. Fix: the original printed score[1] (MAE)
# under the "Test accuracy" label.
score = TCN.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test MAE:', score[1])
print('Test accuracy:', score[2])

preds = TCN.predict(x_test)

# Decide classes with argmax rather than np.round, which turns an exact
# 0.5/0.5 tie into an all-zero row.
y_pred2 = np.argmax(preds, axis=1)
y_test2 = np.argmax(y_test, axis=1)
y_pred = to_categorical(y_pred2, 2)
print(y_pred)

# Fix: roc_auc_score takes (y_true, y_score). The original swapped the two
# arguments and scored rounded predictions; use the anomaly-class
# probability as the score instead.
auc = roc_auc_score(y_test2, preds[:, 1])
print("AUC: {:.2%}".format(auc))
print(classification_report(y_test2, y_pred2,
                            target_names=Visualization.labels))

visualize = Visualization()
visualize.draw_confusion_matrix(y_test2, y_pred2)

评论收藏

内容反馈