基于机器学习实现android恶意软件检测模型（源码）.zip

共12个文件

py：7个

xml：4个

iml：1个

版权申诉

项目源码

学习资料

5星 · 超过95%的资源 57 浏览量 2024-02-20 23:47:17 上传评论收藏 11KB ZIP 举报

资源推荐

资源详情

资源评论

收起资源包目录

基于机器学习实现android恶意软件检测模型（源码）.zip （12个子文件）

code_20105

apk_decompile

decompile_apk.py 1KB

.idea

dictionaries

Administrator.xml 270B

misc.xml 288B

android恶意软件检测模型实现.iml 383B

inspectionProfiles

Project_Default.xml 727B

modules.xml 501B

predict

charge_category_Logistic regression.py 6KB

charge_category_KNN.py 6KB

charge_category_SVM.py 8KB

parse_AndroidManifest.py 3KB

charge_category_NaiveBayesian.py 6KB

get_DataSets.py 2KB

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Time : 2019/5/8 14:18
# @Author : wkend
# @File : charge_category_SVM.py
# @Software: PyCharm
"""Classify Android samples as malware (-1) or benign (1) with an SVM.

The module trains an RBF-kernel SVC on a feature matrix, evaluates it with
hand-written confusion-matrix metrics, and plots precision/recall and ROC
curves from the classifier's decision scores.
"""
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from predict.get_DataSets import get_dataSets
from sklearn.metrics import precision_recall_curve, roc_curve, roc_auc_score
import matplotlib.pyplot as plt


def charge_category_SVM(X, y):
    """Train an SVM on a train/test split of the data and predict the test part.

    :param X: feature vectors, one row per sample (converted to a float array)
    :param y: class labels, -1 for malware and 1 for benign
    :return: tuple ``(y_test, y_predict, decision_scores)`` holding the true
        test labels, the predicted test labels and the SVC decision-function
        values for the test samples
    """
    X = np.array(X, dtype=float)
    y = np.array(y)

    # Hold out 20% of the samples for testing; the fixed seed keeps the split
    # reproducible between runs.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=666)

    # Fit the scaler on the training data only and apply the same
    # transformation to both parts, so no test statistics leak into training.
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Hyper-parameters are fixed here. A GridSearchCV over kernel / C / gamma
    # was sketched in an earlier revision but is not run.
    svc = SVC(kernel='rbf', C=2, gamma=25)
    svc.fit(X_train_scaled, y_train)

    y_predict = svc.predict(X_test_scaled)
    decision_scores = svc.decision_function(X_test_scaled)
    return y_test, y_predict, decision_scores


def _label_arrays(y_true, y_predict):
    """Return both label vectors as NumPy arrays after checking their lengths."""
    # Plain lists would make ``y_true == -1`` a single bool instead of an
    # element-wise mask, so coerce before any comparison.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    assert len(y_true) == len(y_predict)
    return y_true, y_predict


def TN(y_true, y_predict):
    """Count true negatives.

    :param y_true: true labels, -1 for malware and 1 for benign
    :param y_predict: predicted labels, -1 for malware and 1 for benign
    :return: number of samples labelled -1 and predicted -1
    """
    y_true, y_predict = _label_arrays(y_true, y_predict)
    return np.sum((y_true == -1) & (y_predict == -1))


def FP(y_true, y_predict):
    """Count false positives.

    :param y_true: true labels, -1 for malware and 1 for benign
    :param y_predict: predicted labels, -1 for malware and 1 for benign
    :return: number of samples labelled -1 but predicted 1
    """
    y_true, y_predict = _label_arrays(y_true, y_predict)
    return np.sum((y_true == -1) & (y_predict == 1))


def FN(y_true, y_predict):
    """Count false negatives.

    :param y_true: true labels, -1 for malware and 1 for benign
    :param y_predict: predicted labels, -1 for malware and 1 for benign
    :return: number of samples labelled 1 but predicted -1
    """
    y_true, y_predict = _label_arrays(y_true, y_predict)
    return np.sum((y_true == 1) & (y_predict == -1))


def TP(y_true, y_predict):
    """Count true positives.

    :param y_true: true labels, -1 for malware and 1 for benign
    :param y_predict: predicted labels, -1 for malware and 1 for benign
    :return: number of samples labelled 1 and predicted 1
    """
    y_true, y_predict = _label_arrays(y_true, y_predict)
    return np.sum((y_true == 1) & (y_predict == 1))


def confusion_matrix(y_true, y_predict):
    """Build the 2x2 confusion matrix.

    :param y_true: true labels, -1 for malware and 1 for benign
    :param y_predict: predicted labels, -1 for malware and 1 for benign
    :return: array ``[[TN, FP], [FN, TP]]``
    """
    return np.array([
        [TN(y_true, y_predict), FP(y_true, y_predict)],
        [FN(y_true, y_predict), TP(y_true, y_predict)],
    ])


def accuracy_score(y_true, y_predict):
    """Compute the fraction of samples whose prediction equals the true label.

    :param y_true: true labels, -1 for malware and 1 for benign
    :param y_predict: predicted labels, -1 for malware and 1 for benign
    :return: accuracy, or 0.0 when there are no samples
    """
    y_true, y_predict = _label_arrays(y_true, y_predict)
    if len(y_true) == 0:
        return 0.0
    return np.sum(y_true == y_predict) / len(y_true)


def precision_score(y_true, y_predict):
    """Compute precision, ``TP / (TP + FP)``.

    :param y_true: true labels, -1 for malware and 1 for benign
    :param y_predict: predicted labels, -1 for malware and 1 for benign
    :return: precision, or 0.0 when nothing was predicted as 1
    """
    tp = TP(y_true, y_predict)
    fp = FP(y_true, y_predict)
    predicted_positive = tp + fp
    # NumPy integer division by zero yields nan instead of raising, so the
    # empty case has to be tested explicitly.
    if predicted_positive == 0:
        return 0.0
    return tp / predicted_positive


def recall_score(y_true, y_predict):
    """Compute recall, ``TP / (TP + FN)``.

    :param y_true: true labels, -1 for malware and 1 for benign
    :param y_predict: predicted labels, -1 for malware and 1 for benign
    :return: recall, or 0.0 when no sample is labelled 1
    """
    tp = TP(y_true, y_predict)
    fn = FN(y_true, y_predict)
    actual_positive = tp + fn
    # Same reasoning as in precision_score: no exception is raised on 0 / 0.
    if actual_positive == 0:
        return 0.0
    return tp / actual_positive


def f1_score(precision, recall):
    """Compute the harmonic mean of precision and recall.

    :param precision: precision value
    :param recall: recall value
    :return: ``2 * precision * recall / (precision + recall)``, or 0.0 when
        both inputs are zero
    """
    total = precision + recall
    if total == 0:
        return 0.0
    return 2 * precision * recall / total


def make_PR_curve(y_test, decision_scores):
    """Plot precision and recall against the decision threshold."""
    precisions, recalls, thresholds = precision_recall_curve(y_test, decision_scores)
    # precision_recall_curve returns one more precision/recall value than
    # thresholds; drop the last point so the arrays line up.
    plt.plot(thresholds, precisions[:-1], label="precisions", linestyle='--')
    plt.plot(thresholds, recalls[:-1], label="recalls")
    plt.legend()
    plt.show()


def make_PR_balance_curve(y_test, decision_scores):
    """Plot recall (y axis) against precision (x axis)."""
    precisions, recalls, thresholds = precision_recall_curve(y_test, decision_scores)
    plt.plot(precisions, recalls)
    plt.xlabel('precision')
    plt.ylabel('recall')
    plt.show()


def make_roc_curve(y_test, decision_scores):
    """Plot the ROC curve (true-positive rate against false-positive rate)."""
    fprs, tprs, thresholds = roc_curve(y_test, decision_scores)
    plt.plot(fprs, tprs)
    plt.xlabel('FPR')
    plt.ylabel('TPR')
    plt.show()


def main():
    """Load both sample sets, train and evaluate the SVM, and print the metrics."""
    malware_path = 'G:/毕设/软件样本库/恶意样本/decompile'
    benign_path = 'G:/毕设/软件样本库/良性样本/decompile'

    malware_dataSets = get_dataSets(malware_path)
    benign_dataSets = get_dataSets(benign_path)

    # Derive the label vector from the actual set sizes (1260 malware and
    # 1184 benign samples in the author's corpus) so it cannot drift out of
    # sync with the loaded data.
    category = [-1] * len(malware_dataSets) + [1] * len(benign_dataSets)
    dataSets = list(malware_dataSets) + list(benign_dataSets)

    y_test, y_predict, decision_scores = charge_category_SVM(dataSets, category)

    # Results are stored under names distinct from the metric functions so
    # the functions are not shadowed.
    matrix = confusion_matrix(y_test, y_predict)
    print(matrix)

    accuracy = accuracy_score(y_test, y_predict)
    print('SVM准确率：' + str(accuracy))

    precision = precision_score(y_test, y_predict)
    print('SVM精准率：' + str(precision))

    recall = recall_score(y_test, y_predict)
    print('SVM召回率：' + str(recall))

    f1 = f1_score(precision, recall)
    print('SVM F1 Score：' + str(f1))

    make_PR_curve(y_test, decision_scores)
    # Optional plots, left disabled:
    # make_PR_balance_curve(y_test, decision_scores)
    # make_roc_curve(y_test, decision_scores)

    auc = roc_auc_score(y_test, decision_scores)
    # NOTE(review): this statement is cut off in the available source; it is
    # completed here in the same pattern as the prints above - confirm.
    print('ROC曲线面积:' + str(auc))


if __name__ == '__main__':
    main()

评论收藏

内容反馈

版权申诉