# 基于机器学习的恶意软件检测系统 GUI界面
import sys
import imutils
# pip install -i https://pypi.douban.com/simple --trusted-host pypi.douban.com scikit-image
# pip install Cython
# pip install cython_bbox
import numpy as np
import os
import re
import pandas as pd
import shutil
import argparse
import numpy
from collections import *
import binascii
import cv2
from collections import *
from PIL import Image
from skimage.feature import greycomatrix, greycoprops
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from PyQt5.QtGui import QImage
# import LoginDC
# from LoginDC import MyDialog
from MainWindow import Ui_MainWindow # 导入了Ui_MainWindow类
from PyQt5.QtWidgets import QApplication, QMainWindow, QFileDialog
from PyQt5 import QtCore, QtGui, QtWidgets
class MyMainWindows(QMainWindow, Ui_MainWindow): # 新建一个类 Ui_MainWindow 为first中的一个类
def __init__(self, parent=None):
    """Build the generated UI and connect every button to its handler.

    ``parent`` is forwarded unchanged to the base-class initialiser.
    """
    super(MyMainWindows, self).__init__(parent)
    self.setupUi(self)

    # (button attribute, handler method) pairs, listed in connection order.
    wiring = (
        # Preprocessing
        ("btnOpenfile", "Openfile"),
        ("btnDecompile", "Decompile"),
        ("btnVisualize", "Visualize"),
        # Feature extraction
        ("btnText", "getTextFeatures"),
        ("btnColor", "getColorFeatures"),
        ("btnTexture", "getTextureFeatures"),
        ("btnFusion", "getFusionFeatures"),
        # Detection with individual classifiers
        ("btnDT", "ClassifyOfDT"),
        ("btnLR", "ClassifyOfLR"),
        ("btnKNN", "ClassifyOfKNN"),
        ("btnRF", "ClassifyOfRF"),
        ("btnXGBoost", "ClassifyOfXGBoost"),
        # Family classification
        ("btnDetection", "Detection"),
        ("btnClassification", "Classification"),
    )
    for button_name, handler_name in wiring:
        getattr(self, button_name).clicked.connect(getattr(self, handler_name))
# 预处理模块
def Openfile(self):
    """Let the user pick a file and remember its path components.

    On a successful pick this sets ``self.file`` (full path), ``self.Type``
    (the filter chosen in the dialog), ``self.filepath`` (directory),
    ``self.tempfilename`` (file name), ``self.shotname`` (name without
    extension) and ``self.extension``, then reports the path in
    ``ResultLabel``.

    If the dialog is dismissed without a selection nothing is changed, so a
    previously opened file stays active and no success message is shown.
    """
    chosen, chosen_filter = QFileDialog.getOpenFileName(
        self, "打开文件", "", "*.exe;;*.png;;*.dll;;All Files(*)")
    if not chosen:
        # Cancelled dialog yields an empty path; keep the current state
        # instead of overwriting it with empty strings.
        return

    self.file, self.Type = chosen, chosen_filter
    print(self.file)
    (self.filepath, self.tempfilename) = os.path.split(self.file)
    (self.shotname, self.extension) = os.path.splitext(self.tempfilename)
    print(self.filepath, self.tempfilename, self.shotname, self.extension)
    self.ResultLabel.setText("文件打开成功!\n 文件打开路径:\n" + str(self.file))
def Decompile(self):
    """Record the disassembly output paths and show them in ``ResultLabel``.

    No disassembler is started here. The disabled flow ran IDA
    (``idaw.exe -A -c -S handle.idc <file>``) and derived
    ``<shotname>.asm`` / ``<shotname>.bytes``; instead both paths are pinned
    to the sample ``0A32eTdBKayjCWhZqDOQ`` inside ``self.filepath``.

    Sets ``self.fileasm`` and ``self.filebytes``.
    """
    asm_path = self.filepath + "/0A32eTdBKayjCWhZqDOQ.asm"
    bytes_path = self.filepath + "/0A32eTdBKayjCWhZqDOQ.bytes"
    self.fileasm = asm_path
    self.filebytes = bytes_path

    report = f"文件反汇编结束!\n 反汇编文件保存路径:\n{asm_path}\n{bytes_path}\n"
    self.ResultLabel.setText(report)
def Visualize(self):
    """Turn the ``.asm`` and ``.bytes`` files into images, save and show them.

    Each file is read as raw unsigned bytes and laid out as a 2-D array that
    is 256 columns wide; trailing bytes that do not fill a whole row are
    dropped. The arrays are kept on ``self.imageasm`` / ``self.imagebytes``
    and written next to the opened file as ``<shotname>_asm.png`` and
    ``<shotname>_bytes.png`` (paths kept on ``self.imageasmpath`` /
    ``self.imagebytespath``).

    Each image is shown in an OpenCV window and ``cv2.waitKey(0)`` is called
    after each one, so the method waits there before continuing.
    """

    def load_rows(path, width=256):
        # Read the whole file as bytes; ``with`` releases the handle, which
        # the earlier open()-without-close never did.
        with open(path, mode='rb') as stream:
            pixels = np.fromfile(stream, dtype=np.ubyte)
        filesize = pixels.shape[0]
        print(filesize)
        rem = filesize % width
        print(rem)
        if rem != 0:
            # Drop the incomplete last row so the data reshapes cleanly.
            pixels = pixels[:-rem]
        return pixels.reshape(pixels.shape[0] // width, width)

    self.imageasm = load_rows(self.fileasm)
    self.imagebytes = load_rows(self.filebytes)

    self.imageasmpath = self.filepath + "/" + self.shotname + "_asm.png"
    self.imagebytespath = self.filepath + "/" + self.shotname + "_bytes.png"

    cv2.imwrite(self.imageasmpath, self.imageasm)
    cv2.imshow(self.imageasmpath, self.imageasm)
    cv2.waitKey(0)
    cv2.imwrite(self.imagebytespath, self.imagebytes)
    cv2.imshow(self.imagebytespath, self.imagebytes)
    cv2.waitKey(0)

    self.ResultLabel.setText(
        "文件图像化结束!\n 图像保存路径:\n" + self.imageasmpath + "\n" + self.imagebytespath + "\n")
# 特征提取模块
# 文本特征提取
def getOpcodeSequence(self, filename):
    """Return the opcode mnemonics found in the ``.text`` lines of a listing.

    The file is read as gb18030 text with undecodable bytes ignored. Only
    lines starting with ``.text`` are inspected; on each, the first lowercase
    word that follows a run of two-digit hex byte columns is taken as the
    opcode. ``align`` entries are left out.

    Both the file name and the resulting list are printed.
    """
    print(filename)
    pattern = re.compile(r'\s([a-fA-F0-9]{2}\s)+\s*([a-z]+)')
    mnemonics = []
    with open(filename, encoding='gb18030', errors='ignore') as listing:
        for row in listing:
            if not row.startswith(".text"):
                continue
            hit = pattern.search(row)
            if hit is None:
                continue
            mnemonic = hit.group(2)
            if mnemonic == "align":
                continue
            mnemonics.append(mnemonic)
    print(mnemonics)
    return mnemonics
def train_opcode_lm(self, ops, order=4):
    """Count which opcode follows each run of ``order`` opcodes.

    The sequence is padded at the front with ``order`` copies of ``"~"`` so
    that the first real opcodes also get a full-length history.

    Args:
        ops: sequence of opcode strings.
        order: length of the history window.

    Returns:
        A plain dict mapping each history tuple to a ``Counter`` of the
        opcodes seen right after it. Counts are raw occurrences; no
        normalisation is applied.
    """
    padded = ["~"] * order
    padded.extend(ops)

    lm = defaultdict(Counter)
    for i in range(len(padded) - order):
        history = tuple(padded[i:i + order])
        lm[history][padded[i + order]] += 1

    # Hand back a regular dict so lookups of unseen histories raise KeyError
    # instead of silently inserting empty counters.
    return dict(lm)
def getOpcodeNgram(self, ops, n=3):
    """Count every run of ``n`` consecutive opcodes in ``ops``.

    Args:
        ops: sequence of opcode strings.
        n: window length.

    Returns:
        A ``Counter`` keyed by n-tuples of opcodes. Sequences shorter than
        ``n`` give an empty counter.
    """
    # A sequence of length L holds L - n + 1 windows; stopping at L - n
    # would leave out the final one.
    windows = (tuple(ops[i:i + n]) for i in range(len(ops) - n + 1))
    return Counter(windows)
def getTextFeatures(self):
    """Extract opcode n-gram counts from ``self.fileasm`` and save them.

    The opcode sequence of the listing is turned into n-gram counts via
    ``getOpcodeSequence`` and ``getOpcodeNgram``. Only n-grams seen at least
    500 times are kept as feature columns. The single resulting row (``Id``
    0) is written to ``./Features/text.csv``, its text form is stored on
    ``self.textfeatures`` and shown in ``ShowLabel``, and ``ResultLabel``
    reports the output path.
    """
    filename = self.fileasm
    ops = self.getOpcodeSequence(filename)
    # Keyed by sample id; this handler processes one sample, id 0.
    map3gram = {0: self.getOpcodeNgram(ops)}

    totals = Counter()
    for grams in map3gram.values():
        totals += grams

    selectedfeatures = {}
    for gram, count in totals.items():
        if count >= 500:
            selectedfeatures[gram] = count
            print(gram, count)

    dataframelist = []
    for fid, grams in map3gram.items():
        row = {"Id": fid}
        for feature in selectedfeatures:
            row[feature] = grams.get(feature, 0)
        dataframelist.append(row)

    df = pd.DataFrame(dataframelist)
    print(dataframelist)
    self.textfeatures = str(dataframelist)
    # Create the output folder on first use so the save cannot fail just
    # because the directory is missing.
    os.makedirs("./Features", exist_ok=True)
    df.to_csv("./Features/text.csv", index=False)
    self.ShowLabel.setText(self.textfeatures)
    self.ResultLabel.setText("文本特征提取结束!\n 文本特征保存路径:./Features/text.csv")
# 颜色特征提取
def getMatrixfrom_bin(self, filename, width=512, oneRow=False):
with open(filename, 'rb') as f:
content = f.read()
hexst = binascii.hexlify(content)
fh = numpy.array([int(hexst[i:i + 2], 16) for i in range(0, len(hexst), 2)])
没有合适的资源?快使用搜索试试~ 我知道了~
MalwareClassification数据
共25个文件
csv:9个
py:6个
png:2个
5星 · 超过95%的资源 需积分: 4 14 下载量 113 浏览量
2023-05-14
13:24:17
上传
评论
收藏 19.12MB RAR 举报
温馨提示
MalwareClassification数据
资源推荐
资源详情
资源评论
收起资源包目录
MalwareClassification.rar (25个子文件)
MalwareClassification
MalwareClassification
Allimgfeature.csv 3.5MB
Results
family
classification_KNN
classifition_All
classification_LR
classification_DT
classification_RF
classification_XGBoost
All3gramfeature.csv 2.57MB
Features
fusion.csv 273KB
texture.csv 1006B
color.csv 15KB
text.csv 257KB
dadaSample
0ACDbR5M3ZhBJajygTuf.asm 11.33MB
0ACDbR5M3ZhBJajygTuf.bytes 5.37MB
0A32eTdBKayjCWhZqDOQ_asm.png 20.15MB
0A32eTdBKayjCWhZqDOQ.asm 35.86MB
0A32eTdBKayjCWhZqDOQ.exe 0B
0A32eTdBKayjCWhZqDOQ.bytes 4.08MB
0A32eTdBKayjCWhZqDOQ_bytes.png 2.49MB
getText.py 2KB
subtrainLabels.csv 21KB
Allglcmasmfeature.csv 925KB
MainWindowControl.py 18KB
getColor.py 2KB
Allglcmbytesfeature.csv 915KB
getTexture.py 3KB
MainWindow.py 14KB
__pycache__
MainWindow.cpython-38.pyc 7KB
MainWindow.cpython-37.pyc 8KB
getMap.py 1KB
MainWindow.ui 16KB
共 25 条
- 1
甜辣uu
- 粉丝: 8383
- 资源: 1103
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功
- 1
- 2
前往页