MTCNN_by_Toocy7.zip资源-CSDN文库

共25个文件

py：9个

pyc：5个

pth：5个

需积分: 11 28 浏览量 2020-05-27 10:23:06 上传评论收藏 15.1MB ZIP 举报

**MTCNN(多任务级联卷积神经网络)**是一种在计算机视觉领域广泛使用的面部检测技术，由Zhang et al. 在2016年的论文《Joint Face Detection and Alignment Using Multi-task Cascaded Convolutional Networks》中提出。该模型通过三个连续的卷积神经网络（P-Net、R-Net、O-Net）实现端到端的面部检测和关键点定位，具有高效和准确的特点。 **1. P-Net（Proposal Network）** P-Net是整个MTCNN框架的第一步，它主要负责初步的面部区域检测。P-Net使用了多任务学习，即同时进行人脸/非人脸分类和边界框回归。输出包含两个部分：面部框的回归坐标和每个候选框内是否包含人脸的概率。P-Net的输出结果用于筛选出潜在的人脸区域，并为下一步提供候选框。 **2. R-Net（Refine Network）** R-Net是对P-Net初步筛选出的候选框进行精炼的网络。R-Net同样采用多任务学习，对候选框进行更精确的调整，并进一步确定人脸存在性。R-Net的目的是减少假阳性，并提高检测框的精度。 **3. O-Net（Output Network）** O-Net是最后一步，它在R-Net的基础上进行更细致的面部关键点定位，包括眼睛、鼻子、嘴巴等。O-Net不仅优化面部框，还预测5个关键点的位置，这五个点通常是左眼、右眼、鼻尖、左嘴角和右嘴角。 **4. 数据集生成与预处理** 在复现和改进MTCNN的过程中，数据集的生成至关重要。通常，这涉及到收集大量包含人脸的图像，然后人工标注每个图像的面部框和关键点。生成数据集的代码可能包括图像裁剪、翻转、缩放以及归一化等预处理步骤，以适应神经网络的输入需求。 **5. 工具代码** 在MTCNN的实现中，可能会用到各种工具，如Python库（如PyTorch、torchvision、Pillow等）、数据处理工具（如NumPy）、可视化工具（如Matplotlib）等。这些工具代码用于处理图像、训练模型、绘制损失函数和准确率曲线等。 **6. 网络模型改进** 在"last_version"中，可能包含了作者对原始MTCNN模型的改进。这可能涉及修改网络结构（如增加或减少层、调整卷积核大小等）、优化算法（如更换损失函数、调整学习率策略）、正则化手段（如L1或L2正则化、Dropout）等，以提升模型的性能和泛化能力。 **7. 训练与检测流程** 训练MTCNN通常包括多个阶段，如数据预处理、网络模型训练、验证集评估以及模型保存。在检测阶段，已训练好的模型将被应用于新的图像，以检测并定位其中的人脸。这个过程可能需要对输入图像进行预处理，然后通过P-Net、R-Net和O-Net逐步得到最终的检测结果。 MTCNN_by_Toocy7.zip提供的内容涵盖了MTCNN的复现和改进，从数据集构建到模型训练、优化，再到实际应用，对于理解和实践面部检测技术具有很高的价值。通过研究和理解这些代码，开发者可以深入学习MTCNN的工作原理，并可能在此基础上开发出更高效的面部检测解决方案。

资源推荐

资源详情

资源评论

收起资源包目录

MTCNN.zip （25个子文件）

MTCNN_祝斌林.pptx 7.09MB

MTCNN_last_version

params

Pnet2048.pth 7KB

Onet_Landmark.pth 1.36MB

Rnet.pth 353KB

Onet.pth 1.35MB

Pnet.pth 7KB

tets_img

3.jpg 70KB

4.jpg 144KB

2.jpg 4.87MB

1.jpg 116KB

BRLNSDB.TTF 95KB

Tools.py 4KB

Gen_data.py 8KB

TR.py 337B

TP.py 341B

Dataset.py 1KB

__pycache__

Tools.cpython-37.pyc 3KB

Train.cpython-37.pyc 2KB

Module.cpython-37.pyc 3KB

Dataset.cpython-37.pyc 1KB

Train_Onet_Landmark.cpython-37.pyc 5KB

Module.py 3KB

Detector.py 8KB

Train_Onet_Landmark.py 6KB

Train.py 3KB

import time

import numpy as np
import torch
from PIL import Image
from PIL import ImageDraw
from torchvision import transforms

from MTCNN_last_version import Tools
from MTCNN_last_version.Tools import convert_to_squre
from MTCNN_last_version.Module import RNet, PNet
from MTCNN_last_version.Train_Onet_Landmark import ONet  # Landmark


class Detector:
    """Run a P-Net -> R-Net -> O-Net cascade over a PIL image.

    Each stage produces candidate rows ``[x1, y1, x2, y2, cls, ...]`` that are
    filtered with the project's ``Tools.nms`` / ``Tools.nms2`` helpers and
    squared with ``convert_to_squre`` before being cropped for the next stage.
    """

    def __init__(self, pp=r'./params/Pnet2048.pth', rp=r'./params/Rnet.pth',
                 op=r'./params/Onet_Landmark.pth', isCuda=True):
        """Build the three networks and load their weights.

        Args:
            pp: Path to the P-Net state dict.
            rp: Path to the R-Net state dict.
            op: Path to the O-Net (landmark variant) state dict.
            isCuda: Request GPU inference. Ignored (CPU is used) when CUDA is
                not available, so the default no longer crashes on CPU hosts.
        """
        self.isCuda = bool(isCuda) and torch.cuda.is_available()

        self.pnet = PNet()
        self.rnet = RNet()
        self.onet = ONet()

        # Weights are deserialized on CPU first so checkpoints saved from a
        # GPU session load on any host; the modules are moved afterwards.
        self.pnet.load_state_dict(torch.load(pp, map_location='cpu'))
        self.rnet.load_state_dict(torch.load(rp, map_location='cpu'))
        self.onet.load_state_dict(torch.load(op, map_location='cpu'))

        if self.isCuda:
            self.pnet.cuda()
            self.rnet.cuda()
            self.onet.cuda()

        self.pnet.eval()
        self.rnet.eval()
        self.onet.eval()

        self.__transfrom = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])

    def detect(self, image):
        """Run the full cascade on ``image`` and return the O-Net boxes.

        Args:
            image: A PIL image (it is read via ``.size``, ``.resize`` and
                ``.crop``).

        Returns:
            Whatever ``Tools.nms`` returns for the O-Net candidates, whose
            rows are built as ``[x1, y1, x2, y2, cls, 5 x (lx, ly)]``; an
            empty ``np.array([])`` as soon as any stage yields no boxes.
        """
        empty = np.array([])

        # Inference only: do not rely on the caller to disable autograd.
        with torch.no_grad():
            t1 = time.time()
            p_boxes = self.__pnet_detect(image)
            print('Pbox', p_boxes.shape)
            if p_boxes.shape[0] == 0:
                return empty
            t_p = time.time() - t1

            t3 = time.time()
            r_boxes = self.__rnet_detect(image, p_boxes)
            print('Rbox', r_boxes.shape)
            if r_boxes.shape[0] == 0:
                return empty
            t_r = time.time() - t3

            t5 = time.time()
            o_boxes = self.__onet_detect(image, r_boxes)
            print('Obox', o_boxes.shape)
            if o_boxes.shape[0] == 0:
                return empty
            t_o = time.time() - t5

        t_sum = t_p + t_r + t_o
        print('t_sum :{} t_p: {} t_r: {} t_o: {}'.format(t_sum, t_p, t_r, t_o))
        return o_boxes

    def __pnet_detect(self, x):
        """Slide P-Net over an image pyramid and return NMS-filtered boxes.

        The image is repeatedly shrunk by a factor of 0.68 until its shorter
        side is no longer greater than 12 pixels; every location whose
        confidence exceeds 0.3 is mapped back to original-image coordinates.
        """
        boxes = []
        img = x
        w, h = img.size
        min_side = min(w, h)
        scale = 1

        while min_side > 12:
            img_data = self.__transfrom(img)
            if self.isCuda:
                img_data = img_data.cuda()
            img_data = img_data.unsqueeze(0)

            _cls, _offset = self.pnet(img_data)
            cls = _cls[0][0].detach().cpu()
            offset = _offset[0].detach().cpu()

            indexes = torch.nonzero(torch.gt(cls, 0.3))
            for idx in indexes:
                boxes.append(
                    self.__box(idx, offset, cls[idx[0], idx[1]], scale))

            scale *= 0.68
            _w = int(w * scale)
            _h = int(h * scale)
            img = img.resize((_w, _h))
            min_side = min(_w, _h)

        if not boxes:
            # Nothing passed the threshold at any scale.
            return np.array([])
        return Tools.nms2(np.array(boxes), thresh=0.3)

    def __box(self, start_index, offset, cls, scale, stride=2, side_len=12):
        """Map one P-Net output cell back to an original-image box.

        Args:
            start_index: ``(row, col)`` index of the cell in the P-Net map.
            offset: Offset map indexed as ``offset[:, row, col]``; its first
                four entries shift x1, y1, x2, y2 as fractions of the window.
            cls: Confidence of the cell.
            scale: Pyramid scale the cell was found at.
            stride: Pixel step between neighbouring cells.
            side_len: Side of the window each cell corresponds to.

        Returns:
            ``[x1, y1, x2, y2, cls]`` as plain Python floats, so that
            ``np.array`` over many boxes yields a numeric array directly.
        """
        row = int(start_index[0])  # height axis, y
        col = int(start_index[1])  # width axis, x

        _x1 = (col * stride) / scale
        _y1 = (row * stride) / scale
        _x2 = (col * stride + side_len) / scale
        _y2 = (row * stride + side_len) / scale

        ow = _x2 - _x1
        oh = _y2 - _y1

        _offset = offset[:, row, col].tolist()
        x1 = _x1 + ow * _offset[0]
        y1 = _y1 + oh * _offset[1]
        x2 = _x2 + ow * _offset[2]
        y2 = _y2 + oh * _offset[3]

        return [x1, y1, x2, y2, float(cls)]

    def __crop_batch(self, image, boxes, size):
        """Crop every box from ``image``, resize to ``size`` x ``size`` and stack."""
        crops = []
        for _box in boxes:
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])
            img = image.crop((_x1, _y1, _x2, _y2)).resize((size, size))
            crops.append(self.__transfrom(img))

        batch = torch.stack(crops)
        if self.isCuda:
            batch = batch.cuda()
        return batch

    @staticmethod
    def __refine_box(_box, off):
        """Apply fractional offsets ``off[0:4]`` to the integer-truncated box.

        Returns ``(_x1, _y1, ow, oh, [x1, y1, x2, y2])`` where the first four
        values describe the truncated source box and the list is the refined
        box.
        """
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])

        ow = _x2 - _x1
        oh = _y2 - _y1

        refined = [
            _x1 + ow * off[0],
            _y1 + oh * off[1],
            _x2 + ow * off[2],
            _y2 + oh * off[3],
        ]
        return _x1, _y1, ow, oh, refined

    def __rnet_detect(self, image, y):
        """Re-score P-Net boxes on 24x24 crops; keep those with cls > 0.5."""
        pnet_boxes = convert_to_squre(y)
        img_dataset = self.__crop_batch(image, pnet_boxes, 24)

        _cls, _offset = self.rnet(img_dataset)
        _cls = _cls.detach().cpu().numpy()
        offset = _offset.detach().cpu().numpy()

        boxes = []
        indexes, _ = np.where(_cls > 0.5)
        for idx in indexes:
            _, _, _, _, refined = self.__refine_box(pnet_boxes[idx],
                                                    offset[idx])
            boxes.append(refined + [_cls[idx][0]])

        if not boxes:
            return np.array([])
        return Tools.nms(np.array(boxes), 0.3)

    def __onet_detect(self, image, _rnet_box):
        """Re-score R-Net boxes on 48x48 crops and attach five landmarks.

        Each output row is ``[x1, y1, x2, y2, cls, lx1, ly1, ..., lx5, ly5]``;
        landmark coordinates are offsets from the crop's top-left corner
        scaled by the crop width/height.
        """
        rnet_box = convert_to_squre(_rnet_box)
        img_dataset = self.__crop_batch(image, rnet_box, 48)

        _cls, _offset, fll_ = self.onet(img_dataset)
        _cls = _cls.detach().cpu().numpy()
        offset = _offset.detach().cpu().numpy()
        fll = fll_.detach().cpu().numpy()

        boxes = []
        # NOTE(review): 0.001 keeps nearly every candidate; the final
        # filtering is effectively left to NMS. Confirm this is intended.
        indexes, _ = np.where(_cls > 0.001)
        for idx in indexes:
            _x1, _y1, ow, oh, refined = self.__refine_box(rnet_box[idx],
                                                          offset[idx])

            landmarks = []
            for k in range(5):
                landmarks.append(_x1 + ow * fll[idx][2 * k])
                landmarks.append(_y1 + oh * fll[idx][2 * k + 1])

            boxes.append(refined + [_cls[idx][0]] + landmarks)

        if not boxes:
            return np.array([])
        return Tools.nms(np.array(boxes), 0.3, isMin=True)


if __name__ == '__main__':
    t01 = time.time()
    with torch.no_grad():
        image_file = r'C:\Projects\MTCNN_last_version\tets_img\3.jpg'
        detect = Detector()
        with Image.open(image_file) as img:
            boxes = detect.detect(img)
            print(boxes.shape)
            imgDraw = ImageDraw.Draw(img)
            for box in boxes:
                x1 = int(box[0])
                y1 = int(box[1])
                x2 = int(box[2])
                y2 = int(box[3])

                # Columns 5 - 14 hold the five landmark (x, y) pairs.
                fllx1 = int(box[5])
                flly1 = int(box[6])
                fllx2 = int(box[7])
                flly2 = int(box[8])
                fllx3 = int(box[9])
                flly3 = int(box[10])
                fllx4 = int(box[11])
                flly4 = int(box[12])
                fllx5 = int(box[13])
                flly5 = int(box[14])
                # NOTE(review): the available source ends here; the drawing /
                # display statements that presumably follow are not shown.

评论收藏

内容反馈