#include "TRT.h"
#include "common.h"
#include "NvInferRuntimeCommon.h"
#include "preprocess.h"
namespace TRT
{
#define USE_FP16 // set USE_INT8 or USE_FP16 or USE_FP32
#define DEVICE 0 // GPU id
#define NMS_THRESH 0.4
#define CONF_THRESH 0.5
#define BATCH_SIZE 1
#define MAX_IMAGE_INPUT_SIZE_THRESH 3000 * 3000 // ensure it exceed the maximum size in the input images !
// stuff we know about the network and the input/output blobs
static const int INPUT_H = Yolo::INPUT_H;
static const int INPUT_W = Yolo::INPUT_W;
static const int CLASS_NUM = Yolo::CLASS_NUM;
static const int OUTPUT_SIZE = Yolo::MAX_OUTPUT_BBOX_COUNT * sizeof(Yolo::Detection) / sizeof(float) + 1; // we assume the yololayer outputs no more than MAX_OUTPUT_BBOX_COUNT boxes that conf >= 0.1
const char* INPUT_BLOB_NAME = "data";
const char* OUTPUT_BLOB_NAME = "prob";
static int get_width(int x, float gw, int divisor = 8) {
return int(ceil((x * gw) / divisor)) * divisor;
}
static int get_depth(int x, float gd) {
if (x == 1) return 1;
int r = round(x * gd);
if (x * gd - int(x * gd) == 0.5 && (int(x * gd) % 2) == 0) {
--r;
}
return std::max<int>(r, 1);
}
class Logger : public ILogger
{
//void log(Severity severity, const char* msg) override
void log(Severity severity, nvinfer1::AsciiChar const* msg) noexcept override
{
// suppress info-level messages
if (severity <= Severity::kWARNING)
std::cout << msg << std::endl;
}
} gLogger;
RTModule::RTModule()
{
cudaSetDevice(DEVICE);
this->engine = NULL;
}
ICudaEngine* RTModule::BuildEngine(string wts_name,int maxBatchSize = 1)
{
IBuilder* builder = createInferBuilder(gLogger);
IBuilderConfig* config = builder->createBuilderConfig();
INetworkDefinition* network = builder->createNetworkV2(0U);
float gd = 0.33,gw = 0.50; //yolov5s
// Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
ITensor* data = network->addInput(INPUT_BLOB_NAME, nvinfer1::DataType::kFLOAT, Dims3{ 3, INPUT_H, INPUT_W });
assert(data);
std::map<std::string, Weights> weightMap = loadWeights(wts_name);
/* ------ yolov5 backbone------ */
auto conv0 = convBlock(network, weightMap, *data, get_width(64, gw), 6, 2, 1, "model.0");
assert(conv0);
auto conv1 = convBlock(network, weightMap, *conv0->getOutput(0), get_width(128, gw), 3, 2, 1, "model.1");
auto bottleneck_CSP2 = C3(network, weightMap, *conv1->getOutput(0), get_width(128, gw), get_width(128, gw), get_depth(3, gd), true, 1, 0.5, "model.2");
auto conv3 = convBlock(network, weightMap, *bottleneck_CSP2->getOutput(0), get_width(256, gw), 3, 2, 1, "model.3");
auto bottleneck_csp4 = C3(network, weightMap, *conv3->getOutput(0), get_width(256, gw), get_width(256, gw), get_depth(6, gd), true, 1, 0.5, "model.4");
auto conv5 = convBlock(network, weightMap, *bottleneck_csp4->getOutput(0), get_width(512, gw), 3, 2, 1, "model.5");
auto bottleneck_csp6 = C3(network, weightMap, *conv5->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(9, gd), true, 1, 0.5, "model.6");
auto conv7 = convBlock(network, weightMap, *bottleneck_csp6->getOutput(0), get_width(1024, gw), 3, 2, 1, "model.7");
auto bottleneck_csp8 = C3(network, weightMap, *conv7->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), true, 1, 0.5, "model.8");
auto spp9 = SPPF(network, weightMap, *bottleneck_csp8->getOutput(0), get_width(1024, gw), get_width(1024, gw), 5, "model.9");
/* ------ yolov5 head ------ */
auto conv10 = convBlock(network, weightMap, *spp9->getOutput(0), get_width(512, gw), 1, 1, 1, "model.10");
auto upsample11 = network->addResize(*conv10->getOutput(0));
assert(upsample11);
upsample11->setResizeMode(ResizeMode::kNEAREST);
upsample11->setOutputDimensions(bottleneck_csp6->getOutput(0)->getDimensions());
ITensor* inputTensors12[] = { upsample11->getOutput(0), bottleneck_csp6->getOutput(0) };
auto cat12 = network->addConcatenation(inputTensors12, 2);
auto bottleneck_csp13 = C3(network, weightMap, *cat12->getOutput(0), get_width(1024, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.13");
auto conv14 = convBlock(network, weightMap, *bottleneck_csp13->getOutput(0), get_width(256, gw), 1, 1, 1, "model.14");
auto upsample15 = network->addResize(*conv14->getOutput(0));
assert(upsample15);
upsample15->setResizeMode(ResizeMode::kNEAREST);
upsample15->setOutputDimensions(bottleneck_csp4->getOutput(0)->getDimensions());
ITensor* inputTensors16[] = { upsample15->getOutput(0), bottleneck_csp4->getOutput(0) };
auto cat16 = network->addConcatenation(inputTensors16, 2);
auto bottleneck_csp17 = C3(network, weightMap, *cat16->getOutput(0), get_width(512, gw), get_width(256, gw), get_depth(3, gd), false, 1, 0.5, "model.17");
/* ------ detect ------ */
IConvolutionLayer* det0 = network->addConvolutionNd(*bottleneck_csp17->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.0.weight"], weightMap["model.24.m.0.bias"]);
auto conv18 = convBlock(network, weightMap, *bottleneck_csp17->getOutput(0), get_width(256, gw), 3, 2, 1, "model.18");
ITensor* inputTensors19[] = { conv18->getOutput(0), conv14->getOutput(0) };
auto cat19 = network->addConcatenation(inputTensors19, 2);
auto bottleneck_csp20 = C3(network, weightMap, *cat19->getOutput(0), get_width(512, gw), get_width(512, gw), get_depth(3, gd), false, 1, 0.5, "model.20");
IConvolutionLayer* det1 = network->addConvolutionNd(*bottleneck_csp20->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.1.weight"], weightMap["model.24.m.1.bias"]);
auto conv21 = convBlock(network, weightMap, *bottleneck_csp20->getOutput(0), get_width(512, gw), 3, 2, 1, "model.21");
ITensor* inputTensors22[] = { conv21->getOutput(0), conv10->getOutput(0) };
auto cat22 = network->addConcatenation(inputTensors22, 2);
auto bottleneck_csp23 = C3(network, weightMap, *cat22->getOutput(0), get_width(1024, gw), get_width(1024, gw), get_depth(3, gd), false, 1, 0.5, "model.23");
IConvolutionLayer* det2 = network->addConvolutionNd(*bottleneck_csp23->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{ 1, 1 }, weightMap["model.24.m.2.weight"], weightMap["model.24.m.2.bias"]);
auto yolo = addYoLoLayer(network, weightMap, "model.24", std::vector<IConvolutionLayer*>{det0, det1, det2});
yolo->getOutput(0)->setName(OUTPUT_BLOB_NAME);
network->markOutput(*yolo->getOutput(0));
// Build engine
builder->setMaxBatchSize(maxBatchSize);
config->setMaxWorkspaceSize(16 * (1 << 20)); // 16MB
#if defined(USE_FP16)
config->setFlag(BuilderFlag::kFP16);
#elif defined(USE_INT8)
std::cout << "Your platform support int8: " << (builder->platformHasFastInt8() ? "true" : "false") << std::endl;
assert(builder->platformHasFastInt8());
config->setFlag(BuilderFlag::kINT8);
Int8EntropyCalibrator2* calibrator = new Int8EntropyCalibrator2(1, INPUT_W, INPUT_H, "./coco_calib/", "int8calib.table", INPUT_BLOB_NAME);
config->setInt8Calibrator(calibrator);
#endif
std::cout << "Building engine, please wait for a while..." << std::endl;
engine = builder->buildEngineWithConfig(*network, *config);
std::cout << "Build engine successfully!" << std::endl;
builder->destroy();
networ
没有合适的资源?快使用搜索试试~ 我知道了~
资源详情
资源评论
资源推荐
收起资源包目录
YOLOv5.rar (64个子文件)
YOLOv5
.vs
YOLOv5
v16
ipch
AutoPCH
b0d7e39c89fe92cd
TRT.ipch 125.69MB
8da5cc58a59d584f
COMMON.ipch 104.13MB
ca18a0ef504b6c2a
4c6e3e14f2cc28fe
931a91785936e432
d69103262155e98
744f66a351c843d5
94e30c08c933464c
YOLOLAYER.ipch 38.56MB
4c0675d40cecd159
88ac53f64be94ade
b4018d55f738347b
6800299187c887bb
1e51086c00693220
BUILDER.ipch 37.25MB
bb0435a61b03f71f
51837d8495868657
CUDA_UTILS.ipch 2.38MB
2c25fe84d7241bae
GLOGGER.ipch 33.94MB
8ea482bbbacceb93
ded4529caa41a324
PREPROCESS.ipch 105MB
2dedd3009d45ca79
484dbf1d28146357
8708b7986538eff6
MAIN.ipch 125.25MB
55f094487dbfbd44
TRT.ipch 104.94MB
.suo 69KB
Browse.VC.db 50.95MB
YOLOv5
preprocess.cu 4KB
TRT.cpp 14KB
yolov5s.onnx 27.98MB
yolov5s.wts 62.28MB
TRT.h 812B
gen_wts.py 2KB
common.h 12KB
dirent.h 31KB
yololayer.cu 12KB
yolov5s_sim.onnx 27.98MB
cuda_utils.h 445B
_bus.jpg 479KB
x64
Release
YOLOv5.ipdb 759KB
YOLOv5.log 5KB
preprocess.cu-241680459.deps 3KB
YOLOv5.tlog
CL.read.1.tlog 108KB
YOLOv5.write.1u.tlog 938B
link.read.1.tlog 5KB
link.write.1.tlog 726B
CL.write.1.tlog 876B
link.command.1.tlog 2KB
YOLOv5.lastbuildstate 221B
CudaCompile.read.1u.tlog 13KB
CL.command.1.tlog 3KB
CudaCompile.write.1u.tlog 152B
builder.obj 1.13MB
yololayer.cu.obj 182KB
yololayer.cu.cache 1KB
preprocess.cu.cache 1KB
main.obj 3.81MB
YOLOv5.iobj 1.28MB
preprocess.cu.obj 42KB
yololayer.cu509965158.deps 3KB
vc142.pdb 2.3MB
TRT.obj 4.38MB
YOLOv5.exe.recipe 280B
gLogger.obj 1.14MB
main.cpp 1KB
Cuda.props 1KB
macros.h 503B
YOLOv5.vcxproj.filters 2KB
utils.h 2KB
YOLOv5.vcxproj 8KB
_zidane.jpg 241KB
yolo.h 5KB
YOLOv5.vcxproj.user 226B
yolov5.trt 23.41MB
preprocess.h 357B
x64
Release
YOLOv5.exp 11KB
YOLOv5.pdb 2.69MB
YOLOv5.exe 99KB
YOLOv5.lib 19KB
YOLOv5.sln 1KB
共 64 条
- 1
HollowKnightZ
- 粉丝: 81
- 资源: 3
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功
评论2