1. Multi-threaded TensorRT inference
Each thread must create its own CUDA context. Call ctx.pop() right after the context is initialized, ctx.push() before running inference, and ctx.pop() again before the inference routine returns:
import pycuda.driver as cuda
import tensorrt as trt

class detector():
    def __init__(self, device):
        # One CUDA context per thread, bound to the target device.
        self.ctx = cuda.Device(device).make_context()
        self.engine = self.get_engine()
        self.context = self.get_context()
        self.inputs, self.outputs, self.bindings, self.stream = self.allocate_buffers(self.engine)
        self.inference_fn = self.do_inference if trt.__version__[0] < '7' \
            else self.do_inference_v2
        # This pop is required; omitting it raises an error in practice.
        self.ctx.pop()
    def do_inference_v2(self, context, bindings, inputs, outputs, stream):
        # The push here is required: it makes this thread use the context
        # created in __init__.
        self.ctx.push()
        # Transfer input data to the GPU.
        [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
        # Run inference.
        context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
        # Transfer predictions back from the GPU.
        [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
        # Synchronize the stream.
        stream.synchronize()
        # Pop the context before returning, matching the push above.
        self.ctx.pop()
        # Return only the host outputs.
        return [out.host for out in outputs]
    def detect(self):
        """
        pre process
        """
        res = self.do_inference_v2(self.context, self.bindings, self.inputs,
                                   self.outputs, self.stream)
"""
post process
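The helpers referenced in __init__ (get_engine, get_context, allocate_buffers) are not shown above. Below is a minimal sketch of what they could delegate to, following the buffer-allocation pattern from the official TensorRT Python samples and the pre-8.5 binding API that execute_async_v2 implies; load_engine, engine_path, HostDeviceMem, and TRT_LOGGER are illustrative names, not part of the original code, and static input shapes are assumed.

import pycuda.driver as cuda
import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

class HostDeviceMem:
    # Pairs a pagelocked host array with its matching device allocation.
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

def load_engine(engine_path):
    # Assumption: the engine was serialized beforehand (e.g. with trtexec).
    with open(engine_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
        return runtime.deserialize_cuda_engine(f.read())

def allocate_buffers(engine):
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding))
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        host_mem = cuda.pagelocked_empty(size, dtype)   # pinned host buffer
        device_mem = cuda.mem_alloc(host_mem.nbytes)    # matching device buffer
        bindings.append(int(device_mem))
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
    return inputs, outputs, bindings, stream

Note that these allocations need an active CUDA context, which is why __init__ performs them between make_context() and the final ctx.pop().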
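A usage sketch for the multi-threaded case this section describes: one detector instance per thread, each holding its own context. cuda.init() is used instead of importing pycuda.autoinit because the contexts are created manually; the device ids, thread count, and the print call are arbitrary, and context cleanup at shutdown is omitted.

import threading

import pycuda.driver as cuda

def worker(device_id):
    det = detector(device_id)   # creates this thread's context, then pops it
    res = det.detect()          # push/pop happen inside do_inference_v2
    print(device_id, [r.shape for r in res])

if __name__ == "__main__":
    cuda.init()                 # manual CUDA init; do not import pycuda.autoinit
    threads = [threading.Thread(target=worker, args=(i,)) for i in range(2)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()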