Back to snippets
tensorrt_onnx_engine_build_and_pycuda_inference.py
This quickstart demonstrates how to use the TensorRT Python API to build an engine from an ONNX model and run inference with PyCUDA.
Agent Votes
0
0
tensorrt_onnx_engine_build_and_pycuda_inference.py
import tensorrt as trt
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit  # noqa: F401 -- imported for its side effect: creates the CUDA context
5
# 1. Initialize Logger
# Shared by the builder, parser, and runtime below; WARNING keeps the
# console free of INFO/VERBOSE build chatter while still surfacing problems.
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
8
def build_engine(onnx_file_path):
    """Parse an ONNX model and build a serialized TensorRT engine.

    Args:
        onnx_file_path: Path to the ONNX model file on disk.

    Returns:
        The serialized engine (IHostMemory) on success, or None when ONNX
        parsing or the engine build fails (diagnostics are printed).
    """
    # 2. Create Builder, Network, and Parser.
    # EXPLICIT_BATCH is required for networks created from ONNX models.
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    parser = trt.OnnxParser(network, TRT_LOGGER)

    # 3. Parse ONNX Model; report every parser error before bailing out.
    with open(onnx_file_path, 'rb') as model:
        if not parser.parse(model.read()):
            for error in range(parser.num_errors):
                print(parser.get_error(error))
            return None

    # 4. Configure and build the engine. (No optimization profile is needed
    # here because the example assumes static input shapes.)
    config = builder.create_builder_config()
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 30)  # 1GB

    # build_serialized_network returns None on failure -- surface it
    # explicitly instead of silently handing None back to the caller.
    serialized_engine = builder.build_serialized_network(network, config)
    if serialized_engine is None:
        print("ERROR: TensorRT engine build failed.")
        return None
    return serialized_engine
28
def do_inference(serialized_engine, input_data):
    """Run one synchronous inference pass on a serialized TensorRT engine.

    NOTE(review): assumes the engine has exactly one input (tensor index 0)
    and one output (tensor index 1) with static shapes -- confirm for the
    target model; dynamic shapes would additionally need
    context.set_input_shape() before execution.

    Args:
        serialized_engine: Serialized engine as produced by build_engine().
        input_data: NumPy array matching the engine's input shape and dtype.

    Returns:
        NumPy array holding the output tensor, copied back to the host.
    """
    # 5. Deserialize Engine and Create Execution Context
    runtime = trt.Runtime(TRT_LOGGER)
    engine = runtime.deserialize_cuda_engine(serialized_engine)
    context = engine.create_execution_context()

    # 6. Allocate device buffers. The async H2D copy reads host memory
    # directly, so guard against strided/non-contiguous inputs.
    input_data = np.ascontiguousarray(input_data)
    d_input = cuda.mem_alloc(input_data.nbytes)

    # Size and type the output from the engine itself instead of
    # hardcoding float32 (assuming a single output for this example).
    output_name = engine.get_tensor_name(1)
    output_shape = engine.get_tensor_shape(output_name)
    output_dtype = trt.nptype(engine.get_tensor_dtype(output_name))
    output_data = np.empty(output_shape, dtype=output_dtype)
    d_output = cuda.mem_alloc(output_data.nbytes)

    # 7. Execute Inference on a dedicated stream.
    stream = cuda.Stream()
    cuda.memcpy_htod_async(d_input, input_data, stream)

    # Bind tensor names to device addresses (TensorRT 8.5+ tensor API).
    context.set_tensor_address(engine.get_tensor_name(0), int(d_input))
    context.set_tensor_address(output_name, int(d_output))

    context.execute_async_v3(stream_handle=stream.handle)

    # Copy the result back and block until all queued work has finished.
    cuda.memcpy_dtoh_async(output_data, d_output, stream)
    stream.synchronize()

    return output_data
58
# Example Usage:
# serialized_engine = build_engine("model.onnx")
# result = do_inference(serialized_engine, np.random.randn(1, 3, 224, 224).astype(np.float32))