tensorrt_onnx_model_engine_build_and_pycuda_inference.py
This script demonstrates how to create a TensorRT network definition from an ONNX model, build an engine, and run inference with PyCUDA.
import tensorrt as trt
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit  # Importing this initializes CUDA and creates a context

# Establish logging for TensorRT
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

def build_engine(model_file):
    # Initialize the builder, network, and parser
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    parser = trt.OnnxParser(network, TRT_LOGGER)

    # Read the model file and parse it, reporting any parser errors
    with open(model_file, 'rb') as model:
        if not parser.parse(model.read()):
            for i in range(parser.num_errors):
                print(parser.get_error(i))
            return None

    # Configure the build and cap the workspace memory pool (optional, e.g. 1 GiB)
    config = builder.create_builder_config()
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 30)
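    # If your ONNX model has dynamic input shapes, the build also needs an
    # optimization profile. A minimal sketch, assuming a hypothetical input
    # tensor named "input" with a dynamic batch dimension (adjust the name
    # and the (min, opt, max) shapes to your model):
    #
    #   profile = builder.create_optimization_profile()
    #   profile.set_shape("input", (1, 3, 224, 224), (8, 3, 224, 224), (32, 3, 224, 224))
    #   config.add_optimization_profile(profile)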

    # Build a serialized engine, then deserialize it into a usable engine object
    serialized_engine = builder.build_serialized_network(network, config)
    if serialized_engine is None:
        return None
    runtime = trt.Runtime(TRT_LOGGER)
    return runtime.deserialize_cuda_engine(serialized_engine)

def main():
    onnx_file_path = "model.onnx"  # Replace with your ONNX model path
    engine = build_engine(onnx_file_path)
    if engine is None:
        raise RuntimeError(f"Failed to build TensorRT engine from {onnx_file_path}")
    context = engine.create_execution_context()

    # Set up I/O bindings: one page-locked host buffer and one device buffer per binding
    inputs, outputs, bindings, stream = [], [], [], cuda.Stream()

    for binding in engine:
        # Note: get_binding_shape returns -1 for dynamic dimensions; this
        # snippet assumes a model with fully static shapes.
        size = trt.volume(engine.get_binding_shape(binding))
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        bindings.append(int(device_mem))
        if engine.binding_is_input(binding):
            inputs.append({'host': host_mem, 'device': device_mem})
        else:
            outputs.append({'host': host_mem, 'device': device_mem})

    # Prepare dummy input data that matches the input buffer's dtype
    input_data = np.random.random_sample(inputs[0]['host'].shape).astype(inputs[0]['host'].dtype)
    np.copyto(inputs[0]['host'], input_data)

    # Transfer input data to the GPU
    for inp in inputs:
        cuda.memcpy_htod_async(inp['device'], inp['host'], stream)

    # Run inference asynchronously on the stream
    context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)

    # Transfer predictions back from the GPU
    for out in outputs:
        cuda.memcpy_dtoh_async(out['host'], out['device'], stream)

    # Synchronize the stream so the host output buffers are safe to read
    stream.synchronize()

    # Print results
    print(f"Inference complete. Output shape: {outputs[0]['host'].shape}")

if __name__ == "__main__":
    main()
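
Note: the binding-oriented calls used above (get_binding_shape, binding_is_input, execute_async_v2) were deprecated in TensorRT 8.5 and removed in TensorRT 10, so this script targets TensorRT 8.x. A minimal sketch of the equivalent name-based tensor API on newer releases, reusing the engine, context, and stream set up in main() and again assuming static shapes:

# Sketch: name-based I/O for TensorRT 8.5+ / 10.x (replaces the binding API above)
buffers = {}
for i in range(engine.num_io_tensors):
    name = engine.get_tensor_name(i)
    shape = engine.get_tensor_shape(name)  # static shapes assumed
    dtype = trt.nptype(engine.get_tensor_dtype(name))
    host_mem = cuda.pagelocked_empty(trt.volume(shape), dtype)
    device_mem = cuda.mem_alloc(host_mem.nbytes)
    buffers[name] = (host_mem, device_mem)
    # Register each tensor's device address with the execution context
    context.set_tensor_address(name, int(device_mem))
    if engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT:
        cuda.memcpy_htod_async(device_mem, host_mem, stream)

# execute_async_v3 takes only the stream; tensor addresses were set above
context.execute_async_v3(stream_handle=stream.handle)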