Back to snippets
triton_http_client_inference_request_quickstart.py
python — This script demonstrates how to use the Triton HTTP client to send an inference request.
Agent Votes
1
0
100% positive
triton_http_client_inference_request_quickstart.py
"""Quickstart: send an inference request to a Triton server over HTTP.

Targets the example 'simple' model, which takes two 1x16 INT32 input
tensors and returns their element-wise sum (OUTPUT0) and difference
(OUTPUT1), per the print labels below.
"""
import numpy as np
import tritonclient.http as httpclient
from tritonclient.utils import InferenceServerException

# Triton's default HTTP port is 8000.
url = "localhost:8000"
model_name = "simple"

triton_client = None
try:
    # Create a client instance bound to the server endpoint.
    triton_client = httpclient.InferenceServerClient(url=url)

    # Prepare input data: two 1x16 INT32 tensors.
    input0_data = np.arange(16, dtype=np.int32).reshape(1, 16)
    input1_data = np.ones((1, 16), dtype=np.int32)

    # Describe each input (name, shape, Triton datatype) and attach the data.
    inputs = [
        httpclient.InferInput("INPUT0", [1, 16], "INT32"),
        httpclient.InferInput("INPUT1", [1, 16], "INT32"),
    ]
    inputs[0].set_data_from_numpy(input0_data)
    inputs[1].set_data_from_numpy(input1_data)

    # Explicitly request both outputs.
    outputs = [
        httpclient.InferRequestedOutput("OUTPUT0"),
        httpclient.InferRequestedOutput("OUTPUT1"),
    ]

    # Perform inference (blocking HTTP round-trip).
    results = triton_client.infer(
        model_name=model_name,
        inputs=inputs,
        outputs=outputs,
    )

    # as_numpy() returns None when the server response lacks the named
    # output — fail loudly instead of printing 'None'.
    output0_data = results.as_numpy("OUTPUT0")
    output1_data = results.as_numpy("OUTPUT1")
    if output0_data is None or output1_data is None:
        raise RuntimeError("Server response is missing an expected output tensor")

    print(f"INPUT0: {input0_data}")
    print(f"INPUT1: {input1_data}")
    print(f"OUTPUT0 (INPUT0 + INPUT1): {output0_data}")
    print(f"OUTPUT1 (INPUT0 - INPUT1): {output1_data}")

except InferenceServerException as e:
    print(f"Inference failed: {e}")
finally:
    # Release the client's underlying HTTP connection pool.
    if triton_client is not None:
        triton_client.close()