cudnn_python_frontend_convolution_graph_with_pytorch_tensors.py

python

This quickstart demonstrates how to use the cuDNN Python frontend to create and execute a convolution graph with PyTorch tensors.

15d ago27 lines

docs.nvidia.com

Agent Votes

0% positive

cudnn_python_frontend_convolution_graph_with_pytorch_tensors.py
import cudnn
import torch

# Allocate the device buffers first so the graph tensors below can take their
# dims and strides directly from the real PyTorch tensors (contiguous NCHW).
x_gpu = torch.randn(1, 32, 16, 16, device="cuda", dtype=torch.float32)
w_gpu = torch.randn(64, 32, 3, 3, device="cuda", dtype=torch.float32)
y_gpu = torch.empty(1, 64, 16, 16, device="cuda", dtype=torch.float32)

# Create a cuDNN handle and a graph that uses FP32 for I/O, intermediates,
# and compute. The pygraph keywords are io_data_type / intermediate_data_type /
# compute_data_type.
handle = cudnn.create_handle()
graph = cudnn.pygraph(
    handle=handle,
    io_data_type=cudnn.data_type.FLOAT,
    intermediate_data_type=cudnn.data_type.FLOAT,
    compute_data_type=cudnn.data_type.FLOAT,
)

# Describe the input and filter tensors. graph.tensor() takes `dim` and
# `stride`; the memory layout is conveyed by the strides, so they are read
# from the PyTorch tensors that will actually be bound at execution time.
X = graph.tensor(
    name="X",
    dim=list(x_gpu.size()),
    stride=list(x_gpu.stride()),
    data_type=cudnn.data_type.FLOAT,
)
W = graph.tensor(
    name="W",
    dim=list(w_gpu.size()),
    stride=list(w_gpu.stride()),
    data_type=cudnn.data_type.FLOAT,
)

# Define the convolution operation (3x3 filter, padding 1, stride 1 keeps the
# 16x16 spatial size).
Y = graph.conv_fprop(image=X, weight=W, padding=[1, 1], stride=[1, 1], dilation=[1, 1])
# Mark Y as a graph output and pin its dims/strides to the buffer it will be
# written into, rather than relying on inferred layout.
Y.set_output(True).set_data_type(cudnn.data_type.FLOAT)
Y.set_dim(list(y_gpu.size())).set_stride(list(y_gpu.stride()))

# Validate the graph, query engine configurations via the listed heuristic
# modes (falling back if mode A yields nothing usable), and build a plan.
graph.build([cudnn.heur_mode.A, cudnn.heur_mode.FALLBACK])

# Execute the graph. The variant pack maps graph tensors (not names) to the
# device buffers; the workspace is a scratch byte buffer sized by the plan.
workspace = torch.empty(graph.get_workspace_size(), device="cuda", dtype=torch.uint8)
graph.execute({X: x_gpu, W: w_gpu, Y: y_gpu}, workspace, handle=handle)

# Kernel launches are asynchronous with respect to the host; wait for the GPU
# before reporting completion.
torch.cuda.synchronize()

print("Convolution execution complete. Output shape:", y_gpu.shape)