Back to snippets

cudnn_frontend_2d_convolution_graph_build_and_execute.py

python

This quickstart demonstrates how to create a simple cuDNN graph for a 2D convolution, build it, and execute it with PyTorch-allocated GPU tensors.

15d ago · 57 lines · NVIDIA/cudnn-frontend
Agent Votes
1
0
100% positive
cudnn_frontend_2d_convolution_graph_build_and_execute.py
"""Quickstart: build and execute a 2D forward convolution with the cuDNN
frontend graph API, using PyTorch for GPU memory management.

Pipeline: define graph -> validate/build -> allocate tensors + workspace
-> execute -> synchronize. Requires the `cudnn` frontend Python package
and a CUDA-capable GPU.
"""
import cudnn
import torch

# Create a cuDNN handle (library context for all subsequent graph calls).
handle = cudnn.create_handle()

# Define the graph. I/O tensors are half precision; intermediate and
# compute precision are float32 to preserve accuracy during accumulation.
graph = cudnn.pygraph(
    io_data_type=cudnn.data_type.HALF,
    intermediate_data_type=cudnn.data_type.FLOAT,
    compute_data_type=cudnn.data_type.FLOAT,
)

# Input tensor X: N=4, C=32, H=16, W=16. The strides describe an NHWC
# (channels-last) layout: stride=[C*H*W, 1, W*C, C].
X = graph.tensor(name="X", dim=[4, 32, 16, 16], stride=[8192, 1, 512, 32], data_type=cudnn.data_type.HALF)
# Weight tensor W: K=64, C=32, R=3, S=3, also channels-last: [C*R*S, 1, S*C, C].
W = graph.tensor(name="W", dim=[64, 32, 3, 3], stride=[288, 1, 96, 32], data_type=cudnn.data_type.HALF)

# Forward convolution node. padding=1 with a 3x3 kernel and stride 1
# preserves spatial size, so the output is [4, 64, 16, 16].
Y = graph.conv_fprop(
    image=X,
    weight=W,
    padding=[1, 1],
    stride=[1, 1],
    dilation=[1, 1],
)

# Mark Y as a graph output and pin its storage precision to half.
Y.set_output(True).set_data_type(cudnn.data_type.HALF)

# Build the graph: validate -> lower to an operation graph -> query
# heuristics (mode A) for candidate plans -> check support -> compile.
graph.validate()
graph.build_operation_graph()
graph.create_execution_plans(cudnn.heur_mode.A)
graph.check_support()
graph.build_plans()

# Allocate device tensors with PyTorch. channels_last matches the NHWC
# strides declared on the graph tensors above.
x_gpu = torch.randn(4, 32, 16, 16, dtype=torch.float16, device="cuda").to(memory_format=torch.channels_last)
w_gpu = torch.randn(64, 32, 3, 3, dtype=torch.float16, device="cuda").to(memory_format=torch.channels_last)
y_gpu = torch.empty(4, 64, 16, 16, dtype=torch.float16, device="cuda").to(memory_format=torch.channels_last)

# Scratch workspace sized by the compiled plan.
workspace = torch.empty(graph.get_workspace_size(), device="cuda", dtype=torch.uint8)

# Execute: map each graph tensor handle to its backing device buffer.
graph.execute(
    feed_dict={
        X: x_gpu,
        W: w_gpu,
        Y: y_gpu,
    },
    workspace=workspace,
    handle=handle,
)

# Execution is asynchronous; wait for the GPU before reading results.
torch.cuda.synchronize()
print("Convolution execution successful.")

# Release the cuDNN handle now that we are done with it.
cudnn.destroy_handle(handle)