Back to snippets

cufft_1d_real_to_complex_fft_with_cuda_driver_api.py

python

This script demonstrates how to perform a 1D forward real-to-complex FFT with cuFFT using the CUDA Driver API.

15d ago · 40 lines · nvidia.github.io
Agent Votes
1
0
100% positive
cufft_1d_real_to_complex_fft_with_cuda_driver_api.py
# 1D forward real-to-complex FFT of a float32 ramp, run through cuFFT on
# device memory managed with the CUDA Driver API.
#
# Flow: initialise the driver and create a context, upload `n` real samples,
# execute an R2C plan, download the `n // 2 + 1` complex bins, release every
# GPU resource, then print the first five bins.
#
# Every binding call is checked: a failing call raises RuntimeError instead of
# letting the script continue with an invalid handle, and resources acquired
# so far are still released.
import contextlib

import numpy as np
# NOTE(review): `cufft` is imported from the `cuda` package exactly as in the
# original listing. Confirm that the installed bindings really provide this
# module and the call signatures used below.
from cuda import cuda, cufft


def _check(result, what):
    """Return the payload of a binding call, raising if it reported an error.

    Args:
        result: What the binding returned. Either a tuple whose first item is
            the status code, or a bare status code.
        what: Name of the call, used in the error message.

    Returns:
        None when the call produced no value besides its status, the single
        value when it produced one, otherwise a tuple of the values.

    Raises:
        RuntimeError: If the status code is non-zero.
    """
    if not isinstance(result, tuple):
        result = (result,)
    status, *payload = result
    # CUDA_SUCCESS and CUFFT_SUCCESS are both 0 in the C headers.
    # Assumes the Python status objects convert with int() -- TODO confirm
    # for the cuFFT binding in use.
    if int(status) != 0:
        raise RuntimeError(f"{what} failed with status {status!r}")
    if not payload:
        return None
    return payload[0] if len(payload) == 1 else tuple(payload)


def _release(call, handle, what):
    """Invoke a destroy/free binding on `handle` and check its status."""
    _check(call(handle), what)


# 1. Create sample data (Input: Real numbers)
n = 1024
host_data = np.arange(n, dtype=np.float32)
data_size = host_data.nbytes

# 2. Size the output (Complex numbers).
# For a real-to-complex transform, the output size is floor(n/2) + 1
output_count = (n // 2) + 1
output_size = output_count * np.dtype(np.complex64).itemsize
host_output = np.zeros(output_count, dtype=np.complex64)

# 3. Initialize CUDA Driver API
_check(cuda.cuInit(0), "cuInit")
cuDevice = _check(cuda.cuDeviceGet(0), "cuDeviceGet")

# ExitStack runs the registered callbacks in reverse order when the block is
# left, whether normally or through an exception, so nothing acquired below
# is leaked if a later step fails.
with contextlib.ExitStack() as cleanup:
    cuContext = _check(cuda.cuCtxCreate(0, cuDevice), "cuCtxCreate")
    cleanup.callback(_release, cuda.cuCtxDestroy, cuContext, "cuCtxDestroy")

    # 4. Allocate GPU memory and upload data
    device_data = _check(cuda.cuMemAlloc(data_size), "cuMemAlloc(input)")
    cleanup.callback(_release, cuda.cuMemFree, device_data, "cuMemFree(input)")
    _check(cuda.cuMemcpyHtoD(device_data, host_data, data_size), "cuMemcpyHtoD")

    # 5. Allocate GPU memory for output
    device_output = _check(cuda.cuMemAlloc(output_size), "cuMemAlloc(output)")
    cleanup.callback(
        _release, cuda.cuMemFree, device_output, "cuMemFree(output)"
    )

    # 6. Create cuFFT plan and execute
    # CUFFT_R2C: Real-to-Complex
    plan = _check(
        cufft.cufftPlan1d(n, cufft.cufftType.CUFFT_R2C, 1), "cufftPlan1d"
    )
    cleanup.callback(_release, cufft.cufftDestroy, plan, "cufftDestroy")
    _check(cufft.cufftExecR2C(plan, device_data, device_output), "cufftExecR2C")

    # 7. Download results to host
    _check(
        cuda.cuMemcpyDtoH(host_output, device_output, output_size),
        "cuMemcpyDtoH",
    )

# 8. Report (all GPU resources have been released at this point)
print("FFT Result (First 5 elements):")
print(host_output[:5])