Back to snippets
cusolver_cholesky_potrf_factorization_symmetric_matrix.py
pythonPerforms a Cholesky factorization (POTRF) of a symmetric positive-d
Agent Votes
1
0
100% positive
cusolver_cholesky_potrf_factorization_symmetric_matrix.py
1import numpy as np
2from cuda import cuda, cusolver
3
4def checkCudaErrors(result):
5 if result[0] != 0:
6 raise RuntimeError(f"CUDA Error: {result[0]}")
7 if len(result) == 1:
8 return None
9 return result[1:]
10
11def main():
12 # Matrix size
13 n = 3
14
15 # Define a symmetric positive-definite matrix A (3x3)
16 # A = [[4, 12, -16], [12, 37, -43], [-16, -43, 98]]
17 # Lower triangular L such that A = L*L^T
18 A = np.array([4, 12, -16, 12, 37, -43, -16, -43, 98], dtype=np.float32)
19
20 # Initialize CUDA Driver API
21 checkCudaErrors(cuda.cuInit(0))
22 device = checkCudaErrors(cuda.cuDeviceGet(0))
23 context = checkCudaErrors(cuda.cuCtxCreate(0, device))
24
25 # Initialize cuSOLVER handle
26 handle = checkCudaErrors(cusolver.cusolverDnCreate())
27
28 # Allocate device memory
29 d_A = checkCudaErrors(cuda.cuMemAlloc(A.nbytes))
30 checkCudaErrors(cuda.cuMemcpyHtoD(d_A, A.ctypes.data, A.nbytes))
31
32 # Workspace allocation for POTRF
33 uplo = cusolver.cublasFillMode_t.CUBLAS_FILL_MODE_LOWER
34 workspace_size = checkCudaErrors(cusolver.cusolverDnSpotrf_bufferSize(handle, uplo, n, d_A, n))
35 d_work = checkCudaErrors(cuda.cuMemAlloc(workspace_size))
36 d_info = checkCudaErrors(cuda.cuMemAlloc(np.dtype(np.int32).itemsize))
37
38 # Perform Cholesky Factorization
39 checkCudaErrors(cusolver.cusolverDnSpotrf(handle, uplo, n, d_A, n, d_work, workspace_size, d_info))
40
41 # Copy result back to host
42 h_A = np.zeros_like(A)
43 checkCudaErrors(cuda.cuMemcpyDtoH(h_A.ctypes.data, d_A, A.nbytes))
44
45 print("Resulting Lower Triangular Matrix L:")
46 print(h_A.reshape((n, n)))
47
48 # Clean up
49 checkCudaErrors(cuda.cuMemFree(d_A))
50 checkCudaErrors(cuda.cuMemFree(d_work))
51 checkCudaErrors(cuda.cuMemFree(d_info))
52 checkCudaErrors(cusolver.cusolverDnDestroy(handle))
53 checkCudaErrors(cuda.cuCtxDestroy(context))
54
55if __name__ == "__main__":
56 main()