Created
November 22, 2022 12:21
-
-
Save delijati/200b1b8edfee4227da560b83e0d1c563 to your computer and use it in GitHub Desktop.
Revisions
-
delijati created this gist
Nov 22, 2022. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,112 @@ #include <iostream> #include <cuda_runtime.h> #include <cudnn.h> /** * Minimal example to apply sigmoid activation on a tensor * using cuDNN. **/ int main(int argc, char** argv) { int numGPUs; int driverVersion = 0, runtimeVersion = 0; cudaGetDeviceCount(&numGPUs); std::cout << "Found " << numGPUs << " GPUs." << std::endl; cudaSetDevice(0); // use GPU0 int device; struct cudaDeviceProp devProp; cudaGetDevice(&device); cudaGetDeviceProperties(&devProp, device); cudaDriverGetVersion(&driverVersion); cudaRuntimeGetVersion(&runtimeVersion); std::cout << "Device: " << devProp.name << std::endl; std::cout << "Driver Version: " << driverVersion<<"\n"; std::cout << "Runtime Version: " << runtimeVersion<<"\n"; std::cout << "Compute capability:" << devProp.major << "." 
<< devProp.minor << std::endl; std::cout << "Total amount of global memory: "<<(unsigned long long)devProp.totalGlobalMem<<" bytes\n"; std::cout << "Total amount of constant memory: "<<devProp.totalConstMem<<"bytes\n"; std::cout << "Total amount of shared memory per block: "<<devProp.sharedMemPerBlock<<" bytes\n"; std::cout << "Total number of registers available per block: "<<devProp.regsPerBlock<<"\n"; std::cout << "Warp size: "<<devProp.warpSize<<"\n"; cudnnHandle_t handle_; cudnnCreate(&handle_); std::cout << "Created cuDNN handle" << std::endl; // create the tensor descriptor cudnnDataType_t dtype = CUDNN_DATA_FLOAT; cudnnTensorFormat_t format = CUDNN_TENSOR_NCHW; int n = 1, c = 1, h = 1, w = 10; int NUM_ELEMENTS = n*c*h*w; cudnnTensorDescriptor_t x_desc; cudnnCreateTensorDescriptor(&x_desc); cudnnSetTensor4dDescriptor(x_desc, format, dtype, n, c, h, w); // create the tensor float *x; cudaMallocManaged(&x, NUM_ELEMENTS * sizeof(float)); for(int i=0;i<NUM_ELEMENTS;i++) x[i] = i * 1.00f; std::cout << "Original array: "; for(int i=0;i<NUM_ELEMENTS;i++) std::cout << x[i] << " "; // create activation function descriptor float alpha[1] = {1}; float beta[1] = {0.0}; cudnnActivationDescriptor_t sigmoid_activation; cudnnActivationMode_t mode = CUDNN_ACTIVATION_SIGMOID; cudnnNanPropagation_t prop = CUDNN_NOT_PROPAGATE_NAN; cudnnCreateActivationDescriptor(&sigmoid_activation); cudnnSetActivationDescriptor(sigmoid_activation, mode, prop, 0.0f); cudnnActivationForward( handle_, sigmoid_activation, alpha, x_desc, x, beta, x_desc, x ); cudnnDestroy(handle_); std::cout << std::endl << "Destroyed cuDNN handle." 
<< std::endl; std::cout << "New array: "; for(int i=0;i<NUM_ELEMENTS;i++) std::cout << x[i] << " "; std::cout << std::endl; cudaFree(x); return 0; } /* Info: $ lsb_release -a LSB Version: n/a Distributor ID: ManjaroLinux Description: Manjaro Linux Release: 22.0.0 Codename: Sikaris $ uname -a Linux papagayo 5.15.78-1-MANJARO #1 SMP PREEMPT Thu Nov 10 20:50:09 UTC 2022 x86_64 GNU/Linux $ nvidia-smi -L GPU 0: NVIDIA GeForce MX450 Build: $ g++ -I/opt/cuda/include -I/opt/cuda/targets/ppc64le-linux/include -o hw.o -c hw.cpp $ nvcc -ccbin g++ -m64 -gencode arch=compute_80,code=sm_80 -o hw hw.o -I/opt/cuda/include -I/opt/cuda/targets/ppc64le-linux/include -L/opt/cuda/lib64 -L/opt/cuda/targets/ppc64le-linux/lib -lcublasLt -lcudart -lcublas -lcudnn -lstdc++ -lm $ ./hw Found 1 GPUs. Device: NVIDIA GeForce MX450 Driver Version: 11080 Runtime Version: 11080 Compute capability:7.5 Total amount of global memory: 1969815552 bytes Total amount of constant memory: 65536bytes Total amount of shared memory per block: 49152 bytes Total number of registers available per block: 65536 Warp size: 32 Created cuDNN handle Original array: 0 1 2 3 4 5 6 7 8 9 Destroyed cuDNN handle. New array: 0.5 0.731059 0.880797 0.952574 0.982014 0.993307 0.997527 0.999089 0.999665 0.999877 */