delijati · November 22, 2022 12:21 · Nov 22, 2022
diff --git a/hw.cpp b/hw.cpp
@@ -0,0 +1,112 @@
+#include <iostream>
+#include <cuda_runtime.h>
+#include <cudnn.h>
+
+
+/**
+ * Minimal example that applies the sigmoid activation, in place, to a
+ * 1x1x1x10 float tensor using cuDNN.
+ *
+ * Steps: print a few properties of GPU 0, fill a managed-memory buffer
+ * with 0..9, run cudnnActivationForward on it, release the cuDNN objects
+ * and print the transformed values.
+ *
+ * Returns 0 on success. Returns 1 if no GPU is found or if any CUDA or
+ * cuDNN call reports an error; the failing call is named on stderr.
+ * On an error return, outstanding GPU resources are not released
+ * explicitly; the process exits right away.
+ **/
+int main(int argc, char** argv)
+{
+    (void)argc;
+    (void)argv;
+
+    // Status checkers: report the failing call on stderr and let the
+    // caller bail out with a non-zero exit code.
+    auto cudaOk = [](cudaError_t status, const char* what) -> bool {
+        if (status == cudaSuccess) return true;
+        std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
+        return false;
+    };
+    auto cudnnOk = [](cudnnStatus_t status, const char* what) -> bool {
+        if (status == CUDNN_STATUS_SUCCESS) return true;
+        std::cerr << what << " failed: " << cudnnGetErrorString(status) << std::endl;
+        return false;
+    };
+
+    // Initialised so a failed query can never print an indeterminate value.
+    int numGPUs = 0;
+    int driverVersion = 0, runtimeVersion = 0;
+    if (!cudaOk(cudaGetDeviceCount(&numGPUs), "cudaGetDeviceCount")) return 1;
+    std::cout << "Found " << numGPUs << " GPUs." << std::endl;
+    if (numGPUs < 1) {
+        std::cerr << "No CUDA-capable GPU available." << std::endl;
+        return 1;
+    }
+
+    if (!cudaOk(cudaSetDevice(0), "cudaSetDevice")) return 1; // use GPU0
+    int device = 0;
+    struct cudaDeviceProp devProp;
+    if (!cudaOk(cudaGetDevice(&device), "cudaGetDevice")) return 1;
+    if (!cudaOk(cudaGetDeviceProperties(&devProp, device), "cudaGetDeviceProperties")) return 1;
+    if (!cudaOk(cudaDriverGetVersion(&driverVersion), "cudaDriverGetVersion")) return 1;
+    if (!cudaOk(cudaRuntimeGetVersion(&runtimeVersion), "cudaRuntimeGetVersion")) return 1;
+
+    std::cout << "Device: " << devProp.name << std::endl;
+    std::cout << "Driver Version: " << driverVersion<<"\n";
+    std::cout << "Runtime Version: " << runtimeVersion<<"\n";
+    std::cout << "Compute capability:" << devProp.major << "." << devProp.minor << std::endl;
+    std::cout << "Total amount of global memory: "<<(unsigned long long)devProp.totalGlobalMem<<" bytes\n";
+    std::cout << "Total amount of constant memory: "<<devProp.totalConstMem<<"bytes\n";
+    std::cout << "Total amount of shared memory per block: "<<devProp.sharedMemPerBlock<<" bytes\n";
+    std::cout << "Total number of registers available per block: "<<devProp.regsPerBlock<<"\n";
+    std::cout << "Warp size: "<<devProp.warpSize<<"\n";
+
+    cudnnHandle_t handle_;
+    if (!cudnnOk(cudnnCreate(&handle_), "cudnnCreate")) return 1;
+    std::cout << "Created cuDNN handle" << std::endl;
+
+    // create the tensor descriptor (NCHW, float, 1x1x1x10)
+    cudnnDataType_t dtype = CUDNN_DATA_FLOAT;
+    cudnnTensorFormat_t format = CUDNN_TENSOR_NCHW;
+    const int n = 1, c = 1, h = 1, w = 10;
+    const int NUM_ELEMENTS = n*c*h*w;
+    cudnnTensorDescriptor_t x_desc;
+    if (!cudnnOk(cudnnCreateTensorDescriptor(&x_desc), "cudnnCreateTensorDescriptor")) return 1;
+    if (!cudnnOk(cudnnSetTensor4dDescriptor(x_desc, format, dtype, n, c, h, w),
+                 "cudnnSetTensor4dDescriptor")) return 1;
+
+    // create the tensor in managed memory so host and device share one pointer
+    float *x = nullptr;
+    if (!cudaOk(cudaMallocManaged(&x, NUM_ELEMENTS * sizeof(float)), "cudaMallocManaged")) return 1;
+    for(int i=0;i<NUM_ELEMENTS;i++) x[i] = i * 1.00f;
+    std::cout << "Original array: ";
+    for(int i=0;i<NUM_ELEMENTS;i++) std::cout << x[i] << " ";
+
+    // create activation function descriptor
+    // alpha/beta are the blend factors passed to cudnnActivationForward;
+    // 1 and 0 mean the output buffer is simply overwritten with the result.
+    const float alpha = 1.0f;
+    const float beta = 0.0f;
+    cudnnActivationDescriptor_t sigmoid_activation;
+    cudnnActivationMode_t mode = CUDNN_ACTIVATION_SIGMOID;
+    cudnnNanPropagation_t prop = CUDNN_NOT_PROPAGATE_NAN;
+    if (!cudnnOk(cudnnCreateActivationDescriptor(&sigmoid_activation),
+                 "cudnnCreateActivationDescriptor")) return 1;
+    if (!cudnnOk(cudnnSetActivationDescriptor(sigmoid_activation, mode, prop, 0.0),
+                 "cudnnSetActivationDescriptor")) return 1;
+
+    // In-place forward pass: x is both the input and the output tensor.
+    if (!cudnnOk(cudnnActivationForward(
+                     handle_,
+                     sigmoid_activation,
+                     &alpha,
+                     x_desc,
+                     x,
+                     &beta,
+                     x_desc,
+                     x),
+                 "cudnnActivationForward")) return 1;
+
+    // Wait for the GPU work to finish before the host reads x again,
+    // instead of depending on a synchronization side effect of teardown.
+    if (!cudaOk(cudaDeviceSynchronize(), "cudaDeviceSynchronize")) return 1;
+
+    // Release every cuDNN object that was created above.
+    if (!cudnnOk(cudnnDestroyActivationDescriptor(sigmoid_activation),
+                 "cudnnDestroyActivationDescriptor")) return 1;
+    if (!cudnnOk(cudnnDestroyTensorDescriptor(x_desc), "cudnnDestroyTensorDescriptor")) return 1;
+    if (!cudnnOk(cudnnDestroy(handle_), "cudnnDestroy")) return 1;
+    std::cout << std::endl << "Destroyed cuDNN handle." << std::endl;
+
+    std::cout << "New array: ";
+    for(int i=0;i<NUM_ELEMENTS;i++) std::cout << x[i] << " ";
+    std::cout << std::endl;
+
+    if (!cudaOk(cudaFree(x), "cudaFree")) return 1;
+    return 0;
+}
+/*
+Info:
+$ lsb_release -a  
+LSB Version:	n/a
+Distributor ID:	ManjaroLinux
+Description:	Manjaro Linux
+Release:	22.0.0
+Codename:	Sikaris
+$ uname -a
+Linux papagayo 5.15.78-1-MANJARO #1 SMP PREEMPT Thu Nov 10 20:50:09 UTC 2022 x86_64 GNU/Linux
+$ nvidia-smi -L          
+GPU 0: NVIDIA GeForce MX450
+Build:
+$ g++ -I/opt/cuda/include -I/opt/cuda/targets/ppc64le-linux/include -o hw.o -c hw.cpp
+$ nvcc -ccbin g++ -m64 -gencode arch=compute_80,code=sm_80 -o hw hw.o -I/opt/cuda/include -I/opt/cuda/targets/ppc64le-linux/include -L/opt/cuda/lib64 -L/opt/cuda/targets/ppc64le-linux/lib -lcublasLt -lcudart -lcublas -lcudnn -lstdc++ -lm
+$ ./hw 
+Found 1 GPUs.
+Device: NVIDIA GeForce MX450
+Driver Version: 11080
+Runtime Version: 11080
+Compute capability:7.5
+Total amount of global memory: 1969815552 bytes
+Total amount of constant memory: 65536bytes
+Total amount of shared memory per block: 49152 bytes
+Total number of registers available per block: 65536
+Warp size: 32
+Created cuDNN handle
+Original array: 0 1 2 3 4 5 6 7 8 9 
+Destroyed cuDNN handle.
+New array: 0.5 0.731059 0.880797 0.952574 0.982014 0.993307 0.997527 0.999089 0.999665 0.999877 
+*/
No results found