-
-
Save eruffaldi/f2cf1eee58100f092ec3 to your computer and use it in GitHub Desktop.
A tiny example of CUDA + OpenGL interop with write-only surfaces and CUDA kernels. Uses GLFW+GLAD.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // | |
| // | |
| // | |
| #include <glad/glad.h> | |
| #include <GLFW/glfw3.h> | |
| #include <cuda_gl_interop.h> | |
| #include <stdlib.h> | |
| // | |
| // | |
| // | |
| #include "interop.h" | |
| // | |
| // | |
| // | |
| struct pxl_interop | |
| { | |
| // number of fbo's | |
| int count; | |
| int index; | |
| // w x h | |
| int width; | |
| int height; | |
| // GL buffers | |
| GLuint* fb; | |
| GLuint* rb; | |
| // CUDA resources | |
| cudaGraphicsResource_t* cgr; | |
| cudaArray_t* ca; | |
| // CUDA streams | |
| cudaStream_t* stream; | |
| }; | |
| // | |
| // | |
| // | |
| struct pxl_interop* | |
| pxl_interop_create(const int fbo_count) | |
| { | |
| cudaError_t cuda_err; | |
| struct pxl_interop* const interop = calloc(1,sizeof(*interop)); | |
| interop->count = fbo_count; | |
| interop->index = 0; | |
| // allocate arrays | |
| interop->fb = calloc(fbo_count,sizeof(*(interop->fb ))); | |
| interop->rb = calloc(fbo_count,sizeof(*(interop->rb ))); | |
| interop->cgr = calloc(fbo_count,sizeof(*(interop->cgr))); | |
| interop->ca = calloc(fbo_count,sizeof(*(interop->ca))); | |
| interop->stream = calloc(fbo_count,sizeof(*(interop->stream))); | |
| // render buffer object w/a color buffer | |
| glCreateRenderbuffers(fbo_count,interop->rb); | |
| // frame buffer object | |
| glCreateFramebuffers(fbo_count,interop->fb); | |
| // attach rbo to fbo | |
| for (int index=0; index<fbo_count; index++) | |
| { | |
| glNamedFramebufferRenderbuffer(interop->fb[index], | |
| GL_COLOR_ATTACHMENT0, | |
| GL_RENDERBUFFER, | |
| interop->rb[index]); | |
| cuda_err = cudaStreamCreate(&interop->stream[index]); | |
| } | |
| // return it | |
| return interop; | |
| } | |
| void | |
| pxl_interop_destroy(struct pxl_interop* const interop) | |
| { | |
| cudaError_t cuda_err; | |
| // unregister CUDA resources | |
| for (int index=0; index<interop->count; index++) | |
| { | |
| if (interop->cgr[index] != NULL) | |
| cuda_err = cudaGraphicsUnregisterResource(interop->cgr[index]); | |
| cuda_err = cudaStreamDestroy(interop->stream[index]); | |
| } | |
| // delete rbo's | |
| glDeleteRenderbuffers(interop->count,interop->rb); | |
| // delete fbo's | |
| glDeleteFramebuffers(interop->count,interop->fb); | |
| // free buffers and resources | |
| free(interop->fb); | |
| free(interop->rb); | |
| free(interop->cgr); | |
| free(interop->ca); | |
| free(interop->stream); | |
| // free interop | |
| free(interop); | |
| } | |
| // | |
| // | |
| // | |
| cudaError_t | |
| pxl_interop_size_set(struct pxl_interop* const interop, const int width, const int height) | |
| { | |
| cudaError_t cuda_err = cudaSuccess; | |
| // save new size | |
| interop->width = width; | |
| interop->height = height; | |
| // resize color buffer | |
| for (int index=0; index<interop->count; index++) | |
| { | |
| // unregister resource | |
| if (interop->cgr[index] != NULL) | |
| cuda_err = cudaGraphicsUnregisterResource(interop->cgr[index]); | |
| // resize rbo | |
| glNamedRenderbufferStorage(interop->rb[index],GL_RGBA8,width,height); | |
| // probe fbo status | |
| // glCheckNamedFramebufferStatus(interop->fb[index],0); | |
| // register rbo | |
| cuda_err = cudaGraphicsGLRegisterImage(&interop->cgr[index], | |
| interop->rb[index], | |
| GL_RENDERBUFFER, | |
| cudaGraphicsRegisterFlagsSurfaceLoadStore | | |
| cudaGraphicsRegisterFlagsWriteDiscard); | |
| } | |
| // map graphics resources | |
| cuda_err = cudaGraphicsMapResources(interop->count,interop->cgr,0); | |
| // get CUDA Array refernces | |
| for (int index=0; index<interop->count; index++) | |
| { | |
| cuda_err = cudaGraphicsSubResourceGetMappedArray(&interop->ca[index], | |
| interop->cgr[index], | |
| 0,0); | |
| } | |
| // unmap graphics resources | |
| cuda_err = cudaGraphicsUnmapResources(interop->count,interop->cgr,0); | |
| return cuda_err; | |
| } | |
| void | |
| pxl_interop_size_get(struct pxl_interop* const interop, int* const width, int* const height) | |
| { | |
| *width = interop->width; | |
| *height = interop->height; | |
| } | |
| // | |
| // | |
| // | |
| cudaError_t | |
| pxl_interop_map(struct pxl_interop* const interop) | |
| { | |
| cudaError_t cuda_err; | |
| // map graphics resources | |
| cuda_err = cudaGraphicsMapResources(1,&interop->cgr[interop->index], | |
| interop->stream[interop->index]); | |
| return cuda_err; | |
| } | |
| cudaError_t | |
| pxl_interop_unmap(struct pxl_interop* const interop) | |
| { | |
| cudaError_t cuda_err; | |
| cuda_err = cudaGraphicsUnmapResources(1,&interop->cgr[interop->index], | |
| interop->stream[interop->index]); | |
| return cuda_err; | |
| } | |
| cudaError_t | |
| pxl_interop_array_map(struct pxl_interop* const interop) | |
| { | |
| cudaError_t cuda_err; | |
| // get a CUDA Array | |
| cuda_err = cudaGraphicsSubResourceGetMappedArray(&interop->ca[interop->index], | |
| interop->cgr[interop->index], | |
| 0,0); | |
| return cuda_err; | |
| } | |
| // | |
| // | |
| // | |
| cudaArray_const_t | |
| pxl_interop_array_get(struct pxl_interop* const interop) | |
| { | |
| return interop->ca[interop->index]; | |
| } | |
| cudaStream_t | |
| pxl_interop_stream_get(struct pxl_interop* const interop) | |
| { | |
| return interop->stream[interop->index]; | |
| } | |
| int | |
| pxl_interop_index_get(struct pxl_interop* const interop) | |
| { | |
| return interop->index; | |
| } | |
| // | |
| // | |
| // | |
| void | |
| pxl_interop_swap(struct pxl_interop* const interop) | |
| { | |
| interop->index = (interop->index + 1) % interop->count; | |
| } | |
| // | |
| // | |
| // | |
| void | |
| pxl_interop_clear(struct pxl_interop* const interop) | |
| { | |
| /* | |
| const GLenum draw_buffer[] = { GL_COLOR_ATTACHMENT0 }; | |
| const GLuint clear_color[] = { 255, 0, 0, 255 }; | |
| glNamedFramebufferDrawBuffers(interop->fb0,1,draw_buffer); | |
| glClearNamedFramebufferuiv(interop->fb0,GL_COLOR,0,clear_color); | |
| */ | |
| static const GLenum attachments[] = { GL_COLOR_ATTACHMENT0 }; | |
| glInvalidateNamedFramebufferData(interop->fb[interop->index],1,attachments); | |
| } | |
| // | |
| // | |
| // | |
| void | |
| pxl_interop_blit(struct pxl_interop* const interop) | |
| { | |
| glBlitNamedFramebuffer(interop->fb[interop->index],0, | |
| 0,0, interop->width,interop->height, | |
| 0,interop->height,interop->width,0, | |
| GL_COLOR_BUFFER_BIT, | |
| GL_NEAREST); | |
| } | |
| // | |
| // | |
| // |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // | |
| // | |
| // | |
| #pragma once | |
| // | |
| // | |
| // | |
| #include <cuda_runtime.h> | |
| // | |
| // | |
| // | |
| struct pxl_interop* | |
| pxl_interop_create(const int fbo_count); | |
| void | |
| pxl_interop_destroy(struct pxl_interop* const interop); | |
| // | |
| // | |
| // | |
| cudaError_t | |
| pxl_interop_size_set(struct pxl_interop* const interop, const int width, const int height); | |
| void | |
| pxl_interop_size_get(struct pxl_interop* const interop, int* const width, int* const height); | |
| // | |
| // | |
| // | |
| cudaError_t | |
| pxl_interop_map(struct pxl_interop* const interop); | |
| cudaError_t | |
| pxl_interop_array_map(struct pxl_interop* const interop); | |
| cudaError_t | |
| pxl_interop_unmap(struct pxl_interop* const interop); | |
| // | |
| // | |
| // | |
| cudaArray_const_t | |
| pxl_interop_array_get(struct pxl_interop* const interop); | |
| cudaStream_t | |
| pxl_interop_stream_get(struct pxl_interop* const interop); | |
| int | |
| pxl_interop_index_get(struct pxl_interop* const interop); | |
| // | |
| // | |
| // | |
| void | |
| pxl_interop_swap(struct pxl_interop* const interop); | |
| void | |
| pxl_interop_clear(struct pxl_interop* const interop); | |
| void | |
| pxl_interop_blit(struct pxl_interop* const interop); | |
| // | |
| // | |
| // |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // -*- compile-command: "nvcc -m 32 -arch sm_30 -Xptxas=-v -cubin kernel.cu"; -*- | |
| // | |
| // | |
| // | |
| surface<void,cudaSurfaceType2D> surf; | |
| // | |
| // | |
| // | |
| union pxl_rgbx_24 | |
| { | |
| uint1 b32; | |
| struct { | |
| unsigned r : 8; | |
| unsigned g : 8; | |
| unsigned b : 8; | |
| unsigned na : 8; | |
| }; | |
| }; | |
| // | |
| // | |
| // | |
| extern "C" | |
| __global__ | |
| void | |
| pxl_kernel(const int width, const int height, const int index) | |
| { | |
| // pixel coordinates | |
| const int idx = (blockDim.x * blockIdx.x) + threadIdx.x; | |
| const int x = idx % width; | |
| const int y = idx / width; | |
| #if 1 | |
| // pixel color | |
| const int t = (unsigned int)clock() / 1100000; // 1.1 GHz | |
| const int xt = (idx + t) % width; | |
| const unsigned int ramp = (unsigned int)(((float)xt / (float)(width-1)) * 255.0f + 0.5f); | |
| const unsigned int bar = ((y + t) / 32) & 3; | |
| union pxl_rgbx_24 rgbx; | |
| rgbx.r = (bar == 0) || (bar == 1) ? ramp : 0; | |
| rgbx.g = (bar == 0) || (bar == 2) ? ramp : 0; | |
| rgbx.b = (bar == 0) || (bar == 3) ? ramp : 0; | |
| rgbx.na = 255; | |
| #else // DRAW A RED BORDER TO VALIDATE FLIPPED BLIT | |
| const bool border = (x == 0) || (x == width-1) || (y == 0) || (y == height-1); | |
| union pxl_rgbx_24 rgbx = { border ? 0xFF0000FF : index==0 ? 0xFF000000 : 0xFFFFFFFF }; | |
| #endif | |
| surf2Dwrite(rgbx.b32, // even simpler: (unsigned int)clock() | |
| surf, | |
| x*sizeof(rgbx), | |
| y, | |
| cudaBoundaryModeZero); // squelches out-of-bound writes | |
| } | |
| // | |
| // | |
| // | |
| #define PXL_KERNEL_THREADS_PER_BLOCK 256 // enough for 4Kx2 monitor | |
| extern "C" | |
| cudaError_t | |
| pxl_kernel_launcher(cudaArray_const_t array, | |
| const int width, | |
| const int height, | |
| const int index, | |
| cudaStream_t stream) | |
| { | |
| cudaError_t cuda_err = cudaBindSurfaceToArray(surf,array); | |
| if (cuda_err) | |
| return cuda_err; | |
| const int blocks = (width * height + PXL_KERNEL_THREADS_PER_BLOCK - 1) / PXL_KERNEL_THREADS_PER_BLOCK; | |
| if (blocks > 0) | |
| pxl_kernel<<<blocks,PXL_KERNEL_THREADS_PER_BLOCK,0,stream>>>(width,height,index); | |
| return cudaSuccess; | |
| } | |
| // | |
| // | |
| // |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // | |
| // | |
| // | |
| #include <glad/glad.h> | |
| #include <GLFW/glfw3.h> | |
| // | |
| // | |
| // | |
| #include <stdlib.h> | |
| #include <stdio.h> | |
| #include <stdbool.h> | |
| // | |
| // | |
| // | |
| #include <cuda_gl_interop.h> | |
| // | |
| // | |
| // | |
| #include "interop.h" | |
| // | |
| // FPS COUNTER FROM HERE: | |
| // | |
| // http://antongerdelan.net/opengl/glcontext2.html | |
| // | |
| static | |
| void | |
| pxl_glfw_fps(GLFWwindow* window) | |
| { | |
| // static fps counters | |
| static double stamp_prev = 0.0; | |
| static int frame_count = 0; | |
| // locals | |
| const double stamp_curr = glfwGetTime(); | |
| const double elapsed = stamp_curr - stamp_prev; | |
| if (elapsed > 0.5) | |
| { | |
| stamp_prev = stamp_curr; | |
| const double fps = (double)frame_count / elapsed; | |
| int width, height; | |
| char tmp[64]; | |
| glfwGetFramebufferSize(window,&width,&height); | |
| sprintf_s(tmp,64,"(%u x %u) - FPS: %.2f",width,height,fps); | |
| glfwSetWindowTitle(window,tmp); | |
| frame_count = 0; | |
| } | |
| frame_count++; | |
| } | |
| // | |
| // | |
| // | |
| static | |
| void | |
| pxl_glfw_error_callback(int error, const char* description) | |
| { | |
| fputs(description,stderr); | |
| } | |
| static | |
| void | |
| pxl_glfw_key_callback(GLFWwindow* window, int key, int scancode, int action, int mods) | |
| { | |
| if (key == GLFW_KEY_ESCAPE && action == GLFW_PRESS) | |
| glfwSetWindowShouldClose(window, GL_TRUE); | |
| } | |
| static | |
| void | |
| pxl_glfw_init(GLFWwindow** window, const int width, const int height) | |
| { | |
| // | |
| // INITIALIZE GLFW/GLAD | |
| // | |
| glfwSetErrorCallback(pxl_glfw_error_callback); | |
| if (!glfwInit()) | |
| exit(EXIT_FAILURE); | |
| glfwWindowHint(GLFW_DEPTH_BITS, 0); | |
| glfwWindowHint(GLFW_STENCIL_BITS, 0); | |
| glfwWindowHint(GLFW_SRGB_CAPABLE, GL_TRUE); | |
| glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 4); | |
| glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 5); | |
| glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE); | |
| #ifdef PXL_FULLSCREEN | |
| GLFWmonitor* monitor = glfwGetPrimaryMonitor(); | |
| const GLFWvidmode* mode = glfwGetVideoMode(monitor); | |
| *window = glfwCreateWindow(mode->width,mode->height,"GLFW / CUDA Interop",monitor,NULL); | |
| #else | |
| *window = glfwCreateWindow(width,height,"GLFW / CUDA Interop",NULL,NULL); | |
| #endif | |
| if (*window == NULL) | |
| { | |
| glfwTerminate(); | |
| exit(EXIT_FAILURE); | |
| } | |
| glfwMakeContextCurrent(*window); | |
| // set up GLAD | |
| gladLoadGLLoader((GLADloadproc)glfwGetProcAddress); | |
| // ignore vsync for now | |
| glfwSwapInterval(0); | |
| // only copy r/g/b | |
| glColorMask(GL_TRUE,GL_TRUE,GL_TRUE,GL_FALSE); | |
| // enable SRGB | |
| // glEnable(GL_FRAMEBUFFER_SRGB); | |
| } | |
| // | |
| // | |
| // | |
| static | |
| void | |
| pxl_glfw_window_size_callback(GLFWwindow* window, int width, int height) | |
| { | |
| // get context | |
| struct pxl_interop* const interop = glfwGetWindowUserPointer(window); | |
| pxl_interop_size_set(interop,width,height); | |
| } | |
| // | |
| // | |
| // | |
| cudaError_t | |
| pxl_kernel_launcher(cudaArray_const_t array, | |
| const int width, | |
| const int height, | |
| const int index, | |
| cudaStream_t stream); | |
| // | |
| // | |
| // | |
| int | |
| main(int argc, char* argv[]) | |
| { | |
| // | |
| // INIT GLFW | |
| // | |
| GLFWwindow* window; | |
| pxl_glfw_init(&window,1024,1024); | |
| // | |
| // INIT CUDA | |
| // | |
| cudaError_t cuda_err; | |
| int gl_device_id,gl_device_count; | |
| cuda_err = cudaGLGetDevices(&gl_device_count,&gl_device_id,1,cudaGLDeviceListAll); | |
| int cuda_device_id = (argc > 1) ? atoi(argv[1]) : gl_device_id; | |
| cuda_err = cudaSetDevice(cuda_device_id); | |
| // | |
| // MULTI-GPU? | |
| // | |
| const bool multi_gpu = gl_device_id != cuda_device_id; | |
| // | |
| // INFO | |
| // | |
| struct cudaDeviceProp props; | |
| cuda_err = cudaGetDeviceProperties(&props,gl_device_id); | |
| printf("GL : %-24s (%2d)\n",props.name,props.multiProcessorCount); | |
| cuda_err = cudaGetDeviceProperties(&props,cuda_device_id); | |
| printf("CUDA : %-24s (%2d)\n",props.name,props.multiProcessorCount); | |
| // | |
| // CREATE INTEROP | |
| // | |
| struct pxl_interop* const interop = pxl_interop_create(2); | |
| // | |
| // RESIZE INTEROP | |
| // | |
| int width, height; | |
| // get initial width/height | |
| glfwGetFramebufferSize(window,&width,&height); | |
| // resize with initial window dimensions | |
| cuda_err = pxl_interop_size_set(interop,width,height); | |
| // | |
| // SET USER POINTER AND CALLBACKS | |
| // | |
| glfwSetWindowUserPointer (window,interop); | |
| glfwSetKeyCallback (window,pxl_glfw_key_callback); | |
| glfwSetFramebufferSizeCallback(window,pxl_glfw_window_size_callback); | |
| // | |
| // LOOP UNTIL DONE | |
| // | |
| int step = 0; | |
| while (!glfwWindowShouldClose(window)) | |
| { | |
| // | |
| // MONITOR FPS | |
| // | |
| pxl_glfw_fps(window); | |
| // | |
| // EXECUTE CUDA KERNEL ON RENDER BUFFER | |
| // | |
| int width,height; | |
| cudaArray_t cuda_array; | |
| pxl_interop_size_get(interop,&width,&height); | |
| if (multi_gpu) | |
| { | |
| cuda_err = pxl_interop_map(interop); | |
| // cuda_err = pxl_interop_array_map(interop); // NOT NEEDED? | |
| } | |
| cuda_err = pxl_kernel_launcher(pxl_interop_array_get(interop), | |
| width,height, | |
| pxl_interop_index_get(interop), | |
| pxl_interop_stream_get(interop)); | |
| if (multi_gpu) | |
| { | |
| cuda_err = pxl_interop_unmap(interop); | |
| // cuda_err = cudaStreamSynchronize(pxl_interop_stream_get(interop)); // NOT NEEDED? | |
| } | |
| // | |
| // BLIT & SWAP FBO | |
| // | |
| pxl_interop_blit(interop); | |
| pxl_interop_swap(interop); | |
| // | |
| // SWAP WINDOW | |
| // | |
| glfwSwapBuffers(window); | |
| // | |
| // PUMP/POLL/WAIT | |
| // | |
| glfwPollEvents(); // glfwWaitEvents(); | |
| } | |
| // | |
| // CLEANUP | |
| // | |
| pxl_interop_destroy(interop); | |
| glfwDestroyWindow(window); | |
| glfwTerminate(); | |
| cudaDeviceReset(); | |
| // missing some clean up here | |
| exit(EXIT_SUCCESS); | |
| } | |
| // | |
| // | |
| // |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Forked original for backporting to OpenGL 3.3, building with CMake, instructions for using GLAD.
Tested under OSX 10.10 with CUDA 7.5