Skip to content

Instantly share code, notes, and snippets.

@eruffaldi
Forked from allanmac/assert_cuda.c
Last active May 27, 2016 04:02
Show Gist options
  • Select an option

  • Save eruffaldi/f2cf1eee58100f092ec3 to your computer and use it in GitHub Desktop.

Select an option

Save eruffaldi/f2cf1eee58100f092ec3 to your computer and use it in GitHub Desktop.
A tiny example of CUDA + OpenGL interop with write-only surfaces and CUDA kernels. Uses GLFW+GLAD.
//
//
//
#include <stdlib.h>
//
//
//
#include "interop.h"
//
//
//
struct pxl_interop
{
// w x h
int width;
int height;
// GL buffers
GLuint fb0;
GLuint rb0;
// CUDA resources
cudaGraphicsResource_t cgr0;
};
//
//
//
struct pxl_interop*
pxl_interop_create(GLFWwindow* window)
{
struct pxl_interop* const interop = (struct pxl_interop*)malloc(sizeof(struct pxl_interop));
// init cuda graphics resource
interop->cgr0 = NULL;
// render buffer object w/a color buffer
glGenRenderbuffers(1,&interop->rb0);
// frame buffer object
glGenFramebuffers(1,&interop->fb0);
// return it
return interop;
}
void
pxl_interop_destroy(struct pxl_interop* const interop)
{
free(interop);
}
//
//
//
cudaError_t
pxl_interop_resize(struct pxl_interop* const interop, const int width, const int height)
{
// save new size
interop->width = width;
interop->height = height;
//
// RESIZE FBO'S COLOR BUFFER
//
// bind rbo
glBindRenderbuffer (GL_RENDERBUFFER,interop->rb0);
// resize color buffer
glRenderbufferStorage(GL_RENDERBUFFER,GL_RGBA8,width,height);
// bind fbo to read
glBindFramebuffer (GL_FRAMEBUFFER,interop->fb0);
// attach rb0 to the fb0
glFramebufferRenderbuffer(GL_FRAMEBUFFER,GL_COLOR_ATTACHMENT0,GL_RENDERBUFFER,interop->rb0);
// unbind rbo
glBindRenderbuffer(GL_RENDERBUFFER,0);
// bind to default fb
glBindFramebuffer(GL_FRAMEBUFFER,0);
//
// REGISTER RBO WITH CUDA
//
cudaError_t cuda_err;
// unregister
if (interop->cgr0 != NULL)
cuda_err = cudaGraphicsUnregisterResource(interop->cgr0);
// register image
cuda_err = cudaGraphicsGLRegisterImage(&interop->cgr0,
interop->rb0,
GL_RENDERBUFFER,
cudaGraphicsRegisterFlagsSurfaceLoadStore | cudaGraphicsRegisterFlagsWriteDiscard);
// diddle some flags
cuda_err = cudaGraphicsResourceSetMapFlags(interop->cgr0,cudaGraphicsMapFlagsWriteDiscard);
return cuda_err;
}
void
pxl_interop_get_size(struct pxl_interop* const interop, int* const width, int* const height)
{
*width = interop->width;
*height = interop->height;
}
//
//
//
cudaError_t
pxl_interop_map(struct pxl_interop* const interop, cudaArray_t* cuda_array, cudaStream_t stream)
{
cudaError_t cuda_err;
// map graphics resources
cuda_err = cudaGraphicsMapResources(1,&interop->cgr0,stream);
// get a CUDA Array
cuda_err = cudaGraphicsSubResourceGetMappedArray(cuda_array,interop->cgr0,0,0);
return cuda_err;
}
cudaError_t
pxl_interop_unmap(struct pxl_interop* const interop, cudaStream_t stream)
{
cudaError_t cuda_err;
cuda_err = cudaGraphicsUnmapResources(1,&interop->cgr0,stream);
return cuda_err;
}
//
//
//
void
pxl_interop_clear(struct pxl_interop* const interop)
{
/*
const GLenum draw_buffer[] = { GL_COLOR_ATTACHMENT0 };
const GLuint clear_color[] = { 255, 0, 0, 255 };
glNamedFramebufferDrawBuffers(interop->fb0,1,draw_buffer);
glClearNamedFramebufferuiv(interop->fb0,GL_COLOR,0,clear_color);
*/
const GLenum attachments[] = { GL_COLOR_ATTACHMENT0 };
glInvalidateNamedFramebufferData(interop->fb0,1,attachments);
}
//
//
//
void
pxl_interop_blit(struct pxl_interop* const interop)
{
/*
glBlitFramebuffer(0,0, interop->width,interop->height,
0,interop->height,interop->width,0,
GL_COLOR_BUFFER_BIT,
GL_NEAREST);
*/
glBlitNamedFramebuffer(interop->fb0,0,
0,0, interop->width,interop->height,
0,interop->height,interop->width,0,
GL_COLOR_BUFFER_BIT,
GL_NEAREST);
}
//
//
//
/*
* Copyright 2015 Allan MacKinnon. All rights reserved.
*
*/
#pragma once
//
//
//
#include <glad/glad.h>
#include <GLFW/glfw3.h>
//
//
//
#include <cuda_runtime.h>
#include <cuda_gl_interop.h>
//
//
//
struct pxl_interop*
pxl_interop_create();
void
pxl_interop_destroy(struct pxl_interop* const interop);
//
//
//
cudaError_t
pxl_interop_resize(struct pxl_interop* const interop, const int width, const int height);
void
pxl_interop_get_size(struct pxl_interop* const interop, int* const width, int* const height);
//
//
//
cudaError_t
pxl_interop_map(struct pxl_interop* const interop, cudaArray_t* cuda_array, cudaStream_t stream);
cudaError_t
pxl_interop_unmap(struct pxl_interop* const interop, cudaStream_t stream);
//
//
//
void
pxl_interop_clear(struct pxl_interop* const interop);
void
pxl_interop_blit(struct pxl_interop* const interop);
//
//
//
// -*- compile-command: "nvcc -m 32 -arch sm_30 -Xptxas=-v -cubin kernel.cu"; -*-
//
//
//
surface<void,cudaSurfaceType2D> surf;
//
//
//
union pxl_rgbx_24
{
uint1 b32;
struct {
unsigned r : 8;
unsigned g : 8;
unsigned b : 8;
unsigned na : 8;
};
};
//
//
//
__global__
void
pxl_kernel(const int width)
{
// pixel coordinates
const int idx = (blockDim.x * blockIdx.x) + threadIdx.x;
const int x = idx % width;
const int y = idx / width;
// pixel color
const unsigned int ramp = (unsigned int)(((float)x / (float)(width-1)) * 255.0f + 0.5f);
const unsigned int bar = (y / 32) & 3;
union pxl_rgbx_24 rgbx;
rgbx.r = (bar == 0) || (bar == 1) ? ramp : 0;
rgbx.g = (bar == 0) || (bar == 2) ? ramp : 0;
rgbx.b = (bar == 0) || (bar == 3) ? ramp : 0;
rgbx.na = 255;
// cudaBoundaryModeZero squelches out-of-bound writes
surf2Dwrite(rgbx.b32, // even simpler: (unsigned int)clock()
surf,
x*sizeof(rgbx),
y,
cudaBoundaryModeZero);
}
//
//
//
#define PXL_KERNEL_THREADS_PER_BLOCK 64
extern "C"
cudaError_t
pxl_kernel_launcher(cudaArray_const_t array,
const int width,
const int height,
cudaStream_t stream)
{
cudaError_t cuda_err = cudaBindSurfaceToArray(surf,array);
if (cuda_err)
return cuda_err;
const int blocks = (width * height + PXL_KERNEL_THREADS_PER_BLOCK - 1) / PXL_KERNEL_THREADS_PER_BLOCK;
pxl_kernel<<<blocks,PXL_KERNEL_THREADS_PER_BLOCK,0,stream>>>(width);
return cudaSuccess;
}
//
//
//
//
//
//
#include <glad/glad.h>
#include <GLFW/glfw3.h>
//
//
//
#include <stdlib.h>
#include <stdio.h>
//
//
//
#include <cuda_runtime.h>
#include <cuda_gl_interop.h>
//
//
//
#include "interop.h"
//
// FPS COUNTER FROM HERE:
//
// http://antongerdelan.net/opengl/glcontext2.html
//
static
void
pxl_glfw_fps(GLFWwindow* window)
{
// static fps counters
static double stamp_prev = 0.0;
static int frame_count = 0;
// locals
const double stamp_curr = glfwGetTime();
const double elapsed = stamp_curr - stamp_prev;
if (elapsed > 0.5)
{
stamp_prev = stamp_curr;
const double fps = (double)frame_count / elapsed;
int width, height;
char tmp[64];
glfwGetFramebufferSize(window,&width,&height);
sprintf_s(tmp,64,"(%u x %u) - FPS: %.2f",width,height,fps);
glfwSetWindowTitle(window,tmp);
frame_count = 0;
}
frame_count++;
}
//
//
//
static
void
pxl_glfw_error_callback(int error, const char* description)
{
fputs(description,stderr);
}
static
void
pxl_glfw_key_callback(GLFWwindow* window, int key, int scancode, int action, int mods)
{
if (key == GLFW_KEY_ESCAPE && action == GLFW_PRESS)
glfwSetWindowShouldClose(window, GL_TRUE);
}
static
void
pxl_glfw_init(GLFWwindow** window, const int width, const int height)
{
//
// INITIALIZE GLFW/GLAD
//
glfwSetErrorCallback(pxl_glfw_error_callback);
if (!glfwInit())
exit(EXIT_FAILURE);
const GLFWvidmode* mode = glfwGetVideoMode(glfwGetPrimaryMonitor());
glfwWindowHint(GLFW_DEPTH_BITS, 0);
glfwWindowHint(GLFW_STENCIL_BITS, 0);
glfwWindowHint(GLFW_SRGB_CAPABLE, GL_TRUE);
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 4);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 5);
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
*window = glfwCreateWindow(width,height,"GLFW / CUDA Interop",NULL,NULL);
if (*window == NULL)
{
glfwTerminate();
exit(EXIT_FAILURE);
}
glfwMakeContextCurrent(*window);
// set up GLAD
gladLoadGLLoader((GLADloadproc)glfwGetProcAddress);
// ignore vsync for now
glfwSwapInterval(0);
// enable SRGB
glEnable(GL_FRAMEBUFFER_SRGB);
// only copy r/g/b
glColorMask(GL_TRUE,GL_TRUE,GL_TRUE,GL_FALSE);
}
//
//
//
static
void
pxl_glfw_window_size_callback(GLFWwindow* window, int width, int height)
{
// get context
struct pxl_interop* const interop = glfwGetWindowUserPointer(window);
pxl_interop_resize(interop,width,height);
}
//
//
//
cudaError_t
pxl_kernel_launcher(cudaArray_const_t array, const int width, const int height, cudaStream_t stream);
//
//
//
int
main(int argc, char* argv[])
{
//
// INIT GLFW
//
GLFWwindow* window;
pxl_glfw_init(&window,1024,1024);
//
// SET CALLBACKS
//
glfwSetKeyCallback (window,pxl_glfw_key_callback);
glfwSetFramebufferSizeCallback(window,pxl_glfw_window_size_callback);
//
// INIT CUDA
//
cudaError_t cuda_err;
int gl_device_id,gl_device_count;
cuda_err = cudaGLGetDevices(&gl_device_count,&gl_device_id,1,cudaGLDeviceListAll);
int cuda_device_id = (argc > 1) ? atoi(argv[1]) : gl_device_id;
cuda_err = cudaSetDevice(cuda_device_id);
//
// INFO
//
struct cudaDeviceProp props;
cuda_err = cudaGetDeviceProperties(&props,gl_device_id);
printf("GL : %-24s (%2d)\n",props.name,props.multiProcessorCount);
cuda_err = cudaGetDeviceProperties(&props,cuda_device_id);
printf("CUDA : %-24s (%2d)\n",props.name,props.multiProcessorCount);
//
// CREATE A CUDA STREAM
//
cudaStream_t stream;
cuda_err = cudaStreamCreate(&stream);
//
// CREATE AND SAVE INTEROP INSTANCE
//
struct pxl_interop* const interop = pxl_interop_create(window);
glfwSetWindowUserPointer(window,interop);
//
// GET ACTUAL WINDOW SIZE
//
int width, height;
// get initial width/height
glfwGetFramebufferSize(window,&width,&height);
// resize with initial window dimensions
cuda_err = pxl_interop_resize(interop,width,height);
//
// LOOP UNTIL DONE
//
while (!glfwWindowShouldClose(window))
{
//
// MONITOR FPS
//
pxl_glfw_fps(window);
//
// EXECUTE CUDA KERNEL ON RENDER BUFFER
//
int width,height;
cudaArray_t cuda_array;
pxl_interop_get_size(interop,&width,&height);
cuda_err = pxl_interop_map(interop,&cuda_array,stream);
cuda_err = pxl_kernel_launcher(cuda_array,width,height,stream);
cuda_err = pxl_interop_unmap(interop,stream);
cuda_err = cudaStreamSynchronize(stream);
//
// BLIT
//
pxl_interop_blit(interop);
//
// SWAP
//
glfwSwapBuffers(window);
//
// PUMP/POLL/WAIT
//
glfwPollEvents(); // glfwWaitEvents();
}
//
// CLEANUP
//
pxl_interop_destroy(interop);
glfwDestroyWindow(window);
glfwTerminate();
cudaDeviceReset();
// missing some clean up here
exit(EXIT_SUCCESS);
}
//
//
//
@eruffaldi
Copy link
Author

Forked original for backporting to OpenGL 3.3, building with CMake, instructions for using GLAD.

Tested under OSX 10.10 with CUDA 7.5

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment