#include #include #define CL_HPP_TARGET_OPENCL_VERSION 200 #define CL_HPP_ENABLE_EXCEPTIONS #include using namespace std; int main() { std::string kernelsSrc{ R"CLC( kernel void add( global float* restrict a, global float* restrict b, global float* restrict c, size_t N, float A, float B, float C ) { const size_t i = get_global_id(0); if (i < N) { // Do some stupid calculations for (int t = 0; t < 50; ++t) c[i] = A * sin(a[i]) + B * cos(b[i]) + sqrt(A * cos(a[i]) * B * sin(b[i])); c[i] /= C * tan(c[i]) + 1; } } )CLC" }; vector platforms; cl::Platform::get(&platforms); const size_t platformIndexToUse = 0; const size_t deviceIndexToUse = 0; cl::Platform platform; cl::Device device; for (size_t platIndex = 0; platIndex < platforms.size(); ++platIndex) { const cl::Platform& plat = platforms.at(platIndex); cout << "[" << platIndex << "]: " << plat.getInfo() << endl; vector devices; plat.getDevices(CL_DEVICE_TYPE_ALL, &devices); for (size_t devIndex = 0; devIndex < devices.size(); ++devIndex) { const cl::Device& dev = devices.at(devIndex); cout << "\t[" << devIndex << "]: " << dev.getInfo() << endl; } if (platformIndexToUse == platIndex) { platform = plat; device = devices.at(deviceIndexToUse); } } cout << endl; cout << "Running on " << device.getInfo() << endl; cl_command_queue_properties props = CL_QUEUE_PROFILING_ENABLE; cl::Context context{ device }; cl::CommandQueue queue{ context, props }; cl::Program program{ context, kernelsSrc }; try { program.build(); } catch (const cl::Error& err) { cout << err.what() << endl; for (auto p : program.getBuildInfo()) { cout << p.second << endl; } exit(1); } float cA = 1234; float cB = 4321; float cC = 5678; auto addKernel = cl::KernelFunctor(program, "add"); // size_t N = 100'000'000; size_t N = 10'000; cout << "Initializing ... " << flush; vector A(N, 123); vector B(N, 111); vector C(N); cout << "done" << endl; const size_t bufSize = A.size() * sizeof(float); cl::Buffer bufA{ context, CL_MEM_READ_ONLY, bufSize }; cl::Buffer bufB{ context, CL_MEM_READ_ONLY, bufSize }; cl::Buffer bufC{ context, CL_MEM_WRITE_ONLY, bufSize }; cl::copy(queue, begin(A), end(A), bufA); cl::copy(queue, begin(B), end(B), bufB); cout << "Copying ... "; queue.finish(); cout << "done" << endl; for (int i = 0; i < 5; ++i) { cl::Event e = addKernel(cl::EnqueueArgs{ queue, cl::NDRange{A.size()} }, bufA, bufB, bufC, A.size(), cA, cB, cC); e.wait(); cout << "Queued : " << e.getProfilingInfo() << endl; cout << "Submit : " << e.getProfilingInfo() << endl; cout << "Start : " << e.getProfilingInfo() << endl; cout << "End : " << e.getProfilingInfo() << endl; cout << endl; } cout << "Copying back ..." << flush; cl::copy(queue, bufC, begin(C), end(C)); cout << "done" << endl; cout << endl; return 0; }