|
| 1 | +/*===================== begin_copyright_notice ================================== |
| 2 | +
|
| 3 | + Copyright (c) 2020, Intel Corporation |
| 4 | +
|
| 5 | +
|
| 6 | + Permission is hereby granted, free of charge, to any person obtaining a |
| 7 | + copy of this software and associated documentation files (the "Software"), |
| 8 | + to deal in the Software without restriction, including without limitation |
| 9 | + the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| 10 | + and/or sell copies of the Software, and to permit persons to whom the |
| 11 | + Software is furnished to do so, subject to the following conditions: |
| 12 | +
|
| 13 | + The above copyright notice and this permission notice shall be included |
| 14 | + in all copies or substantial portions of the Software. |
| 15 | +
|
| 16 | + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| 17 | + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 18 | + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| 19 | + THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
| 20 | + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
| 21 | + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
| 22 | + OTHER DEALINGS IN THE SOFTWARE. |
| 23 | +======================= end_copyright_notice ==================================*/ |
| 24 | + |
| 25 | +#include <iostream> |
| 26 | +#include <cassert> |
| 27 | +#include <math.h> |
| 28 | +#include <vector> |
| 29 | + |
| 30 | +#include <CL/cl.h> |
| 31 | + |
| 32 | +#define SZ 160 |
| 33 | +#define KERNEL_SZ 16 |
| 34 | +#define CHECK(a) do { \ |
| 35 | + err = (a); \ |
| 36 | + if (err != CL_SUCCESS) { \ |
| 37 | + fprintf(stderr, "FAIL: err=%d @ line=%d (%s)\n", err, __LINE__, (#a)); \ |
| 38 | + exit(err); \ |
| 39 | + } \ |
| 40 | +}while (0) |
| 41 | +#define CHECK2(a) do { \ |
| 42 | + (a); \ |
| 43 | + if (err != CL_SUCCESS) { \ |
| 44 | + fprintf(stderr, "FAIL: err=%d @ line=%d (%s)\n", err, __LINE__, (#a)); \ |
| 45 | + exit(err); \ |
| 46 | + } \ |
| 47 | +}while (0) |
| 48 | +#ifndef KERNEL |
| 49 | +#error "Error: KERNEL must be defined with location of kernel binary" |
| 50 | +#endif |
| 51 | + |
| 52 | +int main( int argc, char* argv[]) |
| 53 | +{ |
| 54 | + // initialize data |
| 55 | + int *src1 = (int *)malloc(sizeof(int)*SZ); |
| 56 | + int *src2 = (int *)malloc(sizeof(int)*SZ); |
| 57 | + int *dst = (int *)malloc(sizeof(int)*SZ); |
| 58 | + |
| 59 | + for (unsigned i=0; i<SZ; i++) |
| 60 | + { |
| 61 | + src1[i] = i; |
| 62 | + src2[i] = i<<2; |
| 63 | + } |
| 64 | + |
| 65 | + // initialize GPU |
| 66 | + cl_platform_id platform; // OpenCL platform |
| 67 | + cl_device_id device; // device ID |
| 68 | + cl_context context; // context |
| 69 | + cl_command_queue queue; // command queue |
| 70 | + cl_program program; // program |
| 71 | + cl_kernel kernel; // kernel |
| 72 | + cl_int err; |
| 73 | + |
| 74 | + CHECK(clGetPlatformIDs(1, &platform, NULL)); |
| 75 | + CHECK(clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL)); |
| 76 | + CHECK2(context = clCreateContext(NULL, 1, &device, NULL, NULL, &err)); |
| 77 | + CHECK2(queue = clCreateCommandQueueWithProperties(context, device, 0, &err)); |
| 78 | + |
| 79 | + // diagnostic info |
| 80 | + char name_buffer[256]; |
| 81 | + CHECK(clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(name_buffer), name_buffer, NULL)); |
| 82 | + fprintf(stderr, "INFO: using platform: %s\n", name_buffer); |
| 83 | + CHECK(clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(name_buffer), name_buffer, NULL)); |
| 84 | + fprintf(stderr, "INFO: using device: %s\n", name_buffer); |
| 85 | + |
| 86 | + // read in and initialize kernel |
| 87 | + FILE *fp = fopen(KERNEL, "rb"); |
| 88 | + if (fp == NULL) { |
| 89 | + fprintf(stderr, "FAIL: unable to open %s\n", KERNEL); |
| 90 | + exit(-1); |
| 91 | + } |
| 92 | + fseek(fp, 0, SEEK_END); |
| 93 | + size_t sz = ftell(fp); |
| 94 | + rewind(fp); |
| 95 | + |
| 96 | + unsigned char *code = (unsigned char *)malloc(sz); |
| 97 | + fread(code, 1, sz, fp); |
| 98 | + fclose(fp); |
| 99 | + |
| 100 | + cl_int errNum = 0; |
| 101 | + const unsigned char *codes[1] = {code}; |
| 102 | + size_t sizes[1] = {sz}; |
| 103 | + CHECK2(program = clCreateProgramWithBinary(context, 1, &device, sizes, codes, &err, &errNum)); |
| 104 | + CHECK(clBuildProgram(program, 0, NULL, NULL, NULL, NULL)); |
| 105 | + CHECK2(kernel = clCreateKernel(program, "vector_add", &err)); |
| 106 | + |
| 107 | + // kernel parameter initialization |
| 108 | + cl_mem d_a, d_b, d_c; |
| 109 | + size_t bytes = SZ*sizeof(int); |
| 110 | + d_a = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, NULL); |
| 111 | + d_b = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, NULL); |
| 112 | + d_c = clCreateBuffer(context, CL_MEM_WRITE_ONLY, bytes, NULL, NULL); |
| 113 | + CHECK(clEnqueueWriteBuffer(queue, d_a, CL_TRUE, 0, bytes, src1, 0, NULL, NULL)); |
| 114 | + CHECK(clEnqueueWriteBuffer(queue, d_b, CL_TRUE, 0, bytes, src2, 0, NULL, NULL)); |
| 115 | + CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_a)); |
| 116 | + CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_b)); |
| 117 | + CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_c)); |
| 118 | + |
| 119 | + // send to GPU |
| 120 | + size_t globalSize = SZ/KERNEL_SZ; |
| 121 | + size_t localSize = 1; |
| 122 | + CHECK(clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalSize, &localSize, 0, NULL, NULL)); |
| 123 | + clFinish(queue); |
| 124 | + |
| 125 | + // process output and cleanup |
| 126 | + clEnqueueReadBuffer(queue, d_c, CL_TRUE, 0, bytes, dst, 0, NULL, NULL ); |
| 127 | + clReleaseMemObject(d_a); |
| 128 | + clReleaseMemObject(d_b); |
| 129 | + clReleaseMemObject(d_c); |
| 130 | + clReleaseProgram(program); |
| 131 | + clReleaseKernel(kernel); |
| 132 | + clReleaseCommandQueue(queue); |
| 133 | + clReleaseContext(context); |
| 134 | + |
| 135 | + // verify results |
| 136 | + for (unsigned i=0; i<SZ; i++) |
| 137 | + if ((src1[i] + src2[i]) != dst[i]) { |
| 138 | + fprintf(stderr, "FAIL: comparison at index[%d]: %d + %d => %d(host), but %d(gpu)\n", i, src1[i], src2[i], (src1[i]+src2[i]), dst[i]); |
| 139 | + //exit(-1); |
| 140 | + } |
| 141 | + fprintf(stderr, "PASSED\n"); |
| 142 | + return 0; |
| 143 | +} |
0 commit comments