C++ Coder

HPC高性能计算架构,实现,编译器指令优化,算法优化, LLVM CLANG OpenCL CUDA OpenACC C++AMP OpenMP MPI

C++博客 首页 新随笔 联系 聚合 管理
  98 Posts :: 0 Stories :: 0 Comments :: 0 Trackbacks
http://pastebin.com/fije3CKf

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <CL/opencl.h>
  5.  
  6. cl_int cl_error; // OpenCL error code
  7. cl_device_id device_id; // The chosen device
  8. cl_program program; // OpenCL program
  9.  
  10.  
  11. /** Formats the standard MACROS  __FILE__ and __LINE__ for message print.
  12.  */
  13. #define STRINGIFY(x) #x
  14. #define TOSTRING(x) STRINGIFY(x)
  15. #define AT __FILE__ ":" TOSTRING(__LINE__)
  16.  
  17.  
  18. #define DEBUG_BUFFER_SIZE 4096
  19. char * OpenCL_error_to_string(int error);
  20.  
  21. #define dump(msg,...) \
  22.     fprintf(stderr, AT msg,##__VA_ARGS__)
  23.  
  24.  
  25. #define OpenCL_test_execution(msg,error)         \
  26.   do {                  \
  27.     if(CL_SUCCESS != error) {           \
  28.       dump("The runtime error is %s\n",    \
  29.         (char *)OpenCL_error_to_string(error));      \
  30.       exit(EXIT_FAILURE);            \
  31.     }                 \
  32.   } while (0)
  33.  
  34.  
  35.  
  36.  
  37. static double t_start, t_end; // Timing
  38. double timer_get_time()
  39. {
  40.     struct timeval t;
  41.     if (gettimeofday (&t, NULL) != 0) {
  42.       perror("Error gettimeofday !\n");
  43.       exit(1);
  44.     }
  45.     return (t.tv_sec + t.tv_usec * 1.0e-6);
  46. }
  47.  
  48. void timer_start() {
  49.   t_start = timer_get_time();
  50. }
  51.  
  52. void timer_stop_display( char *msg ) {
  53.   t_end = timer_get_time();
  54.   printf ("%s : %0.1lf\n", msg, (t_end - t_start)*1000);
  55. }
  56.  
  57.  
  58.  
  59. void openclSimpleCopy(cl_context context, cl_command_queue queue, cl_kernel kernel, size_t n) {
  60.   // Host data
  61.   int a[n],b[n];
  62.   int _i;
  63.  
  64.   // Init
  65.   for(_i=0;_i<n;_i++) {
  66.     a[_i]=n-_i;
  67.     b[_i]=0;
  68.   }
  69.  
  70.   // Buffers on the device
  71.   cl_mem a_dev = clCreateBuffer(context,
  72.                                 CL_MEM_READ_WRITE,
  73.                                 n * sizeof(int),
  74.                                 NULL,
  75.                                 &cl_error);
  76.   OpenCL_test_execution("Create Buffer",cl_error);
  77.  
  78.   cl_mem b_dev = clCreateBuffer(context,
  79.                                 CL_MEM_READ_WRITE,
  80.                                 n * sizeof(int),
  81.                                 NULL,
  82.                                 &cl_error);
  83.   OpenCL_test_execution("Create Buffer",cl_error);
  84.  
  85.  
  86.   // 3 events is enough here
  87.   cl_event event1;
  88.   cl_event event2;
  89.   cl_event event3;
  90.  
  91.  
  92.  
  93.  
  94.   // Initialize buffer on the device
  95.   cl_error =  clEnqueueWriteBuffer(queue,
  96.                        a_dev,
  97.                        CL_TRUE,
  98.                        0,
  99.                        n * sizeof(int),
  100.                        a,
  101.                        0,
  102.                        NULL,
  103.                        &event1);
  104.   OpenCL_test_execution("Write to Buffer",cl_error);
  105.  
  106.   // Shouldn't be useful, I used a blocking write !
  107.   clFlush(queue);
  108.   clWaitForEvents(1,&event1);
  109.  
  110.  
  111.   // Arguments for the kernel
  112.   cl_error = clSetKernelArg(kernel,0,sizeof(a_dev), &a_dev);
  113.   OpenCL_test_execution("Set argument 0 ",cl_error);
  114.  
  115.   cl_error = clSetKernelArg(kernel,1,sizeof(b_dev), &b_dev);
  116.   OpenCL_test_execution("Set argument 1",cl_error);
  117.  
  118.   timer_start();
  119.   cl_error = clEnqueueNDRangeKernel(queue,
  120.                                     kernel,
  121.                                     1,
  122.                                     NULL,
  123.                                     &n,
  124.                                     NULL,
  125.                                     1,
  126.                                     &event1,
  127.                                     &event2);
  128.  
  129.   timer_stop_display("Time for Enqueue");
  130.   OpenCL_test_execution("Enqueue kernel",cl_error);
  131.  
  132.   OpenCL_test_execution("clWaitForEvents",clWaitForEvents(1,&event2));
  133.  
  134.   cl_error =  clEnqueueReadBuffer(queue,
  135.                        b_dev,
  136.                        CL_TRUE,
  137.                        0,
  138.                        n * sizeof(int),
  139.                        b,
  140.                        1,
  141.                        &event2,
  142.                        &event3);
  143.   OpenCL_test_execution("Read from buffer",cl_error);
  144.  
  145.   OpenCL_test_execution("clWaitForEvents",clWaitForEvents(1,&event3));
  146.  
  147.  
  148.   // Check result
  149.   for(_i=0;_i<n;_i++) {
  150.     if(a[_i]!=b[_i]) {
  151.       printf("Error %d : %d!=%d\n",_i,a[_i],b[_i]);
  152.       exit(-1);
  153.     }
  154.   }
  155.   OpenCL_test_execution("Release mem object",clReleaseMemObject (a_dev));
  156.   OpenCL_test_execution("Release mem object",clReleaseMemObject (b_dev));
  157.  
  158. }
  159.  
  160.  
  161.  
  162. int main(int argc, char **argv) {
  163.  
  164.   int platform_num = 0; // Platform number
  165.   int device_num = 0; // Device number
  166. #define DEVICE_TYPE CL_DEVICE_TYPE_ALL
  167.  
  168.   cl_int cl_error; // OpenCL error code
  169.   cl_kernel kernel = NULL;
  170.  
  171.  
  172.   // Chosing platform
  173.   cl_uint num_platforms;
  174.   clGetPlatformIDs(0, NULL, &num_platforms);
  175.   if(num_platforms <= 0) {
  176.     dump("No OpenCL platforms found :-(\n");
  177.     exit(-1);
  178.   }
  179.  
  180.   cl_platform_id platform_ids[num_platforms];
  181.   clGetPlatformIDs(num_platforms, platform_ids, NULL);
  182.  
  183.   if(platform_num < 0 || platform_num >= num_platforms) {
  184.     dump("Invalid platform: %d\n", platform_num);
  185.     exit(EXIT_FAILURE);
  186.   }
  187.  
  188.   // platform_id hold the chosen platform
  189.   cl_platform_id platform_id = platform_ids[platform_num];
  190.  
  191.   // Chosing the device
  192.  
  193.   cl_uint num_devices;
  194.   OpenCL_test_execution("Get number of devices", clGetDeviceIDs(platform_id, DEVICE_TYPE, 0, NULL, &num_devices));
  195.  
  196.   if(num_devices <= 0) {
  197.     dump("No devices found associated to this OpenCL platform :-(\n");
  198.     exit(-1);
  199.   }
  200.  
  201.   // Allocate spaces for devices
  202.   cl_device_id devices[num_devices];
  203.  
  204.   // Get devices list
  205.   OpenCL_test_execution("Get devices list", clGetDeviceIDs(platform_id, DEVICE_TYPE, num_devices, devices, NULL));
  206.  
  207.   /* Create a context for all devices */
  208.   cl_context context = clCreateContext(0,
  209.                                        num_devices,
  210.                                        devices,
  211.                                        NULL,
  212.                                        "from 'context'",
  213.                                        &cl_error);
  214.   OpenCL_test_execution("Context creation",cl_error);
  215.  
  216.   // Here is the device ID
  217.   device_id = devices[device_num];
  218.  
  219.   /* Create an in-order queue for this device */
  220.   cl_command_queue queue = clCreateCommandQueue(context, device_id, 0, &cl_error);
  221.   OpenCL_test_execution("Create command queue",cl_error);
  222.  
  223.   // END OF OPENCL INITIALIZATION
  224.   const char *kernel_str = " __kernel void copy(__global int *a, __global int *b) {"
  225.                            " int i = get_global_id(0);"
  226.                            " b[i]=a[i];"
  227.                            "}";
  228.  
  229.   program = clCreateProgramWithSource(context,
  230.                                       1,
  231.                                       &kernel_str,
  232.                                       NULL,
  233.                                       &cl_error);
  234.   OpenCL_test_execution("Create program with source",cl_error);
  235.  
  236.   cl_error = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
  237.   OpenCL_test_execution("Build Program",cl_error);
  238.  
  239.   kernel = clCreateKernel(program, "copy", &cl_error);
  240.   OpenCL_test_execution("Create kernel",cl_error);
  241.  
  242.  
  243.  
  244.  
  245.   // Size of problem, 10^5 will give me 1.5ms for the enqueue, 10^6 up to 30ms!
  246.   size_t n = 10000000;
  247.   int _i;
  248.   // Run the sequence many times
  249.   printf("Run with n = %zu\n",n);
  250.   for(_i=0;_i<10;_i++) {
  251.     openclSimpleCopy(context, queue, kernel,n);
  252.   }
  253.  
  254.   // Run the same sequence with a smaller problem size
  255.   n = n/100;
  256.   printf("Run with n = %zu\n",n);
  257.   for(_i=0;_i<10;_i++) {
  258.     openclSimpleCopy(context, queue, kernel,n);
  259.   }
  260.  
  261.  
  262. }
  263.  
  264.  
  265.  
  266. char * OpenCL_error_to_string(int error) {
  267.   switch (error)
  268.     {
  269.     case CL_SUCCESS:
  270.       return (char *)"Success";
  271.     case CL_DEVICE_NOT_FOUND:
  272.       return (char *)"Device Not Found";
  273.     case CL_DEVICE_NOT_AVAILABLE:
  274.       return (char *)"Device Not Available";
  275.     case CL_COMPILER_NOT_AVAILABLE:
  276.       return (char *)"Compiler Not Available";
  277.     case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  278.       return (char *)"Mem Object Allocation Failure";
  279.     case CL_OUT_OF_RESOURCES:
  280.       return (char *)"Out Of Ressources";
  281.     case CL_OUT_OF_HOST_MEMORY:
  282.       return (char *)"Out Of Host Memory";
  283.     case CL_PROFILING_INFO_NOT_AVAILABLE:
  284.       return (char *)"Profiling Info Not Available";
  285.     case CL_MEM_COPY_OVERLAP:
  286.       return (char *)"Mem Copy Overlap";
  287.     case CL_IMAGE_FORMAT_MISMATCH:
  288.       return (char *)"Image Format Mismatch";
  289.     case CL_IMAGE_FORMAT_NOT_SUPPORTED:
  290.       return (char *)"Image Format Not Supported";
  291.     case CL_BUILD_PROGRAM_FAILURE: {
  292.   #define CL_BUILD_PROGRAM_FAILURE_MSG "Build Program Failure : "
  293.   static char debug_buffer[DEBUG_BUFFER_SIZE]; // Static to be returned
  294.   strncat(debug_buffer,CL_BUILD_PROGRAM_FAILURE_MSG,DEBUG_BUFFER_SIZE);
  295.   clGetProgramBuildInfo(program,
  296.           device_id,
  297.           CL_PROGRAM_BUILD_LOG ,
  298.           DEBUG_BUFFER_SIZE,
  299.           debug_buffer+strlen(CL_BUILD_PROGRAM_FAILURE_MSG),
  300.           NULL);
  301.       return (char *)debug_buffer;
  302.     }
  303.     case CL_MAP_FAILURE:
  304.       return (char *)"Map Failure";
  305.     case CL_INVALID_VALUE:
  306.       return (char *)"Invalid Value";
  307.     case CL_INVALID_DEVICE_TYPE:
  308.       return (char *)"Invalid Device Type";
  309.     case CL_INVALID_PLATFORM:
  310.       return (char *)"Invalid Platform";
  311.     case CL_INVALID_DEVICE:
  312.       return (char *)"Invalid Device";
  313.     case CL_INVALID_CONTEXT:
  314.       return (char *)"Invalid Context";
  315.     case CL_INVALID_QUEUE_PROPERTIES:
  316.       return (char *)"Invalid Queue Properties";
  317.     case CL_INVALID_COMMAND_QUEUE:
  318.       return (char *)"Invalid Command Queue";
  319.     case CL_INVALID_HOST_PTR:
  320.       return (char *)"Invalid Host Ptr";
  321.     case CL_INVALID_MEM_OBJECT:
  322.       return (char *)"Invalid Mem Object";
  323.     case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
  324.       return (char *)"Invalid Image Format Descriptor";
  325.     case CL_INVALID_IMAGE_SIZE:
  326.       return (char *)"Invalid Image Size";
  327.     case CL_INVALID_SAMPLER:
  328.       return (char *)"Invalid Sampler";
  329.     case CL_INVALID_BINARY:
  330.       return (char *)"Invalid Binary";
  331.     case CL_INVALID_BUILD_OPTIONS:
  332.       return (char *)"Invalid Build Options";
  333.     case CL_INVALID_PROGRAM:
  334.       return (char *)"Invalid Program";
  335.     case CL_INVALID_PROGRAM_EXECUTABLE:
  336.       return (char *)"Invalid Program Executable";
  337.     case CL_INVALID_KERNEL_NAME:
  338.       return (char *)"Invalid Kernel Name";
  339.     case CL_INVALID_KERNEL_DEFINITION:
  340.       return (char *)"Invalid Kernel Definition";
  341.     case CL_INVALID_KERNEL:
  342.       return (char *)"Invalid Kernel";
  343.     case CL_INVALID_ARG_INDEX:
  344.       return (char *)"Invalid Arg Index";
  345.     case CL_INVALID_ARG_VALUE:
  346.       return (char *)"Invalid Arg Value";
  347.     case CL_INVALID_ARG_SIZE:
  348.       return (char *)"Invalid Arg Size";
  349.     case CL_INVALID_KERNEL_ARGS:
  350.       return (char *)"Invalid Kernel Args";
  351.     case CL_INVALID_WORK_DIMENSION:
  352.       return (char *)"Invalid Work Dimension";
  353.     case CL_INVALID_WORK_GROUP_SIZE:
  354.       return (char *)"Invalid Work Group Size";
  355.     case CL_INVALID_WORK_ITEM_SIZE:
  356.       return (char *)"Invalid Work Item Size";
  357.     case CL_INVALID_GLOBAL_OFFSET:
  358.       return (char *)"Invalid Global Offset";
  359.     case CL_INVALID_EVENT_WAIT_LIST:
  360.       return (char *)"Invalid Event Wait List";
  361.     case CL_INVALID_EVENT:
  362.       return (char *)"Invalid Event";
  363.     case CL_INVALID_OPERATION:
  364.       return (char *)"Invalid Operation";
  365.     case CL_INVALID_GL_OBJECT:
  366.       return (char *)"Invalid GL Object";
  367.     case CL_INVALID_BUFFER_SIZE:
  368.       return (char *)"Invalid Buffer Size";
  369.     case CL_INVALID_MIP_LEVEL:
  370.       return (char *)"Invalid Mip Level";
  371.     case CL_INVALID_GLOBAL_WORK_SIZE:
  372.       return (char *)"Invalid Global Work Size";
  373.     default:
  374.       break;
  375.     }
  376.   return "Unknown";
  377. }


posted on 2012-12-03 21:32 jackdong 阅读(765) 评论(0)  编辑 收藏 引用 所属分类: OpenCL

只有注册用户登录后才能发表评论。
网站导航: 博客园   IT新闻   BlogJava   知识库   博问   管理