basicsample.pas 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268
  1. {
  2. The sample is give at
  3. http://developer.apple.com/mac/library/samplecode/OpenCL_Hello_World_Example/index.html
  4. ===========================================================================
  5. DESCRIPTION:
  6. A simple "Hello World" compute example showing basic usage of OpenCL which
  7. calculates the mathematical square (X[i] = pow(X[i],2)) for a buffer of
  8. floating point values.
  9. For simplicity, this example is intended to be run from the command line.
  10. If run from within XCode, open the Run Log (Command-Shift-R) to see the
  11. output. Alternatively, run the applications from within a Terminal.app
  12. session to launch from the command line.
  13. ===========================================================================
  14. BUILD REQUIREMENTS:
  15. Mac OS X v10.6 or later
  16. or
  17. Windows with NVidia OpenCL SDK Installed
  18. ===========================================================================
  19. RUNTIME REQUIREMENTS:
  20. Mac OS X v10.6 or later
  21. To use the GPU as a compute device, use one of the following devices:
  22. - MacBook Pro w/NVidia GeForce 8600M
  23. - Mac Pro w/NVidia GeForce 8800GT
  24. If you don't have powerful GPU you can try to use CPU instead:
  25. by changing
  26. gpu := CL_DEVICE_TYPE_GPU;
  27. to
  28. gpu := CL_DEVICE_TYPE_CPU;
  29. Windows with NVidia OpenCL SDK Installed and libOpenCLXX.dll available
  30. CL_DEVICE_TYPE_CPU doesn't seem to work for windows
  31. ===========================================================================
  32. }
  33. program testcl;
  34. {$mode objfpc}{$H+}
  35. uses
  36. ctypes, cl;
  37. // Use a static data size for simplicity
  38. const
  39. DATA_SIZE = 1024;
  40. // Simple compute kernel which computes the square of an input array
  41. const
  42. KernelSource : PChar =
  43. '__kernel void square( '#10+
  44. ' __global float* input, '#10+
  45. ' __global float* output, '#10+
  46. ' const unsigned int count) '#10+
  47. '{ '#10+
  48. ' int i = get_global_id(0); '#10+
  49. ' if(i < count) '#10+
  50. ' output[i] = input[i] * input[i]; '#10+
  51. '} '#0;
  52. var
  53. err : Integer; // error code returned from api calls
  54. data : array [0..DATA_SIZE-1] of single; // original data set given to device
  55. results : array [0..DATA_SIZE-1] of single; // results returned from device
  56. correct : LongWord; // number of correct results returned
  57. global : csize_t; // global domain size for our calculation
  58. local : csize_t; // local domain size for our calculation
  59. device_id : cl_device_id; // compute device id
  60. context : cl_context; // compute context
  61. commands : cl_command_queue; // compute command queue
  62. prog : cl_program; // compute program
  63. kernel : cl_kernel; // compute kernel
  64. input : cl_mem; // device memory used for the input array
  65. output : cl_mem; // device memory used for the output array
  66. i : Integer;
  67. count : Integer;
  68. gpu : cl_device_type;
  69. tmpd : single;
  70. platformids : Pcl_platform_id;
  71. num_platforms : cl_uint;
  72. begin
  73. // Fill our data set with random float values
  74. count := DATA_SIZE;
  75. for i:=0 to count - 1 do
  76. data[i]:= random;
  77. err:=clGetPlatformIDs(0,nil,@num_platforms);
  78. Writeln('clGetPlatformIDs ', num_platforms);
  79. if (err <> CL_SUCCESS) then
  80. begin
  81. writeln('Error: Cannot get number of platforms!');
  82. Halt(1);
  83. end;
  84. getmem(platformids,num_platforms*sizeof(cl_platform_id));
  85. err := clGetPlatformIDs(num_platforms, platformids, nil);
  86. if (err <> CL_SUCCESS) then begin
  87. Writeln('Error: Failed to platforms!');
  88. Halt($FF);
  89. end;
  90. // Connect to a compute device
  91. // change CL_DEVICE_TYPE_CPU to CL_DEVICE_TYPE_GPU is you have powerful video (GeForce 8800/8600M or higher)
  92. gpu := CL_DEVICE_TYPE_GPU;
  93. device_id:=nil;
  94. err := clGetDeviceIDs(platformids[0], gpu, 1, @device_id, nil);
  95. writeln('clGetDeviceIDs ', err);
  96. if (err <> CL_SUCCESS) then begin
  97. Writeln('Error: Failed to create a device group!');
  98. Halt($FF);
  99. end;
  100. // Create a compute context
  101. context := clCreateContext(nil, 1, @device_id, nil, nil, err);
  102. writeln('clCreateContext ', err);
  103. if context=nil then begin
  104. Writeln('Error: Failed to create a compute context!');
  105. Halt($FF);
  106. end;
  107. // Create a command commands
  108. commands := clCreateCommandQueue(context, device_id, 0, err);
  109. writeln('clCreateCommandQueue ', err);
  110. if commands=nil then begin
  111. Writeln('Error: Failed to create a command commands!');
  112. Halt($FF);
  113. end;
  114. // Create the compute program from the source buffer
  115. prog:= clCreateProgramWithSource(context, 1, PPChar(@KernelSource), nil, err);
  116. writeln('clCreateProgramWithSource ', err);
  117. if prog=nil then begin
  118. writeln('Error: Failed to create compute program! ');
  119. Halt($FF);
  120. end;
  121. // Build the program executable
  122. err := clBuildProgram(prog, 0, nil, nil, nil, nil);
  123. writeln('clBuildProgram ', err);
  124. if (err <> CL_SUCCESS) then begin
  125. writeln('Error: Failed to build program executable!');
  126. Halt(1);
  127. end;
  128. // Create the compute kernel in the program we wish to run
  129. kernel := clCreateKernel(prog, 'square', err);
  130. writeln('clCreateKernel ', err);
  131. if (kernel=nil) or (err <> CL_SUCCESS) then begin
  132. writeln('Error: Failed to create compute kernel!');
  133. Halt(1);
  134. end;
  135. err := clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), @local, nil);
  136. writeln('clGetKernelWorkGroupInfo ', err);
  137. if (err<>CL_SUCCESS) then begin
  138. writeln('Error: Failed to retrieve kernel work group info!');
  139. Halt(1);
  140. end;
  141. // Create the input and output arrays in device memory for our calculation
  142. input := clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(single) * count, nil, err);
  143. writeln('clCreateBuffer ', err);
  144. output := clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(single) * count, nil, err);
  145. writeln('clCreateBuffer ', err);
  146. if (input=nil) or (output=nil) then begin
  147. writeln('Error: Failed to allocate device memory!');
  148. Halt(1);
  149. end;
  150. // Write our data set into the input array in device memory
  151. err := clEnqueueWriteBuffer(commands, input, CL_TRUE, 0, sizeof(single) * count, @data, 0, nil, nil);
  152. writeln('clEnqueueWriteBuffer ', err);
  153. if (err <> CL_SUCCESS) then begin
  154. writeln('Error: Failed to write to source array!');
  155. Halt(1);
  156. end;
  157. // Set the arguments to our compute kernel
  158. err := 0;
  159. err := clSetKernelArg(kernel, 0, sizeof(cl_mem), @input);
  160. writeln('clSetKernelArg ', err);
  161. err := err or clSetKernelArg(kernel, 1, sizeof(cl_mem), @output);
  162. writeln('clSetKernelArg ', err);
  163. err := err or clSetKernelArg(kernel, 2, sizeof(longword), @count);
  164. writeln('clSetKernelArg ', err);
  165. if (err <> CL_SUCCESS) then begin
  166. writeln('Error: Failed to set kernel arguments! ');
  167. Halt(1);
  168. end;
  169. // Get the maximum work group size for executing the kernel on the device
  170. err := clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), @local, nil);
  171. writeln('clGetKernelWorkGroupInfo ', err);
  172. if (err<>CL_SUCCESS) then begin
  173. writeln('Error: Failed to retrieve kernel work group info!');
  174. Halt(1);
  175. end;
  176. // Execute the kernel over the entire range of our 1d input data set
  177. // using the maximum number of work group items for this device
  178. global := count;
  179. err := clEnqueueNDRangeKernel(commands, kernel, 1, nil, @global, @local, 0, nil, nil);
  180. writeln('clEnqueueNDRangeKernel ',err);
  181. if (err<>0) then begin
  182. writeln('Error: Failed to execute kernel!');
  183. Halt($FF);
  184. end;
  185. // Wait for the command commands to get serviced before reading back results
  186. err:=clFinish(commands);
  187. writeln('clFinish ',err);
  188. // Read back the results from the device to verify the output
  189. err := clEnqueueReadBuffer( commands, output, CL_TRUE, 0, sizeof(single) * count, @results, 0, nil, nil);
  190. writeln('clEnqueueReadBuffer ',err);
  191. if (err <> CL_SUCCESS) then begin
  192. writeln('Error: Failed to read output array! ', err);
  193. Halt(1);
  194. end;
  195. // Validate our results
  196. correct := 0;
  197. for i:= 0 to count - 1 do begin
  198. // FPU warning:
  199. //
  200. // the following check (as in original C sample)
  201. // if results[i] = data[i] * data[i] then
  202. //
  203. // return the incorrect result (FP accuracy?),
  204. // must store the result to single type variable first,
  205. // and then compare:
  206. tmpd:=data[i] * data[i];
  207. if results[i] = tmpd then inc(correct);
  208. end;
  209. // Print a brief summary detailing the results
  210. writeln('Computed ', correct, '/', count,' correct values!');
  211. // Shutdown and cleanup
  212. clReleaseMemObject(input);
  213. clReleaseMemObject(output);
  214. clReleaseProgram(prog);
  215. clReleaseKernel(kernel);
  216. clReleaseCommandQueue(commands);
  217. clReleaseContext(context);
  218. end.