basicsample.pas 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248
  1. {
  2. The sample is give at
  3. http://developer.apple.com/mac/library/samplecode/OpenCL_Hello_World_Example/index.html
  4. ===========================================================================
  5. DESCRIPTION:
  6. A simple "Hello World" compute example showing basic usage of OpenCL which
  7. calculates the mathematical square (X[i] = pow(X[i],2)) for a buffer of
  8. floating point values.
  9. For simplicity, this example is intended to be run from the command line.
  10. If run from within XCode, open the Run Log (Command-Shift-R) to see the
  11. output. Alternatively, run the applications from within a Terminal.app
  12. session to launch from the command line.
  13. ===========================================================================
  14. BUILD REQUIREMENTS:
  15. Mac OS X v10.6 or later
  16. or
  17. Windows with NVidia OpenCL SDK Installed
  18. ===========================================================================
  19. RUNTIME REQUIREMENTS:
  20. Mac OS X v10.6 or later
  21. To use the GPU as a compute device, use one of the following devices:
  22. - MacBook Pro w/NVidia GeForce 8600M
  23. - Mac Pro w/NVidia GeForce 8800GT
  24. If you don't have powerful GPU you can try to use CPU instead:
  25. by changing
  26. gpu := CL_DEVICE_TYPE_GPU;
  27. to
  28. gpu := CL_DEVICE_TYPE_CPU;
  29. Windows with NVidia OpenCL SDK Installed and libOpenCLXX.dll available
  30. CL_DEVICE_TYPE_CPU doesn't seem to work for windows
  31. ===========================================================================
  32. }
  33. program testcl;
  34. {$mode objfpc}{$H+}
  35. uses
  36. ctypes, cl;
  37. // Use a static data size for simplicity
  38. const
  39. DATA_SIZE = 1024;
  40. // Simple compute kernel which computes the square of an input array
  41. const
  42. KernelSource : PChar =
  43. '__kernel void square( '#10+
  44. ' __global float* input, '#10+
  45. ' __global float* output, '#10+
  46. ' const unsigned int count) '#10+
  47. '{ '#10+
  48. ' int i = get_global_id(0); '#10+
  49. ' if(i < count) '#10+
  50. ' output[i] = input[i] * input[i]; '#10+
  51. '} '#0;
  52. var
  53. err : Integer; // error code returned from api calls
  54. data : array [0..DATA_SIZE-1] of single; // original data set given to device
  55. results : array [0..DATA_SIZE-1] of single; // results returned from device
  56. correct : LongWord; // number of correct results returned
  57. global : csize_t; // global domain size for our calculation
  58. local : csize_t; // local domain size for our calculation
  59. device_id : cl_device_id; // compute device id
  60. context : cl_context; // compute context
  61. commands : cl_command_queue; // compute command queue
  62. prog : cl_program; // compute program
  63. kernel : cl_kernel; // compute kernel
  64. input : cl_mem; // device memory used for the input array
  65. output : cl_mem; // device memory used for the output array
  66. i : Integer;
  67. count : Integer;
  68. gpu : cl_device_type;
  69. tmpd : single;
  70. begin
  71. // Fill our data set with random float values
  72. count := DATA_SIZE;
  73. for i:=0 to count - 1 do
  74. data[i]:= random;
  75. // Connect to a compute device
  76. // change CL_DEVICE_TYPE_CPU to CL_DEVICE_TYPE_GPU is you have powerful video (GeForce 8800/8600M or higher)
  77. gpu := CL_DEVICE_TYPE_GPU;
  78. device_id:=nil;
  79. err := clGetDeviceIDs(nil, gpu, 1, @device_id, nil);
  80. writeln('clGetDeviceIDs ', err);
  81. if (err <> CL_SUCCESS) then begin
  82. Writeln('Error: Failed to create a device group!');
  83. Halt($FF);
  84. end;
  85. // Create a compute context
  86. context := clCreateContext(nil, 1, @device_id, nil, nil, err);
  87. writeln('clCreateContext ', err);
  88. if context=nil then begin
  89. Writeln('Error: Failed to create a compute context!');
  90. Halt($FF);
  91. end;
  92. // Create a command commands
  93. commands := clCreateCommandQueue(context, device_id, 0, err);
  94. writeln('clCreateCommandQueue ', err);
  95. if commands=nil then begin
  96. Writeln('Error: Failed to create a command commands!');
  97. Halt($FF);
  98. end;
  99. // Create the compute program from the source buffer
  100. prog:= clCreateProgramWithSource(context, 1, PPChar(@KernelSource), nil, err);
  101. writeln('clCreateProgramWithSource ', err);
  102. if prog=nil then begin
  103. writeln('Error: Failed to create compute program! ');
  104. Halt($FF);
  105. end;
  106. // Build the program executable
  107. err := clBuildProgram(prog, 0, nil, nil, nil, nil);
  108. writeln('clBuildProgram ', err);
  109. if (err <> CL_SUCCESS) then begin
  110. writeln('Error: Failed to build program executable!');
  111. Halt(1);
  112. end;
  113. // Create the compute kernel in the program we wish to run
  114. kernel := clCreateKernel(prog, 'square', err);
  115. writeln('clCreateKernel ', err);
  116. if (kernel=nil) or (err <> CL_SUCCESS) then begin
  117. writeln('Error: Failed to create compute kernel!');
  118. Halt(1);
  119. end;
  120. err := clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), @local, nil);
  121. writeln('clGetKernelWorkGroupInfo ', err);
  122. if (err<>CL_SUCCESS) then begin
  123. writeln('Error: Failed to retrieve kernel work group info!');
  124. Halt(1);
  125. end;
  126. // Create the input and output arrays in device memory for our calculation
  127. input := clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(single) * count, nil, err);
  128. writeln('clCreateBuffer ', err);
  129. output := clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(single) * count, nil, err);
  130. writeln('clCreateBuffer ', err);
  131. if (input=nil) or (output=nil) then begin
  132. writeln('Error: Failed to allocate device memory!');
  133. Halt(1);
  134. end;
  135. // Write our data set into the input array in device memory
  136. err := clEnqueueWriteBuffer(commands, input, CL_TRUE, 0, sizeof(single) * count, @data, 0, nil, nil);
  137. writeln('clEnqueueWriteBuffer ', err);
  138. if (err <> CL_SUCCESS) then begin
  139. writeln('Error: Failed to write to source array!');
  140. Halt(1);
  141. end;
  142. // Set the arguments to our compute kernel
  143. err := 0;
  144. err := clSetKernelArg(kernel, 0, sizeof(cl_mem), @input);
  145. writeln('clSetKernelArg ', err);
  146. err := err or clSetKernelArg(kernel, 1, sizeof(cl_mem), @output);
  147. writeln('clSetKernelArg ', err);
  148. err := err or clSetKernelArg(kernel, 2, sizeof(longword), @count);
  149. writeln('clSetKernelArg ', err);
  150. if (err <> CL_SUCCESS) then begin
  151. writeln('Error: Failed to set kernel arguments! ');
  152. Halt(1);
  153. end;
  154. // Get the maximum work group size for executing the kernel on the device
  155. err := clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), @local, nil);
  156. writeln('clGetKernelWorkGroupInfo ', err);
  157. if (err<>CL_SUCCESS) then begin
  158. writeln('Error: Failed to retrieve kernel work group info!');
  159. Halt(1);
  160. end;
  161. // Execute the kernel over the entire range of our 1d input data set
  162. // using the maximum number of work group items for this device
  163. global := count;
  164. err := clEnqueueNDRangeKernel(commands, kernel, 1, nil, @global, @local, 0, nil, nil);
  165. writeln('clEnqueueNDRangeKernel ',err);
  166. if (err<>0) then begin
  167. writeln('Error: Failed to execute kernel!');
  168. Halt($FF);
  169. end;
  170. // Wait for the command commands to get serviced before reading back results
  171. err:=clFinish(commands);
  172. writeln('clFinish ',err);
  173. // Read back the results from the device to verify the output
  174. err := clEnqueueReadBuffer( commands, output, CL_TRUE, 0, sizeof(single) * count, @results, 0, nil, nil);
  175. writeln('clEnqueueReadBuffer ',err);
  176. if (err <> CL_SUCCESS) then begin
  177. writeln('Error: Failed to read output array! ', err);
  178. Halt(1);
  179. end;
  180. // Validate our results
  181. correct := 0;
  182. for i:= 0 to count - 1 do begin
  183. // FPU warning:
  184. //
  185. // the following check (as in original C sample)
  186. // if results[i] = data[i] * data[i] then
  187. //
  188. // return the incorrect result (FP accuracy?),
  189. // must store the result to single type variable first,
  190. // and then compare:
  191. tmpd:=data[i] * data[i];
  192. if results[i] = tmpd then inc(correct);
  193. end;
  194. // Print a brief summary detailing the results
  195. writeln('Computed ', correct, '/', count,' correct values!');
  196. // Shutdown and cleanup
  197. clReleaseMemObject(input);
  198. clReleaseMemObject(output);
  199. clReleaseProgram(prog);
  200. clReleaseKernel(kernel);
  201. clReleaseCommandQueue(commands);
  202. clReleaseContext(context);
  203. end.