b3LauncherCL.cpp 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296
  1. #include "b3LauncherCL.h"
  2. bool gDebugLauncherCL = false;
  3. b3LauncherCL::b3LauncherCL(cl_command_queue queue, cl_kernel kernel, const char* name)
  4. : m_commandQueue(queue),
  5. m_kernel(kernel),
  6. m_idx(0),
  7. m_enableSerialization(false),
  8. m_name(name)
  9. {
  10. if (gDebugLauncherCL)
  11. {
  12. static int counter = 0;
  13. printf("[%d] Prepare to launch OpenCL kernel %s\n", counter++, name);
  14. }
  15. m_serializationSizeInBytes = sizeof(int);
  16. }
  17. b3LauncherCL::~b3LauncherCL()
  18. {
  19. for (int i = 0; i < m_arrays.size(); i++)
  20. {
  21. delete (m_arrays[i]);
  22. }
  23. m_arrays.clear();
  24. if (gDebugLauncherCL)
  25. {
  26. static int counter = 0;
  27. printf("[%d] Finished launching OpenCL kernel %s\n", counter++, m_name);
  28. }
  29. }
  30. void b3LauncherCL::setBuffer(cl_mem clBuffer)
  31. {
  32. if (m_enableSerialization)
  33. {
  34. b3KernelArgData kernelArg;
  35. kernelArg.m_argIndex = m_idx;
  36. kernelArg.m_isBuffer = 1;
  37. kernelArg.m_clBuffer = clBuffer;
  38. cl_mem_info param_name = CL_MEM_SIZE;
  39. size_t param_value;
  40. size_t sizeInBytes = sizeof(size_t);
  41. size_t actualSizeInBytes;
  42. cl_int err;
  43. err = clGetMemObjectInfo(kernelArg.m_clBuffer,
  44. param_name,
  45. sizeInBytes,
  46. &param_value,
  47. &actualSizeInBytes);
  48. b3Assert(err == CL_SUCCESS);
  49. kernelArg.m_argSizeInBytes = param_value;
  50. m_kernelArguments.push_back(kernelArg);
  51. m_serializationSizeInBytes += sizeof(b3KernelArgData);
  52. m_serializationSizeInBytes += param_value;
  53. }
  54. cl_int status = clSetKernelArg(m_kernel, m_idx++, sizeof(cl_mem), &clBuffer);
  55. b3Assert(status == CL_SUCCESS);
  56. }
  57. void b3LauncherCL::setBuffers(b3BufferInfoCL* buffInfo, int n)
  58. {
  59. for (int i = 0; i < n; i++)
  60. {
  61. if (m_enableSerialization)
  62. {
  63. b3KernelArgData kernelArg;
  64. kernelArg.m_argIndex = m_idx;
  65. kernelArg.m_isBuffer = 1;
  66. kernelArg.m_clBuffer = buffInfo[i].m_clBuffer;
  67. cl_mem_info param_name = CL_MEM_SIZE;
  68. size_t param_value;
  69. size_t sizeInBytes = sizeof(size_t);
  70. size_t actualSizeInBytes;
  71. cl_int err;
  72. err = clGetMemObjectInfo(kernelArg.m_clBuffer,
  73. param_name,
  74. sizeInBytes,
  75. &param_value,
  76. &actualSizeInBytes);
  77. b3Assert(err == CL_SUCCESS);
  78. kernelArg.m_argSizeInBytes = param_value;
  79. m_kernelArguments.push_back(kernelArg);
  80. m_serializationSizeInBytes += sizeof(b3KernelArgData);
  81. m_serializationSizeInBytes += param_value;
  82. }
  83. cl_int status = clSetKernelArg(m_kernel, m_idx++, sizeof(cl_mem), &buffInfo[i].m_clBuffer);
  84. b3Assert(status == CL_SUCCESS);
  85. }
  86. }
// View of one serialized kernel-argument record inside a raw byte buffer.
// deserializeArgs overlays this struct on the input bytes and then memcpy's it
// into a b3KernelArgData, so the two layouts are presumably meant to match --
// NOTE(review): confirm against the b3KernelArgData declaration.
struct b3KernelArgDataUnaligned
{
	int m_isBuffer;        // non-zero: the argument is an OpenCL buffer; zero: inline data
	int m_argIndex;        // argument slot stored in the record (deserializeArgs does not read it)
	int m_argSizeInBytes;  // size of the buffer payload, or of the inline data in m_argData
	int m_unusedPadding;   // not accessed by deserializeArgs
	union {
		cl_mem m_clBuffer;                            // filled in by deserializeArgs for buffer arguments
		unsigned char m_argData[B3_CL_MAX_ARG_SIZE];  // inline value handed to clSetKernelArg otherwise
	};
};
  98. #include <string.h>
  99. int b3LauncherCL::deserializeArgs(unsigned char* buf, int bufSize, cl_context ctx)
  100. {
  101. int index = 0;
  102. int numArguments = *(int*)&buf[index];
  103. index += sizeof(int);
  104. for (int i = 0; i < numArguments; i++)
  105. {
  106. b3KernelArgDataUnaligned* arg = (b3KernelArgDataUnaligned*)&buf[index];
  107. index += sizeof(b3KernelArgData);
  108. if (arg->m_isBuffer)
  109. {
  110. b3OpenCLArray<unsigned char>* clData = new b3OpenCLArray<unsigned char>(ctx, m_commandQueue, arg->m_argSizeInBytes);
  111. clData->resize(arg->m_argSizeInBytes);
  112. clData->copyFromHostPointer(&buf[index], arg->m_argSizeInBytes);
  113. arg->m_clBuffer = clData->getBufferCL();
  114. m_arrays.push_back(clData);
  115. cl_int status = clSetKernelArg(m_kernel, m_idx++, sizeof(cl_mem), &arg->m_clBuffer);
  116. b3Assert(status == CL_SUCCESS);
  117. index += arg->m_argSizeInBytes;
  118. }
  119. else
  120. {
  121. cl_int status = clSetKernelArg(m_kernel, m_idx++, arg->m_argSizeInBytes, &arg->m_argData);
  122. b3Assert(status == CL_SUCCESS);
  123. }
  124. b3KernelArgData b;
  125. memcpy(&b, arg, sizeof(b3KernelArgDataUnaligned));
  126. m_kernelArguments.push_back(b);
  127. }
  128. m_serializationSizeInBytes = index;
  129. return index;
  130. }
  131. int b3LauncherCL::validateResults(unsigned char* goldBuffer, int goldBufferCapacity, cl_context ctx)
  132. {
  133. int index = 0;
  134. int numArguments = *(int*)&goldBuffer[index];
  135. index += sizeof(int);
  136. if (numArguments != m_kernelArguments.size())
  137. {
  138. printf("failed validation: expected %d arguments, found %d\n", numArguments, m_kernelArguments.size());
  139. return -1;
  140. }
  141. for (int ii = 0; ii < numArguments; ii++)
  142. {
  143. b3KernelArgData* argGold = (b3KernelArgData*)&goldBuffer[index];
  144. if (m_kernelArguments[ii].m_argSizeInBytes != argGold->m_argSizeInBytes)
  145. {
  146. printf("failed validation: argument %d sizeInBytes expected: %d, found %d\n", ii, argGold->m_argSizeInBytes, m_kernelArguments[ii].m_argSizeInBytes);
  147. return -2;
  148. }
  149. {
  150. int expected = argGold->m_isBuffer;
  151. int found = m_kernelArguments[ii].m_isBuffer;
  152. if (expected != found)
  153. {
  154. printf("failed validation: argument %d isBuffer expected: %d, found %d\n", ii, expected, found);
  155. return -3;
  156. }
  157. }
  158. index += sizeof(b3KernelArgData);
  159. if (argGold->m_isBuffer)
  160. {
  161. unsigned char* memBuf = (unsigned char*)malloc(m_kernelArguments[ii].m_argSizeInBytes);
  162. unsigned char* goldBuf = &goldBuffer[index];
  163. for (int j = 0; j < m_kernelArguments[j].m_argSizeInBytes; j++)
  164. {
  165. memBuf[j] = 0xaa;
  166. }
  167. cl_int status = 0;
  168. status = clEnqueueReadBuffer(m_commandQueue, m_kernelArguments[ii].m_clBuffer, CL_TRUE, 0, m_kernelArguments[ii].m_argSizeInBytes,
  169. memBuf, 0, 0, 0);
  170. b3Assert(status == CL_SUCCESS);
  171. clFinish(m_commandQueue);
  172. for (int b = 0; b < m_kernelArguments[ii].m_argSizeInBytes; b++)
  173. {
  174. int expected = goldBuf[b];
  175. int found = memBuf[b];
  176. if (expected != found)
  177. {
  178. printf("failed validation: argument %d OpenCL data at byte position %d expected: %d, found %d\n",
  179. ii, b, expected, found);
  180. return -4;
  181. }
  182. }
  183. index += argGold->m_argSizeInBytes;
  184. }
  185. else
  186. {
  187. //compare content
  188. for (int b = 0; b < m_kernelArguments[ii].m_argSizeInBytes; b++)
  189. {
  190. int expected = argGold->m_argData[b];
  191. int found = m_kernelArguments[ii].m_argData[b];
  192. if (expected != found)
  193. {
  194. printf("failed validation: argument %d const data at byte position %d expected: %d, found %d\n",
  195. ii, b, expected, found);
  196. return -5;
  197. }
  198. }
  199. }
  200. }
  201. return index;
  202. }
  203. int b3LauncherCL::serializeArguments(unsigned char* destBuffer, int destBufferCapacity)
  204. {
  205. //initialize to known values
  206. for (int i = 0; i < destBufferCapacity; i++)
  207. destBuffer[i] = 0xec;
  208. assert(destBufferCapacity >= m_serializationSizeInBytes);
  209. //todo: use the b3Serializer for this to allow for 32/64bit, endianness etc
  210. int numArguments = m_kernelArguments.size();
  211. int curBufferSize = 0;
  212. int* dest = (int*)&destBuffer[curBufferSize];
  213. *dest = numArguments;
  214. curBufferSize += sizeof(int);
  215. for (int i = 0; i < this->m_kernelArguments.size(); i++)
  216. {
  217. b3KernelArgData* arg = (b3KernelArgData*)&destBuffer[curBufferSize];
  218. *arg = m_kernelArguments[i];
  219. curBufferSize += sizeof(b3KernelArgData);
  220. if (arg->m_isBuffer == 1)
  221. {
  222. //copy the OpenCL buffer content
  223. cl_int status = 0;
  224. status = clEnqueueReadBuffer(m_commandQueue, arg->m_clBuffer, 0, 0, arg->m_argSizeInBytes,
  225. &destBuffer[curBufferSize], 0, 0, 0);
  226. b3Assert(status == CL_SUCCESS);
  227. clFinish(m_commandQueue);
  228. curBufferSize += arg->m_argSizeInBytes;
  229. }
  230. }
  231. return curBufferSize;
  232. }
  233. void b3LauncherCL::serializeToFile(const char* fileName, int numWorkItems)
  234. {
  235. int num = numWorkItems;
  236. int buffSize = getSerializationBufferSize();
  237. unsigned char* buf = new unsigned char[buffSize + sizeof(int)];
  238. for (int i = 0; i < buffSize + 1; i++)
  239. {
  240. unsigned char* ptr = (unsigned char*)&buf[i];
  241. *ptr = 0xff;
  242. }
  243. // int actualWrite = serializeArguments(buf,buffSize);
  244. // unsigned char* cptr = (unsigned char*)&buf[buffSize];
  245. // printf("buf[buffSize] = %d\n",*cptr);
  246. assert(buf[buffSize] == 0xff); //check for buffer overrun
  247. int* ptr = (int*)&buf[buffSize];
  248. *ptr = num;
  249. FILE* f = fopen(fileName, "wb");
  250. fwrite(buf, buffSize + sizeof(int), 1, f);
  251. fclose(f);
  252. delete[] buf;
  253. }