b3GpuRaycast.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374
  1. #include "b3GpuRaycast.h"
  2. #include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h"
  3. #include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
  4. #include "Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h"
  5. #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
  6. #include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"
  7. #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
  8. #include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h"
  9. #include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
  10. #include "Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h"
  11. #include "Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h"
  12. #include "Bullet3OpenCL/Raycast/kernels/rayCastKernels.h"
  13. #define B3_RAYCAST_PATH "src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl"
  14. struct b3GpuRaycastInternalData
  15. {
  16. cl_context m_context;
  17. cl_device_id m_device;
  18. cl_command_queue m_q;
  19. cl_kernel m_raytraceKernel;
  20. cl_kernel m_raytracePairsKernel;
  21. cl_kernel m_findRayRigidPairIndexRanges;
  22. b3GpuParallelLinearBvh* m_plbvh;
  23. b3RadixSort32CL* m_radixSorter;
  24. b3FillCL* m_fill;
  25. //1 element per ray
  26. b3OpenCLArray<b3RayInfo>* m_gpuRays;
  27. b3OpenCLArray<b3RayHit>* m_gpuHitResults;
  28. b3OpenCLArray<int>* m_firstRayRigidPairIndexPerRay;
  29. b3OpenCLArray<int>* m_numRayRigidPairsPerRay;
  30. //1 element per (ray index, rigid index) pair, where the ray intersects with the rigid's AABB
  31. b3OpenCLArray<int>* m_gpuNumRayRigidPairs;
  32. b3OpenCLArray<b3Int2>* m_gpuRayRigidPairs; //x == ray index, y == rigid index
  33. int m_test;
  34. };
  35. b3GpuRaycast::b3GpuRaycast(cl_context ctx, cl_device_id device, cl_command_queue q)
  36. {
  37. m_data = new b3GpuRaycastInternalData;
  38. m_data->m_context = ctx;
  39. m_data->m_device = device;
  40. m_data->m_q = q;
  41. m_data->m_raytraceKernel = 0;
  42. m_data->m_raytracePairsKernel = 0;
  43. m_data->m_findRayRigidPairIndexRanges = 0;
  44. m_data->m_plbvh = new b3GpuParallelLinearBvh(ctx, device, q);
  45. m_data->m_radixSorter = new b3RadixSort32CL(ctx, device, q);
  46. m_data->m_fill = new b3FillCL(ctx, device, q);
  47. m_data->m_gpuRays = new b3OpenCLArray<b3RayInfo>(ctx, q);
  48. m_data->m_gpuHitResults = new b3OpenCLArray<b3RayHit>(ctx, q);
  49. m_data->m_firstRayRigidPairIndexPerRay = new b3OpenCLArray<int>(ctx, q);
  50. m_data->m_numRayRigidPairsPerRay = new b3OpenCLArray<int>(ctx, q);
  51. m_data->m_gpuNumRayRigidPairs = new b3OpenCLArray<int>(ctx, q);
  52. m_data->m_gpuRayRigidPairs = new b3OpenCLArray<b3Int2>(ctx, q);
  53. {
  54. cl_int errNum = 0;
  55. cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context, m_data->m_device, rayCastKernelCL, &errNum, "", B3_RAYCAST_PATH);
  56. b3Assert(errNum == CL_SUCCESS);
  57. m_data->m_raytraceKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, rayCastKernelCL, "rayCastKernel", &errNum, prog);
  58. b3Assert(errNum == CL_SUCCESS);
  59. m_data->m_raytracePairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, rayCastKernelCL, "rayCastPairsKernel", &errNum, prog);
  60. b3Assert(errNum == CL_SUCCESS);
  61. m_data->m_findRayRigidPairIndexRanges = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, rayCastKernelCL, "findRayRigidPairIndexRanges", &errNum, prog);
  62. b3Assert(errNum == CL_SUCCESS);
  63. clReleaseProgram(prog);
  64. }
  65. }
  66. b3GpuRaycast::~b3GpuRaycast()
  67. {
  68. clReleaseKernel(m_data->m_raytraceKernel);
  69. clReleaseKernel(m_data->m_raytracePairsKernel);
  70. clReleaseKernel(m_data->m_findRayRigidPairIndexRanges);
  71. delete m_data->m_plbvh;
  72. delete m_data->m_radixSorter;
  73. delete m_data->m_fill;
  74. delete m_data->m_gpuRays;
  75. delete m_data->m_gpuHitResults;
  76. delete m_data->m_firstRayRigidPairIndexPerRay;
  77. delete m_data->m_numRayRigidPairsPerRay;
  78. delete m_data->m_gpuNumRayRigidPairs;
  79. delete m_data->m_gpuRayRigidPairs;
  80. delete m_data;
  81. }
  82. bool sphere_intersect(const b3Vector3& spherePos, b3Scalar radius, const b3Vector3& rayFrom, const b3Vector3& rayTo, float& hitFraction)
  83. {
  84. b3Vector3 rs = rayFrom - spherePos;
  85. b3Vector3 rayDir = rayTo - rayFrom;
  86. float A = b3Dot(rayDir, rayDir);
  87. float B = b3Dot(rs, rayDir);
  88. float C = b3Dot(rs, rs) - (radius * radius);
  89. float D = B * B - A * C;
  90. if (D > 0.0)
  91. {
  92. float t = (-B - sqrt(D)) / A;
  93. if ((t >= 0.0f) && (t < hitFraction))
  94. {
  95. hitFraction = t;
  96. return true;
  97. }
  98. }
  99. return false;
  100. }
  101. bool rayConvex(const b3Vector3& rayFromLocal, const b3Vector3& rayToLocal, const b3ConvexPolyhedronData& poly,
  102. const b3AlignedObjectArray<b3GpuFace>& faces, float& hitFraction, b3Vector3& hitNormal)
  103. {
  104. float exitFraction = hitFraction;
  105. float enterFraction = -0.1f;
  106. b3Vector3 curHitNormal = b3MakeVector3(0, 0, 0);
  107. for (int i = 0; i < poly.m_numFaces; i++)
  108. {
  109. const b3GpuFace& face = faces[poly.m_faceOffset + i];
  110. float fromPlaneDist = b3Dot(rayFromLocal, face.m_plane) + face.m_plane.w;
  111. float toPlaneDist = b3Dot(rayToLocal, face.m_plane) + face.m_plane.w;
  112. if (fromPlaneDist < 0.f)
  113. {
  114. if (toPlaneDist >= 0.f)
  115. {
  116. float fraction = fromPlaneDist / (fromPlaneDist - toPlaneDist);
  117. if (exitFraction > fraction)
  118. {
  119. exitFraction = fraction;
  120. }
  121. }
  122. }
  123. else
  124. {
  125. if (toPlaneDist < 0.f)
  126. {
  127. float fraction = fromPlaneDist / (fromPlaneDist - toPlaneDist);
  128. if (enterFraction <= fraction)
  129. {
  130. enterFraction = fraction;
  131. curHitNormal = face.m_plane;
  132. curHitNormal.w = 0.f;
  133. }
  134. }
  135. else
  136. {
  137. return false;
  138. }
  139. }
  140. if (exitFraction <= enterFraction)
  141. return false;
  142. }
  143. if (enterFraction < 0.f)
  144. return false;
  145. hitFraction = enterFraction;
  146. hitNormal = curHitNormal;
  147. return true;
  148. }
  149. void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults,
  150. int numBodies, const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData)
  151. {
  152. // return castRays(rays,hitResults,numBodies,bodies,numCollidables,collidables);
  153. B3_PROFILE("castRaysHost");
  154. for (int r = 0; r < rays.size(); r++)
  155. {
  156. b3Vector3 rayFrom = rays[r].m_from;
  157. b3Vector3 rayTo = rays[r].m_to;
  158. float hitFraction = hitResults[r].m_hitFraction;
  159. int hitBodyIndex = -1;
  160. b3Vector3 hitNormal;
  161. for (int b = 0; b < numBodies; b++)
  162. {
  163. const b3Vector3& pos = bodies[b].m_pos;
  164. //const b3Quaternion& orn = bodies[b].m_quat;
  165. switch (collidables[bodies[b].m_collidableIdx].m_shapeType)
  166. {
  167. case SHAPE_SPHERE:
  168. {
  169. b3Scalar radius = collidables[bodies[b].m_collidableIdx].m_radius;
  170. if (sphere_intersect(pos, radius, rayFrom, rayTo, hitFraction))
  171. {
  172. hitBodyIndex = b;
  173. b3Vector3 hitPoint;
  174. hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to, hitFraction);
  175. hitNormal = (hitPoint - bodies[b].m_pos).normalize();
  176. }
  177. }
  178. case SHAPE_CONVEX_HULL:
  179. {
  180. b3Transform convexWorldTransform;
  181. convexWorldTransform.setIdentity();
  182. convexWorldTransform.setOrigin(bodies[b].m_pos);
  183. convexWorldTransform.setRotation(bodies[b].m_quat);
  184. b3Transform convexWorld2Local = convexWorldTransform.inverse();
  185. b3Vector3 rayFromLocal = convexWorld2Local(rayFrom);
  186. b3Vector3 rayToLocal = convexWorld2Local(rayTo);
  187. int shapeIndex = collidables[bodies[b].m_collidableIdx].m_shapeIndex;
  188. const b3ConvexPolyhedronData& poly = narrowphaseData->m_convexPolyhedra[shapeIndex];
  189. if (rayConvex(rayFromLocal, rayToLocal, poly, narrowphaseData->m_convexFaces, hitFraction, hitNormal))
  190. {
  191. hitBodyIndex = b;
  192. }
  193. break;
  194. }
  195. default:
  196. {
  197. static bool once = true;
  198. if (once)
  199. {
  200. once = false;
  201. b3Warning("Raytest: unsupported shape type\n");
  202. }
  203. }
  204. }
  205. }
  206. if (hitBodyIndex >= 0)
  207. {
  208. hitResults[r].m_hitFraction = hitFraction;
  209. hitResults[r].m_hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to, hitFraction);
  210. hitResults[r].m_hitNormal = hitNormal;
  211. hitResults[r].m_hitBody = hitBodyIndex;
  212. }
  213. }
  214. }
  215. ///todo: add some acceleration structure (AABBs, tree etc)
  216. void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults,
  217. int numBodies, const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables,
  218. const struct b3GpuNarrowPhaseInternalData* narrowphaseData, class b3GpuBroadphaseInterface* broadphase)
  219. {
  220. //castRaysHost(rays,hitResults,numBodies,bodies,numCollidables,collidables,narrowphaseData);
  221. B3_PROFILE("castRaysGPU");
  222. {
  223. B3_PROFILE("raycast copyFromHost");
  224. m_data->m_gpuRays->copyFromHost(rays);
  225. m_data->m_gpuHitResults->copyFromHost(hitResults);
  226. }
  227. int numRays = hitResults.size();
  228. {
  229. m_data->m_firstRayRigidPairIndexPerRay->resize(numRays);
  230. m_data->m_numRayRigidPairsPerRay->resize(numRays);
  231. m_data->m_gpuNumRayRigidPairs->resize(1);
  232. m_data->m_gpuRayRigidPairs->resize(numRays * 16);
  233. }
  234. //run kernel
  235. const bool USE_BRUTE_FORCE_RAYCAST = false;
  236. if (USE_BRUTE_FORCE_RAYCAST)
  237. {
  238. B3_PROFILE("raycast launch1D");
  239. b3LauncherCL launcher(m_data->m_q, m_data->m_raytraceKernel, "m_raytraceKernel");
  240. int numRays = rays.size();
  241. launcher.setConst(numRays);
  242. launcher.setBuffer(m_data->m_gpuRays->getBufferCL());
  243. launcher.setBuffer(m_data->m_gpuHitResults->getBufferCL());
  244. launcher.setConst(numBodies);
  245. launcher.setBuffer(narrowphaseData->m_bodyBufferGPU->getBufferCL());
  246. launcher.setBuffer(narrowphaseData->m_collidablesGPU->getBufferCL());
  247. launcher.setBuffer(narrowphaseData->m_convexFacesGPU->getBufferCL());
  248. launcher.setBuffer(narrowphaseData->m_convexPolyhedraGPU->getBufferCL());
  249. launcher.launch1D(numRays);
  250. clFinish(m_data->m_q);
  251. }
  252. else
  253. {
  254. m_data->m_plbvh->build(broadphase->getAllAabbsGPU(), broadphase->getSmallAabbIndicesGPU(), broadphase->getLargeAabbIndicesGPU());
  255. m_data->m_plbvh->testRaysAgainstBvhAabbs(*m_data->m_gpuRays, *m_data->m_gpuNumRayRigidPairs, *m_data->m_gpuRayRigidPairs);
  256. int numRayRigidPairs = -1;
  257. m_data->m_gpuNumRayRigidPairs->copyToHostPointer(&numRayRigidPairs, 1);
  258. if (numRayRigidPairs > m_data->m_gpuRayRigidPairs->size())
  259. {
  260. numRayRigidPairs = m_data->m_gpuRayRigidPairs->size();
  261. m_data->m_gpuNumRayRigidPairs->copyFromHostPointer(&numRayRigidPairs, 1);
  262. }
  263. m_data->m_gpuRayRigidPairs->resize(numRayRigidPairs); //Radix sort needs b3OpenCLArray::size() to be correct
  264. //Sort ray-rigid pairs by ray index
  265. {
  266. B3_PROFILE("sort ray-rigid pairs");
  267. m_data->m_radixSorter->execute(*reinterpret_cast<b3OpenCLArray<b3SortData>*>(m_data->m_gpuRayRigidPairs));
  268. }
  269. //detect start,count of each ray pair
  270. {
  271. B3_PROFILE("detect ray-rigid pair index ranges");
  272. {
  273. B3_PROFILE("reset ray-rigid pair index ranges");
  274. m_data->m_fill->execute(*m_data->m_firstRayRigidPairIndexPerRay, numRayRigidPairs, numRays); //atomic_min used to find first index
  275. m_data->m_fill->execute(*m_data->m_numRayRigidPairsPerRay, 0, numRays);
  276. clFinish(m_data->m_q);
  277. }
  278. b3BufferInfoCL bufferInfo[] =
  279. {
  280. b3BufferInfoCL(m_data->m_gpuRayRigidPairs->getBufferCL()),
  281. b3BufferInfoCL(m_data->m_firstRayRigidPairIndexPerRay->getBufferCL()),
  282. b3BufferInfoCL(m_data->m_numRayRigidPairsPerRay->getBufferCL())};
  283. b3LauncherCL launcher(m_data->m_q, m_data->m_findRayRigidPairIndexRanges, "m_findRayRigidPairIndexRanges");
  284. launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL));
  285. launcher.setConst(numRayRigidPairs);
  286. launcher.launch1D(numRayRigidPairs);
  287. clFinish(m_data->m_q);
  288. }
  289. {
  290. B3_PROFILE("ray-rigid intersection");
  291. b3BufferInfoCL bufferInfo[] =
  292. {
  293. b3BufferInfoCL(m_data->m_gpuRays->getBufferCL()),
  294. b3BufferInfoCL(m_data->m_gpuHitResults->getBufferCL()),
  295. b3BufferInfoCL(m_data->m_firstRayRigidPairIndexPerRay->getBufferCL()),
  296. b3BufferInfoCL(m_data->m_numRayRigidPairsPerRay->getBufferCL()),
  297. b3BufferInfoCL(narrowphaseData->m_bodyBufferGPU->getBufferCL()),
  298. b3BufferInfoCL(narrowphaseData->m_collidablesGPU->getBufferCL()),
  299. b3BufferInfoCL(narrowphaseData->m_convexFacesGPU->getBufferCL()),
  300. b3BufferInfoCL(narrowphaseData->m_convexPolyhedraGPU->getBufferCL()),
  301. b3BufferInfoCL(m_data->m_gpuRayRigidPairs->getBufferCL())};
  302. b3LauncherCL launcher(m_data->m_q, m_data->m_raytracePairsKernel, "m_raytracePairsKernel");
  303. launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL));
  304. launcher.setConst(numRays);
  305. launcher.launch1D(numRays);
  306. clFinish(m_data->m_q);
  307. }
  308. }
  309. //copy results
  310. {
  311. B3_PROFILE("raycast copyToHost");
  312. m_data->m_gpuHitResults->copyToHost(hitResults);
  313. }
  314. }