// b3GpuSapBroadphase.h (4.8 KB)
  1. #ifndef B3_GPU_SAP_BROADPHASE_H
  2. #define B3_GPU_SAP_BROADPHASE_H
  3. #include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"
  4. #include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h" //b3Int2
  5. class b3Vector3;
  6. #include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
  7. #include "b3SapAabb.h"
  8. #include "Bullet3Common/shared/b3Int2.h"
  9. #include "b3GpuBroadphaseInterface.h"
  10. class b3GpuSapBroadphase : public b3GpuBroadphaseInterface
  11. {
  12. cl_context m_context;
  13. cl_device_id m_device;
  14. cl_command_queue m_queue;
  15. cl_kernel m_flipFloatKernel;
  16. cl_kernel m_scatterKernel ;
  17. cl_kernel m_copyAabbsKernel;
  18. cl_kernel m_sapKernel;
  19. cl_kernel m_sap2Kernel;
  20. cl_kernel m_prepareSumVarianceKernel;
  21. class b3RadixSort32CL* m_sorter;
  22. ///test for 3d SAP
  23. b3AlignedObjectArray<b3SortData> m_sortedAxisCPU[3][2];
  24. b3AlignedObjectArray<b3UnsignedInt2> m_objectMinMaxIndexCPU[3][2];
  25. b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis0;
  26. b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis1;
  27. b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis2;
  28. b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis0prev;
  29. b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis1prev;
  30. b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis2prev;
  31. b3OpenCLArray<b3SortData> m_sortedAxisGPU0;
  32. b3OpenCLArray<b3SortData> m_sortedAxisGPU1;
  33. b3OpenCLArray<b3SortData> m_sortedAxisGPU2;
  34. b3OpenCLArray<b3SortData> m_sortedAxisGPU0prev;
  35. b3OpenCLArray<b3SortData> m_sortedAxisGPU1prev;
  36. b3OpenCLArray<b3SortData> m_sortedAxisGPU2prev;
  37. b3OpenCLArray<b3Int4> m_addedHostPairsGPU;
  38. b3OpenCLArray<b3Int4> m_removedHostPairsGPU;
  39. b3OpenCLArray<int> m_addedCountGPU;
  40. b3OpenCLArray<int> m_removedCountGPU;
  41. int m_currentBuffer;
  42. public:
  43. b3OpenCLArray<int> m_pairCount;
  44. b3OpenCLArray<b3SapAabb> m_allAabbsGPU;
  45. b3AlignedObjectArray<b3SapAabb> m_allAabbsCPU;
  46. virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU()
  47. {
  48. return m_allAabbsGPU;
  49. }
  50. virtual b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU()
  51. {
  52. return m_allAabbsCPU;
  53. }
  54. b3OpenCLArray<b3Vector3> m_sum;
  55. b3OpenCLArray<b3Vector3> m_sum2;
  56. b3OpenCLArray<b3Vector3> m_dst;
  57. b3OpenCLArray<int> m_smallAabbsMappingGPU;
  58. b3AlignedObjectArray<int> m_smallAabbsMappingCPU;
  59. b3OpenCLArray<int> m_largeAabbsMappingGPU;
  60. b3AlignedObjectArray<int> m_largeAabbsMappingCPU;
  61. b3OpenCLArray<b3Int4> m_overlappingPairs;
  62. //temporary gpu work memory
  63. b3OpenCLArray<b3SortData> m_gpuSmallSortData;
  64. b3OpenCLArray<b3SapAabb> m_gpuSmallSortedAabbs;
  65. class b3PrefixScanFloat4CL* m_prefixScanFloat4;
  66. enum b3GpuSapKernelType
  67. {
  68. B3_GPU_SAP_KERNEL_BRUTE_FORCE_CPU=1,
  69. B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU,
  70. B3_GPU_SAP_KERNEL_ORIGINAL,
  71. B3_GPU_SAP_KERNEL_BARRIER,
  72. B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY
  73. };
  74. b3GpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue q , b3GpuSapKernelType kernelType=B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY);
  75. virtual ~b3GpuSapBroadphase();
  76. static b3GpuBroadphaseInterface* CreateFuncBruteForceCpu(cl_context ctx,cl_device_id device, cl_command_queue q)
  77. {
  78. return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_BRUTE_FORCE_CPU);
  79. }
  80. static b3GpuBroadphaseInterface* CreateFuncBruteForceGpu(cl_context ctx,cl_device_id device, cl_command_queue q)
  81. {
  82. return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU);
  83. }
  84. static b3GpuBroadphaseInterface* CreateFuncOriginal(cl_context ctx,cl_device_id device, cl_command_queue q)
  85. {
  86. return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_ORIGINAL);
  87. }
  88. static b3GpuBroadphaseInterface* CreateFuncBarrier(cl_context ctx,cl_device_id device, cl_command_queue q)
  89. {
  90. return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_BARRIER);
  91. }
  92. static b3GpuBroadphaseInterface* CreateFuncLocalMemory(cl_context ctx,cl_device_id device, cl_command_queue q)
  93. {
  94. return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY);
  95. }
  96. virtual void calculateOverlappingPairs(int maxPairs);
  97. virtual void calculateOverlappingPairsHost(int maxPairs);
  98. void reset();
  99. void init3dSap();
  100. virtual void calculateOverlappingPairsHostIncremental3Sap();
  101. virtual void createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask);
  102. virtual void createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask);
  103. //call writeAabbsToGpu after done making all changes (createProxy etc)
  104. virtual void writeAabbsToGpu();
  105. virtual cl_mem getAabbBufferWS();
  106. virtual int getNumOverlap();
  107. virtual cl_mem getOverlappingPairBuffer();
  108. virtual b3OpenCLArray<b3Int4>& getOverlappingPairsGPU();
  109. virtual b3OpenCLArray<int>& getSmallAabbIndicesGPU();
  110. virtual b3OpenCLArray<int>& getLargeAabbIndicesGPU();
  111. };
  112. #endif //B3_GPU_SAP_BROADPHASE_H