12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298 |
- bool searchIncremental3dSapOnGpu = true;
- #include <limits.h>
- #include "b3GpuSapBroadphase.h"
- #include "Bullet3Common/b3Vector3.h"
- #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
- #include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanFloat4CL.h"
- #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
- #include "kernels/sapKernels.h"
- #include "Bullet3Common/b3MinMax.h"
- #define B3_BROADPHASE_SAP_PATH "src/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl"
- /*
-
-
-
-
-
-
- b3OpenCLArray<int> m_pairCount;
-
-
- b3OpenCLArray<b3SapAabb> m_allAabbsGPU;
- b3AlignedObjectArray<b3SapAabb> m_allAabbsCPU;
-
- virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU()
- {
- return m_allAabbsGPU;
- }
- virtual b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU()
- {
- return m_allAabbsCPU;
- }
-
- b3OpenCLArray<b3Vector3> m_sum;
- b3OpenCLArray<b3Vector3> m_sum2;
- b3OpenCLArray<b3Vector3> m_dst;
-
- b3OpenCLArray<int> m_smallAabbsMappingGPU;
- b3AlignedObjectArray<int> m_smallAabbsMappingCPU;
-
- b3OpenCLArray<int> m_largeAabbsMappingGPU;
- b3AlignedObjectArray<int> m_largeAabbsMappingCPU;
-
-
- b3OpenCLArray<b3Int4> m_overlappingPairs;
-
- //temporary gpu work memory
- b3OpenCLArray<b3SortData> m_gpuSmallSortData;
- b3OpenCLArray<b3SapAabb> m_gpuSmallSortedAabbs;
-
- class b3PrefixScanFloat4CL* m_prefixScanFloat4;
- */
- b3GpuSapBroadphase::b3GpuSapBroadphase(cl_context ctx, cl_device_id device, cl_command_queue q, b3GpuSapKernelType kernelType)
- : m_context(ctx),
- m_device(device),
- m_queue(q),
- m_objectMinMaxIndexGPUaxis0(ctx, q),
- m_objectMinMaxIndexGPUaxis1(ctx, q),
- m_objectMinMaxIndexGPUaxis2(ctx, q),
- m_objectMinMaxIndexGPUaxis0prev(ctx, q),
- m_objectMinMaxIndexGPUaxis1prev(ctx, q),
- m_objectMinMaxIndexGPUaxis2prev(ctx, q),
- m_sortedAxisGPU0(ctx, q),
- m_sortedAxisGPU1(ctx, q),
- m_sortedAxisGPU2(ctx, q),
- m_sortedAxisGPU0prev(ctx, q),
- m_sortedAxisGPU1prev(ctx, q),
- m_sortedAxisGPU2prev(ctx, q),
- m_addedHostPairsGPU(ctx, q),
- m_removedHostPairsGPU(ctx, q),
- m_addedCountGPU(ctx, q),
- m_removedCountGPU(ctx, q),
- m_currentBuffer(-1),
- m_pairCount(ctx, q),
- m_allAabbsGPU(ctx, q),
- m_sum(ctx, q),
- m_sum2(ctx, q),
- m_dst(ctx, q),
- m_smallAabbsMappingGPU(ctx, q),
- m_largeAabbsMappingGPU(ctx, q),
- m_overlappingPairs(ctx, q),
- m_gpuSmallSortData(ctx, q),
- m_gpuSmallSortedAabbs(ctx, q)
- {
- const char* sapSrc = sapCL;
- cl_int errNum = 0;
- b3Assert(m_context);
- b3Assert(m_device);
- cl_program sapProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, sapSrc, &errNum, "", B3_BROADPHASE_SAP_PATH);
- b3Assert(errNum == CL_SUCCESS);
- b3Assert(errNum == CL_SUCCESS);
- #ifndef __APPLE__
- m_prefixScanFloat4 = new b3PrefixScanFloat4CL(m_context, m_device, m_queue);
- #else
- m_prefixScanFloat4 = 0;
- #endif
- m_sapKernel = 0;
- switch (kernelType)
- {
- case B3_GPU_SAP_KERNEL_BRUTE_FORCE_CPU:
- {
- m_sapKernel = 0;
- break;
- }
- case B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU:
- {
- m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "computePairsKernelBruteForce", &errNum, sapProg);
- break;
- }
- case B3_GPU_SAP_KERNEL_ORIGINAL:
- {
- m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "computePairsKernelOriginal", &errNum, sapProg);
- break;
- }
- case B3_GPU_SAP_KERNEL_BARRIER:
- {
- m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "computePairsKernelBarrier", &errNum, sapProg);
- break;
- }
- case B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY:
- {
- m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "computePairsKernelLocalSharedMemory", &errNum, sapProg);
- break;
- }
- default:
- {
- m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "computePairsKernelLocalSharedMemory", &errNum, sapProg);
- b3Error("Unknown 3D GPU SAP provided, fallback to computePairsKernelLocalSharedMemory");
- }
- };
- m_sap2Kernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "computePairsKernelTwoArrays", &errNum, sapProg);
- b3Assert(errNum == CL_SUCCESS);
- m_prepareSumVarianceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "prepareSumVarianceKernel", &errNum, sapProg);
- b3Assert(errNum == CL_SUCCESS);
- m_flipFloatKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "flipFloatKernel", &errNum, sapProg);
- m_copyAabbsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "copyAabbsKernel", &errNum, sapProg);
- m_scatterKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "scatterKernel", &errNum, sapProg);
- m_sorter = new b3RadixSort32CL(m_context, m_device, m_queue);
- }
- b3GpuSapBroadphase::~b3GpuSapBroadphase()
- {
- delete m_sorter;
- delete m_prefixScanFloat4;
- clReleaseKernel(m_scatterKernel);
- clReleaseKernel(m_flipFloatKernel);
- clReleaseKernel(m_copyAabbsKernel);
- clReleaseKernel(m_sapKernel);
- clReleaseKernel(m_sap2Kernel);
- clReleaseKernel(m_prepareSumVarianceKernel);
- }
- /// conservative test for overlap between two aabbs
- static bool TestAabbAgainstAabb2(const b3Vector3& aabbMin1, const b3Vector3& aabbMax1,
- const b3Vector3& aabbMin2, const b3Vector3& aabbMax2)
- {
- bool overlap = true;
- overlap = (aabbMin1.getX() > aabbMax2.getX() || aabbMax1.getX() < aabbMin2.getX()) ? false : overlap;
- overlap = (aabbMin1.getZ() > aabbMax2.getZ() || aabbMax1.getZ() < aabbMin2.getZ()) ? false : overlap;
- overlap = (aabbMin1.getY() > aabbMax2.getY() || aabbMax1.getY() < aabbMin2.getY()) ? false : overlap;
- return overlap;
- }
- //http://stereopsis.com/radix.html
- static unsigned int FloatFlip(float fl)
- {
- unsigned int f = *(unsigned int*)&fl;
- unsigned int mask = -(int)(f >> 31) | 0x80000000;
- return f ^ mask;
- };
- void b3GpuSapBroadphase::init3dSap()
- {
- if (m_currentBuffer < 0)
- {
- m_allAabbsGPU.copyToHost(m_allAabbsCPU);
- m_currentBuffer = 0;
- for (int axis = 0; axis < 3; axis++)
- {
- for (int buf = 0; buf < 2; buf++)
- {
- int totalNumAabbs = m_allAabbsCPU.size();
- int numEndPoints = 2 * totalNumAabbs;
- m_sortedAxisCPU[axis][buf].resize(numEndPoints);
- if (buf == m_currentBuffer)
- {
- for (int i = 0; i < totalNumAabbs; i++)
- {
- m_sortedAxisCPU[axis][buf][i * 2].m_key = FloatFlip(m_allAabbsCPU[i].m_min[axis]) - 1;
- m_sortedAxisCPU[axis][buf][i * 2].m_value = i * 2;
- m_sortedAxisCPU[axis][buf][i * 2 + 1].m_key = FloatFlip(m_allAabbsCPU[i].m_max[axis]) + 1;
- m_sortedAxisCPU[axis][buf][i * 2 + 1].m_value = i * 2 + 1;
- }
- }
- }
- }
- for (int axis = 0; axis < 3; axis++)
- {
- m_sorter->executeHost(m_sortedAxisCPU[axis][m_currentBuffer]);
- }
- for (int axis = 0; axis < 3; axis++)
- {
- //int totalNumAabbs = m_allAabbsCPU.size();
- int numEndPoints = m_sortedAxisCPU[axis][m_currentBuffer].size();
- m_objectMinMaxIndexCPU[axis][m_currentBuffer].resize(numEndPoints);
- for (int i = 0; i < numEndPoints; i++)
- {
- int destIndex = m_sortedAxisCPU[axis][m_currentBuffer][i].m_value;
- int newDest = destIndex / 2;
- if (destIndex & 1)
- {
- m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].y = i;
- }
- else
- {
- m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].x = i;
- }
- }
- }
- }
- }
- static bool b3PairCmp(const b3Int4& p, const b3Int4& q)
- {
- return ((p.x < q.x) || ((p.x == q.x) && (p.y < q.y)));
- }
- static bool operator==(const b3Int4& a, const b3Int4& b)
- {
- return a.x == b.x && a.y == b.y;
- };
- static bool operator<(const b3Int4& a, const b3Int4& b)
- {
- return a.x < b.x || (a.x == b.x && a.y < b.y);
- };
- static bool operator>(const b3Int4& a, const b3Int4& b)
- {
- return a.x > b.x || (a.x == b.x && a.y > b.y);
- };
- b3AlignedObjectArray<b3Int4> addedHostPairs;
- b3AlignedObjectArray<b3Int4> removedHostPairs;
- b3AlignedObjectArray<b3SapAabb> preAabbs;
- void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
- {
- //static int framepje = 0;
- //printf("framepje=%d\n",framepje++);
- B3_PROFILE("calculateOverlappingPairsHostIncremental3Sap");
- addedHostPairs.resize(0);
- removedHostPairs.resize(0);
- b3Assert(m_currentBuffer >= 0);
- {
- preAabbs.resize(m_allAabbsCPU.size());
- for (int i = 0; i < preAabbs.size(); i++)
- {
- preAabbs[i] = m_allAabbsCPU[i];
- }
- }
- if (m_currentBuffer < 0)
- return;
- {
- B3_PROFILE("m_allAabbsGPU.copyToHost");
- m_allAabbsGPU.copyToHost(m_allAabbsCPU);
- }
- b3AlignedObjectArray<b3Int4> allPairs;
- {
- B3_PROFILE("m_overlappingPairs.copyToHost");
- m_overlappingPairs.copyToHost(allPairs);
- }
- if (0)
- {
- {
- printf("ab[40].min=%f,%f,%f,ab[40].max=%f,%f,%f\n",
- m_allAabbsCPU[40].m_min[0], m_allAabbsCPU[40].m_min[1], m_allAabbsCPU[40].m_min[2],
- m_allAabbsCPU[40].m_max[0], m_allAabbsCPU[40].m_max[1], m_allAabbsCPU[40].m_max[2]);
- }
- {
- printf("ab[53].min=%f,%f,%f,ab[53].max=%f,%f,%f\n",
- m_allAabbsCPU[53].m_min[0], m_allAabbsCPU[53].m_min[1], m_allAabbsCPU[53].m_min[2],
- m_allAabbsCPU[53].m_max[0], m_allAabbsCPU[53].m_max[1], m_allAabbsCPU[53].m_max[2]);
- }
- {
- b3Int4 newPair;
- newPair.x = 40;
- newPair.y = 53;
- int index = allPairs.findBinarySearch(newPair);
- printf("hasPair(40,53)=%d out of %d\n", index, allPairs.size());
- {
- int overlap = TestAabbAgainstAabb2((const b3Vector3&)m_allAabbsCPU[40].m_min, (const b3Vector3&)m_allAabbsCPU[40].m_max, (const b3Vector3&)m_allAabbsCPU[53].m_min, (const b3Vector3&)m_allAabbsCPU[53].m_max);
- printf("overlap=%d\n", overlap);
- }
- if (preAabbs.size())
- {
- int prevOverlap = TestAabbAgainstAabb2((const b3Vector3&)preAabbs[40].m_min, (const b3Vector3&)preAabbs[40].m_max, (const b3Vector3&)preAabbs[53].m_min, (const b3Vector3&)preAabbs[53].m_max);
- printf("prevoverlap=%d\n", prevOverlap);
- }
- else
- {
- printf("unknown prevoverlap\n");
- }
- }
- }
- if (0)
- {
- for (int i = 0; i < m_allAabbsCPU.size(); i++)
- {
- //printf("aabb[%d] min=%f,%f,%f max=%f,%f,%f\n",i,m_allAabbsCPU[i].m_min[0],m_allAabbsCPU[i].m_min[1],m_allAabbsCPU[i].m_min[2], m_allAabbsCPU[i].m_max[0],m_allAabbsCPU[i].m_max[1],m_allAabbsCPU[i].m_max[2]);
- }
- for (int axis = 0; axis < 3; axis++)
- {
- for (int buf = 0; buf < 2; buf++)
- {
- b3Assert(m_sortedAxisCPU[axis][buf].size() == m_allAabbsCPU.size() * 2);
- }
- }
- }
- m_currentBuffer = 1 - m_currentBuffer;
- int totalNumAabbs = m_allAabbsCPU.size();
- {
- B3_PROFILE("assign m_sortedAxisCPU(FloatFlip)");
- for (int i = 0; i < totalNumAabbs; i++)
- {
- unsigned int keyMin[3];
- unsigned int keyMax[3];
- for (int axis = 0; axis < 3; axis++)
- {
- float vmin = m_allAabbsCPU[i].m_min[axis];
- float vmax = m_allAabbsCPU[i].m_max[axis];
- keyMin[axis] = FloatFlip(vmin);
- keyMax[axis] = FloatFlip(vmax);
- m_sortedAxisCPU[axis][m_currentBuffer][i * 2].m_key = keyMin[axis] - 1;
- m_sortedAxisCPU[axis][m_currentBuffer][i * 2].m_value = i * 2;
- m_sortedAxisCPU[axis][m_currentBuffer][i * 2 + 1].m_key = keyMax[axis] + 1;
- m_sortedAxisCPU[axis][m_currentBuffer][i * 2 + 1].m_value = i * 2 + 1;
- }
- //printf("aabb[%d] min=%u,%u,%u max %u,%u,%u\n", i,keyMin[0],keyMin[1],keyMin[2],keyMax[0],keyMax[1],keyMax[2]);
- }
- }
- {
- B3_PROFILE("sort m_sortedAxisCPU");
- for (int axis = 0; axis < 3; axis++)
- m_sorter->executeHost(m_sortedAxisCPU[axis][m_currentBuffer]);
- }
- #if 0
- if (0)
- {
- for (int axis=0;axis<3;axis++)
- {
- //printf("axis %d\n",axis);
- for (int i=0;i<m_sortedAxisCPU[axis][m_currentBuffer].size();i++)
- {
- //int key = m_sortedAxisCPU[axis][m_currentBuffer][i].m_key;
- //int value = m_sortedAxisCPU[axis][m_currentBuffer][i].m_value;
- //printf("[%d]=%d\n",i,value);
- }
- }
- }
- #endif
- {
- B3_PROFILE("assign m_objectMinMaxIndexCPU");
- for (int axis = 0; axis < 3; axis++)
- {
- int totalNumAabbs = m_allAabbsCPU.size();
- int numEndPoints = m_sortedAxisCPU[axis][m_currentBuffer].size();
- m_objectMinMaxIndexCPU[axis][m_currentBuffer].resize(totalNumAabbs);
- for (int i = 0; i < numEndPoints; i++)
- {
- int destIndex = m_sortedAxisCPU[axis][m_currentBuffer][i].m_value;
- int newDest = destIndex / 2;
- if (destIndex & 1)
- {
- m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].y = i;
- }
- else
- {
- m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].x = i;
- }
- }
- }
- }
- #if 0
- if (0)
- {
- printf("==========================\n");
- for (int axis=0;axis<3;axis++)
- {
- unsigned int curMinIndex40 = m_objectMinMaxIndexCPU[axis][m_currentBuffer][40].x;
- unsigned int curMaxIndex40 = m_objectMinMaxIndexCPU[axis][m_currentBuffer][40].y;
- unsigned int prevMaxIndex40 = m_objectMinMaxIndexCPU[axis][1-m_currentBuffer][40].y;
- unsigned int prevMinIndex40 = m_objectMinMaxIndexCPU[axis][1-m_currentBuffer][40].x;
- int dmin40 = curMinIndex40 - prevMinIndex40;
- int dmax40 = curMinIndex40 - prevMinIndex40;
- printf("axis %d curMinIndex40=%d prevMinIndex40=%d\n",axis,curMinIndex40, prevMinIndex40);
- printf("axis %d curMaxIndex40=%d prevMaxIndex40=%d\n",axis,curMaxIndex40, prevMaxIndex40);
- }
- printf(".........................\n");
- for (int axis=0;axis<3;axis++)
- {
- unsigned int curMinIndex53 = m_objectMinMaxIndexCPU[axis][m_currentBuffer][53].x;
- unsigned int curMaxIndex53 = m_objectMinMaxIndexCPU[axis][m_currentBuffer][53].y;
- unsigned int prevMaxIndex53 = m_objectMinMaxIndexCPU[axis][1-m_currentBuffer][53].y;
- unsigned int prevMinIndex53 = m_objectMinMaxIndexCPU[axis][1-m_currentBuffer][53].x;
- int dmin40 = curMinIndex53 - prevMinIndex53;
- int dmax40 = curMinIndex53 - prevMinIndex53;
- printf("axis %d curMinIndex53=%d prevMinIndex53=%d\n",axis,curMinIndex53, prevMinIndex53);
- printf("axis %d curMaxIndex53=%d prevMaxIndex53=%d\n",axis,curMaxIndex53, prevMaxIndex53);
- }
- }
- #endif
- int a = m_objectMinMaxIndexCPU[0][m_currentBuffer].size();
- int b = m_objectMinMaxIndexCPU[1][m_currentBuffer].size();
- int c = m_objectMinMaxIndexCPU[2][m_currentBuffer].size();
- b3Assert(a == b);
- b3Assert(b == c);
- /*
- if (searchIncremental3dSapOnGpu)
- {
- B3_PROFILE("computePairsIncremental3dSapKernelGPU");
- int numObjects = m_objectMinMaxIndexCPU[0][m_currentBuffer].size();
- int maxCapacity = 1024*1024;
- {
- B3_PROFILE("copy from host");
- m_objectMinMaxIndexGPUaxis0.copyFromHost(m_objectMinMaxIndexCPU[0][m_currentBuffer]);
- m_objectMinMaxIndexGPUaxis1.copyFromHost(m_objectMinMaxIndexCPU[1][m_currentBuffer]);
- m_objectMinMaxIndexGPUaxis2.copyFromHost(m_objectMinMaxIndexCPU[2][m_currentBuffer]);
- m_objectMinMaxIndexGPUaxis0prev.copyFromHost(m_objectMinMaxIndexCPU[0][1-m_currentBuffer]);
- m_objectMinMaxIndexGPUaxis1prev.copyFromHost(m_objectMinMaxIndexCPU[1][1-m_currentBuffer]);
- m_objectMinMaxIndexGPUaxis2prev.copyFromHost(m_objectMinMaxIndexCPU[2][1-m_currentBuffer]);
- m_sortedAxisGPU0.copyFromHost(m_sortedAxisCPU[0][m_currentBuffer]);
- m_sortedAxisGPU1.copyFromHost(m_sortedAxisCPU[1][m_currentBuffer]);
- m_sortedAxisGPU2.copyFromHost(m_sortedAxisCPU[2][m_currentBuffer]);
- m_sortedAxisGPU0prev.copyFromHost(m_sortedAxisCPU[0][1-m_currentBuffer]);
- m_sortedAxisGPU1prev.copyFromHost(m_sortedAxisCPU[1][1-m_currentBuffer]);
- m_sortedAxisGPU2prev.copyFromHost(m_sortedAxisCPU[2][1-m_currentBuffer]);
-
- m_addedHostPairsGPU.resize(maxCapacity);
- m_removedHostPairsGPU.resize(maxCapacity);
- m_addedCountGPU.resize(0);
- m_addedCountGPU.push_back(0);
- m_removedCountGPU.resize(0);
- m_removedCountGPU.push_back(0);
- }
- {
- B3_PROFILE("launch1D");
- b3LauncherCL launcher(m_queue, m_computePairsIncremental3dSapKernel,"m_computePairsIncremental3dSapKernel");
- launcher.setBuffer(m_objectMinMaxIndexGPUaxis0.getBufferCL());
- launcher.setBuffer(m_objectMinMaxIndexGPUaxis1.getBufferCL());
- launcher.setBuffer(m_objectMinMaxIndexGPUaxis2.getBufferCL());
- launcher.setBuffer(m_objectMinMaxIndexGPUaxis0prev.getBufferCL());
- launcher.setBuffer(m_objectMinMaxIndexGPUaxis1prev.getBufferCL());
- launcher.setBuffer(m_objectMinMaxIndexGPUaxis2prev.getBufferCL());
- launcher.setBuffer(m_sortedAxisGPU0.getBufferCL());
- launcher.setBuffer(m_sortedAxisGPU1.getBufferCL());
- launcher.setBuffer(m_sortedAxisGPU2.getBufferCL());
- launcher.setBuffer(m_sortedAxisGPU0prev.getBufferCL());
- launcher.setBuffer(m_sortedAxisGPU1prev.getBufferCL());
- launcher.setBuffer(m_sortedAxisGPU2prev.getBufferCL());
-
- launcher.setBuffer(m_addedHostPairsGPU.getBufferCL());
- launcher.setBuffer(m_removedHostPairsGPU.getBufferCL());
- launcher.setBuffer(m_addedCountGPU.getBufferCL());
- launcher.setBuffer(m_removedCountGPU.getBufferCL());
- launcher.setConst(maxCapacity);
- launcher.setConst( numObjects);
- launcher.launch1D( numObjects);
- clFinish(m_queue);
- }
- {
- B3_PROFILE("copy to host");
- int addedCountGPU = m_addedCountGPU.at(0);
- m_addedHostPairsGPU.resize(addedCountGPU);
- m_addedHostPairsGPU.copyToHost(addedHostPairs);
- //printf("addedCountGPU=%d\n",addedCountGPU);
- int removedCountGPU = m_removedCountGPU.at(0);
- m_removedHostPairsGPU.resize(removedCountGPU);
- m_removedHostPairsGPU.copyToHost(removedHostPairs);
- //printf("removedCountGPU=%d\n",removedCountGPU);
- }
- }
- else
- */
- {
- int numObjects = m_objectMinMaxIndexCPU[0][m_currentBuffer].size();
- B3_PROFILE("actual search");
- for (int i = 0; i < numObjects; i++)
- {
- //int numObjects = m_objectMinMaxIndexCPU[axis][m_currentBuffer].size();
- //int checkObjects[]={40,53};
- //int numCheckObjects = sizeof(checkObjects)/sizeof(int);
- //for (int a=0;a<numCheckObjects ;a++)
- for (int axis = 0; axis < 3; axis++)
- {
- //int i = checkObjects[a];
- unsigned int curMinIndex = m_objectMinMaxIndexCPU[axis][m_currentBuffer][i].x;
- unsigned int curMaxIndex = m_objectMinMaxIndexCPU[axis][m_currentBuffer][i].y;
- unsigned int prevMinIndex = m_objectMinMaxIndexCPU[axis][1 - m_currentBuffer][i].x;
- int dmin = curMinIndex - prevMinIndex;
- unsigned int prevMaxIndex = m_objectMinMaxIndexCPU[axis][1 - m_currentBuffer][i].y;
- int dmax = curMaxIndex - prevMaxIndex;
- if (dmin != 0)
- {
- //printf("for object %d, dmin=%d\n",i,dmin);
- }
- if (dmax != 0)
- {
- //printf("for object %d, dmax=%d\n",i,dmax);
- }
- for (int otherbuffer = 0; otherbuffer < 2; otherbuffer++)
- {
- if (dmin != 0)
- {
- int stepMin = dmin < 0 ? -1 : 1;
- for (int j = prevMinIndex; j != curMinIndex; j += stepMin)
- {
- int otherIndex2 = m_sortedAxisCPU[axis][otherbuffer][j].y;
- int otherIndex = otherIndex2 / 2;
- if (otherIndex != i)
- {
- bool otherIsMax = ((otherIndex2 & 1) != 0);
- if (otherIsMax)
- {
- //bool overlap = TestAabbAgainstAabb2((const b3Vector3&)m_allAabbsCPU[i].m_min, (const b3Vector3&)m_allAabbsCPU[i].m_max,(const b3Vector3&)m_allAabbsCPU[otherIndex].m_min,(const b3Vector3&)m_allAabbsCPU[otherIndex].m_max);
- //bool prevOverlap = TestAabbAgainstAabb2((const b3Vector3&)preAabbs[i].m_min, (const b3Vector3&)preAabbs[i].m_max,(const b3Vector3&)preAabbs[otherIndex].m_min,(const b3Vector3&)preAabbs[otherIndex].m_max);
- bool overlap = true;
- for (int ax = 0; ax < 3; ax++)
- {
- if ((m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].y) ||
- (m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].x))
- overlap = false;
- }
- // b3Assert(overlap2==overlap);
- bool prevOverlap = true;
- for (int ax = 0; ax < 3; ax++)
- {
- if ((m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][otherIndex].y) ||
- (m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][otherIndex].x))
- prevOverlap = false;
- }
- //b3Assert(overlap==overlap2);
- if (dmin < 0)
- {
- if (overlap && !prevOverlap)
- {
- //add a pair
- b3Int4 newPair;
- if (i <= otherIndex)
- {
- newPair.x = i;
- newPair.y = otherIndex;
- }
- else
- {
- newPair.x = otherIndex;
- newPair.y = i;
- }
- addedHostPairs.push_back(newPair);
- }
- }
- else
- {
- if (!overlap && prevOverlap)
- {
- //remove a pair
- b3Int4 removedPair;
- if (i <= otherIndex)
- {
- removedPair.x = i;
- removedPair.y = otherIndex;
- }
- else
- {
- removedPair.x = otherIndex;
- removedPair.y = i;
- }
- removedHostPairs.push_back(removedPair);
- }
- } //otherisMax
- } //if (dmin<0)
- } //if (otherIndex!=i)
- } //for (int j=
- }
- if (dmax != 0)
- {
- int stepMax = dmax < 0 ? -1 : 1;
- for (int j = prevMaxIndex; j != curMaxIndex; j += stepMax)
- {
- int otherIndex2 = m_sortedAxisCPU[axis][otherbuffer][j].y;
- int otherIndex = otherIndex2 / 2;
- if (otherIndex != i)
- {
- //bool otherIsMin = ((otherIndex2&1)==0);
- //if (otherIsMin)
- {
- //bool overlap = TestAabbAgainstAabb2((const b3Vector3&)m_allAabbsCPU[i].m_min, (const b3Vector3&)m_allAabbsCPU[i].m_max,(const b3Vector3&)m_allAabbsCPU[otherIndex].m_min,(const b3Vector3&)m_allAabbsCPU[otherIndex].m_max);
- //bool prevOverlap = TestAabbAgainstAabb2((const b3Vector3&)preAabbs[i].m_min, (const b3Vector3&)preAabbs[i].m_max,(const b3Vector3&)preAabbs[otherIndex].m_min,(const b3Vector3&)preAabbs[otherIndex].m_max);
- bool overlap = true;
- for (int ax = 0; ax < 3; ax++)
- {
- if ((m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].y) ||
- (m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].x))
- overlap = false;
- }
- //b3Assert(overlap2==overlap);
- bool prevOverlap = true;
- for (int ax = 0; ax < 3; ax++)
- {
- if ((m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][otherIndex].y) ||
- (m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][otherIndex].x))
- prevOverlap = false;
- }
- if (dmax > 0)
- {
- if (overlap && !prevOverlap)
- {
- //add a pair
- b3Int4 newPair;
- if (i <= otherIndex)
- {
- newPair.x = i;
- newPair.y = otherIndex;
- }
- else
- {
- newPair.x = otherIndex;
- newPair.y = i;
- }
- addedHostPairs.push_back(newPair);
- }
- }
- else
- {
- if (!overlap && prevOverlap)
- {
- //if (otherIndex2&1==0) -> min?
- //remove a pair
- b3Int4 removedPair;
- if (i <= otherIndex)
- {
- removedPair.x = i;
- removedPair.y = otherIndex;
- }
- else
- {
- removedPair.x = otherIndex;
- removedPair.y = i;
- }
- removedHostPairs.push_back(removedPair);
- }
- }
- } //if (dmin<0)
- } //if (otherIndex!=i)
- } //for (int j=
- }
- } //for (int otherbuffer
- } //for (int axis=0;
- } //for (int i=0;i<numObjects
- }
- //remove duplicates and add/remove then to existing m_overlappingPairs
- {
- {
- B3_PROFILE("sort allPairs");
- allPairs.quickSort(b3PairCmp);
- }
- {
- B3_PROFILE("sort addedHostPairs");
- addedHostPairs.quickSort(b3PairCmp);
- }
- {
- B3_PROFILE("sort removedHostPairs");
- removedHostPairs.quickSort(b3PairCmp);
- }
- }
- b3Int4 prevPair;
- prevPair.x = -1;
- prevPair.y = -1;
- int uniqueRemovedPairs = 0;
- b3AlignedObjectArray<int> removedPositions;
- {
- B3_PROFILE("actual removing");
- for (int i = 0; i < removedHostPairs.size(); i++)
- {
- b3Int4 removedPair = removedHostPairs[i];
- if ((removedPair.x != prevPair.x) || (removedPair.y != prevPair.y))
- {
- int index1 = allPairs.findBinarySearch(removedPair);
- //#ifdef _DEBUG
- int index2 = allPairs.findLinearSearch(removedPair);
- b3Assert(index1 == index2);
- //b3Assert(index1!=allPairs.size());
- if (index1 < allPairs.size())
- //#endif//_DEBUG
- {
- uniqueRemovedPairs++;
- removedPositions.push_back(index1);
- {
- //printf("framepje(%d) remove pair(%d):%d,%d\n",framepje,i,removedPair.x,removedPair.y);
- }
- }
- }
- prevPair = removedPair;
- }
- if (uniqueRemovedPairs)
- {
- for (int i = 0; i < removedPositions.size(); i++)
- {
- allPairs[removedPositions[i]].x = INT_MAX;
- allPairs[removedPositions[i]].y = INT_MAX;
- }
- allPairs.quickSort(b3PairCmp);
- allPairs.resize(allPairs.size() - uniqueRemovedPairs);
- }
- }
- //if (uniqueRemovedPairs)
- // printf("uniqueRemovedPairs=%d\n",uniqueRemovedPairs);
- //printf("removedHostPairs.size = %d\n",removedHostPairs.size());
- prevPair.x = -1;
- prevPair.y = -1;
- int uniqueAddedPairs = 0;
- b3AlignedObjectArray<b3Int4> actualAddedPairs;
- {
- B3_PROFILE("actual adding");
- for (int i = 0; i < addedHostPairs.size(); i++)
- {
- b3Int4 newPair = addedHostPairs[i];
- if ((newPair.x != prevPair.x) || (newPair.y != prevPair.y))
- {
- //#ifdef _DEBUG
- int index1 = allPairs.findBinarySearch(newPair);
- int index2 = allPairs.findLinearSearch(newPair);
- b3Assert(index1 == index2);
- b3Assert(index1 == allPairs.size());
- if (index1 != allPairs.size())
- {
- printf("??\n");
- }
- if (index1 == allPairs.size())
- //#endif //_DEBUG
- {
- uniqueAddedPairs++;
- actualAddedPairs.push_back(newPair);
- }
- }
- prevPair = newPair;
- }
- for (int i = 0; i < actualAddedPairs.size(); i++)
- {
- //printf("framepje (%d), new pair(%d):%d,%d\n",framepje,i,actualAddedPairs[i].x,actualAddedPairs[i].y);
- allPairs.push_back(actualAddedPairs[i]);
- }
- }
- //if (uniqueAddedPairs)
- // printf("uniqueAddedPairs=%d\n", uniqueAddedPairs);
- {
- B3_PROFILE("m_overlappingPairs.copyFromHost");
- m_overlappingPairs.copyFromHost(allPairs);
- }
- }
- void b3GpuSapBroadphase::calculateOverlappingPairsHost(int maxPairs)
- {
- //test
- // if (m_currentBuffer>=0)
- // return calculateOverlappingPairsHostIncremental3Sap();
- b3Assert(m_allAabbsCPU.size() == m_allAabbsGPU.size());
- m_allAabbsGPU.copyToHost(m_allAabbsCPU);
- int axis = 0;
- {
- B3_PROFILE("CPU compute best variance axis");
- b3Vector3 s = b3MakeVector3(0, 0, 0), s2 = b3MakeVector3(0, 0, 0);
- int numRigidBodies = m_smallAabbsMappingCPU.size();
- for (int i = 0; i < numRigidBodies; i++)
- {
- b3SapAabb aabb = this->m_allAabbsCPU[m_smallAabbsMappingCPU[i]];
- b3Vector3 maxAabb = b3MakeVector3(aabb.m_max[0], aabb.m_max[1], aabb.m_max[2]);
- b3Vector3 minAabb = b3MakeVector3(aabb.m_min[0], aabb.m_min[1], aabb.m_min[2]);
- b3Vector3 centerAabb = (maxAabb + minAabb) * 0.5f;
- s += centerAabb;
- s2 += centerAabb * centerAabb;
- }
- b3Vector3 v = s2 - (s * s) / (float)numRigidBodies;
- if (v[1] > v[0])
- axis = 1;
- if (v[2] > v[axis])
- axis = 2;
- }
- b3AlignedObjectArray<b3Int4> hostPairs;
- {
- int numSmallAabbs = m_smallAabbsMappingCPU.size();
- for (int i = 0; i < numSmallAabbs; i++)
- {
- b3SapAabb smallAabbi = m_allAabbsCPU[m_smallAabbsMappingCPU[i]];
- //float reference = smallAabbi.m_max[axis];
- for (int j = i + 1; j < numSmallAabbs; j++)
- {
- b3SapAabb smallAabbj = m_allAabbsCPU[m_smallAabbsMappingCPU[j]];
- if (TestAabbAgainstAabb2((b3Vector3&)smallAabbi.m_min, (b3Vector3&)smallAabbi.m_max,
- (b3Vector3&)smallAabbj.m_min, (b3Vector3&)smallAabbj.m_max))
- {
- b3Int4 pair;
- int a = smallAabbi.m_minIndices[3];
- int b = smallAabbj.m_minIndices[3];
- if (a <= b)
- {
- pair.x = a; //store the original index in the unsorted aabb array
- pair.y = b;
- }
- else
- {
- pair.x = b; //store the original index in the unsorted aabb array
- pair.y = a;
- }
- hostPairs.push_back(pair);
- }
- }
- }
- }
- {
- int numSmallAabbs = m_smallAabbsMappingCPU.size();
- for (int i = 0; i < numSmallAabbs; i++)
- {
- b3SapAabb smallAabbi = m_allAabbsCPU[m_smallAabbsMappingCPU[i]];
- //float reference = smallAabbi.m_max[axis];
- int numLargeAabbs = m_largeAabbsMappingCPU.size();
- for (int j = 0; j < numLargeAabbs; j++)
- {
- b3SapAabb largeAabbj = m_allAabbsCPU[m_largeAabbsMappingCPU[j]];
- if (TestAabbAgainstAabb2((b3Vector3&)smallAabbi.m_min, (b3Vector3&)smallAabbi.m_max,
- (b3Vector3&)largeAabbj.m_min, (b3Vector3&)largeAabbj.m_max))
- {
- b3Int4 pair;
- int a = largeAabbj.m_minIndices[3];
- int b = smallAabbi.m_minIndices[3];
- if (a <= b)
- {
- pair.x = a;
- pair.y = b; //store the original index in the unsorted aabb array
- }
- else
- {
- pair.x = b;
- pair.y = a; //store the original index in the unsorted aabb array
- }
- hostPairs.push_back(pair);
- }
- }
- }
- }
- if (hostPairs.size() > maxPairs)
- {
- hostPairs.resize(maxPairs);
- }
- if (hostPairs.size())
- {
- m_overlappingPairs.copyFromHost(hostPairs);
- }
- else
- {
- m_overlappingPairs.resize(0);
- }
- //init3dSap();
- }
- void b3GpuSapBroadphase::reset()
- {
- m_allAabbsGPU.resize(0);
- m_allAabbsCPU.resize(0);
- m_smallAabbsMappingGPU.resize(0);
- m_smallAabbsMappingCPU.resize(0);
- m_pairCount.resize(0);
- m_largeAabbsMappingGPU.resize(0);
- m_largeAabbsMappingCPU.resize(0);
- }
- void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs)
- {
- if (m_sapKernel == 0)
- {
- calculateOverlappingPairsHost(maxPairs);
- return;
- }
- //if (m_currentBuffer>=0)
- // return calculateOverlappingPairsHostIncremental3Sap();
- //calculateOverlappingPairsHost(maxPairs);
- B3_PROFILE("GPU 1-axis SAP calculateOverlappingPairs");
- int axis = 0;
- {
- //bool syncOnHost = false;
- int numSmallAabbs = m_smallAabbsMappingCPU.size();
- if (m_prefixScanFloat4 && numSmallAabbs)
- {
- B3_PROFILE("GPU compute best variance axis");
- if (m_dst.size() != (numSmallAabbs + 1))
- {
- m_dst.resize(numSmallAabbs + 128);
- m_sum.resize(numSmallAabbs + 128);
- m_sum2.resize(numSmallAabbs + 128);
- m_sum.at(numSmallAabbs) = b3MakeVector3(0, 0, 0); //slow?
- m_sum2.at(numSmallAabbs) = b3MakeVector3(0, 0, 0); //slow?
- }
- b3LauncherCL launcher(m_queue, m_prepareSumVarianceKernel, "m_prepareSumVarianceKernel");
- launcher.setBuffer(m_allAabbsGPU.getBufferCL());
- launcher.setBuffer(m_smallAabbsMappingGPU.getBufferCL());
- launcher.setBuffer(m_sum.getBufferCL());
- launcher.setBuffer(m_sum2.getBufferCL());
- launcher.setConst(numSmallAabbs);
- int num = numSmallAabbs;
- launcher.launch1D(num);
- b3Vector3 s;
- b3Vector3 s2;
- m_prefixScanFloat4->execute(m_sum, m_dst, numSmallAabbs + 1, &s);
- m_prefixScanFloat4->execute(m_sum2, m_dst, numSmallAabbs + 1, &s2);
- b3Vector3 v = s2 - (s * s) / (float)numSmallAabbs;
- if (v[1] > v[0])
- axis = 1;
- if (v[2] > v[axis])
- axis = 2;
- }
- m_gpuSmallSortData.resize(numSmallAabbs);
- #if 1
- if (m_smallAabbsMappingGPU.size())
- {
- B3_PROFILE("flipFloatKernel");
- b3BufferInfoCL bInfo[] = {
- b3BufferInfoCL(m_allAabbsGPU.getBufferCL(), true),
- b3BufferInfoCL(m_smallAabbsMappingGPU.getBufferCL(), true),
- b3BufferInfoCL(m_gpuSmallSortData.getBufferCL())};
- b3LauncherCL launcher(m_queue, m_flipFloatKernel, "m_flipFloatKernel");
- launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
- launcher.setConst(numSmallAabbs);
- launcher.setConst(axis);
- int num = numSmallAabbs;
- launcher.launch1D(num);
- clFinish(m_queue);
- }
- if (m_gpuSmallSortData.size())
- {
- B3_PROFILE("gpu radix sort");
- m_sorter->execute(m_gpuSmallSortData);
- clFinish(m_queue);
- }
- m_gpuSmallSortedAabbs.resize(numSmallAabbs);
- if (numSmallAabbs)
- {
- B3_PROFILE("scatterKernel");
- b3BufferInfoCL bInfo[] = {
- b3BufferInfoCL(m_allAabbsGPU.getBufferCL(), true),
- b3BufferInfoCL(m_smallAabbsMappingGPU.getBufferCL(), true),
- b3BufferInfoCL(m_gpuSmallSortData.getBufferCL(), true),
- b3BufferInfoCL(m_gpuSmallSortedAabbs.getBufferCL())};
- b3LauncherCL launcher(m_queue, m_scatterKernel, "m_scatterKernel ");
- launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
- launcher.setConst(numSmallAabbs);
- int num = numSmallAabbs;
- launcher.launch1D(num);
- clFinish(m_queue);
- }
- m_overlappingPairs.resize(maxPairs);
- m_pairCount.resize(0);
- m_pairCount.push_back(0);
- int numPairs = 0;
- {
- int numLargeAabbs = m_largeAabbsMappingGPU.size();
- if (numLargeAabbs && numSmallAabbs)
- {
- //@todo
- B3_PROFILE("sap2Kernel");
- b3BufferInfoCL bInfo[] = {
- b3BufferInfoCL(m_allAabbsGPU.getBufferCL()),
- b3BufferInfoCL(m_largeAabbsMappingGPU.getBufferCL()),
- b3BufferInfoCL(m_smallAabbsMappingGPU.getBufferCL()),
- b3BufferInfoCL(m_overlappingPairs.getBufferCL()),
- b3BufferInfoCL(m_pairCount.getBufferCL())};
- b3LauncherCL launcher(m_queue, m_sap2Kernel, "m_sap2Kernel");
- launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
- launcher.setConst(numLargeAabbs);
- launcher.setConst(numSmallAabbs);
- launcher.setConst(axis);
- launcher.setConst(maxPairs);
- //@todo: use actual maximum work item sizes of the device instead of hardcoded values
- launcher.launch2D(numLargeAabbs, numSmallAabbs, 4, 64);
- numPairs = m_pairCount.at(0);
- if (numPairs > maxPairs)
- {
- b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs);
- numPairs = maxPairs;
- }
- }
- }
- if (m_gpuSmallSortedAabbs.size())
- {
- B3_PROFILE("sapKernel");
- b3BufferInfoCL bInfo[] = {b3BufferInfoCL(m_gpuSmallSortedAabbs.getBufferCL()), b3BufferInfoCL(m_overlappingPairs.getBufferCL()), b3BufferInfoCL(m_pairCount.getBufferCL())};
- b3LauncherCL launcher(m_queue, m_sapKernel, "m_sapKernel");
- launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
- launcher.setConst(numSmallAabbs);
- launcher.setConst(axis);
- launcher.setConst(maxPairs);
- int num = numSmallAabbs;
- #if 0
- int buffSize = launcher.getSerializationBufferSize();
- unsigned char* buf = new unsigned char[buffSize+sizeof(int)];
- for (int i=0;i<buffSize+1;i++)
- {
- unsigned char* ptr = (unsigned char*)&buf[i];
- *ptr = 0xff;
- }
- int actualWrite = launcher.serializeArguments(buf,buffSize);
-
- unsigned char* cptr = (unsigned char*)&buf[buffSize];
- // printf("buf[buffSize] = %d\n",*cptr);
-
- assert(buf[buffSize]==0xff);//check for buffer overrun
- int* ptr = (int*)&buf[buffSize];
-
- *ptr = num;
-
- FILE* f = fopen("m_sapKernelArgs.bin","wb");
- fwrite(buf,buffSize+sizeof(int),1,f);
- fclose(f);
- #endif //
- launcher.launch1D(num);
- clFinish(m_queue);
- numPairs = m_pairCount.at(0);
- if (numPairs > maxPairs)
- {
- b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs);
- numPairs = maxPairs;
- m_pairCount.resize(0);
- m_pairCount.push_back(maxPairs);
- }
- }
- #else
- int numPairs = 0;
- b3LauncherCL launcher(m_queue, m_sapKernel);
- const char* fileName = "m_sapKernelArgs.bin";
- FILE* f = fopen(fileName, "rb");
- if (f)
- {
- int sizeInBytes = 0;
- if (fseek(f, 0, SEEK_END) || (sizeInBytes = ftell(f)) == EOF || fseek(f, 0, SEEK_SET))
- {
- printf("error, cannot get file size\n");
- exit(0);
- }
- unsigned char* buf = (unsigned char*)malloc(sizeInBytes);
- fread(buf, sizeInBytes, 1, f);
- int serializedBytes = launcher.deserializeArgs(buf, sizeInBytes, m_context);
- int num = *(int*)&buf[serializedBytes];
- launcher.launch1D(num);
- b3OpenCLArray<int> pairCount(m_context, m_queue);
- int numElements = launcher.m_arrays[2]->size() / sizeof(int);
- pairCount.setFromOpenCLBuffer(launcher.m_arrays[2]->getBufferCL(), numElements);
- numPairs = pairCount.at(0);
- //printf("overlapping pairs = %d\n",numPairs);
- b3AlignedObjectArray<b3Int4> hostOoverlappingPairs;
- b3OpenCLArray<b3Int4> tmpGpuPairs(m_context, m_queue);
- tmpGpuPairs.setFromOpenCLBuffer(launcher.m_arrays[1]->getBufferCL(), numPairs);
- tmpGpuPairs.copyToHost(hostOoverlappingPairs);
- m_overlappingPairs.copyFromHost(hostOoverlappingPairs);
- //printf("hello %d\n", m_overlappingPairs.size());
- free(buf);
- fclose(f);
- }
- else
- {
- printf("error: cannot find file %s\n", fileName);
- }
- clFinish(m_queue);
- #endif
- m_overlappingPairs.resize(numPairs);
- } //B3_PROFILE("GPU_RADIX SORT");
- //init3dSap();
- }
- void b3GpuSapBroadphase::writeAabbsToGpu()
- {
- m_smallAabbsMappingGPU.copyFromHost(m_smallAabbsMappingCPU);
- m_largeAabbsMappingGPU.copyFromHost(m_largeAabbsMappingCPU);
- m_allAabbsGPU.copyFromHost(m_allAabbsCPU); //might not be necessary, the 'setupGpuAabbsFull' already takes care of this
- }
- void b3GpuSapBroadphase::createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask)
- {
- int index = userPtr;
- b3SapAabb aabb;
- for (int i = 0; i < 4; i++)
- {
- aabb.m_min[i] = aabbMin[i];
- aabb.m_max[i] = aabbMax[i];
- }
- aabb.m_minIndices[3] = index;
- aabb.m_signedMaxIndices[3] = m_allAabbsCPU.size();
- m_largeAabbsMappingCPU.push_back(m_allAabbsCPU.size());
- m_allAabbsCPU.push_back(aabb);
- }
- void b3GpuSapBroadphase::createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask)
- {
- int index = userPtr;
- b3SapAabb aabb;
- for (int i = 0; i < 4; i++)
- {
- aabb.m_min[i] = aabbMin[i];
- aabb.m_max[i] = aabbMax[i];
- }
- aabb.m_minIndices[3] = index;
- aabb.m_signedMaxIndices[3] = m_allAabbsCPU.size();
- m_smallAabbsMappingCPU.push_back(m_allAabbsCPU.size());
- m_allAabbsCPU.push_back(aabb);
- }
- cl_mem b3GpuSapBroadphase::getAabbBufferWS()
- {
- return m_allAabbsGPU.getBufferCL();
- }
- int b3GpuSapBroadphase::getNumOverlap()
- {
- return m_overlappingPairs.size();
- }
- cl_mem b3GpuSapBroadphase::getOverlappingPairBuffer()
- {
- return m_overlappingPairs.getBufferCL();
- }
- b3OpenCLArray<b3Int4>& b3GpuSapBroadphase::getOverlappingPairsGPU()
- {
- return m_overlappingPairs;
- }
- b3OpenCLArray<int>& b3GpuSapBroadphase::getSmallAabbIndicesGPU()
- {
- return m_smallAabbsMappingGPU;
- }
- b3OpenCLArray<int>& b3GpuSapBroadphase::getLargeAabbIndicesGPU()
- {
- return m_largeAabbsMappingGPU;
- }
|