OcclusionBuffer.cpp 33 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030
  1. // Copyright (c) 2008-2022 the Urho3D project
  2. // License: MIT
  3. #include "../Precompiled.h"
  4. #include "../Core/WorkQueue.h"
  5. #include "../Core/Profiler.h"
  6. #include "../Graphics/Camera.h"
  7. #include "../Graphics/OcclusionBuffer.h"
  8. #include "../IO/Log.h"
  9. #include "../DebugNew.h"
  10. namespace Urho3D
  11. {
  12. enum ClipMask : unsigned
  13. {
  14. CLIPMASK_X_POS = 0x1,
  15. CLIPMASK_X_NEG = 0x2,
  16. CLIPMASK_Y_POS = 0x4,
  17. CLIPMASK_Y_NEG = 0x8,
  18. CLIPMASK_Z_POS = 0x10,
  19. CLIPMASK_Z_NEG = 0x20,
  20. };
  21. URHO3D_FLAGSET(ClipMask, ClipMaskFlags);
  22. void DrawOcclusionBatchWork(const WorkItem* item, i32 threadIndex)
  23. {
  24. auto* buffer = reinterpret_cast<OcclusionBuffer*>(item->aux_);
  25. OcclusionBatch& batch = *reinterpret_cast<OcclusionBatch*>(item->start_);
  26. buffer->DrawBatch(batch, threadIndex);
  27. }
  28. OcclusionBuffer::OcclusionBuffer(Context* context) :
  29. Object(context)
  30. {
  31. }
  32. OcclusionBuffer::~OcclusionBuffer() = default;
  33. bool OcclusionBuffer::SetSize(int width, int height, bool threaded)
  34. {
  35. // Force the height to an even amount of pixels for better mip generation
  36. if (height & 1u)
  37. ++height;
  38. if (width == width_ && height == height_)
  39. return true;
  40. if (width <= 0 || height <= 0)
  41. return false;
  42. if (!IsPowerOfTwo((unsigned)width))
  43. {
  44. URHO3D_LOGERRORF("Requested occlusion buffer width %d is not a power of two", width);
  45. return false;
  46. }
  47. width_ = width;
  48. height_ = height;
  49. // Build work buffers for threading
  50. unsigned numThreadBuffers = threaded ? GetSubsystem<WorkQueue>()->GetNumThreads() + 1 : 1;
  51. buffers_.Resize(numThreadBuffers);
  52. for (unsigned i = 0; i < numThreadBuffers; ++i)
  53. {
  54. // Reserve extra memory in case 3D clipping is not exact
  55. OcclusionBufferData& buffer = buffers_[i];
  56. buffer.dataWithSafety_ = new int[width * (height + 2) + 2];
  57. buffer.data_ = buffer.dataWithSafety_.Get() + width + 1;
  58. buffer.used_ = false;
  59. }
  60. mipBuffers_.Clear();
  61. // Build buffers for mip levels
  62. for (;;)
  63. {
  64. width = (width + 1) / 2;
  65. height = (height + 1) / 2;
  66. mipBuffers_.Push(SharedArrayPtr<DepthValue>(new DepthValue[width * height]));
  67. if (width <= OCCLUSION_MIN_SIZE && height <= OCCLUSION_MIN_SIZE)
  68. break;
  69. }
  70. URHO3D_LOGDEBUG("Set occlusion buffer size " + String(width_) + "x" + String(height_) + " with " +
  71. String(mipBuffers_.Size()) + " mip levels and " + String(numThreadBuffers) + " thread buffers");
  72. CalculateViewport();
  73. return true;
  74. }
  75. void OcclusionBuffer::SetView(Camera* camera)
  76. {
  77. if (!camera)
  78. return;
  79. view_ = camera->GetView();
  80. projection_ = camera->GetProjection();
  81. viewProj_ = projection_ * view_;
  82. nearClip_ = camera->GetNearClip();
  83. farClip_ = camera->GetFarClip();
  84. reverseCulling_ = camera->GetReverseCulling();
  85. CalculateViewport();
  86. }
  87. void OcclusionBuffer::SetMaxTriangles(unsigned triangles)
  88. {
  89. maxTriangles_ = triangles;
  90. }
  91. void OcclusionBuffer::SetCullMode(CullMode mode)
  92. {
  93. if (reverseCulling_)
  94. {
  95. if (mode == CULL_CW)
  96. mode = CULL_CCW;
  97. else if (mode == CULL_CCW)
  98. mode = CULL_CW;
  99. }
  100. cullMode_ = mode;
  101. }
  102. void OcclusionBuffer::Reset()
  103. {
  104. numTriangles_ = 0;
  105. batches_.Clear();
  106. }
  107. void OcclusionBuffer::Clear()
  108. {
  109. Reset();
  110. // Only clear the main thread buffer. Rest are cleared on-demand when drawing the first batch
  111. ClearBuffer(0);
  112. for (OcclusionBufferData& buffer : buffers_)
  113. buffer.used_ = false;
  114. depthHierarchyDirty_ = true;
  115. }
  116. bool OcclusionBuffer::AddTriangles(const Matrix3x4& model, const void* vertexData, unsigned vertexSize, unsigned vertexStart,
  117. unsigned vertexCount)
  118. {
  119. batches_.Resize(batches_.Size() + 1);
  120. OcclusionBatch& batch = batches_.Back();
  121. batch.model_ = model;
  122. batch.vertexData_ = vertexData;
  123. batch.vertexSize_ = vertexSize;
  124. batch.indexData_ = nullptr;
  125. batch.indexSize_ = 0;
  126. batch.drawStart_ = vertexStart;
  127. batch.drawCount_ = vertexCount;
  128. numTriangles_ += vertexCount / 3;
  129. return numTriangles_ <= maxTriangles_;
  130. }
  131. bool OcclusionBuffer::AddTriangles(const Matrix3x4& model, const void* vertexData, unsigned vertexSize, const void* indexData,
  132. unsigned indexSize, unsigned indexStart, unsigned indexCount)
  133. {
  134. batches_.Resize(batches_.Size() + 1);
  135. OcclusionBatch& batch = batches_.Back();
  136. batch.model_ = model;
  137. batch.vertexData_ = vertexData;
  138. batch.vertexSize_ = vertexSize;
  139. batch.indexData_ = indexData;
  140. batch.indexSize_ = indexSize;
  141. batch.drawStart_ = indexStart;
  142. batch.drawCount_ = indexCount;
  143. numTriangles_ += indexCount / 3;
  144. return numTriangles_ <= maxTriangles_;
  145. }
  146. void OcclusionBuffer::DrawTriangles()
  147. {
  148. if (buffers_.Size() == 1)
  149. {
  150. // Not threaded
  151. for (Vector<OcclusionBatch>::Iterator i = batches_.Begin(); i != batches_.End(); ++i)
  152. DrawBatch(*i, 0);
  153. depthHierarchyDirty_ = true;
  154. }
  155. else if (buffers_.Size() > 1)
  156. {
  157. // Threaded
  158. auto* queue = GetSubsystem<WorkQueue>();
  159. for (Vector<OcclusionBatch>::Iterator i = batches_.Begin(); i != batches_.End(); ++i)
  160. {
  161. SharedPtr<WorkItem> item = queue->GetFreeItem();
  162. item->priority_ = WI_MAX_PRIORITY;
  163. item->workFunction_ = DrawOcclusionBatchWork;
  164. item->aux_ = this;
  165. item->start_ = &(*i);
  166. queue->AddWorkItem(item);
  167. }
  168. queue->Complete(WI_MAX_PRIORITY);
  169. MergeBuffers();
  170. depthHierarchyDirty_ = true;
  171. }
  172. batches_.Clear();
  173. }
  174. void OcclusionBuffer::BuildDepthHierarchy()
  175. {
  176. if (buffers_.Empty() || !depthHierarchyDirty_)
  177. return;
  178. URHO3D_PROFILE(BuildDepthHierarchy);
  179. // Build the first mip level from the pixel-level data
  180. int width = (width_ + 1) / 2;
  181. int height = (height_ + 1) / 2;
  182. if (mipBuffers_.Size())
  183. {
  184. for (int y = 0; y < height; ++y)
  185. {
  186. int* src = buffers_[0].data_ + (y * 2) * width_;
  187. DepthValue* dest = mipBuffers_[0].Get() + y * width;
  188. DepthValue* end = dest + width;
  189. if (y * 2 + 1 < height_)
  190. {
  191. int* src2 = src + width_;
  192. while (dest < end)
  193. {
  194. int minUpper = Min(src[0], src[1]);
  195. int minLower = Min(src2[0], src2[1]);
  196. dest->min_ = Min(minUpper, minLower);
  197. int maxUpper = Max(src[0], src[1]);
  198. int maxLower = Max(src2[0], src2[1]);
  199. dest->max_ = Max(maxUpper, maxLower);
  200. src += 2;
  201. src2 += 2;
  202. ++dest;
  203. }
  204. }
  205. else
  206. {
  207. while (dest < end)
  208. {
  209. dest->min_ = Min(src[0], src[1]);
  210. dest->max_ = Max(src[0], src[1]);
  211. src += 2;
  212. ++dest;
  213. }
  214. }
  215. }
  216. }
  217. // Build the rest of the mip levels
  218. for (unsigned i = 1; i < mipBuffers_.Size(); ++i)
  219. {
  220. int prevWidth = width;
  221. int prevHeight = height;
  222. width = (width + 1) / 2;
  223. height = (height + 1) / 2;
  224. for (int y = 0; y < height; ++y)
  225. {
  226. DepthValue* src = mipBuffers_[i - 1].Get() + (y * 2) * prevWidth;
  227. DepthValue* dest = mipBuffers_[i].Get() + y * width;
  228. DepthValue* end = dest + width;
  229. if (y * 2 + 1 < prevHeight)
  230. {
  231. DepthValue* src2 = src + prevWidth;
  232. while (dest < end)
  233. {
  234. int minUpper = Min(src[0].min_, src[1].min_);
  235. int minLower = Min(src2[0].min_, src2[1].min_);
  236. dest->min_ = Min(minUpper, minLower);
  237. int maxUpper = Max(src[0].max_, src[1].max_);
  238. int maxLower = Max(src2[0].max_, src2[1].max_);
  239. dest->max_ = Max(maxUpper, maxLower);
  240. src += 2;
  241. src2 += 2;
  242. ++dest;
  243. }
  244. }
  245. else
  246. {
  247. while (dest < end)
  248. {
  249. dest->min_ = Min(src[0].min_, src[1].min_);
  250. dest->max_ = Max(src[0].max_, src[1].max_);
  251. src += 2;
  252. ++dest;
  253. }
  254. }
  255. }
  256. }
  257. depthHierarchyDirty_ = false;
  258. }
  259. void OcclusionBuffer::ResetUseTimer()
  260. {
  261. useTimer_.Reset();
  262. }
  263. bool OcclusionBuffer::IsVisible(const BoundingBox& worldSpaceBox) const
  264. {
  265. if (buffers_.Empty())
  266. return true;
  267. // Transform corners to projection space
  268. Vector4 vertices[8];
  269. vertices[0] = ModelTransform(viewProj_, worldSpaceBox.min_);
  270. vertices[1] = ModelTransform(viewProj_, Vector3(worldSpaceBox.max_.x_, worldSpaceBox.min_.y_, worldSpaceBox.min_.z_));
  271. vertices[2] = ModelTransform(viewProj_, Vector3(worldSpaceBox.min_.x_, worldSpaceBox.max_.y_, worldSpaceBox.min_.z_));
  272. vertices[3] = ModelTransform(viewProj_, Vector3(worldSpaceBox.max_.x_, worldSpaceBox.max_.y_, worldSpaceBox.min_.z_));
  273. vertices[4] = ModelTransform(viewProj_, Vector3(worldSpaceBox.min_.x_, worldSpaceBox.min_.y_, worldSpaceBox.max_.z_));
  274. vertices[5] = ModelTransform(viewProj_, Vector3(worldSpaceBox.max_.x_, worldSpaceBox.min_.y_, worldSpaceBox.max_.z_));
  275. vertices[6] = ModelTransform(viewProj_, Vector3(worldSpaceBox.min_.x_, worldSpaceBox.max_.y_, worldSpaceBox.max_.z_));
  276. vertices[7] = ModelTransform(viewProj_, worldSpaceBox.max_);
  277. // Apply a far clip relative bias
  278. for (auto& vertice : vertices)
  279. vertice.z_ -= OCCLUSION_RELATIVE_BIAS;
  280. // Transform to screen space. If any of the corners cross the near plane, assume visible
  281. float minX, maxX, minY, maxY, minZ;
  282. if (vertices[0].z_ <= 0.0f)
  283. return true;
  284. Vector3 projected = ViewportTransform(vertices[0]);
  285. minX = maxX = projected.x_;
  286. minY = maxY = projected.y_;
  287. minZ = projected.z_;
  288. // Project the rest
  289. for (unsigned i = 1; i < 8; ++i)
  290. {
  291. if (vertices[i].z_ <= 0.0f)
  292. return true;
  293. projected = ViewportTransform(vertices[i]);
  294. if (projected.x_ < minX) minX = projected.x_;
  295. if (projected.x_ > maxX) maxX = projected.x_;
  296. if (projected.y_ < minY) minY = projected.y_;
  297. if (projected.y_ > maxY) maxY = projected.y_;
  298. if (projected.z_ < minZ) minZ = projected.z_;
  299. }
  300. // Expand the bounding box 1 pixel in each direction to be conservative and correct rasterization offset
  301. IntRect rect((int)(minX - 1.5f), (int)(minY - 1.5f), RoundToInt(maxX), RoundToInt(maxY));
  302. // If the rect is outside, let frustum culling handle
  303. if (rect.right_ < 0 || rect.bottom_ < 0)
  304. return true;
  305. if (rect.left_ >= width_ || rect.top_ >= height_)
  306. return true;
  307. // Clipping of rect
  308. if (rect.left_ < 0)
  309. rect.left_ = 0;
  310. if (rect.top_ < 0)
  311. rect.top_ = 0;
  312. if (rect.right_ >= width_)
  313. rect.right_ = width_ - 1;
  314. if (rect.bottom_ >= height_)
  315. rect.bottom_ = height_ - 1;
  316. // Convert depth to integer and apply final bias
  317. int z = RoundToInt(minZ) - OCCLUSION_FIXED_BIAS;
  318. if (!depthHierarchyDirty_)
  319. {
  320. // Start from lowest mip level and check if a conclusive result can be found
  321. for (int i = mipBuffers_.Size() - 1; i >= 0; --i)
  322. {
  323. int shift = i + 1;
  324. int width = width_ >> shift;
  325. int left = rect.left_ >> shift;
  326. int right = rect.right_ >> shift;
  327. DepthValue* buffer = mipBuffers_[i].Get();
  328. DepthValue* row = buffer + (rect.top_ >> shift) * width;
  329. DepthValue* endRow = buffer + (rect.bottom_ >> shift) * width;
  330. bool allOccluded = true;
  331. while (row <= endRow)
  332. {
  333. DepthValue* src = row + left;
  334. DepthValue* end = row + right;
  335. while (src <= end)
  336. {
  337. if (z <= src->min_)
  338. return true;
  339. if (z <= src->max_)
  340. allOccluded = false;
  341. ++src;
  342. }
  343. row += width;
  344. }
  345. if (allOccluded)
  346. return false;
  347. }
  348. }
  349. // If no conclusive result, finally check the pixel-level data
  350. int* row = buffers_[0].data_ + rect.top_ * width_;
  351. int* endRow = buffers_[0].data_ + rect.bottom_ * width_;
  352. while (row <= endRow)
  353. {
  354. int* src = row + rect.left_;
  355. int* end = row + rect.right_;
  356. while (src <= end)
  357. {
  358. if (z <= *src)
  359. return true;
  360. ++src;
  361. }
  362. row += width_;
  363. }
  364. return false;
  365. }
  366. unsigned OcclusionBuffer::GetUseTimer()
  367. {
  368. return useTimer_.GetMSec(false);
  369. }
  370. void OcclusionBuffer::DrawBatch(const OcclusionBatch& batch, i32 threadIndex)
  371. {
  372. assert(threadIndex >= 0);
  373. // If buffer not yet used, clear it
  374. if (threadIndex > 0 && !buffers_[threadIndex].used_)
  375. {
  376. ClearBuffer(threadIndex);
  377. buffers_[threadIndex].used_ = true;
  378. }
  379. Matrix4 modelViewProj = viewProj_ * batch.model_;
  380. // Theoretical max. amount of vertices if each of the 6 clipping planes doubles the triangle count
  381. Vector4 vertices[64 * 3];
  382. if (!batch.indexData_)
  383. {
  384. const unsigned char* srcData = ((const unsigned char*)batch.vertexData_) + batch.drawStart_ * batch.vertexSize_;
  385. unsigned index = 0;
  386. while (index + 2 < batch.drawCount_)
  387. {
  388. const Vector3& v0 = *((const Vector3*)(&srcData[index * batch.vertexSize_]));
  389. const Vector3& v1 = *((const Vector3*)(&srcData[(index + 1) * batch.vertexSize_]));
  390. const Vector3& v2 = *((const Vector3*)(&srcData[(index + 2) * batch.vertexSize_]));
  391. vertices[0] = ModelTransform(modelViewProj, v0);
  392. vertices[1] = ModelTransform(modelViewProj, v1);
  393. vertices[2] = ModelTransform(modelViewProj, v2);
  394. DrawTriangle(vertices, threadIndex);
  395. index += 3;
  396. }
  397. }
  398. else
  399. {
  400. const auto* srcData = (const unsigned char*)batch.vertexData_;
  401. // 16-bit indices
  402. if (batch.indexSize_ == sizeof(unsigned short))
  403. {
  404. const unsigned short* indices = ((const unsigned short*)batch.indexData_) + batch.drawStart_;
  405. const unsigned short* indicesEnd = indices + batch.drawCount_;
  406. while (indices < indicesEnd)
  407. {
  408. const Vector3& v0 = *((const Vector3*)(&srcData[indices[0] * batch.vertexSize_]));
  409. const Vector3& v1 = *((const Vector3*)(&srcData[indices[1] * batch.vertexSize_]));
  410. const Vector3& v2 = *((const Vector3*)(&srcData[indices[2] * batch.vertexSize_]));
  411. vertices[0] = ModelTransform(modelViewProj, v0);
  412. vertices[1] = ModelTransform(modelViewProj, v1);
  413. vertices[2] = ModelTransform(modelViewProj, v2);
  414. DrawTriangle(vertices, threadIndex);
  415. indices += 3;
  416. }
  417. }
  418. else
  419. {
  420. const unsigned* indices = ((const unsigned*)batch.indexData_) + batch.drawStart_;
  421. const unsigned* indicesEnd = indices + batch.drawCount_;
  422. while (indices < indicesEnd)
  423. {
  424. const Vector3& v0 = *((const Vector3*)(&srcData[indices[0] * batch.vertexSize_]));
  425. const Vector3& v1 = *((const Vector3*)(&srcData[indices[1] * batch.vertexSize_]));
  426. const Vector3& v2 = *((const Vector3*)(&srcData[indices[2] * batch.vertexSize_]));
  427. vertices[0] = ModelTransform(modelViewProj, v0);
  428. vertices[1] = ModelTransform(modelViewProj, v1);
  429. vertices[2] = ModelTransform(modelViewProj, v2);
  430. DrawTriangle(vertices, threadIndex);
  431. indices += 3;
  432. }
  433. }
  434. }
  435. }
  436. inline Vector4 OcclusionBuffer::ModelTransform(const Matrix4& transform, const Vector3& vertex) const
  437. {
  438. return Vector4(
  439. transform.m00_ * vertex.x_ + transform.m01_ * vertex.y_ + transform.m02_ * vertex.z_ + transform.m03_,
  440. transform.m10_ * vertex.x_ + transform.m11_ * vertex.y_ + transform.m12_ * vertex.z_ + transform.m13_,
  441. transform.m20_ * vertex.x_ + transform.m21_ * vertex.y_ + transform.m22_ * vertex.z_ + transform.m23_,
  442. transform.m30_ * vertex.x_ + transform.m31_ * vertex.y_ + transform.m32_ * vertex.z_ + transform.m33_
  443. );
  444. }
  445. inline Vector3 OcclusionBuffer::ViewportTransform(const Vector4& vertex) const
  446. {
  447. float invW = 1.0f / vertex.w_;
  448. return Vector3(
  449. invW * vertex.x_ * scaleX_ + offsetX_,
  450. invW * vertex.y_ * scaleY_ + offsetY_,
  451. invW * vertex.z_ * OCCLUSION_Z_SCALE
  452. );
  453. }
  454. inline Vector4 OcclusionBuffer::ClipEdge(const Vector4& v0, const Vector4& v1, float d0, float d1) const
  455. {
  456. float t = d0 / (d0 - d1);
  457. return v0 + t * (v1 - v0);
  458. }
  459. inline float OcclusionBuffer::SignedArea(const Vector3& v0, const Vector3& v1, const Vector3& v2) const
  460. {
  461. float aX = v0.x_ - v1.x_;
  462. float aY = v0.y_ - v1.y_;
  463. float bX = v2.x_ - v1.x_;
  464. float bY = v2.y_ - v1.y_;
  465. return aX * bY - aY * bX;
  466. }
  467. void OcclusionBuffer::CalculateViewport()
  468. {
  469. // Add half pixel offset due to 3D frustum culling
  470. scaleX_ = 0.5f * width_;
  471. scaleY_ = -0.5f * height_;
  472. offsetX_ = 0.5f * width_ + 0.5f;
  473. offsetY_ = 0.5f * height_ + 0.5f;
  474. projOffsetScaleX_ = projection_.m00_ * scaleX_;
  475. projOffsetScaleY_ = projection_.m11_ * scaleY_;
  476. }
  477. void OcclusionBuffer::DrawTriangle(Vector4* vertices, i32 threadIndex)
  478. {
  479. assert(threadIndex >= 0);
  480. ClipMaskFlags clipMask{};
  481. ClipMaskFlags andClipMask{};
  482. bool drawOk = false;
  483. Vector3 projected[3];
  484. // Build the clip plane mask for the triangle
  485. for (unsigned i = 0; i < 3; ++i)
  486. {
  487. ClipMaskFlags vertexClipMask{};
  488. if (vertices[i].x_ > vertices[i].w_)
  489. vertexClipMask |= CLIPMASK_X_POS;
  490. if (vertices[i].x_ < -vertices[i].w_)
  491. vertexClipMask |= CLIPMASK_X_NEG;
  492. if (vertices[i].y_ > vertices[i].w_)
  493. vertexClipMask |= CLIPMASK_Y_POS;
  494. if (vertices[i].y_ < -vertices[i].w_)
  495. vertexClipMask |= CLIPMASK_Y_NEG;
  496. if (vertices[i].z_ > vertices[i].w_)
  497. vertexClipMask |= CLIPMASK_Z_POS;
  498. if (vertices[i].z_ < 0.0f)
  499. vertexClipMask |= CLIPMASK_Z_NEG;
  500. clipMask |= vertexClipMask;
  501. if (!i)
  502. andClipMask = vertexClipMask;
  503. else
  504. andClipMask &= vertexClipMask;
  505. }
  506. // If triangle is fully behind any clip plane, can reject quickly
  507. if (andClipMask)
  508. return;
  509. // Check if triangle is fully inside
  510. if (!clipMask)
  511. {
  512. projected[0] = ViewportTransform(vertices[0]);
  513. projected[1] = ViewportTransform(vertices[1]);
  514. projected[2] = ViewportTransform(vertices[2]);
  515. bool clockwise = SignedArea(projected[0], projected[1], projected[2]) < 0.0f;
  516. if (cullMode_ == CULL_NONE || (cullMode_ == CULL_CCW && clockwise) || (cullMode_ == CULL_CW && !clockwise))
  517. {
  518. DrawTriangle2D(projected, clockwise, threadIndex);
  519. drawOk = true;
  520. }
  521. }
  522. else
  523. {
  524. bool triangles[64];
  525. // Initial triangle
  526. triangles[0] = true;
  527. unsigned numTriangles = 1;
  528. if (clipMask & CLIPMASK_X_POS)
  529. ClipVertices(Vector4(-1.0f, 0.0f, 0.0f, 1.0f), vertices, triangles, numTriangles);
  530. if (clipMask & CLIPMASK_X_NEG)
  531. ClipVertices(Vector4(1.0f, 0.0f, 0.0f, 1.0f), vertices, triangles, numTriangles);
  532. if (clipMask & CLIPMASK_Y_POS)
  533. ClipVertices(Vector4(0.0f, -1.0f, 0.0f, 1.0f), vertices, triangles, numTriangles);
  534. if (clipMask & CLIPMASK_Y_NEG)
  535. ClipVertices(Vector4(0.0f, 1.0f, 0.0f, 1.0f), vertices, triangles, numTriangles);
  536. if (clipMask & CLIPMASK_Z_POS)
  537. ClipVertices(Vector4(0.0f, 0.0f, -1.0f, 1.0f), vertices, triangles, numTriangles);
  538. if (clipMask & CLIPMASK_Z_NEG)
  539. ClipVertices(Vector4(0.0f, 0.0f, 1.0f, 0.0f), vertices, triangles, numTriangles);
  540. // Draw each accepted triangle
  541. for (unsigned i = 0; i < numTriangles; ++i)
  542. {
  543. if (triangles[i])
  544. {
  545. unsigned index = i * 3;
  546. projected[0] = ViewportTransform(vertices[index]);
  547. projected[1] = ViewportTransform(vertices[index + 1]);
  548. projected[2] = ViewportTransform(vertices[index + 2]);
  549. bool clockwise = SignedArea(projected[0], projected[1], projected[2]) < 0.0f;
  550. if (cullMode_ == CULL_NONE || (cullMode_ == CULL_CCW && clockwise) || (cullMode_ == CULL_CW && !clockwise))
  551. {
  552. DrawTriangle2D(projected, clockwise, threadIndex);
  553. drawOk = true;
  554. }
  555. }
  556. }
  557. }
  558. if (drawOk)
  559. ++numTriangles_;
  560. }
  561. void OcclusionBuffer::ClipVertices(const Vector4& plane, Vector4* vertices, bool* triangles, unsigned& numTriangles)
  562. {
  563. unsigned num = numTriangles;
  564. for (unsigned i = 0; i < num; ++i)
  565. {
  566. if (triangles[i])
  567. {
  568. unsigned index = i * 3;
  569. float d0 = plane.DotProduct(vertices[index]);
  570. float d1 = plane.DotProduct(vertices[index + 1]);
  571. float d2 = plane.DotProduct(vertices[index + 2]);
  572. // If all vertices behind the plane, reject triangle
  573. if (d0 < 0.0f && d1 < 0.0f && d2 < 0.0f)
  574. {
  575. triangles[i] = false;
  576. continue;
  577. }
  578. // If 2 vertices behind the plane, create a new triangle in-place
  579. else if (d0 < 0.0f && d1 < 0.0f)
  580. {
  581. vertices[index] = ClipEdge(vertices[index], vertices[index + 2], d0, d2);
  582. vertices[index + 1] = ClipEdge(vertices[index + 1], vertices[index + 2], d1, d2);
  583. }
  584. else if (d0 < 0.0f && d2 < 0.0f)
  585. {
  586. vertices[index] = ClipEdge(vertices[index], vertices[index + 1], d0, d1);
  587. vertices[index + 2] = ClipEdge(vertices[index + 2], vertices[index + 1], d2, d1);
  588. }
  589. else if (d1 < 0.0f && d2 < 0.0f)
  590. {
  591. vertices[index + 1] = ClipEdge(vertices[index + 1], vertices[index], d1, d0);
  592. vertices[index + 2] = ClipEdge(vertices[index + 2], vertices[index], d2, d0);
  593. }
  594. // 1 vertex behind the plane: create one new triangle, and modify one in-place
  595. else if (d0 < 0.0f)
  596. {
  597. unsigned newIdx = numTriangles * 3;
  598. triangles[numTriangles] = true;
  599. ++numTriangles;
  600. vertices[newIdx] = ClipEdge(vertices[index], vertices[index + 2], d0, d2);
  601. vertices[newIdx + 1] = vertices[index] = ClipEdge(vertices[index], vertices[index + 1], d0, d1);
  602. vertices[newIdx + 2] = vertices[index + 2];
  603. }
  604. else if (d1 < 0.0f)
  605. {
  606. unsigned newIdx = numTriangles * 3;
  607. triangles[numTriangles] = true;
  608. ++numTriangles;
  609. vertices[newIdx + 1] = ClipEdge(vertices[index + 1], vertices[index], d1, d0);
  610. vertices[newIdx + 2] = vertices[index + 1] = ClipEdge(vertices[index + 1], vertices[index + 2], d1, d2);
  611. vertices[newIdx] = vertices[index];
  612. }
  613. else if (d2 < 0.0f)
  614. {
  615. unsigned newIdx = numTriangles * 3;
  616. triangles[numTriangles] = true;
  617. ++numTriangles;
  618. vertices[newIdx + 2] = ClipEdge(vertices[index + 2], vertices[index + 1], d2, d1);
  619. vertices[newIdx] = vertices[index + 2] = ClipEdge(vertices[index + 2], vertices[index], d2, d0);
  620. vertices[newIdx + 1] = vertices[index + 1];
  621. }
  622. }
  623. }
  624. }
  625. // Code based on Chris Hecker's Perspective Texture Mapping series in the Game Developer magazine
  626. // Also available online at http://chrishecker.com/Miscellaneous_Technical_Articles
  627. /// %Gradients of a software rasterized triangle.
  628. struct Gradients
  629. {
  630. /// Construct from vertices.
  631. explicit Gradients(const Vector3* vertices)
  632. {
  633. float invdX = 1.0f / (((vertices[1].x_ - vertices[2].x_) *
  634. (vertices[0].y_ - vertices[2].y_)) -
  635. ((vertices[0].x_ - vertices[2].x_) *
  636. (vertices[1].y_ - vertices[2].y_)));
  637. float invdY = -invdX;
  638. dInvZdX_ = invdX * (((vertices[1].z_ - vertices[2].z_) * (vertices[0].y_ - vertices[2].y_)) -
  639. ((vertices[0].z_ - vertices[2].z_) * (vertices[1].y_ - vertices[2].y_)));
  640. dInvZdY_ = invdY * (((vertices[1].z_ - vertices[2].z_) * (vertices[0].x_ - vertices[2].x_)) -
  641. ((vertices[0].z_ - vertices[2].z_) * (vertices[1].x_ - vertices[2].x_)));
  642. dInvZdXInt_ = (int)dInvZdX_;
  643. }
  644. /// Integer horizontal gradient.
  645. int dInvZdXInt_;
  646. /// Horizontal gradient.
  647. float dInvZdX_;
  648. /// Vertical gradient.
  649. float dInvZdY_;
  650. };
  651. /// %Edge of a software rasterized triangle.
  652. struct Edge
  653. {
  654. /// Construct from gradients and top & bottom vertices.
  655. Edge(const Gradients& gradients, const Vector3& top, const Vector3& bottom, int topY)
  656. {
  657. float height = (bottom.y_ - top.y_);
  658. float slope = (height != 0.0f) ? (bottom.x_ - top.x_) / height : 0.0f;
  659. float yPreStep = (float)(topY + 1) - top.y_;
  660. float xPreStep = slope * yPreStep;
  661. x_ = RoundToInt((xPreStep + top.x_) * OCCLUSION_X_SCALE);
  662. xStep_ = RoundToInt(slope * OCCLUSION_X_SCALE);
  663. invZ_ = RoundToInt(top.z_ + xPreStep * gradients.dInvZdX_ + yPreStep * gradients.dInvZdY_);
  664. invZStep_ = RoundToInt(slope * gradients.dInvZdX_ + gradients.dInvZdY_);
  665. }
  666. /// X coordinate.
  667. int x_;
  668. /// X coordinate step.
  669. int xStep_;
  670. /// Inverse Z.
  671. int invZ_;
  672. /// Inverse Z step.
  673. int invZStep_;
  674. };
  675. void OcclusionBuffer::DrawTriangle2D(const Vector3* vertices, bool clockwise, i32 threadIndex)
  676. {
  677. assert(threadIndex >= 0);
  678. int top, middle, bottom;
  679. bool middleIsRight;
  680. // Sort vertices in Y-direction
  681. if (vertices[0].y_ < vertices[1].y_)
  682. {
  683. if (vertices[2].y_ < vertices[0].y_)
  684. {
  685. top = 2;
  686. middle = 0;
  687. bottom = 1;
  688. middleIsRight = true;
  689. }
  690. else
  691. {
  692. top = 0;
  693. if (vertices[1].y_ < vertices[2].y_)
  694. {
  695. middle = 1;
  696. bottom = 2;
  697. middleIsRight = true;
  698. }
  699. else
  700. {
  701. middle = 2;
  702. bottom = 1;
  703. middleIsRight = false;
  704. }
  705. }
  706. }
  707. else
  708. {
  709. if (vertices[2].y_ < vertices[1].y_)
  710. {
  711. top = 2;
  712. middle = 1;
  713. bottom = 0;
  714. middleIsRight = false;
  715. }
  716. else
  717. {
  718. top = 1;
  719. if (vertices[0].y_ < vertices[2].y_)
  720. {
  721. middle = 0;
  722. bottom = 2;
  723. middleIsRight = false;
  724. }
  725. else
  726. {
  727. middle = 2;
  728. bottom = 0;
  729. middleIsRight = true;
  730. }
  731. }
  732. }
  733. auto topY = (int)vertices[top].y_;
  734. auto middleY = (int)vertices[middle].y_;
  735. auto bottomY = (int)vertices[bottom].y_;
  736. // Check for degenerate triangle
  737. if (topY == bottomY)
  738. return;
  739. // Reverse middleIsRight test if triangle is counterclockwise
  740. if (!clockwise)
  741. middleIsRight = !middleIsRight;
  742. const bool topDegenerate = topY == middleY;
  743. const bool bottomDegenerate = middleY == bottomY;
  744. Gradients gradients(vertices);
  745. Edge topToBottom(gradients, vertices[top], vertices[bottom], topY);
  746. int* bufferData = buffers_[threadIndex].data_;
  747. if (middleIsRight)
  748. {
  749. // Top half
  750. if (!topDegenerate)
  751. {
  752. Edge topToMiddle(gradients, vertices[top], vertices[middle], topY);
  753. int* row = bufferData + topY * width_;
  754. int* endRow = bufferData + middleY * width_;
  755. while (row < endRow)
  756. {
  757. int invZ = topToBottom.invZ_;
  758. int* dest = row + (topToBottom.x_ >> 16u);
  759. int* end = row + (topToMiddle.x_ >> 16u);
  760. while (dest < end)
  761. {
  762. if (invZ < *dest)
  763. *dest = invZ;
  764. invZ += gradients.dInvZdXInt_;
  765. ++dest;
  766. }
  767. topToBottom.x_ += topToBottom.xStep_;
  768. topToBottom.invZ_ += topToBottom.invZStep_;
  769. topToMiddle.x_ += topToMiddle.xStep_;
  770. row += width_;
  771. }
  772. }
  773. // Bottom half
  774. if (!bottomDegenerate)
  775. {
  776. Edge middleToBottom(gradients, vertices[middle], vertices[bottom], middleY);
  777. int* row = bufferData + middleY * width_;
  778. int* endRow = bufferData + bottomY * width_;
  779. while (row < endRow)
  780. {
  781. int invZ = topToBottom.invZ_;
  782. int* dest = row + (topToBottom.x_ >> 16u);
  783. int* end = row + (middleToBottom.x_ >> 16u);
  784. while (dest < end)
  785. {
  786. if (invZ < *dest)
  787. *dest = invZ;
  788. invZ += gradients.dInvZdXInt_;
  789. ++dest;
  790. }
  791. topToBottom.x_ += topToBottom.xStep_;
  792. topToBottom.invZ_ += topToBottom.invZStep_;
  793. middleToBottom.x_ += middleToBottom.xStep_;
  794. row += width_;
  795. }
  796. }
  797. }
  798. else
  799. {
  800. // Top half
  801. if (!topDegenerate)
  802. {
  803. Edge topToMiddle(gradients, vertices[top], vertices[middle], topY);
  804. int* row = bufferData + topY * width_;
  805. int* endRow = bufferData + middleY * width_;
  806. while (row < endRow)
  807. {
  808. int invZ = topToMiddle.invZ_;
  809. int* dest = row + (topToMiddle.x_ >> 16u);
  810. int* end = row + (topToBottom.x_ >> 16u);
  811. while (dest < end)
  812. {
  813. if (invZ < *dest)
  814. *dest = invZ;
  815. invZ += gradients.dInvZdXInt_;
  816. ++dest;
  817. }
  818. topToMiddle.x_ += topToMiddle.xStep_;
  819. topToMiddle.invZ_ += topToMiddle.invZStep_;
  820. topToBottom.x_ += topToBottom.xStep_;
  821. row += width_;
  822. }
  823. }
  824. // Bottom half
  825. if (!bottomDegenerate)
  826. {
  827. Edge middleToBottom(gradients, vertices[middle], vertices[bottom], middleY);
  828. int* row = bufferData + middleY * width_;
  829. int* endRow = bufferData + bottomY * width_;
  830. while (row < endRow)
  831. {
  832. int invZ = middleToBottom.invZ_;
  833. int* dest = row + (middleToBottom.x_ >> 16u);
  834. int* end = row + (topToBottom.x_ >> 16u);
  835. while (dest < end)
  836. {
  837. if (invZ < *dest)
  838. *dest = invZ;
  839. invZ += gradients.dInvZdXInt_;
  840. ++dest;
  841. }
  842. middleToBottom.x_ += middleToBottom.xStep_;
  843. middleToBottom.invZ_ += middleToBottom.invZStep_;
  844. topToBottom.x_ += topToBottom.xStep_;
  845. row += width_;
  846. }
  847. }
  848. }
  849. }
  850. void OcclusionBuffer::MergeBuffers()
  851. {
  852. URHO3D_PROFILE(MergeBuffers);
  853. for (unsigned i = 1; i < buffers_.Size(); ++i)
  854. {
  855. if (!buffers_[i].used_)
  856. continue;
  857. int* src = buffers_[i].data_;
  858. int* dest = buffers_[0].data_;
  859. int count = width_ * height_;
  860. while (count--)
  861. {
  862. // If thread buffer's depth value is closer, overwrite the original
  863. if (*src < *dest)
  864. *dest = *src;
  865. ++src;
  866. ++dest;
  867. }
  868. }
  869. }
  870. void OcclusionBuffer::ClearBuffer(i32 threadIndex)
  871. {
  872. assert(threadIndex >= 0);
  873. if (threadIndex >= buffers_.Size())
  874. return;
  875. int* dest = buffers_[threadIndex].data_;
  876. int count = width_ * height_;
  877. auto fillValue = (int)OCCLUSION_Z_SCALE;
  878. while (count--)
  879. *dest++ = fillValue;
  880. }
  881. }