// OcclusionBuffer.cpp
  1. // Copyright (c) 2008-2023 the Urho3D project
  2. // License: MIT
  3. #include "../Precompiled.h"
  4. #include "../Core/WorkQueue.h"
  5. #include "../Core/Profiler.h"
  6. #include "../Graphics/Camera.h"
  7. #include "../Graphics/OcclusionBuffer.h"
  8. #include "../IO/Log.h"
  9. #include "../DebugNew.h"
  10. namespace Urho3D
  11. {
  12. enum ClipMask : unsigned
  13. {
  14. CLIPMASK_X_POS = 0x1,
  15. CLIPMASK_X_NEG = 0x2,
  16. CLIPMASK_Y_POS = 0x4,
  17. CLIPMASK_Y_NEG = 0x8,
  18. CLIPMASK_Z_POS = 0x10,
  19. CLIPMASK_Z_NEG = 0x20,
  20. };
  21. URHO3D_FLAGSET(ClipMask, ClipMaskFlags);
  /// Mip chain generation in SetSize() stops once both dimensions are at or below this size.
  22. static constexpr int OCCLUSION_MIN_SIZE = 8;
  /// Initial value of maxTriangles_ set by the constructor.
  23. static constexpr int OCCLUSION_DEFAULT_MAX_TRIANGLES = 5000;
  /// Subtracted from the clip-space Z of every bounding box corner in IsVisible().
  24. static constexpr float OCCLUSION_RELATIVE_BIAS = 0.00001f;
  /// Subtracted from the integer depth of the tested box in IsVisible().
  25. static constexpr int OCCLUSION_FIXED_BIAS = 16;
  /// Scale applied to edge X coordinates before rounding to integer (65536 = 2^16; the rasterizer shifts right by 16 to get the pixel column).
  26. static constexpr float OCCLUSION_X_SCALE = 65536.0f;
  /// Scale applied to the projected Z in ViewportTransform(); also used (cast to int) as the clear value in ClearBuffer().
  27. static constexpr float OCCLUSION_Z_SCALE = 16777216.0f;
  28. void DrawOcclusionBatchWork(const WorkItem* item, i32 threadIndex)
  29. {
  30. auto* buffer = reinterpret_cast<OcclusionBuffer*>(item->aux_);
  31. OcclusionBatch& batch = *reinterpret_cast<OcclusionBatch*>(item->start_);
  32. buffer->DrawBatch(batch, threadIndex);
  33. }
  34. OcclusionBuffer::OcclusionBuffer(Context* context)
  35. : Object(context)
  36. , maxTriangles_(OCCLUSION_DEFAULT_MAX_TRIANGLES)
  37. {
  38. }
  39. OcclusionBuffer::~OcclusionBuffer() = default;
  40. bool OcclusionBuffer::SetSize(int width, int height, bool threaded)
  41. {
  42. // Force the height to an even amount of pixels for better mip generation
  43. if (height & 1u)
  44. ++height;
  45. if (width == width_ && height == height_)
  46. return true;
  47. if (width <= 0 || height <= 0)
  48. return false;
  49. if (!IsPowerOfTwo((unsigned)width))
  50. {
  51. URHO3D_LOGERRORF("Requested occlusion buffer width %d is not a power of two", width);
  52. return false;
  53. }
  54. width_ = width;
  55. height_ = height;
  56. // Build work buffers for threading
  57. unsigned numThreadBuffers = threaded ? GetSubsystem<WorkQueue>()->GetNumThreads() + 1 : 1;
  58. buffers_.Resize(numThreadBuffers);
  59. for (unsigned i = 0; i < numThreadBuffers; ++i)
  60. {
  61. // Reserve extra memory in case 3D clipping is not exact
  62. OcclusionBufferData& buffer = buffers_[i];
  63. buffer.dataWithSafety_ = new int[width * (height + 2) + 2];
  64. buffer.data_ = buffer.dataWithSafety_.Get() + width + 1;
  65. buffer.used_ = false;
  66. }
  67. mipBuffers_.Clear();
  68. // Build buffers for mip levels
  69. for (;;)
  70. {
  71. width = (width + 1) / 2;
  72. height = (height + 1) / 2;
  73. mipBuffers_.Push(SharedArrayPtr<DepthValue>(new DepthValue[width * height]));
  74. if (width <= OCCLUSION_MIN_SIZE && height <= OCCLUSION_MIN_SIZE)
  75. break;
  76. }
  77. URHO3D_LOGDEBUG("Set occlusion buffer size " + String(width_) + "x" + String(height_) + " with " +
  78. String(mipBuffers_.Size()) + " mip levels and " + String(numThreadBuffers) + " thread buffers");
  79. CalculateViewport();
  80. return true;
  81. }
  82. void OcclusionBuffer::SetView(Camera* camera)
  83. {
  84. if (!camera)
  85. return;
  86. view_ = camera->GetView();
  87. projection_ = camera->GetProjection();
  88. viewProj_ = projection_ * view_;
  89. nearClip_ = camera->GetNearClip();
  90. farClip_ = camera->GetFarClip();
  91. reverseCulling_ = camera->GetReverseCulling();
  92. CalculateViewport();
  93. }
  94. void OcclusionBuffer::SetMaxTriangles(unsigned triangles)
  95. {
  96. maxTriangles_ = triangles;
  97. }
  98. void OcclusionBuffer::SetCullMode(CullMode mode)
  99. {
  100. if (reverseCulling_)
  101. {
  102. if (mode == CULL_CW)
  103. mode = CULL_CCW;
  104. else if (mode == CULL_CCW)
  105. mode = CULL_CW;
  106. }
  107. cullMode_ = mode;
  108. }
  109. void OcclusionBuffer::Reset()
  110. {
  111. numTriangles_ = 0;
  112. batches_.Clear();
  113. }
  114. void OcclusionBuffer::Clear()
  115. {
  116. Reset();
  117. // Only clear the main thread buffer. Rest are cleared on-demand when drawing the first batch
  118. ClearBuffer(0);
  119. for (OcclusionBufferData& buffer : buffers_)
  120. buffer.used_ = false;
  121. depthHierarchyDirty_ = true;
  122. }
  123. bool OcclusionBuffer::AddTriangles(const Matrix3x4& model, const void* vertexData, unsigned vertexSize, unsigned vertexStart,
  124. unsigned vertexCount)
  125. {
  126. batches_.Resize(batches_.Size() + 1);
  127. OcclusionBatch& batch = batches_.Back();
  128. batch.model_ = model;
  129. batch.vertexData_ = vertexData;
  130. batch.vertexSize_ = vertexSize;
  131. batch.indexData_ = nullptr;
  132. batch.indexSize_ = 0;
  133. batch.drawStart_ = vertexStart;
  134. batch.drawCount_ = vertexCount;
  135. numTriangles_ += vertexCount / 3;
  136. return numTriangles_ <= maxTriangles_;
  137. }
  138. bool OcclusionBuffer::AddTriangles(const Matrix3x4& model, const void* vertexData, unsigned vertexSize, const void* indexData,
  139. unsigned indexSize, unsigned indexStart, unsigned indexCount)
  140. {
  141. batches_.Resize(batches_.Size() + 1);
  142. OcclusionBatch& batch = batches_.Back();
  143. batch.model_ = model;
  144. batch.vertexData_ = vertexData;
  145. batch.vertexSize_ = vertexSize;
  146. batch.indexData_ = indexData;
  147. batch.indexSize_ = indexSize;
  148. batch.drawStart_ = indexStart;
  149. batch.drawCount_ = indexCount;
  150. numTriangles_ += indexCount / 3;
  151. return numTriangles_ <= maxTriangles_;
  152. }
  153. void OcclusionBuffer::DrawTriangles()
  154. {
  155. if (buffers_.Size() == 1)
  156. {
  157. // Not threaded
  158. for (Vector<OcclusionBatch>::Iterator i = batches_.Begin(); i != batches_.End(); ++i)
  159. DrawBatch(*i, 0);
  160. depthHierarchyDirty_ = true;
  161. }
  162. else if (buffers_.Size() > 1)
  163. {
  164. // Threaded
  165. auto* queue = GetSubsystem<WorkQueue>();
  166. for (Vector<OcclusionBatch>::Iterator i = batches_.Begin(); i != batches_.End(); ++i)
  167. {
  168. SharedPtr<WorkItem> item = queue->GetFreeItem();
  169. item->priority_ = WI_MAX_PRIORITY;
  170. item->workFunction_ = DrawOcclusionBatchWork;
  171. item->aux_ = this;
  172. item->start_ = &(*i);
  173. queue->AddWorkItem(item);
  174. }
  175. queue->Complete(WI_MAX_PRIORITY);
  176. MergeBuffers();
  177. depthHierarchyDirty_ = true;
  178. }
  179. batches_.Clear();
  180. }
  181. void OcclusionBuffer::BuildDepthHierarchy()
  182. {
  183. if (buffers_.Empty() || !depthHierarchyDirty_)
  184. return;
  185. URHO3D_PROFILE(BuildDepthHierarchy);
  186. // Build the first mip level from the pixel-level data
  187. int width = (width_ + 1) / 2;
  188. int height = (height_ + 1) / 2;
  189. if (mipBuffers_.Size())
  190. {
  191. for (int y = 0; y < height; ++y)
  192. {
  193. int* src = buffers_[0].data_ + (y * 2) * width_;
  194. DepthValue* dest = mipBuffers_[0].Get() + y * width;
  195. DepthValue* end = dest + width;
  196. if (y * 2 + 1 < height_)
  197. {
  198. int* src2 = src + width_;
  199. while (dest < end)
  200. {
  201. int minUpper = Min(src[0], src[1]);
  202. int minLower = Min(src2[0], src2[1]);
  203. dest->min_ = Min(minUpper, minLower);
  204. int maxUpper = Max(src[0], src[1]);
  205. int maxLower = Max(src2[0], src2[1]);
  206. dest->max_ = Max(maxUpper, maxLower);
  207. src += 2;
  208. src2 += 2;
  209. ++dest;
  210. }
  211. }
  212. else
  213. {
  214. while (dest < end)
  215. {
  216. dest->min_ = Min(src[0], src[1]);
  217. dest->max_ = Max(src[0], src[1]);
  218. src += 2;
  219. ++dest;
  220. }
  221. }
  222. }
  223. }
  224. // Build the rest of the mip levels
  225. for (i32 i = 1; i < mipBuffers_.Size(); ++i)
  226. {
  227. int prevWidth = width;
  228. int prevHeight = height;
  229. width = (width + 1) / 2;
  230. height = (height + 1) / 2;
  231. for (int y = 0; y < height; ++y)
  232. {
  233. DepthValue* src = mipBuffers_[i - 1].Get() + (y * 2) * prevWidth;
  234. DepthValue* dest = mipBuffers_[i].Get() + y * width;
  235. DepthValue* end = dest + width;
  236. if (y * 2 + 1 < prevHeight)
  237. {
  238. DepthValue* src2 = src + prevWidth;
  239. while (dest < end)
  240. {
  241. int minUpper = Min(src[0].min_, src[1].min_);
  242. int minLower = Min(src2[0].min_, src2[1].min_);
  243. dest->min_ = Min(minUpper, minLower);
  244. int maxUpper = Max(src[0].max_, src[1].max_);
  245. int maxLower = Max(src2[0].max_, src2[1].max_);
  246. dest->max_ = Max(maxUpper, maxLower);
  247. src += 2;
  248. src2 += 2;
  249. ++dest;
  250. }
  251. }
  252. else
  253. {
  254. while (dest < end)
  255. {
  256. dest->min_ = Min(src[0].min_, src[1].min_);
  257. dest->max_ = Max(src[0].max_, src[1].max_);
  258. src += 2;
  259. ++dest;
  260. }
  261. }
  262. }
  263. }
  264. depthHierarchyDirty_ = false;
  265. }
  266. void OcclusionBuffer::ResetUseTimer()
  267. {
  268. useTimer_.Reset();
  269. }
  270. bool OcclusionBuffer::IsVisible(const BoundingBox& worldSpaceBox) const
  271. {
  272. if (buffers_.Empty())
  273. return true;
  274. // Transform corners to projection space
  275. Vector4 vertices[8];
  276. vertices[0] = ModelTransform(viewProj_, worldSpaceBox.min_);
  277. vertices[1] = ModelTransform(viewProj_, Vector3(worldSpaceBox.max_.x_, worldSpaceBox.min_.y_, worldSpaceBox.min_.z_));
  278. vertices[2] = ModelTransform(viewProj_, Vector3(worldSpaceBox.min_.x_, worldSpaceBox.max_.y_, worldSpaceBox.min_.z_));
  279. vertices[3] = ModelTransform(viewProj_, Vector3(worldSpaceBox.max_.x_, worldSpaceBox.max_.y_, worldSpaceBox.min_.z_));
  280. vertices[4] = ModelTransform(viewProj_, Vector3(worldSpaceBox.min_.x_, worldSpaceBox.min_.y_, worldSpaceBox.max_.z_));
  281. vertices[5] = ModelTransform(viewProj_, Vector3(worldSpaceBox.max_.x_, worldSpaceBox.min_.y_, worldSpaceBox.max_.z_));
  282. vertices[6] = ModelTransform(viewProj_, Vector3(worldSpaceBox.min_.x_, worldSpaceBox.max_.y_, worldSpaceBox.max_.z_));
  283. vertices[7] = ModelTransform(viewProj_, worldSpaceBox.max_);
  284. // Apply a far clip relative bias
  285. for (auto& vertice : vertices)
  286. vertice.z_ -= OCCLUSION_RELATIVE_BIAS;
  287. // Transform to screen space. If any of the corners cross the near plane, assume visible
  288. float minX, maxX, minY, maxY, minZ;
  289. if (vertices[0].z_ <= 0.0f)
  290. return true;
  291. Vector3 projected = ViewportTransform(vertices[0]);
  292. minX = maxX = projected.x_;
  293. minY = maxY = projected.y_;
  294. minZ = projected.z_;
  295. // Project the rest
  296. for (unsigned i = 1; i < 8; ++i)
  297. {
  298. if (vertices[i].z_ <= 0.0f)
  299. return true;
  300. projected = ViewportTransform(vertices[i]);
  301. if (projected.x_ < minX) minX = projected.x_;
  302. if (projected.x_ > maxX) maxX = projected.x_;
  303. if (projected.y_ < minY) minY = projected.y_;
  304. if (projected.y_ > maxY) maxY = projected.y_;
  305. if (projected.z_ < minZ) minZ = projected.z_;
  306. }
  307. // Expand the bounding box 1 pixel in each direction to be conservative and correct rasterization offset
  308. IntRect rect((int)(minX - 1.5f), (int)(minY - 1.5f), RoundToInt(maxX), RoundToInt(maxY));
  309. // If the rect is outside, let frustum culling handle
  310. if (rect.right_ < 0 || rect.bottom_ < 0)
  311. return true;
  312. if (rect.left_ >= width_ || rect.top_ >= height_)
  313. return true;
  314. // Clipping of rect
  315. if (rect.left_ < 0)
  316. rect.left_ = 0;
  317. if (rect.top_ < 0)
  318. rect.top_ = 0;
  319. if (rect.right_ >= width_)
  320. rect.right_ = width_ - 1;
  321. if (rect.bottom_ >= height_)
  322. rect.bottom_ = height_ - 1;
  323. // Convert depth to integer and apply final bias
  324. int z = RoundToInt(minZ) - OCCLUSION_FIXED_BIAS;
  325. if (!depthHierarchyDirty_)
  326. {
  327. // Start from lowest mip level and check if a conclusive result can be found
  328. for (int i = mipBuffers_.Size() - 1; i >= 0; --i)
  329. {
  330. int shift = i + 1;
  331. int width = width_ >> shift;
  332. int left = rect.left_ >> shift;
  333. int right = rect.right_ >> shift;
  334. DepthValue* buffer = mipBuffers_[i].Get();
  335. DepthValue* row = buffer + (rect.top_ >> shift) * width;
  336. DepthValue* endRow = buffer + (rect.bottom_ >> shift) * width;
  337. bool allOccluded = true;
  338. while (row <= endRow)
  339. {
  340. DepthValue* src = row + left;
  341. DepthValue* end = row + right;
  342. while (src <= end)
  343. {
  344. if (z <= src->min_)
  345. return true;
  346. if (z <= src->max_)
  347. allOccluded = false;
  348. ++src;
  349. }
  350. row += width;
  351. }
  352. if (allOccluded)
  353. return false;
  354. }
  355. }
  356. // If no conclusive result, finally check the pixel-level data
  357. int* row = buffers_[0].data_ + rect.top_ * width_;
  358. int* endRow = buffers_[0].data_ + rect.bottom_ * width_;
  359. while (row <= endRow)
  360. {
  361. int* src = row + rect.left_;
  362. int* end = row + rect.right_;
  363. while (src <= end)
  364. {
  365. if (z <= *src)
  366. return true;
  367. ++src;
  368. }
  369. row += width_;
  370. }
  371. return false;
  372. }
  373. unsigned OcclusionBuffer::GetUseTimer()
  374. {
  375. return useTimer_.GetMSec(false);
  376. }
  377. void OcclusionBuffer::DrawBatch(const OcclusionBatch& batch, i32 threadIndex)
  378. {
  379. assert(threadIndex >= 0);
  380. // If buffer not yet used, clear it
  381. if (threadIndex > 0 && !buffers_[threadIndex].used_)
  382. {
  383. ClearBuffer(threadIndex);
  384. buffers_[threadIndex].used_ = true;
  385. }
  386. Matrix4 modelViewProj = viewProj_ * batch.model_;
  387. // Theoretical max. amount of vertices if each of the 6 clipping planes doubles the triangle count
  388. Vector4 vertices[64 * 3];
  389. if (!batch.indexData_)
  390. {
  391. const unsigned char* srcData = ((const unsigned char*)batch.vertexData_) + batch.drawStart_ * batch.vertexSize_;
  392. unsigned index = 0;
  393. while (index + 2 < batch.drawCount_)
  394. {
  395. const Vector3& v0 = *((const Vector3*)(&srcData[index * batch.vertexSize_]));
  396. const Vector3& v1 = *((const Vector3*)(&srcData[(index + 1) * batch.vertexSize_]));
  397. const Vector3& v2 = *((const Vector3*)(&srcData[(index + 2) * batch.vertexSize_]));
  398. vertices[0] = ModelTransform(modelViewProj, v0);
  399. vertices[1] = ModelTransform(modelViewProj, v1);
  400. vertices[2] = ModelTransform(modelViewProj, v2);
  401. DrawTriangle(vertices, threadIndex);
  402. index += 3;
  403. }
  404. }
  405. else
  406. {
  407. const auto* srcData = (const unsigned char*)batch.vertexData_;
  408. // 16-bit indices
  409. if (batch.indexSize_ == sizeof(unsigned short))
  410. {
  411. const unsigned short* indices = ((const unsigned short*)batch.indexData_) + batch.drawStart_;
  412. const unsigned short* indicesEnd = indices + batch.drawCount_;
  413. while (indices < indicesEnd)
  414. {
  415. const Vector3& v0 = *((const Vector3*)(&srcData[indices[0] * batch.vertexSize_]));
  416. const Vector3& v1 = *((const Vector3*)(&srcData[indices[1] * batch.vertexSize_]));
  417. const Vector3& v2 = *((const Vector3*)(&srcData[indices[2] * batch.vertexSize_]));
  418. vertices[0] = ModelTransform(modelViewProj, v0);
  419. vertices[1] = ModelTransform(modelViewProj, v1);
  420. vertices[2] = ModelTransform(modelViewProj, v2);
  421. DrawTriangle(vertices, threadIndex);
  422. indices += 3;
  423. }
  424. }
  425. else
  426. {
  427. const unsigned* indices = ((const unsigned*)batch.indexData_) + batch.drawStart_;
  428. const unsigned* indicesEnd = indices + batch.drawCount_;
  429. while (indices < indicesEnd)
  430. {
  431. const Vector3& v0 = *((const Vector3*)(&srcData[indices[0] * batch.vertexSize_]));
  432. const Vector3& v1 = *((const Vector3*)(&srcData[indices[1] * batch.vertexSize_]));
  433. const Vector3& v2 = *((const Vector3*)(&srcData[indices[2] * batch.vertexSize_]));
  434. vertices[0] = ModelTransform(modelViewProj, v0);
  435. vertices[1] = ModelTransform(modelViewProj, v1);
  436. vertices[2] = ModelTransform(modelViewProj, v2);
  437. DrawTriangle(vertices, threadIndex);
  438. indices += 3;
  439. }
  440. }
  441. }
  442. }
  443. inline Vector4 OcclusionBuffer::ModelTransform(const Matrix4& transform, const Vector3& vertex) const
  444. {
  445. return Vector4(
  446. transform.m00_ * vertex.x_ + transform.m01_ * vertex.y_ + transform.m02_ * vertex.z_ + transform.m03_,
  447. transform.m10_ * vertex.x_ + transform.m11_ * vertex.y_ + transform.m12_ * vertex.z_ + transform.m13_,
  448. transform.m20_ * vertex.x_ + transform.m21_ * vertex.y_ + transform.m22_ * vertex.z_ + transform.m23_,
  449. transform.m30_ * vertex.x_ + transform.m31_ * vertex.y_ + transform.m32_ * vertex.z_ + transform.m33_
  450. );
  451. }
  452. inline Vector3 OcclusionBuffer::ViewportTransform(const Vector4& vertex) const
  453. {
  454. float invW = 1.0f / vertex.w_;
  455. return Vector3(
  456. invW * vertex.x_ * scaleX_ + offsetX_,
  457. invW * vertex.y_ * scaleY_ + offsetY_,
  458. invW * vertex.z_ * OCCLUSION_Z_SCALE
  459. );
  460. }
  461. inline Vector4 OcclusionBuffer::ClipEdge(const Vector4& v0, const Vector4& v1, float d0, float d1) const
  462. {
  463. float t = d0 / (d0 - d1);
  464. return v0 + t * (v1 - v0);
  465. }
  466. inline float OcclusionBuffer::SignedArea(const Vector3& v0, const Vector3& v1, const Vector3& v2) const
  467. {
  468. float aX = v0.x_ - v1.x_;
  469. float aY = v0.y_ - v1.y_;
  470. float bX = v2.x_ - v1.x_;
  471. float bY = v2.y_ - v1.y_;
  472. return aX * bY - aY * bX;
  473. }
  474. void OcclusionBuffer::CalculateViewport()
  475. {
  476. // Add half pixel offset due to 3D frustum culling
  477. scaleX_ = 0.5f * width_;
  478. scaleY_ = -0.5f * height_;
  479. offsetX_ = 0.5f * width_ + 0.5f;
  480. offsetY_ = 0.5f * height_ + 0.5f;
  481. projOffsetScaleX_ = projection_.m00_ * scaleX_;
  482. projOffsetScaleY_ = projection_.m11_ * scaleY_;
  483. }
  484. void OcclusionBuffer::DrawTriangle(Vector4* vertices, i32 threadIndex)
  485. {
  486. assert(threadIndex >= 0);
  487. ClipMaskFlags clipMask{};
  488. ClipMaskFlags andClipMask{};
  489. bool drawOk = false;
  490. Vector3 projected[3];
  491. // Build the clip plane mask for the triangle
  492. for (unsigned i = 0; i < 3; ++i)
  493. {
  494. ClipMaskFlags vertexClipMask{};
  495. if (vertices[i].x_ > vertices[i].w_)
  496. vertexClipMask |= CLIPMASK_X_POS;
  497. if (vertices[i].x_ < -vertices[i].w_)
  498. vertexClipMask |= CLIPMASK_X_NEG;
  499. if (vertices[i].y_ > vertices[i].w_)
  500. vertexClipMask |= CLIPMASK_Y_POS;
  501. if (vertices[i].y_ < -vertices[i].w_)
  502. vertexClipMask |= CLIPMASK_Y_NEG;
  503. if (vertices[i].z_ > vertices[i].w_)
  504. vertexClipMask |= CLIPMASK_Z_POS;
  505. if (vertices[i].z_ < 0.0f)
  506. vertexClipMask |= CLIPMASK_Z_NEG;
  507. clipMask |= vertexClipMask;
  508. if (!i)
  509. andClipMask = vertexClipMask;
  510. else
  511. andClipMask &= vertexClipMask;
  512. }
  513. // If triangle is fully behind any clip plane, can reject quickly
  514. if (andClipMask)
  515. return;
  516. // Check if triangle is fully inside
  517. if (!clipMask)
  518. {
  519. projected[0] = ViewportTransform(vertices[0]);
  520. projected[1] = ViewportTransform(vertices[1]);
  521. projected[2] = ViewportTransform(vertices[2]);
  522. bool clockwise = SignedArea(projected[0], projected[1], projected[2]) < 0.0f;
  523. if (cullMode_ == CULL_NONE || (cullMode_ == CULL_CCW && clockwise) || (cullMode_ == CULL_CW && !clockwise))
  524. {
  525. DrawTriangle2D(projected, clockwise, threadIndex);
  526. drawOk = true;
  527. }
  528. }
  529. else
  530. {
  531. bool triangles[64];
  532. // Initial triangle
  533. triangles[0] = true;
  534. unsigned numTriangles = 1;
  535. if (clipMask & CLIPMASK_X_POS)
  536. ClipVertices(Vector4(-1.0f, 0.0f, 0.0f, 1.0f), vertices, triangles, numTriangles);
  537. if (clipMask & CLIPMASK_X_NEG)
  538. ClipVertices(Vector4(1.0f, 0.0f, 0.0f, 1.0f), vertices, triangles, numTriangles);
  539. if (clipMask & CLIPMASK_Y_POS)
  540. ClipVertices(Vector4(0.0f, -1.0f, 0.0f, 1.0f), vertices, triangles, numTriangles);
  541. if (clipMask & CLIPMASK_Y_NEG)
  542. ClipVertices(Vector4(0.0f, 1.0f, 0.0f, 1.0f), vertices, triangles, numTriangles);
  543. if (clipMask & CLIPMASK_Z_POS)
  544. ClipVertices(Vector4(0.0f, 0.0f, -1.0f, 1.0f), vertices, triangles, numTriangles);
  545. if (clipMask & CLIPMASK_Z_NEG)
  546. ClipVertices(Vector4(0.0f, 0.0f, 1.0f, 0.0f), vertices, triangles, numTriangles);
  547. // Draw each accepted triangle
  548. for (unsigned i = 0; i < numTriangles; ++i)
  549. {
  550. if (triangles[i])
  551. {
  552. unsigned index = i * 3;
  553. projected[0] = ViewportTransform(vertices[index]);
  554. projected[1] = ViewportTransform(vertices[index + 1]);
  555. projected[2] = ViewportTransform(vertices[index + 2]);
  556. bool clockwise = SignedArea(projected[0], projected[1], projected[2]) < 0.0f;
  557. if (cullMode_ == CULL_NONE || (cullMode_ == CULL_CCW && clockwise) || (cullMode_ == CULL_CW && !clockwise))
  558. {
  559. DrawTriangle2D(projected, clockwise, threadIndex);
  560. drawOk = true;
  561. }
  562. }
  563. }
  564. }
  565. if (drawOk)
  566. ++numTriangles_;
  567. }
  568. void OcclusionBuffer::ClipVertices(const Vector4& plane, Vector4* vertices, bool* triangles, unsigned& numTriangles)
  569. {
  570. unsigned num = numTriangles;
  571. for (unsigned i = 0; i < num; ++i)
  572. {
  573. if (triangles[i])
  574. {
  575. unsigned index = i * 3;
  576. float d0 = plane.DotProduct(vertices[index]);
  577. float d1 = plane.DotProduct(vertices[index + 1]);
  578. float d2 = plane.DotProduct(vertices[index + 2]);
  579. // If all vertices behind the plane, reject triangle
  580. if (d0 < 0.0f && d1 < 0.0f && d2 < 0.0f)
  581. {
  582. triangles[i] = false;
  583. continue;
  584. }
  585. // If 2 vertices behind the plane, create a new triangle in-place
  586. else if (d0 < 0.0f && d1 < 0.0f)
  587. {
  588. vertices[index] = ClipEdge(vertices[index], vertices[index + 2], d0, d2);
  589. vertices[index + 1] = ClipEdge(vertices[index + 1], vertices[index + 2], d1, d2);
  590. }
  591. else if (d0 < 0.0f && d2 < 0.0f)
  592. {
  593. vertices[index] = ClipEdge(vertices[index], vertices[index + 1], d0, d1);
  594. vertices[index + 2] = ClipEdge(vertices[index + 2], vertices[index + 1], d2, d1);
  595. }
  596. else if (d1 < 0.0f && d2 < 0.0f)
  597. {
  598. vertices[index + 1] = ClipEdge(vertices[index + 1], vertices[index], d1, d0);
  599. vertices[index + 2] = ClipEdge(vertices[index + 2], vertices[index], d2, d0);
  600. }
  601. // 1 vertex behind the plane: create one new triangle, and modify one in-place
  602. else if (d0 < 0.0f)
  603. {
  604. unsigned newIdx = numTriangles * 3;
  605. triangles[numTriangles] = true;
  606. ++numTriangles;
  607. vertices[newIdx] = ClipEdge(vertices[index], vertices[index + 2], d0, d2);
  608. vertices[newIdx + 1] = vertices[index] = ClipEdge(vertices[index], vertices[index + 1], d0, d1);
  609. vertices[newIdx + 2] = vertices[index + 2];
  610. }
  611. else if (d1 < 0.0f)
  612. {
  613. unsigned newIdx = numTriangles * 3;
  614. triangles[numTriangles] = true;
  615. ++numTriangles;
  616. vertices[newIdx + 1] = ClipEdge(vertices[index + 1], vertices[index], d1, d0);
  617. vertices[newIdx + 2] = vertices[index + 1] = ClipEdge(vertices[index + 1], vertices[index + 2], d1, d2);
  618. vertices[newIdx] = vertices[index];
  619. }
  620. else if (d2 < 0.0f)
  621. {
  622. unsigned newIdx = numTriangles * 3;
  623. triangles[numTriangles] = true;
  624. ++numTriangles;
  625. vertices[newIdx + 2] = ClipEdge(vertices[index + 2], vertices[index + 1], d2, d1);
  626. vertices[newIdx] = vertices[index + 2] = ClipEdge(vertices[index + 2], vertices[index], d2, d0);
  627. vertices[newIdx + 1] = vertices[index + 1];
  628. }
  629. }
  630. }
  631. }
  632. // Code based on Chris Hecker's Perspective Texture Mapping series in the Game Developer magazine
  633. // Also available online at http://chrishecker.com/Miscellaneous_Technical_Articles
  634. /// %Gradients of a software rasterized triangle.
  635. struct Gradients
  636. {
  637. /// Construct from vertices.
  638. explicit Gradients(const Vector3* vertices)
  639. {
  640. float invdX = 1.0f / (((vertices[1].x_ - vertices[2].x_) *
  641. (vertices[0].y_ - vertices[2].y_)) -
  642. ((vertices[0].x_ - vertices[2].x_) *
  643. (vertices[1].y_ - vertices[2].y_)));
  644. float invdY = -invdX;
  645. dInvZdX_ = invdX * (((vertices[1].z_ - vertices[2].z_) * (vertices[0].y_ - vertices[2].y_)) -
  646. ((vertices[0].z_ - vertices[2].z_) * (vertices[1].y_ - vertices[2].y_)));
  647. dInvZdY_ = invdY * (((vertices[1].z_ - vertices[2].z_) * (vertices[0].x_ - vertices[2].x_)) -
  648. ((vertices[0].z_ - vertices[2].z_) * (vertices[1].x_ - vertices[2].x_)));
  649. dInvZdXInt_ = (int)dInvZdX_;
  650. }
  651. /// Integer horizontal gradient.
  652. int dInvZdXInt_;
  653. /// Horizontal gradient.
  654. float dInvZdX_;
  655. /// Vertical gradient.
  656. float dInvZdY_;
  657. };
  658. /// %Edge of a software rasterized triangle.
  659. struct Edge
  660. {
  661. /// Construct from gradients and top & bottom vertices.
  662. Edge(const Gradients& gradients, const Vector3& top, const Vector3& bottom, int topY)
  663. {
  664. float height = (bottom.y_ - top.y_);
  665. float slope = (height != 0.0f) ? (bottom.x_ - top.x_) / height : 0.0f;
  666. float yPreStep = (float)(topY + 1) - top.y_;
  667. float xPreStep = slope * yPreStep;
  668. x_ = RoundToInt((xPreStep + top.x_) * OCCLUSION_X_SCALE);
  669. xStep_ = RoundToInt(slope * OCCLUSION_X_SCALE);
  670. invZ_ = RoundToInt(top.z_ + xPreStep * gradients.dInvZdX_ + yPreStep * gradients.dInvZdY_);
  671. invZStep_ = RoundToInt(slope * gradients.dInvZdX_ + gradients.dInvZdY_);
  672. }
  673. /// X coordinate.
  674. int x_;
  675. /// X coordinate step.
  676. int xStep_;
  677. /// Inverse Z.
  678. int invZ_;
  679. /// Inverse Z step.
  680. int invZStep_;
  681. };
  682. void OcclusionBuffer::DrawTriangle2D(const Vector3* vertices, bool clockwise, i32 threadIndex)
  683. {
  684. assert(threadIndex >= 0);
  685. int top, middle, bottom;
  686. bool middleIsRight;
  687. // Sort vertices in Y-direction
  688. if (vertices[0].y_ < vertices[1].y_)
  689. {
  690. if (vertices[2].y_ < vertices[0].y_)
  691. {
  692. top = 2;
  693. middle = 0;
  694. bottom = 1;
  695. middleIsRight = true;
  696. }
  697. else
  698. {
  699. top = 0;
  700. if (vertices[1].y_ < vertices[2].y_)
  701. {
  702. middle = 1;
  703. bottom = 2;
  704. middleIsRight = true;
  705. }
  706. else
  707. {
  708. middle = 2;
  709. bottom = 1;
  710. middleIsRight = false;
  711. }
  712. }
  713. }
  714. else
  715. {
  716. if (vertices[2].y_ < vertices[1].y_)
  717. {
  718. top = 2;
  719. middle = 1;
  720. bottom = 0;
  721. middleIsRight = false;
  722. }
  723. else
  724. {
  725. top = 1;
  726. if (vertices[0].y_ < vertices[2].y_)
  727. {
  728. middle = 0;
  729. bottom = 2;
  730. middleIsRight = false;
  731. }
  732. else
  733. {
  734. middle = 2;
  735. bottom = 0;
  736. middleIsRight = true;
  737. }
  738. }
  739. }
  740. auto topY = (int)vertices[top].y_;
  741. auto middleY = (int)vertices[middle].y_;
  742. auto bottomY = (int)vertices[bottom].y_;
  743. // Check for degenerate triangle
  744. if (topY == bottomY)
  745. return;
  746. // Reverse middleIsRight test if triangle is counterclockwise
  747. if (!clockwise)
  748. middleIsRight = !middleIsRight;
  749. const bool topDegenerate = topY == middleY;
  750. const bool bottomDegenerate = middleY == bottomY;
  751. Gradients gradients(vertices);
  752. Edge topToBottom(gradients, vertices[top], vertices[bottom], topY);
  753. int* bufferData = buffers_[threadIndex].data_;
  754. if (middleIsRight)
  755. {
  756. // Top half
  757. if (!topDegenerate)
  758. {
  759. Edge topToMiddle(gradients, vertices[top], vertices[middle], topY);
  760. int* row = bufferData + topY * width_;
  761. int* endRow = bufferData + middleY * width_;
  762. while (row < endRow)
  763. {
  764. int invZ = topToBottom.invZ_;
  765. int* dest = row + (topToBottom.x_ >> 16u);
  766. int* end = row + (topToMiddle.x_ >> 16u);
  767. while (dest < end)
  768. {
  769. if (invZ < *dest)
  770. *dest = invZ;
  771. invZ += gradients.dInvZdXInt_;
  772. ++dest;
  773. }
  774. topToBottom.x_ += topToBottom.xStep_;
  775. topToBottom.invZ_ += topToBottom.invZStep_;
  776. topToMiddle.x_ += topToMiddle.xStep_;
  777. row += width_;
  778. }
  779. }
  780. // Bottom half
  781. if (!bottomDegenerate)
  782. {
  783. Edge middleToBottom(gradients, vertices[middle], vertices[bottom], middleY);
  784. int* row = bufferData + middleY * width_;
  785. int* endRow = bufferData + bottomY * width_;
  786. while (row < endRow)
  787. {
  788. int invZ = topToBottom.invZ_;
  789. int* dest = row + (topToBottom.x_ >> 16u);
  790. int* end = row + (middleToBottom.x_ >> 16u);
  791. while (dest < end)
  792. {
  793. if (invZ < *dest)
  794. *dest = invZ;
  795. invZ += gradients.dInvZdXInt_;
  796. ++dest;
  797. }
  798. topToBottom.x_ += topToBottom.xStep_;
  799. topToBottom.invZ_ += topToBottom.invZStep_;
  800. middleToBottom.x_ += middleToBottom.xStep_;
  801. row += width_;
  802. }
  803. }
  804. }
  805. else
  806. {
  807. // Top half
  808. if (!topDegenerate)
  809. {
  810. Edge topToMiddle(gradients, vertices[top], vertices[middle], topY);
  811. int* row = bufferData + topY * width_;
  812. int* endRow = bufferData + middleY * width_;
  813. while (row < endRow)
  814. {
  815. int invZ = topToMiddle.invZ_;
  816. int* dest = row + (topToMiddle.x_ >> 16u);
  817. int* end = row + (topToBottom.x_ >> 16u);
  818. while (dest < end)
  819. {
  820. if (invZ < *dest)
  821. *dest = invZ;
  822. invZ += gradients.dInvZdXInt_;
  823. ++dest;
  824. }
  825. topToMiddle.x_ += topToMiddle.xStep_;
  826. topToMiddle.invZ_ += topToMiddle.invZStep_;
  827. topToBottom.x_ += topToBottom.xStep_;
  828. row += width_;
  829. }
  830. }
  831. // Bottom half
  832. if (!bottomDegenerate)
  833. {
  834. Edge middleToBottom(gradients, vertices[middle], vertices[bottom], middleY);
  835. int* row = bufferData + middleY * width_;
  836. int* endRow = bufferData + bottomY * width_;
  837. while (row < endRow)
  838. {
  839. int invZ = middleToBottom.invZ_;
  840. int* dest = row + (middleToBottom.x_ >> 16u);
  841. int* end = row + (topToBottom.x_ >> 16u);
  842. while (dest < end)
  843. {
  844. if (invZ < *dest)
  845. *dest = invZ;
  846. invZ += gradients.dInvZdXInt_;
  847. ++dest;
  848. }
  849. middleToBottom.x_ += middleToBottom.xStep_;
  850. middleToBottom.invZ_ += middleToBottom.invZStep_;
  851. topToBottom.x_ += topToBottom.xStep_;
  852. row += width_;
  853. }
  854. }
  855. }
  856. }
  857. void OcclusionBuffer::MergeBuffers()
  858. {
  859. URHO3D_PROFILE(MergeBuffers);
  860. for (i32 i = 1; i < buffers_.Size(); ++i)
  861. {
  862. if (!buffers_[i].used_)
  863. continue;
  864. int* src = buffers_[i].data_;
  865. int* dest = buffers_[0].data_;
  866. int count = width_ * height_;
  867. while (count--)
  868. {
  869. // If thread buffer's depth value is closer, overwrite the original
  870. if (*src < *dest)
  871. *dest = *src;
  872. ++src;
  873. ++dest;
  874. }
  875. }
  876. }
  877. void OcclusionBuffer::ClearBuffer(i32 threadIndex)
  878. {
  879. assert(threadIndex >= 0);
  880. if (threadIndex >= buffers_.Size())
  881. return;
  882. int* dest = buffers_[threadIndex].data_;
  883. int count = width_ * height_;
  884. auto fillValue = (int)OCCLUSION_Z_SCALE;
  885. while (count--)
  886. *dest++ = fillValue;
  887. }
  888. }