OcclusionBuffer.cpp 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041
  1. //
  2. // Copyright (c) 2008-2020 the Urho3D project.
  3. //
  4. // Permission is hereby granted, free of charge, to any person obtaining a copy
  5. // of this software and associated documentation files (the "Software"), to deal
  6. // in the Software without restriction, including without limitation the rights
  7. // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8. // copies of the Software, and to permit persons to whom the Software is
  9. // furnished to do so, subject to the following conditions:
  10. //
  11. // The above copyright notice and this permission notice shall be included in
  12. // all copies or substantial portions of the Software.
  13. //
  14. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  20. // THE SOFTWARE.
  21. //
  22. #include "../Precompiled.h"
  23. #include "../Core/WorkQueue.h"
  24. #include "../Core/Profiler.h"
  25. #include "../Graphics/Camera.h"
  26. #include "../Graphics/OcclusionBuffer.h"
  27. #include "../IO/Log.h"
  28. #include "../DebugNew.h"
  29. namespace Urho3D
  30. {
  31. enum ClipMask : unsigned
  32. {
  33. CLIPMASK_X_POS = 0x1,
  34. CLIPMASK_X_NEG = 0x2,
  35. CLIPMASK_Y_POS = 0x4,
  36. CLIPMASK_Y_NEG = 0x8,
  37. CLIPMASK_Z_POS = 0x10,
  38. CLIPMASK_Z_NEG = 0x20,
  39. };
  40. URHO3D_FLAGSET(ClipMask, ClipMaskFlags);
  41. void DrawOcclusionBatchWork(const WorkItem* item, unsigned threadIndex)
  42. {
  43. auto* buffer = reinterpret_cast<OcclusionBuffer*>(item->aux_);
  44. OcclusionBatch& batch = *reinterpret_cast<OcclusionBatch*>(item->start_);
  45. buffer->DrawBatch(batch, threadIndex);
  46. }
  47. OcclusionBuffer::OcclusionBuffer(Context* context) :
  48. Object(context)
  49. {
  50. }
  51. OcclusionBuffer::~OcclusionBuffer() = default;
  52. bool OcclusionBuffer::SetSize(int width, int height, bool threaded)
  53. {
  54. // Force the height to an even amount of pixels for better mip generation
  55. if (height & 1u)
  56. ++height;
  57. if (width == width_ && height == height_)
  58. return true;
  59. if (width <= 0 || height <= 0)
  60. return false;
  61. if (!IsPowerOfTwo((unsigned)width))
  62. {
  63. URHO3D_LOGERRORF("Requested occlusion buffer width %d is not a power of two", width);
  64. return false;
  65. }
  66. width_ = width;
  67. height_ = height;
  68. // Build work buffers for threading
  69. unsigned numThreadBuffers = threaded ? GetSubsystem<WorkQueue>()->GetNumThreads() + 1 : 1;
  70. buffers_.Resize(numThreadBuffers);
  71. for (unsigned i = 0; i < numThreadBuffers; ++i)
  72. {
  73. // Reserve extra memory in case 3D clipping is not exact
  74. OcclusionBufferData& buffer = buffers_[i];
  75. buffer.dataWithSafety_ = new int[width * (height + 2) + 2];
  76. buffer.data_ = buffer.dataWithSafety_.Get() + width + 1;
  77. buffer.used_ = false;
  78. }
  79. mipBuffers_.Clear();
  80. // Build buffers for mip levels
  81. for (;;)
  82. {
  83. width = (width + 1) / 2;
  84. height = (height + 1) / 2;
  85. mipBuffers_.Push(SharedArrayPtr<DepthValue>(new DepthValue[width * height]));
  86. if (width <= OCCLUSION_MIN_SIZE && height <= OCCLUSION_MIN_SIZE)
  87. break;
  88. }
  89. URHO3D_LOGDEBUG("Set occlusion buffer size " + String(width_) + "x" + String(height_) + " with " +
  90. String(mipBuffers_.Size()) + " mip levels and " + String(numThreadBuffers) + " thread buffers");
  91. CalculateViewport();
  92. return true;
  93. }
  94. void OcclusionBuffer::SetView(Camera* camera)
  95. {
  96. if (!camera)
  97. return;
  98. view_ = camera->GetView();
  99. projection_ = camera->GetProjection();
  100. viewProj_ = projection_ * view_;
  101. nearClip_ = camera->GetNearClip();
  102. farClip_ = camera->GetFarClip();
  103. reverseCulling_ = camera->GetReverseCulling();
  104. CalculateViewport();
  105. }
  106. void OcclusionBuffer::SetMaxTriangles(unsigned triangles)
  107. {
  108. maxTriangles_ = triangles;
  109. }
  110. void OcclusionBuffer::SetCullMode(CullMode mode)
  111. {
  112. if (reverseCulling_)
  113. {
  114. if (mode == CULL_CW)
  115. mode = CULL_CCW;
  116. else if (mode == CULL_CCW)
  117. mode = CULL_CW;
  118. }
  119. cullMode_ = mode;
  120. }
  121. void OcclusionBuffer::Reset()
  122. {
  123. numTriangles_ = 0;
  124. batches_.Clear();
  125. }
  126. void OcclusionBuffer::Clear()
  127. {
  128. Reset();
  129. // Only clear the main thread buffer. Rest are cleared on-demand when drawing the first batch
  130. ClearBuffer(0);
  131. for (unsigned i = 1; i < buffers_.Size(); ++i)
  132. buffers_[i].used_ = false;
  133. depthHierarchyDirty_ = true;
  134. }
  135. bool OcclusionBuffer::AddTriangles(const Matrix3x4& model, const void* vertexData, unsigned vertexSize, unsigned vertexStart,
  136. unsigned vertexCount)
  137. {
  138. batches_.Resize(batches_.Size() + 1);
  139. OcclusionBatch& batch = batches_.Back();
  140. batch.model_ = model;
  141. batch.vertexData_ = vertexData;
  142. batch.vertexSize_ = vertexSize;
  143. batch.indexData_ = nullptr;
  144. batch.indexSize_ = 0;
  145. batch.drawStart_ = vertexStart;
  146. batch.drawCount_ = vertexCount;
  147. numTriangles_ += vertexCount / 3;
  148. return numTriangles_ <= maxTriangles_;
  149. }
  150. bool OcclusionBuffer::AddTriangles(const Matrix3x4& model, const void* vertexData, unsigned vertexSize, const void* indexData,
  151. unsigned indexSize, unsigned indexStart, unsigned indexCount)
  152. {
  153. batches_.Resize(batches_.Size() + 1);
  154. OcclusionBatch& batch = batches_.Back();
  155. batch.model_ = model;
  156. batch.vertexData_ = vertexData;
  157. batch.vertexSize_ = vertexSize;
  158. batch.indexData_ = indexData;
  159. batch.indexSize_ = indexSize;
  160. batch.drawStart_ = indexStart;
  161. batch.drawCount_ = indexCount;
  162. numTriangles_ += indexCount / 3;
  163. return numTriangles_ <= maxTriangles_;
  164. }
  165. void OcclusionBuffer::DrawTriangles()
  166. {
  167. if (buffers_.Size() == 1)
  168. {
  169. // Not threaded
  170. for (Vector<OcclusionBatch>::Iterator i = batches_.Begin(); i != batches_.End(); ++i)
  171. DrawBatch(*i, 0);
  172. depthHierarchyDirty_ = true;
  173. }
  174. else if (buffers_.Size() > 1)
  175. {
  176. // Threaded
  177. auto* queue = GetSubsystem<WorkQueue>();
  178. for (Vector<OcclusionBatch>::Iterator i = batches_.Begin(); i != batches_.End(); ++i)
  179. {
  180. SharedPtr<WorkItem> item = queue->GetFreeItem();
  181. item->priority_ = M_MAX_UNSIGNED;
  182. item->workFunction_ = DrawOcclusionBatchWork;
  183. item->aux_ = this;
  184. item->start_ = &(*i);
  185. queue->AddWorkItem(item);
  186. }
  187. queue->Complete(M_MAX_UNSIGNED);
  188. MergeBuffers();
  189. depthHierarchyDirty_ = true;
  190. }
  191. batches_.Clear();
  192. }
  193. void OcclusionBuffer::BuildDepthHierarchy()
  194. {
  195. if (buffers_.Empty() || !depthHierarchyDirty_)
  196. return;
  197. URHO3D_PROFILE(BuildDepthHierarchy);
  198. // Build the first mip level from the pixel-level data
  199. int width = (width_ + 1) / 2;
  200. int height = (height_ + 1) / 2;
  201. if (mipBuffers_.Size())
  202. {
  203. for (int y = 0; y < height; ++y)
  204. {
  205. int* src = buffers_[0].data_ + (y * 2) * width_;
  206. DepthValue* dest = mipBuffers_[0].Get() + y * width;
  207. DepthValue* end = dest + width;
  208. if (y * 2 + 1 < height_)
  209. {
  210. int* src2 = src + width_;
  211. while (dest < end)
  212. {
  213. int minUpper = Min(src[0], src[1]);
  214. int minLower = Min(src2[0], src2[1]);
  215. dest->min_ = Min(minUpper, minLower);
  216. int maxUpper = Max(src[0], src[1]);
  217. int maxLower = Max(src2[0], src2[1]);
  218. dest->max_ = Max(maxUpper, maxLower);
  219. src += 2;
  220. src2 += 2;
  221. ++dest;
  222. }
  223. }
  224. else
  225. {
  226. while (dest < end)
  227. {
  228. dest->min_ = Min(src[0], src[1]);
  229. dest->max_ = Max(src[0], src[1]);
  230. src += 2;
  231. ++dest;
  232. }
  233. }
  234. }
  235. }
  236. // Build the rest of the mip levels
  237. for (unsigned i = 1; i < mipBuffers_.Size(); ++i)
  238. {
  239. int prevWidth = width;
  240. int prevHeight = height;
  241. width = (width + 1) / 2;
  242. height = (height + 1) / 2;
  243. for (int y = 0; y < height; ++y)
  244. {
  245. DepthValue* src = mipBuffers_[i - 1].Get() + (y * 2) * prevWidth;
  246. DepthValue* dest = mipBuffers_[i].Get() + y * width;
  247. DepthValue* end = dest + width;
  248. if (y * 2 + 1 < prevHeight)
  249. {
  250. DepthValue* src2 = src + prevWidth;
  251. while (dest < end)
  252. {
  253. int minUpper = Min(src[0].min_, src[1].min_);
  254. int minLower = Min(src2[0].min_, src2[1].min_);
  255. dest->min_ = Min(minUpper, minLower);
  256. int maxUpper = Max(src[0].max_, src[1].max_);
  257. int maxLower = Max(src2[0].max_, src2[1].max_);
  258. dest->max_ = Max(maxUpper, maxLower);
  259. src += 2;
  260. src2 += 2;
  261. ++dest;
  262. }
  263. }
  264. else
  265. {
  266. while (dest < end)
  267. {
  268. dest->min_ = Min(src[0].min_, src[1].min_);
  269. dest->max_ = Max(src[0].max_, src[1].max_);
  270. src += 2;
  271. ++dest;
  272. }
  273. }
  274. }
  275. }
  276. depthHierarchyDirty_ = false;
  277. }
  278. void OcclusionBuffer::ResetUseTimer()
  279. {
  280. useTimer_.Reset();
  281. }
  282. bool OcclusionBuffer::IsVisible(const BoundingBox& worldSpaceBox) const
  283. {
  284. if (buffers_.Empty())
  285. return true;
  286. // Transform corners to projection space
  287. Vector4 vertices[8];
  288. vertices[0] = ModelTransform(viewProj_, worldSpaceBox.min_);
  289. vertices[1] = ModelTransform(viewProj_, Vector3(worldSpaceBox.max_.x_, worldSpaceBox.min_.y_, worldSpaceBox.min_.z_));
  290. vertices[2] = ModelTransform(viewProj_, Vector3(worldSpaceBox.min_.x_, worldSpaceBox.max_.y_, worldSpaceBox.min_.z_));
  291. vertices[3] = ModelTransform(viewProj_, Vector3(worldSpaceBox.max_.x_, worldSpaceBox.max_.y_, worldSpaceBox.min_.z_));
  292. vertices[4] = ModelTransform(viewProj_, Vector3(worldSpaceBox.min_.x_, worldSpaceBox.min_.y_, worldSpaceBox.max_.z_));
  293. vertices[5] = ModelTransform(viewProj_, Vector3(worldSpaceBox.max_.x_, worldSpaceBox.min_.y_, worldSpaceBox.max_.z_));
  294. vertices[6] = ModelTransform(viewProj_, Vector3(worldSpaceBox.min_.x_, worldSpaceBox.max_.y_, worldSpaceBox.max_.z_));
  295. vertices[7] = ModelTransform(viewProj_, worldSpaceBox.max_);
  296. // Apply a far clip relative bias
  297. for (auto& vertice : vertices)
  298. vertice.z_ -= OCCLUSION_RELATIVE_BIAS;
  299. // Transform to screen space. If any of the corners cross the near plane, assume visible
  300. float minX, maxX, minY, maxY, minZ;
  301. if (vertices[0].z_ <= 0.0f)
  302. return true;
  303. Vector3 projected = ViewportTransform(vertices[0]);
  304. minX = maxX = projected.x_;
  305. minY = maxY = projected.y_;
  306. minZ = projected.z_;
  307. // Project the rest
  308. for (unsigned i = 1; i < 8; ++i)
  309. {
  310. if (vertices[i].z_ <= 0.0f)
  311. return true;
  312. projected = ViewportTransform(vertices[i]);
  313. if (projected.x_ < minX) minX = projected.x_;
  314. if (projected.x_ > maxX) maxX = projected.x_;
  315. if (projected.y_ < minY) minY = projected.y_;
  316. if (projected.y_ > maxY) maxY = projected.y_;
  317. if (projected.z_ < minZ) minZ = projected.z_;
  318. }
  319. // Expand the bounding box 1 pixel in each direction to be conservative and correct rasterization offset
  320. IntRect rect((int)(minX - 1.5f), (int)(minY - 1.5f), RoundToInt(maxX), RoundToInt(maxY));
  321. // If the rect is outside, let frustum culling handle
  322. if (rect.right_ < 0 || rect.bottom_ < 0)
  323. return true;
  324. if (rect.left_ >= width_ || rect.top_ >= height_)
  325. return true;
  326. // Clipping of rect
  327. if (rect.left_ < 0)
  328. rect.left_ = 0;
  329. if (rect.top_ < 0)
  330. rect.top_ = 0;
  331. if (rect.right_ >= width_)
  332. rect.right_ = width_ - 1;
  333. if (rect.bottom_ >= height_)
  334. rect.bottom_ = height_ - 1;
  335. // Convert depth to integer and apply final bias
  336. int z = RoundToInt(minZ) - OCCLUSION_FIXED_BIAS;
  337. if (!depthHierarchyDirty_)
  338. {
  339. // Start from lowest mip level and check if a conclusive result can be found
  340. for (int i = mipBuffers_.Size() - 1; i >= 0; --i)
  341. {
  342. int shift = i + 1;
  343. int width = width_ >> shift;
  344. int left = rect.left_ >> shift;
  345. int right = rect.right_ >> shift;
  346. DepthValue* buffer = mipBuffers_[i].Get();
  347. DepthValue* row = buffer + (rect.top_ >> shift) * width;
  348. DepthValue* endRow = buffer + (rect.bottom_ >> shift) * width;
  349. bool allOccluded = true;
  350. while (row <= endRow)
  351. {
  352. DepthValue* src = row + left;
  353. DepthValue* end = row + right;
  354. while (src <= end)
  355. {
  356. if (z <= src->min_)
  357. return true;
  358. if (z <= src->max_)
  359. allOccluded = false;
  360. ++src;
  361. }
  362. row += width;
  363. }
  364. if (allOccluded)
  365. return false;
  366. }
  367. }
  368. // If no conclusive result, finally check the pixel-level data
  369. int* row = buffers_[0].data_ + rect.top_ * width_;
  370. int* endRow = buffers_[0].data_ + rect.bottom_ * width_;
  371. while (row <= endRow)
  372. {
  373. int* src = row + rect.left_;
  374. int* end = row + rect.right_;
  375. while (src <= end)
  376. {
  377. if (z <= *src)
  378. return true;
  379. ++src;
  380. }
  381. row += width_;
  382. }
  383. return false;
  384. }
  385. unsigned OcclusionBuffer::GetUseTimer()
  386. {
  387. return useTimer_.GetMSec(false);
  388. }
  389. void OcclusionBuffer::DrawBatch(const OcclusionBatch& batch, unsigned threadIndex)
  390. {
  391. // If buffer not yet used, clear it
  392. if (threadIndex > 0 && !buffers_[threadIndex].used_)
  393. {
  394. ClearBuffer(threadIndex);
  395. buffers_[threadIndex].used_ = true;
  396. }
  397. Matrix4 modelViewProj = viewProj_ * batch.model_;
  398. // Theoretical max. amount of vertices if each of the 6 clipping planes doubles the triangle count
  399. Vector4 vertices[64 * 3];
  400. if (!batch.indexData_)
  401. {
  402. const unsigned char* srcData = ((const unsigned char*)batch.vertexData_) + batch.drawStart_ * batch.vertexSize_;
  403. unsigned index = 0;
  404. while (index + 2 < batch.drawCount_)
  405. {
  406. const Vector3& v0 = *((const Vector3*)(&srcData[index * batch.vertexSize_]));
  407. const Vector3& v1 = *((const Vector3*)(&srcData[(index + 1) * batch.vertexSize_]));
  408. const Vector3& v2 = *((const Vector3*)(&srcData[(index + 2) * batch.vertexSize_]));
  409. vertices[0] = ModelTransform(modelViewProj, v0);
  410. vertices[1] = ModelTransform(modelViewProj, v1);
  411. vertices[2] = ModelTransform(modelViewProj, v2);
  412. DrawTriangle(vertices, threadIndex);
  413. index += 3;
  414. }
  415. }
  416. else
  417. {
  418. const auto* srcData = (const unsigned char*)batch.vertexData_;
  419. // 16-bit indices
  420. if (batch.indexSize_ == sizeof(unsigned short))
  421. {
  422. const unsigned short* indices = ((const unsigned short*)batch.indexData_) + batch.drawStart_;
  423. const unsigned short* indicesEnd = indices + batch.drawCount_;
  424. while (indices < indicesEnd)
  425. {
  426. const Vector3& v0 = *((const Vector3*)(&srcData[indices[0] * batch.vertexSize_]));
  427. const Vector3& v1 = *((const Vector3*)(&srcData[indices[1] * batch.vertexSize_]));
  428. const Vector3& v2 = *((const Vector3*)(&srcData[indices[2] * batch.vertexSize_]));
  429. vertices[0] = ModelTransform(modelViewProj, v0);
  430. vertices[1] = ModelTransform(modelViewProj, v1);
  431. vertices[2] = ModelTransform(modelViewProj, v2);
  432. DrawTriangle(vertices, threadIndex);
  433. indices += 3;
  434. }
  435. }
  436. else
  437. {
  438. const unsigned* indices = ((const unsigned*)batch.indexData_) + batch.drawStart_;
  439. const unsigned* indicesEnd = indices + batch.drawCount_;
  440. while (indices < indicesEnd)
  441. {
  442. const Vector3& v0 = *((const Vector3*)(&srcData[indices[0] * batch.vertexSize_]));
  443. const Vector3& v1 = *((const Vector3*)(&srcData[indices[1] * batch.vertexSize_]));
  444. const Vector3& v2 = *((const Vector3*)(&srcData[indices[2] * batch.vertexSize_]));
  445. vertices[0] = ModelTransform(modelViewProj, v0);
  446. vertices[1] = ModelTransform(modelViewProj, v1);
  447. vertices[2] = ModelTransform(modelViewProj, v2);
  448. DrawTriangle(vertices, threadIndex);
  449. indices += 3;
  450. }
  451. }
  452. }
  453. }
  454. inline Vector4 OcclusionBuffer::ModelTransform(const Matrix4& transform, const Vector3& vertex) const
  455. {
  456. return Vector4(
  457. transform.m00_ * vertex.x_ + transform.m01_ * vertex.y_ + transform.m02_ * vertex.z_ + transform.m03_,
  458. transform.m10_ * vertex.x_ + transform.m11_ * vertex.y_ + transform.m12_ * vertex.z_ + transform.m13_,
  459. transform.m20_ * vertex.x_ + transform.m21_ * vertex.y_ + transform.m22_ * vertex.z_ + transform.m23_,
  460. transform.m30_ * vertex.x_ + transform.m31_ * vertex.y_ + transform.m32_ * vertex.z_ + transform.m33_
  461. );
  462. }
  463. inline Vector3 OcclusionBuffer::ViewportTransform(const Vector4& vertex) const
  464. {
  465. float invW = 1.0f / vertex.w_;
  466. return Vector3(
  467. invW * vertex.x_ * scaleX_ + offsetX_,
  468. invW * vertex.y_ * scaleY_ + offsetY_,
  469. invW * vertex.z_ * OCCLUSION_Z_SCALE
  470. );
  471. }
  472. inline Vector4 OcclusionBuffer::ClipEdge(const Vector4& v0, const Vector4& v1, float d0, float d1) const
  473. {
  474. float t = d0 / (d0 - d1);
  475. return v0 + t * (v1 - v0);
  476. }
  477. inline float OcclusionBuffer::SignedArea(const Vector3& v0, const Vector3& v1, const Vector3& v2) const
  478. {
  479. float aX = v0.x_ - v1.x_;
  480. float aY = v0.y_ - v1.y_;
  481. float bX = v2.x_ - v1.x_;
  482. float bY = v2.y_ - v1.y_;
  483. return aX * bY - aY * bX;
  484. }
  485. void OcclusionBuffer::CalculateViewport()
  486. {
  487. // Add half pixel offset due to 3D frustum culling
  488. scaleX_ = 0.5f * width_;
  489. scaleY_ = -0.5f * height_;
  490. offsetX_ = 0.5f * width_ + 0.5f;
  491. offsetY_ = 0.5f * height_ + 0.5f;
  492. projOffsetScaleX_ = projection_.m00_ * scaleX_;
  493. projOffsetScaleY_ = projection_.m11_ * scaleY_;
  494. }
  495. void OcclusionBuffer::DrawTriangle(Vector4* vertices, unsigned threadIndex)
  496. {
  497. ClipMaskFlags clipMask{};
  498. ClipMaskFlags andClipMask{};
  499. bool drawOk = false;
  500. Vector3 projected[3];
  501. // Build the clip plane mask for the triangle
  502. for (unsigned i = 0; i < 3; ++i)
  503. {
  504. ClipMaskFlags vertexClipMask{};
  505. if (vertices[i].x_ > vertices[i].w_)
  506. vertexClipMask |= CLIPMASK_X_POS;
  507. if (vertices[i].x_ < -vertices[i].w_)
  508. vertexClipMask |= CLIPMASK_X_NEG;
  509. if (vertices[i].y_ > vertices[i].w_)
  510. vertexClipMask |= CLIPMASK_Y_POS;
  511. if (vertices[i].y_ < -vertices[i].w_)
  512. vertexClipMask |= CLIPMASK_Y_NEG;
  513. if (vertices[i].z_ > vertices[i].w_)
  514. vertexClipMask |= CLIPMASK_Z_POS;
  515. if (vertices[i].z_ < 0.0f)
  516. vertexClipMask |= CLIPMASK_Z_NEG;
  517. clipMask |= vertexClipMask;
  518. if (!i)
  519. andClipMask = vertexClipMask;
  520. else
  521. andClipMask &= vertexClipMask;
  522. }
  523. // If triangle is fully behind any clip plane, can reject quickly
  524. if (andClipMask)
  525. return;
  526. // Check if triangle is fully inside
  527. if (!clipMask)
  528. {
  529. projected[0] = ViewportTransform(vertices[0]);
  530. projected[1] = ViewportTransform(vertices[1]);
  531. projected[2] = ViewportTransform(vertices[2]);
  532. bool clockwise = SignedArea(projected[0], projected[1], projected[2]) < 0.0f;
  533. if (cullMode_ == CULL_NONE || (cullMode_ == CULL_CCW && clockwise) || (cullMode_ == CULL_CW && !clockwise))
  534. {
  535. DrawTriangle2D(projected, clockwise, threadIndex);
  536. drawOk = true;
  537. }
  538. }
  539. else
  540. {
  541. bool triangles[64];
  542. // Initial triangle
  543. triangles[0] = true;
  544. unsigned numTriangles = 1;
  545. if (clipMask & CLIPMASK_X_POS)
  546. ClipVertices(Vector4(-1.0f, 0.0f, 0.0f, 1.0f), vertices, triangles, numTriangles);
  547. if (clipMask & CLIPMASK_X_NEG)
  548. ClipVertices(Vector4(1.0f, 0.0f, 0.0f, 1.0f), vertices, triangles, numTriangles);
  549. if (clipMask & CLIPMASK_Y_POS)
  550. ClipVertices(Vector4(0.0f, -1.0f, 0.0f, 1.0f), vertices, triangles, numTriangles);
  551. if (clipMask & CLIPMASK_Y_NEG)
  552. ClipVertices(Vector4(0.0f, 1.0f, 0.0f, 1.0f), vertices, triangles, numTriangles);
  553. if (clipMask & CLIPMASK_Z_POS)
  554. ClipVertices(Vector4(0.0f, 0.0f, -1.0f, 1.0f), vertices, triangles, numTriangles);
  555. if (clipMask & CLIPMASK_Z_NEG)
  556. ClipVertices(Vector4(0.0f, 0.0f, 1.0f, 0.0f), vertices, triangles, numTriangles);
  557. // Draw each accepted triangle
  558. for (unsigned i = 0; i < numTriangles; ++i)
  559. {
  560. if (triangles[i])
  561. {
  562. unsigned index = i * 3;
  563. projected[0] = ViewportTransform(vertices[index]);
  564. projected[1] = ViewportTransform(vertices[index + 1]);
  565. projected[2] = ViewportTransform(vertices[index + 2]);
  566. bool clockwise = SignedArea(projected[0], projected[1], projected[2]) < 0.0f;
  567. if (cullMode_ == CULL_NONE || (cullMode_ == CULL_CCW && clockwise) || (cullMode_ == CULL_CW && !clockwise))
  568. {
  569. DrawTriangle2D(projected, clockwise, threadIndex);
  570. drawOk = true;
  571. }
  572. }
  573. }
  574. }
  575. if (drawOk)
  576. ++numTriangles_;
  577. }
  578. void OcclusionBuffer::ClipVertices(const Vector4& plane, Vector4* vertices, bool* triangles, unsigned& numTriangles)
  579. {
  580. unsigned num = numTriangles;
  581. for (unsigned i = 0; i < num; ++i)
  582. {
  583. if (triangles[i])
  584. {
  585. unsigned index = i * 3;
  586. float d0 = plane.DotProduct(vertices[index]);
  587. float d1 = plane.DotProduct(vertices[index + 1]);
  588. float d2 = plane.DotProduct(vertices[index + 2]);
  589. // If all vertices behind the plane, reject triangle
  590. if (d0 < 0.0f && d1 < 0.0f && d2 < 0.0f)
  591. {
  592. triangles[i] = false;
  593. continue;
  594. }
  595. // If 2 vertices behind the plane, create a new triangle in-place
  596. else if (d0 < 0.0f && d1 < 0.0f)
  597. {
  598. vertices[index] = ClipEdge(vertices[index], vertices[index + 2], d0, d2);
  599. vertices[index + 1] = ClipEdge(vertices[index + 1], vertices[index + 2], d1, d2);
  600. }
  601. else if (d0 < 0.0f && d2 < 0.0f)
  602. {
  603. vertices[index] = ClipEdge(vertices[index], vertices[index + 1], d0, d1);
  604. vertices[index + 2] = ClipEdge(vertices[index + 2], vertices[index + 1], d2, d1);
  605. }
  606. else if (d1 < 0.0f && d2 < 0.0f)
  607. {
  608. vertices[index + 1] = ClipEdge(vertices[index + 1], vertices[index], d1, d0);
  609. vertices[index + 2] = ClipEdge(vertices[index + 2], vertices[index], d2, d0);
  610. }
  611. // 1 vertex behind the plane: create one new triangle, and modify one in-place
  612. else if (d0 < 0.0f)
  613. {
  614. unsigned newIdx = numTriangles * 3;
  615. triangles[numTriangles] = true;
  616. ++numTriangles;
  617. vertices[newIdx] = ClipEdge(vertices[index], vertices[index + 2], d0, d2);
  618. vertices[newIdx + 1] = vertices[index] = ClipEdge(vertices[index], vertices[index + 1], d0, d1);
  619. vertices[newIdx + 2] = vertices[index + 2];
  620. }
  621. else if (d1 < 0.0f)
  622. {
  623. unsigned newIdx = numTriangles * 3;
  624. triangles[numTriangles] = true;
  625. ++numTriangles;
  626. vertices[newIdx + 1] = ClipEdge(vertices[index + 1], vertices[index], d1, d0);
  627. vertices[newIdx + 2] = vertices[index + 1] = ClipEdge(vertices[index + 1], vertices[index + 2], d1, d2);
  628. vertices[newIdx] = vertices[index];
  629. }
  630. else if (d2 < 0.0f)
  631. {
  632. unsigned newIdx = numTriangles * 3;
  633. triangles[numTriangles] = true;
  634. ++numTriangles;
  635. vertices[newIdx + 2] = ClipEdge(vertices[index + 2], vertices[index + 1], d2, d1);
  636. vertices[newIdx] = vertices[index + 2] = ClipEdge(vertices[index + 2], vertices[index], d2, d0);
  637. vertices[newIdx + 1] = vertices[index + 1];
  638. }
  639. }
  640. }
  641. }
  642. // Code based on Chris Hecker's Perspective Texture Mapping series in the Game Developer magazine
  643. // Also available online at http://chrishecker.com/Miscellaneous_Technical_Articles
  644. /// %Gradients of a software rasterized triangle.
  645. struct Gradients
  646. {
  647. /// Construct from vertices.
  648. explicit Gradients(const Vector3* vertices)
  649. {
  650. float invdX = 1.0f / (((vertices[1].x_ - vertices[2].x_) *
  651. (vertices[0].y_ - vertices[2].y_)) -
  652. ((vertices[0].x_ - vertices[2].x_) *
  653. (vertices[1].y_ - vertices[2].y_)));
  654. float invdY = -invdX;
  655. dInvZdX_ = invdX * (((vertices[1].z_ - vertices[2].z_) * (vertices[0].y_ - vertices[2].y_)) -
  656. ((vertices[0].z_ - vertices[2].z_) * (vertices[1].y_ - vertices[2].y_)));
  657. dInvZdY_ = invdY * (((vertices[1].z_ - vertices[2].z_) * (vertices[0].x_ - vertices[2].x_)) -
  658. ((vertices[0].z_ - vertices[2].z_) * (vertices[1].x_ - vertices[2].x_)));
  659. dInvZdXInt_ = (int)dInvZdX_;
  660. }
  661. /// Integer horizontal gradient.
  662. int dInvZdXInt_;
  663. /// Horizontal gradient.
  664. float dInvZdX_;
  665. /// Vertical gradient.
  666. float dInvZdY_;
  667. };
  668. /// %Edge of a software rasterized triangle.
  669. struct Edge
  670. {
  671. /// Construct from gradients and top & bottom vertices.
  672. Edge(const Gradients& gradients, const Vector3& top, const Vector3& bottom, int topY)
  673. {
  674. float height = (bottom.y_ - top.y_);
  675. float slope = (height != 0.0f) ? (bottom.x_ - top.x_) / height : 0.0f;
  676. float yPreStep = (float)(topY + 1) - top.y_;
  677. float xPreStep = slope * yPreStep;
  678. x_ = RoundToInt((xPreStep + top.x_) * OCCLUSION_X_SCALE);
  679. xStep_ = RoundToInt(slope * OCCLUSION_X_SCALE);
  680. invZ_ = RoundToInt(top.z_ + xPreStep * gradients.dInvZdX_ + yPreStep * gradients.dInvZdY_);
  681. invZStep_ = RoundToInt(slope * gradients.dInvZdX_ + gradients.dInvZdY_);
  682. }
  683. /// X coordinate.
  684. int x_;
  685. /// X coordinate step.
  686. int xStep_;
  687. /// Inverse Z.
  688. int invZ_;
  689. /// Inverse Z step.
  690. int invZStep_;
  691. };
  692. void OcclusionBuffer::DrawTriangle2D(const Vector3* vertices, bool clockwise, unsigned threadIndex)
  693. {
  694. int top, middle, bottom;
  695. bool middleIsRight;
  696. // Sort vertices in Y-direction
  697. if (vertices[0].y_ < vertices[1].y_)
  698. {
  699. if (vertices[2].y_ < vertices[0].y_)
  700. {
  701. top = 2;
  702. middle = 0;
  703. bottom = 1;
  704. middleIsRight = true;
  705. }
  706. else
  707. {
  708. top = 0;
  709. if (vertices[1].y_ < vertices[2].y_)
  710. {
  711. middle = 1;
  712. bottom = 2;
  713. middleIsRight = true;
  714. }
  715. else
  716. {
  717. middle = 2;
  718. bottom = 1;
  719. middleIsRight = false;
  720. }
  721. }
  722. }
  723. else
  724. {
  725. if (vertices[2].y_ < vertices[1].y_)
  726. {
  727. top = 2;
  728. middle = 1;
  729. bottom = 0;
  730. middleIsRight = false;
  731. }
  732. else
  733. {
  734. top = 1;
  735. if (vertices[0].y_ < vertices[2].y_)
  736. {
  737. middle = 0;
  738. bottom = 2;
  739. middleIsRight = false;
  740. }
  741. else
  742. {
  743. middle = 2;
  744. bottom = 0;
  745. middleIsRight = true;
  746. }
  747. }
  748. }
  749. auto topY = (int)vertices[top].y_;
  750. auto middleY = (int)vertices[middle].y_;
  751. auto bottomY = (int)vertices[bottom].y_;
  752. // Check for degenerate triangle
  753. if (topY == bottomY)
  754. return;
  755. // Reverse middleIsRight test if triangle is counterclockwise
  756. if (!clockwise)
  757. middleIsRight = !middleIsRight;
  758. const bool topDegenerate = topY == middleY;
  759. const bool bottomDegenerate = middleY == bottomY;
  760. Gradients gradients(vertices);
  761. Edge topToBottom(gradients, vertices[top], vertices[bottom], topY);
  762. int* bufferData = buffers_[threadIndex].data_;
  763. if (middleIsRight)
  764. {
  765. // Top half
  766. if (!topDegenerate)
  767. {
  768. Edge topToMiddle(gradients, vertices[top], vertices[middle], topY);
  769. int* row = bufferData + topY * width_;
  770. int* endRow = bufferData + middleY * width_;
  771. while (row < endRow)
  772. {
  773. int invZ = topToBottom.invZ_;
  774. int* dest = row + (topToBottom.x_ >> 16u);
  775. int* end = row + (topToMiddle.x_ >> 16u);
  776. while (dest < end)
  777. {
  778. if (invZ < *dest)
  779. *dest = invZ;
  780. invZ += gradients.dInvZdXInt_;
  781. ++dest;
  782. }
  783. topToBottom.x_ += topToBottom.xStep_;
  784. topToBottom.invZ_ += topToBottom.invZStep_;
  785. topToMiddle.x_ += topToMiddle.xStep_;
  786. row += width_;
  787. }
  788. }
  789. // Bottom half
  790. if (!bottomDegenerate)
  791. {
  792. Edge middleToBottom(gradients, vertices[middle], vertices[bottom], middleY);
  793. int* row = bufferData + middleY * width_;
  794. int* endRow = bufferData + bottomY * width_;
  795. while (row < endRow)
  796. {
  797. int invZ = topToBottom.invZ_;
  798. int* dest = row + (topToBottom.x_ >> 16u);
  799. int* end = row + (middleToBottom.x_ >> 16u);
  800. while (dest < end)
  801. {
  802. if (invZ < *dest)
  803. *dest = invZ;
  804. invZ += gradients.dInvZdXInt_;
  805. ++dest;
  806. }
  807. topToBottom.x_ += topToBottom.xStep_;
  808. topToBottom.invZ_ += topToBottom.invZStep_;
  809. middleToBottom.x_ += middleToBottom.xStep_;
  810. row += width_;
  811. }
  812. }
  813. }
  814. else
  815. {
  816. // Top half
  817. if (!topDegenerate)
  818. {
  819. Edge topToMiddle(gradients, vertices[top], vertices[middle], topY);
  820. int* row = bufferData + topY * width_;
  821. int* endRow = bufferData + middleY * width_;
  822. while (row < endRow)
  823. {
  824. int invZ = topToMiddle.invZ_;
  825. int* dest = row + (topToMiddle.x_ >> 16u);
  826. int* end = row + (topToBottom.x_ >> 16u);
  827. while (dest < end)
  828. {
  829. if (invZ < *dest)
  830. *dest = invZ;
  831. invZ += gradients.dInvZdXInt_;
  832. ++dest;
  833. }
  834. topToMiddle.x_ += topToMiddle.xStep_;
  835. topToMiddle.invZ_ += topToMiddle.invZStep_;
  836. topToBottom.x_ += topToBottom.xStep_;
  837. row += width_;
  838. }
  839. }
  840. // Bottom half
  841. if (!bottomDegenerate)
  842. {
  843. Edge middleToBottom(gradients, vertices[middle], vertices[bottom], middleY);
  844. int* row = bufferData + middleY * width_;
  845. int* endRow = bufferData + bottomY * width_;
  846. while (row < endRow)
  847. {
  848. int invZ = middleToBottom.invZ_;
  849. int* dest = row + (middleToBottom.x_ >> 16u);
  850. int* end = row + (topToBottom.x_ >> 16u);
  851. while (dest < end)
  852. {
  853. if (invZ < *dest)
  854. *dest = invZ;
  855. invZ += gradients.dInvZdXInt_;
  856. ++dest;
  857. }
  858. middleToBottom.x_ += middleToBottom.xStep_;
  859. middleToBottom.invZ_ += middleToBottom.invZStep_;
  860. topToBottom.x_ += topToBottom.xStep_;
  861. row += width_;
  862. }
  863. }
  864. }
  865. }
  866. void OcclusionBuffer::MergeBuffers()
  867. {
  868. URHO3D_PROFILE(MergeBuffers);
  869. for (unsigned i = 1; i < buffers_.Size(); ++i)
  870. {
  871. if (!buffers_[i].used_)
  872. continue;
  873. int* src = buffers_[i].data_;
  874. int* dest = buffers_[0].data_;
  875. int count = width_ * height_;
  876. while (count--)
  877. {
  878. // If thread buffer's depth value is closer, overwrite the original
  879. if (*src < *dest)
  880. *dest = *src;
  881. ++src;
  882. ++dest;
  883. }
  884. }
  885. }
  886. void OcclusionBuffer::ClearBuffer(unsigned threadIndex)
  887. {
  888. if (threadIndex >= buffers_.Size())
  889. return;
  890. int* dest = buffers_[threadIndex].data_;
  891. int count = width_ * height_;
  892. auto fillValue = (int)OCCLUSION_Z_SCALE;
  893. while (count--)
  894. *dest++ = fillValue;
  895. }
  896. }