OcclusionBuffer.cpp 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034
  1. //
  2. // Copyright (c) 2008-2017 the Urho3D project.
  3. //
  4. // Permission is hereby granted, free of charge, to any person obtaining a copy
  5. // of this software and associated documentation files (the "Software"), to deal
  6. // in the Software without restriction, including without limitation the rights
  7. // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8. // copies of the Software, and to permit persons to whom the Software is
  9. // furnished to do so, subject to the following conditions:
  10. //
  11. // The above copyright notice and this permission notice shall be included in
  12. // all copies or substantial portions of the Software.
  13. //
  14. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19. // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  20. // THE SOFTWARE.
  21. //
  22. #include "../Precompiled.h"
  23. #include "../Core/WorkQueue.h"
  24. #include "../Core/Profiler.h"
  25. #include "../Graphics/Camera.h"
  26. #include "../Graphics/OcclusionBuffer.h"
  27. #include "../IO/Log.h"
  28. #include "../DebugNew.h"
  29. namespace Atomic
  30. {
  // Per-vertex clip flags: one bit for each frustum plane a vertex can lie outside of.
  static const unsigned CLIPMASK_X_POS = 1u << 0;
  static const unsigned CLIPMASK_X_NEG = 1u << 1;
  static const unsigned CLIPMASK_Y_POS = 1u << 2;
  static const unsigned CLIPMASK_Y_NEG = 1u << 3;
  static const unsigned CLIPMASK_Z_POS = 1u << 4;
  static const unsigned CLIPMASK_Z_NEG = 1u << 5;
  37. void DrawOcclusionBatchWork(const WorkItem* item, unsigned threadIndex)
  38. {
  39. OcclusionBuffer* buffer = reinterpret_cast<OcclusionBuffer*>(item->aux_);
  40. OcclusionBatch& batch = *reinterpret_cast<OcclusionBatch*>(item->start_);
  41. buffer->DrawBatch(batch, threadIndex);
  42. }
  43. OcclusionBuffer::OcclusionBuffer(Context* context) :
  44. Object(context),
  45. width_(0),
  46. height_(0),
  47. numTriangles_(0),
  48. maxTriangles_(OCCLUSION_DEFAULT_MAX_TRIANGLES),
  49. cullMode_(CULL_CCW),
  50. depthHierarchyDirty_(true),
  51. reverseCulling_(false),
  52. nearClip_(0.0f),
  53. farClip_(0.0f)
  54. {
  55. }
  56. OcclusionBuffer::~OcclusionBuffer()
  57. {
  58. }
  59. bool OcclusionBuffer::SetSize(int width, int height, bool threaded)
  60. {
  61. // Force the height to an even amount of pixels for better mip generation
  62. if (height & 1)
  63. ++height;
  64. if (width == width_ && height == height_)
  65. return true;
  66. if (width <= 0 || height <= 0)
  67. return false;
  68. if (!IsPowerOfTwo((unsigned)width))
  69. {
  70. ATOMIC_LOGERRORF("Requested occlusion buffer width %d is not a power of two", width);
  71. return false;
  72. }
  73. width_ = width;
  74. height_ = height;
  75. // Build work buffers for threading
  76. unsigned numThreadBuffers = threaded ? GetSubsystem<WorkQueue>()->GetNumThreads() + 1 : 1;
  77. buffers_.Resize(numThreadBuffers);
  78. for (unsigned i = 0; i < numThreadBuffers; ++i)
  79. {
  80. // Reserve extra memory in case 3D clipping is not exact
  81. OcclusionBufferData& buffer = buffers_[i];
  82. buffer.dataWithSafety_ = new int[width * (height + 2) + 2];
  83. buffer.data_ = buffer.dataWithSafety_.Get() + width + 1;
  84. buffer.used_ = false;
  85. }
  86. mipBuffers_.Clear();
  87. // Build buffers for mip levels
  88. for (;;)
  89. {
  90. width = (width + 1) / 2;
  91. height = (height + 1) / 2;
  92. mipBuffers_.Push(SharedArrayPtr<DepthValue>(new DepthValue[width * height]));
  93. if (width <= OCCLUSION_MIN_SIZE && height <= OCCLUSION_MIN_SIZE)
  94. break;
  95. }
  96. ATOMIC_LOGDEBUG("Set occlusion buffer size " + String(width_) + "x" + String(height_) + " with " +
  97. String(mipBuffers_.Size()) + " mip levels and " + String(numThreadBuffers) + " thread buffers");
  98. CalculateViewport();
  99. return true;
  100. }
  101. void OcclusionBuffer::SetView(Camera* camera)
  102. {
  103. if (!camera)
  104. return;
  105. view_ = camera->GetView();
  106. projection_ = camera->GetProjection();
  107. viewProj_ = projection_ * view_;
  108. nearClip_ = camera->GetNearClip();
  109. farClip_ = camera->GetFarClip();
  110. reverseCulling_ = camera->GetReverseCulling();
  111. CalculateViewport();
  112. }
  113. void OcclusionBuffer::SetMaxTriangles(unsigned triangles)
  114. {
  115. maxTriangles_ = triangles;
  116. }
  117. void OcclusionBuffer::SetCullMode(CullMode mode)
  118. {
  119. if (reverseCulling_)
  120. {
  121. if (mode == CULL_CW)
  122. mode = CULL_CCW;
  123. else if (mode == CULL_CCW)
  124. mode = CULL_CW;
  125. }
  126. cullMode_ = mode;
  127. }
  128. void OcclusionBuffer::Reset()
  129. {
  130. numTriangles_ = 0;
  131. batches_.Clear();
  132. }
  133. void OcclusionBuffer::Clear()
  134. {
  135. Reset();
  136. // Only clear the main thread buffer. Rest are cleared on-demand when drawing the first batch
  137. ClearBuffer(0);
  138. for (unsigned i = 1; i < buffers_.Size(); ++i)
  139. buffers_[i].used_ = false;
  140. depthHierarchyDirty_ = true;
  141. }
  142. bool OcclusionBuffer::AddTriangles(const Matrix3x4& model, const void* vertexData, unsigned vertexSize, unsigned vertexStart,
  143. unsigned vertexCount)
  144. {
  145. batches_.Resize(batches_.Size() + 1);
  146. OcclusionBatch& batch = batches_.Back();
  147. batch.model_ = model;
  148. batch.vertexData_ = vertexData;
  149. batch.vertexSize_ = vertexSize;
  150. batch.indexData_ = 0;
  151. batch.indexSize_ = 0;
  152. batch.drawStart_ = vertexStart;
  153. batch.drawCount_ = vertexCount;
  154. numTriangles_ += vertexCount / 3;
  155. return numTriangles_ <= maxTriangles_;
  156. }
  157. bool OcclusionBuffer::AddTriangles(const Matrix3x4& model, const void* vertexData, unsigned vertexSize, const void* indexData,
  158. unsigned indexSize, unsigned indexStart, unsigned indexCount)
  159. {
  160. batches_.Resize(batches_.Size() + 1);
  161. OcclusionBatch& batch = batches_.Back();
  162. batch.model_ = model;
  163. batch.vertexData_ = vertexData;
  164. batch.vertexSize_ = vertexSize;
  165. batch.indexData_ = indexData;
  166. batch.indexSize_ = indexSize;
  167. batch.drawStart_ = indexStart;
  168. batch.drawCount_ = indexCount;
  169. numTriangles_ += indexCount / 3;
  170. return numTriangles_ <= maxTriangles_;
  171. }
  172. void OcclusionBuffer::DrawTriangles()
  173. {
  174. if (buffers_.Size() == 1)
  175. {
  176. // Not threaded
  177. for (Vector<OcclusionBatch>::Iterator i = batches_.Begin(); i != batches_.End(); ++i)
  178. DrawBatch(*i, 0);
  179. depthHierarchyDirty_ = true;
  180. }
  181. else if (buffers_.Size() > 1)
  182. {
  183. // Threaded
  184. WorkQueue* queue = GetSubsystem<WorkQueue>();
  185. for (Vector<OcclusionBatch>::Iterator i = batches_.Begin(); i != batches_.End(); ++i)
  186. {
  187. SharedPtr<WorkItem> item = queue->GetFreeItem();
  188. item->priority_ = M_MAX_UNSIGNED;
  189. item->workFunction_ = DrawOcclusionBatchWork;
  190. item->aux_ = this;
  191. item->start_ = &(*i);
  192. queue->AddWorkItem(item);
  193. }
  194. queue->Complete(M_MAX_UNSIGNED);
  195. MergeBuffers();
  196. depthHierarchyDirty_ = true;
  197. }
  198. batches_.Clear();
  199. }
  200. void OcclusionBuffer::BuildDepthHierarchy()
  201. {
  202. if (buffers_.Empty() || !depthHierarchyDirty_)
  203. return;
  204. ATOMIC_PROFILE(BuildDepthHierarchy);
  205. // Build the first mip level from the pixel-level data
  206. int width = (width_ + 1) / 2;
  207. int height = (height_ + 1) / 2;
  208. if (mipBuffers_.Size())
  209. {
  210. for (int y = 0; y < height; ++y)
  211. {
  212. int* src = buffers_[0].data_ + (y * 2) * width_;
  213. DepthValue* dest = mipBuffers_[0].Get() + y * width;
  214. DepthValue* end = dest + width;
  215. if (y * 2 + 1 < height_)
  216. {
  217. int* src2 = src + width_;
  218. while (dest < end)
  219. {
  220. int minUpper = Min(src[0], src[1]);
  221. int minLower = Min(src2[0], src2[1]);
  222. dest->min_ = Min(minUpper, minLower);
  223. int maxUpper = Max(src[0], src[1]);
  224. int maxLower = Max(src2[0], src2[1]);
  225. dest->max_ = Max(maxUpper, maxLower);
  226. src += 2;
  227. src2 += 2;
  228. ++dest;
  229. }
  230. }
  231. else
  232. {
  233. while (dest < end)
  234. {
  235. dest->min_ = Min(src[0], src[1]);
  236. dest->max_ = Max(src[0], src[1]);
  237. src += 2;
  238. ++dest;
  239. }
  240. }
  241. }
  242. }
  243. // Build the rest of the mip levels
  244. for (unsigned i = 1; i < mipBuffers_.Size(); ++i)
  245. {
  246. int prevWidth = width;
  247. int prevHeight = height;
  248. width = (width + 1) / 2;
  249. height = (height + 1) / 2;
  250. for (int y = 0; y < height; ++y)
  251. {
  252. DepthValue* src = mipBuffers_[i - 1].Get() + (y * 2) * prevWidth;
  253. DepthValue* dest = mipBuffers_[i].Get() + y * width;
  254. DepthValue* end = dest + width;
  255. if (y * 2 + 1 < prevHeight)
  256. {
  257. DepthValue* src2 = src + prevWidth;
  258. while (dest < end)
  259. {
  260. int minUpper = Min(src[0].min_, src[1].min_);
  261. int minLower = Min(src2[0].min_, src2[1].min_);
  262. dest->min_ = Min(minUpper, minLower);
  263. int maxUpper = Max(src[0].max_, src[1].max_);
  264. int maxLower = Max(src2[0].max_, src2[1].max_);
  265. dest->max_ = Max(maxUpper, maxLower);
  266. src += 2;
  267. src2 += 2;
  268. ++dest;
  269. }
  270. }
  271. else
  272. {
  273. while (dest < end)
  274. {
  275. dest->min_ = Min(src[0].min_, src[1].min_);
  276. dest->max_ = Max(src[0].max_, src[1].max_);
  277. src += 2;
  278. ++dest;
  279. }
  280. }
  281. }
  282. }
  283. depthHierarchyDirty_ = false;
  284. }
  285. void OcclusionBuffer::ResetUseTimer()
  286. {
  287. useTimer_.Reset();
  288. }
  289. bool OcclusionBuffer::IsVisible(const BoundingBox& worldSpaceBox) const
  290. {
  291. if (buffers_.Empty())
  292. return true;
  293. // Transform corners to projection space
  294. Vector4 vertices[8];
  295. vertices[0] = ModelTransform(viewProj_, worldSpaceBox.min_);
  296. vertices[1] = ModelTransform(viewProj_, Vector3(worldSpaceBox.max_.x_, worldSpaceBox.min_.y_, worldSpaceBox.min_.z_));
  297. vertices[2] = ModelTransform(viewProj_, Vector3(worldSpaceBox.min_.x_, worldSpaceBox.max_.y_, worldSpaceBox.min_.z_));
  298. vertices[3] = ModelTransform(viewProj_, Vector3(worldSpaceBox.max_.x_, worldSpaceBox.max_.y_, worldSpaceBox.min_.z_));
  299. vertices[4] = ModelTransform(viewProj_, Vector3(worldSpaceBox.min_.x_, worldSpaceBox.min_.y_, worldSpaceBox.max_.z_));
  300. vertices[5] = ModelTransform(viewProj_, Vector3(worldSpaceBox.max_.x_, worldSpaceBox.min_.y_, worldSpaceBox.max_.z_));
  301. vertices[6] = ModelTransform(viewProj_, Vector3(worldSpaceBox.min_.x_, worldSpaceBox.max_.y_, worldSpaceBox.max_.z_));
  302. vertices[7] = ModelTransform(viewProj_, worldSpaceBox.max_);
  303. // Apply a far clip relative bias
  304. for (unsigned i = 0; i < 8; ++i)
  305. vertices[i].z_ -= OCCLUSION_RELATIVE_BIAS;
  306. // Transform to screen space. If any of the corners cross the near plane, assume visible
  307. float minX, maxX, minY, maxY, minZ;
  308. if (vertices[0].z_ <= 0.0f)
  309. return true;
  310. Vector3 projected = ViewportTransform(vertices[0]);
  311. minX = maxX = projected.x_;
  312. minY = maxY = projected.y_;
  313. minZ = projected.z_;
  314. // Project the rest
  315. for (unsigned i = 1; i < 8; ++i)
  316. {
  317. if (vertices[i].z_ <= 0.0f)
  318. return true;
  319. projected = ViewportTransform(vertices[i]);
  320. if (projected.x_ < minX) minX = projected.x_;
  321. if (projected.x_ > maxX) maxX = projected.x_;
  322. if (projected.y_ < minY) minY = projected.y_;
  323. if (projected.y_ > maxY) maxY = projected.y_;
  324. if (projected.z_ < minZ) minZ = projected.z_;
  325. }
  326. // Expand the bounding box 1 pixel in each direction to be conservative and correct rasterization offset
  327. IntRect rect(
  328. (int)(minX - 1.5f), (int)(minY - 1.5f),
  329. (int)(maxX + 0.5f), (int)(maxY + 0.5f)
  330. );
  331. // If the rect is outside, let frustum culling handle
  332. if (rect.right_ < 0 || rect.bottom_ < 0)
  333. return true;
  334. if (rect.left_ >= width_ || rect.top_ >= height_)
  335. return true;
  336. // Clipping of rect
  337. if (rect.left_ < 0)
  338. rect.left_ = 0;
  339. if (rect.top_ < 0)
  340. rect.top_ = 0;
  341. if (rect.right_ >= width_)
  342. rect.right_ = width_ - 1;
  343. if (rect.bottom_ >= height_)
  344. rect.bottom_ = height_ - 1;
  345. // Convert depth to integer and apply final bias
  346. int z = (int)(minZ + 0.5f) - OCCLUSION_FIXED_BIAS;
  347. if (!depthHierarchyDirty_)
  348. {
  349. // Start from lowest mip level and check if a conclusive result can be found
  350. for (int i = mipBuffers_.Size() - 1; i >= 0; --i)
  351. {
  352. int shift = i + 1;
  353. int width = width_ >> shift;
  354. int left = rect.left_ >> shift;
  355. int right = rect.right_ >> shift;
  356. DepthValue* buffer = mipBuffers_[i].Get();
  357. DepthValue* row = buffer + (rect.top_ >> shift) * width;
  358. DepthValue* endRow = buffer + (rect.bottom_ >> shift) * width;
  359. bool allOccluded = true;
  360. while (row <= endRow)
  361. {
  362. DepthValue* src = row + left;
  363. DepthValue* end = row + right;
  364. while (src <= end)
  365. {
  366. if (z <= src->min_)
  367. return true;
  368. if (z <= src->max_)
  369. allOccluded = false;
  370. ++src;
  371. }
  372. row += width;
  373. }
  374. if (allOccluded)
  375. return false;
  376. }
  377. }
  378. // If no conclusive result, finally check the pixel-level data
  379. int* row = buffers_[0].data_ + rect.top_ * width_;
  380. int* endRow = buffers_[0].data_ + rect.bottom_ * width_;
  381. while (row <= endRow)
  382. {
  383. int* src = row + rect.left_;
  384. int* end = row + rect.right_;
  385. while (src <= end)
  386. {
  387. if (z <= *src)
  388. return true;
  389. ++src;
  390. }
  391. row += width_;
  392. }
  393. return false;
  394. }
  395. unsigned OcclusionBuffer::GetUseTimer()
  396. {
  397. return useTimer_.GetMSec(false);
  398. }
  399. void OcclusionBuffer::DrawBatch(const OcclusionBatch& batch, unsigned threadIndex)
  400. {
  401. // If buffer not yet used, clear it
  402. if (threadIndex > 0 && !buffers_[threadIndex].used_)
  403. {
  404. ClearBuffer(threadIndex);
  405. buffers_[threadIndex].used_ = true;
  406. }
  407. Matrix4 modelViewProj = viewProj_ * batch.model_;
  408. // Theoretical max. amount of vertices if each of the 6 clipping planes doubles the triangle count
  409. Vector4 vertices[64 * 3];
  410. if (!batch.indexData_)
  411. {
  412. const unsigned char* srcData = ((const unsigned char*)batch.vertexData_) + batch.drawStart_ * batch.vertexSize_;
  413. unsigned index = 0;
  414. while (index + 2 < batch.drawCount_)
  415. {
  416. const Vector3& v0 = *((const Vector3*)(&srcData[index * batch.vertexSize_]));
  417. const Vector3& v1 = *((const Vector3*)(&srcData[(index + 1) * batch.vertexSize_]));
  418. const Vector3& v2 = *((const Vector3*)(&srcData[(index + 2) * batch.vertexSize_]));
  419. vertices[0] = ModelTransform(modelViewProj, v0);
  420. vertices[1] = ModelTransform(modelViewProj, v1);
  421. vertices[2] = ModelTransform(modelViewProj, v2);
  422. DrawTriangle(vertices, threadIndex);
  423. index += 3;
  424. }
  425. }
  426. else
  427. {
  428. const unsigned char* srcData = (const unsigned char*)batch.vertexData_;
  429. // 16-bit indices
  430. if (batch.indexSize_ == sizeof(unsigned short))
  431. {
  432. const unsigned short* indices = ((const unsigned short*)batch.indexData_) + batch.drawStart_;
  433. const unsigned short* indicesEnd = indices + batch.drawCount_;
  434. while (indices < indicesEnd)
  435. {
  436. const Vector3& v0 = *((const Vector3*)(&srcData[indices[0] * batch.vertexSize_]));
  437. const Vector3& v1 = *((const Vector3*)(&srcData[indices[1] * batch.vertexSize_]));
  438. const Vector3& v2 = *((const Vector3*)(&srcData[indices[2] * batch.vertexSize_]));
  439. vertices[0] = ModelTransform(modelViewProj, v0);
  440. vertices[1] = ModelTransform(modelViewProj, v1);
  441. vertices[2] = ModelTransform(modelViewProj, v2);
  442. DrawTriangle(vertices, threadIndex);
  443. indices += 3;
  444. }
  445. }
  446. else
  447. {
  448. const unsigned* indices = ((const unsigned*)batch.indexData_) + batch.drawStart_;
  449. const unsigned* indicesEnd = indices + batch.drawCount_;
  450. while (indices < indicesEnd)
  451. {
  452. const Vector3& v0 = *((const Vector3*)(&srcData[indices[0] * batch.vertexSize_]));
  453. const Vector3& v1 = *((const Vector3*)(&srcData[indices[1] * batch.vertexSize_]));
  454. const Vector3& v2 = *((const Vector3*)(&srcData[indices[2] * batch.vertexSize_]));
  455. vertices[0] = ModelTransform(modelViewProj, v0);
  456. vertices[1] = ModelTransform(modelViewProj, v1);
  457. vertices[2] = ModelTransform(modelViewProj, v2);
  458. DrawTriangle(vertices, threadIndex);
  459. indices += 3;
  460. }
  461. }
  462. }
  463. }
  464. inline Vector4 OcclusionBuffer::ModelTransform(const Matrix4& transform, const Vector3& vertex) const
  465. {
  466. return Vector4(
  467. transform.m00_ * vertex.x_ + transform.m01_ * vertex.y_ + transform.m02_ * vertex.z_ + transform.m03_,
  468. transform.m10_ * vertex.x_ + transform.m11_ * vertex.y_ + transform.m12_ * vertex.z_ + transform.m13_,
  469. transform.m20_ * vertex.x_ + transform.m21_ * vertex.y_ + transform.m22_ * vertex.z_ + transform.m23_,
  470. transform.m30_ * vertex.x_ + transform.m31_ * vertex.y_ + transform.m32_ * vertex.z_ + transform.m33_
  471. );
  472. }
  473. inline Vector3 OcclusionBuffer::ViewportTransform(const Vector4& vertex) const
  474. {
  475. float invW = 1.0f / vertex.w_;
  476. return Vector3(
  477. invW * vertex.x_ * scaleX_ + offsetX_,
  478. invW * vertex.y_ * scaleY_ + offsetY_,
  479. invW * vertex.z_ * OCCLUSION_Z_SCALE
  480. );
  481. }
  482. inline Vector4 OcclusionBuffer::ClipEdge(const Vector4& v0, const Vector4& v1, float d0, float d1) const
  483. {
  484. float t = d0 / (d0 - d1);
  485. return v0 + t * (v1 - v0);
  486. }
  487. inline float OcclusionBuffer::SignedArea(const Vector3& v0, const Vector3& v1, const Vector3& v2) const
  488. {
  489. float aX = v0.x_ - v1.x_;
  490. float aY = v0.y_ - v1.y_;
  491. float bX = v2.x_ - v1.x_;
  492. float bY = v2.y_ - v1.y_;
  493. return aX * bY - aY * bX;
  494. }
  495. void OcclusionBuffer::CalculateViewport()
  496. {
  497. // Add half pixel offset due to 3D frustum culling
  498. scaleX_ = 0.5f * width_;
  499. scaleY_ = -0.5f * height_;
  500. offsetX_ = 0.5f * width_ + 0.5f;
  501. offsetY_ = 0.5f * height_ + 0.5f;
  502. projOffsetScaleX_ = projection_.m00_ * scaleX_;
  503. projOffsetScaleY_ = projection_.m11_ * scaleY_;
  504. }
  505. void OcclusionBuffer::DrawTriangle(Vector4* vertices, unsigned threadIndex)
  506. {
  507. unsigned clipMask = 0;
  508. unsigned andClipMask = 0;
  509. bool drawOk = false;
  510. Vector3 projected[3];
  511. // Build the clip plane mask for the triangle
  512. for (unsigned i = 0; i < 3; ++i)
  513. {
  514. unsigned vertexClipMask = 0;
  515. if (vertices[i].x_ > vertices[i].w_)
  516. vertexClipMask |= CLIPMASK_X_POS;
  517. if (vertices[i].x_ < -vertices[i].w_)
  518. vertexClipMask |= CLIPMASK_X_NEG;
  519. if (vertices[i].y_ > vertices[i].w_)
  520. vertexClipMask |= CLIPMASK_Y_POS;
  521. if (vertices[i].y_ < -vertices[i].w_)
  522. vertexClipMask |= CLIPMASK_Y_NEG;
  523. if (vertices[i].z_ > vertices[i].w_)
  524. vertexClipMask |= CLIPMASK_Z_POS;
  525. if (vertices[i].z_ < 0.0f)
  526. vertexClipMask |= CLIPMASK_Z_NEG;
  527. clipMask |= vertexClipMask;
  528. if (!i)
  529. andClipMask = vertexClipMask;
  530. else
  531. andClipMask &= vertexClipMask;
  532. }
  533. // If triangle is fully behind any clip plane, can reject quickly
  534. if (andClipMask)
  535. return;
  536. // Check if triangle is fully inside
  537. if (!clipMask)
  538. {
  539. projected[0] = ViewportTransform(vertices[0]);
  540. projected[1] = ViewportTransform(vertices[1]);
  541. projected[2] = ViewportTransform(vertices[2]);
  542. bool clockwise = SignedArea(projected[0], projected[1], projected[2]) < 0.0f;
  543. if (cullMode_ == CULL_NONE || (cullMode_ == CULL_CCW && clockwise) || (cullMode_ == CULL_CW && !clockwise))
  544. {
  545. DrawTriangle2D(projected, clockwise, threadIndex);
  546. drawOk = true;
  547. }
  548. }
  549. else
  550. {
  551. bool triangles[64];
  552. // Initial triangle
  553. triangles[0] = true;
  554. unsigned numTriangles = 1;
  555. if (clipMask & CLIPMASK_X_POS)
  556. ClipVertices(Vector4(-1.0f, 0.0f, 0.0f, 1.0f), vertices, triangles, numTriangles);
  557. if (clipMask & CLIPMASK_X_NEG)
  558. ClipVertices(Vector4(1.0f, 0.0f, 0.0f, 1.0f), vertices, triangles, numTriangles);
  559. if (clipMask & CLIPMASK_Y_POS)
  560. ClipVertices(Vector4(0.0f, -1.0f, 0.0f, 1.0f), vertices, triangles, numTriangles);
  561. if (clipMask & CLIPMASK_Y_NEG)
  562. ClipVertices(Vector4(0.0f, 1.0f, 0.0f, 1.0f), vertices, triangles, numTriangles);
  563. if (clipMask & CLIPMASK_Z_POS)
  564. ClipVertices(Vector4(0.0f, 0.0f, -1.0f, 1.0f), vertices, triangles, numTriangles);
  565. if (clipMask & CLIPMASK_Z_NEG)
  566. ClipVertices(Vector4(0.0f, 0.0f, 1.0f, 0.0f), vertices, triangles, numTriangles);
  567. // Draw each accepted triangle
  568. for (unsigned i = 0; i < numTriangles; ++i)
  569. {
  570. if (triangles[i])
  571. {
  572. unsigned index = i * 3;
  573. projected[0] = ViewportTransform(vertices[index]);
  574. projected[1] = ViewportTransform(vertices[index + 1]);
  575. projected[2] = ViewportTransform(vertices[index + 2]);
  576. bool clockwise = SignedArea(projected[0], projected[1], projected[2]) < 0.0f;
  577. if (cullMode_ == CULL_NONE || (cullMode_ == CULL_CCW && clockwise) || (cullMode_ == CULL_CW && !clockwise))
  578. {
  579. DrawTriangle2D(projected, clockwise, threadIndex);
  580. drawOk = true;
  581. }
  582. }
  583. }
  584. }
  585. if (drawOk)
  586. ++numTriangles_;
  587. }
  588. void OcclusionBuffer::ClipVertices(const Vector4& plane, Vector4* vertices, bool* triangles, unsigned& numTriangles)
  589. {
  590. unsigned num = numTriangles;
  591. for (unsigned i = 0; i < num; ++i)
  592. {
  593. if (triangles[i])
  594. {
  595. unsigned index = i * 3;
  596. float d0 = plane.DotProduct(vertices[index]);
  597. float d1 = plane.DotProduct(vertices[index + 1]);
  598. float d2 = plane.DotProduct(vertices[index + 2]);
  599. // If all vertices behind the plane, reject triangle
  600. if (d0 < 0.0f && d1 < 0.0f && d2 < 0.0f)
  601. {
  602. triangles[i] = false;
  603. continue;
  604. }
  605. // If 2 vertices behind the plane, create a new triangle in-place
  606. else if (d0 < 0.0f && d1 < 0.0f)
  607. {
  608. vertices[index] = ClipEdge(vertices[index], vertices[index + 2], d0, d2);
  609. vertices[index + 1] = ClipEdge(vertices[index + 1], vertices[index + 2], d1, d2);
  610. }
  611. else if (d0 < 0.0f && d2 < 0.0f)
  612. {
  613. vertices[index] = ClipEdge(vertices[index], vertices[index + 1], d0, d1);
  614. vertices[index + 2] = ClipEdge(vertices[index + 2], vertices[index + 1], d2, d1);
  615. }
  616. else if (d1 < 0.0f && d2 < 0.0f)
  617. {
  618. vertices[index + 1] = ClipEdge(vertices[index + 1], vertices[index], d1, d0);
  619. vertices[index + 2] = ClipEdge(vertices[index + 2], vertices[index], d2, d0);
  620. }
  621. // 1 vertex behind the plane: create one new triangle, and modify one in-place
  622. else if (d0 < 0.0f)
  623. {
  624. unsigned newIdx = numTriangles * 3;
  625. triangles[numTriangles] = true;
  626. ++numTriangles;
  627. vertices[newIdx] = ClipEdge(vertices[index], vertices[index + 2], d0, d2);
  628. vertices[newIdx + 1] = vertices[index] = ClipEdge(vertices[index], vertices[index + 1], d0, d1);
  629. vertices[newIdx + 2] = vertices[index + 2];
  630. }
  631. else if (d1 < 0.0f)
  632. {
  633. unsigned newIdx = numTriangles * 3;
  634. triangles[numTriangles] = true;
  635. ++numTriangles;
  636. vertices[newIdx + 1] = ClipEdge(vertices[index + 1], vertices[index], d1, d0);
  637. vertices[newIdx + 2] = vertices[index + 1] = ClipEdge(vertices[index + 1], vertices[index + 2], d1, d2);
  638. vertices[newIdx] = vertices[index];
  639. }
  640. else if (d2 < 0.0f)
  641. {
  642. unsigned newIdx = numTriangles * 3;
  643. triangles[numTriangles] = true;
  644. ++numTriangles;
  645. vertices[newIdx + 2] = ClipEdge(vertices[index + 2], vertices[index + 1], d2, d1);
  646. vertices[newIdx] = vertices[index + 2] = ClipEdge(vertices[index + 2], vertices[index], d2, d0);
  647. vertices[newIdx + 1] = vertices[index + 1];
  648. }
  649. }
  650. }
  651. }
  652. // Code based on Chris Hecker's Perspective Texture Mapping series in the Game Developer magazine
  653. // Also available online at http://chrishecker.com/Miscellaneous_Technical_Articles
  654. /// %Gradients of a software rasterized triangle.
  655. struct Gradients
  656. {
  657. /// Construct from vertices.
  658. Gradients(const Vector3* vertices)
  659. {
  660. float invdX = 1.0f / (((vertices[1].x_ - vertices[2].x_) *
  661. (vertices[0].y_ - vertices[2].y_)) -
  662. ((vertices[0].x_ - vertices[2].x_) *
  663. (vertices[1].y_ - vertices[2].y_)));
  664. float invdY = -invdX;
  665. dInvZdX_ = invdX * (((vertices[1].z_ - vertices[2].z_) * (vertices[0].y_ - vertices[2].y_)) -
  666. ((vertices[0].z_ - vertices[2].z_) * (vertices[1].y_ - vertices[2].y_)));
  667. dInvZdY_ = invdY * (((vertices[1].z_ - vertices[2].z_) * (vertices[0].x_ - vertices[2].x_)) -
  668. ((vertices[0].z_ - vertices[2].z_) * (vertices[1].x_ - vertices[2].x_)));
  669. dInvZdXInt_ = (int)dInvZdX_;
  670. }
  671. /// Integer horizontal gradient.
  672. int dInvZdXInt_;
  673. /// Horizontal gradient.
  674. float dInvZdX_;
  675. /// Vertical gradient.
  676. float dInvZdY_;
  677. };
  678. /// %Edge of a software rasterized triangle.
  679. struct Edge
  680. {
  681. /// Construct from gradients and top & bottom vertices.
  682. Edge(const Gradients& gradients, const Vector3& top, const Vector3& bottom, int topY)
  683. {
  684. float height = (bottom.y_ - top.y_);
  685. float slope = (height != 0.0f) ? (bottom.x_ - top.x_) / height : 0.0f;
  686. float yPreStep = (float)(topY + 1) - top.y_;
  687. float xPreStep = slope * yPreStep;
  688. x_ = (int)((xPreStep + top.x_) * OCCLUSION_X_SCALE + 0.5f);
  689. xStep_ = (int)(slope * OCCLUSION_X_SCALE + 0.5f);
  690. invZ_ = (int)(top.z_ + xPreStep * gradients.dInvZdX_ + yPreStep * gradients.dInvZdY_ + 0.5f);
  691. invZStep_ = (int)(slope * gradients.dInvZdX_ + gradients.dInvZdY_ + 0.5f);
  692. }
  693. /// X coordinate.
  694. int x_;
  695. /// X coordinate step.
  696. int xStep_;
  697. /// Inverse Z.
  698. int invZ_;
  699. /// Inverse Z step.
  700. int invZStep_;
  701. };
  702. void OcclusionBuffer::DrawTriangle2D(const Vector3* vertices, bool clockwise, unsigned threadIndex)
  703. {
  704. int top, middle, bottom;
  705. bool middleIsRight;
  706. // Sort vertices in Y-direction
  707. if (vertices[0].y_ < vertices[1].y_)
  708. {
  709. if (vertices[2].y_ < vertices[0].y_)
  710. {
  711. top = 2;
  712. middle = 0;
  713. bottom = 1;
  714. middleIsRight = true;
  715. }
  716. else
  717. {
  718. top = 0;
  719. if (vertices[1].y_ < vertices[2].y_)
  720. {
  721. middle = 1;
  722. bottom = 2;
  723. middleIsRight = true;
  724. }
  725. else
  726. {
  727. middle = 2;
  728. bottom = 1;
  729. middleIsRight = false;
  730. }
  731. }
  732. }
  733. else
  734. {
  735. if (vertices[2].y_ < vertices[1].y_)
  736. {
  737. top = 2;
  738. middle = 1;
  739. bottom = 0;
  740. middleIsRight = false;
  741. }
  742. else
  743. {
  744. top = 1;
  745. if (vertices[0].y_ < vertices[2].y_)
  746. {
  747. middle = 0;
  748. bottom = 2;
  749. middleIsRight = false;
  750. }
  751. else
  752. {
  753. middle = 2;
  754. bottom = 0;
  755. middleIsRight = true;
  756. }
  757. }
  758. }
  759. int topY = (int)vertices[top].y_;
  760. int middleY = (int)vertices[middle].y_;
  761. int bottomY = (int)vertices[bottom].y_;
  762. // Check for degenerate triangle
  763. if (topY == bottomY)
  764. return;
  765. // Reverse middleIsRight test if triangle is counterclockwise
  766. if (!clockwise)
  767. middleIsRight = !middleIsRight;
  768. Gradients gradients(vertices);
  769. Edge topToMiddle(gradients, vertices[top], vertices[middle], topY);
  770. Edge topToBottom(gradients, vertices[top], vertices[bottom], topY);
  771. Edge middleToBottom(gradients, vertices[middle], vertices[bottom], middleY);
  772. int* bufferData = buffers_[threadIndex].data_;
  773. if (middleIsRight)
  774. {
  775. // Top half
  776. int* row = bufferData + topY * width_;
  777. int* endRow = bufferData + middleY * width_;
  778. while (row < endRow)
  779. {
  780. int invZ = topToBottom.invZ_;
  781. int* dest = row + (topToBottom.x_ >> 16);
  782. int* end = row + (topToMiddle.x_ >> 16);
  783. while (dest < end)
  784. {
  785. if (invZ < *dest)
  786. *dest = invZ;
  787. invZ += gradients.dInvZdXInt_;
  788. ++dest;
  789. }
  790. topToBottom.x_ += topToBottom.xStep_;
  791. topToBottom.invZ_ += topToBottom.invZStep_;
  792. topToMiddle.x_ += topToMiddle.xStep_;
  793. row += width_;
  794. }
  795. // Bottom half
  796. row = bufferData + middleY * width_;
  797. endRow = bufferData + bottomY * width_;
  798. while (row < endRow)
  799. {
  800. int invZ = topToBottom.invZ_;
  801. int* dest = row + (topToBottom.x_ >> 16);
  802. int* end = row + (middleToBottom.x_ >> 16);
  803. while (dest < end)
  804. {
  805. if (invZ < *dest)
  806. *dest = invZ;
  807. invZ += gradients.dInvZdXInt_;
  808. ++dest;
  809. }
  810. topToBottom.x_ += topToBottom.xStep_;
  811. topToBottom.invZ_ += topToBottom.invZStep_;
  812. middleToBottom.x_ += middleToBottom.xStep_;
  813. row += width_;
  814. }
  815. }
  816. else
  817. {
  818. // Top half
  819. int* row = bufferData + topY * width_;
  820. int* endRow = bufferData + middleY * width_;
  821. while (row < endRow)
  822. {
  823. int invZ = topToMiddle.invZ_;
  824. int* dest = row + (topToMiddle.x_ >> 16);
  825. int* end = row + (topToBottom.x_ >> 16);
  826. while (dest < end)
  827. {
  828. if (invZ < *dest)
  829. *dest = invZ;
  830. invZ += gradients.dInvZdXInt_;
  831. ++dest;
  832. }
  833. topToMiddle.x_ += topToMiddle.xStep_;
  834. topToMiddle.invZ_ += topToMiddle.invZStep_;
  835. topToBottom.x_ += topToBottom.xStep_;
  836. row += width_;
  837. }
  838. // Bottom half
  839. row = bufferData + middleY * width_;
  840. endRow = bufferData + bottomY * width_;
  841. while (row < endRow)
  842. {
  843. int invZ = middleToBottom.invZ_;
  844. int* dest = row + (middleToBottom.x_ >> 16);
  845. int* end = row + (topToBottom.x_ >> 16);
  846. while (dest < end)
  847. {
  848. if (invZ < *dest)
  849. *dest = invZ;
  850. invZ += gradients.dInvZdXInt_;
  851. ++dest;
  852. }
  853. middleToBottom.x_ += middleToBottom.xStep_;
  854. middleToBottom.invZ_ += middleToBottom.invZStep_;
  855. topToBottom.x_ += topToBottom.xStep_;
  856. row += width_;
  857. }
  858. }
  859. }
  860. void OcclusionBuffer::MergeBuffers()
  861. {
  862. ATOMIC_PROFILE(MergeBuffers);
  863. for (unsigned i = 1; i < buffers_.Size(); ++i)
  864. {
  865. if (!buffers_[i].used_)
  866. continue;
  867. int* src = buffers_[i].data_;
  868. int* dest = buffers_[0].data_;
  869. int count = width_ * height_;
  870. while (count--)
  871. {
  872. // If thread buffer's depth value is closer, overwrite the original
  873. if (*src < *dest)
  874. *dest = *src;
  875. ++src;
  876. ++dest;
  877. }
  878. }
  879. }
  880. void OcclusionBuffer::ClearBuffer(unsigned threadIndex)
  881. {
  882. if (threadIndex >= buffers_.Size())
  883. return;
  884. int* dest = buffers_[threadIndex].data_;
  885. int count = width_ * height_;
  886. int fillValue = (int)OCCLUSION_Z_SCALE;
  887. while (count--)
  888. *dest++ = fillValue;
  889. }
  890. }