cluster_builder_rd.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378
  1. /*************************************************************************/
  2. /* cluster_builder_rd.h */
  3. /*************************************************************************/
  4. /* This file is part of: */
  5. /* GODOT ENGINE */
  6. /* https://godotengine.org */
  7. /*************************************************************************/
  8. /* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
  9. /* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
  10. /* */
  11. /* Permission is hereby granted, free of charge, to any person obtaining */
  12. /* a copy of this software and associated documentation files (the */
  13. /* "Software"), to deal in the Software without restriction, including */
  14. /* without limitation the rights to use, copy, modify, merge, publish, */
  15. /* distribute, sublicense, and/or sell copies of the Software, and to */
  16. /* permit persons to whom the Software is furnished to do so, subject to */
  17. /* the following conditions: */
  18. /* */
  19. /* The above copyright notice and this permission notice shall be */
  20. /* included in all copies or substantial portions of the Software. */
  21. /* */
  22. /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
  23. /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
  24. /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
  25. /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
  26. /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
  27. /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
  28. /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
  29. /*************************************************************************/
  30. #ifndef CLUSTER_BUILDER_RD_H
  31. #define CLUSTER_BUILDER_RD_H
  32. #include "servers/rendering/renderer_rd/renderer_storage_rd.h"
  33. #include "servers/rendering/renderer_rd/shaders/cluster_debug.glsl.gen.h"
  34. #include "servers/rendering/renderer_rd/shaders/cluster_render.glsl.gen.h"
  35. #include "servers/rendering/renderer_rd/shaders/cluster_store.glsl.gen.h"
  36. class ClusterBuilderSharedDataRD {
  37. friend class ClusterBuilderRD;
  38. RID sphere_vertex_buffer;
  39. RID sphere_vertex_array;
  40. RID sphere_index_buffer;
  41. RID sphere_index_array;
  42. float sphere_overfit = 0.0; //because an icosphere is not a perfect sphere, we need to enlarge it to cover the sphere area
  43. RID cone_vertex_buffer;
  44. RID cone_vertex_array;
  45. RID cone_index_buffer;
  46. RID cone_index_array;
  47. float cone_overfit = 0.0; //because an cone mesh is not a perfect sphere, we need to enlarge it to cover the actual cone area
  48. RID box_vertex_buffer;
  49. RID box_vertex_array;
  50. RID box_index_buffer;
  51. RID box_index_array;
  52. enum Divisor {
  53. DIVISOR_1,
  54. DIVISOR_2,
  55. DIVISOR_4,
  56. };
  57. struct ClusterRender {
  58. struct PushConstant {
  59. uint32_t base_index;
  60. uint32_t pad0;
  61. uint32_t pad1;
  62. uint32_t pad2;
  63. };
  64. ClusterRenderShaderRD cluster_render_shader;
  65. RID shader_version;
  66. RID shader;
  67. enum PipelineVersion {
  68. PIPELINE_NORMAL,
  69. PIPELINE_MSAA,
  70. PIPELINE_MAX
  71. };
  72. RID shader_pipelines[PIPELINE_MAX];
  73. } cluster_render;
  74. struct ClusterStore {
  75. struct PushConstant {
  76. uint32_t cluster_render_data_size; // how much data for a single cluster takes
  77. uint32_t max_render_element_count_div_32; //divided by 32
  78. uint32_t cluster_screen_size[2];
  79. uint32_t render_element_count_div_32; //divided by 32
  80. uint32_t max_cluster_element_count_div_32; //divided by 32
  81. uint32_t pad1;
  82. uint32_t pad2;
  83. };
  84. ClusterStoreShaderRD cluster_store_shader;
  85. RID shader_version;
  86. RID shader;
  87. RID shader_pipeline;
  88. } cluster_store;
  89. struct ClusterDebug {
  90. struct PushConstant {
  91. uint32_t screen_size[2];
  92. uint32_t cluster_screen_size[2];
  93. uint32_t cluster_shift;
  94. uint32_t cluster_type;
  95. float z_near;
  96. float z_far;
  97. uint32_t orthogonal;
  98. uint32_t max_cluster_element_count_div_32;
  99. uint32_t pad1;
  100. uint32_t pad2;
  101. };
  102. ClusterDebugShaderRD cluster_debug_shader;
  103. RID shader_version;
  104. RID shader;
  105. RID shader_pipeline;
  106. } cluster_debug;
  107. public:
  108. ClusterBuilderSharedDataRD();
  109. ~ClusterBuilderSharedDataRD();
  110. };
  111. class ClusterBuilderRD {
  112. public:
  113. enum LightType {
  114. LIGHT_TYPE_OMNI,
  115. LIGHT_TYPE_SPOT
  116. };
  117. enum BoxType {
  118. BOX_TYPE_REFLECTION_PROBE,
  119. BOX_TYPE_DECAL,
  120. };
  121. enum ElementType {
  122. ELEMENT_TYPE_OMNI_LIGHT,
  123. ELEMENT_TYPE_SPOT_LIGHT,
  124. ELEMENT_TYPE_DECAL,
  125. ELEMENT_TYPE_REFLECTION_PROBE,
  126. ELEMENT_TYPE_MAX,
  127. };
  128. private:
  129. ClusterBuilderSharedDataRD *shared = nullptr;
  130. struct RenderElementData {
  131. uint32_t type; //0-4
  132. uint32_t touches_near;
  133. uint32_t touches_far;
  134. uint32_t original_index;
  135. float transform_inv[12]; //transposed transform for less space
  136. float scale[3];
  137. uint32_t pad;
  138. };
  139. uint32_t cluster_count_by_type[ELEMENT_TYPE_MAX] = {};
  140. uint32_t max_elements_by_type = 0;
  141. RenderElementData *render_elements = nullptr;
  142. uint32_t render_element_count = 0;
  143. uint32_t render_element_max = 0;
  144. Transform view_xform;
  145. CameraMatrix adjusted_projection;
  146. CameraMatrix projection;
  147. float z_far = 0;
  148. float z_near = 0;
  149. bool orthogonal = false;
  150. enum Divisor {
  151. DIVISOR_1,
  152. DIVISOR_2,
  153. DIVISOR_4,
  154. };
  155. uint32_t cluster_size = 32;
  156. bool use_msaa = true;
  157. Divisor divisor = DIVISOR_4;
  158. Size2i screen_size;
  159. Size2i cluster_screen_size;
  160. RID framebuffer;
  161. RID cluster_render_buffer; //used for creating
  162. RID cluster_buffer; //used for rendering
  163. RID element_buffer; //used for storing, to hint element touches far plane or near plane
  164. uint32_t cluster_render_buffer_size = 0;
  165. uint32_t cluster_buffer_size = 0;
  166. RID cluster_render_uniform_set;
  167. RID cluster_store_uniform_set;
  168. //persistent data
  169. void _clear();
  170. struct StateUniform {
  171. float projection[16];
  172. float inv_z_far;
  173. uint32_t screen_to_clusters_shift; // shift to obtain coordinates in block indices
  174. uint32_t cluster_screen_width; //
  175. uint32_t cluster_data_size; // how much data for a single cluster takes
  176. uint32_t cluster_depth_offset;
  177. uint32_t pad0;
  178. uint32_t pad1;
  179. uint32_t pad2;
  180. };
  181. RID state_uniform;
  182. RID debug_uniform_set;
  183. public:
  184. void setup(Size2i p_screen_size, uint32_t p_max_elements, RID p_depth_buffer, RID p_depth_buffer_sampler, RID p_color_buffer);
  185. void begin(const Transform &p_view_transform, const CameraMatrix &p_cam_projection, bool p_flip_y);
  186. _FORCE_INLINE_ void add_light(LightType p_type, const Transform &p_transform, float p_radius, float p_spot_aperture) {
  187. if (p_type == LIGHT_TYPE_OMNI && cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT] == max_elements_by_type) {
  188. return; //max number elements reached
  189. }
  190. if (p_type == LIGHT_TYPE_SPOT && cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT] == max_elements_by_type) {
  191. return; //max number elements reached
  192. }
  193. RenderElementData &e = render_elements[render_element_count];
  194. Transform xform = view_xform * p_transform;
  195. float radius = xform.basis.get_uniform_scale();
  196. if (radius > 0.98 || radius < 1.02) {
  197. xform.basis.orthonormalize();
  198. }
  199. radius *= p_radius;
  200. if (p_type == LIGHT_TYPE_OMNI) {
  201. radius *= shared->sphere_overfit; // overfit icosphere
  202. //omni
  203. float depth = -xform.origin.z;
  204. if (orthogonal) {
  205. e.touches_near = (depth - radius) < z_near;
  206. } else {
  207. //contains camera inside light
  208. float radius2 = radius * shared->sphere_overfit; // overfit again for outer size (camera may be outside actual sphere but behind an icosphere vertex)
  209. e.touches_near = xform.origin.length_squared() < radius2 * radius2;
  210. }
  211. e.touches_far = (depth + radius) > z_far;
  212. e.scale[0] = radius;
  213. e.scale[1] = radius;
  214. e.scale[2] = radius;
  215. e.type = ELEMENT_TYPE_OMNI_LIGHT;
  216. e.original_index = cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT];
  217. RendererStorageRD::store_transform_transposed_3x4(xform, e.transform_inv);
  218. cluster_count_by_type[ELEMENT_TYPE_OMNI_LIGHT]++;
  219. } else {
  220. //spot
  221. radius *= shared->cone_overfit; // overfit icosphere
  222. real_t len = Math::tan(Math::deg2rad(p_spot_aperture)) * radius;
  223. //approximate, probably better to use a cone support function
  224. float max_d = -1e20;
  225. float min_d = 1e20;
  226. #define CONE_MINMAX(m_x, m_y) \
  227. { \
  228. float d = -xform.xform(Vector3(len * m_x, len * m_y, -radius)).z; \
  229. min_d = MIN(d, min_d); \
  230. max_d = MAX(d, max_d); \
  231. }
  232. CONE_MINMAX(1, 1);
  233. CONE_MINMAX(-1, 1);
  234. CONE_MINMAX(-1, -1);
  235. CONE_MINMAX(1, -1);
  236. if (orthogonal) {
  237. e.touches_near = min_d < z_near;
  238. } else {
  239. //contains camera inside light
  240. Plane base_plane(xform.origin, -xform.basis.get_axis(Vector3::AXIS_Z));
  241. float dist = base_plane.distance_to(Vector3());
  242. if (dist >= 0 && dist < radius) {
  243. //inside, check angle
  244. float angle = Math::rad2deg(Math::acos((-xform.origin.normalized()).dot(-xform.basis.get_axis(Vector3::AXIS_Z))));
  245. e.touches_near = angle < p_spot_aperture * 1.05; //overfit aperture a little due to cone overfit
  246. } else {
  247. e.touches_near = false;
  248. }
  249. }
  250. e.touches_far = max_d > z_far;
  251. e.scale[0] = len * shared->cone_overfit;
  252. e.scale[1] = len * shared->cone_overfit;
  253. e.scale[2] = radius;
  254. e.type = ELEMENT_TYPE_SPOT_LIGHT;
  255. e.original_index = cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT]; //use omni since they share index
  256. RendererStorageRD::store_transform_transposed_3x4(xform, e.transform_inv);
  257. cluster_count_by_type[ELEMENT_TYPE_SPOT_LIGHT]++;
  258. }
  259. render_element_count++;
  260. }
  261. _FORCE_INLINE_ void add_box(BoxType p_box_type, const Transform &p_transform, const Vector3 &p_half_extents) {
  262. if (p_box_type == BOX_TYPE_DECAL && cluster_count_by_type[ELEMENT_TYPE_DECAL] == max_elements_by_type) {
  263. return; //max number elements reached
  264. }
  265. if (p_box_type == BOX_TYPE_REFLECTION_PROBE && cluster_count_by_type[ELEMENT_TYPE_REFLECTION_PROBE] == max_elements_by_type) {
  266. return; //max number elements reached
  267. }
  268. RenderElementData &e = render_elements[render_element_count];
  269. Transform xform = view_xform * p_transform;
  270. //extract scale and scale the matrix by it, makes things simpler
  271. Vector3 scale = p_half_extents;
  272. for (uint32_t i = 0; i < 3; i++) {
  273. float s = xform.basis.elements[i].length();
  274. scale[i] *= s;
  275. xform.basis.elements[i] /= s;
  276. };
  277. float box_depth = Math::abs(xform.basis.xform_inv(Vector3(0, 0, -1)).dot(scale));
  278. float depth = -xform.origin.z;
  279. if (orthogonal) {
  280. e.touches_near = depth - box_depth < z_near;
  281. } else {
  282. //contains camera inside box
  283. Vector3 inside = xform.xform_inv(Vector3(0, 0, 0)).abs();
  284. e.touches_near = inside.x < scale.x && inside.y < scale.y && inside.z < scale.z;
  285. }
  286. e.touches_far = depth + box_depth > z_far;
  287. e.scale[0] = scale.x;
  288. e.scale[1] = scale.y;
  289. e.scale[2] = scale.z;
  290. e.type = (p_box_type == BOX_TYPE_DECAL) ? ELEMENT_TYPE_DECAL : ELEMENT_TYPE_REFLECTION_PROBE;
  291. e.original_index = cluster_count_by_type[e.type];
  292. RendererStorageRD::store_transform_transposed_3x4(xform, e.transform_inv);
  293. cluster_count_by_type[e.type]++;
  294. render_element_count++;
  295. }
  296. void bake_cluster();
  297. void debug(ElementType p_element);
  298. RID get_cluster_buffer() const;
  299. uint32_t get_cluster_size() const;
  300. uint32_t get_max_cluster_elements() const;
  301. void set_shared(ClusterBuilderSharedDataRD *p_shared);
  302. ClusterBuilderRD();
  303. ~ClusterBuilderRD();
  304. };
  305. #endif // CLUSTER_BUILDER_RD_H