Browse Source

Fix lightmapper rng

In our previous attempts to fix the lightmapper we may have
inadvertently introduced the same issue we were trying to fix. It
appears that on some platforms rand() takes a mutex, which makes it
slower, while on others it may keep per-thread state that would need to
be initialized with srand() on each thread. This slows down light baking
further.

This commit sets up a separate RNG state for each OpenMP thread, calling
rand() only in the single-threaded part of the code to seed them. The
states are kept in a vector indexed by OpenMP thread number.

I believe this solves our problems.
Hein-Pieter van Braam 7 years ago
parent
commit
ccbb5923ac
2 changed files with 36 additions and 17 deletions
  1. 35 16
      scene/3d/voxel_light_baker.cpp
  2. 1 1
      scene/3d/voxel_light_baker.h

+ 35 - 16
scene/3d/voxel_light_baker.cpp

@@ -32,6 +32,10 @@
 #include "os/os.h"
 
 #include <stdlib.h>
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
 #define FINDMINMAX(x0, x1, x2, min, max) \
 	min = max = x0;                      \
 	if (x1 < min) min = x1;              \
@@ -1675,19 +1679,17 @@ Vector3 VoxelLightBaker::_compute_pixel_light_at_pos(const Vector3 &p_pos, const
 	return accum;
 }
 
-uint32_t xorshiftstate[] = { 123 }; // anything non-zero will do here
-
-_ALWAYS_INLINE_ uint32_t xorshift32(uint32_t *seed) {
+_ALWAYS_INLINE_ uint32_t xorshift32(uint32_t *state) {
 	/* Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs" */
-	uint32_t x = *seed;
+	uint32_t x = *state;
 	x ^= x << 13;
 	x ^= x >> 17;
 	x ^= x << 5;
-	*seed = x;
+	*state = x;
 	return x;
 }
 
-Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal) {
+Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal, uint32_t *rng_state) {
 
 	int samples_per_quality[3] = { 48, 128, 512 };
 
@@ -1709,16 +1711,11 @@ Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const V
 	const Light *light = bake_light.ptr();
 	const Cell *cells = bake_cells.ptr();
 
-	uint32_t seed = 0;
-	while (seed == 0) {
-		seed = rand(); //system rand is thread safe, do not replace by Math:: random.
-	}
-
 	for (int i = 0; i < samples; i++) {
 
-		float random_angle1 = (((xorshift32(&seed) % 65535) / 65535.0) * 2.0 - 1.0) * spread;
+		float random_angle1 = (((xorshift32(rng_state) % 65535) / 65535.0) * 2.0 - 1.0) * spread;
 		Vector3 axis(0, sin(random_angle1), cos(random_angle1));
-		float random_angle2 = ((xorshift32(&seed) % 65535) / 65535.0) * Math_PI * 2.0;
+		float random_angle2 = ((xorshift32(rng_state) % 65535) / 65535.0) * Math_PI * 2.0;
 		Basis rot(Vector3(0, 0, 1), random_angle2);
 		axis = rot.xform(axis);
 
@@ -1852,21 +1849,43 @@ Error VoxelLightBaker::make_lightmap(const Transform &p_xform, Ref<Mesh> &p_mesh
 			_plot_triangle(uv, vertex, normal, lightmap.ptrw(), width, height);
 		}
 	}
-	//step 3 perform voxel cone trace on lightmap pixels
 
+	//step 3 perform voxel cone trace on lightmap pixels
 	{
 		LightMap *lightmap_ptr = lightmap.ptrw();
 		uint64_t begin_time = OS::get_singleton()->get_ticks_usec();
 		volatile int lines = 0;
 
+		// make sure our OS-level rng is seeded
+		srand(OS::get_singleton()->get_ticks_usec());
+
+		// setup an RNG state for each OpenMP thread
+		uint32_t threadcount = 1;
+		uint32_t threadnum = 0;
+#ifdef _OPENMP
+		threadcount = omp_get_max_threads();
+#endif
+		Vector<uint32_t> rng_states;
+		rng_states.resize(threadcount);
+		for (uint32_t i = 0; i < threadcount; i++) {
+			do {
+				rng_states[i] = rand();
+			} while (rng_states[i] == 0);
+		}
+		uint32_t *rng_states_p = rng_states.ptrw();
+
 		for (int i = 0; i < height; i++) {
 
 		//print_line("bake line " + itos(i) + " / " + itos(height));
 #ifdef _OPENMP
-#pragma omp parallel for schedule(dynamic, 1)
+#pragma omp parallel for schedule(dynamic, 1) private(threadnum)
 #endif
 			for (int j = 0; j < width; j++) {
 
+#ifdef _OPENMP
+				threadnum = omp_get_thread_num();
+#endif
+
 				//if (i == 125 && j == 280) {
 
 				LightMap *pixel = &lightmap_ptr[i * width + j];
@@ -1879,7 +1898,7 @@ Error VoxelLightBaker::make_lightmap(const Transform &p_xform, Ref<Mesh> &p_mesh
 						pixel->light = _compute_pixel_light_at_pos(pixel->pos, pixel->normal) * energy;
 					} break;
 					case BAKE_MODE_RAY_TRACE: {
-						pixel->light = _compute_ray_trace_at_pos(pixel->pos, pixel->normal) * energy;
+						pixel->light = _compute_ray_trace_at_pos(pixel->pos, pixel->normal, &rng_states_p[threadnum]) * energy;
 					} break;
 						//	pixel->light = Vector3(1, 1, 1);
 						//}

+ 1 - 1
scene/3d/voxel_light_baker.h

@@ -148,7 +148,7 @@ private:
 	_FORCE_INLINE_ void _sample_baked_octree_filtered_and_anisotropic(const Vector3 &p_posf, const Vector3 &p_direction, float p_level, Vector3 &r_color, float &r_alpha);
 	_FORCE_INLINE_ Vector3 _voxel_cone_trace(const Vector3 &p_pos, const Vector3 &p_normal, float p_aperture);
 	_FORCE_INLINE_ Vector3 _compute_pixel_light_at_pos(const Vector3 &p_pos, const Vector3 &p_normal);
-	_FORCE_INLINE_ Vector3 _compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal);
+	_FORCE_INLINE_ Vector3 _compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal, uint32_t *rng_state);
 
 public:
 	void begin_bake(int p_subdiv, const AABB &p_bounds);