Browse Source

-Removed OpenMP support, replaced by a custom class.
-Disabled Opus, implementation is wrong.

Juan Linietsky 7 years ago
parent
commit
021f3c924b

+ 0 - 1
SConstruct

@@ -168,7 +168,6 @@ opts.Add(BoolVariable('vsproj', "Generate Visual Studio Project.", False))
 opts.Add(EnumVariable('warnings', "Set the level of warnings emitted during compilation", 'no', ('extra', 'all', 'moderate', 'no')))
 opts.Add(BoolVariable('progress', "Show a progress indicator during build", True))
 opts.Add(BoolVariable('dev', "If yes, alias for verbose=yes warnings=all", False))
-opts.Add(BoolVariable('openmp', "If yes, enable OpenMP", True))
 opts.Add(EnumVariable('macports_clang', "Build using clang from MacPorts", 'no', ('no', '5.0', 'devel')))
 
 # Thirdparty libraries

+ 2 - 0
core/os/threaded_array_processor.cpp

@@ -0,0 +1,2 @@
+#include "threaded_array_processor.h"
+

+ 80 - 0
core/os/threaded_array_processor.h

@@ -0,0 +1,80 @@
+#ifndef THREADED_ARRAY_PROCESSOR_H
+#define THREADED_ARRAY_PROCESSOR_H
+
+#include "os/mutex.h"
+#include "os/os.h"
+#include "os/thread.h"
+#include "safe_refcount.h"
+#include "thread_safe.h"
+
+// Work descriptor for processing a range of indices by calling a member
+// function of C once per index, passing a U value along with every call.
+template <class C, class U>
+struct ThreadArrayProcessData {
+	uint32_t elements; // index limit: indices at or above this are not processed
+	uint32_t index; // counter advanced with atomic_increment() by process_array_thread()
+	C *instance; // object the member function is invoked on
+	U userdata; // handed to every call as the second argument
+	void (C::*method)(uint32_t, U); // member function invoked for each index
+
+	// Invokes `method` on `instance` for a single index.
+	void process(uint32_t p_index) {
+		C &target = *instance;
+		(target.*method)(p_index, userdata);
+	}
+};
+
+#ifndef NO_THREADS
+
+// Thread entry point. `ud` must point to a work descriptor of type T that
+// exposes `index`, `elements` and `process(uint32_t)`.
+template <class T>
+void process_array_thread(void *ud) {
+
+	T *work = static_cast<T *>(ud);
+	for (;;) {
+		// Claim an index through the shared counter; the value used is
+		// whatever atomic_increment() returns.
+		const uint32_t claimed = atomic_increment(&work->index);
+		if (claimed < work->elements) {
+			work->process(claimed);
+		} else {
+			// Claimed index is outside the range: this thread is done.
+			break;
+		}
+	}
+}
+
+// Calls (p_instance->*p_method)(i, p_userdata) once for every index i in
+// [0, p_elements), spreading the calls over worker threads, and returns only
+// after every worker thread has finished.
+//
+// p_elements: number of indices to process; nothing is called when it is 0.
+// p_instance: object the member function is invoked on.
+// p_method: member function pointer, stored as void (C::*)(uint32_t, U).
+// p_userdata: value handed to every call as the second argument.
+//
+// Calls for different indices may run concurrently and in any order, so the
+// method has to tolerate being invoked from several threads at once.
+template <class C, class M, class U>
+void thread_process_array(uint32_t p_elements, C *p_instance, M p_method, U p_userdata) {
+
+	// An empty range must not touch index 0: the unconditional first call
+	// below would otherwise run the method on an element that does not exist.
+	if (p_elements == 0)
+		return;
+
+	ThreadArrayProcessData<C, U> data;
+	data.method = p_method;
+	data.instance = p_instance;
+	data.userdata = p_userdata;
+	data.index = 0;
+	data.elements = p_elements;
+	// Index 0 is handled here because the workers only use the value returned
+	// by atomic_increment(), which is presumably the already incremented
+	// counter (1, 2, ...) -- TODO confirm against safe_refcount.h.
+	data.process(data.index); //process first, let threads increment for next
+
+	// A single element is already done; do not create threads that would find no work.
+	if (p_elements == 1)
+		return;
+
+	Vector<Thread *> threads;
+
+	// One worker per processor reported by the OS; all of them share `data`.
+	threads.resize(OS::get_singleton()->get_processor_count());
+
+	for (int i = 0; i < threads.size(); i++) {
+		threads[i] = Thread::create(process_array_thread<ThreadArrayProcessData<C, U> >, &data);
+	}
+
+	// `data` lives in this stack frame, so every worker is joined before returning.
+	for (int i = 0; i < threads.size(); i++) {
+		Thread::wait_to_finish(threads[i]);
+		memdelete(threads[i]);
+	}
+}
+
+#else
+
+// Single-threaded variant used when NO_THREADS is defined: calls
+// (p_instance->*p_method)(i, p_userdata) for i = 0 .. p_elements - 1, in
+// ascending order, on the calling thread.
+template <class C, class M, class U>
+void thread_process_array(uint32_t p_elements, C *p_instance, M p_method, U p_userdata) {
+
+	ThreadArrayProcessData<C, U> job;
+	job.elements = p_elements;
+	job.index = 0;
+	job.instance = p_instance;
+	job.userdata = p_userdata;
+	job.method = p_method;
+
+	uint32_t current = 0;
+	while (current < p_elements) {
+		job.process(current);
+		current++;
+	}
+}
+
+#endif
+
+#endif // THREADED_ARRAY_PROCESSOR_H

+ 6 - 1
modules/opus/config.py

@@ -1,5 +1,10 @@
 def can_build(platform):
-    return True
+    # Sorry guys, do not enable this unless you can figure out a way
+    # to get Opus to not do any memory allocation or system calls
+    # in the audio thread.
+    # Currently the implementation even reads files from the audio thread,
+    # and this is not how audio programming works.
+    return False
 
 def configure(env):
     pass

+ 0 - 3
platform/osx/detect.py

@@ -82,9 +82,6 @@ def configure(env):
             env['RANLIB'] = mpprefix + "/libexec/llvm-" + mpclangver + "/bin/llvm-ranlib"
             env['AS'] = mpprefix + "/libexec/llvm-" + mpclangver + "/bin/llvm-as"
             env.Append(CCFLAGS=['-D__MACPORTS__']) #hack to fix libvpx MM256_BROADCASTSI128_SI256 define
-            if env['tools'] and env['openmp']:
-                env.Append(CPPFLAGS=['-fopenmp'])
-                env.Append(LINKFLAGS=['-fopenmp'])
 
     else: # osxcross build
         root = os.environ.get("OSXCROSS_ROOT", 0)

+ 0 - 5
platform/windows/detect.py

@@ -191,8 +191,6 @@ def configure(env):
         if (env["use_lto"]):
             env.Append(CCFLAGS=['/GL'])
             env.Append(LINKFLAGS=['/LTCG'])
-        if env['tools'] and env['openmp']:
-            env.Append(CPPFLAGS=['/openmp'])
 
         env.Append(CCFLAGS=["/I" + p for p in os.getenv("INCLUDE").split(";")])
         env.Append(LIBPATH=[p for p in os.getenv("LIB").split(";")])
@@ -270,9 +268,6 @@ def configure(env):
             env.Append(CCFLAGS=['-flto'])
             env.Append(LINKFLAGS=['-flto=' + str(env.GetOption("num_jobs"))])
 
-        if env['tools'] and env['openmp']:
-            env.Append(CPPFLAGS=['-fopenmp'])
-            env.Append(LINKFLAGS=['-fopenmp'])
 
         ## Compile flags
 

+ 0 - 4
platform/x11/detect.py

@@ -265,9 +265,5 @@ def configure(env):
         env.Append(LINKFLAGS=['-m64', '-L/usr/lib/i686-linux-gnu'])
 
 
-    if env['tools'] and env['openmp']:
-        env.Append(CPPFLAGS=['-fopenmp'])
-        env.Append(LINKFLAGS=['-fopenmp'])
-
     if env['use_static_cpp']:
         env.Append(LINKFLAGS=['-static-libstdc++'])

+ 2 - 2
scene/3d/baked_lightmap.cpp

@@ -772,8 +772,8 @@ void BakedLightmap::_bind_methods() {
 BakedLightmap::BakedLightmap() {
 
 	extents = Vector3(10, 10, 10);
-	bake_cell_size = 0.1;
-	capture_cell_size = 0.25;
+	bake_cell_size = 0.25;
+	capture_cell_size = 0.5;
 
 	bake_quality = BAKE_QUALITY_MEDIUM;
 	bake_mode = BAKE_MODE_CONE_TRACE;

+ 25 - 51
scene/3d/voxel_light_baker.cpp

@@ -30,11 +30,9 @@
 
 #include "voxel_light_baker.h"
 #include "os/os.h"
+#include "os/threaded_array_processor.h"
 
 #include <stdlib.h>
-#ifdef _OPENMP
-#include <omp.h>
-#endif
 
 #define FINDMINMAX(x0, x1, x2, min, max) \
 	min = max = x0;                      \
@@ -1689,7 +1687,7 @@ _ALWAYS_INLINE_ uint32_t xorshift32(uint32_t *state) {
 	return x;
 }
 
-Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal, uint32_t *rng_state) {
+Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal) {
 
 	int samples_per_quality[3] = { 48, 128, 512 };
 
@@ -1711,8 +1709,7 @@ Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const V
 	const Light *light = bake_light.ptr();
 	const Cell *cells = bake_cells.ptr();
 
-	// Prevent false sharing when running on OpenMP
-	uint32_t local_rng_state = *rng_state;
+	uint32_t local_rng_state = rand(); //needs to be fixed again
 
 	for (int i = 0; i < samples; i++) {
 
@@ -1796,10 +1793,30 @@ Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const V
 	}
 
 	// Make sure we don't reset this thread's RNG state
-	*rng_state = local_rng_state;
+
 	return accum / samples;
 }
 
+// Bakes the light of one lightmap entry: p_line points to the first entry of
+// a line and p_x selects the entry inside it. Has the (index, userdata)
+// signature expected by thread_process_array().
+void VoxelLightBaker::_lightmap_bake_point(uint32_t p_x, LightMap *p_line) {
+
+	LightMap &texel = p_line[p_x];
+
+	// Entries whose position equals a default-constructed Vector3 are left untouched.
+	if (texel.pos == Vector3()) {
+		return;
+	}
+
+	// The computed light is scaled by the baker's energy; any other bake mode
+	// leaves the entry unchanged.
+	if (bake_mode == BAKE_MODE_CONE_TRACE) {
+		texel.light = _compute_pixel_light_at_pos(texel.pos, texel.normal) * energy;
+	} else if (bake_mode == BAKE_MODE_RAY_TRACE) {
+		texel.light = _compute_ray_trace_at_pos(texel.pos, texel.normal) * energy;
+	}
+}
+
+
 Error VoxelLightBaker::make_lightmap(const Transform &p_xform, Ref<Mesh> &p_mesh, LightMapData &r_lightmap, bool (*p_bake_time_func)(void *, float, float), void *p_bake_time_ud) {
 
 	//transfer light information to a lightmap
@@ -1862,53 +1879,10 @@ Error VoxelLightBaker::make_lightmap(const Transform &p_xform, Ref<Mesh> &p_mesh
 		volatile int lines = 0;
 
 		// make sure our OS-level rng is seeded
-		srand(OS::get_singleton()->get_ticks_usec());
-
-		// setup an RNG state for each OpenMP thread
-		uint32_t threadcount = 1;
-		uint32_t threadnum = 0;
-#ifdef _OPENMP
-		threadcount = omp_get_max_threads();
-#endif
-		Vector<uint32_t> rng_states;
-		rng_states.resize(threadcount);
-		for (uint32_t i = 0; i < threadcount; i++) {
-			do {
-				rng_states[i] = rand();
-			} while (rng_states[i] == 0);
-		}
-		uint32_t *rng_states_p = rng_states.ptrw();
 
 		for (int i = 0; i < height; i++) {
 
-		//print_line("bake line " + itos(i) + " / " + itos(height));
-#ifdef _OPENMP
-#pragma omp parallel for schedule(dynamic, 1) private(threadnum)
-#endif
-			for (int j = 0; j < width; j++) {
-
-#ifdef _OPENMP
-				threadnum = omp_get_thread_num();
-#endif
-
-				//if (i == 125 && j == 280) {
-
-				LightMap *pixel = &lightmap_ptr[i * width + j];
-				if (pixel->pos == Vector3())
-					continue; //unused, skipe
-
-				//print_line("pos: " + pixel->pos + " normal " + pixel->normal);
-				switch (bake_mode) {
-					case BAKE_MODE_CONE_TRACE: {
-						pixel->light = _compute_pixel_light_at_pos(pixel->pos, pixel->normal) * energy;
-					} break;
-					case BAKE_MODE_RAY_TRACE: {
-						pixel->light = _compute_ray_trace_at_pos(pixel->pos, pixel->normal, &rng_states_p[threadnum]) * energy;
-					} break;
-						//	pixel->light = Vector3(1, 1, 1);
-						//}
-				}
-			}
+			thread_process_array(width,this,&VoxelLightBaker::_lightmap_bake_point,&lightmap_ptr[i*width]);
 
 			lines = MAX(lines, i); //for multithread
 			if (p_bake_time_func) {

+ 4 - 1
scene/3d/voxel_light_baker.h

@@ -148,9 +148,12 @@ private:
 	_FORCE_INLINE_ void _sample_baked_octree_filtered_and_anisotropic(const Vector3 &p_posf, const Vector3 &p_direction, float p_level, Vector3 &r_color, float &r_alpha);
 	_FORCE_INLINE_ Vector3 _voxel_cone_trace(const Vector3 &p_pos, const Vector3 &p_normal, float p_aperture);
 	_FORCE_INLINE_ Vector3 _compute_pixel_light_at_pos(const Vector3 &p_pos, const Vector3 &p_normal);
-	_FORCE_INLINE_ Vector3 _compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal, uint32_t *rng_state);
+	_FORCE_INLINE_ Vector3 _compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal);
+
+	void _lightmap_bake_point(uint32_t p_x,	LightMap *p_line);
 
 public:
+
 	void begin_bake(int p_subdiv, const AABB &p_bounds);
 	void plot_mesh(const Transform &p_xform, Ref<Mesh> &p_mesh, const Vector<Ref<Material> > &p_materials, const Ref<Material> &p_override_material);
 	void begin_bake_light(BakeQuality p_quality = BAKE_QUALITY_MEDIUM, BakeMode p_bake_mode = BAKE_MODE_CONE_TRACE, float p_propagation = 0.85, float p_energy = 1);