Explorar o código

-Removed OpenMP support, replaced by a custom class.
-Disabled Opus, implementation is wrong.

Juan Linietsky hai 7 anos
pai
achega
021f3c924b

+ 0 - 1
SConstruct

@@ -168,7 +168,6 @@ opts.Add(BoolVariable('vsproj', "Generate Visual Studio Project.", False))
 opts.Add(EnumVariable('warnings', "Set the level of warnings emitted during compilation", 'no', ('extra', 'all', 'moderate', 'no')))
 opts.Add(EnumVariable('warnings', "Set the level of warnings emitted during compilation", 'no', ('extra', 'all', 'moderate', 'no')))
 opts.Add(BoolVariable('progress', "Show a progress indicator during build", True))
 opts.Add(BoolVariable('progress', "Show a progress indicator during build", True))
 opts.Add(BoolVariable('dev', "If yes, alias for verbose=yes warnings=all", False))
 opts.Add(BoolVariable('dev', "If yes, alias for verbose=yes warnings=all", False))
-opts.Add(BoolVariable('openmp', "If yes, enable OpenMP", True))
 opts.Add(EnumVariable('macports_clang', "Build using clang from MacPorts", 'no', ('no', '5.0', 'devel')))
 opts.Add(EnumVariable('macports_clang', "Build using clang from MacPorts", 'no', ('no', '5.0', 'devel')))
 
 
 # Thirdparty libraries
 # Thirdparty libraries

+ 2 - 0
core/os/threaded_array_processor.cpp

@@ -0,0 +1,2 @@
+#include "threaded_array_processor.h"
+

+ 80 - 0
core/os/threaded_array_processor.h

@@ -0,0 +1,80 @@
+#ifndef THREADED_ARRAY_PROCESSOR_H
+#define THREADED_ARRAY_PROCESSOR_H
+
+#include "os/mutex.h"
+#include "os/os.h"
+#include "os/thread.h"
+#include "safe_refcount.h"
+#include "thread_safe.h"
+
+template <class C, class U> // C: class the callback belongs to; U: type of the extra argument forwarded to it
+struct ThreadArrayProcessData { // Job description for processing an index range: a member-function callback plus the range bookkeeping.
+	uint32_t elements; // number of indices to process; process_array_thread() stops once its index is >= this
+	uint32_t index; // shared cursor, advanced by process_array_thread() through atomic_increment()
+	C *instance; // object the callback is invoked on
+	U userdata; // extra argument handed unchanged to every callback invocation
+	void (C::*method)(uint32_t, U); // the callback, invoked as (instance->*method)(index, userdata)
+
+	void process(uint32_t p_index) { // run the callback for one index
+		(instance->*method)(p_index, userdata);
+	}
+};
+
+#ifndef NO_THREADS
+
+template <class T> // T must expose index, elements and process(), as ThreadArrayProcessData does
+void process_array_thread(void *ud) { // Thread entry point: keeps claiming indices from the shared cursor and processing them until the range is exhausted.
+
+	T &data = *(T *)ud; // ud is the type-erased job pointer that was passed to Thread::create()
+	while (true) {
+		uint32_t index = atomic_increment(&data.index); // claim the next index; NOTE(review): presumably returns the post-increment value, so index 0 is never claimed here - confirm
+		if (index >= data.elements)
+			break; // range exhausted, let the thread finish
+		data.process(index);
+	}
+}
+
+template <class C, class M, class U>
+void thread_process_array(uint32_t p_elements, C *p_instance, M p_method, U p_userdata) { // Calls (p_instance->*p_method)(i, p_userdata) for every i in [0, p_elements) using one worker thread per processor; returns after all workers have finished.
+	if (p_elements == 0) return; // empty range: without this guard index 0 would still be processed below; the NO_THREADS variant does nothing in this case either
+	ThreadArrayProcessData<C, U> data;
+	data.method = p_method;
+	data.instance = p_instance;
+	data.userdata = p_userdata;
+	data.index = 0;
+	data.elements = p_elements;
+	data.process(data.index); //process first, let threads increment for next
+
+	Vector<Thread *> threads;
+
+	threads.resize(OS::get_singleton()->get_processor_count()); // one worker per reported processor
+
+	for (int i = 0; i < threads.size(); i++) {
+		threads[i] = Thread::create(process_array_thread<ThreadArrayProcessData<C, U> >, &data); // every worker shares the same stack-allocated job
+	}
+
+	for (int i = 0; i < threads.size(); i++) {
+		Thread::wait_to_finish(threads[i]); // join before returning: data lives on this stack frame
+		memdelete(threads[i]);
+	}
+}
+
+#else
+
+template <class C, class M, class U>
+void thread_process_array(uint32_t p_elements, C *p_instance, M p_method, U p_userdata) { // NO_THREADS fallback: same signature as the threaded variant, but every index is processed sequentially on the calling thread.
+
+	ThreadArrayProcessData<C, U> data;
+	data.method = p_method;
+	data.instance = p_instance;
+	data.userdata = p_userdata;
+	data.index = 0; // not read by the loop below, which uses its own counter
+	data.elements = p_elements;
+	for (uint32_t i = 0; i < p_elements; i++) { // an empty range runs zero iterations
+		data.process(i);
+	}
+}
+
+#endif
+
+#endif // THREADED_ARRAY_PROCESSOR_H

+ 6 - 1
modules/opus/config.py

@@ -1,5 +1,10 @@
 def can_build(platform):
 def can_build(platform):
-    return True
+    # Sorry guys, do not enable this unless you can figure out a way
+    # to get Opus to not do any memory allocation or system calls
+    # in the audio thread.
+    # Currently the implementation even reads files from the audio thread,
+    # and this is not how audio programming works.
+    return False
 
 
 def configure(env):
 def configure(env):
     pass
     pass

+ 0 - 3
platform/osx/detect.py

@@ -82,9 +82,6 @@ def configure(env):
             env['RANLIB'] = mpprefix + "/libexec/llvm-" + mpclangver + "/bin/llvm-ranlib"
             env['RANLIB'] = mpprefix + "/libexec/llvm-" + mpclangver + "/bin/llvm-ranlib"
             env['AS'] = mpprefix + "/libexec/llvm-" + mpclangver + "/bin/llvm-as"
             env['AS'] = mpprefix + "/libexec/llvm-" + mpclangver + "/bin/llvm-as"
             env.Append(CCFLAGS=['-D__MACPORTS__']) #hack to fix libvpx MM256_BROADCASTSI128_SI256 define
             env.Append(CCFLAGS=['-D__MACPORTS__']) #hack to fix libvpx MM256_BROADCASTSI128_SI256 define
-            if env['tools'] and env['openmp']:
-                env.Append(CPPFLAGS=['-fopenmp'])
-                env.Append(LINKFLAGS=['-fopenmp'])
 
 
     else: # osxcross build
     else: # osxcross build
         root = os.environ.get("OSXCROSS_ROOT", 0)
         root = os.environ.get("OSXCROSS_ROOT", 0)

+ 0 - 5
platform/windows/detect.py

@@ -191,8 +191,6 @@ def configure(env):
         if (env["use_lto"]):
         if (env["use_lto"]):
             env.Append(CCFLAGS=['/GL'])
             env.Append(CCFLAGS=['/GL'])
             env.Append(LINKFLAGS=['/LTCG'])
             env.Append(LINKFLAGS=['/LTCG'])
-        if env['tools'] and env['openmp']:
-            env.Append(CPPFLAGS=['/openmp'])
 
 
         env.Append(CCFLAGS=["/I" + p for p in os.getenv("INCLUDE").split(";")])
         env.Append(CCFLAGS=["/I" + p for p in os.getenv("INCLUDE").split(";")])
         env.Append(LIBPATH=[p for p in os.getenv("LIB").split(";")])
         env.Append(LIBPATH=[p for p in os.getenv("LIB").split(";")])
@@ -270,9 +268,6 @@ def configure(env):
             env.Append(CCFLAGS=['-flto'])
             env.Append(CCFLAGS=['-flto'])
             env.Append(LINKFLAGS=['-flto=' + str(env.GetOption("num_jobs"))])
             env.Append(LINKFLAGS=['-flto=' + str(env.GetOption("num_jobs"))])
 
 
-        if env['tools'] and env['openmp']:
-            env.Append(CPPFLAGS=['-fopenmp'])
-            env.Append(LINKFLAGS=['-fopenmp'])
 
 
         ## Compile flags
         ## Compile flags
 
 

+ 0 - 4
platform/x11/detect.py

@@ -265,9 +265,5 @@ def configure(env):
         env.Append(LINKFLAGS=['-m64', '-L/usr/lib/i686-linux-gnu'])
         env.Append(LINKFLAGS=['-m64', '-L/usr/lib/i686-linux-gnu'])
 
 
 
 
-    if env['tools'] and env['openmp']:
-        env.Append(CPPFLAGS=['-fopenmp'])
-        env.Append(LINKFLAGS=['-fopenmp'])
-
     if env['use_static_cpp']:
     if env['use_static_cpp']:
         env.Append(LINKFLAGS=['-static-libstdc++'])
         env.Append(LINKFLAGS=['-static-libstdc++'])

+ 2 - 2
scene/3d/baked_lightmap.cpp

@@ -772,8 +772,8 @@ void BakedLightmap::_bind_methods() {
 BakedLightmap::BakedLightmap() {
 BakedLightmap::BakedLightmap() {
 
 
 	extents = Vector3(10, 10, 10);
 	extents = Vector3(10, 10, 10);
-	bake_cell_size = 0.1;
-	capture_cell_size = 0.25;
+	bake_cell_size = 0.25;
+	capture_cell_size = 0.5;
 
 
 	bake_quality = BAKE_QUALITY_MEDIUM;
 	bake_quality = BAKE_QUALITY_MEDIUM;
 	bake_mode = BAKE_MODE_CONE_TRACE;
 	bake_mode = BAKE_MODE_CONE_TRACE;

+ 25 - 51
scene/3d/voxel_light_baker.cpp

@@ -30,11 +30,9 @@
 
 
 #include "voxel_light_baker.h"
 #include "voxel_light_baker.h"
 #include "os/os.h"
 #include "os/os.h"
+#include "os/threaded_array_processor.h"
 
 
 #include <stdlib.h>
 #include <stdlib.h>
-#ifdef _OPENMP
-#include <omp.h>
-#endif
 
 
 #define FINDMINMAX(x0, x1, x2, min, max) \
 #define FINDMINMAX(x0, x1, x2, min, max) \
 	min = max = x0;                      \
 	min = max = x0;                      \
@@ -1689,7 +1687,7 @@ _ALWAYS_INLINE_ uint32_t xorshift32(uint32_t *state) {
 	return x;
 	return x;
 }
 }
 
 
-Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal, uint32_t *rng_state) {
+Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal) {
 
 
 	int samples_per_quality[3] = { 48, 128, 512 };
 	int samples_per_quality[3] = { 48, 128, 512 };
 
 
@@ -1711,8 +1709,7 @@ Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const V
 	const Light *light = bake_light.ptr();
 	const Light *light = bake_light.ptr();
 	const Cell *cells = bake_cells.ptr();
 	const Cell *cells = bake_cells.ptr();
 
 
-	// Prevent false sharing when running on OpenMP
-	uint32_t local_rng_state = *rng_state;
+	uint32_t local_rng_state = rand(); //needs to be fixed again
 
 
 	for (int i = 0; i < samples; i++) {
 	for (int i = 0; i < samples; i++) {
 
 
@@ -1796,10 +1793,30 @@ Vector3 VoxelLightBaker::_compute_ray_trace_at_pos(const Vector3 &p_pos, const V
 	}
 	}
 
 
 	// Make sure we don't reset this thread's RNG state
 	// Make sure we don't reset this thread's RNG state
-	*rng_state = local_rng_state;
+
 	return accum / samples;
 	return accum / samples;
 }
 }
 
 
+void VoxelLightBaker::_lightmap_bake_point(uint32_t p_x, LightMap *p_line) { // Bakes the light of one lightmap pixel; make_lightmap() passes the start of a row as p_line, and p_x is the column within that row.
+
+
+	LightMap *pixel = &p_line[p_x];
+	if (pixel->pos == Vector3())
+		return; // position equals a default-constructed Vector3: the pixel is treated as unused and skipped
+	//print_line("pos: " + pixel->pos + " normal " + pixel->normal);
+	switch (bake_mode) { // there is no default case: any other bake mode leaves pixel->light untouched
+		case BAKE_MODE_CONE_TRACE: {
+			pixel->light = _compute_pixel_light_at_pos(pixel->pos, pixel->normal) * energy;
+		} break;
+		case BAKE_MODE_RAY_TRACE: {
+			pixel->light = _compute_ray_trace_at_pos(pixel->pos, pixel->normal) * energy;
+		} break;
+			//	pixel->light = Vector3(1, 1, 1);
+			//}
+	}
+
+}
+
 Error VoxelLightBaker::make_lightmap(const Transform &p_xform, Ref<Mesh> &p_mesh, LightMapData &r_lightmap, bool (*p_bake_time_func)(void *, float, float), void *p_bake_time_ud) {
 Error VoxelLightBaker::make_lightmap(const Transform &p_xform, Ref<Mesh> &p_mesh, LightMapData &r_lightmap, bool (*p_bake_time_func)(void *, float, float), void *p_bake_time_ud) {
 
 
 	//transfer light information to a lightmap
 	//transfer light information to a lightmap
@@ -1862,53 +1879,10 @@ Error VoxelLightBaker::make_lightmap(const Transform &p_xform, Ref<Mesh> &p_mesh
 		volatile int lines = 0;
 		volatile int lines = 0;
 
 
 		// make sure our OS-level rng is seeded
 		// make sure our OS-level rng is seeded
-		srand(OS::get_singleton()->get_ticks_usec());
-
-		// setup an RNG state for each OpenMP thread
-		uint32_t threadcount = 1;
-		uint32_t threadnum = 0;
-#ifdef _OPENMP
-		threadcount = omp_get_max_threads();
-#endif
-		Vector<uint32_t> rng_states;
-		rng_states.resize(threadcount);
-		for (uint32_t i = 0; i < threadcount; i++) {
-			do {
-				rng_states[i] = rand();
-			} while (rng_states[i] == 0);
-		}
-		uint32_t *rng_states_p = rng_states.ptrw();
 
 
 		for (int i = 0; i < height; i++) {
 		for (int i = 0; i < height; i++) {
 
 
-		//print_line("bake line " + itos(i) + " / " + itos(height));
-#ifdef _OPENMP
-#pragma omp parallel for schedule(dynamic, 1) private(threadnum)
-#endif
-			for (int j = 0; j < width; j++) {
-
-#ifdef _OPENMP
-				threadnum = omp_get_thread_num();
-#endif
-
-				//if (i == 125 && j == 280) {
-
-				LightMap *pixel = &lightmap_ptr[i * width + j];
-				if (pixel->pos == Vector3())
-					continue; //unused, skipe
-
-				//print_line("pos: " + pixel->pos + " normal " + pixel->normal);
-				switch (bake_mode) {
-					case BAKE_MODE_CONE_TRACE: {
-						pixel->light = _compute_pixel_light_at_pos(pixel->pos, pixel->normal) * energy;
-					} break;
-					case BAKE_MODE_RAY_TRACE: {
-						pixel->light = _compute_ray_trace_at_pos(pixel->pos, pixel->normal, &rng_states_p[threadnum]) * energy;
-					} break;
-						//	pixel->light = Vector3(1, 1, 1);
-						//}
-				}
-			}
+			thread_process_array(width,this,&VoxelLightBaker::_lightmap_bake_point,&lightmap_ptr[i*width]);
 
 
 			lines = MAX(lines, i); //for multithread
 			lines = MAX(lines, i); //for multithread
 			if (p_bake_time_func) {
 			if (p_bake_time_func) {

+ 4 - 1
scene/3d/voxel_light_baker.h

@@ -148,9 +148,12 @@ private:
 	_FORCE_INLINE_ void _sample_baked_octree_filtered_and_anisotropic(const Vector3 &p_posf, const Vector3 &p_direction, float p_level, Vector3 &r_color, float &r_alpha);
 	_FORCE_INLINE_ void _sample_baked_octree_filtered_and_anisotropic(const Vector3 &p_posf, const Vector3 &p_direction, float p_level, Vector3 &r_color, float &r_alpha);
 	_FORCE_INLINE_ Vector3 _voxel_cone_trace(const Vector3 &p_pos, const Vector3 &p_normal, float p_aperture);
 	_FORCE_INLINE_ Vector3 _voxel_cone_trace(const Vector3 &p_pos, const Vector3 &p_normal, float p_aperture);
 	_FORCE_INLINE_ Vector3 _compute_pixel_light_at_pos(const Vector3 &p_pos, const Vector3 &p_normal);
 	_FORCE_INLINE_ Vector3 _compute_pixel_light_at_pos(const Vector3 &p_pos, const Vector3 &p_normal);
-	_FORCE_INLINE_ Vector3 _compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal, uint32_t *rng_state);
+	_FORCE_INLINE_ Vector3 _compute_ray_trace_at_pos(const Vector3 &p_pos, const Vector3 &p_normal);
+
+	void _lightmap_bake_point(uint32_t p_x,	LightMap *p_line);
 
 
 public:
 public:
+
 	void begin_bake(int p_subdiv, const AABB &p_bounds);
 	void begin_bake(int p_subdiv, const AABB &p_bounds);
 	void plot_mesh(const Transform &p_xform, Ref<Mesh> &p_mesh, const Vector<Ref<Material> > &p_materials, const Ref<Material> &p_override_material);
 	void plot_mesh(const Transform &p_xform, Ref<Mesh> &p_mesh, const Vector<Ref<Material> > &p_materials, const Ref<Material> &p_override_material);
 	void begin_bake_light(BakeQuality p_quality = BAKE_QUALITY_MEDIUM, BakeMode p_bake_mode = BAKE_MODE_CONE_TRACE, float p_propagation = 0.85, float p_energy = 1);
 	void begin_bake_light(BakeQuality p_quality = BAKE_QUALITY_MEDIUM, BakeMode p_bake_mode = BAKE_MODE_CONE_TRACE, float p_propagation = 0.85, float p_energy = 1);