Bläddra i källkod

Merge pull request #123 from HaxeFoundation/threads

Threads support
Nicolas Cannasse 7 år sedan
förälder
incheckning
950de37f84
19 ändrade filer med 769 tillägg och 244 borttagningar
  1. 4 2
      libs/ui/ui_win.c
  2. 49 0
      other/statics/Statics.hx
  3. 51 0
      other/tests/Threads.hx
  4. 209 56
      src/alloc.c
  5. 20 12
      src/debugger.c
  6. 62 8
      src/hl.h
  7. 2 0
      src/hlc.h
  8. 24 27
      src/hlc_main.c
  9. 1 2
      src/hlmodule.h
  10. 51 35
      src/jit.c
  11. 18 28
      src/main.c
  12. 2 7
      src/module.c
  13. 20 32
      src/std/error.c
  14. 2 0
      src/std/file.c
  15. 29 0
      src/std/fun.c
  16. 1 1
      src/std/maps.h
  17. 11 1
      src/std/obj.c
  18. 5 1
      src/std/sys.c
  19. 208 32
      src/std/thread.c

+ 4 - 2
libs/ui/ui_win.c

@@ -254,7 +254,9 @@ HL_PRIM vsentinel *HL_NAME(ui_start_sentinel)( double timeout, vclosure *c ) {
 	s->pause = false;
 	s->original = GetCurrentThreadId();
 	s->callback = c->fun;
+#	ifdef HL_THREADS
 	s->thread = hl_thread_start(sentinel_loop,s,false);
+#	endif
 	return s;
 }
 
@@ -289,7 +291,7 @@ HL_PRIM vbyte *HL_NAME(ui_choose_file)( bool forSave, vdynamic *options ) {
 		int i, pos = 0;
 		for(i=0;i<filters->size;i++) {
 			wchar_t *str = hl_aptr(filters,wchar_t*)[i];
-			int len = wcslen(str);
+			int len = (int)wcslen(str);
 			if( pos + len > 1024 ) return false;
 			memcpy(filterStr + pos, str, (len + 1) << 1);
 			pos += len + 1;
@@ -314,7 +316,7 @@ HL_PRIM vbyte *HL_NAME(ui_choose_file)( bool forSave, vdynamic *options ) {
 		if( !GetOpenFileName(&op) )
 			return NULL;
 	}
-	return hl_copy_bytes((vbyte*)outputFile, (wcslen(outputFile)+1)*2);
+	return hl_copy_bytes((vbyte*)outputFile, (int)(wcslen(outputFile)+1)*2);
 }
 
 

+ 49 - 0
other/statics/Statics.hx

@@ -0,0 +1,49 @@
+class Statics { // Developer script: walks the "src" and "libs" trees and prints top-level C/C++ lines that pass the filters below (presumably to spot mutable globals - inferred from the class name and filters)
+
+	static var BASE_PATH = "../../"; // prepended to every path before it is handed to the file system API
+
+	static function loopRec( dir : String ) { // recursively visits `dir` (relative to BASE_PATH) and scans each .c/.cpp file line by line
+		for( f in sys.FileSystem.readDirectory(BASE_PATH+dir) ) {
+			var path = dir + "/" + f;
+			if( sys.FileSystem.isDirectory(BASE_PATH+path) ) {
+				loopRec(path);
+				continue;
+			}
+			var ext = f.split(".").pop().toLowerCase();
+			if( ext != "c" && ext != "cpp" ) continue; // every other extension is ignored
+			
+			var lineNum = 0;
+			for( l in sys.io.File.getContent(BASE_PATH+path).split("\n") ) {
+				lineNum++; // 1-based line number used in the report
+				var l = StringTools.rtrim(l); // also drops a trailing "\r" left by the "\n" split
+				if( l == "" ) continue;
+				switch( l.charCodeAt(0) ) {
+				case '\t'.code, '*'.code, '#'.code, '}'.code: // indented code, comment continuation, preprocessor line or closing brace
+					continue;
+				default:
+					if( !StringTools.endsWith(l,";") ) // only lines terminated by ';' are candidates
+						continue;
+					// function decl
+					if( StringTools.endsWith(l,");") )
+						continue;
+					if( l.indexOf(" const ") > 0 ) // lines containing " const " after the first character are skipped
+						continue;
+					for( w in ["HL_PRIM ","HL_API ","DEFINE_PRIM","typedef","struct","//","/*"," *"," "] ) // prefixes that rule a line out
+						if( StringTools.startsWith(l,w) ) {
+							l = null; // null marks the line as rejected for the check below
+							break;
+						}
+					if( l == null )
+						continue;
+					Sys.println('$path:$lineNum: $l'); // report format: path:line: text
+				}
+			}
+		}
+	}
+
+	static function main() { // entry point: scans both source roots
+		for( dir in ["src","libs"] )
+			loopRec(dir);
+	}
+
+}

+ 51 - 0
other/tests/Threads.hx

@@ -0,0 +1,51 @@
+class Threads { // Timing test: runs the same allocation loop with the NoThreads GC flag set, then cleared, then split over 2, 4 and 10 threads
+
+	static var WAIT = []; // WAIT[i] stays true until worker i has finished its loop
+	
+	@:hlNative("std","thread_create") static function thread_create( f : Void -> Void ) : hl.Abstract<"hl_thread"> { // bound to the native std.thread_create primitive; the Haxe body is only a placeholder
+		return null;
+	}
+
+	static function run(i,k) { // allocates ceil(10000000/k) one-byte hl.Bytes, then clears WAIT[i] when i >= 0
+		for( i in 0...Math.ceil(10000000/k) ) // this loop variable shadows the `i` parameter inside the loop only
+			new hl.Bytes(1);
+		if( i>=0 ) WAIT[i] = false; 
+	}
+	
+    public static function main() {
+
+		hl.Gc.enable(false); // disable major gc
+
+		var flags = hl.Gc.flags;
+		flags.set(NoThreads);
+		hl.Gc.flags = flags;
+		
+		var t0 = Sys.time();
+		run(-1,1); // -1: no WAIT slot is touched, the full loop runs on the calling thread
+		trace("Threads disable "+(Sys.time() - t0));
+		
+		flags.unset(NoThreads);
+		hl.Gc.flags = flags;
+	
+		var t0 = Sys.time();
+		run(-1,1);
+		trace("Single thread "+(Sys.time() - t0));
+		
+		for( COUNT in [2,4,10] ) {
+			for( i in 0...COUNT ) 
+				WAIT[i] = true;
+			t0 = Sys.time();
+			for( i in 0...COUNT ) 			
+				thread_create(run.bind(i,COUNT)); // each worker performs 1/COUNT of the allocations
+			var i = 0;
+			while( i < COUNT ) { // polls the flags; restarts from slot 0 whenever a worker is still running
+				if( WAIT[i] ) {
+					i = 0;
+					Sys.sleep(0);
+				} else i++;
+			}
+			trace(COUNT+" threads "+(Sys.time() - t0));
+		}
+    }
+	
+}

+ 209 - 56
src/alloc.c

@@ -137,6 +137,7 @@ static const int GC_SIZES[GC_PARTITIONS] = {4,8,12,16,20,	8,64,1<<14,1<<22};
 #define GC_PROFILE		1
 #define GC_DUMP_MEM		2
 #define GC_TRACK		4
+#define GC_NO_THREADS	8
 
 static int gc_flags = 0;
 static gc_pheader *gc_pages[GC_ALL_PAGES] = {NULL};
@@ -146,6 +147,15 @@ static gc_pheader *gc_level1_null[1<<GC_LEVEL1_BITS] = {NULL};
 static gc_pheader **hl_gc_page_map[1<<GC_LEVEL0_BITS] = {NULL};
 static void (*gc_track_callback)(hl_type *,int,int,void*) = NULL;
 
+static struct { // registry of the threads registered with the GC
+	int count; // number of entries in `threads`
+	bool stopping_world; // set by gc_stop_world(true), cleared by gc_stop_world(false)
+	hl_thread_info **threads; // one pointer per registered thread, in registration order
+	hl_mutex *global_lock; // allocated in hl_gc_init; taken by gc_global_lock unless GC_NO_THREADS is set
+} gc_threads;
+
+HL_THREAD_STATIC_VAR hl_thread_info *current_thread; // per-thread slot: set by hl_register_thread, reset to NULL by hl_unregister_thread
+
 static struct {
 	int64 total_requested;
 	int64 total_allocated;
@@ -180,11 +190,40 @@ static void ***gc_roots = NULL;
 static int gc_roots_count = 0;
 static int gc_roots_max = 0;
 
+HL_API hl_thread_info *hl_get_thread() { // returns the calling thread's info, or NULL when the thread has not been registered
+	return current_thread;
+}
+
+static void gc_save_context(hl_thread_info *t ) { // records the caller's registers and stack position in t; gc_mark later scans both
+	setjmp(t->gc_regs); // used only to copy the register state into gc_regs, the return value is ignored
+	t->stack_cur = &t; // address of a local: start bound of the stack range scanned up to stack_top
+}
+
+#ifndef HL_THREADS
+#	define gc_global_lock(_)
+#else
+// Takes (lock=true) or drops (lock=false) the global GC lock on behalf of the calling thread.
+// While the lock is held the thread counts as blocking (gc_blocking > 0), which is the state
+// gc_stop_world waits for. The mutex itself is skipped when the GC_NO_THREADS flag is set.
+// Calling it before any thread has been registered is a no-op; calling it from an unregistered
+// thread once others exist is a fatal error.
+static void gc_global_lock( bool lock ) {
+	hl_thread_info *t = current_thread;
+	bool mt = (gc_flags & GC_NO_THREADS) == 0;
+	// start-up path (roots or allocations made before hl_register_thread): there is no
+	// thread info to update yet, so dereferencing t below would crash
+	if( !t && gc_threads.count == 0 )
+		return;
+	if( !t )
+		hl_fatal("Can't lock GC in unregistered thread");
+	if( lock ) {
+		if( t->gc_blocking )
+			hl_fatal("Can't lock GC in hl_blocking section");
+		if( mt ) gc_save_context(t); // stack and registers must be scannable while we wait for the mutex
+		t->gc_blocking++;
+		if( mt ) hl_mutex_acquire(gc_threads.global_lock);
+	} else {
+		t->gc_blocking--;
+		if( mt ) hl_mutex_release(gc_threads.global_lock);
+	}
+}
+#endif
+
 HL_PRIM void hl_gc_set_track( void *f ) {
 	gc_track_callback = f;
 }
 
 HL_PRIM void hl_add_root( void *r ) {
+	gc_global_lock(true);
 	if( gc_roots_count == gc_roots_max ) {
 		int nroots = gc_roots_max ? (gc_roots_max << 1) : 16;
 		void ***roots = (void***)malloc(sizeof(void*)*nroots);
@@ -194,20 +233,19 @@ HL_PRIM void hl_add_root( void *r ) {
 		gc_roots_max = nroots;
 	}
 	gc_roots[gc_roots_count++] = (void**)r;
-}
-
-HL_PRIM void hl_pop_root() {
-	gc_roots_count--;
+	gc_global_lock(false);
 }
 
 HL_PRIM void hl_remove_root( void *v ) {
 	int i;
-	for(i=0;i<gc_roots_count;i++)
+	gc_global_lock(true);
+	for(i=gc_roots_count-1;i>=0;i--)
 		if( gc_roots[i] == (void**)v ) {
 			gc_roots_count--;
 			memmove(gc_roots + i, gc_roots + (i+1), (gc_roots_count - i) * sizeof(void*));
 			break;
 		}
+	gc_global_lock(false);
 }
 
 HL_PRIM gc_pheader *hl_gc_get_page( void *v ) {
@@ -219,6 +257,72 @@ HL_PRIM gc_pheader *hl_gc_get_page( void *v ) {
 	return page;
 }
 
+// -------------------------  THREADS ----------------------------------------------------------
+
+HL_API int hl_thread_id();
+
+HL_API void hl_register_thread( void *stack_top ) { // registers the calling thread with the GC; stack_top is kept as the end bound for stack scanning
+	if( hl_get_thread() )
+		hl_fatal("Thread already registered");
+
+	hl_thread_info *t = (hl_thread_info*)malloc(sizeof(hl_thread_info)); // NOTE(review): the malloc result is not checked
+	memset(t, 0, sizeof(hl_thread_info));
+	t->thread_id = hl_thread_id();
+	t->stack_top = stack_top;
+	current_thread = t; // set before hl_add_root: with HL_THREADS it goes through gc_global_lock, which reads current_thread
+	hl_add_root(&t->exc_value);
+	hl_add_root(&t->exc_handler);
+
+	gc_global_lock(true);
+	hl_thread_info **all = (hl_thread_info**)malloc(sizeof(void*) * (gc_threads.count + 1)); // fresh array with room for one more entry
+	memcpy(all,gc_threads.threads,sizeof(void*)*gc_threads.count);
+	gc_threads.threads = all; // NOTE(review): the previous array is not freed here - confirm whether that is intentional
+	all[gc_threads.count++] = t;
+	gc_global_lock(false);
+}
+
+HL_API void hl_unregister_thread() {
+	int i;
+	hl_thread_info *t = hl_get_thread();
+	if( !t )
+		hl_fatal("Thread not registered");
+	hl_remove_root(&t->exc_value);
+	hl_remove_root(&t->exc_handler);
+	gc_global_lock(true);
+	for(i=0;i<gc_threads.count;i++)
+		if( gc_threads.threads[i] == t ) {
+			memmove(gc_threads.threads + i, gc_threads.threads + i + 1, sizeof(void*) * (gc_threads.count - i - 1));
+			gc_threads.count--;
+			break;
+		}
+	free(t);
+	current_thread = NULL;
+	// don't use gc_global_lock(false)
+	hl_mutex_release(gc_threads.global_lock);
+}
+
+HL_API void *hl_gc_threads_info() { // exposes the address of the gc_threads registry as an opaque pointer
+	return &gc_threads;
+}
+
+static void gc_stop_world( bool b ) { // b=true: wait until every registered thread is in a blocking section; b=false: clear the stop request
+#	ifdef HL_THREADS
+	if( b ) {
+		int i;
+		gc_threads.stopping_world = true; // read by hl_blocking(false), which then passes through the global lock
+		for(i=0;i<gc_threads.count;i++) {
+			hl_thread_info *t = gc_threads.threads[i];
+			while( t->gc_blocking == 0 ) {}; // spinwait
+		}
+	} else {
+		// releasing global lock will release all threads
+		gc_threads.stopping_world = false;
+	}
+#	else
+	if( b ) gc_save_context(current_thread); // build without threads: only the current thread's context is saved
+#	endif
+}
+
 // -------------------------  ALLOCATOR ----------------------------------------------------------
 
 static void *gc_alloc_page_memory( int size );
@@ -271,6 +375,7 @@ static int PAGE_ID = 0;
 #endif
 
 HL_API void hl_gc_dump_memory( const char *filename );
+static void gc_major( void );
 
 static gc_pheader *gc_alloc_new_page( int pid, int block, int size, int kind, bool varsize ) {
 	int m, i;
@@ -298,14 +403,16 @@ retry:
 	p = (gc_pheader*)base;
 	if( !base ) {
 		int pages = gc_stats.pages_allocated;
-		hl_gc_major();
+		gc_major();
 		if( pages != gc_stats.pages_allocated ) {
 			size = old_size;
 			goto retry;
 		}
 		// big block : report stack trace - we should manage to handle it
-		if( size >= (8 << 20) )
+		if( size >= (8 << 20) ) {
+			gc_global_lock(false);
 			hl_error_msg(USTR("Failed to alloc %d KB"),size>>10);
+		}
 		if( gc_flags & GC_DUMP_MEM ) hl_gc_dump_memory("hlmemory.dump");
 		out_of_memory("pages");
 	}
@@ -567,6 +674,7 @@ static void *gc_alloc_gen( int size, int flags, int *allocated ) {
 			return ptr;
 		}
 	}
+	gc_global_lock(false);
 	hl_error("Required memory allocation too big");
 	return NULL;
 }
@@ -577,6 +685,7 @@ void *hl_gc_alloc_gen( hl_type *t, int size, int flags ) {
 	void *ptr;
 	int time = 0;
 	int allocated = 0;
+	gc_global_lock(true);
 	gc_check_mark();
 #	ifdef GC_MEMCHK
 	size += HL_WSIZE;
@@ -591,6 +700,7 @@ void *hl_gc_alloc_gen( hl_type *t, int size, int flags ) {
 		MZERO(ptr,allocated);
 	else if( MEM_HAS_PTR(flags) && allocated != size )
 		MZERO((char*)ptr+size,allocated-size); // erase possible pointers after data
+	gc_global_lock(false);
 	if( (gc_flags & GC_TRACK) && gc_track_callback )
 		((void (*)(hl_type *,int,int,void*))gc_track_callback)(t,size,flags,ptr);
 #	ifdef GC_MEMCHK
@@ -602,7 +712,6 @@ void *hl_gc_alloc_gen( hl_type *t, int size, int flags ) {
 // -------------------------  MARKING ----------------------------------------------------------
 
 static float gc_mark_threshold = 0.2f;
-static void *gc_stack_top = NULL;
 static int mark_size = 0;
 static unsigned char *mark_data = NULL;
 static void **cur_mark_stack = NULL;
@@ -766,16 +875,35 @@ static void gc_call_finalizers(){
 	}
 }
 
+static void gc_mark_stack( void *start, void *end ) { // treats every pointer-sized word in [start,end) as a candidate pointer and marks the block it designates
+	void **mark_stack = cur_mark_stack; // local copy; presumably advanced by GC_PUSH_GEN (macro not visible here)
+	void **stack_head = (void**)start;
+	while( stack_head < (void**)end ) { // `end` itself is not read
+		void *p = *stack_head++;
+		gc_pheader *page = GC_GET_PAGE(p);
+		int bid;
+		if( !page || (((unsigned char*)p - (unsigned char*)page)%page->block_size) != 0 ) continue; // no page, or p is not aligned on a block boundary
+#		ifdef HL_64
+		if( !INPAGE(p,page) ) continue;
+#		endif
+		bid = (int)((unsigned char*)p - (unsigned char*)page) / page->block_size; // block index inside the page
+		if( page->sizes ) {
+			if( page->sizes[bid] == 0 ) continue; // a zero size entry is skipped
+		} else if( bid < page->first_block )
+			continue;
+		if( (page->bmp[bid>>3] & (1<<(bid&7))) == 0 ) { // bit not set yet
+			page->bmp[bid>>3] |= 1<<(bid&7);
+			GC_PUSH_GEN(p,page,bid);
+		}
+	}
+	cur_mark_stack = mark_stack; // publish the updated position
+}
+
 static void gc_mark() {
-	jmp_buf regs;
-	void **stack_head;
-	void **stack_top = (void**)gc_stack_top;
 	void **mark_stack = cur_mark_stack;
 	int mark_bytes = gc_stats.mark_bytes;
 	int pid, i;
 	unsigned char *mark_cur;
-	// save registers
-	setjmp(regs);
 	// prepare mark bits
 	if( mark_bytes > mark_size ) {
 		gc_free_page_memory(mark_data, mark_size);
@@ -814,27 +942,16 @@ static void gc_mark() {
 			GC_PUSH_GEN(p,page,bid);
 		}
 	}
-	// scan stack
-	stack_head = (void**)&stack_head;
-	if( stack_head > (void**)&regs ) stack_head = (void**)&regs; // fix for compilers that might inverse variables
-	while( stack_head <= stack_top ) {
-		void *p = *stack_head++;
-		gc_pheader *page = GC_GET_PAGE(p);
-		int bid;
-		if( !page || (((unsigned char*)p - (unsigned char*)page)%page->block_size) != 0 ) continue;
-#		ifdef HL_64
-		if( !INPAGE(p,page) ) continue;
-#		endif
-		bid = (int)((unsigned char*)p - (unsigned char*)page) / page->block_size;
-		if( page->sizes ) {
-			if( page->sizes[bid] == 0 ) continue;
-		} else if( bid < page->first_block )
-			continue;
-		if( (page->bmp[bid>>3] & (1<<(bid&7))) == 0 ) {
-			page->bmp[bid>>3] |= 1<<(bid&7);
-			GC_PUSH_GEN(p,page,bid);
-		}
+
+	// scan threads stacks & registers
+	for(i=0;i<gc_threads.count;i++) {
+		hl_thread_info *t = gc_threads.threads[i];
+		cur_mark_stack = mark_stack;
+		gc_mark_stack(t->stack_cur,t->stack_top);
+		gc_mark_stack(&t->gc_regs,(void**)&t->gc_regs + (sizeof(jmp_buf) / sizeof(void*) - 1));
+		mark_stack = cur_mark_stack;
 	}
+
 	cur_mark_stack = mark_stack;
 	if( mark_stack ) gc_flush_mark();
 	gc_call_finalizers();
@@ -844,11 +961,13 @@ static void gc_mark() {
 	gc_flush_empty_pages();
 }
 
-HL_API void hl_gc_major() {
+static void gc_major() {
 	int time = TIMESTAMP(), dt;
 	gc_stats.last_mark = gc_stats.total_allocated;
 	gc_stats.last_mark_allocs = gc_stats.allocation_count;
+	gc_stop_world(true);
 	gc_mark();
+	gc_stop_world(false);
 	dt = TIMESTAMP() - time;
 	gc_stats.mark_count++;
 	gc_stats.mark_time += dt;
@@ -868,6 +987,12 @@ HL_API void hl_gc_major() {
 	}
 }
 
+HL_API void hl_gc_major() { // public entry point: runs gc_major() with the global GC lock held
+	gc_global_lock(true);
+	gc_major();
+	gc_global_lock(false);
+}
+
 HL_API bool hl_is_gc_ptr( void *ptr ) {
 	gc_pheader *page = GC_GET_PAGE(ptr);
 	int bid;
@@ -887,12 +1012,11 @@ static void gc_check_mark() {
 	int64 m = gc_stats.total_allocated - gc_stats.last_mark;
 	int64 b = gc_stats.allocation_count - gc_stats.last_mark_allocs;
 	if( (m > gc_stats.pages_total_memory * gc_mark_threshold || b > gc_stats.pages_blocks * gc_mark_threshold) && gc_is_active )
-		hl_gc_major();
+		gc_major();
 }
 
-static void hl_gc_init( void *stack_top ) {
+static void hl_gc_init() {
 	int i;
-	gc_stack_top = stack_top;
 	for(i=0;i<1<<GC_LEVEL0_BITS;i++)
 		hl_gc_page_map[i] = gc_level1_null;
 	if( TRAILING_ONES(0x080003FF) != 10 || TRAILING_ONES(0) != 0 || TRAILING_ONES(0xFFFFFFFF) != 32 )
@@ -905,25 +1029,50 @@ static void hl_gc_init( void *stack_top ) {
 	if( getenv("HL_DUMP_MEMORY") )
 		gc_flags |= GC_DUMP_MEM;
 #	endif
+	memset(&gc_threads,0,sizeof(gc_threads));
+	gc_threads.global_lock = hl_mutex_alloc();
 }
 
 // ---- UTILITIES ----------------------
 
-static bool is_blocking = false; // TODO : use TLS for multithread
-
 HL_API bool hl_is_blocking() {
-	return is_blocking;
-}
-
-HL_API void hl_blocking( bool b) {
-	is_blocking = b;
+	hl_thread_info *t = current_thread;
+	// when called from a non GC thread, tells if the main thread is blocking
+	if( t == NULL ) {
+		if( gc_threads.count == 0 )
+			return false;
+		t = gc_threads.threads[0];
+	}
+	return t->gc_blocking > 0;
 }
 
-void hl_global_init( void *stack_top ) {
-	hl_gc_init(stack_top);
+HL_API void hl_blocking( bool b ) { // enters (b=true) or leaves (b=false) a blocking section for the calling thread; sections nest through the gc_blocking counter
+	hl_thread_info *t = current_thread;
+	if( !t ) hl_error("Unregistered thread");
+	if( b ) {
+#		ifdef HL_THREADS
+		if( t->gc_blocking == 0 )
+			gc_save_context(t); // only the outermost entry records registers and stack position
+#		endif
+		t->gc_blocking++;
+	} else if( t->gc_blocking == 0 )
+		hl_error("Unblocked thread");
+	else {
+		t->gc_blocking--;
+		if( t->gc_blocking == 0 && gc_threads.stopping_world ) { // a stop request is pending: go through the global lock before resuming
+			gc_global_lock(true);
+			gc_global_lock(false);
+		}
+	}
+}
 
 void hl_cache_free();
+void hl_cache_init();
+
+void hl_global_init() { // runtime start-up: calls hl_gc_init(), then hl_cache_init()
+	hl_gc_init();
+	hl_cache_init();
+}
 
 void hl_global_free() {
 	hl_cache_free();
@@ -1069,11 +1218,11 @@ vdynamic *hl_alloc_dynamic( hl_type *t ) {
 #	define DYN_PAD
 #endif
 
-static vdynamic vdyn_true = { &hlt_bool, DYN_PAD {true} };
-static vdynamic vdyn_false = { &hlt_bool, DYN_PAD {false} };
+static const vdynamic vdyn_true = { &hlt_bool, DYN_PAD {true} };
+static const vdynamic vdyn_false = { &hlt_bool, DYN_PAD {false} };
 
 vdynamic *hl_alloc_dynbool( bool b ) {
-	return b ? &vdyn_true : &vdyn_false;
+	return (vdynamic*)(b ? &vdyn_true : &vdyn_false);
 }
 
 
@@ -1157,6 +1306,8 @@ HL_API void hl_gc_set_dump_types( hl_types_dump tdump ) {
 
 HL_API void hl_gc_dump_memory( const char *filename ) {
 	int i;
+	gc_global_lock(true);
+	gc_stop_world(true);
 	gc_mark();
 	fdump = fopen(filename,"wb");
 	// header
@@ -1187,13 +1338,13 @@ HL_API void hl_gc_dump_memory( const char *filename ) {
 	for(i=0;i<gc_roots_count;i++)
 		fdump_p(*gc_roots[i]);
 	// stacks
-	fdump_i(1);
-	fdump_p(gc_stack_top);
-	{
-		void **stack_head = (void**)&stack_head;
-		int size = (int)((void**)gc_stack_top - stack_head);
+	fdump_i(gc_threads.count);
+	for(i=0;i<gc_threads.count;i++) {
+		hl_thread_info *t = gc_threads.threads[i];
+		fdump_p(t->stack_top);
+		int size = (int)((void**)t->stack_top - (void**)t->stack_cur);
 		fdump_i(size);
-		fdump_d(stack_head,size*sizeof(void*));
+		fdump_d(t->stack_cur,size*sizeof(void*));
 	}
 	// types
 #	define fdump_t(t)	fdump_i(t.kind); fdump_p(&t);
@@ -1209,6 +1360,8 @@ HL_API void hl_gc_dump_memory( const char *filename ) {
 	if( gc_types_dump ) gc_types_dump(fdump_d);
 	fclose(fdump);
 	fdump = NULL;
+	gc_stop_world(false);
+	gc_global_lock(false);
 }
 
 HL_API vdynamic *hl_debug_call( int mode, vdynamic *v ) {
@@ -1223,4 +1376,4 @@ DEFINE_PRIM(_VOID, gc_dump_memory, _BYTES);
 DEFINE_PRIM(_I32, gc_get_flags, _NO_ARG);
 DEFINE_PRIM(_VOID, gc_set_flags, _I32);
 DEFINE_PRIM(_DYN, debug_call, _I32 _DYN);
-
+DEFINE_PRIM(_VOID, blocking, _BOOL);

+ 20 - 12
src/debugger.c

@@ -34,12 +34,11 @@ HL_API int hl_socket_send( hl_socket *s, vbyte *buf, int pos, int len );
 HL_API int hl_socket_recv( hl_socket *s, vbyte *buf, int pos, int len );
 HL_API void hl_sys_sleep( double t );
 HL_API int hl_thread_id();
-HL_API vdynamic **hl_debug_exc;
+HL_API void *hl_gc_threads_info();
 
 static hl_socket *debug_socket = NULL;
 static hl_socket *client_socket = NULL;
 static bool debugger_connected = false;
-static int main_thread_id = 0;
 
 #define send hl_send_data
 static void send( void *ptr, int size ) {
@@ -47,24 +46,28 @@ static void send( void *ptr, int size ) {
 }
 
 static void hl_debug_loop( hl_module *m ) {
-	void *stack_top = hl_module_stack_top();
-	void *dbg_addr = &hl_debug_exc;
+	void *inf_addr = hl_gc_threads_info();
 	int flags = 0;
+	int hl_ver = HL_VERSION;
+	bool loop = false;
 #	ifdef HL_64
 	flags |= 1;
 #	endif
 	if( sizeof(bool) == 4 ) flags |= 2;
-	while( true ) {
+#	ifdef HL_THREADS
+	flags |= 4;
+	loop = true;
+#	endif
+	do {
 		int i;
 		vbyte cmd;
 		hl_socket *s = hl_socket_accept(debug_socket);
 		client_socket = s;
-		send("HLD0",4);
+		send("HLD1",4);
 		send(&flags,4);
-		send(&main_thread_id,4);
+		send(&hl_ver, 4);
+		send(&inf_addr, sizeof(void*));
 		send(&m->globals_data,sizeof(void*));
-		send(&dbg_addr,sizeof(void*));
-		send(&stack_top,sizeof(void*));
 		send(&m->jit_code,sizeof(void*));
 		send(&m->codesize,4);
 		send(&m->code->types,sizeof(void*));
@@ -96,7 +99,7 @@ static void hl_debug_loop( hl_module *m ) {
 		hl_socket_close(s);
 		debugger_connected = true;
 		client_socket = NULL;
-	}
+	} while( loop );
 }
 
 bool hl_module_debug( hl_module *m, int port, bool wait ) {
@@ -108,10 +111,10 @@ bool hl_module_debug( hl_module *m, int port, bool wait ) {
 		hl_socket_close(s);
 		return false;
 	}
+	debug_socket = s;
+#	ifdef HL_THREADS
 	hl_add_root(&debug_socket);
 	hl_add_root(&client_socket);
-	main_thread_id = hl_thread_id();
-	debug_socket = s;
 	if( !hl_thread_start(hl_debug_loop, m, false) ) {
 		hl_socket_close(s);
 		return false;
@@ -120,5 +123,10 @@ bool hl_module_debug( hl_module *m, int port, bool wait ) {
 		while( !debugger_connected )
 			hl_sys_sleep(0.01);
 	}
+#	else
+	// imply --debug-wait
+	hl_debug_loop(m);
+	hl_socket_close(debug_socket);
+#	endif
 	return true;
 }

+ 62 - 8
src/hl.h

@@ -27,7 +27,7 @@
 	https://github.com/HaxeFoundation/hashlink/wiki/
 **/
 
-#define HL_VERSION	0x150
+#define HL_VERSION	0x160
 
 #if defined(_WIN32)
 #	define HL_WIN
@@ -127,6 +127,20 @@
 #	define HL_DEBUG
 #endif
 
+#ifndef HL_NO_THREADS // threads support is on unless HL_NO_THREADS is defined
+#	define HL_THREADS
+#	ifdef HL_VCC // with HL_VCC the storage-class keyword is placed after the thread specifier
+#		define HL_THREAD_VAR __declspec( thread )
+#		define HL_THREAD_STATIC_VAR HL_THREAD_VAR static
+#	else
+#		define HL_THREAD_VAR __thread
+#		define HL_THREAD_STATIC_VAR static HL_THREAD_VAR
+#	endif
+#else // no threads: the same names expand to plain (non thread-local) storage
+#	define HL_THREAD_VAR
+#	define HL_THREAD_STATIC_VAR static
+#endif
+
 #include <stddef.h>
 #ifndef HL_VCC
 #	include <stdint.h>
@@ -610,15 +624,33 @@ HL_API vclosure *hl_make_fun_wrapper( vclosure *c, hl_type *to );
 HL_API void *hl_wrapper_call( void *value, void **args, vdynamic *ret );
 HL_API void *hl_dyn_call_obj( vdynamic *obj, hl_type *ft, int hfield, void **args, vdynamic *ret );
 HL_API vdynamic *hl_dyn_call( vclosure *c, vdynamic **args, int nargs );
+HL_API vdynamic *hl_dyn_call_safe( vclosure *c, vdynamic **args, int nargs, bool *isException );
 
 // ----------------------- THREADS --------------------------------------------------
 
 struct _hl_thread;
+struct _hl_mutex;
+struct _hl_tls;
 typedef struct _hl_thread hl_thread;
+typedef struct _hl_mutex hl_mutex;
+typedef struct _hl_tls hl_tls;
 
 HL_API hl_thread *hl_thread_start( void *callback, void *param, bool withGC );
 HL_API hl_thread *hl_thread_current( void );
-HL_API bool hl_thread_pause( hl_thread *t, bool pause );
+HL_API void hl_thread_yield(void);
+HL_API void hl_register_thread( void *stack_top );
+HL_API void hl_unregister_thread( void );
+
+HL_API hl_mutex *hl_mutex_alloc( void );
+HL_API void hl_mutex_acquire( hl_mutex *l );
+HL_API bool hl_mutex_try_acquire( hl_mutex *l );
+HL_API void hl_mutex_release( hl_mutex *l );
+HL_API void hl_mutex_free( hl_mutex *l );
+
+HL_API hl_tls *hl_tls_alloc( void );
+HL_API void hl_tls_set( hl_tls *l, void *value );
+HL_API void *hl_tls_get( hl_tls *l );
+HL_API void hl_tls_free( hl_tls *l );
 
 // ----------------------- ALLOC --------------------------------------------------
 
@@ -632,7 +664,6 @@ HL_API bool hl_thread_pause( hl_thread *t, bool pause );
 
 HL_API void *hl_gc_alloc_gen( hl_type *t, int size, int flags );
 HL_API void hl_add_root( void *ptr );
-HL_API void hl_pop_root( void );
 HL_API void hl_remove_root( void *ptr );
 HL_API void hl_gc_major( void );
 HL_API bool hl_is_gc_ptr( void *ptr );
@@ -653,7 +684,7 @@ HL_API void *hl_malloc( hl_alloc *a, int size );
 HL_API void *hl_zalloc( hl_alloc *a, int size );
 HL_API void hl_free( hl_alloc *a );
 
-HL_API void hl_global_init( void *stack_top );
+HL_API void hl_global_init( void );
 HL_API void hl_global_free( void );
 
 HL_API void *hl_alloc_executable_memory( int size );
@@ -757,10 +788,33 @@ struct _hl_trap_ctx {
 	jmp_buf buf;
 	hl_trap_ctx *prev;
 };
-HL_API hl_trap_ctx *hl_current_trap;
-HL_API vdynamic *hl_current_exc;
-#define hl_trap(ctx,r,label) { ctx.prev = hl_current_trap; hl_current_trap = &ctx; if( setjmp(ctx.buf) ) { r = hl_current_exc; goto label; } }
-#define hl_endtrap(ctx)	hl_current_trap = ctx.prev
+#define hl_trap(ctx,r,label) { hl_thread_info *__tinf = hl_get_thread(); ctx.prev = __tinf->trap_current; __tinf->trap_current = &ctx; if( setjmp(ctx.buf) ) { r = __tinf->exc_value; goto label; } }
+#define hl_endtrap(ctx)	hl_get_thread()->trap_current = ctx.prev
+
+#define HL_EXC_MAX_STACK	0x100
+#define HL_EXC_RETHROW		1
+#define HL_EXC_CATCH_ALL	2
+#define HL_EXC_IS_THROW		4
+
+typedef struct { // per-thread runtime state, returned by hl_get_thread()
+	int thread_id; // filled from hl_thread_id() at registration
+	// gc vars
+	volatile int gc_blocking; // nesting counter, > 0 while the thread is in a blocking section or holds the GC lock
+	void *stack_top; // value passed to hl_register_thread, end bound of the scanned stack range
+	void *stack_cur; // stack position recorded by the last context save, start bound of the scanned range
+	// exception handling
+	hl_trap_ctx *trap_current; // head of the trap list maintained by hl_trap / hl_endtrap
+	hl_trap_ctx *trap_uncaught;
+	vclosure *exc_handler; // registered as a GC root at thread registration
+	vdynamic *exc_value; // value read by hl_trap after setjmp returns non-zero; also registered as a GC root
+	int exc_flags; // presumably a combination of the HL_EXC_* bits - TODO confirm
+	int exc_stack_count;
+	// extra
+	jmp_buf gc_regs; // register snapshot taken with setjmp, scanned by the GC
+	void *exc_stack_trace[HL_EXC_MAX_STACK];
+} hl_thread_info;
+
+HL_API hl_thread_info *hl_get_thread();
 
 C_FUNCTION_END
 

+ 2 - 0
src/hlc.h

@@ -42,6 +42,8 @@
 #undef NO_ERROR
 #undef EOF
 #undef STRICT
+#undef TRUE
+#undef FALSE
 
 // disable some warnings triggered by HLC code generator
 

+ 24 - 27
src/hlc_main.c

@@ -36,6 +36,9 @@
 #ifdef HL_CONSOLE
 extern void sys_global_init();
 extern void sys_global_exit();
+#else
+#define sys_global_init()
+#define sys_global_exit()
 #endif
 
 
@@ -60,7 +63,7 @@ static uchar *hlc_resolve_symbol( void *addr, uchar *out, int *outSize ) {
 	if( !stack_process_handle ) {
 		stack_process_handle = GetCurrentProcess();
 		SymSetOptions(SYMOPT_LOAD_LINES);
-		SymInitialize(stack_process_handle,NULL,TRUE);
+		SymInitialize(stack_process_handle,NULL,(BOOL)1);
 	}
 	if( SymFromAddrW(stack_process_handle,(DWORD64)(int_val)addr,&index,&data.sym) ) {
 		DWORD offset = 0;
@@ -104,37 +107,31 @@ int wmain(int argc, uchar *argv[]) {
 #else
 int main(int argc, char *argv[]) {
 #endif
-	hl_trap_ctx ctx;
-	vdynamic *exc;
-#	ifdef HL_CONSOLE
+	vdynamic *ret;
+	bool isExc = false;
+	hl_type_fun tf = { 0 };
+	hl_type clt = { 0 };
+	vclosure cl = { 0 };
 	sys_global_init();
-#	endif
-	hl_global_init(&ctx);
+	hl_global_init();
+	hl_register_thread(&ret);
 	hl_setup_exception(hlc_resolve_symbol,hlc_capture_stack);
 	hl_setup_callbacks(hlc_static_call, hlc_get_wrapper);
 	hl_sys_init((void**)(argv + 1),argc - 1,NULL);
-	hl_trap(ctx, exc, on_exception);
-#	ifdef HL_VCC
-	__try {
-#	endif
-	hl_entry_point();
-#	ifdef HL_VCC
-	} __except( throw_handler(GetExceptionCode()) ) {}
-#	endif
-	hl_global_free();
-	return 0;
-on_exception:
-	{
-		varray *a = hl_exception_stack();
-		int i;
-		uprintf(USTR("Uncaught exception: %s\n"), hl_to_string(exc));
-		for(i=0;i<a->size;i++)
-			uprintf(USTR("Called from %s\n"), hl_aptr(a,uchar*)[i]);
-		hl_debug_break();
+	tf.ret = &hlt_void;
+	clt.kind = HFUN;
+	clt.fun = &tf;
+	cl.t = &clt;
+	cl.fun = hl_entry_point;
+	ret = hl_dyn_call_safe(&cl, NULL, 0, &isExc);
+	if( isExc ) {
+		varray *a = hl_exception_stack();
+		int i;
+		uprintf(USTR("Uncaught exception: %s\n"), hl_to_string(ret));
+		for (i = 0; i<a->size; i++)
+			uprintf(USTR("Called from %s\n"), hl_aptr(a, uchar*)[i]);
 	}
 	hl_global_free();
-#	ifdef HL_CONSOLE
 	sys_global_exit();
-#	endif
-	return 1;
+	return (int)isExc;
 }

+ 1 - 2
src/hlmodule.h

@@ -111,10 +111,9 @@ const uchar *hl_get_ustring( hl_code *c, int index );
 const char* hl_op_name( int op );
 
 hl_module *hl_module_alloc( hl_code *code );
-int hl_module_init( hl_module *m, void *stack_top );
+int hl_module_init( hl_module *m );
 void hl_module_free( hl_module *m );
 bool hl_module_debug( hl_module *m, int port, bool wait );
-void *hl_module_stack_top();
 
 jit_ctx *hl_jit_alloc();
 void hl_jit_free( jit_ctx *ctx );

+ 51 - 35
src/jit.c

@@ -142,7 +142,7 @@ typedef enum {
 #	define W64(wv)	W(wv)
 #endif
 
-static int SIB_MULT[] = {-1, 0, 1, -1, 2, -1, -1, -1, 3};
+static const int SIB_MULT[] = {-1, 0, 1, -1, 2, -1, -1, -1, 3};
 
 #define MOD_RM(mod,reg,rm)		B(((mod) << 6) | (((reg)&7) << 3) | ((rm)&7))
 #define SIB(mult,rmult,rbase)	B((SIB_MULT[mult]<<6) | (((rmult)&7)<<3) | ((rbase)&7))
@@ -215,14 +215,14 @@ struct vreg {
 #		define CALL_NREGS			4
 #		define RCPU_SCRATCH_COUNT	7
 #		define RFPU_SCRATCH_COUNT	6
-static int RCPU_SCRATCH_REGS[] = { Eax, Ecx, Edx, R8, R9, R10, R11 };
-static CpuReg CALL_REGS[] = { Ecx, Edx, R8, R9 };
+static const int RCPU_SCRATCH_REGS[] = { Eax, Ecx, Edx, R8, R9, R10, R11 };
+static const CpuReg CALL_REGS[] = { Ecx, Edx, R8, R9 };
 #	else
 #		define CALL_NREGS			6 // TODO : XMM6+XMM7 are FPU reg parameters
 #		define RCPU_SCRATCH_COUNT	9
 #		define RFPU_SCRATCH_COUNT	16
-static int RCPU_SCRATCH_REGS[] = { Eax, Ecx, Edx, Esi, Edi, R8, R9, R10, R11 };
-static CpuReg CALL_REGS[] = { Edi, Esi, Edx, Ecx, R8, R9 };
+static const int RCPU_SCRATCH_REGS[] = { Eax, Ecx, Edx, Esi, Edi, R8, R9, R10, R11 };
+static const CpuReg CALL_REGS[] = { Edi, Esi, Edx, Ecx, R8, R9 };
 #	endif
 #else
 #	define CALL_NREGS	0
@@ -230,7 +230,7 @@ static CpuReg CALL_REGS[] = { Edi, Esi, Edx, Ecx, R8, R9 };
 #	define RFPU_COUNT	8
 #	define RCPU_SCRATCH_COUNT	3
 #	define RFPU_SCRATCH_COUNT	8
-static int RCPU_SCRATCH_REGS[] = { Eax, Ecx, Edx };
+static const int RCPU_SCRATCH_REGS[] = { Eax, Ecx, Edx };
 #endif
 
 #define XMM(i)			((i) + RCPU_COUNT)
@@ -2704,6 +2704,7 @@ int hl_jit_function( jit_ctx *ctx, hl_module *m, hl_function *f ) {
 	unsigned short *debug16 = NULL;
 	int *debug32 = NULL;
 	call_regs cregs = {0};
+	hl_thread_info *tinf = NULL;
 	preg p;
 	ctx->f = f;
 	ctx->allocOffset = 0;
@@ -3814,40 +3815,51 @@ int hl_jit_function( jit_ctx *ctx, hl_module *m, hl_function *f ) {
 		case OTrap:
 			{
 				int size, jenter, jtrap;
+				int offset = 0;
 				int trap_size = (sizeof(hl_trap_ctx) + 15) & 0xFFF0;
 				hl_trap_ctx *t = NULL;
-				// trap pad
+#				ifndef HL_THREADS
+				if( tinf == NULL ) tinf = hl_get_thread(); // single thread
+#				endif
+
 #				ifdef HL_64
 				preg *trap = REG_AT(CALL_REGS[0]);
-				RLOCK(trap);
-				preg *tmp = alloc_reg(ctx,RCPU);
-				op64(ctx,MOV,tmp,pconst64(&p,(int_val)&hl_current_trap));
-				op64(ctx,MOV,trap,pmem(&p,tmp->id,0));
-				op64(ctx,SUB,PESP,pconst(&p,trap_size));
-				op64(ctx,MOV,pmem(&p,Esp,(int)(int_val)&t->prev),trap);
-				op64(ctx,MOV,trap,PESP);
-				op64(ctx,MOV,pmem(&p,tmp->id,0),trap);
 #				else
 				preg *trap = PEAX;
-				op64(ctx,MOV,trap,paddr(&p,&hl_current_trap));
+#				endif
+				RLOCK(trap);
+
+				preg *treg = alloc_reg(ctx, RCPU);
+				if( !tinf ) {
+					call_native(ctx, hl_get_thread, 0);
+					op64(ctx,MOV,treg,PEAX);
+					offset = (int)(int_val)&tinf->trap_current;
+				} else {
+					offset = 0;
+					op64(ctx,MOV,treg,pconst64(&p,(int_val)&tinf->trap_current));
+				}
+				op64(ctx,MOV,trap,pmem(&p,treg->id,offset));
 				op64(ctx,SUB,PESP,pconst(&p,trap_size));
 				op64(ctx,MOV,pmem(&p,Esp,(int)(int_val)&t->prev),trap);
 				op64(ctx,MOV,trap,PESP);
-				op64(ctx,MOV,paddr(&p,&hl_current_trap),trap);
-#				endif
+				op64(ctx,MOV,pmem(&p,treg->id,offset),trap);
+
 				size = begin_native_call(ctx, 1);
 				set_native_arg(ctx,trap);
 				call_native(ctx,setjmp,size);
 				op64(ctx,TEST,PEAX,PEAX);
 				XJump_small(JZero,jenter);
+				call_native(ctx, hl_get_thread, 0);
 				op64(ctx,ADD,PESP,pconst(&p,trap_size));
-#				ifdef HL_64
-				op64(ctx,MOV,PEAX,pconst64(&p,(int_val)&hl_current_exc));
-				op64(ctx,MOV,PEAX,pmem(&p,Eax,0));
-#				else
-				op64(ctx,MOV,PEAX,paddr(&p,&hl_current_exc));
-#				endif
+				if( !tinf ) { // threads build: fetch the thread info at run time and read exc_value at its field offset
+					call_native(ctx, hl_get_thread, 0);
+					op64(ctx,MOV,PEAX,pmem(&p, Eax, (int)(int_val)&tinf->exc_value));
+				} else { // single thread: the address of exc_value is known at JIT time (was wrongly loading trap_current)
+					op64(ctx,MOV,PEAX,pconst64(&p,(int_val)&tinf->exc_value));
+					op64(ctx,MOV,PEAX,pmem(&p, Eax, 0));
+				}
 				store(ctx,dst,PEAX,false);
+
 				jtrap = do_jump(ctx,OJAlways,false);
 				register_jump(ctx,jtrap,(opCount + 1) + o->p2);
 				patch_jump(ctx,jenter);
@@ -3856,19 +3868,23 @@ int hl_jit_function( jit_ctx *ctx, hl_module *m, hl_function *f ) {
 		case OEndTrap:
 			{
 				int trap_size = (sizeof(hl_trap_ctx) + 15) & 0xFFF0;
-				preg *r = alloc_reg(ctx, RCPU);
 				hl_trap_ctx *tmp = NULL;
-#				ifdef HL_64
-				preg *addr = alloc_reg(ctx,RCPU);
-				op64(ctx, MOV, addr, pconst64(&p,(int_val)&hl_current_trap));
-				op64(ctx, MOV, r, pmem(&p,addr->id,0));
-				op64(ctx, MOV, r, pmem(&p,r->id,(int)(int_val)&tmp->prev));
-				op64(ctx, MOV, pmem(&p,addr->id,0), r);
-#				else
-				op64(ctx, MOV, r, paddr(&p,&hl_current_trap));
+				preg *addr,*r;
+				int offset;
+				if (!tinf) {
+					call_native(ctx, hl_get_thread, 0);
+					addr = PEAX;
+					RLOCK(addr);
+					offset = (int)(int_val)&tinf->trap_current;
+				} else {
+					offset = 0;
+					addr = alloc_reg(ctx, RCPU);
+					op64(ctx, MOV, addr, pconst64(&p, (int_val)&tinf->trap_current));
+				}
+				r = alloc_reg(ctx, RCPU);
+				op64(ctx, MOV, r, pmem(&p,addr->id,offset));
 				op64(ctx, MOV, r, pmem(&p,r->id,(int)(int_val)&tmp->prev));
-				op64(ctx, MOV, paddr(&p,&hl_current_trap), r);
-#				endif
+				op64(ctx, MOV, pmem(&p,addr->id, offset), r);
 				op64(ctx,ADD,PESP,pconst(&p,trap_size));
 			}
 			break;

+ 18 - 28
src/main.c

@@ -94,12 +94,14 @@ int main(int argc, pchar *argv[]) {
 	struct {
 		hl_code *code;
 		hl_module *m;
-		vdynamic *exc;
+		vdynamic *ret;
+		vclosure c;
 	} ctx;
-	hl_trap_ctx trap;
+	bool isExc = false;
 	int first_boot_arg = -1;
 	argv++;
 	argc--;
+
 	while( argc ) {
 		pchar *arg = *argv++;
 		argc--;
@@ -142,51 +144,39 @@ int main(int argc, pchar *argv[]) {
 			argc = first_boot_arg;
 		}
 	}
-#	ifdef HL_WIN
-	setlocale(LC_CTYPE,""); // printf to current locale
-#	endif
-	hl_global_init(&ctx);
+	hl_global_init();
 	hl_sys_init((void**)argv,argc,file);
-	setbuf(stdout,NULL); // disable stdout buffering
+	hl_register_thread(&ctx);
 	ctx.code = load_code(file);
 	if( ctx.code == NULL )
 		return 1;
 	ctx.m = hl_module_alloc(ctx.code);
 	if( ctx.m == NULL )
 		return 2;
-	if( !hl_module_init(ctx.m, &argc) )
+	if( !hl_module_init(ctx.m) )
 		return 3;
 	hl_code_free(ctx.code);
 	if( debug_port > 0 && !hl_module_debug(ctx.m,debug_port,debug_wait) ) {
 		fprintf(stderr,"Could not start debugger on port %d",debug_port);
 		return 4;
 	}
-	hl_trap(trap, ctx.exc, on_exception);
-#	ifdef HL_VCC
-	__try {
-#	endif
-		vclosure c;
-		c.t = ctx.code->functions[ctx.m->functions_indexes[ctx.m->code->entrypoint]].type;
-		c.fun = ctx.m->functions_ptrs[ctx.m->code->entrypoint];
-		c.hasValue = 0;
-		hl_dyn_call(&c,NULL,0);
-#	ifdef HL_VCC
-	} __except( throw_handler(GetExceptionCode()) ) {}
-#	endif
-	hl_module_free(ctx.m);
-	hl_free(&ctx.code->alloc);
-	hl_global_free();
-	return 0;
-on_exception:
-	{
+	ctx.c.t = ctx.code->functions[ctx.m->functions_indexes[ctx.m->code->entrypoint]].type;
+	ctx.c.fun = ctx.m->functions_ptrs[ctx.m->code->entrypoint];
+	ctx.c.hasValue = 0;
+	ctx.ret = hl_dyn_call_safe(&ctx.c,NULL,0,&isExc);
+	if( isExc ) {
 		varray *a = hl_exception_stack();
 		int i;
-		uprintf(USTR("Uncaught exception: %s\n"), hl_to_string(ctx.exc));
+		uprintf(USTR("Uncaught exception: %s\n"), hl_to_string(ctx.ret));
 		for(i=0;i<a->size;i++)
 			uprintf(USTR("Called from %s\n"), hl_aptr(a,uchar*)[i]);
 		hl_debug_break();
+		hl_global_free();
+		return 1;
 	}
+	hl_module_free(ctx.m);
+	hl_free(&ctx.code->alloc);
 	hl_global_free();
-	return 1;
+	return 0;
 }
 

+ 2 - 7
src/module.c

@@ -30,11 +30,6 @@
 #endif
 
 static hl_module *cur_module;
-static void *stack_top;
-
-void *hl_module_stack_top() {
-	return stack_top;
-}
 
 static bool module_resolve_pos( void *addr, int *fidx, int *fpos ) {
 	int code_pos = ((int)(int_val)((unsigned char*)addr - (unsigned char*)cur_module->jit_code));
@@ -104,6 +99,7 @@ static uchar *module_resolve_symbol( void *addr, uchar *out, int *outSize ) {
 static int module_capture_stack( void **stack, int size ) {
 	void **stack_ptr = (void**)&stack;
 	void *stack_bottom = stack_ptr;
+	void *stack_top = hl_get_thread()->stack_top;
 	int count = 0;
 	unsigned char *code = cur_module->jit_code;
 	int code_size = cur_module->codesize;
@@ -277,7 +273,7 @@ static void disabled_primitive() {
 	hl_error("This library primitive has been disabled");
 }
 
-int hl_module_init( hl_module *m, void *stack_top_val ) {
+int hl_module_init( hl_module *m ) {
 	int i;
 	jit_ctx *ctx;
 	// RESET globals
@@ -432,7 +428,6 @@ int hl_module_init( hl_module *m, void *stack_top_val ) {
 	}
 	// DONE
 	cur_module = m;
-	stack_top = stack_top_val;
 	hl_setup_exception(module_resolve_symbol, module_capture_stack);
 	hl_gc_set_dump_types(hl_module_types_dump);
 	hl_jit_free(ctx);

+ 20 - 32
src/std/error.c

@@ -27,15 +27,6 @@
 #include <posix/posix.h>
 #endif
 
-HL_PRIM hl_trap_ctx *hl_current_trap = NULL;
-HL_PRIM vdynamic *hl_current_exc = NULL;
-HL_PRIM vdynamic **hl_debug_exc = NULL;
-HL_PRIM bool hl_debug_catch_all = false;
-static hl_trap_ctx *hl_trap_root = NULL;
-static void *stack_trace[0x1000];
-static int stack_count = 0;
-static bool exc_rethrow = false;
-
 HL_PRIM void *hl_fatal_error( const char *msg, const char *file, int line ) {
 	hl_blocking(true);
 #	ifdef HL_WIN_DESKTOP
@@ -56,7 +47,6 @@ typedef int (*capture_stack_type)( void **stack, int size );
 
 static resolve_symbol_type resolve_symbol_func = NULL;
 static capture_stack_type capture_stack_func = NULL;
-static vclosure *hl_error_handler = NULL;
 
 int hl_internal_capture_stack( void **stack, int size ) {
 	return capture_stack_func(stack,size);
@@ -78,31 +68,28 @@ HL_PRIM void hl_setup_exception( void *resolve_symbol, void *capture_stack ) {
 }
 
 HL_PRIM void hl_set_error_handler( vclosure *d ) {
-	hl_trap_root = hl_current_trap;
-	if( d == hl_error_handler )
-		return;
-	hl_error_handler = d;
-	hl_remove_root(&hl_error_handler);
-	if( d )
-		hl_add_root(&hl_error_handler);
+	hl_thread_info *t = hl_get_thread(); // the handler is now stored in the calling thread's info instead of globals
+	t->trap_uncaught = t->trap_current; // hl_throw treats an exception that reaches this trap as uncaught
+	t->exc_handler = d; // closure hl_throw calls with the thrown value in that case (may be NULL)
 }
 
 HL_PRIM void hl_throw( vdynamic *v ) {
-	hl_trap_ctx *t = hl_current_trap;
-	if( exc_rethrow )
-		exc_rethrow = false;
+	hl_thread_info *t = hl_get_thread(); // all exception state below belongs to the calling thread
+	hl_trap_ctx *trap = t->trap_current; // innermost trap : the one we will jump to
+	if( t->exc_flags & HL_EXC_RETHROW ) // set by hl_rethrow : keep the previously captured stack
+		t->exc_flags &= ~HL_EXC_RETHROW;
 	else
-		stack_count = capture_stack_func(stack_trace, 0x1000);
-	hl_current_exc = v;
-	hl_current_trap = t->prev;
-	if( t == hl_trap_root || hl_current_trap == NULL || hl_debug_catch_all ) {
-		hl_debug_exc = &v;
+		t->exc_stack_count = capture_stack_func(t->exc_stack_trace, HL_EXC_MAX_STACK); // fresh throw : record the stack for hl_exception_stack
+	t->exc_value = v; // value made available to the code that catches the exception
+	t->trap_current = trap->prev; // pop the trap before jumping to it
+	if( trap == t->trap_uncaught || t->trap_current == NULL || (t->exc_flags&HL_EXC_CATCH_ALL) ) { // considered uncaught : break into the debugger and notify the handler
+		t->exc_flags |= HL_EXC_IS_THROW; // flag raised only for the duration of hl_debug_break
 		hl_debug_break();
-		hl_debug_exc = NULL;
-		if( hl_error_handler ) hl_dyn_call(hl_error_handler,&v,1);
+		t->exc_flags &= ~HL_EXC_IS_THROW;
+		if( t->exc_handler ) hl_dyn_call(t->exc_handler,&v,1); // handler installed by hl_set_error_handler receives the thrown value
 	}
 	if( throw_jump == NULL ) throw_jump = longjmp;
-	throw_jump(t->buf,1);
+	throw_jump(trap->buf,1); // transfer control to the popped trap's saved jump buffer
 }
 
 HL_PRIM void hl_dump_stack() {
@@ -124,10 +111,11 @@ HL_PRIM void hl_dump_stack() {
 }
 
 HL_PRIM varray *hl_exception_stack() {
-	varray *a = hl_alloc_array(&hlt_bytes, stack_count);
+	hl_thread_info *t = hl_get_thread();
+	varray *a = hl_alloc_array(&hlt_bytes, t->exc_stack_count);
 	int i;
-	for(i=0;i<stack_count;i++) {
-		void *addr = stack_trace[i];
+	for(i=0;i<t->exc_stack_count;i++) {
+		void *addr = t->exc_stack_trace[i];
 		uchar sym[512];
 		int size = 512;
 		uchar *str = resolve_symbol_func(addr, sym, &size);
@@ -142,7 +130,7 @@ HL_PRIM varray *hl_exception_stack() {
 }
 
 HL_PRIM void hl_rethrow( vdynamic *v ) {
-	exc_rethrow = true;
+	hl_get_thread()->exc_flags |= HL_EXC_RETHROW; // tells the hl_throw below to keep the already captured stack trace (hl_throw clears the flag)
 	hl_throw(v);
 }
 

+ 2 - 0
src/std/file.c

@@ -159,7 +159,9 @@ HL_PRIM vbyte *hl_file_contents( vbyte *name, int *size ) {
 	len = ftell(f);
 	if( size ) *size = len;
 	fseek(f,0,SEEK_SET);
+	hl_blocking(false);
 	content = (vbyte*)hl_gc_alloc_noptr(size ? len : len+1);
+	hl_blocking(true);
 	if( !size ) content[len] = 0; // final 0 for UTF8
 	while( len > 0 ) {
 		int d = (int)fread((char*)content + p,1,len,f);

+ 29 - 0
src/std/fun.c

@@ -376,3 +376,32 @@ DEFINE_PRIM(_DYN, get_closure_value, _DYN);
 DEFINE_PRIM(_BOOL, fun_compare, _DYN _DYN);
 DEFINE_PRIM(_DYN, make_var_args, _FUN(_DYN,_ARR));
 DEFINE_PRIM(_DYN, call_method, _DYN _ARR);
+
+
+#ifdef HL_VCC
+static int throw_handler( int code ) { // filter expression of the __except in hl_dyn_call_safe : reports the structured exception `code` through hl_error
+	switch( code ) {
+	case EXCEPTION_ACCESS_VIOLATION: hl_error("Access violation");
+	case EXCEPTION_STACK_OVERFLOW: hl_error("Stack overflow");
+	default: hl_error("Unknown runtime error");
+	}
+	return EXCEPTION_CONTINUE_SEARCH; // NOTE(review): the cases above have no break, so this presumably relies on hl_error not returning - confirm
+}
+#endif
+
+/*
+	Calls the closure `c` with the `nargs` arguments stored in `args` and
+	catches any value thrown while the call is running.
+
+	- normal completion : `*isException` is set to false and the result of
+	  the call is returned.
+	- a value was thrown : `*isException` is set to true and the thrown value
+	  is returned instead.
+*/
+HL_PRIM vdynamic *hl_dyn_call_safe( vclosure *c, vdynamic **args, int nargs, bool *isException ) {
+	hl_trap_ctx trap;
+	vdynamic *ret = NULL, *exc;
+	*isException = false;
+	hl_trap(trap, exc, on_exception);
+#	ifdef HL_VCC
+	__try {
+#	endif
+		ret = hl_dyn_call(c,args,nargs);
+#	ifdef HL_VCC
+	} __except( throw_handler(GetExceptionCode()) ) {}
+#	endif
+	// The call returned normally : `trap` lives in this stack frame, so it has
+	// to be removed from the thread trap chain before we return. Otherwise
+	// trap_current would keep pointing to a dead frame and a later hl_throw
+	// would jump into it. (hl_throw already pops the trap on the exception path)
+	hl_get_thread()->trap_current = trap.prev;
+	return ret;
+on_exception:
+	*isException = true;
+	return exc;
+}

+ 1 - 1
src/std/maps.h

@@ -110,7 +110,7 @@ static void _MNAME(resize)( t_map *m ) {
 				c = old.entries[c].next;
 			}
 		}
-		hl_pop_root();
+		hl_remove_root(&old);
 	}
 }
 

+ 11 - 1
src/std/obj.c

@@ -73,8 +73,13 @@ static hl_field_lookup *obj_resolve_field( hl_type_obj *o, int hfield ) {
 
 static int hl_cache_count = 0;
 static int hl_cache_size = 0;
+static hl_mutex *hl_cache_lock = NULL;
 static hl_field_lookup *hl_cache = NULL;
 
+void hl_cache_init() { // creates the mutex that hl_hash_gen holds while it looks up / inserts into the name cache
+	hl_cache_lock = hl_mutex_alloc();
+}
+
 HL_PRIM int hl_hash( vbyte *b ) {
 	return hl_hash_gen((uchar*)b,true);
 }
@@ -99,7 +104,9 @@ HL_PRIM int hl_hash_gen( const uchar *name, bool cache_name ) {
 	}
 	h %= 0x1FFFFF7B;
 	if( cache_name ) {
-		hl_field_lookup *l = hl_lookup_find(hl_cache, hl_cache_count, h);
+		hl_field_lookup *l;
+		hl_mutex_acquire(hl_cache_lock);
+		l = hl_lookup_find(hl_cache, hl_cache_count, h);
 		// check for potential conflict (see haxe#5572)
 		while( l && ucmp((uchar*)l->t,oname) != 0 ) {
 			h++;
@@ -117,6 +124,7 @@ HL_PRIM int hl_hash_gen( const uchar *name, bool cache_name ) {
 			}
 			hl_lookup_insert(hl_cache,hl_cache_count++,h,(hl_type*)ustrdup(oname),0);
 		}
+		hl_mutex_release(hl_cache_lock);
 	}
 	return h;
 }
@@ -133,6 +141,8 @@ HL_PRIM void hl_cache_free() {
 	free(hl_cache);
 	hl_cache = NULL;
 	hl_cache_count = hl_cache_size = 0;
+	hl_mutex_free(hl_cache_lock);
+	hl_cache_lock = NULL;
 }
 
 HL_PRIM hl_obj_field *hl_obj_field_fetch( hl_type *t, int fid ) {

+ 5 - 1
src/std/sys.c

@@ -588,10 +588,14 @@ HL_PRIM void hl_sys_init(void **args, int nargs, void *hlfile) {
 	sys_args = (pchar**)args;
 	sys_nargs = nargs;
 	hl_file = hlfile;
+#	ifdef HL_WIN
+	setlocale(LC_CTYPE, ""); // printf to current locale
+#	endif
+	setbuf(stdout, NULL); // disable stdout buffering
 }
 
 HL_PRIM vbyte *hl_sys_hl_file() {
-	return hl_file;
+	return (vbyte*)hl_file;
 }
 
 #ifndef HL_MOBILE

+ 208 - 32
src/std/thread.c

@@ -21,43 +21,218 @@
  */
 #include <hl.h>
 
-#ifndef HL_WIN
+#if !defined(HL_THREADS) // threads support compiled out : placeholder types only
+
+struct _hl_mutex {
+	void *_unused; // never accessed : hl_mutex_alloc returns a dummy pointer in this configuration
+};
+
+struct _hl_tls {
+	void *value; // the single stored value, read and written by hl_tls_get / hl_tls_set
+};
+
+#elif defined(HL_WIN) // no struct _hl_tls here : hl_tls_alloc casts the TlsAlloc index to hl_tls*
+
+struct _hl_mutex {
+	CRITICAL_SECTION cs;
+};
+
+#else // pthread implementation
+
 #	include <pthread.h>
 #	include <unistd.h>
 #	include <sys/syscall.h>
+
+struct _hl_mutex {
+	pthread_mutex_t lock; // initialized as PTHREAD_MUTEX_RECURSIVE by hl_mutex_alloc
+};
+
+struct _hl_tls {
+	pthread_key_t key; // created by hl_tls_alloc, deleted by hl_tls_free
+};
+
 #endif
 
+// ----------------- ALLOC
+
+/*
+	Creates a new mutex, to be released with hl_mutex_free.
+	- pthread builds : the mutex is configured as PTHREAD_MUTEX_RECURSIVE
+	- Windows builds : the mutex wraps a CRITICAL_SECTION
+	- without HL_THREADS : nothing is allocated and the dummy non-NULL
+	  pointer (hl_mutex*)1 is returned
+*/
+HL_PRIM hl_mutex *hl_mutex_alloc() {
+#	ifdef HL_THREADS
+	hl_mutex *mutex = (hl_mutex*)malloc(sizeof(hl_mutex));
+#		ifdef HL_WIN
+	InitializeCriticalSection(&mutex->cs);
+#		else
+	pthread_mutexattr_t attr;
+	pthread_mutexattr_init(&attr);
+	pthread_mutexattr_settype(&attr,PTHREAD_MUTEX_RECURSIVE);
+	pthread_mutex_init(&mutex->lock,&attr);
+	pthread_mutexattr_destroy(&attr);
+#		endif
+	return mutex;
+#	else
+	return (hl_mutex*)1;
+#	endif
+}
+
+/*
+	Locks the mutex `l`, waiting until it becomes available.
+	Does nothing when threads support is disabled.
+*/
+HL_PRIM void hl_mutex_acquire( hl_mutex *l ) {
+#	ifdef HL_THREADS
+#		ifdef HL_WIN
+	EnterCriticalSection(&l->cs);
+#		else
+	pthread_mutex_lock(&l->lock);
+#		endif
+#	endif
+}
+
+/*
+	Attempts to lock the mutex `l` without waiting.
+	Returns true if the lock was taken, false otherwise.
+	Always returns true when threads support is disabled.
+*/
+HL_PRIM bool hl_mutex_try_acquire( hl_mutex *l ) {
+#	ifdef HL_THREADS
+#		ifdef HL_WIN
+	return (bool)TryEnterCriticalSection(&l->cs);
+#		else
+	return pthread_mutex_trylock(&l->lock) == 0;
+#		endif
+#	else
+	return true;
+#	endif
+}
+
+/*
+	Unlocks the mutex `l`.
+	Does nothing when threads support is disabled.
+*/
+HL_PRIM void hl_mutex_release( hl_mutex *l ) {
+#	ifdef HL_THREADS
+#		ifdef HL_WIN
+	LeaveCriticalSection(&l->cs);
+#		else
+	pthread_mutex_unlock(&l->lock);
+#		endif
+#	endif
+}
+
+/*
+	Destroys a mutex created by hl_mutex_alloc and releases its memory.
+	Does nothing when threads support is disabled (hl_mutex_alloc did not
+	allocate anything in that case).
+*/
+HL_PRIM void hl_mutex_free( hl_mutex *l ) {
+#	ifdef HL_THREADS
+#		ifdef HL_WIN
+	DeleteCriticalSection(&l->cs);
+#		else
+	pthread_mutex_destroy(&l->lock);
+#		endif
+	free(l);
+#	endif
+}
+
+// ----------------- THREAD LOCAL
+
+/*
+	Creates a new thread local storage slot, to be released with hl_tls_free.
+	- Windows builds : the TlsAlloc index itself is returned, cast to hl_tls*
+	- pthread builds : a pthread key (without destructor) is created
+	- without HL_THREADS : a plain heap cell initialized to NULL is used
+*/
+HL_PRIM hl_tls *hl_tls_alloc() {
+#	if defined(HL_THREADS) && defined(HL_WIN)
+	DWORD index = TlsAlloc();
+	TlsSetValue(index,NULL);
+	return (hl_tls*)(int_val)index;
+#	else
+	hl_tls *slot = malloc(sizeof(hl_tls));
+#		ifdef HL_THREADS
+	pthread_key_create(&slot->key,NULL);
+#		else
+	slot->value = NULL;
+#		endif
+	return slot;
+#	endif
+}
+
+/*
+	Releases a slot created by hl_tls_alloc.
+*/
+HL_PRIM void hl_tls_free( hl_tls *l ) {
+#	if defined(HL_THREADS) && defined(HL_WIN)
+	// `l` is not a pointer here but the TLS index returned by TlsAlloc
+	TlsFree((DWORD)(int_val)l);
+#	else
+#		ifdef HL_THREADS
+	pthread_key_delete(l->key);
+#		endif
+	free(l);
+#	endif
+}
+
+/*
+	Stores `v` into the slot `l` (for the calling thread when threads
+	support is enabled).
+*/
+HL_PRIM void hl_tls_set( hl_tls *l, void *v ) {
+#	ifdef HL_THREADS
+#		ifdef HL_WIN
+	TlsSetValue((DWORD)(int_val)l,v);
+#		else
+	pthread_setspecific(l->key,v);
+#		endif
+#	else
+	l->value = v;
+#	endif
+}
+
+/*
+	Returns the value stored in the slot `l` (for the calling thread when
+	threads support is enabled).
+*/
+HL_PRIM void *hl_tls_get( hl_tls *l ) {
+#	ifdef HL_THREADS
+#		ifdef HL_WIN
+	return (void*)TlsGetValue((DWORD)(int_val)l);
+#		else
+	return pthread_getspecific(l->key);
+#		endif
+#	else
+	return l->value;
+#	endif
+}
+
+// ----------------- THREAD
+
 HL_PRIM hl_thread *hl_thread_current() {
-#	ifdef HL_WIN
+#if !defined(HL_THREADS) // threads support compiled out
+	return NULL; // no thread handle is available in this configuration
+#elif defined(HL_WIN) // Windows : the handle is the thread id cast to a pointer
 	return (hl_thread*)(int_val)GetCurrentThreadId();
-#	else
+#else // pthread : the handle is the pthread_t cast to a pointer
 	return (hl_thread*)pthread_self();
-#	endif
+#endif
 }
 
+/*
+	Gives other threads a chance to run.
+	Does nothing when threads support is disabled.
+*/
+HL_PRIM void hl_thread_yield() {
+#if !defined(HL_THREADS)
+	// nothing
+#elif defined(HL_WIN)
+	Sleep(0);
+#else
+	// sched_yield is the POSIX call (made visible by <pthread.h>), unlike
+	// pthread_yield which is a non-standard extension
+	sched_yield();
+#endif
+}
+
+
 HL_PRIM int hl_thread_id() {
-#	ifdef HL_WIN
+#if !defined(HL_THREADS) // threads support compiled out
+	return 0; // fixed id in this configuration
+#elif defined(HL_WIN) // Windows : system thread id
 	return (int)GetCurrentThreadId();
-#	else
-#	if defined(SYS_gettid) && !defined(HL_TVOS)
+#elif defined(SYS_gettid) && !defined(HL_TVOS) // platforms providing the gettid syscall (tvOS excluded)
 	return syscall(SYS_gettid);
-#	else
+#else // no way to get an id : report an error
 	hl_error("hl_thread_id() not available for this platform");
 	return -1;
-#	endif
-#	endif
+#endif
+}
+
+typedef struct { // start-up data handed from hl_thread_start to gc_thread_entry
+	void (*callb)( void *); // function the new thread has to run
+	void *param; // argument passed to callb (registered as a GC root by hl_thread_start)
+} thread_start;
+
+/*
+	Entry point installed by hl_thread_start for threads started with
+	`withGC` : registers the thread, runs the user callback and then
+	unregisters the thread. `_s` is the heap block filled by hl_thread_start
+	and is released here.
+*/
+static void gc_thread_entry( thread_start *_s ) {
+	// work on a stack copy so that the heap block can be released before
+	// the callback runs
+	thread_start local = *_s;
+	hl_register_thread(&local);
+	// matches the hl_add_root done in hl_thread_start
+	hl_remove_root(&_s->param);
+	free(_s);
+	local.callb(local.param);
+	hl_unregister_thread();
 }
 
 HL_PRIM hl_thread *hl_thread_start( void *callback, void *param, bool withGC ) {
-	if( withGC ) hl_error("Threads with garbage collector are currently not supported");
-#	ifdef HL_WIN
+#ifdef HL_THREADS
+	if( withGC ) {
+		thread_start *s = (thread_start*)malloc(sizeof(thread_start));
+		s->callb = callback;
+		s->param = param;
+		hl_add_root(&s->param);
+		callback = gc_thread_entry;
+		param = s;
+	}
+#endif
+#if !defined(HL_THREADS)
+	hl_error("Threads support is disabled");
+	return NULL;
+#elif defined(HL_WIN)
 	DWORD tid;
 	HANDLE h = CreateThread(NULL,0,callback,param,0,&tid);
 	if( h == NULL )
 		return NULL;
 	CloseHandle(h);
 	return (hl_thread*)(int_val)tid;
-#	else
+#else
 	pthread_t t;
 	pthread_attr_t attr;
 	pthread_attr_init(&attr);
@@ -68,25 +243,26 @@ HL_PRIM hl_thread *hl_thread_start( void *callback, void *param, bool withGC ) {
 	}
 	pthread_attr_destroy(&attr);
 	return (hl_thread*)t;
-#	endif
+#endif
 }
 
-HL_PRIM bool hl_thread_pause( hl_thread *t, bool pause ) {
-#	ifdef HL_WIN
-	bool ret;
-	HANDLE h = OpenThread(THREAD_ALL_ACCESS,FALSE,(DWORD)(int_val)t);
-	if( pause )
-		ret = ((int)SuspendThread(h)) >= 0;
-	else {
-		int r;
-		while( (r = (int)ResumeThread(h)) > 0 ) {
-		}
-		ret = r == 0;
-	}
-	CloseHandle(h);
-	return ret;
-#	else
-	// TODO : use libthread_db
-	return false;
-#	endif
+/*
+	Thread body used by hl_thread_create : calls the closure `c` without
+	arguments and, if it throws, prints the exception followed by its
+	stack with uprintf.
+*/
+static void hl_run_thread( vclosure *c ) {
+	bool thrown;
+	vdynamic *value = hl_dyn_call_safe(c,NULL,0,&thrown);
+	if( thrown ) {
+		varray *stack = hl_exception_stack();
+		int k;
+		uprintf(USTR("Uncaught exception: %s\n"), hl_to_string(value));
+		for(k=0;k<stack->size;k++)
+			uprintf(USTR("Called from %s\n"), hl_aptr(stack,uchar*)[k]);
+	}
+}
+
+HL_PRIM hl_thread *hl_thread_create( vclosure *c ) { // starts a new thread running the closure `c` through hl_run_thread
+	return hl_thread_start(hl_run_thread,c,true); // withGC = true : hl_thread_start wraps the call in gc_thread_entry
 }
+
+#define _THREAD _ABSTRACT(hl_thread)
+DEFINE_PRIM(_THREAD, thread_current, _NO_ARG);
+DEFINE_PRIM(_THREAD, thread_create, _FUN(_VOID,_NO_ARG));