Explorar el Código

Add low-overhead profiler. Part 1: interpreter, low-level C API.

Mike Pall hace 12 años
padre
commit
4dce22c40d
Se han modificado 18 ficheros con 558 adiciones y 33 borrados
  1. 1 1
      src/Makefile
  2. 20 16
      src/Makefile.dep
  3. 12 0
      src/lj_arch.h
  4. 108 3
      src/lj_debug.c
  5. 5 1
      src/lj_debug.h
  6. 47 8
      src/lj_dispatch.c
  7. 4 2
      src/lj_dispatch.h
  8. 1 0
      src/lj_obj.h
  9. 274 0
      src/lj_profile.c
  10. 17 0
      src/lj_profile.h
  11. 5 1
      src/lj_state.c
  12. 1 0
      src/lj_vm.h
  13. 1 0
      src/ljamalg.c
  14. 9 0
      src/luajit.h
  15. 12 0
      src/vm_arm.dasc
  16. 14 0
      src/vm_mips.dasc
  17. 13 0
      src/vm_ppc.dasc
  18. 14 1
      src/vm_x86.dasc

+ 1 - 1
src/Makefile

@@ -446,7 +446,7 @@ LJLIB_C= $(LJLIB_O:.o=.c)
 LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
 	  lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \
 	  lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \
-	  lj_strfmt.o lj_api.o \
+	  lj_strfmt.o lj_api.o lj_profile.o \
 	  lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
 	  lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
 	  lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \

+ 20 - 16
src/Makefile.dep

@@ -102,7 +102,7 @@ lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_func.h lj_tab.h \
  lj_meta.h lj_debug.h lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h \
  lj_strfmt.h lj_jit.h lj_ir.h lj_ccallback.h lj_ctype.h lj_trace.h \
- lj_dispatch.h lj_traceerr.h lj_vm.h luajit.h
+ lj_dispatch.h lj_traceerr.h lj_profile.h lj_vm.h luajit.h
 lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \
  lj_errmsg.h lj_debug.h lj_str.h lj_func.h lj_state.h lj_frame.h lj_bc.h \
  lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
@@ -168,6 +168,9 @@ lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \
  lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \
  lj_vm.h lj_vmevent.h
+lj_profile.o: lj_profile.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_buf.h lj_gc.h lj_str.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \
+ lj_jit.h lj_ir.h lj_profile.h luajit.h
 lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
  lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h \
@@ -180,7 +183,7 @@ lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
 lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h \
  lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h \
- lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h
+ lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h luajit.h
 lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_err.h lj_errmsg.h lj_str.h lj_char.h
 lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
@@ -208,20 +211,21 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
  lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h lj_char.c \
  lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c lj_tab.c \
  lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \
- lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c lj_ccallback.h luajit.h \
- lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c lj_strfmt.c lj_api.c \
- lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h \
- lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c \
- lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h lj_target_*.h \
- lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h lj_cparse.c \
- lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h lj_iropt.h lj_opt_mem.c \
- lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c \
- lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c \
- lj_record.h lj_ffrecord.h lj_crecord.c lj_crecord.h lj_ffrecord.c \
- lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h lj_asm_*.h lj_trace.c \
- lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c lib_base.c lj_libdef.h \
- lib_math.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c \
- lib_debug.c lib_bit.c lib_jit.c lib_ffi.c lib_init.c
+ lj_state.c lj_lex.h lj_alloc.h luajit.h lj_dispatch.c lj_ccallback.h \
+ lj_profile.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c \
+ lj_strfmt.c lj_api.c lj_profile.c lj_lex.c lualib.h lj_parse.h \
+ lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c \
+ lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c \
+ lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \
+ lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h \
+ lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \
+ lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \
+ lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
+ lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \
+ lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \
+ lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \
+ lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \
+ lib_init.c
 luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
 host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
  lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \

+ 12 - 0
src/lj_arch.h

@@ -366,6 +366,18 @@
 #define LJ_HASFFI		1
 #endif
 
+#if defined(LUAJIT_DISABLE_PROFILE)
+#define LJ_HASPROFILE		0
+#elif LJ_TARGET_POSIX
+#define LJ_HASPROFILE		1
+#define LJ_PROFILE_SIGPROF	1
+#elif LJ_TARGET_WINDOWS
+#define LJ_HASPROFILE		1
+#define LJ_PROFILE_WTHREAD	1
+#else
+#define LJ_HASPROFILE		0
+#endif
+
 #ifndef LJ_ARCH_HASFPU
 #define LJ_ARCH_HASFPU		1
 #endif

+ 108 - 3
src/lj_debug.c

@@ -28,7 +28,7 @@ cTValue *lj_debug_frame(lua_State *L, int level, int *size)
   /* Traverse frames backwards. */
   for (nextframe = frame = L->base-1; frame > bot; ) {
     if (frame_gc(frame) == obj2gco(L))
-      level++;  /* Skip dummy frames. See lj_meta_call(). */
+      level++;  /* Skip dummy frames. See lj_err_optype_call(). */
     if (level-- == 0) {
       *size = (int)(nextframe - frame);
       return frame;  /* Level found. */
@@ -278,9 +278,9 @@ restart:
 }
 
 /* Deduce function name from caller of a frame. */
-const char *lj_debug_funcname(lua_State *L, TValue *frame, const char **name)
+const char *lj_debug_funcname(lua_State *L, cTValue *frame, const char **name)
 {
-  TValue *pframe;
+  cTValue *pframe;
   GCfunc *fn;
   BCPos pc;
   if (frame <= tvref(L->stack))
@@ -534,6 +534,111 @@ LUA_API int lua_getstack(lua_State *L, int level, lua_Debug *ar)
   }
 }
 
+#if LJ_HASPROFILE
+/* Put the chunkname of a prototype into a buffer.
+**
+** Bytecode builtins (firstline == ~0) are written as "[builtin:NAME]" and
+** 0 is returned. Names starting with '=' or '@' are written without that
+** prefix character and, if pathstrip is non-zero, without everything up to
+** and including the last slash or backslash. Any other name is written as
+** "[string]". Returns 1 in both of these cases.
+*/
+static int debug_putchunkname(SBuf *sb, GCproto *pt, int pathstrip)
+{
+  GCstr *name = proto_chunkname(pt);
+  const char *p = strdata(name);
+  if (pt->firstline == ~(BCLine)0) {
+    lj_buf_putmem(sb, "[builtin:", 9);
+    lj_buf_putstr(sb, name);
+    lj_buf_putb(sb, ']');
+    return 0;
+  }
+  if (*p == '=' || *p == '@') {
+    MSize len = name->len-1;  /* Length without the prefix character. */
+    p++;
+    if (pathstrip) {
+      int i;
+      /* Scan backwards for the last path separator. */
+      for (i = len-1; i >= 0; i--)
+	if (p[i] == '/' || p[i] == '\\') {
+	  len -= i+1;
+	  p = p+i+1;
+	  break;
+	}
+    }
+    lj_buf_putmem(sb, p, len);
+  } else {
+    /* The literal has 8 characters; a length of 9 would copy its NUL, too. */
+    lj_buf_putmem(sb, "[string]", 8);
+  }
+  return 1;
+}
+
+/* Put a compact stack dump into a buffer.
+**
+** The format string fmt is applied once per dumped frame:
+**   'p'       Preserve the full path of chunk names (no path stripping).
+**   'f', 'F'  Dump the function name; 'F' prefixes it with the chunk name
+**             and ':' for Lua functions that are not bytecode builtins.
+**             If no name can be derived, falls through to the 'l' output,
+**             using the first line of the function.
+**   'l'       Dump module:line for Lua functions, "[builtin#N]" for
+**             numbered builtins, or '@' plus the address for C functions.
+**   'Z'       Remember the current buffer length; the buffer is cut back
+**             to the last remembered length after the final frame.
+**   other     Copied literally to the buffer.
+**
+** A negative depth dumps the frames in reverse order.
+*/
+void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt, int depth)
+{
+  int level = 0, dir = 1, pathstrip = 1;
+  MSize lastlen = 0;
+  if (depth < 0) { level = ~depth; depth = dir = -1; }  /* Reverse frames. */
+  while (level != depth) {  /* Loop through all frames. */
+    int size;
+    cTValue *frame = lj_debug_frame(L, level, &size);
+    if (frame) {
+      cTValue *nextframe = size ? frame+size : NULL;
+      GCfunc *fn = frame_func(frame);
+      const uint8_t *p = (const uint8_t *)fmt;
+      int c;
+      while ((c = *p++)) {
+	switch (c) {
+	case 'p':  /* Preserve full path. */
+	  pathstrip = 0;
+	  break;
+	case 'F': case 'f': {  /* Dump function name. */
+	  const char *name;
+	  const char *what = lj_debug_funcname(L, frame, &name);
+	  if (what) {
+	    if (c == 'F' && isluafunc(fn)) {  /* Dump module:name for 'F'. */
+	      GCproto *pt = funcproto(fn);
+	      if (pt->firstline != ~(BCLine)0) {  /* Not a bytecode builtin. */
+		debug_putchunkname(sb, pt, pathstrip);
+		lj_buf_putb(sb, ':');
+	      }
+	    }
+	    lj_buf_putmem(sb, name, (MSize)strlen(name));
+	    break;
+	  }  /* else: can't derive a name, dump module:line. */
+	  }
+	  /* fallthrough */
+	case 'l':  /* Dump module:line. */
+	  if (isluafunc(fn)) {
+	    GCproto *pt = funcproto(fn);
+	    if (debug_putchunkname(sb, pt, pathstrip)) {
+	      /* Regular Lua function. */
+	      BCLine line = c == 'l' ? debug_frameline(L, fn, nextframe) :
+				       pt->firstline;
+	      lj_buf_putb(sb, ':');
+	      lj_strfmt_putint(sb, line >= 0 ? line : pt->firstline);
+	    }
+	  } else if (isffunc(fn)) {  /* Dump numbered builtins. */
+	    lj_buf_putmem(sb, "[builtin#", 9);
+	    lj_strfmt_putint(sb, fn->c.ffid);
+	    lj_buf_putb(sb, ']');
+	  } else {  /* Dump C function address. */
+	    lj_buf_putb(sb, '@');
+	    lj_strfmt_putptr(sb, fn->c.f);
+	  }
+	  break;
+	case 'Z':  /* Zap trailing separator. */
+	  lastlen = sbuflen(sb);
+	  break;
+	default:
+	  lj_buf_putb(sb, c);
+	  break;
+	}
+      }
+    } else if (dir == 1) {
+      break;
+    } else {
+      level -= size;  /* Reverse frame order: quickly skip missing level. */
+    }
+    level += dir;
+  }
+  if (lastlen)
+    setsbufP(sb, sbufB(sb) + lastlen);  /* Zap trailing separator. */
+}
+#endif
+
 /* Number of frames for the leading and trailing part of a traceback. */
 #define TRACEBACK_LEVELS1	12
 #define TRACEBACK_LEVELS2	10

+ 5 - 1
src/lj_debug.h

@@ -32,7 +32,7 @@ LJ_FUNC const char *lj_debug_uvname(GCproto *pt, uint32_t idx);
 LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp);
 LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc,
 				      BCReg slot, const char **name);
-LJ_FUNC const char *lj_debug_funcname(lua_State *L, TValue *frame,
+LJ_FUNC const char *lj_debug_funcname(lua_State *L, cTValue *frame,
 				      const char **name);
 LJ_FUNC void lj_debug_shortname(char *out, GCstr *str, BCLine line);
 LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg,
@@ -40,6 +40,10 @@ LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg,
 LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc);
 LJ_FUNC int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar,
 			     int ext);
+#if LJ_HASPROFILE
+LJ_FUNC void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt,
+				int depth);
+#endif
 
 /* Fixed internal variable names. */
 #define VARNAMEDEF(_) \

+ 47 - 8
src/lj_dispatch.c

@@ -27,6 +27,9 @@
 #endif
 #include "lj_trace.h"
 #include "lj_dispatch.h"
+#if LJ_HASPROFILE
+#include "lj_profile.h"
+#endif
 #include "lj_vm.h"
 #include "luajit.h"
 
@@ -84,11 +87,12 @@ void lj_dispatch_init_hotcount(global_State *g)
 #endif
 
 /* Internal dispatch mode bits. */
-#define DISPMODE_JIT	0x01	/* JIT compiler on. */
-#define DISPMODE_REC	0x02	/* Recording active. */
+#define DISPMODE_CALL	0x01	/* Override call dispatch. */
+#define DISPMODE_RET	0x02	/* Override return dispatch. */
 #define DISPMODE_INS	0x04	/* Override instruction dispatch. */
-#define DISPMODE_CALL	0x08	/* Override call dispatch. */
-#define DISPMODE_RET	0x10	/* Override return dispatch. */
+#define DISPMODE_JIT	0x10	/* JIT compiler on. */
+#define DISPMODE_REC	0x20	/* Recording active. */
+#define DISPMODE_PROF	0x40	/* Profiling active. */
 
 /* Update dispatch table depending on various flags. */
 void lj_dispatch_update(global_State *g)
@@ -99,6 +103,9 @@ void lj_dispatch_update(global_State *g)
   mode |= (G2J(g)->flags & JIT_F_ON) ? DISPMODE_JIT : 0;
   mode |= G2J(g)->state != LJ_TRACE_IDLE ?
 	    (DISPMODE_REC|DISPMODE_INS|DISPMODE_CALL) : 0;
+#endif
+#if LJ_HASPROFILE
+  mode |= (g->hookmask & HOOK_PROFILE) ? (DISPMODE_PROF|DISPMODE_INS) : 0;
 #endif
   mode |= (g->hookmask & (LUA_MASKLINE|LUA_MASKCOUNT)) ? DISPMODE_INS : 0;
   mode |= (g->hookmask & LUA_MASKCALL) ? DISPMODE_CALL : 0;
@@ -128,9 +135,9 @@ void lj_dispatch_update(global_State *g)
     disp[GG_LEN_DDISP+BC_LOOP] = f_loop;
 
     /* Set dynamic instruction dispatch. */
-    if ((oldmode ^ mode) & (DISPMODE_REC|DISPMODE_INS)) {
+    if ((oldmode ^ mode) & (DISPMODE_PROF|DISPMODE_REC|DISPMODE_INS)) {
       /* Need to update the whole table. */
-      if (!(mode & (DISPMODE_REC|DISPMODE_INS))) {  /* No ins dispatch? */
+      if (!(mode & DISPMODE_INS)) {  /* No ins dispatch? */
 	/* Copy static dispatch table to dynamic dispatch table. */
 	memcpy(&disp[0], &disp[GG_LEN_DDISP], GG_LEN_SDISP*sizeof(ASMFunction));
 	/* Overwrite with dynamic return dispatch. */
@@ -142,12 +149,13 @@ void lj_dispatch_update(global_State *g)
 	}
       } else {
 	/* The recording dispatch also checks for hooks. */
-	ASMFunction f = (mode & DISPMODE_REC) ? lj_vm_record : lj_vm_inshook;
+	ASMFunction f = (mode & DISPMODE_PROF) ? lj_vm_profhook :
+			(mode & DISPMODE_REC) ? lj_vm_record : lj_vm_inshook;
 	uint32_t i;
 	for (i = 0; i < GG_LEN_SDISP; i++)
 	  disp[i] = f;
       }
-    } else if (!(mode & (DISPMODE_REC|DISPMODE_INS))) {
+    } else if (!(mode & DISPMODE_INS)) {
       /* Otherwise set dynamic counting ins. */
       disp[BC_FORL] = f_forl;
       disp[BC_ITERL] = f_iterl;
@@ -495,3 +503,34 @@ out:
   return makeasmfunc(lj_bc_ofs[op]);  /* Return static dispatch target. */
 }
 
+#if LJ_HASPROFILE
+/* Profile dispatch.
+**
+** Called from the vm_profhook dispatch target with the current PC. Clears
+** HOOK_PROFILE and updates the dispatch table, then invokes the profiler
+** callback via lj_profile_interpreter(), unless HOOK_VMEVENT was set in
+** the hook mask on entry.
+*/
+void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc)
+{
+  ERRNO_SAVE
+  global_State *g = G(L);
+  uint8_t mask = g->hookmask;
+  g->hookmask = (mask & ~HOOK_PROFILE);  /* Clear the profile hook. */
+  lj_dispatch_update(g);  /* Update dispatch table for the new mask. */
+  if (!(mask & HOOK_VMEVENT)) {
+    GCfunc *fn = curr_func(L);
+    GCproto *pt = funcproto(fn);
+    void *cf = cframe_raw(L->cframe);
+    const BCIns *oldpc = cframe_pc(cf);  /* Saved to be restored below. */
+    uint8_t oldh = hook_save(g);  /* Saved to be restored below. */
+    BCReg slots;
+    hook_vmevent(g);
+    setcframe_pc(cf, pc);  /* Store the current PC in the C frame. */
+    slots = cur_topslot(pt, pc, cframe_multres_n(cf));
+    L->top = L->base + slots;  /* Fix top. */
+    lj_profile_interpreter(L);  /* Runs the user's profiler callback. */
+    setgcref(g->cur_L, obj2gco(L));
+    setcframe_pc(cf, oldpc);  /* Restore the previous PC. */
+    hook_restore(g, oldh);  /* Restore the previous hook state. */
+    lj_trace_abort(g);
+    setvmstate(g, INTERP);
+  }
+  ERRNO_RESTORE
+}
+#endif
+

+ 4 - 2
src/lj_dispatch.h

@@ -29,7 +29,7 @@
   _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \
   _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \
   _(pow) _(fmod) _(ldexp) \
-  _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_err_throw) \
+  _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_profile) _(lj_err_throw)\
   _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \
   _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \
   _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \
@@ -110,7 +110,9 @@ LJ_FUNC void lj_dispatch_update(global_State *g);
 /* Instruction dispatch callback for hooks or when recording. */
 LJ_FUNCA void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc);
 LJ_FUNCA ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns*pc);
-LJ_FUNCA void LJ_FASTCALL lj_dispatch_return(lua_State *L, const BCIns *pc);
+#if LJ_HASPROFILE
+LJ_FUNCA void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc);
+#endif
 
 #if LJ_HASFFI && !defined(_BUILDVM_H)
 /* Save/restore errno and GetLastError() around hooks, exits and recording. */

+ 1 - 0
src/lj_obj.h

@@ -554,6 +554,7 @@ typedef struct global_State {
 #define HOOK_ACTIVE_SHIFT	4
 #define HOOK_VMEVENT		0x20
 #define HOOK_GC			0x40
+#define HOOK_PROFILE		0x80
 #define hook_active(g)		((g)->hookmask & HOOK_ACTIVE)
 #define hook_enter(g)		((g)->hookmask |= HOOK_ACTIVE)
 #define hook_entergc(g)		((g)->hookmask |= (HOOK_ACTIVE|HOOK_GC))

+ 274 - 0
src/lj_profile.c

@@ -0,0 +1,274 @@
+/*
+** Low-overhead profiling.
+** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_profile_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+
+#if LJ_HASPROFILE
+
+#include "lj_buf.h"
+#include "lj_frame.h"
+#include "lj_debug.h"
+#include "lj_dispatch.h"
+#include "lj_profile.h"
+
+#include "luajit.h"
+
+#if LJ_PROFILE_SIGPROF
+
+#include <sys/time.h>
+#include <signal.h>
+
+#elif LJ_PROFILE_PTHREAD
+
+#include <pthread.h>
+
+#elif LJ_PROFILE_WTHREAD
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+typedef unsigned int (WINAPI *WMM_TPFUNC)(unsigned int);
+
+#endif
+
+/* Profiler state.
+**
+** Holds the callback registration plus the state of whichever OS-specific
+** timer variant (SIGPROF, pthread or Windows thread) is compiled in.
+** NOTE(review): the lj_arch.h hunk of this change only defines
+** LJ_PROFILE_SIGPROF and LJ_PROFILE_WTHREAD; confirm where
+** LJ_PROFILE_PTHREAD is meant to be enabled.
+*/
+typedef struct ProfileState {
+  global_State *g;		/* VM state that started the profiler. */
+  luaJIT_profile_callback cb;	/* Profiler callback. */
+  void *data;			/* Profiler callback data. */
+  SBuf sb;			/* String buffer for stack dumps. */
+  int interval;			/* Sample interval in milliseconds. */
+  int samples;			/* Number of samples for next callback. */
+  int vmstate;			/* VM state when profile timer triggered. */
+#if LJ_PROFILE_SIGPROF
+  struct sigaction oldsa;	/* Previous SIGPROF state. */
+#elif LJ_PROFILE_PTHREAD
+  pthread_t thread;		/* Timer thread. */
+  int abort;			/* Abort timer thread. */
+#elif LJ_PROFILE_WTHREAD
+  HINSTANCE wmm;		/* WinMM library handle. */
+  WMM_TPFUNC wmm_tbp;		/* WinMM timeBeginPeriod function. */
+  WMM_TPFUNC wmm_tep;		/* WinMM timeEndPeriod function. */
+  HANDLE thread;		/* Timer thread. */
+  int abort;			/* Abort timer thread. */
+#endif
+} ProfileState;
+
+/* Sadly, we have to use a static profiler state.
+**
+** The SIGPROF variant needs a static pointer to the global state, anyway.
+** And it would be hard to extend for multiple threads. You can still use
+** multiple VMs in multiple threads, but only profile one at a time.
+*/
+static ProfileState profile_state;
+
+/* Default sample interval in milliseconds. */
+#define LJ_PROFILE_INTERVAL_DEFAULT	10
+
+/* -- Profile callbacks --------------------------------------------------- */
+
+/* Callback from profile hook (HOOK_PROFILE already cleared).
+**
+** Resets the sample counter and passes the number of samples accumulated
+** so far, together with the VM state recorded by profile_trigger(), to
+** the user callback.
+*/
+void LJ_FASTCALL lj_profile_interpreter(lua_State *L)
+{
+  ProfileState *ps = &profile_state;
+  int samples = ps->samples;  /* Read before the counter is reset. */
+  ps->samples = 0;
+  ps->cb(ps->data, L, samples, ps->vmstate);  /* Invoke user callback. */
+}
+
+/* Trigger profile hook. Asynchronous call from OS-specific profile timer.
+**
+** Counts the sample and, if HOOK_PROFILE is not yet set, records the VM
+** state as a single character and sets the hook:
+**   'N' for vmstate >= 0, 'I' for ~LJ_VMST_INTERP, 'C' for ~LJ_VMST_C,
+**   'G' for ~LJ_VMST_GC and 'J' for anything else.
+*/
+static void profile_trigger(ProfileState *ps)
+{
+  global_State *g = ps->g;
+  uint8_t mask;
+  ps->samples++;  /* Always increment number of samples. */
+  mask = g->hookmask;
+  if (!(mask & HOOK_PROFILE)) {  /* Set profile hook, unless already set. */
+    int st = g->vmstate;
+    ps->vmstate = st >= 0 ? 'N' :
+		  st == ~LJ_VMST_INTERP ? 'I' :
+		  st == ~LJ_VMST_C ? 'C' :
+		  st == ~LJ_VMST_GC ? 'G' : 'J';
+    g->hookmask = (mask | HOOK_PROFILE);
+    lj_dispatch_update(g);  /* Update dispatch table for the new mask. */
+  }
+}
+
+/* -- OS-specific profile timer handling ---------------------------------- */
+
+#if LJ_PROFILE_SIGPROF
+
+/* SIGPROF handler: forwards to profile_trigger() for the static state. */
+static void profile_signal(int sig)
+{
+  UNUSED(sig);
+  profile_trigger(&profile_state);
+}
+
+/* Start profiling timer.
+**
+** Installs the SIGPROF handler (saving the previous disposition in
+** ps->oldsa) and then arms ITIMER_PROF with the sample interval, given in
+** milliseconds.
+*/
+static void profile_timer_start(ProfileState *ps)
+{
+  int interval = ps->interval;
+  struct itimerval tm;
+  struct sigaction sa;
+  /* Install the handler first: the default action for SIGPROF terminates
+  ** the process, so the timer must not be able to fire before this.
+  */
+  sa.sa_flags = SA_RESTART;
+  sa.sa_handler = profile_signal;
+  sigemptyset(&sa.sa_mask);
+  sigaction(SIGPROF, &sa, &ps->oldsa);
+  /* Split the interval into seconds and microseconds. */
+  tm.it_value.tv_sec = tm.it_interval.tv_sec = interval / 1000;
+  tm.it_value.tv_usec = tm.it_interval.tv_usec = (interval % 1000) * 1000;
+  setitimer(ITIMER_PROF, &tm, NULL);
+}
+
+/* Stop profiling timer: disarm ITIMER_PROF with an all-zero timer value,
+** then put back the SIGPROF disposition saved in ps->oldsa.
+*/
+static void profile_timer_stop(ProfileState *ps)
+{
+  struct itimerval off;
+  off.it_interval.tv_sec = 0;
+  off.it_interval.tv_usec = 0;
+  off.it_value = off.it_interval;
+  setitimer(ITIMER_PROF, &off, NULL);
+  sigaction(SIGPROF, &ps->oldsa, NULL);
+}
+
+#elif LJ_PROFILE_PTHREAD
+
+/* POSIX timer thread: sleep for one sample interval, then trigger the
+** profile hook. Repeats until ps->abort is found set after a sleep.
+*/
+static void *profile_thread(ProfileState *ps)
+{
+  struct timespec delay;
+  const int ms = ps->interval;
+  delay.tv_sec = ms / 1000;
+  delay.tv_nsec = (ms % 1000) * 1000000;
+  for (;;) {
+    nanosleep(&delay, NULL);
+    if (ps->abort)
+      break;
+    profile_trigger(ps);
+  }
+  return NULL;
+}
+
+/* Start profiling timer thread.
+**
+** Clears the abort flag and creates the timer thread running
+** profile_thread() with the profiler state as its argument.
+** NOTE(review): the result of pthread_create() is not checked here.
+*/
+static void profile_timer_start(ProfileState *ps)
+{
+  ps->abort = 0;
+  pthread_create(&ps->thread, NULL, (void *(*)(void *))profile_thread, ps);
+}
+
+/* Stop profiling timer thread.
+**
+** Sets the abort flag, which the timer thread checks after each sleep,
+** and joins the thread.
+*/
+static void profile_timer_stop(ProfileState *ps)
+{
+  ps->abort = 1;
+  pthread_join(ps->thread, NULL);
+}
+
+#elif LJ_PROFILE_WTHREAD
+
+/* Windows timer thread.
+**
+** Calls the WinMM timeBeginPeriod function with 1 on entry and the
+** timeEndPeriod function with 1 on exit. In between, sleeps for the
+** sample interval and triggers the profile hook until ps->abort is set.
+*/
+static DWORD WINAPI profile_thread(void *psx)
+{
+  ProfileState *ps = (ProfileState *)psx;
+  int interval = ps->interval;
+  ps->wmm_tbp(1);
+  while (1) {
+    Sleep(interval);
+    if (ps->abort) break;  /* Checked after each sleep. */
+    profile_trigger(ps);
+  }
+  ps->wmm_tep(1);
+  return 0;
+}
+
+/* Start profiling timer thread.
+**
+** Loads winmm.dll on first use and resolves timeBeginPeriod/timeEndPeriod,
+** which the timer thread calls unconditionally. If the library or either
+** function is unavailable, no timer thread is started and ps->thread is
+** left NULL.
+*/
+static void profile_timer_start(ProfileState *ps)
+{
+  ps->thread = NULL;  /* No timer thread unless CreateThread succeeds. */
+  if (!ps->wmm) {  /* Load WinMM library on-demand. */
+    HINSTANCE wmm = LoadLibraryA("winmm.dll");
+    if (!wmm)
+      return;  /* The timer thread would call NULL function pointers. */
+    ps->wmm_tbp = (WMM_TPFUNC)GetProcAddress(wmm, "timeBeginPeriod");
+    ps->wmm_tep = (WMM_TPFUNC)GetProcAddress(wmm, "timeEndPeriod");
+    if (!ps->wmm_tbp || !ps->wmm_tep) {
+      FreeLibrary(wmm);  /* Do not leak the module reference. */
+      return;
+    }
+    ps->wmm = wmm;  /* Only cache a fully usable library handle. */
+  }
+  ps->abort = 0;
+  ps->thread = CreateThread(NULL, 0, profile_thread, ps, 0, NULL);
+}
+
+/* Stop profiling timer thread.
+**
+** Sets the abort flag, waits for the timer thread to exit and releases
+** its handle. Only the flag is set if no thread handle is present.
+*/
+static void profile_timer_stop(ProfileState *ps)
+{
+  ps->abort = 1;
+  if (ps->thread) {  /* NULL if the thread was never created. */
+    WaitForSingleObject(ps->thread, INFINITE);
+    CloseHandle(ps->thread);  /* Handles from CreateThread must be closed. */
+    ps->thread = NULL;
+  }
+}
+
+#endif
+
+/* -- Public profiling API ------------------------------------------------ */
+
+/* Start profiling.
+**
+** mode is a string of option characters. 'i' followed by decimal digits
+** sets the sample interval in milliseconds (minimum 1, default
+** LJ_PROFILE_INTERVAL_DEFAULT, saturating at 0x7fffffff); unknown
+** characters are ignored. A profiler already started by this VM is
+** stopped first. The call does nothing if the profiler is in use by
+** another VM. cb is invoked with data from the profile hook.
+*/
+LUA_API void luaJIT_profile_start(lua_State *L, const char *mode,
+				  luaJIT_profile_callback cb, void *data)
+{
+  ProfileState *ps = &profile_state;
+  int interval = LJ_PROFILE_INTERVAL_DEFAULT;
+  while (*mode) {
+    switch (*mode++) {
+    case 'i':
+      interval = 0;
+      while (*mode >= '0' && *mode <= '9') {
+	int d = *mode++ - '0';
+	/* Saturate instead of overflowing a signed int (undefined). */
+	if (interval > (0x7fffffff - d) / 10)
+	  interval = 0x7fffffff;
+	else
+	  interval = interval * 10 + d;
+      }
+      if (interval <= 0) interval = 1;
+      break;
+    default:  /* Ignore unknown mode chars. */
+      break;
+    }
+  }
+  if (ps->g) {
+    luaJIT_profile_stop(L);  /* Only has an effect for the owning VM. */
+    if (ps->g) return;  /* Profiler in use by another VM. */
+  }
+  ps->g = G(L);
+  ps->interval = interval;
+  ps->cb = cb;
+  ps->data = data;
+  ps->samples = 0;
+  lj_buf_init(L, &ps->sb);
+  profile_timer_start(ps);
+}
+
+/* Stop profiling.
+**
+** Does nothing unless the profiler was started by the VM that L belongs
+** to. Otherwise stops the timer, clears HOOK_PROFILE, updates the dispatch
+** table, frees the stack dump buffer and marks the profiler as unused.
+*/
+LUA_API void luaJIT_profile_stop(lua_State *L)
+{
+  ProfileState *ps = &profile_state;
+  global_State *g = ps->g;
+  if (G(L) == g) {  /* Only stop profiler if started by this VM. */
+    profile_timer_stop(ps);  /* Stop the timer before touching the hook. */
+    g->hookmask &= ~HOOK_PROFILE;
+    lj_dispatch_update(g);
+    lj_buf_free(g, &ps->sb);
+    setmref(ps->sb.b, NULL);
+    setmref(ps->sb.e, NULL);
+    ps->g = NULL;  /* Profiler is available again. */
+  }
+}
+
+/* Produce a compact stack dump for L, formatted according to fmt and depth.
+** The result is built in the profiler's string buffer, which is reset on
+** every call; its length is stored in *len.
+*/
+LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt,
+					     int depth, size_t *len)
+{
+  SBuf *dump = &profile_state.sb;
+  setsbufL(dump, L);
+  lj_buf_reset(dump);
+  lj_debug_dumpstack(L, dump, fmt, depth);
+  *len = (size_t)sbuflen(dump);
+  return sbufB(dump);
+}
+
+#endif

+ 17 - 0
src/lj_profile.h

@@ -0,0 +1,17 @@
+/*
+** Low-overhead profiling.
+** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_PROFILE_H
+#define _LJ_PROFILE_H
+
+#include "lj_obj.h"
+
+#if LJ_HASPROFILE
+
+LJ_FUNC void LJ_FASTCALL lj_profile_interpreter(lua_State *L);
+
+#endif
+
+#endif

+ 5 - 1
src/lj_state.c

@@ -27,6 +27,7 @@
 #include "lj_vm.h"
 #include "lj_lex.h"
 #include "lj_alloc.h"
+#include "luajit.h"
 
 /* -- Stack handling ------------------------------------------------------ */
 
@@ -237,8 +238,11 @@ LUA_API void lua_close(lua_State *L)
 {
   global_State *g = G(L);
   int i;
-  setgcrefnull(g->cur_L);
   L = mainthread(g);  /* Only the main thread can be closed. */
+#if LJ_HASPROFILE
+  luaJIT_profile_stop(L);
+#endif
+  setgcrefnull(g->cur_L);
   lj_func_closeuv(L, tvref(L->stack));
   lj_gc_separateudata(g, 1);  /* Separate udata which have GC metamethods. */
 #if LJ_HASJIT

+ 1 - 0
src/lj_vm.h

@@ -43,6 +43,7 @@ LJ_ASMF void lj_vm_record(void);
 LJ_ASMF void lj_vm_inshook(void);
 LJ_ASMF void lj_vm_rethook(void);
 LJ_ASMF void lj_vm_callhook(void);
+LJ_ASMF void lj_vm_profhook(void);
 
 /* Trace exit handling. */
 LJ_ASMF void lj_vm_exit_handler(void);

+ 1 - 0
src/ljamalg.c

@@ -47,6 +47,7 @@
 #include "lj_strscan.c"
 #include "lj_strfmt.c"
 #include "lj_api.c"
+#include "lj_profile.c"
 #include "lj_lex.c"
 #include "lj_parse.c"
 #include "lj_bcread.c"

+ 9 - 0
src/luajit.h

@@ -64,6 +64,15 @@ enum {
 /* Control the JIT engine. */
 LUA_API int luaJIT_setmode(lua_State *L, int idx, int mode);
 
+/* Low-overhead profiling API. */
+typedef void (*luaJIT_profile_callback)(void *data, lua_State *L,
+					int samples, int vmstate);
+LUA_API void luaJIT_profile_start(lua_State *L, const char *mode,
+				  luaJIT_profile_callback cb, void *data);
+LUA_API void luaJIT_profile_stop(lua_State *L);
+LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt,
+					     int depth, size_t *len);
+
 /* Enforce (dynamic) linker error for version mismatches. Call from main. */
 LUA_API void LUAJIT_VERSION_SYM(void);
 

+ 12 - 0
src/vm_arm.dasc

@@ -2081,6 +2081,18 @@ static void build_subroutines(BuildCtx *ctx)
   |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
   |   ldr INS, [PC, #-4]
   |  bx CRET1
+  |
+  |->vm_profhook:			// Dispatch target for profiler hook.
+#if LJ_HASPROFILE
+  |  mov CARG1, L
+  |   str BASE, L->base
+  |  mov CARG2, PC
+  |  bl extern lj_dispatch_profile	// (lua_State *L, const BCIns *pc)
+  |  // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
+  |  ldr BASE, L->base
+  |  sub PC, PC, #4
+  |  b ->cont_nop
+#endif
   |
   |//-----------------------------------------------------------------------
   |//-- Trace exit handler -------------------------------------------------

+ 14 - 0
src/vm_mips.dasc

@@ -2010,6 +2010,20 @@ static void build_subroutines(BuildCtx *ctx)
   |  lw LFUNC:RB, FRAME_FUNC(BASE)
   |  jr CRET1
   |.  lw INS, -4(PC)
+  |
+  |->vm_profhook:			// Dispatch target for profiler hook.
+#if LJ_HASPROFILE
+  |  load_got lj_dispatch_profile
+  |   sw MULTRES, SAVE_MULTRES
+  |  move CARG2, PC
+  |   sw BASE, L->base
+  |  call_intern lj_dispatch_profile	// (lua_State *L, const BCIns *pc)
+  |.  move CARG1, L
+  |  // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
+  |  addiu PC, PC, -4
+  |  b ->cont_nop
+  |.  lw BASE, L->base
+#endif
   |
   |//-----------------------------------------------------------------------
   |//-- Trace exit handler -------------------------------------------------

+ 13 - 0
src/vm_ppc.dasc

@@ -2504,6 +2504,19 @@ static void build_subroutines(BuildCtx *ctx)
   |  lwz INS, -4(PC)
   |  mtctr CRET1
   |  bctr
+  |
+  |->vm_profhook:			// Dispatch target for profiler hook.
+#if LJ_HASPROFILE
+  |  mr CARG1, L
+  |   stw MULTRES, SAVE_MULTRES
+  |  mr CARG2, PC
+  |   stp BASE, L->base
+  |  bl extern lj_dispatch_profile	// (lua_State *L, const BCIns *pc)
+  |  // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
+  |  lp BASE, L->base
+  |  subi PC, PC, 4
+  |  b ->cont_nop
+#endif
   |
   |//-----------------------------------------------------------------------
   |//-- Trace exit handler -------------------------------------------------

+ 14 - 1
src/vm_x86.dasc

@@ -2588,7 +2588,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  mov FCARG2, PC			// Caveat: FCARG2 == BASE
   |  mov FCARG1, L:RB
   |  // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
-  |  call extern lj_dispatch_ins@8	// (lua_State *L, BCIns *pc)
+  |  call extern lj_dispatch_ins@8	// (lua_State *L, const BCIns *pc)
   |3:
   |  mov BASE, L:RB->base
   |4:
@@ -2658,6 +2658,19 @@ static void build_subroutines(BuildCtx *ctx)
   |  shr RD, 3
   |  add NARGS:RD, 1
   |  jmp RBa
+  |
+  |->vm_profhook:			// Dispatch target for profiler hook.
+#if LJ_HASPROFILE
+  |  mov L:RB, SAVE_L
+  |  mov L:RB->base, BASE
+  |  mov FCARG2, PC			// Caveat: FCARG2 == BASE
+  |  mov FCARG1, L:RB
+  |  call extern lj_dispatch_profile@8	// (lua_State *L, const BCIns *pc)
+  |  mov BASE, L:RB->base
+  |  // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
+  |  sub PC, 4
+  |  jmp ->cont_nop
+#endif
   |
   |//-----------------------------------------------------------------------
   |//-- Trace exit handler -------------------------------------------------