Browse source

Move helper for syncing data/instruction cache to lj_mcode.c.

Sync caches after dynamic code generation for FFI callbacks.
Mike Pall, 14 years ago
parent commit 10474987bd
8 changed files with 69 additions and 56 deletions
  1. 5 5
      src/Makefile.dep
  2. 1 44
      src/lj_asm.c
  3. 1 1
      src/lj_asm_arm.h
  4. 2 2
      src/lj_asm_ppc.h
  5. 1 1
      src/lj_asm_x86.h
  6. 2 0
      src/lj_ccallback.c
  7. 49 2
      src/lj_mcode.c
  8. 8 1
      src/lj_mcode.h

+ 5 - 5
src/Makefile.dep

@@ -75,7 +75,7 @@ lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \
  lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_state.h lj_frame.h \
  lj_bc.h lj_ctype.h lj_cconv.h lj_ccall.h lj_ccallback.h lj_target.h \
- lj_target_*.h lj_vm.h
+ lj_target_*.h lj_mcode.h lj_jit.h lj_ir.h lj_vm.h
 lj_cconv.o: lj_cconv.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
  lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \
  lj_ccallback.h
@@ -195,10 +195,10 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
  lj_vmevent.h lj_vmmath.c lj_api.c lj_bcdump.h lj_parse.h lj_lex.c \
  lualib.h lj_parse.c lj_bcread.c lj_bcwrite.c lj_ctype.c lj_ccallback.h \
  lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c \
- lj_target.h lj_target_*.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h \
- lj_cparse.c lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h lj_iropt.h \
- lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c \
- lj_opt_loop.c lj_snap.h lj_opt_split.c lj_mcode.c lj_mcode.h lj_snap.c \
+ lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \
+ lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h \
+ lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \
+ lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_mcode.c lj_snap.c \
  lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c lj_crecord.h \
  lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h lj_asm_*.h \
  lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c lib_base.c \

+ 1 - 44
src/lj_asm.c

@@ -147,14 +147,6 @@ IRFLDEF(FLOFS)
   0
 };
 
-/* Define this if you want to run LuaJIT with Valgrind. */
-#ifdef LUAJIT_USE_VALGRIND
-#include <valgrind/valgrind.h>
-#define VG_INVALIDATE(p, sz)	VALGRIND_DISCARD_TRANSLATIONS(p, sz)
-#else
-#define VG_INVALIDATE(p, sz)	((void)0)
-#endif
-
 /* -- Target-specific instruction emitter --------------------------------- */
 
 #if LJ_TARGET_X86ORX64
@@ -931,41 +923,6 @@ static uint32_t ir_khash(IRIns *ir)
   return hashrot(lo, hi);
 }
 
-#if !LJ_TARGET_X86ORX64 && LJ_TARGET_OSX
-void sys_icache_invalidate(void *start, size_t len);
-#endif
-
-#if LJ_TARGET_LINUX && LJ_TARGET_PPC
-#include <dlfcn.h>
-static void (*asm_ppc_cache_flush)(MCode *start, MCode *end);
-static void asm_dummy_cache_flush(MCode *start, MCode *end)
-{
-  UNUSED(start); UNUSED(end);
-}
-#endif
-
-/* Flush instruction cache. */
-static void asm_cache_flush(MCode *start, MCode *end)
-{
-  VG_INVALIDATE(start, (char *)end-(char *)start);
-#if LJ_TARGET_X86ORX64
-  UNUSED(start); UNUSED(end);
-#elif LJ_TARGET_OSX
-  sys_icache_invalidate(start, end-start);
-#elif LJ_TARGET_LINUX && LJ_TARGET_PPC
-  if (!asm_ppc_cache_flush) {
-    void *vdso = dlopen("linux-vdso32.so.1", RTLD_LAZY);
-    if (!vdso || !(asm_ppc_cache_flush = dlsym(vdso, "__kernel_sync_dicache")))
-      asm_ppc_cache_flush = asm_dummy_cache_flush;
-  }
-  asm_ppc_cache_flush(start, end);
-#elif defined(__GNUC__) && !LJ_TARGET_PPC
-  __clear_cache(start, end);
-#else
-#error "Missing builtin to flush instruction cache"
-#endif
-}
-
 /* -- Allocations --------------------------------------------------------- */
 
 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args);
@@ -1776,7 +1733,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
   if (!as->loopref)
     asm_tail_fixup(as, T->link);  /* Note: this may change as->mctop! */
   T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp);
-  asm_cache_flush(T->mcode, origtop);
+  lj_mcode_sync(T->mcode, origtop);
 }
 
 #undef IR

+ 1 - 1
src/lj_asm_arm.h

@@ -1779,7 +1779,7 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
     }
   }
   lua_assert(cstart != NULL);
-  asm_cache_flush(cstart, cend);
+  lj_mcode_sync(cstart, cend);
   lj_mcode_patch(J, mcarea, 1);
 }
 

+ 2 - 2
src/lj_asm_ppc.h

@@ -2130,13 +2130,13 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
     *px = PPCI_B | ((uint32_t)delta & 0x03ffffffu);
   }
   if (!cstart) cstart = px;
-  asm_cache_flush(cstart, px+1);
+  lj_mcode_sync(cstart, px+1);
   if (clearso) {  /* Extend the current trace. Ugly workaround. */
     MCode *pp = J->cur.mcode;
     J->cur.szmcode += sizeof(MCode);
     *--pp = PPCI_MCRXR;  /* Clear SO flag. */
     J->cur.mcode = pp;
-    asm_cache_flush(pp, pp+1);
+    lj_mcode_sync(pp, pp+1);
   }
   lj_mcode_patch(J, mcarea, 1);
 }

+ 1 - 1
src/lj_asm_x86.h

@@ -2745,7 +2745,7 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
       p += 5;
     }
   }
+  lj_mcode_sync(T->mcode, T->mcode + T->szmcode);
   lj_mcode_patch(J, mcarea, 1);
-  VG_INVALIDATE(T->mcode, T->szmcode);
 }
 

+ 2 - 0
src/lj_ccallback.c

@@ -17,6 +17,7 @@
 #include "lj_ccall.h"
 #include "lj_ccallback.h"
 #include "lj_target.h"
+#include "lj_mcode.h"
 #include "lj_vm.h"
 
 /* -- Target-specific handling of callback slots -------------------------- */
@@ -145,6 +146,7 @@ static void callback_mcode_new(CTState *cts)
 #endif
   cts->cb.mcode = p;
   callback_mcode_init(cts->g, p);
+  lj_mcode_sync(p, (char *)p + sz);
 #if LJ_TARGET_WINDOWS
   {
     DWORD oprot;

+ 49 - 2
src/lj_mcode.c

@@ -7,18 +7,65 @@
 #define LUA_CORE
 
 #include "lj_obj.h"
-
 #if LJ_HASJIT
-
 #include "lj_gc.h"
 #include "lj_jit.h"
 #include "lj_mcode.h"
 #include "lj_trace.h"
 #include "lj_dispatch.h"
 #include "lj_vm.h"
+#endif
 
 /* -- OS-specific functions ----------------------------------------------- */
 
+#if LJ_HASJIT || LJ_HASFFI
+
+/* Define this if you want to run LuaJIT with Valgrind. */
+#ifdef LUAJIT_USE_VALGRIND
+#include <valgrind/valgrind.h>
+#endif
+
+#if !LJ_TARGET_X86ORX64 && LJ_TARGET_OSX
+void sys_icache_invalidate(void *start, size_t len);
+#endif
+
+#if LJ_TARGET_LINUX && LJ_TARGET_PPC
+#include <dlfcn.h>
+static void (*mcode_sync_ppc)(void *start, void *end);
+static void mcode_sync_dummy(void *start, void *end)
+{
+    UNUSED(start); UNUSED(end);
+}
+#endif
+
+/* Synchronize data/instruction cache. */
+void lj_mcode_sync(void *start, void *end)
+{
+#ifdef LUAJIT_USE_VALGRIND
+  VALGRIND_DISCARD_TRANSLATIONS(start, (char *)end-(char *)start);
+#endif
+#if LJ_TARGET_X86ORX64
+  UNUSED(start); UNUSED(end);
+#elif LJ_TARGET_OSX
+  sys_icache_invalidate(start, (char *)end-(char *)start);
+#elif LJ_TARGET_LINUX && LJ_TARGET_PPC
+  if (!mcode_sync_ppc) {
+    void *vdso = dlopen("linux-vdso32.so.1", RTLD_LAZY);
+    if (!vdso || !(mcode_sync_ppc = dlsym(vdso, "__kernel_sync_dicache")))
+      mcode_sync_ppc = mcode_sync_dummy;
+  }
+  mcode_sync_ppc(start, end);
+#elif defined(__GNUC__) && !LJ_TARGET_PPC
+  __clear_cache(start, end);
+#else
+#error "Missing builtin to flush instruction cache"
+#endif
+}
+
+#endif
+
+#if LJ_HASJIT
+
 #if LJ_TARGET_WINDOWS
 
 #define WIN32_LEAN_AND_MEAN

+ 8 - 1
src/lj_mcode.h

@@ -6,9 +6,16 @@
 #ifndef _LJ_MCODE_H
 #define _LJ_MCODE_H
 
-#include "lj_jit.h"
+#include "lj_obj.h"
+
+#if LJ_HASJIT || LJ_HASFFI
+LJ_FUNC void lj_mcode_sync(void *start, void *end);
+#endif
 
 #if LJ_HASJIT
+
+#include "lj_jit.h"
+
 LJ_FUNC void lj_mcode_free(jit_State *J);
 LJ_FUNC MCode *lj_mcode_reserve(jit_State *J, MCode **lim);
 LJ_FUNC void lj_mcode_commit(jit_State *J, MCode *m);