Browse Source

Add build infrastructure for the SSE2-enabled interpreter.

Works on x86 now. Will be enabled by default on x64 (which is not ready yet).
Mike Pall 15 years ago
parent
commit
6ce0c90ed6
4 changed files with 21 additions and 5 deletions
  1. 8 0
      src/Makefile
  2. 1 1
      src/buildvm_x86.dasc
  3. 1 1
      src/buildvm_x86.h
  4. 11 3
      src/lib_jit.c

+ 8 - 0
src/Makefile

@@ -62,6 +62,14 @@ XCFLAGS=
 # interpreter. Don't bother if your OS wouldn't run on them, anyway.
 #XCFLAGS+= -DLUAJIT_CPU_NOCMOV
 #
+# Use SSE2 instructions instead of x87 instructions in the x86 interpreter
+# (always enabled for x64). A pure interpreter built with this flag won't
+# run on older CPUs (before P4 or K8). There isn't much of a speed
+# difference, so this is not enabled by default.
+# The JIT compiler is not affected by this flag. It always uses runtime
+# CPU feature detection before emitting code for SSE2 up to SSE4.1.
+#XCFLAGS+= -DLUAJIT_CPU_SSE2
+#
 # Disable the JIT compiler, i.e. turn LuaJIT into a pure interpreter:
 #XCFLAGS+= -DLUAJIT_DISABLE_JIT
 #

+ 1 - 1
src/buildvm_x86.dasc

@@ -4554,7 +4554,7 @@ static int build_backend(BuildCtx *ctx)
 #ifdef LUAJIT_CPU_NOCMOV
   cmov = 0;
 #endif
-#ifdef LUAJIT_CPU_SSE2
+#if defined(LUAJIT_CPU_SSE2) || defined(LJ_TARGET_X64)
   sse = 1;
 #endif
 

+ 1 - 1
src/buildvm_x86.h

@@ -2290,7 +2290,7 @@ static int build_backend(BuildCtx *ctx)
 #ifdef LUAJIT_CPU_NOCMOV
   cmov = 0;
 #endif
-#ifdef LUAJIT_CPU_SSE2
+#if defined(LUAJIT_CPU_SSE2) || defined(LJ_TARGET_X64)
   sse = 1;
 #endif
 

+ 11 - 3
src/lib_jit.c

@@ -524,10 +524,11 @@ static uint32_t jit_cpudetect(lua_State *L)
   if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
 #if !LJ_HASJIT
 #define JIT_F_CMOV	1
+#define JIT_F_SSE2	2
 #endif
     flags |= ((features[3] >> 15)&1) * JIT_F_CMOV;
-#if LJ_HASJIT
     flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;
+#if LJ_HASJIT
     flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
     if (vendor[2] == 0x6c65746e) {  /* Intel. */
       if ((features[0] & 0x0ff00f00) == 0x00000f00)  /* P4. */
@@ -543,13 +544,20 @@ static uint32_t jit_cpudetect(lua_State *L)
     }
 #endif
   }
-#ifndef LUAJIT_CPU_NOCMOV
+  /* Check for required instruction set support on x86. */
+#if LJ_TARGET_X86
+#if !defined(LUAJIT_CPU_NOCMOV)
   if (!(flags & JIT_F_CMOV))
     luaL_error(L, "Ancient CPU lacks CMOV support (recompile with -DLUAJIT_CPU_NOCMOV)");
 #endif
-#if LJ_HASJIT
   if (!(flags & JIT_F_SSE2))
+#if defined(LUAJIT_CPU_SSE2)
+    luaL_error(L, "CPU does not support SSE2 (recompile without -DLUAJIT_CPU_SSE2)");
+#elif LJ_HASJIT
     luaL_error(L, "Sorry, SSE2 CPU support required for this beta release");
+#else
+    (void)0;
+#endif
 #endif
   UNUSED(L);
 #else