Browse Source

ARM64: Add support for ARM BTI.

Note: this is not enabled by default, look for CET in lj_arch.h.
Thanks to Yuichiro Naito. #1398
Mike Pall 1 month ago
parent
commit
8651ef6df4
7 changed files with 124 additions and 3 deletions
  1. 22 0
      dynasm/dasm_arm64.lua
  2. 7 1
      src/jit/dis_arm64.lua
  3. 5 0
      src/lj_arch.h
  4. 12 2
      src/lj_ccallback.c
  5. 7 0
      src/lj_emit_arm64.h
  6. 4 0
      src/lj_target_arm64.h
  7. 67 0
      src/vm_arm64.dasc

+ 22 - 0
dynasm/dasm_arm64.lua

@@ -244,6 +244,10 @@ local map_cond = {
   hs = 2, lo = 3,
 }
 
+local map_bti = {  -- BTI target keyword -> bits added to the bti opcode template.
+  c = 0x40, j = 0x80, jc = 0xc0,
+}
+
 ------------------------------------------------------------------------------
 
 local parse_reg_type
@@ -475,6 +479,12 @@ local function parse_cond(expr, inv)
   return shl(bit.bxor(c, inv), 12)
 end
 
+local function parse_map(expr, map)  -- Translate an operand string via a lookup table.
+  local x = map[expr]
+  if not x then werror("bad operand") end  -- No entry in the map for this operand.
+  return x
+end
+
 local function parse_load(params, nparams, n, op)
   if params[n+2] then werror("too many operands") end
   local scale = shr(op, 30)
@@ -823,11 +833,21 @@ map_op = {
   tbz_3  = "36000000DTBw|36000000DTBx",
   tbnz_3 = "37000000DTBw|37000000DTBx",
 
+  -- Branch Target Identification.
+  bti_1  = "d503241ft",
+
   -- ARM64e: Pointer authentication codes (PAC).
   blraaz_1  = "d63f081fNx",
+  blrabz_1  = "d63f0c1fNx",
   braa_2    = "d71f0800NDx",
+  brab_2    = "d71f0c00NDx",
   braaz_1   = "d61f081fNx",
+  brabz_1   = "d61f0c1fNx",
+  paciasp_0 = "d503233f",
   pacibsp_0 = "d503237f",
+  autiasp_0 = "d50323bf",
+  autibsp_0 = "d50323ff",
+  retaa_0   = "d65f0bff",
   retab_0   = "d65f0fff",
 
   -- Miscellaneous instructions.
@@ -996,6 +1016,8 @@ local function parse_template(params, template, nparams, pos)
       op = op + parse_cond(q, 0); n = n + 1
     elseif p == "c" then
       op = op + parse_cond(q, 1); n = n + 1
+    elseif p == "t" then
+      op = op + parse_map(q, map_bti); n = n + 1
 
     else
       assert(false)

+ 7 - 1
src/jit/dis_arm64.lua

@@ -695,7 +695,10 @@ local map_br = { -- Branches, exception generating and system instructions.
     },
     { -- System instructions.
       shift = 0, mask = 0x3fffff,
-      [0x03201f] = "nop"
+      [0x03201f] = "nop",
+      [0x03245f] = "bti c",
+      [0x03249f] = "bti j",
+      [0x0324df] = "bti jc",
     },
     { -- Unconditional branch, register.
       shift = 0, mask = 0xfffc1f,
@@ -1171,6 +1174,9 @@ local function disass_ins(ctx)
 	end
       end
       second0 = true
+    elseif p == " " then
+      operands[#operands+1] = pat:match(" (.*)")
+      break
     else
       assert(false)
     end

+ 5 - 0
src/lj_arch.h

@@ -288,6 +288,11 @@
 #if !defined(LJ_ABI_PAUTH) && defined(__arm64e__)
 #define LJ_ABI_PAUTH		1
 #endif
+#if !defined(LJ_ABI_BRANCH_TRACK) && (__ARM_FEATURE_BTI_DEFAULT & 1) && \
+    defined(LUAJIT_ENABLE_CET_BR)
+/* See comments about LUAJIT_ENABLE_CET_BR above. */
+#define LJ_ABI_BRANCH_TRACK	1
+#endif
 #define LJ_TARGET_ARM64		1
 #define LJ_TARGET_EHRETREG	0
 #define LJ_TARGET_EHRAREG	30

+ 12 - 2
src/lj_ccallback.c

@@ -64,6 +64,10 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
 
 #elif LJ_TARGET_ARM64
 
+#if LJ_ABI_BRANCH_TRACK
+#define CALLBACK_MCODE_SLOTSZ		12
+#endif
+
 #define CALLBACK_MCODE_HEAD		32
 
 #elif LJ_TARGET_PPC
@@ -88,8 +92,11 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
 #endif
 
 #ifndef CALLBACK_SLOT2OFS
-#define CALLBACK_SLOT2OFS(slot)		(CALLBACK_MCODE_HEAD + 8*(slot))
-#define CALLBACK_OFS2SLOT(ofs)		(((ofs)-CALLBACK_MCODE_HEAD)/8)
+#ifndef CALLBACK_MCODE_SLOTSZ
+#define CALLBACK_MCODE_SLOTSZ		8
+#endif
+#define CALLBACK_SLOT2OFS(slot)		(CALLBACK_MCODE_HEAD + CALLBACK_MCODE_SLOTSZ*(slot))
+#define CALLBACK_OFS2SLOT(ofs)		(((ofs)-CALLBACK_MCODE_HEAD)/CALLBACK_MCODE_SLOTSZ)
 #define CALLBACK_MAX_SLOT		(CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
 #endif
 
@@ -193,6 +200,9 @@ static void *callback_mcode_init(global_State *g, uint32_t *page)
   ((void **)p)[1] = g;
   p += 4;
   for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
+#if LJ_ABI_BRANCH_TRACK
+    *p++ = A64I_LE(A64I_BTI_C);  /* Slot entry landing pad; stored via A64I_LE like the other slot insns. */
+#endif
     *p++ = A64I_LE(A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot));
     *p = A64I_LE(A64I_B | A64F_S26((page-p) & 0x03ffffffu));
     p++;

+ 7 - 0
src/lj_emit_arm64.h

@@ -409,6 +409,13 @@ static void emit_call(ASMState *as, ASMFunction target)
   }
 }
 
+#if LJ_ABI_BRANCH_TRACK
+static void emit_branch_track(ASMState *as)  /* Emit a "bti j" instruction. */
+{
+  *--as->mcp = A64I_BTI_J;  /* Pre-decrement as->mcp, then store the instruction word there. */
+}
+#endif
+
 /* -- Emit generic operations --------------------------------------------- */
 
 /* Generic move between two regs. */

+ 4 - 0
src/lj_target_arm64.h

@@ -265,6 +265,10 @@ typedef enum A64Ins {
   A64I_BRAAZ = 0xd61f081f,
   A64I_BLRAAZ = 0xd63f081f,
 
+  A64I_BTI_C  = 0xd503245f,
+  A64I_BTI_J  = 0xd503249f,
+  A64I_BTI_JC = 0xd50324df,
+
   A64I_NOP = 0xd503201f,
 
   /* FP */

+ 67 - 0
src/vm_arm64.dasc

@@ -92,6 +92,17 @@
 |.macro ret_auth; ret; .endmacro
 |.endif
 |
+|// ARM64 branch target identification (BTI). Empty macros unless BRANCH_TRACK.
+|.if BRANCH_TRACK
+|.macro bti_jump; bti j; .endmacro
+|.macro bti_call; bti c; .endmacro
+|.macro bti_tailcall; bti jc; .endmacro
+|.else
+|.macro bti_jump; .endmacro
+|.macro bti_call; .endmacro
+|.macro bti_tailcall; .endmacro
+|.endif
+|
 |//-----------------------------------------------------------------------
 |
 |// Stack layout while in interpreter. Must match with lj_frame.h.
@@ -439,24 +450,28 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |->vm_unwind_c:			// Unwind C stack, return from vm_pcall.
   |  // (void *cframe, int errcode)
+  |  bti_tailcall
   |  add fp, CARG1, # SAVE_FP_LR_
   |  mov sp, CARG1
   |  mov CRET1, CARG2
   |  ldr L, SAVE_L
   |  ldr GL, L->glref
   |->vm_unwind_c_eh:			// Landing pad for external unwinder.
+  |  bti_tailcall
   |   mv_vmstate TMP0w, C
   |   st_vmstate TMP0w
   |  b ->vm_leave_unw
   |
   |->vm_unwind_ff:			// Unwind C stack, return from ff pcall.
   |  // (void *cframe)
+  |  bti_tailcall
   |  add fp, CARG1, # SAVE_FP_LR_
   |  mov sp, CARG1
   |  ldr L, SAVE_L
   |    init_constants
   |   ldr GL, L->glref			// Setup pointer to global state.
   |->vm_unwind_ff_eh:			// Landing pad for external unwinder.
+  |  bti_tailcall
   |    mov RC, #16			// 2 results: false + error message.
   |  ldr BASE, L->base
   |    mov_false TMP0
@@ -632,6 +647,7 @@ static void build_subroutines(BuildCtx *ctx)
   |.endif
   |
   |->cont_cat:				// RA = resultptr, CARG4 = meta base
+  |  bti_jump
   |  ldr INSw, [PC, #-4]
   |   sub CARG2, CARG4, #32
   |   ldr TMP0, [RA]
@@ -789,9 +805,11 @@ static void build_subroutines(BuildCtx *ctx)
   |   sub RB, RB, #0x20000
   |  csel PC, PC, RB, lo
   |->cont_nop:
+  |  bti_jump
   |  ins_next
   |
   |->cont_ra:				// RA = resultptr
+  |  bti_jump
   |  ldr INSw, [PC, #-4]
   |   ldr TMP0, [RA]
   |  decode_RA TMP1, INS
@@ -799,12 +817,14 @@ static void build_subroutines(BuildCtx *ctx)
   |  b ->cont_nop
   |
   |->cont_condt:			// RA = resultptr
+  |  bti_jump
   |  ldr TMP0, [RA]
   |   mov_true TMP1
   |  cmp TMP1, TMP0			// Branch if result is true.
   |  b <4
   |
   |->cont_condf:			// RA = resultptr
+  |  bti_jump
   |  ldr TMP0, [RA]
   |   mov_false TMP1
   |  cmp TMP0, TMP1			// Branch if result is false.
@@ -956,10 +976,12 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |.macro .ffunc, name
   |->ff_ .. name:
+  |  bti_jump
   |.endmacro
   |
   |.macro .ffunc_1, name
   |->ff_ .. name:
+  |  bti_jump
   |  ldr CARG1, [BASE]
   |   cmp NARGS8:RC, #8
   |   blo ->fff_fallback
@@ -967,6 +989,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |.macro .ffunc_2, name
   |->ff_ .. name:
+  |  bti_jump
   |  ldp CARG1, CARG2, [BASE]
   |   cmp NARGS8:RC, #16
   |   blo ->fff_fallback
@@ -1810,6 +1833,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |->vm_record:				// Dispatch target for recording phase.
   |.if JIT
+  |  bti_jump
   |  ldrb CARG1w, GL->hookmask
   |  tst CARG1, #HOOK_VMEVENT		// No recording while in vmevent.
   |  bne >5
@@ -1825,6 +1849,7 @@ static void build_subroutines(BuildCtx *ctx)
   |.endif
   |
   |->vm_rethook:			// Dispatch target for return hooks.
+  |  bti_jump
   |  ldrb TMP2w, GL->hookmask
   |  tbz TMP2w, #HOOK_ACTIVE_SHIFT, >1	// Hook already active?
   |5:  // Re-dispatch to static ins.
@@ -1832,6 +1857,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  br_auth TMP0
   |
   |->vm_inshook:			// Dispatch target for instr/line hooks.
+  |  bti_jump
   |  ldrb TMP2w, GL->hookmask
   |   ldr TMP3w, GL->hookcount
   |  tbnz TMP2w, #HOOK_ACTIVE_SHIFT, <5	// Hook already active?
@@ -1858,6 +1884,7 @@ static void build_subroutines(BuildCtx *ctx)
   |  br_auth TMP0
   |
   |->cont_hook:				// Continue from hook yield.
+  |  bti_jump
   |  ldr CARG1, [CARG4, #-40]
   |   add PC, PC, #4
   |  str CARG1w, SAVE_MULTRES		// Restore MULTRES for *M ins.
@@ -1881,6 +1908,7 @@ static void build_subroutines(BuildCtx *ctx)
   |.endif
   |
   |->vm_callhook:			// Dispatch target for call hooks.
+  |  bti_jump
   |  mov CARG2, PC
   |.if JIT
   |  b >1
@@ -1910,6 +1938,7 @@ static void build_subroutines(BuildCtx *ctx)
   |->cont_stitch:			// Trace stitching.
   |.if JIT
   |  // RA = resultptr, CARG4 = meta base
+  |  bti_jump
   |   ldr RBw, SAVE_MULTRES
   |  ldr INSw, [PC, #-4]
   |    ldr TRACE:CARG3, [CARG4, #-40]	// Save previous trace.
@@ -1958,6 +1987,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |->vm_profhook:			// Dispatch target for profiler hook.
 #if LJ_HASPROFILE
+  |  bti_jump
   |  mov CARG1, L
   |   str BASE, L->base
   |  mov CARG2, PC
@@ -1979,6 +2009,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |->vm_exit_handler:
   |.if JIT
+  |  bti_call
   |  sub     sp, sp, #(64*8)
   |  savex_, 0, 1
   |  savex_, 2, 3
@@ -2029,6 +2060,7 @@ static void build_subroutines(BuildCtx *ctx)
   |.endif
   |
   |->vm_exit_interp:
+  |  bti_jump
   |  // CARG1 = MULTRES or negated error code, BASE, PC and GL set.
   |.if JIT
   |  ldr L, SAVE_L
@@ -2106,6 +2138,7 @@ static void build_subroutines(BuildCtx *ctx)
   |
   |  // int lj_vm_modi(int dividend, int divisor);
   |->vm_modi:
+  |  bti_call
   |    eor CARG4w, CARG1w, CARG2w
   |    cmp CARG4w, #0
   |  eor CARG3w, CARG1w, CARG1w, asr #31
@@ -2142,6 +2175,7 @@ static void build_subroutines(BuildCtx *ctx)
   |// Next idx returned in CRET2w.
   |->vm_next:
   |.if JIT
+  |  bti_call
   |  ldr NEXT_LIM, NEXT_TAB->asize
   |   ldr NEXT_TMP1, NEXT_TAB->array
   |1:  // Traverse array part.
@@ -2286,6 +2320,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
   |=>defop:
 
   switch (op) {
+#if !LJ_HASJIT
+  case BC_FORL:
+  case BC_JFORI:
+  case BC_JFORL:
+  case BC_ITERL:
+  case BC_JITERL:
+  case BC_LOOP:
+  case BC_JLOOP:
+  case BC_FUNCF:
+  case BC_JFUNCF:
+  case BC_JFUNCV:
+#endif
+  case BC_FUNCV:  /* NYI: compiled vararg functions. */
+    break;  /* Avoid redundant bti instructions. */
+  default:
+    |  bti_jump
+    break;
+  }
+
+  switch (op) {
 
   /* -- Comparison ops ---------------------------------------------------- */
 
@@ -4122,6 +4176,19 @@ static void emit_asm_debug(BuildCtx *ctx)
 	"\t.align 3\n"
 	".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
 #endif
+#endif
+#if LJ_TARGET_LINUX && LJ_ABI_BRANCH_TRACK
+    fprintf(ctx->fp,
+	"\t.section .note.gnu.property,\"a\"\n"
+	"\t.align 3\n"
+	"\t.long 4\n"
+	"\t.long 16\n"
+	"\t.long 5\n"
+	"\t.long 0x00554e47\n"
+	"\t.long 0xc0000000\n"
+	"\t.long 4\n"
+	"\t.long 1\n"
+	"\t.long 0\n");
 #endif
     break;
 #if !LJ_NO_UNWIND