浏览代码

thread-local storage for amd64_apple

It is quite similar to arm64_apple.
The call that needs to be generated
probably also provides extra
invariants on top of the regular
ABI, but I have not checked that.

Clang generates code that is a bit
neater than qbe's because, on x86,
a load can be fused into a call
instruction! We do not bother with
supporting such fused calls since we
expect only sporadic use of the
feature.

For reference, here is what clang
might output for a store to the
second entry of a thread-local
array of ints:

        movq    _x@TLVP(%rip), %rdi
        callq   *(%rdi)
        movl    %ecx, 4(%rax)
Quentin Carbonneaux 3 年之前
父节点
当前提交
8ecae92299
共有 4 个文件被更改,包括 63 次插入和 6 次删除
  1. 8 4
      amd64/emit.c
  2. 24 1
      amd64/isel.c
  3. 1 1
      arm64/isel.c
  4. 30 0
      test/tls.ssa

+ 8 - 4
amd64/emit.c

@@ -167,9 +167,12 @@ emitcon(Con *con, FILE *f)
 	case CAddr:
 		l = str(con->label);
 		p = l[0] == '"' ? "" : T.assym;
-		if (con->reloc == RelThr)
-			fprintf(f, "%%fs:%s%s@tpoff", p, l);
-		else
+		if (con->reloc == RelThr) {
+			if (T.apple)
+				fprintf(f, "%s%s@TLVP", p, l);
+			else
+				fprintf(f, "%%fs:%s%s@tpoff", p, l);
+		} else
 			fprintf(f, "%s%s", p, l);
 		if (con->bits.i)
 			fprintf(f, "%+"PRId64, con->bits.i);
@@ -340,7 +343,8 @@ Next:
 		case RCon:
 			off = fn->con[ref.val];
 			emitcon(&off, f);
-			if (off.type == CAddr && off.reloc != RelThr)
+			if (off.type == CAddr)
+			if (off.reloc != RelThr || T.apple)
 				fprintf(f, "(%%rip)");
 			break;
 		case RTmp:

+ 24 - 1
amd64/isel.c

@@ -63,7 +63,8 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn)
 {
 	char buf[32];
 	Addr a, *m;
-	Ref r0, r1;
+	Con cc, *c;
+	Ref r0, r1, r2, r3;
 	int s, n, op;
 
 	r1 = r0 = *r;
@@ -121,6 +122,28 @@ fixarg(Ref *r, int k, Ins *i, Fn *fn)
 			m->offset.type = CUndef;
 			m->base = r0;
 		}
+	} else if (T.apple && rtype(r0) == RCon
+	&& (c = &fn->con[r0.val])->type == CAddr
+	&& c->reloc == RelThr) {
+		r1 = newtmp("isel", Kl, fn);
+		if (c->bits.i) {
+			r2 = newtmp("isel", Kl, fn);
+			cc = (Con){.type = CBits};
+			cc.bits.i = c->bits.i;
+			r3 = newcon(&cc, fn);
+			emit(Oadd, Kl, r1, r2, r3);
+		} else
+			r2 = r1;
+		emit(Ocopy, Kl, r2, TMP(RAX), R);
+		r2 = newtmp("isel", Kl, fn);
+		r3 = newtmp("isel", Kl, fn);
+		emit(Ocall, 0, R, r3, CALL(17));
+		emit(Ocopy, Kl, TMP(RDI), r2, R);
+		emit(Oload, Kl, r3, r2, R);
+		cc = *c;
+		cc.bits.i = 0;
+		r3 = newcon(&cc, fn);
+		emit(Oload, Kl, r2, r3, R);
 	}
 	*r = r1;
 }

+ 1 - 1
arm64/isel.c

@@ -70,9 +70,9 @@ static void
 fixarg(Ref *pr, int k, int phi, Fn *fn)
 {
 	char buf[32];
+	Con *c, cc;
 	Ref r0, r1, r2, r3;
 	int s, n;
-	Con *c, cc;
 
 	r0 = *pr;
 	switch (rtype(r0)) {

+ 30 - 0
test/tls.ssa

@@ -0,0 +1,30 @@
+thread data $i = align 4 {w 42}
+data $fmt = align 1 {b "i%d==%d\n", b 0}
+
+export
+function w $main() {
+@start
+	%pthr =l alloc8 8
+	%rval =l alloc8 8
+	call $pthread_create(l %pthr, l 0, l $thread, l 0)
+	%t =l load %pthr
+	call $pthread_join(l %t, l %rval)
+	%i0 =w loadw thread $i
+	call $printf(l $fmt, ..., w 0, w %i0)
+	%i1 =w load %rval
+	call $printf(l $fmt, ..., w 1, w %i1)
+	ret 0
+}
+
+function l $thread(l %arg) {
+@start
+	%i3 =l add thread $i, 3
+	storeb 24, %i3
+	%ret =l loadsw thread $i
+	ret %ret
+}
+
+# >>> output
+# i0==42
+# i1==402653226
+# <<<