|
@@ -11,6 +11,7 @@
|
|
|
#include "lj_err.h"
|
|
|
#include "lj_str.h"
|
|
|
#include "lj_char.h"
|
|
|
+#include "lj_prng.h"
|
|
|
|
|
|
/* -- String helpers ------------------------------------------------------ */
|
|
|
|
|
@@ -37,28 +38,6 @@ int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
|
|
|
return (int32_t)(a->len - b->len);
|
|
|
}
|
|
|
|
|
|
-/* Fast string data comparison. Caveat: unaligned access to 1st string! */
|
|
|
-static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
|
|
|
-{
|
|
|
- MSize i = 0;
|
|
|
- lj_assertX(len > 0, "fast string compare with zero length");
|
|
|
- lj_assertX((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4,
|
|
|
- "fast string compare crossing page boundary");
|
|
|
- do { /* Note: innocuous access up to end of string + 3. */
|
|
|
- uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i);
|
|
|
- if (v) {
|
|
|
- i -= len;
|
|
|
-#if LJ_LE
|
|
|
- return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1;
|
|
|
-#else
|
|
|
- return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1;
|
|
|
-#endif
|
|
|
- }
|
|
|
- i += 4;
|
|
|
- } while (i < len);
|
|
|
- return 0;
|
|
|
-}
|
|
|
-
|
|
|
/* Find fixed string p inside string s. */
|
|
|
const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen)
|
|
|
{
|
|
@@ -91,108 +70,301 @@ int lj_str_haspattern(GCstr *s)
|
|
|
return 0; /* No pattern matching chars found. */
|
|
|
}
|
|
|
|
|
|
-/* -- String interning ---------------------------------------------------- */
|
|
|
-
|
|
|
-/* Resize the string hash table (grow and shrink). */
|
|
|
-void lj_str_resize(lua_State *L, MSize newmask)
|
|
|
-{
|
|
|
- global_State *g = G(L);
|
|
|
- GCRef *newhash;
|
|
|
- MSize i;
|
|
|
- if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1)
|
|
|
- return; /* No resizing during GC traversal or if already too big. */
|
|
|
- newhash = lj_mem_newvec(L, newmask+1, GCRef);
|
|
|
- memset(newhash, 0, (newmask+1)*sizeof(GCRef));
|
|
|
- for (i = g->strmask; i != ~(MSize)0; i--) { /* Rehash old table. */
|
|
|
- GCobj *p = gcref(g->strhash[i]);
|
|
|
- while (p) { /* Follow each hash chain and reinsert all strings. */
|
|
|
- MSize h = gco2str(p)->hash & newmask;
|
|
|
- GCobj *next = gcnext(p);
|
|
|
- /* NOBARRIER: The string table is a GC root. */
|
|
|
- setgcrefr(p->gch.nextgc, newhash[h]);
|
|
|
- setgcref(newhash[h], p);
|
|
|
- p = next;
|
|
|
- }
|
|
|
- }
|
|
|
- lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef);
|
|
|
- g->strmask = newmask;
|
|
|
- g->strhash = newhash;
|
|
|
-}
|
|
|
+/* -- String hashing ------------------------------------------------------ */
|
|
|
|
|
|
-/* Intern a string and return string object. */
|
|
|
-GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
|
|
|
+/* Keyed sparse ARX string hash. Constant time.
+** Starts from len ^ (StrHash)seed and mixes in a fixed number of samples:
+** four lj_getu32() loads (start, end, middle, quarter) for len >= 4,
+** otherwise three single bytes (first, last, middle). The short path reads
+** str[0] and str[len-1], so len is expected to be > 0.
+*/
|
|
|
+static StrHash hash_sparse(uint64_t seed, const char *str, MSize len)
|
|
|
{
|
|
|
- global_State *g;
|
|
|
- GCstr *s;
|
|
|
- GCobj *o;
|
|
|
- MSize len = (MSize)lenx;
|
|
|
- MSize a, b, h = len;
|
|
|
- if (lenx >= LJ_MAX_STR)
|
|
|
- lj_err_msg(L, LJ_ERR_STROV);
|
|
|
- g = G(L);
|
|
|
- /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */
|
|
|
+ /* Constants taken from lookup3 hash by Bob Jenkins. */
|
|
|
+ StrHash a, b, h = len ^ (StrHash)seed; /* Key: seed truncated to StrHash. */
|
|
|
if (len >= 4) { /* Caveat: unaligned access! */
|
|
|
a = lj_getu32(str);
|
|
|
h ^= lj_getu32(str+len-4);
|
|
|
b = lj_getu32(str+(len>>1)-2);
|
|
|
h ^= b; h -= lj_rol(b, 14);
|
|
|
b += lj_getu32(str+(len>>2)-1);
|
|
|
- } else if (len > 0) {
|
|
|
+ } else { /* len < 4: sample single bytes instead. */
|
|
|
a = *(const uint8_t *)str;
|
|
|
h ^= *(const uint8_t *)(str+len-1);
|
|
|
b = *(const uint8_t *)(str+(len>>1));
|
|
|
h ^= b; h -= lj_rol(b, 14);
|
|
|
- } else {
|
|
|
- return &g->strempty;
|
|
|
}
|
|
|
a ^= h; a -= lj_rol(h, 11);
|
|
|
b ^= a; b -= lj_rol(a, 25);
|
|
|
h ^= b; h -= lj_rol(b, 16);
|
|
|
- /* Check if the string has already been interned. */
|
|
|
- o = gcref(g->strhash[h & g->strmask]);
|
|
|
- if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
|
|
|
- while (o != NULL) {
|
|
|
- GCstr *sx = gco2str(o);
|
|
|
- if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) {
|
|
|
- /* Resurrect if dead. Can only happen with fixstring() (keywords). */
|
|
|
- if (isdead(g, o)) flipwhite(o);
|
|
|
- return sx; /* Return existing string. */
|
|
|
+ return h;
|
|
|
+}
|
|
|
+
|
|
|
+#if LUAJIT_SECURITY_STRHASH
|
|
|
+/* Keyed dense ARX string hash. Linear time.
+** Callers in this file pass the primary (sparse) hash of the same string
+** as h. For len <= 12 the string data is not read and the result is only a
+** keyed remix of h. For longer strings the last 12 bytes are mixed first,
+** followed by 12 byte blocks from the start; the final block may overlap
+** the tail.
+*/
|
|
|
+static LJ_NOINLINE StrHash hash_dense(uint64_t seed, StrHash h,
|
|
|
+ const char *str, MSize len)
|
|
|
+{
|
|
|
+ StrHash b = lj_bswap(lj_rol(h ^ (StrHash)(seed >> 32), 4)); /* Upper seed half. */
|
|
|
+ if (len > 12) {
|
|
|
+ StrHash a = (StrHash)seed;
|
|
|
+ const char *pe = str+len-12, *p = pe, *q = str; /* pe: start of last 12 bytes. */
|
|
|
+ do {
|
|
|
+ a += lj_getu32(p);
|
|
|
+ b += lj_getu32(p+4);
|
|
|
+ h += lj_getu32(p+8);
|
|
|
+ p = q; q += 12; /* First pass reads the tail, later passes walk from str. */
|
|
|
+ h ^= b; h -= lj_rol(b, 14);
|
|
|
+ a ^= h; a -= lj_rol(h, 11);
|
|
|
+ b ^= a; b -= lj_rol(a, 25);
|
|
|
+ } while (p < pe);
|
|
|
+ h ^= b; h -= lj_rol(b, 16);
|
|
|
+ a ^= h; a -= lj_rol(h, 4);
|
|
|
+ b ^= a; b -= lj_rol(a, 14);
|
|
|
+ }
|
|
|
+ return b;
|
|
|
+}
|
|
|
+#endif
|
|
|
+
|
|
|
+/* -- String interning ---------------------------------------------------- */
|
|
|
+
|
|
|
+#define LJ_STR_MAXCOLL 32 /* Collision count above which a chain gets the secondary hash. */
|
|
|
+
|
|
|
+/* Resize the string interning hash table (grow and shrink).
+** newmask is the new table size minus one. Bit 0 of a table slot flags a
+** chain whose strings use the secondary hash; the other bits hold the head
+** of the chain. Strings are switched between primary and secondary hash as
+** needed while they are reinserted.
+*/
|
|
|
+void lj_str_resize(lua_State *L, MSize newmask)
|
|
|
+{
|
|
|
+ global_State *g = G(L);
|
|
|
+ GCRef *newtab, *oldtab = g->str.tab;
|
|
|
+ MSize i;
|
|
|
+
|
|
|
+ /* No resizing during GC traversal or if already too big. */
|
|
|
+ if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1)
|
|
|
+ return;
|
|
|
+
|
|
|
+ newtab = lj_mem_newvec(L, newmask+1, GCRef);
|
|
|
+ memset(newtab, 0, (newmask+1)*sizeof(GCRef));
|
|
|
+
|
|
|
+#if LUAJIT_SECURITY_STRHASH
|
|
|
+ /* Check which chains need secondary hashes. */
|
|
|
+ if (g->str.second) { /* Only when secondary chains may exist. */
|
|
|
+ int newsecond = 0;
|
|
|
+ /* Compute primary chain lengths. */
|
|
|
+ for (i = g->str.mask; i != ~(MSize)0; i--) {
|
|
|
+ GCobj *o = (GCobj *)(gcrefu(oldtab[i]) & ~(uintptr_t)1);
|
|
|
+ while (o) {
|
|
|
+ GCstr *s = gco2str(o);
|
|
|
+ MSize hash = s->hashalg ? hash_sparse(g->str.seed, strdata(s), s->len) :
|
|
|
+ s->hash; /* Always count by primary hash. */
|
|
|
+ hash &= newmask;
|
|
|
+ setgcrefp(newtab[hash], gcrefu(newtab[hash]) + 1); /* Slot is a counter for now. */
|
|
|
+ o = gcnext(o);
|
|
|
}
|
|
|
- o = gcnext(o);
|
|
|
}
|
|
|
- } else { /* Slow path: end of string is too close to a page boundary. */
|
|
|
- while (o != NULL) {
|
|
|
- GCstr *sx = gco2str(o);
|
|
|
- if (sx->len == len && memcmp(str, strdata(sx), len) == 0) {
|
|
|
- /* Resurrect if dead. Can only happen with fixstring() (keywords). */
|
|
|
- if (isdead(g, o)) flipwhite(o);
|
|
|
- return sx; /* Return existing string. */
|
|
|
+ /* Mark secondary chains. */
|
|
|
+ for (i = newmask; i != ~(MSize)0; i--) {
|
|
|
+ int secondary = gcrefu(newtab[i]) > LJ_STR_MAXCOLL;
|
|
|
+ newsecond |= secondary;
|
|
|
+ setgcrefp(newtab[i], secondary); /* Replace the count by the bit 0 flag. */
|
|
|
+ }
|
|
|
+ g->str.second = newsecond;
|
|
|
+ }
|
|
|
+#endif
|
|
|
+
|
|
|
+ /* Reinsert all strings from the old table into the new table. */
|
|
|
+ for (i = g->str.mask; i != ~(MSize)0; i--) {
|
|
|
+ GCobj *o = (GCobj *)(gcrefu(oldtab[i]) & ~(uintptr_t)1);
|
|
|
+ while (o) {
|
|
|
+ GCobj *next = gcnext(o);
|
|
|
+ GCstr *s = gco2str(o);
|
|
|
+ MSize hash = s->hash;
|
|
|
+#if LUAJIT_SECURITY_STRHASH
|
|
|
+ uintptr_t u;
|
|
|
+ if (LJ_LIKELY(!s->hashalg)) { /* String hashed with primary hash. */
|
|
|
+ hash &= newmask;
|
|
|
+ u = gcrefu(newtab[hash]);
|
|
|
+ if (LJ_UNLIKELY(u & 1)) { /* Switch string to secondary hash. */
|
|
|
+ s->hash = hash = hash_dense(g->str.seed, s->hash, strdata(s), s->len);
|
|
|
+ s->hashalg = 1;
|
|
|
+ hash &= newmask;
|
|
|
+ u = gcrefu(newtab[hash]);
|
|
|
+ }
|
|
|
+ } else { /* String hashed with secondary hash. */
|
|
|
+ MSize shash = hash_sparse(g->str.seed, strdata(s), s->len);
|
|
|
+ u = gcrefu(newtab[shash & newmask]);
|
|
|
+ if (u & 1) { /* Primary chain is flagged: keep the secondary hash. */
|
|
|
+ hash &= newmask;
|
|
|
+ u = gcrefu(newtab[hash]);
|
|
|
+ } else { /* Revert string back to primary hash. */
|
|
|
+ s->hash = shash;
|
|
|
+ s->hashalg = 0;
|
|
|
+ hash = (shash & newmask);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ /* NOBARRIER: The string table is a GC root. */
|
|
|
+ setgcrefp(o->gch.nextgc, (u & ~(uintptr_t)1));
|
|
|
+ setgcrefp(newtab[hash], ((uintptr_t)o | (u & 1)));
|
|
|
+#else
|
|
|
+ hash &= newmask;
|
|
|
+ /* NOBARRIER: The string table is a GC root. */
|
|
|
+ setgcrefr(o->gch.nextgc, newtab[hash]);
|
|
|
+ setgcref(newtab[hash], o);
|
|
|
+#endif
|
|
|
+ o = next;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Free old table and replace with new table. */
|
|
|
+ lj_str_freetab(g); /* Runs before g->str.tab and g->str.mask are replaced below. */
|
|
|
+ g->str.tab = newtab;
|
|
|
+ g->str.mask = newmask;
|
|
|
+}
|
|
|
+
|
|
|
+#if LUAJIT_SECURITY_STRHASH
|
|
|
+/* Rehash and rechain all strings in a chain.
+** hashc selects the chain via hashc & g->str.mask. The chain is detached,
+** its slot is flagged as secondary (bit 0) and every string that still uses
+** the primary hash is switched to the secondary hash before relinking.
+** While the GC is in GCSsweepstring the chain is swept on the fly. Finally
+** the pending string (str, len) is interned via lj_str_new().
+*/
|
|
|
+static LJ_NOINLINE GCstr *lj_str_rehash_chain(lua_State *L, StrHash hashc,
|
|
|
+ const char *str, MSize len)
|
|
|
+{
|
|
|
+ global_State *g = G(L);
|
|
|
+ int ow = g->gc.state == GCSsweepstring ? otherwhite(g) : 0; /* Sweeping? */
|
|
|
+ GCRef *strtab = g->str.tab;
|
|
|
+ MSize strmask = g->str.mask;
|
|
|
+ GCobj *o = gcref(strtab[hashc & strmask]);
|
|
|
+ setgcrefp(strtab[hashc & strmask], (void *)((uintptr_t)1)); /* Empty chain, flagged as secondary. */
|
|
|
+ g->str.second = 1;
|
|
|
+ while (o) {
|
|
|
+ uintptr_t u;
|
|
|
+ GCobj *next = gcnext(o);
|
|
|
+ GCstr *s = gco2str(o);
|
|
|
+ StrHash hash;
|
|
|
+ if (ow) { /* Must sweep while rechaining. */
|
|
|
+ if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* String alive? */
|
|
|
+ lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED),
|
|
|
+ "sweep of undead string");
|
|
|
+ makewhite(g, o);
|
|
|
+ } else { /* Free dead string. */
|
|
|
+ lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED,
|
|
|
+ "sweep of unlive string");
|
|
|
+ lj_str_free(g, s);
|
|
|
+ o = next;
|
|
|
+ continue;
|
|
|
}
|
|
|
- o = gcnext(o);
|
|
|
}
|
|
|
+ hash = s->hash;
|
|
|
+ if (!s->hashalg) { /* Rehash with secondary hash. */
|
|
|
+ hash = hash_dense(g->str.seed, hash, strdata(s), s->len);
|
|
|
+ s->hash = hash;
|
|
|
+ s->hashalg = 1;
|
|
|
+ }
|
|
|
+ /* Rechain at the head of the target chain, keeping its bit 0 flag. */
|
|
|
+ hash &= strmask;
|
|
|
+ u = gcrefu(strtab[hash]);
|
|
|
+ setgcrefp(o->gch.nextgc, (u & ~(uintptr_t)1));
|
|
|
+ setgcrefp(strtab[hash], ((uintptr_t)o | (u & 1)));
|
|
|
+ o = next;
|
|
|
}
|
|
|
- /* Nope, create a new string. */
|
|
|
- s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr);
|
|
|
+ /* Try to insert the pending string again. Its primary slot is flagged
+ ** now, so lj_str_new() uses the secondary hash and does not rehash again.
+ */
|
|
|
+ return lj_str_new(L, str, len);
|
|
|
+}
|
|
|
+#endif
|
|
|
+
|
|
|
+/* Reseed String ID from PRNG after random interval < 2^bits.
+** STRID_RESEED_INTERVAL is that bit count. It stays undefined when none of
+** the conditions below match (string IDs are then purely sequential), and
+** a value of 0 selects a fresh random ID for every new string.
+*/
|
|
|
+#if LUAJIT_SECURITY_STRID == 1
|
|
|
+#define STRID_RESEED_INTERVAL 8
|
|
|
+#elif LUAJIT_SECURITY_STRID == 2
|
|
|
+#define STRID_RESEED_INTERVAL 4
|
|
|
+#elif LUAJIT_SECURITY_STRID >= 3
|
|
|
+#define STRID_RESEED_INTERVAL 0
|
|
|
+#endif
|
|
|
+
|
|
|
+/* Allocate a new string and add to string interning table.
+** hash is stored in the string and selects its chain; hashalg records which
+** hash produced it (lj_str_new() passes 0 for the sparse and 1 for the
+** dense hash). The new string becomes the head of its chain and the bit 0
+** flag of the slot is preserved. Grows the table once the string count
+** exceeds the mask.
+*/
|
|
|
+static GCstr *lj_str_alloc(lua_State *L, const char *str, MSize len,
|
|
|
+ StrHash hash, int hashalg)
|
|
|
+{
|
|
|
+ GCstr *s = lj_mem_newt(L, lj_str_size(len), GCstr);
|
|
|
+ global_State *g = G(L);
|
|
|
+ uintptr_t u;
|
|
|
newwhite(g, s);
|
|
|
s->gct = ~LJ_TSTR;
|
|
|
s->len = len;
|
|
|
- s->hash = h;
|
|
|
+ s->hash = hash;
|
|
|
+#ifndef STRID_RESEED_INTERVAL
|
|
|
+ s->sid = g->str.id++; /* No reseeding configured: sequential IDs. */
|
|
|
+#elif STRID_RESEED_INTERVAL
|
|
|
+ if (!g->str.idreseed--) {
|
|
|
+ uint64_t r = lj_prng_u64(&g->prng);
|
|
|
+ g->str.id = (StrID)r;
|
|
|
+ g->str.idreseed = (uint8_t)(r >> (64 - STRID_RESEED_INTERVAL)); /* Top bits: next interval. */
|
|
|
+ }
|
|
|
+ s->sid = g->str.id++;
|
|
|
+#else
|
|
|
+ s->sid = (StrID)lj_prng_u64(&g->prng); /* Fresh random ID for every string. */
|
|
|
+#endif
|
|
|
s->reserved = 0;
|
|
|
+ s->hashalg = (uint8_t)hashalg;
|
|
|
+ /* Clear last 4 bytes of allocated memory. Implies zero-termination, too. */
|
|
|
+ *(uint32_t *)(strdatawr(s)+(len & ~(MSize)3)) = 0; /* Done first: memcpy rewrites the data bytes. */
|
|
|
memcpy(strdatawr(s), str, len);
|
|
|
- strdatawr(s)[len] = '\0'; /* Zero-terminate string. */
|
|
|
- /* Add it to string hash table. */
|
|
|
- h &= g->strmask;
|
|
|
- s->nextgc = g->strhash[h];
|
|
|
+ /* Add to string hash table. */
|
|
|
+ hash &= g->str.mask;
|
|
|
+ u = gcrefu(g->str.tab[hash]);
|
|
|
+ setgcrefp(s->nextgc, (u & ~(uintptr_t)1));
|
|
|
/* NOBARRIER: The string table is a GC root. */
|
|
|
- setgcref(g->strhash[h], obj2gco(s));
|
|
|
- if (g->strnum++ > g->strmask) /* Allow a 100% load factor. */
|
|
|
- lj_str_resize(L, (g->strmask<<1)+1); /* Grow string table. */
|
|
|
+ setgcrefp(g->str.tab[hash], ((uintptr_t)s | (u & 1))); /* New head, keep bit 0 flag. */
|
|
|
+ if (g->str.num++ > g->str.mask) /* Allow a 100% load factor. */
|
|
|
+ lj_str_resize(L, (g->str.mask<<1)+1); /* Grow string table. */
|
|
|
return s; /* Return newly interned string. */
|
|
|
}
|
|
|
|
|
|
+/* Intern a string and return string object.
+** Returns &g->strempty for lenx == 0 and calls lj_err_msg() with
+** LJ_ERR_STROV for lenx >= LJ_MAX_STR. Otherwise returns the already
+** interned string with the same hash, length and contents, or allocates a
+** new one. The primary hash is used unless the primary slot is flagged
+** (bit 0) for the secondary hash.
+*/
|
|
|
+GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
|
|
|
+{
|
|
|
+ global_State *g = G(L);
|
|
|
+ if (lenx-1 < LJ_MAX_STR-1) { /* 0 < lenx < LJ_MAX_STR; lenx == 0 wraps around. */
|
|
|
+ MSize len = (MSize)lenx;
|
|
|
+ StrHash hash = hash_sparse(g->str.seed, str, len);
|
|
|
+ MSize coll = 0;
|
|
|
+ int hashalg = 0;
|
|
|
+ /* Check if the string has already been interned. */
|
|
|
+ GCobj *o = gcref(g->str.tab[hash & g->str.mask]);
|
|
|
+#if LUAJIT_SECURITY_STRHASH
|
|
|
+ if (LJ_UNLIKELY((uintptr_t)o & 1)) { /* Secondary hash for this chain? */
|
|
|
+ hashalg = 1;
|
|
|
+ hash = hash_dense(g->str.seed, hash, str, len);
|
|
|
+ o = (GCobj *)(gcrefu(g->str.tab[hash & g->str.mask]) & ~(uintptr_t)1);
|
|
|
+ }
|
|
|
+#endif
|
|
|
+ while (o != NULL) {
|
|
|
+ GCstr *sx = gco2str(o);
|
|
|
+ if (sx->hash == hash && sx->len == len) {
|
|
|
+ if (memcmp(str, strdata(sx), len) == 0) {
|
|
|
+ if (isdead(g, o)) flipwhite(o); /* Resurrect if dead. */
|
|
|
+ return sx; /* Return existing string. */
|
|
|
+ }
|
|
|
+ coll++; /* Same hash and length, different data: counts twice. */
|
|
|
+ }
|
|
|
+ coll++; /* Every visited chain entry counts once. */
|
|
|
+ o = gcnext(o);
|
|
|
+ }
|
|
|
+#if LUAJIT_SECURITY_STRHASH
|
|
|
+ /* Rehash chain if there are too many collisions. */
|
|
|
+ if (LJ_UNLIKELY(coll > LJ_STR_MAXCOLL) && !hashalg) { /* Primary chains only. */
|
|
|
+ return lj_str_rehash_chain(L, hash, str, len);
|
|
|
+ }
|
|
|
+#endif
|
|
|
+ /* Otherwise allocate a new string. */
|
|
|
+ return lj_str_alloc(L, str, len, hash, hashalg);
|
|
|
+ } else {
|
|
|
+ if (lenx)
|
|
|
+ lj_err_msg(L, LJ_ERR_STROV);
|
|
|
+ return &g->strempty;
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
|
|
|
{
|
|
|
- g->strnum--;
|
|
|
- lj_mem_free(g, s, sizestring(s));
|
|
|
+ g->str.num--; /* One interned string less; lj_str_alloc() increments this. */
|
|
|
+ lj_mem_free(g, s, lj_str_size(s->len)); /* Same size expression as used for allocation. */
|
|
|
+}
|
|
|
+
|
|
|
+void LJ_FASTCALL lj_str_init(lua_State *L)
|
|
|
+{
|
|
|
+ global_State *g = G(L);
|
|
|
+ g->str.seed = lj_prng_u64(&g->prng); /* Random key for both string hashes. */
|
|
|
+ lj_str_resize(L, LJ_MIN_STRTAB-1); /* Allocate the initial table.
+ ** NOTE(review): lj_str_resize() walks and frees the old table, so
+ ** g->str.tab and g->str.mask are presumably preset by the caller to
+ ** describe an empty table -- confirm against the state setup code. */
|
|
|
}
|
|
|
|