lj_str.c 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339
  1. /*
  2. ** String handling.
  3. ** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
  4. **
  5. ** Portions taken verbatim or adapted from the Lua interpreter.
  6. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
  7. */
  8. #include <stdio.h>
  9. #define lj_str_c
  10. #define LUA_CORE
  11. #include "lj_obj.h"
  12. #include "lj_gc.h"
  13. #include "lj_err.h"
  14. #include "lj_str.h"
  15. #include "lj_state.h"
  16. #include "lj_char.h"
  17. /* -- String interning ---------------------------------------------------- */
  18. /* Ordered compare of strings. Assumes string data is 4-byte aligned. */
  19. int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
  20. {
  21. MSize i, n = a->len > b->len ? b->len : a->len;
  22. for (i = 0; i < n; i += 4) {
  23. /* Note: innocuous access up to end of string + 3. */
  24. uint32_t va = *(const uint32_t *)(strdata(a)+i);
  25. uint32_t vb = *(const uint32_t *)(strdata(b)+i);
  26. if (va != vb) {
  27. #if LJ_LE
  28. va = lj_bswap(va); vb = lj_bswap(vb);
  29. #endif
  30. i -= n;
  31. if ((int32_t)i >= -3) {
  32. va >>= 32+(i<<3); vb >>= 32+(i<<3);
  33. if (va == vb) break;
  34. }
  35. return va < vb ? -1 : 1;
  36. }
  37. }
  38. return (int32_t)(a->len - b->len);
  39. }
  40. /* Fast string data comparison. Caveat: unaligned access to 1st string! */
  41. static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
  42. {
  43. MSize i = 0;
  44. lua_assert(len > 0);
  45. lua_assert((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4);
  46. do { /* Note: innocuous access up to end of string + 3. */
  47. uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i);
  48. if (v) {
  49. i -= len;
  50. #if LJ_LE
  51. return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1;
  52. #else
  53. return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1;
  54. #endif
  55. }
  56. i += 4;
  57. } while (i < len);
  58. return 0;
  59. }
  60. /* Resize the string hash table (grow and shrink). */
  61. void lj_str_resize(lua_State *L, MSize newmask)
  62. {
  63. global_State *g = G(L);
  64. GCRef *newhash;
  65. MSize i;
  66. if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1)
  67. return; /* No resizing during GC traversal or if already too big. */
  68. newhash = lj_mem_newvec(L, newmask+1, GCRef);
  69. memset(newhash, 0, (newmask+1)*sizeof(GCRef));
  70. for (i = g->strmask; i != ~(MSize)0; i--) { /* Rehash old table. */
  71. GCobj *p = gcref(g->strhash[i]);
  72. while (p) { /* Follow each hash chain and reinsert all strings. */
  73. MSize h = gco2str(p)->hash & newmask;
  74. GCobj *next = gcnext(p);
  75. /* NOBARRIER: The string table is a GC root. */
  76. setgcrefr(p->gch.nextgc, newhash[h]);
  77. setgcref(newhash[h], p);
  78. p = next;
  79. }
  80. }
  81. lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef);
  82. g->strmask = newmask;
  83. g->strhash = newhash;
  84. }
  85. /* Intern a string and return string object. */
  86. GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
  87. {
  88. global_State *g;
  89. GCstr *s;
  90. GCobj *o;
  91. MSize len = (MSize)lenx;
  92. MSize a, b, h = len;
  93. if (lenx >= LJ_MAX_STR)
  94. lj_err_msg(L, LJ_ERR_STROV);
  95. g = G(L);
  96. /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */
  97. if (len >= 4) { /* Caveat: unaligned access! */
  98. a = lj_getu32(str);
  99. h ^= lj_getu32(str+len-4);
  100. b = lj_getu32(str+(len>>1)-2);
  101. h ^= b; h -= lj_rol(b, 14);
  102. b += lj_getu32(str+(len>>2)-1);
  103. } else if (len > 0) {
  104. a = *(const uint8_t *)str;
  105. h ^= *(const uint8_t *)(str+len-1);
  106. b = *(const uint8_t *)(str+(len>>1));
  107. h ^= b; h -= lj_rol(b, 14);
  108. } else {
  109. return &g->strempty;
  110. }
  111. a ^= h; a -= lj_rol(h, 11);
  112. b ^= a; b -= lj_rol(a, 25);
  113. h ^= b; h -= lj_rol(b, 16);
  114. /* Check if the string has already been interned. */
  115. o = gcref(g->strhash[h & g->strmask]);
  116. if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
  117. while (o != NULL) {
  118. GCstr *sx = gco2str(o);
  119. if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) {
  120. /* Resurrect if dead. Can only happen with fixstring() (keywords). */
  121. if (isdead(g, o)) flipwhite(o);
  122. return sx; /* Return existing string. */
  123. }
  124. o = gcnext(o);
  125. }
  126. } else { /* Slow path: end of string is too close to a page boundary. */
  127. while (o != NULL) {
  128. GCstr *sx = gco2str(o);
  129. if (sx->len == len && memcmp(str, strdata(sx), len) == 0) {
  130. /* Resurrect if dead. Can only happen with fixstring() (keywords). */
  131. if (isdead(g, o)) flipwhite(o);
  132. return sx; /* Return existing string. */
  133. }
  134. o = gcnext(o);
  135. }
  136. }
  137. /* Nope, create a new string. */
  138. s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr);
  139. newwhite(g, s);
  140. s->gct = ~LJ_TSTR;
  141. s->len = len;
  142. s->hash = h;
  143. s->reserved = 0;
  144. memcpy(strdatawr(s), str, len);
  145. strdatawr(s)[len] = '\0'; /* Zero-terminate string. */
  146. /* Add it to string hash table. */
  147. h &= g->strmask;
  148. s->nextgc = g->strhash[h];
  149. /* NOBARRIER: The string table is a GC root. */
  150. setgcref(g->strhash[h], obj2gco(s));
  151. if (g->strnum++ > g->strmask) /* Allow a 100% load factor. */
  152. lj_str_resize(L, (g->strmask<<1)+1); /* Grow string table. */
  153. return s; /* Return newly interned string. */
  154. }
  155. void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
  156. {
  157. g->strnum--;
  158. lj_mem_free(g, s, sizestring(s));
  159. }
  160. /* -- Type conversions ---------------------------------------------------- */
  161. /* Print number to buffer. Canonicalizes non-finite values. */
  162. size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o)
  163. {
  164. if (LJ_LIKELY((o->u32.hi << 1) < 0xffe00000)) { /* Finite? */
  165. lua_Number n = o->n;
  166. #if __BIONIC__
  167. if (tvismzero(o)) { s[0] = '-'; s[1] = '0'; return 2; }
  168. #endif
  169. return (size_t)lua_number2str(s, n);
  170. } else if (((o->u32.hi & 0x000fffff) | o->u32.lo) != 0) {
  171. s[0] = 'n'; s[1] = 'a'; s[2] = 'n'; return 3;
  172. } else if ((o->u32.hi & 0x80000000) == 0) {
  173. s[0] = 'i'; s[1] = 'n'; s[2] = 'f'; return 3;
  174. } else {
  175. s[0] = '-'; s[1] = 'i'; s[2] = 'n'; s[3] = 'f'; return 4;
  176. }
  177. }
  /*
  ** Print integer to buffer. Returns pointer to start.
  ** The buffer at p must hold 1+10 bytes (sign plus up to ten digits). The
  ** text is written right-aligned, so it always ends at p+1+10, and it is
  ** not zero-terminated.
  */
  char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k)
  {
    /*
    ** Take the magnitude in unsigned arithmetic. Negating k as a signed
    ** value overflows (undefined behavior) for the most negative int32_t.
    */
    uint32_t u = (uint32_t)k;
    if (k < 0) u = 0u - u;
    p += 1+10;
    /* Emit digits from least to most significant, moving backwards. */
    do { *--p = (char)('0' + u % 10); } while (u /= 10);
    if (k < 0) *--p = '-';
    return p;
  }
  187. /* Convert number to string. */
  188. GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np)
  189. {
  190. char buf[LJ_STR_NUMBUF];
  191. size_t len = lj_str_bufnum(buf, (TValue *)np);
  192. return lj_str_new(L, buf, len);
  193. }
  194. /* Convert integer to string. */
  195. GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k)
  196. {
  197. char s[1+10];
  198. char *p = lj_str_bufint(s, k);
  199. return lj_str_new(L, p, (size_t)(s+sizeof(s)-p));
  200. }
  201. GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o)
  202. {
  203. return tvisint(o) ? lj_str_fromint(L, intV(o)) : lj_str_fromnum(L, &o->n);
  204. }
  205. /* -- String formatting --------------------------------------------------- */
  206. static void addstr(lua_State *L, SBuf *sb, const char *str, MSize len)
  207. {
  208. char *p;
  209. MSize i;
  210. if (sb->n + len > sb->sz) {
  211. MSize sz = sb->sz * 2;
  212. while (sb->n + len > sz) sz = sz * 2;
  213. lj_str_resizebuf(L, sb, sz);
  214. }
  215. p = sb->buf + sb->n;
  216. sb->n += len;
  217. for (i = 0; i < len; i++) p[i] = str[i];
  218. }
  219. static void addchar(lua_State *L, SBuf *sb, int c)
  220. {
  221. if (sb->n + 1 > sb->sz) {
  222. MSize sz = sb->sz * 2;
  223. lj_str_resizebuf(L, sb, sz);
  224. }
  225. sb->buf[sb->n++] = (char)c;
  226. }
  227. /* Push formatted message as a string object to Lua stack. va_list variant. */
  228. const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp)
  229. {
  230. SBuf *sb = &G(L)->tmpbuf;
  231. lj_str_needbuf(L, sb, (MSize)strlen(fmt));
  232. lj_str_resetbuf(sb);
  233. for (;;) {
  234. const char *e = strchr(fmt, '%');
  235. if (e == NULL) break;
  236. addstr(L, sb, fmt, (MSize)(e-fmt));
  237. /* This function only handles %s, %c, %d, %f and %p formats. */
  238. switch (e[1]) {
  239. case 's': {
  240. const char *s = va_arg(argp, char *);
  241. if (s == NULL) s = "(null)";
  242. addstr(L, sb, s, (MSize)strlen(s));
  243. break;
  244. }
  245. case 'c':
  246. addchar(L, sb, va_arg(argp, int));
  247. break;
  248. case 'd': {
  249. char buf[LJ_STR_INTBUF];
  250. char *p = lj_str_bufint(buf, va_arg(argp, int32_t));
  251. addstr(L, sb, p, (MSize)(buf+LJ_STR_INTBUF-p));
  252. break;
  253. }
  254. case 'f': {
  255. char buf[LJ_STR_NUMBUF];
  256. TValue tv;
  257. MSize len;
  258. tv.n = (lua_Number)(va_arg(argp, LUAI_UACNUMBER));
  259. len = (MSize)lj_str_bufnum(buf, &tv);
  260. addstr(L, sb, buf, len);
  261. break;
  262. }
  263. case 'p': {
  264. #define FMTP_CHARS (2*sizeof(ptrdiff_t))
  265. char buf[2+FMTP_CHARS];
  266. ptrdiff_t p = (ptrdiff_t)(va_arg(argp, void *));
  267. ptrdiff_t i, lasti = 2+FMTP_CHARS;
  268. if (p == 0) {
  269. addstr(L, sb, "NULL", 4);
  270. break;
  271. }
  272. #if LJ_64
  273. /* Shorten output for 64 bit pointers. */
  274. lasti = 2+2*4+((p >> 32) ? 2+2*(lj_fls((uint32_t)(p >> 32))>>3) : 0);
  275. #endif
  276. buf[0] = '0';
  277. buf[1] = 'x';
  278. for (i = lasti-1; i >= 2; i--, p >>= 4)
  279. buf[i] = "0123456789abcdef"[(p & 15)];
  280. addstr(L, sb, buf, (MSize)lasti);
  281. break;
  282. }
  283. case '%':
  284. addchar(L, sb, '%');
  285. break;
  286. default:
  287. addchar(L, sb, '%');
  288. addchar(L, sb, e[1]);
  289. break;
  290. }
  291. fmt = e+2;
  292. }
  293. addstr(L, sb, fmt, (MSize)strlen(fmt));
  294. setstrV(L, L->top, lj_str_new(L, sb->buf, sb->n));
  295. incr_top(L);
  296. return strVdata(L->top - 1);
  297. }
  298. /* Push formatted message as a string object to Lua stack. Vararg variant. */
  299. const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
  300. {
  301. const char *msg;
  302. va_list argp;
  303. va_start(argp, fmt);
  304. msg = lj_str_pushvf(L, fmt, argp);
  305. va_end(argp);
  306. return msg;
  307. }
  308. /* -- Buffer handling ----------------------------------------------------- */
  309. char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz)
  310. {
  311. if (sz > sb->sz) {
  312. if (sz < LJ_MIN_SBUF) sz = LJ_MIN_SBUF;
  313. lj_str_resizebuf(L, sb, sz);
  314. }
  315. return sb->buf;
  316. }