lj_str.c 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301
  1. /*
  2. ** String handling.
  3. ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
  4. **
  5. ** Portions taken verbatim or adapted from the Lua interpreter.
  6. ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
  7. */
  8. #include <stdio.h>
  9. #define lj_str_c
  10. #define LUA_CORE
  11. #include "lj_obj.h"
  12. #include "lj_gc.h"
  13. #include "lj_err.h"
  14. #include "lj_str.h"
  15. #include "lj_state.h"
  16. #include "lj_ctype.h"
  17. /* -- String interning ---------------------------------------------------- */
  18. /* Ordered compare of strings. Assumes string data is 4-byte aligned. */
  19. int32_t lj_str_cmp(GCstr *a, GCstr *b)
  20. {
  21. MSize i, n = a->len > b->len ? b->len : a->len;
  22. for (i = 0; i < n; i += 4) {
  23. /* Note: innocuous access up to end of string + 3. */
  24. uint32_t va = *(const uint32_t *)(strdata(a)+i);
  25. uint32_t vb = *(const uint32_t *)(strdata(b)+i);
  26. if (va != vb) {
  27. #if LJ_ARCH_ENDIAN == LUAJIT_LE
  28. va = lj_bswap(va); vb = lj_bswap(vb);
  29. #endif
  30. i -= n;
  31. if ((int32_t)i >= -3) {
  32. va >>= 32+(i<<3); vb >>= 32+(i<<3);
  33. if (va == vb) break;
  34. }
  35. return (int32_t)(va - vb);
  36. }
  37. }
  38. return (int32_t)(a->len - b->len);
  39. }
  40. /* Resize the string hash table (grow and shrink). */
  41. void lj_str_resize(lua_State *L, MSize newmask)
  42. {
  43. global_State *g = G(L);
  44. GCRef *newhash;
  45. MSize i;
  46. if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1)
  47. return; /* No resizing during GC traversal or if already too big. */
  48. newhash = lj_mem_newvec(L, newmask+1, GCRef);
  49. memset(newhash, 0, (newmask+1)*sizeof(GCRef));
  50. for (i = g->strmask; i != ~(MSize)0; i--) { /* Rehash old table. */
  51. GCobj *p = gcref(g->strhash[i]);
  52. while (p) { /* Follow each hash chain and reinsert all strings. */
  53. MSize h = gco2str(p)->hash & newmask;
  54. GCobj *next = gcnext(p);
  55. /* NOBARRIER: The string table is a GC root. */
  56. setgcrefr(p->gch.nextgc, newhash[h]);
  57. setgcref(newhash[h], p);
  58. p = next;
  59. }
  60. }
  61. lj_mem_freevec(g, g->strhash, g->strmask+1, GCstr *);
  62. g->strmask = newmask;
  63. g->strhash = newhash;
  64. }
  65. /* Intern a string and return string object. */
  66. GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
  67. {
  68. global_State *g;
  69. GCstr *s;
  70. GCobj *o;
  71. MSize len = (MSize)lenx;
  72. MSize h = len;
  73. MSize step = (len>>5)+1; /* Partial hash. */
  74. MSize l1;
  75. if (lenx >= LJ_MAX_STR)
  76. lj_err_msg(L, LJ_ERR_STROV);
  77. for (l1 = len; l1 >= step; l1 -= step) /* Compute string hash. */
  78. h = h ^ ((h<<5)+(h>>2)+cast(unsigned char, str[l1-1]));
  79. /* Check if the string has already been interned. */
  80. g = G(L);
  81. for (o = gcref(g->strhash[h & g->strmask]); o != NULL; o = gcnext(o)) {
  82. GCstr *tso = gco2str(o);
  83. if (tso->len == len && (memcmp(str, strdata(tso), len) == 0)) {
  84. if (isdead(g, o)) flipwhite(o); /* Resurrect if dead. */
  85. return tso; /* Return existing string. */
  86. }
  87. }
  88. /* Nope, create a new string. */
  89. s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr);
  90. newwhite(g, s);
  91. s->gct = ~LJ_TSTR;
  92. s->len = len;
  93. s->hash = h;
  94. s->reserved = 0;
  95. memcpy(strdatawr(s), str, len);
  96. strdatawr(s)[len] = '\0'; /* Zero-terminate string. */
  97. /* Add it to string hash table. */
  98. h &= g->strmask;
  99. s->nextgc = g->strhash[h];
  100. /* NOBARRIER: The string table is a GC root. */
  101. setgcref(g->strhash[h], obj2gco(s));
  102. if (g->strnum++ > g->strmask) /* Allow a 100% load factor. */
  103. lj_str_resize(L, (g->strmask<<1)+1); /* Grow string table. */
  104. return s; /* Return newly interned string. */
  105. }
  106. void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
  107. {
  108. g->strnum--;
  109. lj_mem_free(g, s, sizestring(s));
  110. }
  111. /* -- Type conversions ---------------------------------------------------- */
  112. /* Convert string to number. */
  113. int lj_str_numconv(const char *s, TValue *n)
  114. {
  115. lua_Number sign = 1;
  116. const uint8_t *p = (const uint8_t *)s;
  117. while (lj_ctype_isspace(*p)) p++;
  118. if (*p == '-') { p++; sign = -1; } else if (*p == '+') { p++; }
  119. if ((uint32_t)(*p - '0') < 10) {
  120. uint32_t k = (uint32_t)(*p++ - '0');
  121. if (k == 0 && ((*p & ~0x20) == 'X')) {
  122. p++;
  123. while (lj_ctype_isxdigit(*p)) {
  124. if (k >= 0x10000000) goto parsedbl;
  125. k = (k << 4) + (*p & 15u);
  126. if (!lj_ctype_isdigit(*p)) k += 9;
  127. p++;
  128. }
  129. } else {
  130. while ((uint32_t)(*p - '0') < 10) {
  131. if (k >= 0x19999999) goto parsedbl;
  132. k = k * 10u + (uint32_t)(*p++ - '0');
  133. }
  134. }
  135. while (LJ_UNLIKELY(lj_ctype_isspace(*p))) p++;
  136. if (LJ_LIKELY(*p == '\0')) {
  137. setnumV(n, sign * cast_num(k));
  138. return 1;
  139. }
  140. }
  141. parsedbl:
  142. {
  143. TValue tv;
  144. char *endptr;
  145. setnumV(&tv, lua_str2number(s, &endptr));
  146. if (endptr == s) return 0; /* conversion failed */
  147. if (LJ_UNLIKELY(*endptr != '\0')) {
  148. while (lj_ctype_isspace((uint8_t)*endptr)) endptr++;
  149. if (*endptr != '\0') return 0; /* invalid trailing characters? */
  150. }
  151. if (LJ_LIKELY(!tvisnan(&tv)))
  152. setnumV(n, numV(&tv));
  153. else
  154. setnanV(n); /* Canonicalize injected NaNs. */
  155. return 1;
  156. }
  157. }
  158. /* Convert number to string. */
  159. GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np)
  160. {
  161. char s[LUAI_MAXNUMBER2STR];
  162. lua_Number n = *np;
  163. size_t len = (size_t)lua_number2str(s, n);
  164. return lj_str_new(L, s, len);
  165. }
  166. /* Convert integer to string. */
  167. GCstr *lj_str_fromint(lua_State *L, int32_t k)
  168. {
  169. char s[1+10];
  170. char *p = s+sizeof(s);
  171. uint32_t i = (uint32_t)(k < 0 ? -k : k);
  172. do { *--p = (char)('0' + i % 10); } while (i /= 10);
  173. if (k < 0) *--p = '-';
  174. return lj_str_new(L, p, (size_t)(s+sizeof(s)-p));
  175. }
  176. /* -- String formatting --------------------------------------------------- */
  177. static void addstr(lua_State *L, SBuf *sb, const char *str, MSize len)
  178. {
  179. char *p;
  180. MSize i;
  181. if (sb->n + len > sb->sz) {
  182. MSize sz = sb->sz * 2;
  183. while (sb->n + len > sz) sz = sz * 2;
  184. lj_str_resizebuf(L, sb, sz);
  185. }
  186. p = sb->buf + sb->n;
  187. sb->n += len;
  188. for (i = 0; i < len; i++) p[i] = str[i];
  189. }
  190. static void addchar(lua_State *L, SBuf *sb, int c)
  191. {
  192. if (sb->n + 1 > sb->sz) {
  193. MSize sz = sb->sz * 2;
  194. lj_str_resizebuf(L, sb, sz);
  195. }
  196. sb->buf[sb->n++] = cast(char, c);
  197. }
  198. /* Push formatted message as a string object to Lua stack. va_list variant. */
  199. const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp)
  200. {
  201. SBuf *sb = &G(L)->tmpbuf;
  202. lj_str_needbuf(L, sb, (MSize)strlen(fmt));
  203. lj_str_resetbuf(sb);
  204. for (;;) {
  205. const char *e = strchr(fmt, '%');
  206. if (e == NULL) break;
  207. addstr(L, sb, fmt, (MSize)(e-fmt));
  208. /* This function only handles %s, %c, %d, %f and %p formats. */
  209. switch (e[1]) {
  210. case 's': {
  211. const char *s = va_arg(argp, char *);
  212. if (s == NULL) s = "(null)";
  213. addstr(L, sb, s, (MSize)strlen(s));
  214. break;
  215. }
  216. case 'c':
  217. addchar(L, sb, va_arg(argp, int));
  218. break;
  219. case 'd': {
  220. char buff[1+10];
  221. char *p = buff+sizeof(buff);
  222. int32_t k = va_arg(argp, int32_t);
  223. uint32_t i = (uint32_t)(k < 0 ? -k : k);
  224. do { *--p = (char)('0' + i % 10); } while (i /= 10);
  225. if (k < 0) *--p = '-';
  226. addstr(L, sb, p, (MSize)(buff+sizeof(buff)-p));
  227. break;
  228. }
  229. case 'f': {
  230. char buff[LUAI_MAXNUMBER2STR];
  231. lua_Number n = cast_num(va_arg(argp, LUAI_UACNUMBER));
  232. MSize len = (MSize)lua_number2str(buff, n);
  233. addstr(L, sb, buff, len);
  234. break;
  235. }
  236. case 'p': {
  237. #define FMTP_CHARS (2*sizeof(ptrdiff_t))
  238. char buff[2+FMTP_CHARS];
  239. ptrdiff_t p = (ptrdiff_t)(va_arg(argp, void *));
  240. int i;
  241. buff[0] = '0';
  242. buff[1] = 'x';
  243. for (i = 2+FMTP_CHARS-1; i >= 2; i--, p >>= 4)
  244. buff[i] = "0123456789abcdef"[(p & 15)];
  245. addstr(L, sb, buff, 2+FMTP_CHARS);
  246. break;
  247. }
  248. case '%':
  249. addchar(L, sb, '%');
  250. break;
  251. default:
  252. addchar(L, sb, '%');
  253. addchar(L, sb, e[1]);
  254. break;
  255. }
  256. fmt = e+2;
  257. }
  258. addstr(L, sb, fmt, (MSize)strlen(fmt));
  259. setstrV(L, L->top, lj_str_new(L, sb->buf, sb->n));
  260. incr_top(L);
  261. return strVdata(L->top - 1);
  262. }
  263. /* Push formatted message as a string object to Lua stack. Vararg variant. */
  264. const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
  265. {
  266. const char *msg;
  267. va_list argp;
  268. va_start(argp, fmt);
  269. msg = lj_str_pushvf(L, fmt, argp);
  270. va_end(argp);
  271. return msg;
  272. }
  273. /* -- Buffer handling ----------------------------------------------------- */
  274. char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz)
  275. {
  276. if (sz > sb->sz) {
  277. if (sz < LJ_MIN_SBUF) sz = LJ_MIN_SBUF;
  278. lj_str_resizebuf(L, sb, sz);
  279. }
  280. return sb->buf;
  281. }