lstrlib.c 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190
  1. /*
  2. ** $Id: lstrlib.c,v 1.184 2014/01/08 18:34:34 roberto Exp roberto $
  3. ** Standard library for string operations and pattern-matching
  4. ** See Copyright Notice in lua.h
  5. */
  6. #include <ctype.h>
  7. #include <limits.h>
  8. #include <stddef.h>
  9. #include <stdio.h>
  10. #include <stdlib.h>
  11. #include <string.h>
  12. #define lstrlib_c
  13. #define LUA_LIB
  14. #include "lua.h"
  15. #include "lauxlib.h"
  16. #include "lualib.h"
  17. /*
  18. ** maximum number of captures that a pattern can do during
  19. ** pattern-matching. This limit is arbitrary.
  20. */
  21. #if !defined(LUA_MAXCAPTURES)
  22. #define LUA_MAXCAPTURES 32
  23. #endif
  24. /* macro to `unsign' a character */
  25. #define uchar(c) ((unsigned char)(c))
  26. static int str_len (lua_State *L) {
  27. size_t l;
  28. luaL_checklstring(L, 1, &l);
  29. lua_pushinteger(L, (lua_Integer)l);
  30. return 1;
  31. }
  32. /* translate a relative string position: negative means back from end */
  33. static lua_Integer posrelat (lua_Integer pos, size_t len) {
  34. if (pos >= 0) return pos;
  35. else if (0u - (size_t)pos > len) return 0;
  36. else return (lua_Integer)len + pos + 1;
  37. }
  38. static int str_sub (lua_State *L) {
  39. size_t l;
  40. const char *s = luaL_checklstring(L, 1, &l);
  41. lua_Integer start = posrelat(luaL_checkinteger(L, 2), l);
  42. lua_Integer end = posrelat(luaL_optinteger(L, 3, -1), l);
  43. if (start < 1) start = 1;
  44. if (end > (lua_Integer)l) end = l;
  45. if (start <= end)
  46. lua_pushlstring(L, s + start - 1, end - start + 1);
  47. else lua_pushliteral(L, "");
  48. return 1;
  49. }
  50. static int str_reverse (lua_State *L) {
  51. size_t l, i;
  52. luaL_Buffer b;
  53. const char *s = luaL_checklstring(L, 1, &l);
  54. char *p = luaL_buffinitsize(L, &b, l);
  55. for (i = 0; i < l; i++)
  56. p[i] = s[l - i - 1];
  57. luaL_pushresultsize(&b, l);
  58. return 1;
  59. }
  60. static int str_lower (lua_State *L) {
  61. size_t l;
  62. size_t i;
  63. luaL_Buffer b;
  64. const char *s = luaL_checklstring(L, 1, &l);
  65. char *p = luaL_buffinitsize(L, &b, l);
  66. for (i=0; i<l; i++)
  67. p[i] = tolower(uchar(s[i]));
  68. luaL_pushresultsize(&b, l);
  69. return 1;
  70. }
  71. static int str_upper (lua_State *L) {
  72. size_t l;
  73. size_t i;
  74. luaL_Buffer b;
  75. const char *s = luaL_checklstring(L, 1, &l);
  76. char *p = luaL_buffinitsize(L, &b, l);
  77. for (i=0; i<l; i++)
  78. p[i] = toupper(uchar(s[i]));
  79. luaL_pushresultsize(&b, l);
  80. return 1;
  81. }
  82. /* reasonable limit to avoid arithmetic overflow and strings too big */
  83. #if INT_MAX / 2 <= 0x10000000
  84. #define MAXSIZE ((size_t)(INT_MAX / 2))
  85. #else
  86. #define MAXSIZE ((size_t)0x10000000)
  87. #endif
  88. static int str_rep (lua_State *L) {
  89. size_t l, lsep;
  90. const char *s = luaL_checklstring(L, 1, &l);
  91. lua_Integer n = luaL_checkinteger(L, 2);
  92. const char *sep = luaL_optlstring(L, 3, "", &lsep);
  93. if (n <= 0) lua_pushliteral(L, "");
  94. else if (l + lsep < l || l + lsep >= MAXSIZE / n) /* may overflow? */
  95. return luaL_error(L, "resulting string too large");
  96. else {
  97. size_t totallen = n * l + (n - 1) * lsep;
  98. luaL_Buffer b;
  99. char *p = luaL_buffinitsize(L, &b, totallen);
  100. while (n-- > 1) { /* first n-1 copies (followed by separator) */
  101. memcpy(p, s, l * sizeof(char)); p += l;
  102. if (lsep > 0) { /* avoid empty 'memcpy' (may be expensive) */
  103. memcpy(p, sep, lsep * sizeof(char)); p += lsep;
  104. }
  105. }
  106. memcpy(p, s, l * sizeof(char)); /* last copy (not followed by separator) */
  107. luaL_pushresultsize(&b, totallen);
  108. }
  109. return 1;
  110. }
  111. static int str_byte (lua_State *L) {
  112. size_t l;
  113. const char *s = luaL_checklstring(L, 1, &l);
  114. lua_Integer posi = posrelat(luaL_optinteger(L, 2, 1), l);
  115. lua_Integer pose = posrelat(luaL_optinteger(L, 3, posi), l);
  116. int n, i;
  117. if (posi < 1) posi = 1;
  118. if (pose > (lua_Integer)l) pose = l;
  119. if (posi > pose) return 0; /* empty interval; return no values */
  120. n = (int)(pose - posi + 1);
  121. if (posi + n <= pose) /* arithmetic overflow? */
  122. return luaL_error(L, "string slice too long");
  123. luaL_checkstack(L, n, "string slice too long");
  124. for (i=0; i<n; i++)
  125. lua_pushinteger(L, uchar(s[posi+i-1]));
  126. return n;
  127. }
  128. static int str_char (lua_State *L) {
  129. int n = lua_gettop(L); /* number of arguments */
  130. int i;
  131. luaL_Buffer b;
  132. char *p = luaL_buffinitsize(L, &b, n);
  133. for (i=1; i<=n; i++) {
  134. lua_Integer c = luaL_checkinteger(L, i);
  135. luaL_argcheck(L, uchar(c) == c, i, "value out of range");
  136. p[i - 1] = uchar(c);
  137. }
  138. luaL_pushresultsize(&b, n);
  139. return 1;
  140. }
  141. static int writer (lua_State *L, const void *b, size_t size, void *B) {
  142. (void)L;
  143. luaL_addlstring((luaL_Buffer *) B, (const char *)b, size);
  144. return 0;
  145. }
  146. static int str_dump (lua_State *L) {
  147. luaL_Buffer b;
  148. luaL_checktype(L, 1, LUA_TFUNCTION);
  149. lua_settop(L, 1);
  150. luaL_buffinit(L,&b);
  151. if (lua_dump(L, writer, &b) != 0)
  152. return luaL_error(L, "unable to dump given function");
  153. luaL_pushresult(&b);
  154. return 1;
  155. }
  156. /*
  157. ** {======================================================
  158. ** PATTERN MATCHING
  159. ** =======================================================
  160. */
  161. #define CAP_UNFINISHED (-1)
  162. #define CAP_POSITION (-2)
  163. typedef struct MatchState {
  164. int matchdepth; /* control for recursive depth (to avoid C stack overflow) */
  165. const char *src_init; /* init of source string */
  166. const char *src_end; /* end ('\0') of source string */
  167. const char *p_end; /* end ('\0') of pattern */
  168. lua_State *L;
  169. int level; /* total number of captures (finished or unfinished) */
  170. struct {
  171. const char *init;
  172. ptrdiff_t len;
  173. } capture[LUA_MAXCAPTURES];
  174. } MatchState;
  175. /* recursive function */
  176. static const char *match (MatchState *ms, const char *s, const char *p);
  177. /* maximum recursion depth for 'match' */
  178. #if !defined(MAXCCALLS)
  179. #define MAXCCALLS 200
  180. #endif
  181. #define L_ESC '%'
  182. #define SPECIALS "^$*+?.([%-"
  183. static int check_capture (MatchState *ms, int l) {
  184. l -= '1';
  185. if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED)
  186. return luaL_error(ms->L, "invalid capture index %%%d", l + 1);
  187. return l;
  188. }
  189. static int capture_to_close (MatchState *ms) {
  190. int level = ms->level;
  191. for (level--; level>=0; level--)
  192. if (ms->capture[level].len == CAP_UNFINISHED) return level;
  193. return luaL_error(ms->L, "invalid pattern capture");
  194. }
  195. static const char *classend (MatchState *ms, const char *p) {
  196. switch (*p++) {
  197. case L_ESC: {
  198. if (p == ms->p_end)
  199. luaL_error(ms->L, "malformed pattern (ends with " LUA_QL("%%") ")");
  200. return p+1;
  201. }
  202. case '[': {
  203. if (*p == '^') p++;
  204. do { /* look for a `]' */
  205. if (p == ms->p_end)
  206. luaL_error(ms->L, "malformed pattern (missing " LUA_QL("]") ")");
  207. if (*(p++) == L_ESC && p < ms->p_end)
  208. p++; /* skip escapes (e.g. `%]') */
  209. } while (*p != ']');
  210. return p+1;
  211. }
  212. default: {
  213. return p;
  214. }
  215. }
  216. }
  217. static int match_class (int c, int cl) {
  218. int res;
  219. switch (tolower(cl)) {
  220. case 'a' : res = isalpha(c); break;
  221. case 'c' : res = iscntrl(c); break;
  222. case 'd' : res = isdigit(c); break;
  223. case 'g' : res = isgraph(c); break;
  224. case 'l' : res = islower(c); break;
  225. case 'p' : res = ispunct(c); break;
  226. case 's' : res = isspace(c); break;
  227. case 'u' : res = isupper(c); break;
  228. case 'w' : res = isalnum(c); break;
  229. case 'x' : res = isxdigit(c); break;
  230. case 'z' : res = (c == 0); break; /* deprecated option */
  231. default: return (cl == c);
  232. }
  233. return (islower(cl) ? res : !res);
  234. }
  235. static int matchbracketclass (int c, const char *p, const char *ec) {
  236. int sig = 1;
  237. if (*(p+1) == '^') {
  238. sig = 0;
  239. p++; /* skip the `^' */
  240. }
  241. while (++p < ec) {
  242. if (*p == L_ESC) {
  243. p++;
  244. if (match_class(c, uchar(*p)))
  245. return sig;
  246. }
  247. else if ((*(p+1) == '-') && (p+2 < ec)) {
  248. p+=2;
  249. if (uchar(*(p-2)) <= c && c <= uchar(*p))
  250. return sig;
  251. }
  252. else if (uchar(*p) == c) return sig;
  253. }
  254. return !sig;
  255. }
  256. static int singlematch (MatchState *ms, const char *s, const char *p,
  257. const char *ep) {
  258. if (s >= ms->src_end)
  259. return 0;
  260. else {
  261. int c = uchar(*s);
  262. switch (*p) {
  263. case '.': return 1; /* matches any char */
  264. case L_ESC: return match_class(c, uchar(*(p+1)));
  265. case '[': return matchbracketclass(c, p, ep-1);
  266. default: return (uchar(*p) == c);
  267. }
  268. }
  269. }
  270. static const char *matchbalance (MatchState *ms, const char *s,
  271. const char *p) {
  272. if (p >= ms->p_end - 1)
  273. luaL_error(ms->L, "malformed pattern "
  274. "(missing arguments to " LUA_QL("%%b") ")");
  275. if (*s != *p) return NULL;
  276. else {
  277. int b = *p;
  278. int e = *(p+1);
  279. int cont = 1;
  280. while (++s < ms->src_end) {
  281. if (*s == e) {
  282. if (--cont == 0) return s+1;
  283. }
  284. else if (*s == b) cont++;
  285. }
  286. }
  287. return NULL; /* string ends out of balance */
  288. }
  289. static const char *max_expand (MatchState *ms, const char *s,
  290. const char *p, const char *ep) {
  291. ptrdiff_t i = 0; /* counts maximum expand for item */
  292. while (singlematch(ms, s + i, p, ep))
  293. i++;
  294. /* keeps trying to match with the maximum repetitions */
  295. while (i>=0) {
  296. const char *res = match(ms, (s+i), ep+1);
  297. if (res) return res;
  298. i--; /* else didn't match; reduce 1 repetition to try again */
  299. }
  300. return NULL;
  301. }
  302. static const char *min_expand (MatchState *ms, const char *s,
  303. const char *p, const char *ep) {
  304. for (;;) {
  305. const char *res = match(ms, s, ep+1);
  306. if (res != NULL)
  307. return res;
  308. else if (singlematch(ms, s, p, ep))
  309. s++; /* try with one more repetition */
  310. else return NULL;
  311. }
  312. }
  313. static const char *start_capture (MatchState *ms, const char *s,
  314. const char *p, int what) {
  315. const char *res;
  316. int level = ms->level;
  317. if (level >= LUA_MAXCAPTURES) luaL_error(ms->L, "too many captures");
  318. ms->capture[level].init = s;
  319. ms->capture[level].len = what;
  320. ms->level = level+1;
  321. if ((res=match(ms, s, p)) == NULL) /* match failed? */
  322. ms->level--; /* undo capture */
  323. return res;
  324. }
  325. static const char *end_capture (MatchState *ms, const char *s,
  326. const char *p) {
  327. int l = capture_to_close(ms);
  328. const char *res;
  329. ms->capture[l].len = s - ms->capture[l].init; /* close capture */
  330. if ((res = match(ms, s, p)) == NULL) /* match failed? */
  331. ms->capture[l].len = CAP_UNFINISHED; /* undo capture */
  332. return res;
  333. }
  334. static const char *match_capture (MatchState *ms, const char *s, int l) {
  335. size_t len;
  336. l = check_capture(ms, l);
  337. len = ms->capture[l].len;
  338. if ((size_t)(ms->src_end-s) >= len &&
  339. memcmp(ms->capture[l].init, s, len) == 0)
  340. return s+len;
  341. else return NULL;
  342. }
  343. static const char *match (MatchState *ms, const char *s, const char *p) {
  344. if (ms->matchdepth-- == 0)
  345. luaL_error(ms->L, "pattern too complex");
  346. init: /* using goto's to optimize tail recursion */
  347. if (p != ms->p_end) { /* end of pattern? */
  348. switch (*p) {
  349. case '(': { /* start capture */
  350. if (*(p + 1) == ')') /* position capture? */
  351. s = start_capture(ms, s, p + 2, CAP_POSITION);
  352. else
  353. s = start_capture(ms, s, p + 1, CAP_UNFINISHED);
  354. break;
  355. }
  356. case ')': { /* end capture */
  357. s = end_capture(ms, s, p + 1);
  358. break;
  359. }
  360. case '$': {
  361. if ((p + 1) != ms->p_end) /* is the `$' the last char in pattern? */
  362. goto dflt; /* no; go to default */
  363. s = (s == ms->src_end) ? s : NULL; /* check end of string */
  364. break;
  365. }
  366. case L_ESC: { /* escaped sequences not in the format class[*+?-]? */
  367. switch (*(p + 1)) {
  368. case 'b': { /* balanced string? */
  369. s = matchbalance(ms, s, p + 2);
  370. if (s != NULL) {
  371. p += 4; goto init; /* return match(ms, s, p + 4); */
  372. } /* else fail (s == NULL) */
  373. break;
  374. }
  375. case 'f': { /* frontier? */
  376. const char *ep; char previous;
  377. p += 2;
  378. if (*p != '[')
  379. luaL_error(ms->L, "missing " LUA_QL("[") " after "
  380. LUA_QL("%%f") " in pattern");
  381. ep = classend(ms, p); /* points to what is next */
  382. previous = (s == ms->src_init) ? '\0' : *(s - 1);
  383. if (!matchbracketclass(uchar(previous), p, ep - 1) &&
  384. matchbracketclass(uchar(*s), p, ep - 1)) {
  385. p = ep; goto init; /* return match(ms, s, ep); */
  386. }
  387. s = NULL; /* match failed */
  388. break;
  389. }
  390. case '0': case '1': case '2': case '3':
  391. case '4': case '5': case '6': case '7':
  392. case '8': case '9': { /* capture results (%0-%9)? */
  393. s = match_capture(ms, s, uchar(*(p + 1)));
  394. if (s != NULL) {
  395. p += 2; goto init; /* return match(ms, s, p + 2) */
  396. }
  397. break;
  398. }
  399. default: goto dflt;
  400. }
  401. break;
  402. }
  403. default: dflt: { /* pattern class plus optional suffix */
  404. const char *ep = classend(ms, p); /* points to optional suffix */
  405. /* does not match at least once? */
  406. if (!singlematch(ms, s, p, ep)) {
  407. if (*ep == '*' || *ep == '?' || *ep == '-') { /* accept empty? */
  408. p = ep + 1; goto init; /* return match(ms, s, ep + 1); */
  409. }
  410. else /* '+' or no suffix */
  411. s = NULL; /* fail */
  412. }
  413. else { /* matched once */
  414. switch (*ep) { /* handle optional suffix */
  415. case '?': { /* optional */
  416. const char *res;
  417. if ((res = match(ms, s + 1, ep + 1)) != NULL)
  418. s = res;
  419. else {
  420. p = ep + 1; goto init; /* else return match(ms, s, ep + 1); */
  421. }
  422. break;
  423. }
  424. case '+': /* 1 or more repetitions */
  425. s++; /* 1 match already done */
  426. /* go through */
  427. case '*': /* 0 or more repetitions */
  428. s = max_expand(ms, s, p, ep);
  429. break;
  430. case '-': /* 0 or more repetitions (minimum) */
  431. s = min_expand(ms, s, p, ep);
  432. break;
  433. default: /* no suffix */
  434. s++; p = ep; goto init; /* return match(ms, s + 1, ep); */
  435. }
  436. }
  437. break;
  438. }
  439. }
  440. }
  441. ms->matchdepth++;
  442. return s;
  443. }
  444. static const char *lmemfind (const char *s1, size_t l1,
  445. const char *s2, size_t l2) {
  446. if (l2 == 0) return s1; /* empty strings are everywhere */
  447. else if (l2 > l1) return NULL; /* avoids a negative `l1' */
  448. else {
  449. const char *init; /* to search for a `*s2' inside `s1' */
  450. l2--; /* 1st char will be checked by `memchr' */
  451. l1 = l1-l2; /* `s2' cannot be found after that */
  452. while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) {
  453. init++; /* 1st char is already checked */
  454. if (memcmp(init, s2+1, l2) == 0)
  455. return init-1;
  456. else { /* correct `l1' and `s1' to try again */
  457. l1 -= init-s1;
  458. s1 = init;
  459. }
  460. }
  461. return NULL; /* not found */
  462. }
  463. }
  464. static void push_onecapture (MatchState *ms, int i, const char *s,
  465. const char *e) {
  466. if (i >= ms->level) {
  467. if (i == 0) /* ms->level == 0, too */
  468. lua_pushlstring(ms->L, s, e - s); /* add whole match */
  469. else
  470. luaL_error(ms->L, "invalid capture index");
  471. }
  472. else {
  473. ptrdiff_t l = ms->capture[i].len;
  474. if (l == CAP_UNFINISHED) luaL_error(ms->L, "unfinished capture");
  475. if (l == CAP_POSITION)
  476. lua_pushinteger(ms->L, ms->capture[i].init - ms->src_init + 1);
  477. else
  478. lua_pushlstring(ms->L, ms->capture[i].init, l);
  479. }
  480. }
  481. static int push_captures (MatchState *ms, const char *s, const char *e) {
  482. int i;
  483. int nlevels = (ms->level == 0 && s) ? 1 : ms->level;
  484. luaL_checkstack(ms->L, nlevels, "too many captures");
  485. for (i = 0; i < nlevels; i++)
  486. push_onecapture(ms, i, s, e);
  487. return nlevels; /* number of strings pushed */
  488. }
  489. /* check whether pattern has no special characters */
  490. static int nospecials (const char *p, size_t l) {
  491. size_t upto = 0;
  492. do {
  493. if (strpbrk(p + upto, SPECIALS))
  494. return 0; /* pattern has a special character */
  495. upto += strlen(p + upto) + 1; /* may have more after \0 */
  496. } while (upto <= l);
  497. return 1; /* no special chars found */
  498. }
  499. static int str_find_aux (lua_State *L, int find) {
  500. size_t ls, lp;
  501. const char *s = luaL_checklstring(L, 1, &ls);
  502. const char *p = luaL_checklstring(L, 2, &lp);
  503. lua_Integer init = posrelat(luaL_optinteger(L, 3, 1), ls);
  504. if (init < 1) init = 1;
  505. else if (init > (lua_Integer)ls + 1) { /* start after string's end? */
  506. lua_pushnil(L); /* cannot find anything */
  507. return 1;
  508. }
  509. /* explicit request or no special characters? */
  510. if (find && (lua_toboolean(L, 4) || nospecials(p, lp))) {
  511. /* do a plain search */
  512. const char *s2 = lmemfind(s + init - 1, ls - init + 1, p, lp);
  513. if (s2) {
  514. lua_pushinteger(L, s2 - s + 1);
  515. lua_pushinteger(L, s2 - s + lp);
  516. return 2;
  517. }
  518. }
  519. else {
  520. MatchState ms;
  521. const char *s1 = s + init - 1;
  522. int anchor = (*p == '^');
  523. if (anchor) {
  524. p++; lp--; /* skip anchor character */
  525. }
  526. ms.L = L;
  527. ms.matchdepth = MAXCCALLS;
  528. ms.src_init = s;
  529. ms.src_end = s + ls;
  530. ms.p_end = p + lp;
  531. do {
  532. const char *res;
  533. ms.level = 0;
  534. lua_assert(ms.matchdepth == MAXCCALLS);
  535. if ((res=match(&ms, s1, p)) != NULL) {
  536. if (find) {
  537. lua_pushinteger(L, s1 - s + 1); /* start */
  538. lua_pushinteger(L, res - s); /* end */
  539. return push_captures(&ms, NULL, 0) + 2;
  540. }
  541. else
  542. return push_captures(&ms, s1, res);
  543. }
  544. } while (s1++ < ms.src_end && !anchor);
  545. }
  546. lua_pushnil(L); /* not found */
  547. return 1;
  548. }
  549. static int str_find (lua_State *L) {
  550. return str_find_aux(L, 1);
  551. }
  552. static int str_match (lua_State *L) {
  553. return str_find_aux(L, 0);
  554. }
  555. static int gmatch_aux (lua_State *L) {
  556. MatchState ms;
  557. size_t ls, lp;
  558. const char *s = lua_tolstring(L, lua_upvalueindex(1), &ls);
  559. const char *p = lua_tolstring(L, lua_upvalueindex(2), &lp);
  560. const char *src;
  561. ms.L = L;
  562. ms.matchdepth = MAXCCALLS;
  563. ms.src_init = s;
  564. ms.src_end = s+ls;
  565. ms.p_end = p + lp;
  566. for (src = s + (size_t)lua_tointeger(L, lua_upvalueindex(3));
  567. src <= ms.src_end;
  568. src++) {
  569. const char *e;
  570. ms.level = 0;
  571. lua_assert(ms.matchdepth == MAXCCALLS);
  572. if ((e = match(&ms, src, p)) != NULL) {
  573. lua_Integer newstart = e-s;
  574. if (e == src) newstart++; /* empty match? go at least one position */
  575. lua_pushinteger(L, newstart);
  576. lua_replace(L, lua_upvalueindex(3));
  577. return push_captures(&ms, src, e);
  578. }
  579. }
  580. return 0; /* not found */
  581. }
  582. static int gmatch (lua_State *L) {
  583. luaL_checkstring(L, 1);
  584. luaL_checkstring(L, 2);
  585. lua_settop(L, 2);
  586. lua_pushinteger(L, 0);
  587. lua_pushcclosure(L, gmatch_aux, 3);
  588. return 1;
  589. }
  590. static void add_s (MatchState *ms, luaL_Buffer *b, const char *s,
  591. const char *e) {
  592. size_t l, i;
  593. const char *news = lua_tolstring(ms->L, 3, &l);
  594. for (i = 0; i < l; i++) {
  595. if (news[i] != L_ESC)
  596. luaL_addchar(b, news[i]);
  597. else {
  598. i++; /* skip ESC */
  599. if (!isdigit(uchar(news[i]))) {
  600. if (news[i] != L_ESC)
  601. luaL_error(ms->L, "invalid use of " LUA_QL("%c")
  602. " in replacement string", L_ESC);
  603. luaL_addchar(b, news[i]);
  604. }
  605. else if (news[i] == '0')
  606. luaL_addlstring(b, s, e - s);
  607. else {
  608. push_onecapture(ms, news[i] - '1', s, e);
  609. luaL_addvalue(b); /* add capture to accumulated result */
  610. }
  611. }
  612. }
  613. }
  614. static void add_value (MatchState *ms, luaL_Buffer *b, const char *s,
  615. const char *e, int tr) {
  616. lua_State *L = ms->L;
  617. switch (tr) {
  618. case LUA_TFUNCTION: {
  619. int n;
  620. lua_pushvalue(L, 3);
  621. n = push_captures(ms, s, e);
  622. lua_call(L, n, 1);
  623. break;
  624. }
  625. case LUA_TTABLE: {
  626. push_onecapture(ms, 0, s, e);
  627. lua_gettable(L, 3);
  628. break;
  629. }
  630. default: { /* LUA_TNUMBER or LUA_TSTRING */
  631. add_s(ms, b, s, e);
  632. return;
  633. }
  634. }
  635. if (!lua_toboolean(L, -1)) { /* nil or false? */
  636. lua_pop(L, 1);
  637. lua_pushlstring(L, s, e - s); /* keep original text */
  638. }
  639. else if (!lua_isstring(L, -1))
  640. luaL_error(L, "invalid replacement value (a %s)", luaL_typename(L, -1));
  641. luaL_addvalue(b); /* add result to accumulator */
  642. }
  643. static int str_gsub (lua_State *L) {
  644. size_t srcl, lp;
  645. const char *src = luaL_checklstring(L, 1, &srcl);
  646. const char *p = luaL_checklstring(L, 2, &lp);
  647. int tr = lua_type(L, 3);
  648. size_t max_s = luaL_optinteger(L, 4, srcl+1);
  649. int anchor = (*p == '^');
  650. size_t n = 0;
  651. MatchState ms;
  652. luaL_Buffer b;
  653. luaL_argcheck(L, tr == LUA_TNUMBER || tr == LUA_TSTRING ||
  654. tr == LUA_TFUNCTION || tr == LUA_TTABLE, 3,
  655. "string/function/table expected");
  656. luaL_buffinit(L, &b);
  657. if (anchor) {
  658. p++; lp--; /* skip anchor character */
  659. }
  660. ms.L = L;
  661. ms.matchdepth = MAXCCALLS;
  662. ms.src_init = src;
  663. ms.src_end = src+srcl;
  664. ms.p_end = p + lp;
  665. while (n < max_s) {
  666. const char *e;
  667. ms.level = 0;
  668. lua_assert(ms.matchdepth == MAXCCALLS);
  669. e = match(&ms, src, p);
  670. if (e) {
  671. n++;
  672. add_value(&ms, &b, src, e, tr);
  673. }
  674. if (e && e>src) /* non empty match? */
  675. src = e; /* skip it */
  676. else if (src < ms.src_end)
  677. luaL_addchar(&b, *src++);
  678. else break;
  679. if (anchor) break;
  680. }
  681. luaL_addlstring(&b, src, ms.src_end-src);
  682. luaL_pushresult(&b);
  683. lua_pushinteger(L, n); /* number of substitutions */
  684. return 2;
  685. }
  686. /* }====================================================== */
  687. /*
  688. ** {======================================================
  689. ** STRING FORMAT
  690. ** =======================================================
  691. */
  692. /* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */
  693. #define MAX_ITEM 512
  694. /* valid flags in a format specification */
  695. #define FLAGS "-+ #0"
  696. /*
  697. ** maximum size of each format specification (such as "%-099.99d")
  698. ** (+2 for length modifiers; +10 accounts for %99.99x plus margin of error)
  699. */
  700. #define MAX_FORMAT (sizeof(FLAGS) + 2 + 10)
  701. static void addquoted (lua_State *L, luaL_Buffer *b, int arg) {
  702. size_t l;
  703. const char *s = luaL_checklstring(L, arg, &l);
  704. luaL_addchar(b, '"');
  705. while (l--) {
  706. if (*s == '"' || *s == '\\' || *s == '\n') {
  707. luaL_addchar(b, '\\');
  708. luaL_addchar(b, *s);
  709. }
  710. else if (*s == '\0' || iscntrl(uchar(*s))) {
  711. char buff[10];
  712. if (!isdigit(uchar(*(s+1))))
  713. sprintf(buff, "\\%d", (int)uchar(*s));
  714. else
  715. sprintf(buff, "\\%03d", (int)uchar(*s));
  716. luaL_addstring(b, buff);
  717. }
  718. else
  719. luaL_addchar(b, *s);
  720. s++;
  721. }
  722. luaL_addchar(b, '"');
  723. }
  724. static const char *scanformat (lua_State *L, const char *strfrmt, char *form) {
  725. const char *p = strfrmt;
  726. while (*p != '\0' && strchr(FLAGS, *p) != NULL) p++; /* skip flags */
  727. if ((size_t)(p - strfrmt) >= sizeof(FLAGS)/sizeof(char))
  728. luaL_error(L, "invalid format (repeated flags)");
  729. if (isdigit(uchar(*p))) p++; /* skip width */
  730. if (isdigit(uchar(*p))) p++; /* (2 digits at most) */
  731. if (*p == '.') {
  732. p++;
  733. if (isdigit(uchar(*p))) p++; /* skip precision */
  734. if (isdigit(uchar(*p))) p++; /* (2 digits at most) */
  735. }
  736. if (isdigit(uchar(*p)))
  737. luaL_error(L, "invalid format (width or precision too long)");
  738. *(form++) = '%';
  739. memcpy(form, strfrmt, (p - strfrmt + 1) * sizeof(char));
  740. form += p - strfrmt + 1;
  741. *form = '\0';
  742. return p;
  743. }
  744. /*
  745. ** add length modifier into formats
  746. */
  747. static void addlenmod (char *form, const char *lenmod) {
  748. size_t l = strlen(form);
  749. size_t lm = strlen(lenmod);
  750. char spec = form[l - 1];
  751. strcpy(form + l - 1, lenmod);
  752. form[l + lm - 1] = spec;
  753. form[l + lm] = '\0';
  754. }
  755. static int str_format (lua_State *L) {
  756. int top = lua_gettop(L);
  757. int arg = 1;
  758. size_t sfl;
  759. const char *strfrmt = luaL_checklstring(L, arg, &sfl);
  760. const char *strfrmt_end = strfrmt+sfl;
  761. luaL_Buffer b;
  762. luaL_buffinit(L, &b);
  763. while (strfrmt < strfrmt_end) {
  764. if (*strfrmt != L_ESC)
  765. luaL_addchar(&b, *strfrmt++);
  766. else if (*++strfrmt == L_ESC)
  767. luaL_addchar(&b, *strfrmt++); /* %% */
  768. else { /* format item */
  769. char form[MAX_FORMAT]; /* to store the format (`%...') */
  770. char *buff = luaL_prepbuffsize(&b, MAX_ITEM); /* to put formatted item */
  771. int nb = 0; /* number of bytes in added item */
  772. if (++arg > top)
  773. luaL_argerror(L, arg, "no value");
  774. strfrmt = scanformat(L, strfrmt, form);
  775. switch (*strfrmt++) {
  776. case 'c': {
  777. nb = sprintf(buff, form, luaL_checkint(L, arg));
  778. break;
  779. }
  780. case 'd': case 'i':
  781. case 'o': case 'u': case 'x': case 'X': {
  782. lua_Integer n = luaL_checkinteger(L, arg);
  783. addlenmod(form, LUA_INTEGER_FRMLEN);
  784. nb = sprintf(buff, form, n);
  785. break;
  786. }
  787. case 'e': case 'E': case 'f':
  788. #if defined(LUA_USE_AFORMAT)
  789. case 'a': case 'A':
  790. #endif
  791. case 'g': case 'G': {
  792. addlenmod(form, LUA_NUMBER_FRMLEN);
  793. nb = sprintf(buff, form, luaL_checknumber(L, arg));
  794. break;
  795. }
  796. case 'q': {
  797. addquoted(L, &b, arg);
  798. break;
  799. }
  800. case 's': {
  801. size_t l;
  802. const char *s = luaL_tolstring(L, arg, &l);
  803. if (!strchr(form, '.') && l >= 100) {
  804. /* no precision and string is too long to be formatted;
  805. keep original string */
  806. luaL_addvalue(&b);
  807. break;
  808. }
  809. else {
  810. nb = sprintf(buff, form, s);
  811. lua_pop(L, 1); /* remove result from 'luaL_tolstring' */
  812. break;
  813. }
  814. }
  815. default: { /* also treat cases `pnLlh' */
  816. return luaL_error(L, "invalid option " LUA_QL("%%%c") " to "
  817. LUA_QL("format"), *(strfrmt - 1));
  818. }
  819. }
  820. luaL_addsize(&b, nb);
  821. }
  822. }
  823. luaL_pushresult(&b);
  824. return 1;
  825. }
  826. /* }====================================================== */
  827. /*
  828. ** {======================================================
  829. ** PACK/UNPACK
  830. ** =======================================================
  831. */
  832. /* maximum size for the binary representation of an integer */
  833. #define MAXINTSIZE 8
  834. /* number of bits in a character */
  835. #define NB CHAR_BIT
  836. /* mask for one character (NB ones) */
  837. #define MC (((lua_Integer)1 << NB) - 1)
  838. /* mask for one character without sign ((NB - 1) ones) */
  839. #define SM (((lua_Integer)1 << (NB - 1)) - 1)
  840. #define SZINT ((int)sizeof(lua_Integer))
  841. static union {
  842. int dummy;
  843. char little; /* true iff machine is little endian */
  844. } const nativeendian = {1};
  845. static int getendian (lua_State *L, int arg) {
  846. const char *endian = luaL_optstring(L, arg,
  847. (nativeendian.little ? "l" : "b"));
  848. if (*endian == 'n') /* native? */
  849. return nativeendian.little;
  850. luaL_argcheck(L, *endian == 'l' || *endian == 'b', arg,
  851. "endianess must be 'l'/'b'/'n'");
  852. return (*endian == 'l');
  853. }
  854. static int getintsize (lua_State *L, int arg) {
  855. int size = luaL_optint(L, arg, 0);
  856. if (size == 0) size = SZINT;
  857. luaL_argcheck(L, 1 <= size && size <= MAXINTSIZE, arg,
  858. "integer size out of valid range");
  859. return size;
  860. }
  861. static int packint (char *buff, lua_Integer n, int littleendian, int size) {
  862. int i;
  863. if (littleendian) {
  864. for (i = 0; i < size - 1; i++) {
  865. buff[i] = (n & MC);
  866. n >>= NB;
  867. }
  868. }
  869. else {
  870. for (i = size - 1; i > 0; i--) {
  871. buff[i] = (n & MC);
  872. n >>= NB;
  873. }
  874. }
  875. buff[i] = (n & MC); /* last byte */
  876. /* test for overflow: OK if there are only zeros left in higher bytes,
  877. or if there are only oneś left and packed number is negative (signal
  878. bit, the higher bit in last byte, is one) */
  879. return ((n & ~MC) == 0 || (n | SM) == ~(lua_Integer)0);
  880. }
  881. static int packint_l (lua_State *L) {
  882. char buff[MAXINTSIZE];
  883. lua_Integer n = luaL_checkinteger(L, 1);
  884. int size = getintsize(L, 2);
  885. int endian = getendian(L, 3);
  886. if (packint(buff, n, endian, size))
  887. lua_pushlstring(L, buff, size);
  888. else
  889. luaL_error(L, "integer does not fit into given size (%d)", size);
  890. return 1;
  891. }
  892. /* mask to check higher-order byte in a Lua integer */
  893. #define HIGHERBYTE (MC << (NB * (SZINT - 1)))
  894. /* mask to check higher-order byte + signal bit of next byte */
  895. #define HIGHERBYTE1 (HIGHERBYTE | (HIGHERBYTE >> 1))
  896. static int unpackint (const char *buff, lua_Integer *res,
  897. int littleendian, int size) {
  898. lua_Integer n = 0;
  899. int i;
  900. for (i = 0; i < size; i++) {
  901. if (i >= SZINT) { /* will throw away a byte? */
  902. /* check for overflow: it is OK to throw away leading zeros for a
  903. positive number; leading ones for a negative number; and one
  904. last leading zero to allow unsigned integers with a 1 in
  905. its "signal bit" */
  906. if (!((n & HIGHERBYTE1) == 0 || /* zeros for pos. number */
  907. (n & HIGHERBYTE1) == HIGHERBYTE1 || /* ones for neg. number */
  908. ((n & HIGHERBYTE) == 0 && i == size - 1))) /* last zero */
  909. return 0; /* overflow */
  910. }
  911. n <<= NB;
  912. n |= (lua_Integer)(unsigned char)buff[littleendian ? size - 1 - i : i];
  913. }
  914. if (size < SZINT) { /* need sign extension? */
  915. lua_Integer mask = (~(lua_Integer)0) << (size*NB - 1);
  916. if (n & mask) /* negative value? */
  917. n |= mask; /* signal extension */
  918. }
  919. *res = n;
  920. return 1;
  921. }
  922. static int unpackint_l (lua_State *L) {
  923. lua_Integer res;
  924. size_t len;
  925. const char *s = luaL_checklstring(L, 1, &len);
  926. lua_Integer pos = posrelat(luaL_optinteger(L, 2, 1), len);
  927. int size = getintsize(L, 3);
  928. int endian = getendian(L, 4);
  929. luaL_argcheck(L, 1 <= pos && (size_t)pos + size - 1 <= len, 1,
  930. "string too short");
  931. if(unpackint(s + pos - 1, &res, endian, size))
  932. lua_pushinteger(L, res);
  933. else
  934. luaL_error(L, "result does not fit into a Lua integer");
  935. return 1;
  936. }
  937. static void correctendianess (lua_State *L, char *b, int size, int endianarg) {
  938. int endian = getendian(L, endianarg);
  939. if (endian != nativeendian.little) { /* not native endianess? */
  940. int i = 0;
  941. while (i < --size) {
  942. char temp = b[i];
  943. b[i++] = b[size];
  944. b[size] = temp;
  945. }
  946. }
  947. }
  948. #define DEFAULTFLOATSIZE \
  949. (sizeof(lua_Number) == sizeof(float) ? "f" : "d")
  950. static int getfloatsize (lua_State *L, int arg) {
  951. const char *size = luaL_optstring(L, arg, "n");
  952. if (*size == 'n') size = DEFAULTFLOATSIZE;
  953. luaL_argcheck(L, *size == 'd' || *size == 'f', arg,
  954. "size must be 'f'/'d'/'n'");
  955. return (*size == 'd' ? sizeof(double) : sizeof(float));
  956. }
  957. static int packfloat_l (lua_State *L) {
  958. float f; double d;
  959. char *pn; /* pointer to number */
  960. lua_Number n = luaL_checknumber(L, 1);
  961. int size = getfloatsize(L, 2);
  962. if (size == sizeof(double)) {
  963. d = (double)n;
  964. pn = (char*)&d;
  965. }
  966. else {
  967. f = (float)n;
  968. pn = (char*)&f;
  969. }
  970. correctendianess(L, pn, size, 3);
  971. lua_pushlstring(L, pn, size);
  972. return 1;
  973. }
  974. static int unpackfloat_l (lua_State *L) {
  975. lua_Number res;
  976. size_t len;
  977. const char *s = luaL_checklstring(L, 1, &len);
  978. lua_Integer pos = posrelat(luaL_optinteger(L, 2, 1), len);
  979. int size = getfloatsize(L, 3);
  980. luaL_argcheck(L, 1 <= pos && (size_t)pos + size - 1 <= len, 1,
  981. "string too short");
  982. if (size == sizeof(double)) {
  983. double d;
  984. memcpy(&d, s + pos - 1, size);
  985. correctendianess(L, (char*)&d, size, 4);
  986. res = (lua_Number)d;
  987. }
  988. else {
  989. float f;
  990. memcpy(&f, s + pos - 1, size);
  991. correctendianess(L, (char*)&f, size, 4);
  992. res = (lua_Number)f;
  993. }
  994. lua_pushnumber(L, res);
  995. return 1;
  996. }
  997. /* }====================================================== */
  998. static const luaL_Reg strlib[] = {
  999. {"byte", str_byte},
  1000. {"char", str_char},
  1001. {"dump", str_dump},
  1002. {"find", str_find},
  1003. {"format", str_format},
  1004. {"gmatch", gmatch},
  1005. {"gsub", str_gsub},
  1006. {"len", str_len},
  1007. {"lower", str_lower},
  1008. {"match", str_match},
  1009. {"rep", str_rep},
  1010. {"reverse", str_reverse},
  1011. {"sub", str_sub},
  1012. {"upper", str_upper},
  1013. {"packfloat", packfloat_l},
  1014. {"packint", packint_l},
  1015. {"unpackfloat", unpackfloat_l},
  1016. {"unpackint", unpackint_l},
  1017. {NULL, NULL}
  1018. };
  1019. static void createmetatable (lua_State *L) {
  1020. lua_createtable(L, 0, 1); /* table to be metatable for strings */
  1021. lua_pushliteral(L, ""); /* dummy string */
  1022. lua_pushvalue(L, -2); /* copy table */
  1023. lua_setmetatable(L, -2); /* set table as metatable for strings */
  1024. lua_pop(L, 1); /* pop dummy string */
  1025. lua_pushvalue(L, -2); /* get string library */
  1026. lua_setfield(L, -2, "__index"); /* metatable.__index = string */
  1027. lua_pop(L, 1); /* pop metatable */
  1028. }
  1029. /*
  1030. ** Open string library
  1031. */
  1032. LUAMOD_API int luaopen_string (lua_State *L) {
  1033. luaL_newlib(L, strlib);
  1034. createmetatable(L);
  1035. return 1;
  1036. }