strlib.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505
  1. /*
  2. ** strlib.c
  3. ** String library to LUA
  4. */
  5. char *rcs_strlib="$Id: strlib.c,v 1.26 1996/08/05 20:55:24 roberto Exp roberto $";
#include <ctype.h>
#include <float.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "lua.h"
#include "lualib.h"
  12. static char *buffer = NULL;
  13. static size_t maxbuff = 0;
  14. static size_t buff_size = 0;
  15. static char *lua_strbuffer (unsigned long size)
  16. {
  17. if (size > maxbuff) {
  18. buffer = (buffer) ? realloc(buffer, maxbuff=size) : malloc(maxbuff=size);
  19. if (buffer == NULL)
  20. lua_error("memory overflow");
  21. }
  22. return buffer;
  23. }
  24. static char *openspace (unsigned long size)
  25. {
  26. char *buff = lua_strbuffer(buff_size+size);
  27. return buff+buff_size;
  28. }
  29. void lua_arg_check(int cond, char *funcname)
  30. {
  31. if (!cond) {
  32. char buff[100];
  33. sprintf(buff, "incorrect argument to function `%s'", funcname);
  34. lua_error(buff);
  35. }
  36. }
  37. char *lua_check_string (int numArg, char *funcname)
  38. {
  39. lua_Object o = lua_getparam(numArg);
  40. lua_arg_check(lua_isstring(o), funcname);
  41. return lua_getstring(o);
  42. }
  43. char *lua_opt_string (int numArg, char *def, char *funcname)
  44. {
  45. return (lua_getparam(numArg) == LUA_NOOBJECT) ? def :
  46. lua_check_string(numArg, funcname);
  47. }
  48. double lua_check_number (int numArg, char *funcname)
  49. {
  50. lua_Object o = lua_getparam(numArg);
  51. lua_arg_check(lua_isnumber(o), funcname);
  52. return lua_getnumber(o);
  53. }
  54. long lua_opt_number (int numArg, long def, char *funcname)
  55. {
  56. return (lua_getparam(numArg) == LUA_NOOBJECT) ? def :
  57. (long)lua_check_number(numArg, funcname);
  58. }
  59. char *luaI_addchar (int c)
  60. {
  61. if (buff_size >= maxbuff)
  62. lua_strbuffer(maxbuff == 0 ? 100 : maxbuff*2);
  63. buffer[buff_size++] = c;
  64. if (c == 0)
  65. buff_size = 0; /* prepare for next string */
  66. return buffer;
  67. }
  68. static void addnchar (char *s, int n)
  69. {
  70. char *b = openspace(n);
  71. strncpy(b, s, n);
  72. buff_size += n;
  73. }
  74. /*
  75. ** Interface to strtok
  76. */
  77. static void str_tok (void)
  78. {
  79. char *s1 = lua_check_string(1, "strtok");
  80. char *del = lua_check_string(2, "strtok");
  81. lua_Object t = lua_createtable();
  82. int i = 1;
  83. /* As strtok changes s1, and s1 is "constant", make a copy of it */
  84. s1 = strcpy(lua_strbuffer(strlen(s1+1)), s1);
  85. while ((s1 = strtok(s1, del)) != NULL) {
  86. lua_pushobject(t);
  87. lua_pushnumber(i++);
  88. lua_pushstring(s1);
  89. lua_storesubscript();
  90. s1 = NULL; /* prepare for next strtok */
  91. }
  92. lua_pushobject(t);
  93. lua_pushnumber(i-1); /* total number of tokens */
  94. }
  95. /*
  96. ** Return the string length
  97. ** LUA interface:
  98. ** n = strlen (string)
  99. */
  100. static void str_len (void)
  101. {
  102. lua_pushnumber(strlen(lua_check_string(1, "strlen")));
  103. }
  104. /*
  105. ** Return the substring of a string, from start to end
  106. ** LUA interface:
  107. ** substring = strsub (string, start, end)
  108. */
  109. static void str_sub (void)
  110. {
  111. char *s = lua_check_string(1, "strsub");
  112. long start = (long)lua_check_number(2, "strsub");
  113. long end = lua_opt_number(3, strlen(s), "strsub");
  114. if (1 <= start && start <= end && end <= strlen(s)) {
  115. luaI_addchar(0);
  116. addnchar(s+start-1, end-start+1);
  117. lua_pushstring(luaI_addchar(0));
  118. }
  119. else lua_pushliteral("");
  120. }
  121. /*
  122. ** Transliterate a string
  123. */
  124. static void str_map (void)
  125. {
  126. char *s = lua_check_string(1, "strmap");
  127. char *from = lua_check_string(2, "strmap");
  128. char *to = lua_opt_string(3, "", "strmap");
  129. long len = strlen(to);
  130. for (luaI_addchar(0); *s; s++) {
  131. char *f = strrchr(from, *s);
  132. if (f == NULL)
  133. luaI_addchar(*s);
  134. else {
  135. long pos = f-from;
  136. if (pos < len)
  137. luaI_addchar(to[pos]);
  138. }
  139. }
  140. lua_pushstring(luaI_addchar(0));
  141. }
  142. /*
  143. ** get ascii value of a character in a string
  144. */
  145. static void str_ascii (void)
  146. {
  147. char *s = lua_check_string(1, "ascii");
  148. long pos = lua_opt_number(2, 1, "ascii") - 1;
  149. lua_arg_check(0<=pos && pos<strlen(s), "ascii");
  150. lua_pushnumber((unsigned char)s[pos]);
  151. }
  152. /* pattern matching */
  153. #define ESC '%'
  154. #define SPECIALS "^$*?.([%"
  155. static char *bracket_end (char *p)
  156. {
  157. return (*p == 0) ? NULL : strchr((*p=='^') ? p+2 : p+1, ']');
  158. }
  159. char *item_end (char *p)
  160. {
  161. switch (*p++) {
  162. case '\0': return p-1;
  163. case ESC:
  164. if (*p == 0) lua_error("incorrect pattern");
  165. return p+1;
  166. case '[': {
  167. char *end = bracket_end(p);
  168. if (end == NULL) lua_error("incorrect pattern");
  169. return end+1;
  170. }
  171. default:
  172. return p;
  173. }
  174. }
  175. static int matchclass (int c, int cl)
  176. {
  177. int res;
  178. switch (tolower(cl)) {
  179. case 'a' : res = isalpha(c); break;
  180. case 'c' : res = iscntrl(c); break;
  181. case 'd' : res = isdigit(c); break;
  182. case 'l' : res = islower(c); break;
  183. case 'p' : res = ispunct(c); break;
  184. case 's' : res = isspace(c); break;
  185. case 'u' : res = isupper(c); break;
  186. case 'w' : res = isalnum(c); break;
  187. default: return (cl == c);
  188. }
  189. return (islower(cl) ? res : !res);
  190. }
  191. int singlematch (int c, char *p)
  192. {
  193. if (c <= 0) return 0; /* \0, EOF or other strange flags */
  194. switch (*p) {
  195. case '.': return 1;
  196. case ESC: return matchclass(c, *(p+1));
  197. case '[': {
  198. char *end = bracket_end(p+1);
  199. int sig = *(p+1) == '^' ? (p++, 0) : 1;
  200. while (++p < end) {
  201. if (*p == ESC) {
  202. if (((p+1) < end) && matchclass(c, *++p)) return sig;
  203. }
  204. else if ((*(p+1) == '-') && (p+2 < end)) {
  205. p+=2;
  206. if (*(p-2) <= c && c <= *p) return sig;
  207. }
  208. else if (*p == c) return sig;
  209. }
  210. return !sig;
  211. }
  212. default: return (*p == c);
  213. }
  214. }
  215. #define MAX_CAPT 9
  216. static struct {
  217. char *init;
  218. int len; /* -1 signals unfinished capture */
  219. } capture[MAX_CAPT];
  220. static int num_captures; /* only valid after a sucessful call to match */
  221. static void push_captures (void)
  222. {
  223. int i;
  224. luaI_addchar(0);
  225. for (i=0; i<num_captures; i++) {
  226. if (capture[i].len == -1) lua_error("unfinished capture");
  227. addnchar(capture[i].init, capture[i].len);
  228. lua_pushstring(luaI_addchar(0));
  229. }
  230. }
  231. static int check_cap (int l, int level)
  232. {
  233. l -= '1';
  234. if (!(0 <= l && l < level && capture[l].len != -1))
  235. lua_error("invalid capture index");
  236. return l;
  237. }
  238. static void add_s (char *newp)
  239. {
  240. while (*newp) {
  241. if (*newp != ESC || !isdigit(*++newp))
  242. luaI_addchar(*newp++);
  243. else {
  244. int l = check_cap(*newp++, num_captures);
  245. addnchar(capture[l].init, capture[l].len);
  246. }
  247. }
  248. }
  249. static int capture_to_close (int level)
  250. {
  251. for (level--; level>=0; level--)
  252. if (capture[level].len == -1) return level;
  253. lua_error("invalid pattern capture");
  254. return 0; /* to avoid warnings */
  255. }
  256. static char *match (char *s, char *p, int level)
  257. {
  258. init: /* using goto's to optimize tail recursion */
  259. switch (*p) {
  260. case '(': /* start capture */
  261. if (level >= MAX_CAPT) lua_error("too many captures");
  262. capture[level].init = s;
  263. capture[level].len = -1;
  264. level++; p++; goto init; /* return match(s, p+1, level); */
  265. case ')': { /* end capture */
  266. int l = capture_to_close(level);
  267. char *res;
  268. capture[l].len = s - capture[l].init; /* close capture */
  269. if ((res = match(s, p+1, level)) == NULL) /* match failed? */
  270. capture[l].len = -1; /* undo capture */
  271. return res;
  272. }
  273. case ESC: /* possibly a capture (if followed by a digit) */
  274. if (!isdigit(*(p+1))) goto dflt;
  275. else {
  276. int l = check_cap(*(p+1), level);
  277. if (strncmp(capture[l].init, s, capture[l].len) == 0) {
  278. /* return match(p+2, s+capture[l].len, level); */
  279. p+=2; s+=capture[l].len; goto init;
  280. }
  281. else return NULL;
  282. }
  283. case '\0': case '$': /* (possibly) end of pattern */
  284. if (*p == 0 || (*(p+1) == 0 && *s == 0)) {
  285. num_captures = level;
  286. return s;
  287. }
  288. else goto dflt;
  289. default: dflt: { /* it is a pattern item */
  290. int m = singlematch(*s, p);
  291. char *ep = item_end(p); /* get what is next */
  292. switch (*ep) {
  293. case '*': { /* repetition */
  294. char *res;
  295. if (m && (res = match(s+1, p, level)))
  296. return res;
  297. p=ep+1; goto init; /* else return match(s, ep+1, level); */
  298. }
  299. case '?': { /* optional */
  300. char *res;
  301. if (m && (res = match(s+1, ep+1, level)))
  302. return res;
  303. p=ep+1; goto init; /* else return match(s, ep+1, level); */
  304. }
  305. default:
  306. if (m) { s++; p=ep; goto init; } /* return match(s+1, ep, level); */
  307. else return NULL;
  308. }
  309. }
  310. }
  311. }
  312. static void str_find (void)
  313. {
  314. char *s = lua_check_string(1, "find");
  315. char *p = lua_check_string(2, "find");
  316. long init = lua_opt_number(3, 1, "strfind") - 1;
  317. lua_arg_check(0 <= init && init <= strlen(s), "find");
  318. if (strpbrk(p, SPECIALS) == NULL) { /* no special caracters? */
  319. char *s2 = strstr(s+init, p);
  320. if (s2) {
  321. lua_pushnumber(s2-s+1);
  322. lua_pushnumber(s2-s+strlen(p));
  323. }
  324. }
  325. else {
  326. int anchor = (*p == '^') ? (p++, 1) : 0;
  327. char *s1=s+init;
  328. do {
  329. char *res;
  330. if ((res=match(s1, p, 0)) != NULL) {
  331. lua_pushnumber(s1-s+1); /* start */
  332. lua_pushnumber(res-s); /* end */
  333. push_captures();
  334. return;
  335. }
  336. } while (*s1++ && !anchor);
  337. }
  338. }
  339. static void str_s (void)
  340. {
  341. char *src = lua_check_string(1, "s");
  342. char *p = lua_check_string(2, "s");
  343. char *newp = lua_check_string(3, "s");
  344. int max_s = lua_opt_number(4, strlen(src), "s");
  345. int anchor = (*p == '^') ? (p++, 1) : 0;
  346. int n = 0;
  347. luaI_addchar(0);
  348. while (*src && n < max_s) {
  349. char *e;
  350. if ((e=match(src, p, 0)) == NULL)
  351. luaI_addchar(*src++);
  352. else {
  353. if (e == src) lua_error("empty pattern in substitution"); /* ??? */
  354. add_s(newp);
  355. src = e;
  356. n++;
  357. }
  358. if (anchor) break;
  359. }
  360. addnchar(src, strlen(src));
  361. lua_pushstring(luaI_addchar(0));
  362. lua_pushnumber(n); /* number of substitutions */
  363. }
  364. static void str_set (void)
  365. {
  366. char *item = lua_check_string(1, "strset");
  367. int i;
  368. lua_arg_check(*item_end(item) == 0, "strset");
  369. luaI_addchar(0);
  370. for (i=1; i<256; i++) /* 0 cannot be part of a set */
  371. if (singlematch(i, item))
  372. luaI_addchar(i);
  373. lua_pushstring(luaI_addchar(0));
  374. }
  375. void luaI_addquoted (char *s)
  376. {
  377. luaI_addchar('"');
  378. for (; *s; s++) {
  379. if (strchr("\"\\\n", *s))
  380. luaI_addchar('\\');
  381. luaI_addchar(*s);
  382. }
  383. luaI_addchar('"');
  384. }
  385. #define MAX_FORMAT 200
  386. static void str_format (void)
  387. {
  388. int arg = 1;
  389. char *strfrmt = lua_check_string(arg++, "format");
  390. luaI_addchar(0); /* initialize */
  391. while (*strfrmt) {
  392. if (*strfrmt != '%')
  393. luaI_addchar(*strfrmt++);
  394. else if (*++strfrmt == '%')
  395. luaI_addchar(*strfrmt++); /* %% */
  396. else { /* format item */
  397. char form[MAX_FORMAT]; /* store the format ('%...') */
  398. char *buff;
  399. char *initf = strfrmt-1; /* -1 to include % */
  400. strfrmt = match(strfrmt, "[-+ #]*(%d*)%.?(%d*)", 0);
  401. if (capture[0].len > 3 || capture[1].len > 3) /* < 1000? */
  402. lua_error("invalid format (width/precision too long)");
  403. strncpy(form, initf, strfrmt-initf+1); /* +1 to include convertion */
  404. form[strfrmt-initf+1] = 0;
  405. buff = openspace(1000); /* to store the formated value */
  406. switch (*strfrmt++) {
  407. case 'q':
  408. luaI_addquoted(lua_check_string(arg++, "format"));
  409. break;
  410. case 's': {
  411. char *s = lua_check_string(arg++, "format");
  412. buff = openspace(strlen(s));
  413. buff_size += sprintf(buff, form, s);
  414. break;
  415. }
  416. case 'c': case 'd': case 'i': case 'o':
  417. case 'u': case 'x': case 'X':
  418. buff_size += sprintf(buff, form,
  419. (int)lua_check_number(arg++, "format"));
  420. break;
  421. case 'e': case 'E': case 'f': case 'g':
  422. buff_size += sprintf(buff, form, lua_check_number(arg++, "format"));
  423. break;
  424. default: /* also treat cases 'pnLlh' */
  425. lua_error("invalid format option in function `format'");
  426. }
  427. }
  428. }
  429. lua_pushstring(luaI_addchar(0)); /* push the result */
  430. }
  431. void luaI_openlib (struct lua_reg *l, int n)
  432. {
  433. int i;
  434. for (i=0; i<n; i++)
  435. lua_register(l[i].name, l[i].func);
  436. }
  437. static struct lua_reg strlib[] = {
  438. {"strtok", str_tok},
  439. {"strlen", str_len},
  440. {"strsub", str_sub},
  441. {"strset", str_set},
  442. {"strmap", str_map},
  443. {"ascii", str_ascii},
  444. {"format", str_format},
  445. {"strfind", str_find},
  446. {"gsub", str_s}
  447. };
  448. /*
  449. ** Open string library
  450. */
  451. void strlib_open (void)
  452. {
  453. luaI_openlib(strlib, (sizeof(strlib)/sizeof(strlib[0])));
  454. }