sq_pcre.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539
  1. #ifdef SQ_USE_PCRE
  2. #include <ctype.h>
  3. #include <string.h>
  4. #include <stdio.h>
  5. #include "squirrel.h"
  6. #include "sqstdblobimpl.h"
  7. #include <pcre.h>
  8. ////////
  9. #include "dynamic_library.h"
  10. /*SquiLu
  11. local library_functions = [
  12. ["pcre *", "pcre_compile", "const char *, int, const char **, int *, const unsigned char *"],
  13. ["pcre *", "pcre_compile2", "const char *, int, int *, const char **, int *, const unsigned char *"],
  14. ["int", "pcre_config", "int, void *"],
  15. ["int", "pcre_fullinfo", "const pcre *, const pcre_extra *, int, void *"],
  16. ["int", "pcre_dfa_exec", "const pcre *, const pcre_extra *, const char *, int, int, int, int *, int , int *, int"],
  17. ["int", "pcre_exec", "const pcre *, const pcre_extra *, PCRE_SPTR, int, int, int, int *, int"],
  18. ["pcre_extra*", "pcre_study", "const pcre *, int, const char **"],
  19. ["void", "pcre_free_study", "pcre_extra *"],
  20. //next entry should be the last one
  21. //to make valid the test made on load_library function
  22. ["const char *", "pcre_version", "void"],
  23. ];
  24. function write_library_functions_declaration(){
  25. foreach(k,v in library_functions) {
  26. putsnl("typedef " + v[0] + " (*" + v[1] + "_t)(" + v[2] + ");");
  27. putsnl("static " + v[1] + "_t dl" + v[1] + " = 0;");
  28. }
  29. }
  30. function write_library_functions_load(){
  31. foreach(k,v in library_functions){
  32. putsnl("dl" + v[1] + " = (" + v[1] + "_t) libdyn.dlsym(\"" + v[1] + "\");");
  33. putsnl("if(!dl" + v[1] + ") return false;");
  34. }
  35. }
  36. SquiLu*/
// Handle of the PCRE shared library; load_library() opens it and resolves the
// dl* function pointers declared below from it.
static DynamicLibrary libdyn;
//@write_library_functions_declaration();
// generated-code:begin
typedef pcre * (*pcre_compile_t)(const char *, int, const char **, int *, const unsigned char *);
static pcre_compile_t dlpcre_compile = 0;
typedef pcre * (*pcre_compile2_t)(const char *, int, int *, const char **, int *, const unsigned char *);
static pcre_compile2_t dlpcre_compile2 = 0;
typedef int (*pcre_config_t)(int, void *);
static pcre_config_t dlpcre_config = 0;
typedef int (*pcre_fullinfo_t)(const pcre *, const pcre_extra *, int, void *);
static pcre_fullinfo_t dlpcre_fullinfo = 0;
typedef int (*pcre_dfa_exec_t)(const pcre *, const pcre_extra *, const char *, int, int, int, int *, int , int *, int);
static pcre_dfa_exec_t dlpcre_dfa_exec = 0;
typedef int (*pcre_exec_t)(const pcre *, const pcre_extra *, PCRE_SPTR, int, int, int, int *, int);
static pcre_exec_t dlpcre_exec = 0;
typedef pcre_extra* (*pcre_study_t)(const pcre *, int, const char **);
static pcre_study_t dlpcre_study = 0;
typedef void (*pcre_free_study_t)(pcre_extra *);
static pcre_free_study_t dlpcre_free_study = 0;
typedef const char * (*pcre_version_t)(void);
static pcre_version_t dlpcre_version = 0;
// generated-code:end
// Library name the constructor passes to load_library().
// NOTE(review): DYNLIB_FOR_OS is defined outside this file; presumably it maps
// the bare name to the platform-specific shared-library file name - confirm.
static const char *dynamicLibName = DYNLIB_FOR_OS(libpcre);
// Opens the PCRE shared library `libname` and resolves every dl* function
// pointer from it.
//
// Returns true when all symbols are resolved, or immediately when
// dlpcre_version is already set (in that case `libname` is not looked at).
// Returns false when the library cannot be opened or any symbol is missing.
//
// dlpcre_version is resolved last, so a non-null dlpcre_version means every
// other pointer was resolved too; that is what makes the early-out test valid.
// The body between the generated-code markers is produced by the SquiLu
// generator script and should not be edited by hand.
static bool load_library(const char *libname)
{
    if(dlpcre_version) return true;
    if(libdyn.open(libname))
    {
        //@write_library_functions_load();
        // generated-code:begin
        dlpcre_compile = (pcre_compile_t) libdyn.dlsym("pcre_compile");
        if(!dlpcre_compile) return false;
        dlpcre_compile2 = (pcre_compile2_t) libdyn.dlsym("pcre_compile2");
        if(!dlpcre_compile2) return false;
        dlpcre_config = (pcre_config_t) libdyn.dlsym("pcre_config");
        if(!dlpcre_config) return false;
        dlpcre_fullinfo = (pcre_fullinfo_t) libdyn.dlsym("pcre_fullinfo");
        if(!dlpcre_fullinfo) return false;
        dlpcre_dfa_exec = (pcre_dfa_exec_t) libdyn.dlsym("pcre_dfa_exec");
        if(!dlpcre_dfa_exec) return false;
        dlpcre_exec = (pcre_exec_t) libdyn.dlsym("pcre_exec");
        if(!dlpcre_exec) return false;
        dlpcre_study = (pcre_study_t) libdyn.dlsym("pcre_study");
        if(!dlpcre_study) return false;
        dlpcre_free_study = (pcre_free_study_t) libdyn.dlsym("pcre_free_study");
        if(!dlpcre_free_study) return false;
        dlpcre_version = (pcre_version_t) libdyn.dlsym("pcre_version");
        if(!dlpcre_version) return false;
        // generated-code:end
        return true;
    }
    return false;
}
// Per-instance state, installed as the Squirrel instance user pointer by the
// constructor and released by sq_pcre_releasehook.
struct sqpcre_st {
    pcre *re;               // compiled pattern returned by pcre_compile
    pcre_extra *re_extra;   // study data set by the study() method, NULL otherwise
    size_t ovector_size;    // number of ints passed to pcre_exec as the ovector length
    // First slot of the output vector. The constructor allocates
    // sizeof(sqpcre_st) + ovector_size * sizeof(int) bytes, so the vector
    // extends past the end of the declared struct.
    int ovector[1];
};
// Type tag for sqpcre instances; also pushed as the class name on registration.
static const SQChar PCRE_Tag[] = _SC("sqpcre");
// Fetches the sqpcre_st of the instance at stack index 1 and raises a script
// error when it is NULL.
// NOTE(review): SQ_GET_INSTANCE is defined elsewhere; it presumably declares
// the local `self` that the NULL test and all methods rely on - confirm.
#define GET_pcre_INSTANCE() SQ_GET_INSTANCE(v, 1, sqpcre_st, PCRE_Tag) \
    if(self == NULL) return sq_throwerror(v, _SC("sqpcre object already closed"));
  99. static SQRESULT sq_pcre_releasehook(SQUserPointer p, SQInteger /*size*/, void */*ep*/)
  100. {
  101. sqpcre_st *self = ((sqpcre_st *)p);
  102. if(self)
  103. {
  104. free(self->re);
  105. if(self->re_extra) dlpcre_free_study(self->re_extra);
  106. sq_free(self, sizeof(sqpcre_st));
  107. }
  108. return 1;
  109. }
  110. static SQRESULT sq_pcre_constructor(HSQUIRRELVM v)
  111. {
  112. if(!load_library(dynamicLibName)) return sq_throwerror(v, _SC("Failed to load libpcre !"));
  113. SQ_FUNC_VARS(v);
  114. SQ_GET_STRING(v, 2, pattern);
  115. SQ_OPT_INTEGER(v, 3, options, 0);
  116. const SQChar *error;
  117. int erroffset;
  118. pcre *re = dlpcre_compile(
  119. pattern, /* the pattern */
  120. options, /* 0 = default options */
  121. &error, /* for error message */
  122. &erroffset, /* for error offset */
  123. NULL); /* use default character tables */
  124. if(!re) return sq_throwerror(v,_SC("PCRE compilation failed at offset %d: %s"), erroffset, error);
  125. int back_ref_max;
  126. int capture_count;
  127. int rc = dlpcre_fullinfo(re, NULL, PCRE_INFO_BACKREFMAX, &back_ref_max);
  128. rc = dlpcre_fullinfo(re, NULL, PCRE_INFO_CAPTURECOUNT, &capture_count);
  129. //printf("%d : %d : %d\n", __LINE__, back_ref_max, capture_count);
  130. size_t ovector_size = back_ref_max + capture_count + 1;
  131. //NP1_ASSERT(ovector_size < NP1_SIZE_T_MAX/3, "Too many back references and/or captures");
  132. ovector_size *= 3;
  133. size_t alloc_size = sizeof(sqpcre_st) + (ovector_size * sizeof(int));
  134. sqpcre_st *sqpcre = (sqpcre_st *)sq_malloc(alloc_size);
  135. sqpcre->re = re;
  136. sqpcre->re_extra = NULL;
  137. sqpcre->ovector_size = ovector_size;
  138. sq_setinstanceup(v,1,sqpcre);
  139. sq_setreleasehook(v,1,sq_pcre_releasehook);
  140. return 0;
  141. }
  142. static SQRESULT sq_pcre_study(HSQUIRRELVM v)
  143. {
  144. SQ_FUNC_VARS(v);
  145. GET_pcre_INSTANCE();
  146. SQ_OPT_INTEGER(v, 2, options, 0);
  147. const char *errptr = 0;
  148. self->re_extra = dlpcre_study(self->re, options, &errptr);
  149. sq_pushbool(v, self->re_extra != NULL);
  150. return 1;
  151. }
  152. #define OVECCOUNT 30 /* should be a multiple of 3 */
  153. static SQRESULT sq_pcre_exec(HSQUIRRELVM v)
  154. {
  155. SQ_FUNC_VARS(v);
  156. GET_pcre_INSTANCE();
  157. SQ_GET_STRING(v, 2, subject);
  158. SQ_OPT_INTEGER(v, 4, start_offset, 0);
  159. SQ_OPT_INTEGER(v, 5, options, 0);
  160. int rc = dlpcre_exec(
  161. self->re, /* the compiled pattern */
  162. self->re_extra, /* no extra data - we didn't study the pattern */
  163. subject, /* the subject string */
  164. subject_size, /* the length of the subject */
  165. start_offset, /* start at offset 0 in the subject */
  166. options, /* 0 = default options */
  167. self->ovector, /* output vector for substring information */
  168. self->ovector_size); /* number of elements in the output vector */
  169. const int array_pos = 3;
  170. SQInteger rtype = sq_gettype(v, array_pos);
  171. /* The output vector wasn't big enough */
  172. if (rc > 0)
  173. {
  174. if(rtype == OT_ARRAY)
  175. {
  176. int nelms = rc*2;
  177. sq_clear(v, array_pos);
  178. for (int i = 0; i < nelms; i++)
  179. {
  180. SQInteger pos = self->ovector[i];
  181. if(pos < 0) continue; //forget defined subroutines
  182. sq_pushinteger(v, pos);
  183. sq_arrayappend(v, array_pos);
  184. }
  185. }
  186. }
  187. sq_pushinteger(v, rc);
  188. return 1;
  189. }
  190. static SQRESULT sq_pcre_match(HSQUIRRELVM v)
  191. {
  192. SQ_FUNC_VARS(v);
  193. GET_pcre_INSTANCE();
  194. SQ_GET_STRING(v, 2, subject);
  195. SQ_OPT_INTEGER(v, 3, start_offset, 0);
  196. SQ_OPT_INTEGER(v, 4, options, 0);
  197. int rc = dlpcre_exec(
  198. self->re, /* the compiled pattern */
  199. self->re_extra, /* no extra data - we didn't study the pattern */
  200. subject, /* the subject string */
  201. subject_size, /* the length of the subject */
  202. start_offset, /* start at offset 0 in the subject */
  203. options, /* 0 = default options */
  204. self->ovector, /* output vector for substring information */
  205. self->ovector_size); /* number of elements in the output vector */
  206. if(rc > 0)
  207. {
  208. SQInteger start_pos = self->ovector[0], end_pos = self->ovector[1];
  209. if(start_pos == end_pos) sq_pushinteger(v, start_pos);
  210. else sq_pushstring(v, subject + start_pos, end_pos - start_pos);
  211. return 1;
  212. }
  213. sq_pushbool(v,SQFalse);
  214. return 1;
  215. }
  216. static SQRESULT sq_pcre_gmatch(HSQUIRRELVM v)
  217. {
  218. SQ_FUNC_VARS(v);
  219. GET_pcre_INSTANCE();
  220. SQ_GET_STRING(v, 2, subject);
  221. SQ_OPT_INTEGER(v, 4, start_offset, 0);
  222. SQ_OPT_INTEGER(v, 5, options, 0);
  223. SQInteger rc;
  224. bool isFirst = true;
  225. while( (rc = dlpcre_exec(
  226. self->re, /* the compiled pattern */
  227. self->re_extra, /* no extra data - we didn't study the pattern */
  228. subject, /* the subject string */
  229. subject_size, /* the length of the subject */
  230. start_offset, /* start at offset 0 in the subject */
  231. options, /* 0 = default options */
  232. self->ovector, /* output vector for substring information */
  233. self->ovector_size)) > 0) /* number of elements in the output vector */
  234. {
  235. if(isFirst)
  236. {
  237. sq_push(v, 3); //push the function
  238. isFirst = false;
  239. }
  240. sq_pushroottable(v); //this
  241. SQInteger start_pos, end_pos, ov_offset = 0, i = 0,
  242. param_count = 1; //root table already on the stack
  243. for(;i < rc; i++) {
  244. ov_offset = i*2;
  245. start_pos = self->ovector[ov_offset];
  246. if(start_pos < 0) continue;
  247. end_pos = self->ovector[ov_offset+1];
  248. if(start_pos == end_pos) sq_pushinteger(v, start_pos);
  249. else sq_pushstring(v, subject + start_pos, end_pos - start_pos);
  250. ++param_count;
  251. }
  252. i = sq_call(v, rc+1, SQFalse, SQTrue);
  253. if(i < 0) return i;
  254. start_offset = self->ovector[(rc*2)-1]; //the last match + 1
  255. }
  256. sq_pushbool(v,SQFalse);
  257. return 1;
  258. }
  259. #include "sqstdblobimpl.h"
  260. static SQRESULT sq_pcre_gsub(HSQUIRRELVM v)
  261. {
  262. SQ_FUNC_VARS(v);
  263. GET_pcre_INSTANCE();
  264. SQ_GET_STRING(v, 2, str);
  265. SQ_OPT_INTEGER(v, 4, start_offset, 0);
  266. SQ_OPT_INTEGER(v, 5, options, 0);
  267. SQBlob blob(0,8192);
  268. const int replacement_idx = 3;
  269. SQObjectType ptype = sq_gettype(v, replacement_idx);
  270. const SQChar *replacement;
  271. SQInteger replacement_size;
  272. SQInteger rc;
  273. bool isFirst = true;
  274. while( (rc = dlpcre_exec(
  275. self->re, /* the compiled pattern */
  276. self->re_extra, /* no extra data - we didn't study the pattern */
  277. str, /* the subject string */
  278. str_size, /* the length of the subject */
  279. start_offset, /* start at offset 0 in the subject */
  280. options, /* 0 = default options */
  281. self->ovector, /* output vector for substring information */
  282. self->ovector_size)) > 0) /* number of elements in the output vector */
  283. {
  284. SQInteger i, ov_offset, start_pos, end_pos;
  285. blob.Write(str+start_offset, self->ovector[0]-start_offset);
  286. switch(ptype){
  287. case OT_CLOSURE:{
  288. if(isFirst)
  289. {
  290. sq_push(v, replacement_idx); //push the function
  291. isFirst = false;
  292. }
  293. sq_pushroottable(v); //this
  294. SQInteger param_count = 1; //root table
  295. for(i=0; i < rc; i++) {
  296. ov_offset = i*2;
  297. start_pos = self->ovector[ov_offset];
  298. if(start_pos < 0) continue; //defined subroutines not pushed as parameter
  299. end_pos = self->ovector[ov_offset+1];
  300. if(start_pos == end_pos) sq_pushinteger(v, start_pos);
  301. else sq_pushstring(v, str + start_pos, end_pos - start_pos);
  302. ++param_count;
  303. }
  304. i = sq_call(v, param_count, SQTrue, SQTrue);
  305. if(i < 0) return i;
  306. if(sq_gettype(v, -1) == OT_STRING){
  307. const SQChar *svalue;
  308. sq_getstring(v, -1, &svalue);
  309. blob.Write(svalue, sq_getsize(v, -1));
  310. }
  311. sq_poptop(v);
  312. }
  313. break;
  314. case OT_ARRAY:{
  315. SQInteger array_idx = 0;
  316. for(i=0; i < rc; i++) {
  317. ov_offset = i*2;
  318. SQInteger pos = self->ovector[ov_offset];
  319. if(pos < 0) continue; //forget defined subroutines
  320. sq_pushinteger(v, array_idx++);
  321. if(SQ_SUCCEEDED(sq_get(v, replacement_idx)) &&
  322. SQ_SUCCEEDED(sq_getstr_and_size(v, -1, &replacement, &replacement_size))){
  323. blob.Write(replacement, replacement_size);
  324. sq_pop(v, 1); //remove value
  325. }
  326. }
  327. }
  328. break;
  329. case OT_TABLE:{
  330. for(i=0; i < rc; i++) {
  331. ov_offset = i*2;
  332. start_pos = self->ovector[ov_offset];
  333. if(start_pos < 0) continue;
  334. end_pos = self->ovector[ov_offset+1];
  335. sq_pushstring(v, str + start_pos, end_pos - start_pos);
  336. if(SQ_SUCCEEDED(sq_get(v, replacement_idx)) &&
  337. SQ_SUCCEEDED(sq_getstr_and_size(v, -1, &replacement, &replacement_size))){
  338. blob.Write(replacement, replacement_size);
  339. sq_pop(v, 1); //remove value
  340. }
  341. }
  342. }
  343. break;
  344. case OT_STRING:{
  345. sq_getstr_and_size(v, replacement_idx, &replacement, &replacement_size);
  346. for(i=0; i < replacement_size; i++) {
  347. SQInteger c = replacement[i];
  348. switch(c)
  349. {
  350. case '$':
  351. ++i;
  352. if(i < replacement_size)
  353. {
  354. SQInteger idx = replacement[i] - '0', match_idx = 0;
  355. for(int j=0; j < rc; j++) {
  356. ov_offset = j*2;
  357. start_pos = self->ovector[ov_offset];
  358. if(start_pos < 0) continue;
  359. if(match_idx == idx)
  360. {
  361. end_pos = self->ovector[ov_offset+1];
  362. blob.Write(str+start_pos, end_pos-start_pos);
  363. break;
  364. }
  365. ++match_idx;
  366. }
  367. if(idx != match_idx)
  368. {
  369. return sq_throwerror(v, _SC("there is no match for replacement $%d"), idx);
  370. }
  371. continue;
  372. }
  373. else
  374. {
  375. return sq_throwerror(v, _SC("unexpected end of replacement string"));
  376. }
  377. break;
  378. case '\\':
  379. ++i;
  380. if(i < replacement_size)
  381. {
  382. blob.WriteChar(replacement[i]);
  383. continue;
  384. }
  385. //falthrough last character on replacement string
  386. default:
  387. blob.WriteChar(c);
  388. }
  389. }
  390. }
  391. break;
  392. default:
  393. return sq_throwerror(v, _SC("gsub only works with closure, array, table for replacement"));
  394. }
  395. start_offset = self->ovector[(rc*2)-1]; //the last match + 1
  396. }
  397. if(str_size) blob.Write(str+start_offset, str_size-start_offset);
  398. sq_pushstring(v, (const SQChar *)blob.GetBuf(), blob.Len());
  399. return 1;
  400. }
  401. static SQRESULT sq_pcre__typeof(HSQUIRRELVM v)
  402. {
  403. sq_pushstring(v,_SC("sqpcre"),-1);
  404. return 1;
  405. }
  406. static SQRESULT sq_pcre_version(HSQUIRRELVM v)
  407. {
  408. sq_pushstring(v, dlpcre_version(),-1);
  409. return 1;
  410. }
  411. static SQRESULT sq_pcre_loadlib(HSQUIRRELVM v)
  412. {
  413. SQ_FUNC_VARS_NO_TOP(v);
  414. SQ_GET_STRING(v, 2, libname);
  415. sq_pushbool(v, load_library(libname));
  416. return 1;
  417. }
// Builds one registration entry: script name, native function sq_pcre_<name>,
// parameter count and type mask.
// NOTE(review): the count/mask notation is presumably the one documented for
// Squirrel's sq_setparamscheck (negative count = minimum) - confirm there.
#define _DECL_FUNC(name,nparams,tycheck) {_SC(#name),sq_pcre_##name,nparams,tycheck}
// Methods installed on the sqpcre class by sqext_register_pcre; the list is
// terminated by the {0,0} entry.
static SQRegFunction sq_pcre_methods[] =
{
    _DECL_FUNC(constructor,-2,_SC(".sn")),
    _DECL_FUNC(study,-1,_SC("xn")),
    _DECL_FUNC(exec,-3,_SC("xsann")),
    _DECL_FUNC(match,-2,_SC("xsnn")),
    _DECL_FUNC(gmatch,-3,_SC("xscnn")),
    _DECL_FUNC(gsub,-3,_SC("xs s|c|a|t nn")),
    _DECL_FUNC(_typeof,1,_SC("x")),
    _DECL_FUNC(version,1,_SC(".")),
    _DECL_FUNC(loadlib,2,_SC(".s")),
    {0,0}
};
#undef _DECL_FUNC
// Name/value pair used to export integer constants to scripts.
typedef struct {
    const SQChar *Str;  // constant name as seen from script; NULL ends the table
    SQInteger Val;      // constant value
} KeyIntType, * KeyIntPtrType;
// PCRE_* option constants added to the sqpcre class by sqext_register_pcre,
// exported without the PCRE_ prefix. The table ends with the {0,0} entry.
static KeyIntType sqpcre_constants[] = {
// MK_CONST(X) yields {"X", PCRE_X}.
#define MK_CONST(c) {_SC(#c), PCRE_##c}
// MK_CONST2 is not used by any entry in this table.
#define MK_CONST2(c) {_SC(c), (SQInteger)PCRE_##c}
    //MK_CONST(SSL_SESSION_ID_SIZE),
    MK_CONST(ANCHORED),
    MK_CONST(NOTBOL),
    MK_CONST(NOTEOL),
    MK_CONST(NOTEMPTY),
    MK_CONST(NOTEMPTY_ATSTART),
    MK_CONST(NO_START_OPTIMIZE),
    MK_CONST(PARTIAL_HARD),
    MK_CONST(PARTIAL_SOFT),
    MK_CONST(NEWLINE_CR),
    MK_CONST(NEWLINE_LF),
    MK_CONST(NEWLINE_CRLF),
    MK_CONST(NEWLINE_ANYCRLF),
    MK_CONST(NEWLINE_ANY),
    MK_CONST(NO_UTF8_CHECK),
    MK_CONST(STUDY_JIT_COMPILE),
    MK_CONST(STUDY_JIT_PARTIAL_HARD_COMPILE),
    MK_CONST(STUDY_JIT_PARTIAL_SOFT_COMPILE),
    {0,0}
};
  460. #ifdef __cplusplus
  461. extern "C" {
  462. #endif
  463. SQRESULT sqext_register_pcre(HSQUIRRELVM v)
  464. {
  465. sq_pushstring(v,PCRE_Tag,-1);
  466. sq_newclass(v,SQFalse);
  467. sq_settypetag(v,-1,(void*)PCRE_Tag);
  468. sq_insert_reg_funcs(v, sq_pcre_methods);
  469. //add constants
  470. KeyIntPtrType KeyIntPtr;
  471. for (KeyIntPtr = sqpcre_constants; KeyIntPtr->Str; KeyIntPtr++) {
  472. sq_pushstring(v, KeyIntPtr->Str, -1); //first the key
  473. sq_pushinteger(v, KeyIntPtr->Val); //then the value
  474. sq_newslot(v, -3, SQFalse); //store then
  475. }
  476. sq_newslot(v,-3,SQTrue);
  477. return 0;
  478. }
  479. #ifdef __cplusplus
  480. }
  481. #endif
  482. #endif //SQ_USE_PCRE