sq_pcre2.cpp 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839
  1. #if defined(SQ_USE_PCRE2) || defined(SQ_USE_PCRE2_STATIC)
  2. #include <ctype.h>
  3. #include <string.h>
  4. #include <stdio.h>
  5. #include "squirrel.h"
  6. #include "sqstdblobimpl.h"
  7. #define PCRE2_CODE_UNIT_WIDTH 8
  8. #include <pcre2.h>
  9. ////////
  10. #include "dynamic_library.h"
  11. /*SquiLu
  12. local library_functions = [
  13. ["pcre2_code *", "pcre2_compile", "PCRE2_SPTR, PCRE2_SIZE, uint32_t, int *, PCRE2_SIZE *, pcre2_compile_context *"],
  14. ["int", "pcre2_jit_compile", "pcre2_code *, uint32_t"],
  15. ["void", "pcre2_code_free", "pcre2_code *"],
  16. ["pcre2_match_data *", "pcre2_match_data_create_from_pattern", "const pcre2_code *, pcre2_general_context *"],
  17. ["PCRE2_SIZE *", "pcre2_get_ovector_pointer", "pcre2_match_data *match_data"],
  18. ["uint32_t", "pcre2_get_ovector_count", "pcre2_match_data *match_data"],
  19. ["void", "pcre2_match_data_free", "pcre2_match_data *"],
  20. ["int", "pcre2_match", "const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, pcre2_match_data *, pcre2_match_context *"],
  21. ["int", "pcre2_dfa_match", "const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, PCRE2_SIZE startoffset, uint32_t options, pcre2_match_data *match_data, pcre2_match_context *mcontext, int *workspace, PCRE2_SIZE wscount"],
  22. ["int", "pcre2_get_error_message", "int, PCRE2_UCHAR *, PCRE2_SIZE"],
  23. ["pcre2_match_context *", "pcre2_match_context_create", "pcre2_general_context *gcontext"],
  24. ["pcre2_match_context *", "pcre2_match_context_copy", "pcre2_match_context *"],
  25. ["void", "pcre2_match_context_free", "pcre2_match_context *"],
  26. ["int", "pcre2_set_match_limit", "pcre2_match_context *mcontext, uint32_t value"],
  27. ["int", "pcre2_set_recursion_limit", "pcre2_match_context *mcontext, uint32_t value"],
  28. ["int", "pcre2_set_callout", "pcre2_match_context *, int (*)(pcre2_callout_block *, void *), void *"],
  29. ["int", "pcre2_substitute", "const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, pcre2_match_data *, pcre2_match_context *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE *"],
  30. //["int", "pcre2_get_error_message", "int errorcode, PCRE2_UCHAR *buffer, PCRE2_SIZE bufflen"],
  31. //next entry should be the last one
  32. //to make valid the test made on load_library function
  33. ["int", "pcre2_config", "uint32_t what, void *where"],
  34. ];
  35. function write_library_functions_declaration(){
  36. foreach(k,v in library_functions) {
  37. putsnl("typedef " + v[0] + " (*" + v[1] + "_t)(" + v[2] + ");");
  38. putsnl("static " + v[1] + "_t dl" + v[1] + " = 0;");
  39. }
  40. }
  41. function write_library_functions_load(chsz){
  42. foreach(k,v in library_functions){
  43. putsnl("dl" + v[1] + " = (" + v[1] + "_t) libdyn.dlsym(\"" + v[1] + chsz + "\");");
  44. putsnl("if(!dl" + v[1] + ") return false;");
  45. }
  46. }
  47. function write_library_functions_static_defines(){
  48. foreach(k,v in library_functions){
  49. putsnl("#define dl" + v[1] + " " + v[1]);
  50. }
  51. }
  52. SquiLu*/
  // Static-link build: load_library() is a macro that always yields `true`,
  // and each dl* name below is a macro alias for the pcre2_* function of the
  // same name. The alias list sits between generated-code markers and
  // corresponds to write_library_functions_static_defines() in the SquiLu
  // block earlier in this file.
  53. #ifdef SQ_USE_PCRE2_STATIC
  54. #define load_library(x) true
  55. //@write_library_functions_static_defines()
  56. // generated-code:begin
  57. #define dlpcre2_compile pcre2_compile
  58. #define dlpcre2_jit_compile pcre2_jit_compile
  59. #define dlpcre2_code_free pcre2_code_free
  60. #define dlpcre2_match_data_create_from_pattern pcre2_match_data_create_from_pattern
  61. #define dlpcre2_get_ovector_pointer pcre2_get_ovector_pointer
  62. #define dlpcre2_get_ovector_count pcre2_get_ovector_count
  63. #define dlpcre2_match_data_free pcre2_match_data_free
  64. #define dlpcre2_match pcre2_match
  65. #define dlpcre2_dfa_match pcre2_dfa_match
  66. #define dlpcre2_get_error_message pcre2_get_error_message
  67. #define dlpcre2_match_context_create pcre2_match_context_create
  68. #define dlpcre2_match_context_copy pcre2_match_context_copy
  69. #define dlpcre2_match_context_free pcre2_match_context_free
  70. #define dlpcre2_set_match_limit pcre2_set_match_limit
  71. #define dlpcre2_set_recursion_limit pcre2_set_recursion_limit
  72. #define dlpcre2_set_callout pcre2_set_callout
  73. #define dlpcre2_substitute pcre2_substitute
  74. #define dlpcre2_config pcre2_config
  75. // generated-code:end
  76. #else
  // Dynamic-load build: handle of the shared library, followed by one
  // function-pointer typedef (<name>_t) and one zero-initialised static
  // pointer (dl<name>) per PCRE2 entry point. The pointers are filled in by
  // load_library(). The list sits between generated-code markers and
  // corresponds to write_library_functions_declaration() in the SquiLu block
  // earlier in this file.
  77. static DynamicLibrary libdyn;
  78. //@write_library_functions_declaration();
  79. // generated-code:begin
  80. typedef pcre2_code * (*pcre2_compile_t)(PCRE2_SPTR, PCRE2_SIZE, uint32_t, int *, PCRE2_SIZE *, pcre2_compile_context *);
  81. static pcre2_compile_t dlpcre2_compile = 0;
  82. typedef int (*pcre2_jit_compile_t)(pcre2_code *, uint32_t);
  83. static pcre2_jit_compile_t dlpcre2_jit_compile = 0;
  84. typedef void (*pcre2_code_free_t)(pcre2_code *);
  85. static pcre2_code_free_t dlpcre2_code_free = 0;
  86. typedef pcre2_match_data * (*pcre2_match_data_create_from_pattern_t)(const pcre2_code *, pcre2_general_context *);
  87. static pcre2_match_data_create_from_pattern_t dlpcre2_match_data_create_from_pattern = 0;
  88. typedef PCRE2_SIZE * (*pcre2_get_ovector_pointer_t)(pcre2_match_data *match_data);
  89. static pcre2_get_ovector_pointer_t dlpcre2_get_ovector_pointer = 0;
  90. typedef uint32_t (*pcre2_get_ovector_count_t)(pcre2_match_data *match_data);
  91. static pcre2_get_ovector_count_t dlpcre2_get_ovector_count = 0;
  92. typedef void (*pcre2_match_data_free_t)(pcre2_match_data *);
  93. static pcre2_match_data_free_t dlpcre2_match_data_free = 0;
  94. typedef int (*pcre2_match_t)(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, pcre2_match_data *, pcre2_match_context *);
  95. static pcre2_match_t dlpcre2_match = 0;
  96. typedef int (*pcre2_dfa_match_t)(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, PCRE2_SIZE startoffset, uint32_t options, pcre2_match_data *match_data, pcre2_match_context *mcontext, int *workspace, PCRE2_SIZE wscount);
  97. static pcre2_dfa_match_t dlpcre2_dfa_match = 0;
  98. typedef int (*pcre2_get_error_message_t)(int, PCRE2_UCHAR *, PCRE2_SIZE);
  99. static pcre2_get_error_message_t dlpcre2_get_error_message = 0;
  100. typedef pcre2_match_context * (*pcre2_match_context_create_t)(pcre2_general_context *gcontext);
  101. static pcre2_match_context_create_t dlpcre2_match_context_create = 0;
  102. typedef pcre2_match_context * (*pcre2_match_context_copy_t)(pcre2_match_context *);
  103. static pcre2_match_context_copy_t dlpcre2_match_context_copy = 0;
  104. typedef void (*pcre2_match_context_free_t)(pcre2_match_context *);
  105. static pcre2_match_context_free_t dlpcre2_match_context_free = 0;
  106. typedef int (*pcre2_set_match_limit_t)(pcre2_match_context *mcontext, uint32_t value);
  107. static pcre2_set_match_limit_t dlpcre2_set_match_limit = 0;
  108. typedef int (*pcre2_set_recursion_limit_t)(pcre2_match_context *mcontext, uint32_t value);
  109. static pcre2_set_recursion_limit_t dlpcre2_set_recursion_limit = 0;
  110. typedef int (*pcre2_set_callout_t)(pcre2_match_context *, int (*)(pcre2_callout_block *, void *), void *);
  111. static pcre2_set_callout_t dlpcre2_set_callout = 0;
  112. typedef int (*pcre2_substitute_t)(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, pcre2_match_data *, pcre2_match_context *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE *);
  113. static pcre2_substitute_t dlpcre2_substitute = 0;
  114. typedef int (*pcre2_config_t)(uint32_t what, void *where);
  115. static pcre2_config_t dlpcre2_config = 0;
  116. // generated-code:end
  // Library name handed to load_library() by the constructor.
  // NOTE(review): DYNLIB_FOR_OS is defined outside this file; presumably it
  // turns the bare name into the platform's shared-library file name - confirm.
  117. static const char *dynamicLibName = DYNLIB_FOR_OS(libpcre2-8);
  // Opens the shared library `libname` through libdyn and looks up every
  // PCRE2 entry point used by this module, storing each in its dl* pointer.
  // All symbols are looked up with the "_8" suffix (PCRE2_CODE_UNIT_WIDTH is
  // defined as 8 at the top of this file).
  //
  // Returns true when every symbol was found (or when a previous call already
  // succeeded), false when the library cannot be opened or any lookup fails.
  //
  // dlpcre2_config is looked up last, so a non-zero dlpcre2_config is used as
  // the "already loaded" test for the early return.
  //
  // NOTE(review): when a lookup fails the function returns without closing
  // libdyn and leaves the pointers found so far set; confirm that calling
  // libdyn.open() again on a later attempt is safe.
  118. static bool load_library(const char *libname)
  119. {
  120. if(dlpcre2_config) return true;
  121. if(libdyn.open(libname))
  122. {
  123. //@write_library_functions_load("_8");
  124. // generated-code:begin
  125. dlpcre2_compile = (pcre2_compile_t) libdyn.dlsym("pcre2_compile_8");
  126. if(!dlpcre2_compile) return false;
  127. dlpcre2_jit_compile = (pcre2_jit_compile_t) libdyn.dlsym("pcre2_jit_compile_8");
  128. if(!dlpcre2_jit_compile) return false;
  129. dlpcre2_code_free = (pcre2_code_free_t) libdyn.dlsym("pcre2_code_free_8");
  130. if(!dlpcre2_code_free) return false;
  131. dlpcre2_match_data_create_from_pattern = (pcre2_match_data_create_from_pattern_t) libdyn.dlsym("pcre2_match_data_create_from_pattern_8");
  132. if(!dlpcre2_match_data_create_from_pattern) return false;
  133. dlpcre2_get_ovector_pointer = (pcre2_get_ovector_pointer_t) libdyn.dlsym("pcre2_get_ovector_pointer_8");
  134. if(!dlpcre2_get_ovector_pointer) return false;
  135. dlpcre2_get_ovector_count = (pcre2_get_ovector_count_t) libdyn.dlsym("pcre2_get_ovector_count_8");
  136. if(!dlpcre2_get_ovector_count) return false;
  137. dlpcre2_match_data_free = (pcre2_match_data_free_t) libdyn.dlsym("pcre2_match_data_free_8");
  138. if(!dlpcre2_match_data_free) return false;
  139. dlpcre2_match = (pcre2_match_t) libdyn.dlsym("pcre2_match_8");
  140. if(!dlpcre2_match) return false;
  141. dlpcre2_dfa_match = (pcre2_dfa_match_t) libdyn.dlsym("pcre2_dfa_match_8");
  142. if(!dlpcre2_dfa_match) return false;
  143. dlpcre2_get_error_message = (pcre2_get_error_message_t) libdyn.dlsym("pcre2_get_error_message_8");
  144. if(!dlpcre2_get_error_message) return false;
  145. dlpcre2_match_context_create = (pcre2_match_context_create_t) libdyn.dlsym("pcre2_match_context_create_8");
  146. if(!dlpcre2_match_context_create) return false;
  147. dlpcre2_match_context_copy = (pcre2_match_context_copy_t) libdyn.dlsym("pcre2_match_context_copy_8");
  148. if(!dlpcre2_match_context_copy) return false;
  149. dlpcre2_match_context_free = (pcre2_match_context_free_t) libdyn.dlsym("pcre2_match_context_free_8");
  150. if(!dlpcre2_match_context_free) return false;
  151. dlpcre2_set_match_limit = (pcre2_set_match_limit_t) libdyn.dlsym("pcre2_set_match_limit_8");
  152. if(!dlpcre2_set_match_limit) return false;
  153. dlpcre2_set_recursion_limit = (pcre2_set_recursion_limit_t) libdyn.dlsym("pcre2_set_recursion_limit_8");
  154. if(!dlpcre2_set_recursion_limit) return false;
  155. dlpcre2_set_callout = (pcre2_set_callout_t) libdyn.dlsym("pcre2_set_callout_8");
  156. if(!dlpcre2_set_callout) return false;
  157. dlpcre2_substitute = (pcre2_substitute_t) libdyn.dlsym("pcre2_substitute_8");
  158. if(!dlpcre2_substitute) return false;
  159. dlpcre2_config = (pcre2_config_t) libdyn.dlsym("pcre2_config_8");
  160. if(!dlpcre2_config) return false;
  161. // generated-code:end
  162. return true;
  163. }
  164. return false;
  165. }
  166. #endif // SQ_USE_PCRE2_STATIC
  167. static SQInteger calc_new_size_by_max_len(SQInteger start_pos, SQInteger max_len, SQInteger curr_size)
  168. {
  169. SQInteger new_size;
  170. if(start_pos < 0)
  171. {
  172. new_size = curr_size + start_pos;
  173. start_pos = new_size < 0 ? 0 : new_size;
  174. }
  175. if(max_len > 0) new_size = start_pos + max_len;
  176. else new_size = curr_size + max_len;
  177. if( (new_size < curr_size) && (new_size > start_pos) )
  178. {
  179. return new_size;
  180. }
  181. return curr_size;
  182. }
  // Function-pointer type with the same shape as the callout handler accepted
  // by pcre2_set_callout.
  183. typedef int (*pre2_callout_func_t)(pcre2_callout_block *, void *);
  // Native state stored as the instance user pointer of each sqpcre2 object.
  // It is allocated in the constructor and destroyed in sq_pcre2_releasehook.
  184. struct sqpcre2_st {
  185. pcre2_code *re; // compiled pattern
  186. pcre2_match_context *match_context; // NULL until set_callout creates it
  187. pcre2_match_data *match_data; // created from the pattern in the constructor
  188. PCRE2_SIZE *ovector; // offset vector belonging to match_data
  189. /* associated squirrel vm */
  190. HSQUIRRELVM v;
  191. HSQOBJECT callout_cb; // script closure invoked on callouts
  192. HSQOBJECT callout_cb_udata; // extra value passed to callout_cb
  193. };
  // Type tag used for the class and for instance lookup.
  194. static const SQChar PCRE2_Tag[] = _SC("sqpcre2");
  // Fetches the sqpcre2_st of the instance at stack index 1 through
  // SQ_GET_INSTANCE and raises a script error when `self` is NULL.
  // NOTE(review): SQ_GET_INSTANCE is defined elsewhere; it evidently declares
  // `self` - confirm its own failure behaviour there.
  195. #define GET_pcre2_INSTANCE() SQ_GET_INSTANCE(v, 1, sqpcre2_st, PCRE2_Tag) \
  196. if(self == NULL) return sq_throwerror(v, _SC("sqpcre2 object already closed"));
  197. static SQRESULT sq_pcre2_releasehook(SQUserPointer p, SQInteger /*size*/, void */*ep*/)
  198. {
  199. sqpcre2_st *self = ((sqpcre2_st *)p);
  200. if(self)
  201. {
  202. if(self->match_data) dlpcre2_match_data_free(self->match_data);
  203. dlpcre2_code_free(self->re);
  204. if(self->match_context)
  205. {
  206. sq_release(self->v, &self->callout_cb);
  207. sq_release(self->v, &self->callout_cb_udata);
  208. dlpcre2_match_context_free(self->match_context);
  209. }
  210. sq_free(self, sizeof(*self));
  211. }
  212. return 1;
  213. }
  214. static SQRESULT sq_pcre2_constructor(HSQUIRRELVM v)
  215. {
  216. if(!load_library(dynamicLibName)) return sq_throwerror(v, _SC("Failed to load libpcre !"));
  217. SQ_FUNC_VARS(v);
  218. SQ_GET_STRING(v, 2, pattern);
  219. SQ_OPT_INTEGER(v, 3, options, 0);
  220. PCRE2_SIZE erroroffset;
  221. int errornumber;
  222. pcre2_code *re = dlpcre2_compile(
  223. (PCRE2_SPTR)pattern, /* the pattern */
  224. PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
  225. options, /* 0 = default options */
  226. &errornumber, /* for error message */
  227. &erroroffset, /* for error offset */
  228. NULL); /* use default character tables */
  229. if(!re)
  230. {
  231. PCRE2_UCHAR buffer[256];
  232. dlpcre2_get_error_message(errornumber, buffer, sizeof(buffer));
  233. return sq_throwerror(v,_SC("PCRE compilation failed at offset %d: %s"), (int)erroroffset, buffer);
  234. }
  235. sqpcre2_st *sqpcre2 = (sqpcre2_st *)sq_malloc(sizeof(*sqpcre2));
  236. sqpcre2->re = re;
  237. sqpcre2->match_data = dlpcre2_match_data_create_from_pattern(re, NULL);
  238. sqpcre2->ovector = dlpcre2_get_ovector_pointer(sqpcre2->match_data);
  239. sqpcre2->v = v;
  240. sqpcre2->match_context = NULL;
  241. /*
  242. sq_resetobject(&sqpcre2->callout_cb);
  243. sq_resetobject(&sqpcre2->callout_cb_udata);
  244. */
  245. sq_setinstanceup(v,1,sqpcre2);
  246. sq_setreleasehook(v,1,sq_pcre2_releasehook);
  247. return 0;
  248. }
  249. static SQRESULT sq_pcre2_exec(HSQUIRRELVM v)
  250. {
  251. SQ_FUNC_VARS(v);
  252. GET_pcre2_INSTANCE();
  253. SQ_GET_STRING(v, 2, subject);
  254. SQ_OPT_INTEGER(v, 4, start_offset, 0);
  255. SQ_OPT_INTEGER(v, 5, options, 0);
  256. SQ_OPT_INTEGER(v, 6, max_len, 0);
  257. if(max_len)
  258. {
  259. subject_size = calc_new_size_by_max_len(start_offset, max_len, subject_size);
  260. }
  261. int rc = dlpcre2_match(
  262. self->re, /* the compiled pattern */
  263. (PCRE2_SPTR)subject, /* the subject string */
  264. subject_size, /* the length of the subject */
  265. start_offset, /* start at offset 0 in the subject */
  266. options, /* 0 = default options */
  267. self->match_data, /* block for storing the result */
  268. self->match_context); /* use default match context */
  269. const int array_pos = 3;
  270. SQInteger rtype = sq_gettype(v, array_pos);
  271. /* The output vector wasn't big enough */
  272. if (rc > 0)
  273. {
  274. if(rtype == OT_ARRAY)
  275. {
  276. int nelms = rc*2;
  277. sq_clear(v, array_pos);
  278. for (int i = 0; i < nelms; i++)
  279. {
  280. SQInteger pos = self->ovector[i];
  281. if(pos < 0) continue; //forget defined subroutines
  282. sq_pushinteger(v, pos);
  283. sq_arrayappend(v, array_pos);
  284. }
  285. }
  286. }
  287. sq_pushinteger(v, rc);
  288. return 1;
  289. }
  290. static SQRESULT sq_pcre2_match(HSQUIRRELVM v)
  291. {
  292. SQ_FUNC_VARS(v);
  293. GET_pcre2_INSTANCE();
  294. SQ_GET_STRING(v, 2, subject);
  295. SQ_OPT_INTEGER(v, 3, start_offset, 0);
  296. SQ_OPT_INTEGER(v, 4, options, 0);
  297. SQ_OPT_INTEGER(v, 5, max_len, 0);
  298. if(max_len)
  299. {
  300. subject_size = calc_new_size_by_max_len(start_offset, max_len, subject_size);
  301. }
  302. int rc = dlpcre2_match(
  303. self->re, /* the compiled pattern */
  304. (PCRE2_SPTR)subject, /* the subject string */
  305. subject_size, /* the length of the subject */
  306. start_offset, /* start at offset 0 in the subject */
  307. options, /* 0 = default options */
  308. self->match_data, /* block for storing the result */
  309. self->match_context); /* use default match context */
  310. if(rc > 0)
  311. {
  312. SQInteger start_pos = self->ovector[0], end_pos = self->ovector[1];
  313. if(start_pos == end_pos) sq_pushinteger(v, start_pos);
  314. else sq_pushstring(v, subject + start_pos, end_pos - start_pos);
  315. return 1;
  316. }
  317. if(rc < -2) //only no matching errore
  318. {
  319. return sq_throwerror(v, _SC("pcre2_match error %d"), (int)rc);
  320. }
  321. return 0;
  322. }
  323. static SQRESULT sq_pcre2_gmatch(HSQUIRRELVM v)
  324. {
  325. SQ_FUNC_VARS(v);
  326. GET_pcre2_INSTANCE();
  327. SQ_GET_STRING(v, 2, subject);
  328. SQ_OPT_INTEGER(v, 4, start_offset, 0);
  329. SQ_OPT_INTEGER(v, 5, options, 0);
  330. SQ_OPT_INTEGER(v, 6, max_len, 0);
  331. SQInteger rc;
  332. bool isFirst = true;
  333. if(max_len)
  334. {
  335. subject_size = calc_new_size_by_max_len(start_offset, max_len, subject_size);
  336. }
  337. while( (rc = dlpcre2_match(
  338. self->re, /* the compiled pattern */
  339. (PCRE2_SPTR)subject, /* the subject string */
  340. subject_size, /* the length of the subject */
  341. start_offset, /* start at offset 0 in the subject */
  342. options, /* 0 = default options */
  343. self->match_data, /* block for storing the result */
  344. self->match_context)) > 0) /* use default match context */
  345. {
  346. if(isFirst)
  347. {
  348. sq_push(v, 3); //push the function
  349. isFirst = false;
  350. }
  351. sq_pushroottable(v); //this
  352. SQInteger start_pos, end_pos, ov_offset = 0, i = 0,
  353. param_count = 1; //root table already on the stack
  354. for(;i < rc; i++) {
  355. ov_offset = i*2;
  356. start_pos = self->ovector[ov_offset];
  357. if(start_pos < 0) continue;
  358. end_pos = self->ovector[ov_offset+1];
  359. if(start_pos == end_pos) sq_pushinteger(v, start_pos);
  360. else sq_pushstring(v, subject + start_pos, end_pos - start_pos);
  361. ++param_count;
  362. }
  363. i = sq_call(v, param_count, SQTrue, SQTrue);
  364. if(i < 0) return i;
  365. SQObjectType rtype = sq_gettype(v, -1);
  366. SQBool keep_matching = SQFalse;
  367. if(rtype == OT_BOOL) {
  368. sq_getbool(v, -1, &keep_matching);
  369. }
  370. sq_poptop(v); //remove return parameter
  371. if(!keep_matching) break;
  372. start_offset = self->ovector[(rc*2)-1]; //the last match + 1
  373. }
  374. if(rc < -2) //only no matching errore
  375. {
  376. return sq_throwerror(v, _SC("pcre2_match error %d"), (int)rc);
  377. }
  378. return 0;
  379. }
  380. #include "sqstdblobimpl.h"
  381. static SQRESULT sq_pcre2_gsub(HSQUIRRELVM v)
  382. {
  383. SQ_FUNC_VARS(v);
  384. GET_pcre2_INSTANCE();
  385. SQ_GET_STRING(v, 2, str);
  386. SQ_OPT_INTEGER(v, 4, options, 0);
  387. SQ_OPT_INTEGER(v, 5, max_sub, 0);
  388. if(max_sub < 0) return sq_throwerror(v, _SC("max substitutions can't be less than zero"));
  389. SQBlob blob(0,8192);
  390. const int replacement_idx = 3;
  391. SQObjectType ptype = sq_gettype(v, replacement_idx);
  392. const SQChar *replacement;
  393. SQInteger replacement_size, start_offset=0;
  394. SQInteger rc;
  395. bool isFirst = true;
  396. while( (rc = dlpcre2_match(
  397. self->re, /* the compiled pattern */
  398. (PCRE2_SPTR)str, /* the subject string */
  399. str_size, /* the length of the subject */
  400. start_offset, /* start at offset 0 in the subject */
  401. options, /* 0 = default options */
  402. self->match_data, /* block for storing the result */
  403. self->match_context)) > 0) /* use default match context */
  404. {
  405. SQInteger i, ov_offset, start_pos, end_pos;
  406. blob.Write(str+start_offset, self->ovector[0]-start_offset);
  407. switch(ptype){
  408. case OT_CLOSURE:{
  409. if(isFirst)
  410. {
  411. sq_push(v, replacement_idx); //push the function
  412. isFirst = false;
  413. }
  414. sq_pushroottable(v); //this
  415. SQInteger param_count = 1; //root table
  416. for(i=0; i < rc; i++) {
  417. ov_offset = i*2;
  418. start_pos = self->ovector[ov_offset];
  419. if(start_pos < 0) continue; //defined subroutines not pushed as parameter
  420. end_pos = self->ovector[ov_offset+1];
  421. if(start_pos == end_pos) sq_pushinteger(v, start_pos);
  422. else sq_pushstring(v, str + start_pos, end_pos - start_pos);
  423. ++param_count;
  424. }
  425. i = sq_call(v, param_count, SQTrue, SQTrue);
  426. if(i < 0) return i;
  427. if(sq_gettype(v, -1) == OT_STRING){
  428. const SQChar *svalue;
  429. sq_getstring(v, -1, &svalue);
  430. blob.Write(svalue, sq_getsize(v, -1));
  431. }
  432. sq_poptop(v);
  433. }
  434. break;
  435. case OT_ARRAY:{
  436. SQInteger array_idx = 0;
  437. for(i=0; i < rc; i++) {
  438. ov_offset = i*2;
  439. SQInteger pos = self->ovector[ov_offset];
  440. if(pos < 0) continue; //forget defined subroutines
  441. sq_pushinteger(v, array_idx++);
  442. if(SQ_SUCCEEDED(sq_get(v, replacement_idx)) &&
  443. SQ_SUCCEEDED(sq_getstr_and_size(v, -1, &replacement, &replacement_size))){
  444. blob.Write(replacement, replacement_size);
  445. sq_pop(v, 1); //remove value
  446. }
  447. }
  448. }
  449. break;
  450. case OT_TABLE:{
  451. for(i=0; i < rc; i++) {
  452. ov_offset = i*2;
  453. start_pos = self->ovector[ov_offset];
  454. if(start_pos < 0) continue;
  455. end_pos = self->ovector[ov_offset+1];
  456. sq_pushstring(v, str + start_pos, end_pos - start_pos);
  457. if(SQ_SUCCEEDED(sq_get(v, replacement_idx)) &&
  458. SQ_SUCCEEDED(sq_getstr_and_size(v, -1, &replacement, &replacement_size))){
  459. blob.Write(replacement, replacement_size);
  460. sq_pop(v, 1); //remove value
  461. }
  462. }
  463. }
  464. break;
  465. case OT_STRING:{
  466. sq_getstr_and_size(v, replacement_idx, &replacement, &replacement_size);
  467. for(i=0; i < replacement_size; i++) {
  468. SQInteger c = replacement[i];
  469. switch(c)
  470. {
  471. case '$':
  472. ++i;
  473. if(i < replacement_size)
  474. {
  475. SQInteger idx = replacement[i] - '0', match_idx = 0;
  476. for(int j=0; j < rc; j++) {
  477. ov_offset = j*2;
  478. start_pos = self->ovector[ov_offset];
  479. if(start_pos < 0) continue;
  480. if(match_idx == idx)
  481. {
  482. end_pos = self->ovector[ov_offset+1];
  483. blob.Write(str+start_pos, end_pos-start_pos);
  484. break;
  485. }
  486. ++match_idx;
  487. }
  488. if(idx != match_idx)
  489. {
  490. return sq_throwerror(v, _SC("there is no match for replacement $%d"), idx);
  491. }
  492. continue;
  493. }
  494. else
  495. {
  496. return sq_throwerror(v, _SC("unexpected end of replacement string"));
  497. }
  498. break;
  499. case '\\':
  500. ++i;
  501. if(i < replacement_size)
  502. {
  503. blob.WriteChar(replacement[i]);
  504. continue;
  505. }
  506. //falthrough last character on replacement string
  507. default:
  508. blob.WriteChar(c);
  509. }
  510. }
  511. }
  512. break;
  513. default:
  514. return sq_throwerror(v, _SC("gsub only works with closure, array, table for replacement"));
  515. }
  516. start_offset = self->ovector[(rc*2)-1]; //the last match + 1
  517. if(max_sub)
  518. {
  519. if(--max_sub == 0) break;
  520. }
  521. }
  522. if(rc < -2) //only no matching errore
  523. {
  524. return sq_throwerror(v, _SC("pcre2_match error %d"), (int)rc);
  525. }
  526. if(str_size) blob.Write(str+start_offset, str_size-start_offset);
  527. sq_pushstring(v, (const SQChar *)blob.GetBuf(), blob.Len());
  528. return 1;
  529. }
  530. static SQRESULT sq_pcre2_jit_compile(HSQUIRRELVM v)
  531. {
  532. SQ_FUNC_VARS(v);
  533. GET_pcre2_INSTANCE();
  534. SQ_OPT_INTEGER(v, 2, options, 0);
  535. sq_pushinteger(v, dlpcre2_jit_compile(self->re, options));
  536. return 1;
  537. }
  538. static SQRESULT sq_pcre2__typeof(HSQUIRRELVM v)
  539. {
  540. sq_pushstring(v,_SC(PCRE2_Tag),-1);
  541. return 1;
  542. }
  543. static SQRESULT sq_pcre2_version(HSQUIRRELVM v)
  544. {
  545. PCRE2_UCHAR8 buf[32];
  546. int rc = dlpcre2_config(PCRE2_CONFIG_VERSION, buf);
  547. sq_pushstring(v, (const SQChar*)buf, -1);
  548. return 1;
  549. }
  550. static SQRESULT sq_pcre2_loadlib(HSQUIRRELVM v)
  551. {
  552. SQ_FUNC_VARS_NO_TOP(v);
  553. SQ_GET_STRING(v, 2, libname);
  554. sq_pushbool(v, load_library(libname));
  555. return 1;
  556. }
  557. /*
  558. The external callout function returns an integer to PCRE2.
  559. If the value is zero, matching proceeds as normal.
  560. If the value is greater than zero, matching fails at the current point,
  561. but the testing of other matching possibilities goes ahead,
  562. just as if a lookahead assertion had failed.
  563. If the value is less than zero, the match is abandoned,
  564. and the matching function returns the negative value.
  565. */
  566. static int sq_pcre2_callout_callback(pcre2_callout_block *pcb, void *udata)
  567. {
  568. SQInteger result = -1; /* abort by default */
  569. sqpcre2_st *sqpcre2 = (sqpcre2_st*)udata;
  570. HSQUIRRELVM v = sqpcre2->v;
  571. int top = sq_gettop(v);
  572. sq_pushobject(v, sqpcre2->callout_cb);
  573. sq_pushroottable(v); //this
  574. sq_pushinteger(v, pcb->callout_number);
  575. if(pcb->callout_string_length) sq_pushstring(v,
  576. (const SQChar*)pcb->callout_string, pcb->callout_string_length);
  577. else sq_pushnull(v);
  578. sq_pushinteger(v, pcb->start_match);
  579. sq_pushinteger(v, pcb->current_position);
  580. sq_pushobject(v, sqpcre2->callout_cb_udata);
  581. /* call squilu function */
  582. if (sq_call(v, 6, SQTrue, SQFalse) == SQ_OK)
  583. sq_getinteger(v, -1, &result);
  584. sq_settop(v, top);
  585. return result;
  586. }
  587. static SQRESULT sq_pcre2_set_callout(HSQUIRRELVM v)
  588. {
  589. SQ_FUNC_VARS(v);
  590. GET_pcre2_INSTANCE();
  591. if(!self->match_context)
  592. {
  593. self->match_context = dlpcre2_match_context_create(NULL);
  594. sq_resetobject(&self->callout_cb);
  595. sq_resetobject(&self->callout_cb_udata);
  596. }
  597. else
  598. {
  599. /* clear progress handler */
  600. dlpcre2_set_callout(self->match_context, NULL, NULL);
  601. }
  602. sq_release(v, &self->callout_cb);
  603. sq_release(v, &self->callout_cb_udata);
  604. sq_resetobject(&self->callout_cb);
  605. sq_resetobject(&self->callout_cb_udata);
  606. SQObjectType otype = sq_gettype(v, 2);
  607. if (_top_ > 1 && (otype != OT_NULL))
  608. {
  609. if(sq_gettype(v, 2) != OT_CLOSURE)
  610. return sq_throwerror(v, _SC("invalid second parameter expected closure"));
  611. sq_getstackobj(v, 2, &self->callout_cb);
  612. sq_addref(v, &self->callout_cb);
  613. if(_top_ > 2)
  614. {
  615. sq_getstackobj(v, 3, &self->callout_cb_udata);
  616. sq_addref(v, &self->callout_cb_udata);
  617. }
  618. /* set progress callback */
  619. dlpcre2_set_callout(self->match_context, sq_pcre2_callout_callback, self);
  620. }
  621. return 0;
  622. }
  623. static SQRESULT sq_pcre2_set_callout_param(HSQUIRRELVM v)
  624. {
  625. SQ_FUNC_VARS(v);
  626. GET_pcre2_INSTANCE();
  627. if(!self->match_context) return sq_throwerror(v, _SC("callout not set till now"));
  628. sq_release(v, &self->callout_cb_udata);
  629. sq_resetobject(&self->callout_cb_udata);
  630. SQObjectType otype = sq_gettype(v, 2);
  631. if (_top_ > 1 && (otype != OT_NULL))
  632. {
  633. sq_getstackobj(v, 2, &self->callout_cb_udata);
  634. sq_addref(v, &self->callout_cb_udata);
  635. }
  636. return 0;
  637. }
  // Disabled draft of a pcre2_substitute wrapper; it is compiled out by the
  // `#if 0` and is not registered in the method table.
  // NOTE(review): as written it cannot be enabled unchanged - str_size,
  // start_offset and options are not declared in this function, and out_str
  // is passed to pcre2_substitute without pointing at any buffer while
  // out_str_size is uninitialised.
  638. #if 0
  639. static SQRESULT sq_pcre2_substitute(HSQUIRRELVM v)
  640. {
  641. SQ_FUNC_VARS(v);
  642. GET_pcre2_INSTANCE();
  643. SQ_GET_STRING(v, 2, subject);
  644. SQ_GET_STRING(v, 3, replacement);
  645. PCRE2_UCHAR *out_str;
  646. PCRE2_SIZE out_str_size;
  647. int rc = dlpcre2_substitute(
  648. self->re, /* the compiled pattern */
  649. (PCRE2_SPTR)subject, /* the subject string */
  650. str_size, /* the length of the subject */
  651. start_offset, /* start at offset 0 in the subject */
  652. options, /* 0 = default options */
  653. self->match_data, /* block for storing the result */
  654. self->match_context, /* use default match context */
  655. (PCRE2_SPTR)replacement,
  656. replacement_size,
  657. out_str,
  658. &out_str_size);
  659. if(rc)
  660. {
  661. sq_pushstring(v, (const SQChar*)out_str, out_str_size);
  662. }
  663. else sq_pushnull(v);
  664. return 1;
  665. }
  666. #endif
  667. #define _DECL_FUNC(name,nparams,tycheck) {_SC(#name),sq_pcre2_##name,nparams,tycheck}
  668. static SQRegFunction sq_pcre2_methods[] =
  669. {
  670. _DECL_FUNC(constructor,-2,_SC(".sn")),
  671. _DECL_FUNC(jit_compile,-1,_SC("xi")),
  672. _DECL_FUNC(exec,-3,_SC("xsannn")),
  673. _DECL_FUNC(match,-2,_SC("xsnnn")),
  674. _DECL_FUNC(gmatch,-3,_SC("xscnnn")),
  675. _DECL_FUNC(gsub,-3,_SC("xs s|c|a|t nn")),
  676. _DECL_FUNC(_typeof,1,_SC("x")),
  677. _DECL_FUNC(version,1,_SC(".")),
  678. _DECL_FUNC(loadlib,2,_SC(".s")),
  679. _DECL_FUNC(set_callout,-2,_SC("xc.")),
  680. _DECL_FUNC(set_callout_param,-1,_SC("x.")),
  681. {0,0}
  682. };
  683. #undef _DECL_FUNC
  684. typedef struct {
  685. const SQChar *Str;
  686. SQInteger Val;
  687. } KeyIntType, * KeyIntPtrType;
  688. static KeyIntType sqpcre2_constants[] = {
  689. #define MK_CONST(c) {_SC(#c), PCRE2_##c}
  690. #define MK_CONST2(c) {_SC(c), (SQInteger)PCRE2_##c}
  691. //MK_CONST(SSL_SESSION_ID_SIZE),
  692. MK_CONST(ANCHORED),
  693. MK_CONST(NOTBOL),
  694. MK_CONST(NOTEOL),
  695. MK_CONST(NOTEMPTY),
  696. MK_CONST(NOTEMPTY_ATSTART),
  697. MK_CONST(NO_START_OPTIMIZE),
  698. MK_CONST(PARTIAL_HARD),
  699. MK_CONST(PARTIAL_SOFT),
  700. MK_CONST(NO_AUTO_POSSESS),
  701. MK_CONST(NO_START_OPTIMIZE),
  702. MK_CONST(NO_DOTSTAR_ANCHOR),
  703. MK_CONST(DOTALL),
  704. MK_CONST(CASELESS),
  705. MK_CONST(MULTILINE),
  706. MK_CONST(EXTENDED),
  707. MK_CONST(NEWLINE_CR),
  708. MK_CONST(NEWLINE_LF),
  709. MK_CONST(NEWLINE_CRLF),
  710. MK_CONST(NEWLINE_ANYCRLF),
  711. MK_CONST(NEWLINE_ANY),
  712. MK_CONST(INFO_NEWLINE),
  713. MK_CONST(INFO_ALLOPTIONS),
  714. MK_CONST(CONFIG_VERSION),
  715. MK_CONST(UTF),
  716. MK_CONST(NO_UTF_CHECK),
  717. MK_CONST(ERROR_UTF8_ERR1),
  718. MK_CONST(CONFIG_JIT),
  719. MK_CONST(JIT_COMPLETE),
  720. MK_CONST(JIT_PARTIAL_HARD),
  721. MK_CONST(JIT_PARTIAL_SOFT),
  722. MK_CONST(AUTO_CALLOUT),
  723. MK_CONST(SUBSTITUTE_GLOBAL),
  724. {0,0}
  725. };
  726. #ifdef __cplusplus
  727. extern "C" {
  728. #endif
  729. SQRESULT sqext_register_pcre2(HSQUIRRELVM v)
  730. {
  731. sq_pushstring(v,PCRE2_Tag,-1);
  732. sq_newclass(v,SQFalse);
  733. sq_settypetag(v,-1,(void*)PCRE2_Tag);
  734. sq_insert_reg_funcs(v, sq_pcre2_methods);
  735. //add constants
  736. KeyIntPtrType KeyIntPtr;
  737. for (KeyIntPtr = sqpcre2_constants; KeyIntPtr->Str; KeyIntPtr++) {
  738. sq_pushstring(v, KeyIntPtr->Str, -1); //first the key
  739. sq_pushinteger(v, KeyIntPtr->Val); //then the value
  740. sq_newslot(v, -3, SQFalse); //store then
  741. }
  742. sq_newslot(v,-3,SQTrue);
  743. return 0;
  744. }
  745. #ifdef __cplusplus
  746. }
  747. #endif
  748. #endif //SQ_USE_PCRE2