pcre2_stubs.c 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794
  1. /*
  2. PCRE2-OCAML - Perl Compatibility Regular Expressions for OCaml
  3. Copyright (C) 1999- Markus Mottl
  4. email: [email protected]
  5. WWW: http://www.ocaml.info
  6. This library is free software; you can redistribute it and/or
  7. modify it under the terms of the GNU Lesser General Public
  8. License as published by the Free Software Foundation; either
  9. version 2.1 of the License, or (at your option) any later version.
  10. This library is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. Lesser General Public License for more details.
  14. You should have received a copy of the GNU Lesser General Public
  15. License along with this library; if not, write to the Free Software
  16. Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17. */
  18. #if defined(_WIN32)
  19. # define snprintf _snprintf
  20. # if defined(_DLL)
  21. # define PCREextern __declspec(dllexport)
  22. # else
  23. # define PCREextern
  24. # endif
  25. #endif
  26. #if _WIN64
  27. typedef long long *caml_int_ptr;
  28. #else
  29. typedef long *caml_int_ptr;
  30. #endif
  31. #if __GNUC__ >= 3
  32. # define __unused __attribute__ ((unused))
  33. #else
  34. # define __unused
  35. #endif
  36. #include <ctype.h>
  37. #include <string.h>
  38. #include <stdio.h>
  39. #include <caml/mlvalues.h>
  40. #include <caml/alloc.h>
  41. #include <caml/memory.h>
  42. #include <caml/fail.h>
  43. #include <caml/callback.h>
  44. #include <caml/custom.h>
  45. #define PCRE2_CODE_UNIT_WIDTH 8
  46. #include <pcre2.h>
  47. typedef const unsigned char *chartables; /* Type of chartable sets */
  48. /* Contents of callout data */
  49. struct cod {
  50. long subj_start; /* Start of subject string */
  51. value *v_substrings_p; /* Pointer to substrings matched so far */
  52. value *v_cof_p; /* Pointer to callout function */
  53. value v_exn; /* Possible exception raised by callout function */
  54. };
  55. /* Cache for exceptions */
  56. static const value *pcre2_exc_Error = NULL; /* Exception [Error] */
  57. static const value *pcre2_exc_Backtrack = NULL; /* Exception [Backtrack] */
  58. /* Cache for polymorphic variants */
  59. static value var_Start_only; /* Variant [`Start_only] */
  60. static value var_ANCHORED; /* Variant [`ANCHORED] */
  61. static value var_Char; /* Variant [`Char char] */
  62. static value None = Val_int(0);
  63. /* Data associated with OCaml values of PCRE regular expression */
  64. struct pcre2_ocaml_regexp { pcre2_code *rex; pcre2_match_context *mcontext; };
  65. #define Pcre2_ocaml_regexp_val(v) \
  66. ((struct pcre2_ocaml_regexp *) Data_custom_val(v))
  67. #define get_rex(v) Pcre2_ocaml_regexp_val(v)->rex
  68. #define get_mcontext(v) Pcre2_ocaml_regexp_val(v)->mcontext
  69. #define set_rex(v, r) Pcre2_ocaml_regexp_val(v)->rex = r
  70. #define set_mcontext(v, c) Pcre2_ocaml_regexp_val(v)->mcontext = c
  71. /* Data associated with OCaml values of PCRE tables */
  72. struct pcre2_ocaml_tables { chartables tables; };
  73. #define Pcre2_ocaml_tables_val(v) \
  74. ((struct pcre2_ocaml_tables *) Data_custom_val(v))
  75. #define get_tables(v) Pcre2_ocaml_tables_val(v)->tables
  76. #define set_tables(v, t) Pcre2_ocaml_tables_val(v)->tables = t
  77. /* Converts subject offsets from C-integers to OCaml-Integers.
  78. This is a bit tricky, because there are 32- and 64-bit platforms around
  79. and OCaml chooses the larger possibility for representing integers when
  80. available (also in arrays) - not so the PCRE!
  81. */
  82. static inline void copy_ovector(
  83. long subj_start, const size_t* ovec_src, caml_int_ptr ovec_dst, uint32_t subgroups2)
  84. {
  85. if (subj_start == 0)
  86. while (subgroups2--) {
  87. *ovec_dst = Val_int(*ovec_src);
  88. --ovec_src; --ovec_dst;
  89. }
  90. else
  91. while (subgroups2--) {
  92. *ovec_dst = Val_long(*ovec_src + subj_start);
  93. --ovec_src; --ovec_dst;
  94. }
  95. }
  96. /* Callout handler */
  97. static int pcre2_callout_handler(pcre2_callout_block* cb, struct cod* cod)
  98. {
  99. if (cod != NULL) {
  100. /* Callout is available */
  101. value v_res;
  102. /* Set up parameter array */
  103. value v_callout_data = caml_alloc_small(8, 0);
  104. const value v_substrings = *cod->v_substrings_p;
  105. const uint32_t capture_top = cb->capture_top;
  106. uint32_t subgroups2 = capture_top << 1;
  107. const uint32_t subgroups2_1 = subgroups2 - 1;
  108. const size_t *ovec_src = cb->offset_vector + subgroups2_1;
  109. caml_int_ptr ovec_dst = &Field(Field(v_substrings, 1), 0) + subgroups2_1;
  110. long subj_start = cod->subj_start;
  111. copy_ovector(subj_start, ovec_src, ovec_dst, subgroups2);
  112. Field(v_callout_data, 0) = Val_int(cb->callout_number);
  113. Field(v_callout_data, 1) = v_substrings;
  114. Field(v_callout_data, 2) = Val_int(cb->start_match + subj_start);
  115. Field(v_callout_data, 3) = Val_int(cb->current_position + subj_start);
  116. Field(v_callout_data, 4) = Val_int(capture_top);
  117. Field(v_callout_data, 5) = Val_int(cb->capture_last);
  118. Field(v_callout_data, 6) = Val_int(cb->pattern_position);
  119. Field(v_callout_data, 7) = Val_int(cb->next_item_length);
  120. /* Perform callout */
  121. v_res = caml_callback_exn(*cod->v_cof_p, v_callout_data);
  122. if (Is_exception_result(v_res)) {
  123. /* Callout raised an exception */
  124. const value v_exn = Extract_exception(v_res);
  125. if (Field(v_exn, 0) == *pcre2_exc_Backtrack) return 1;
  126. cod->v_exn = v_exn;
  127. return PCRE2_ERROR_CALLOUT;
  128. }
  129. }
  130. return 0;
  131. }
  132. /* Fetches the named OCaml-values + caches them and
  133. calculates + caches the variant hash values */
  134. CAMLprim value pcre2_ocaml_init(value __unused v_unit)
  135. {
  136. pcre2_exc_Error = caml_named_value("Pcre2.Error");
  137. pcre2_exc_Backtrack = caml_named_value("Pcre2.Backtrack");
  138. var_Start_only = caml_hash_variant("Start_only");
  139. var_ANCHORED = caml_hash_variant("ANCHORED");
  140. var_Char = caml_hash_variant("Char");
  141. return Val_unit;
  142. }
  143. /* Finalizing deallocation function for chartable sets */
  144. static void pcre2_dealloc_tables(value v_tables)
  145. {
  146. #if PCRE2_MINOR >= 34
  147. pcre2_maketables_free(NULL, get_tables(v_tables));
  148. #else
  149. free((void*)get_tables(v_tables));
  150. #endif
  151. }
  152. /* Finalizing deallocation function for compiled regular expressions */
  153. static void pcre2_dealloc_regexp(value v_rex)
  154. {
  155. pcre2_code_free(get_rex(v_rex));
  156. pcre2_match_context_free(get_mcontext(v_rex));
  157. }
  158. /* Raising exceptions */
  159. CAMLnoreturn_start
  160. static inline void raise_pcre2_error(value v_arg)
  161. CAMLnoreturn_end;
  162. CAMLnoreturn_start
  163. static inline void raise_partial()
  164. CAMLnoreturn_end;
  165. CAMLnoreturn_start
  166. static inline void raise_bad_utf()
  167. CAMLnoreturn_end;
  168. CAMLnoreturn_start
  169. static inline void raise_bad_utf_offset()
  170. CAMLnoreturn_end;
  171. CAMLnoreturn_start
  172. static inline void raise_match_limit()
  173. CAMLnoreturn_end;
  174. CAMLnoreturn_start
  175. static inline void raise_depth_limit()
  176. CAMLnoreturn_end;
  177. CAMLnoreturn_start
  178. static inline void raise_workspace_size()
  179. CAMLnoreturn_end;
  180. CAMLnoreturn_start
  181. static inline void raise_bad_pattern(int code, size_t pos)
  182. CAMLnoreturn_end;
  183. CAMLnoreturn_start
  184. static inline void raise_internal_error(char *msg)
  185. CAMLnoreturn_end;
  186. static inline void raise_pcre2_error(value v_arg)
  187. { caml_raise_with_arg(*pcre2_exc_Error, v_arg); }
  188. static inline void raise_partial() { raise_pcre2_error(Val_int(0)); }
  189. static inline void raise_bad_utf() { raise_pcre2_error(Val_int(1)); }
  190. static inline void raise_bad_utf_offset() { raise_pcre2_error(Val_int(2)); }
  191. static inline void raise_match_limit() { raise_pcre2_error(Val_int(3)); }
  192. static inline void raise_depth_limit() { raise_pcre2_error(Val_int(4)); }
  193. static inline void raise_workspace_size() { raise_pcre2_error(Val_int(5)); }
  194. static inline void raise_bad_pattern(int code, size_t pos)
  195. {
  196. CAMLparam0();
  197. CAMLlocal1(v_msg);
  198. value v_arg;
  199. v_msg = caml_alloc_string(128);
  200. pcre2_get_error_message(code, (PCRE2_UCHAR *)String_val(v_msg), 128);
  201. v_arg = caml_alloc_small(2, 0);
  202. Field(v_arg, 0) = v_msg;
  203. Field(v_arg, 1) = Val_int(pos);
  204. raise_pcre2_error(v_arg);
  205. CAMLnoreturn;
  206. }
  207. static inline void raise_internal_error(char *msg)
  208. {
  209. CAMLparam0();
  210. CAMLlocal1(v_msg);
  211. value v_arg;
  212. v_msg = caml_copy_string(msg);
  213. v_arg = caml_alloc_small(1, 1);
  214. Field(v_arg, 0) = v_msg;
  215. raise_pcre2_error(v_arg);
  216. CAMLnoreturn;
  217. }
  218. /* PCRE pattern compilation */
  219. static struct custom_operations regexp_ops = {
  220. "pcre2_ocaml_regexp",
  221. pcre2_dealloc_regexp,
  222. custom_compare_default,
  223. custom_hash_default,
  224. custom_serialize_default,
  225. custom_deserialize_default,
  226. custom_compare_ext_default
  227. };
  228. /* Makes compiled regular expression from compilation options, an optional
  229. value of chartables and the pattern string */
  230. CAMLprim value pcre2_compile_stub(int64_t v_opt, value v_tables, value v_pat)
  231. {
  232. value v_rex; /* Final result -> value of type [regexp] */
  233. int error_code = 0; /* error code for potential error */
  234. size_t error_ofs = 0; /* offset in the pattern at which error occurred */
  235. size_t length = caml_string_length(v_pat);
  236. pcre2_compile_context* ccontext = NULL;
  237. /* If v_tables = [None], then pointer to tables is NULL, otherwise
  238. set it to the appropriate value */
  239. if (v_tables != None) {
  240. ccontext = pcre2_compile_context_create(NULL);
  241. pcre2_set_character_tables(ccontext, get_tables(Field(v_tables, 0)));
  242. }
  243. /* Compiles the pattern */
  244. pcre2_code* regexp = pcre2_compile((PCRE2_SPTR)String_val(v_pat), length, v_opt,
  245. &error_code, &error_ofs, ccontext);
  246. pcre2_compile_context_free(ccontext);
  247. /* Raises appropriate exception with [BadPattern] if the pattern
  248. could not be compiled */
  249. if (regexp == NULL) raise_bad_pattern(error_code, error_ofs);
  250. /* GC will do a full cycle every 1_000_000 regexp allocations (a typical
  251. regexp probably consumes less than 100 bytes -> maximum of 100_000_000
  252. bytes unreclaimed regexps) */
  253. v_rex =
  254. caml_alloc_custom(&regexp_ops,
  255. sizeof(struct pcre2_ocaml_regexp), 1, 1000000);
  256. set_rex(v_rex, regexp);
  257. set_mcontext(v_rex, pcre2_match_context_create(NULL));
  258. return v_rex;
  259. }
  260. CAMLprim value pcre2_compile_stub_bc(value v_opt, value v_tables, value v_pat)
  261. {
  262. return pcre2_compile_stub(Int64_val(v_opt), v_tables, v_pat);
  263. }
  264. /* Gets the depth limit of a regular expression if it exists */
  265. /* CAMLprim value pcre2_get_depth_limit_stub(value v_rex); */
  266. /* Gets the match limit of a regular expression if it exists */
  267. /* CAMLprim value pcre2_get_match_limit_stub(value v_rex); */
  268. /* Sets a match limit for a regular expression imperatively */
  269. CAMLprim value pcre2_set_imp_match_limit_stub(value v_rex, intnat v_lim) {
  270. pcre2_match_context* mcontext = get_mcontext(v_rex);
  271. pcre2_set_match_limit(mcontext, v_lim);
  272. return v_rex;
  273. }
  274. CAMLprim value pcre2_set_imp_match_limit_stub_bc(value v_rex, value v_lim)
  275. {
  276. return pcre2_set_imp_match_limit_stub(v_rex, Int_val(v_lim));
  277. }
  278. /* Sets a depth limit for a regular expression imperatively */
  279. CAMLprim value pcre2_set_imp_depth_limit_stub(value v_rex, intnat v_lim) {
  280. pcre2_match_context* mcontext = get_mcontext(v_rex);
  281. pcre2_set_depth_limit(mcontext, v_lim);
  282. return v_rex;
  283. }
  284. CAMLprim value pcre2_set_imp_depth_limit_stub_bc(value v_rex, value v_lim)
  285. {
  286. return pcre2_set_imp_depth_limit_stub(v_rex, Int_val(v_lim));
  287. }
  288. /* Performs the call to the pcre2_pattern_info function */
  289. static inline int pcre2_pattern_info_stub(value v_rex, int what, void* where)
  290. {
  291. return pcre2_pattern_info(get_rex(v_rex), what, where);
  292. }
  293. /* Some stubs for info-functions */
  294. /* Generic macro for getting integer results from pcre2_pattern_info */
  295. #define make_intnat_info(tp, name, option) \
  296. CAMLprim intnat pcre2_##name##_stub(value v_rex) \
  297. { \
  298. tp options; \
  299. const int ret = pcre2_pattern_info_stub(v_rex, PCRE2_INFO_##option, &options); \
  300. if (ret != 0) raise_internal_error("pcre2_##name##_stub"); \
  301. return options; \
  302. } \
  303. \
  304. CAMLprim value pcre2_##name##_stub_bc(value v_rex) \
  305. { return Val_int(pcre2_##name##_stub(v_rex)); }
  306. make_intnat_info(size_t, size, SIZE)
  307. make_intnat_info(int, capturecount, CAPTURECOUNT)
  308. make_intnat_info(int, backrefmax, BACKREFMAX)
  309. make_intnat_info(int, namecount, NAMECOUNT)
  310. make_intnat_info(int, nameentrysize, NAMEENTRYSIZE)
  311. CAMLprim int64_t pcre2_argoptions_stub(value v_rex)
  312. {
  313. uint32_t options;
  314. const int ret = pcre2_pattern_info_stub(v_rex, PCRE2_INFO_ARGOPTIONS, &options);
  315. if (ret != 0) raise_internal_error("pcre2_argoptions_stub");
  316. return (int64_t)options;
  317. }
  318. CAMLprim value pcre2_argoptions_stub_bc(value v_rex)
  319. {
  320. CAMLparam1(v_rex);
  321. CAMLreturn(caml_copy_int64(pcre2_argoptions_stub(v_rex)));
  322. }
  323. CAMLprim value pcre2_firstcodeunit_stub(value v_rex)
  324. {
  325. uint32_t firstcodetype;
  326. const int ret = pcre2_pattern_info_stub(v_rex, PCRE2_INFO_FIRSTCODETYPE, &firstcodetype);
  327. if (ret != 0) raise_internal_error("pcre2_firstcodeunit_stub");
  328. switch (firstcodetype) {
  329. case 2 : return var_Start_only; break; /* [`Start_only] */
  330. case 0 : return var_ANCHORED; break; /* [`ANCHORED] */
  331. case 1: {
  332. uint32_t firstcodeunit;
  333. const int ret = pcre2_pattern_info_stub(v_rex, PCRE2_INFO_FIRSTCODEUNIT, &firstcodeunit);
  334. if (ret != 0) raise_internal_error("pcre2_firstcodeunit_stub");
  335. value v_firstbyte;
  336. /* Allocates the non-constant constructor [`Char of char] and fills
  337. in the appropriate value */
  338. v_firstbyte = caml_alloc_small(2, 0);
  339. Field(v_firstbyte, 0) = var_Char;
  340. Field(v_firstbyte, 1) = Val_int(firstcodeunit);
  341. return v_firstbyte;
  342. break;
  343. }
  344. default: /* Should not happen */
  345. raise_internal_error("pcre2_firstcodeunit_stub");
  346. }
  347. }
  348. CAMLprim value pcre2_lastcodeunit_stub(value v_rex)
  349. {
  350. uint32_t lastcodetype;
  351. const int ret =
  352. pcre2_pattern_info_stub(v_rex, PCRE2_INFO_LASTCODETYPE, &lastcodetype);
  353. if (ret != 0) raise_internal_error("pcre2_lastcodeunit_stub");
  354. if (lastcodetype == 0) return None;
  355. if (lastcodetype != 1) raise_internal_error("pcre2_lastcodeunit_stub");
  356. else {
  357. /* Allocates [Some char] */
  358. value v_res = caml_alloc_small(1, 0);
  359. uint32_t lastcodeunit;
  360. const int ret = pcre2_pattern_info_stub(v_rex, PCRE2_INFO_LASTCODEUNIT, &lastcodeunit);
  361. if (ret != 0) raise_internal_error("pcre2_lastcodeunit_stub");
  362. Field(v_res, 0) = Val_int(lastcodeunit);
  363. return v_res;
  364. }
  365. }
  366. CAMLnoreturn_start
  367. static inline void handle_match_error(char *loc, const int ret)
  368. CAMLnoreturn_end;
  369. static inline void handle_match_error(char *loc, const int ret)
  370. {
  371. switch (ret) {
  372. /* Dedicated exceptions */
  373. case PCRE2_ERROR_NOMATCH : caml_raise_not_found();
  374. case PCRE2_ERROR_PARTIAL : raise_partial();
  375. case PCRE2_ERROR_MATCHLIMIT : raise_match_limit();
  376. case PCRE2_ERROR_BADUTFOFFSET : raise_bad_utf_offset();
  377. case PCRE2_ERROR_DEPTHLIMIT : raise_depth_limit();
  378. case PCRE2_ERROR_DFA_WSSIZE : raise_workspace_size();
  379. default : {
  380. if (PCRE2_ERROR_UTF8_ERR21 <= ret && ret <= PCRE2_ERROR_UTF8_ERR1)
  381. raise_bad_utf();
  382. /* Unknown error */
  383. char err_buf[100];
  384. snprintf(err_buf, 100, "%s: unhandled PCRE2 error code: %d", loc, ret);
  385. raise_internal_error(err_buf);
  386. }
  387. }
  388. }
  389. static inline void handle_pcre2_match_result(
  390. size_t *ovec, value v_ovec, size_t ovec_len, long subj_start, uint32_t ret)
  391. {
  392. caml_int_ptr ocaml_ovec = (caml_int_ptr) &Field(v_ovec, 0);
  393. const uint32_t subgroups2 = ret * 2;
  394. const uint32_t subgroups2_1 = subgroups2 - 1;
  395. const size_t *ovec_src = ovec + subgroups2_1;
  396. caml_int_ptr ovec_clear_stop = ocaml_ovec + (ovec_len * 2) / 3;
  397. caml_int_ptr ovec_dst = ocaml_ovec + subgroups2_1;
  398. copy_ovector(subj_start, ovec_src, ovec_dst, subgroups2);
  399. while (++ovec_dst < ovec_clear_stop) *ovec_dst = -1;
  400. }
  401. /* Executes a pattern match with runtime options, a regular expression, a
  402. matching position, the start of the the subject string, a subject string,
  403. a number of subgroup offsets, an offset vector and an optional callout
  404. function */
  405. CAMLprim value pcre2_match_stub0(
  406. int64_t v_opt, value v_rex, intnat v_pos, intnat v_subj_start, value v_subj,
  407. value v_ovec, value v_maybe_cof, value v_workspace)
  408. {
  409. int ret;
  410. int is_dfa = v_workspace != (value) NULL;
  411. long
  412. pos = v_pos,
  413. subj_start = v_subj_start;
  414. size_t
  415. ovec_len = Wosize_val(v_ovec),
  416. len = caml_string_length(v_subj);
  417. if (pos > (long)len || pos < subj_start)
  418. caml_invalid_argument("Pcre2.pcre2_match_stub: illegal position");
  419. if (subj_start > (long)len || subj_start < 0)
  420. caml_invalid_argument("Pcre2.pcre2_match_stub: illegal subject start");
  421. pos -= subj_start;
  422. len -= subj_start;
  423. {
  424. const pcre2_code *code = get_rex(v_rex); /* Compiled pattern */
  425. pcre2_match_context* mcontext = get_mcontext(v_rex); /* Match context */
  426. PCRE2_SPTR ocaml_subj = (PCRE2_SPTR)String_val(v_subj) + subj_start; /* Subject string */
  427. pcre2_match_data* match_data = pcre2_match_data_create_from_pattern(code, NULL);
  428. /* Special case when no callout functions specified */
  429. if (v_maybe_cof == None) {
  430. /* Performs the match */
  431. if (is_dfa)
  432. ret =
  433. pcre2_dfa_match(code, ocaml_subj, len, pos, v_opt, match_data, mcontext,
  434. (int *) &Field(v_workspace, 0), Wosize_val(v_workspace));
  435. else
  436. ret = pcre2_match(code, ocaml_subj, len, pos, v_opt, match_data, mcontext);
  437. size_t *ovec = pcre2_get_ovector_pointer(match_data);
  438. if (ret < 0) {
  439. pcre2_match_data_free(match_data);
  440. handle_match_error("pcre2_match_stub", ret);
  441. } else {
  442. handle_pcre2_match_result(ovec, v_ovec, ovec_len, subj_start, ret);
  443. }
  444. }
  445. /* There are callout functions */
  446. else {
  447. value v_cof = Field(v_maybe_cof, 0);
  448. value v_substrings;
  449. PCRE2_UCHAR* subj = caml_stat_alloc(sizeof(char) * len);
  450. int workspace_len;
  451. int *workspace;
  452. struct cod cod = { 0, (value *) NULL, (value *) NULL, (value) NULL };
  453. pcre2_match_context* new_mcontext = pcre2_match_context_copy(mcontext);
  454. pcre2_set_callout(new_mcontext, (int (*)(pcre2_callout_block_8*, void*))&pcre2_callout_handler, &cod);
  455. cod.subj_start = subj_start;
  456. memcpy(subj, ocaml_subj, len);
  457. Begin_roots4(v_rex, v_cof, v_substrings, v_ovec);
  458. Begin_roots1(v_subj);
  459. v_substrings = caml_alloc_small(2, 0);
  460. End_roots();
  461. Field(v_substrings, 0) = v_subj;
  462. Field(v_substrings, 1) = v_ovec;
  463. cod.v_substrings_p = &v_substrings;
  464. cod.v_cof_p = &v_cof;
  465. if (is_dfa) {
  466. workspace_len = Wosize_val(v_workspace);
  467. workspace = caml_stat_alloc(sizeof(int) * workspace_len);
  468. ret =
  469. pcre2_dfa_match(code, subj, len, pos, v_opt, match_data, new_mcontext,
  470. (int *) &Field(v_workspace, 0), workspace_len);
  471. } else
  472. ret =
  473. pcre2_match(code, subj, len, pos, v_opt, match_data, new_mcontext);
  474. caml_stat_free(subj);
  475. End_roots();
  476. pcre2_match_context_free(new_mcontext);
  477. size_t* ovec = pcre2_get_ovector_pointer(match_data);
  478. if (ret < 0) {
  479. if (is_dfa) caml_stat_free(workspace);
  480. pcre2_match_data_free(match_data);
  481. if (ret == PCRE2_ERROR_CALLOUT) caml_raise(cod.v_exn);
  482. else handle_match_error("pcre2_match_stub(callout)", ret);
  483. } else {
  484. handle_pcre2_match_result(ovec, v_ovec, ovec_len, subj_start, ret);
  485. if (is_dfa) {
  486. caml_int_ptr ocaml_workspace_dst =
  487. (caml_int_ptr) &Field(v_workspace, 0);
  488. const int *workspace_src = workspace;
  489. const int *workspace_src_stop = workspace + workspace_len;
  490. while (workspace_src != workspace_src_stop) {
  491. *ocaml_workspace_dst = *workspace_src;
  492. ocaml_workspace_dst++;
  493. workspace_src++;
  494. }
  495. caml_stat_free(workspace);
  496. }
  497. }
  498. }
  499. pcre2_match_data_free(match_data);
  500. }
  501. return Val_unit;
  502. }
  503. CAMLprim value pcre2_match_stub(
  504. int64_t v_opt, value v_rex, intnat v_pos, intnat v_subj_start, value v_subj,
  505. value v_ovec, value v_maybe_cof)
  506. {
  507. return pcre2_match_stub0(v_opt, v_rex, v_pos, v_subj_start, v_subj,
  508. v_ovec, v_maybe_cof, (value) NULL);
  509. }
  510. /* Byte-code hook for pcre2_match_stub
  511. Needed, because there are more than 5 arguments */
  512. CAMLprim value pcre2_match_stub_bc(value *argv, int __unused argn)
  513. {
  514. return
  515. pcre2_match_stub0(
  516. Int64_val(argv[0]), argv[1], Int_val(argv[2]), Int_val(argv[3]),
  517. argv[4], argv[5], argv[6], (value) NULL);
  518. }
  519. /* Byte-code hook for pcre2_dfa_match_stub
  520. Needed, because there are more than 5 arguments */
  521. CAMLprim value pcre2_dfa_match_stub_bc(value *argv, int __unused argn)
  522. {
  523. return
  524. pcre2_match_stub0(
  525. Int64_val(argv[0]), argv[1], Int_val(argv[2]), Int_val(argv[3]),
  526. argv[4], argv[5], argv[6], argv[7]);
  527. }
  528. static struct custom_operations tables_ops = {
  529. "pcre2_ocaml_tables",
  530. pcre2_dealloc_tables,
  531. custom_compare_default,
  532. custom_hash_default,
  533. custom_serialize_default,
  534. custom_deserialize_default,
  535. custom_compare_ext_default
  536. };
  537. /* Generates a new set of chartables for the current locale (see man
  538. page of PCRE */
  539. CAMLprim value pcre2_maketables_stub(value __unused v_unit)
  540. {
  541. /* GC will do a full cycle every 1_000_000 table set allocations (one
  542. table set consumes 864 bytes -> maximum of 864_000_000 bytes unreclaimed
  543. table sets) */
  544. const value v_tables =
  545. caml_alloc_custom(
  546. &tables_ops, sizeof(struct pcre2_ocaml_tables), 1, 1000000);
  547. set_tables(v_tables, pcre2_maketables(NULL));
  548. return v_tables;
  549. }
  550. /* Wraps around the isspace-function */
  551. CAMLprim value pcre2_isspace_stub(value v_c)
  552. {
  553. return Val_bool(isspace(Int_val(v_c)));
  554. }
  555. /* Returns number of substring associated with a name */
  556. CAMLprim intnat pcre2_substring_number_from_name_stub(value v_rex, value v_name)
  557. {
  558. const int ret = pcre2_substring_number_from_name(get_rex(v_rex), (PCRE2_SPTR)String_val(v_name));
  559. if (ret == PCRE2_ERROR_NOSUBSTRING)
  560. caml_invalid_argument("Named string not found");
  561. return ret;
  562. }
  563. CAMLprim value pcre2_substring_number_from_name_stub_bc(value v_rex, value v_name)
  564. {
  565. return Val_int(pcre2_substring_number_from_name_stub(v_rex, v_name));
  566. }
  567. /* Returns array of names of named substrings in a regexp */
  568. CAMLprim value pcre2_names_stub(value v_rex)
  569. {
  570. CAMLparam1(v_rex);
  571. CAMLlocal1(v_res);
  572. uint32_t name_count;
  573. uint32_t entry_size;
  574. const char *tbl_ptr;
  575. uint32_t i;
  576. int ret = pcre2_pattern_info_stub(v_rex, PCRE2_INFO_NAMECOUNT, &name_count);
  577. if (ret != 0) raise_internal_error("pcre2_names_stub: namecount");
  578. ret = pcre2_pattern_info_stub(v_rex, PCRE2_INFO_NAMEENTRYSIZE, &entry_size);
  579. if (ret != 0) raise_internal_error("pcre2_names_stub: nameentrysize");
  580. ret = pcre2_pattern_info_stub(v_rex, PCRE2_INFO_NAMETABLE, &tbl_ptr);
  581. if (ret != 0) raise_internal_error("pcre2_names_stub: nametable");
  582. v_res = caml_alloc(name_count, 0);
  583. for (i = 0; i < name_count; ++i) {
  584. value v_name = caml_copy_string(tbl_ptr + 2);
  585. Store_field(v_res, i, v_name);
  586. tbl_ptr += entry_size;
  587. }
  588. CAMLreturn(v_res);
  589. }
  590. /* Generic stub for getting integer results from pcre2_config */
  591. static inline int pcre2_config_int(int what)
  592. {
  593. int ret;
  594. pcre2_config(what, (void *) &ret);
  595. return ret;
  596. }
  597. /* Generic stub for getting long integer results from pcre2_config */
  598. static inline long pcre2_config_long(int what)
  599. {
  600. long ret;
  601. pcre2_config(what, (void *) &ret);
  602. return ret;
  603. }
  604. /* Some stubs for config-functions */
  605. /* Makes OCaml-string from PCRE-version */
  606. CAMLprim value pcre2_version_stub(value __unused v_unit) {
  607. CAMLparam1(v_unit);
  608. CAMLlocal1(v_version);
  609. v_version = caml_alloc_string(32);
  610. pcre2_config(PCRE2_CONFIG_VERSION, (void *)String_val(v_version));
  611. CAMLreturn(v_version);
  612. }
  613. /* Returns boolean indicating unicode support */
  614. CAMLprim value pcre2_config_unicode_stub(value __unused v_unit)
  615. { return Val_bool(pcre2_config_int(PCRE2_CONFIG_UNICODE)); }
  616. /* Returns character used as newline */
  617. CAMLprim value pcre2_config_newline_stub(value __unused v_unit)
  618. { return Val_int(pcre2_config_int(PCRE2_CONFIG_NEWLINE)); }
  619. /* Returns number of bytes used for internal linkage of regular expressions */
  620. CAMLprim intnat pcre2_config_link_size_stub(value __unused v_unit)
  621. { return pcre2_config_int(PCRE2_CONFIG_LINKSIZE); }
  622. CAMLprim value pcre2_config_link_size_stub_bc(value v_unit)
  623. { return Val_int(pcre2_config_link_size_stub(v_unit)); }
  624. /* Returns default limit for calls to internal matching function */
  625. CAMLprim intnat pcre2_config_match_limit_stub(value __unused v_unit)
  626. { return pcre2_config_long(PCRE2_CONFIG_MATCHLIMIT); }
  627. CAMLprim value pcre2_config_match_limit_stub_bc(value v_unit)
  628. { return Val_int(pcre2_config_match_limit_stub(v_unit)); }
  629. /* Returns default limit for depth of nested backtracking */
  630. CAMLprim intnat pcre2_config_depth_limit_stub(value __unused v_unit)
  631. { return pcre2_config_long(PCRE2_CONFIG_DEPTHLIMIT); }
  632. CAMLprim value pcre2_config_depth_limit_stub_bc(value v_unit)
  633. { return Val_int(pcre2_config_depth_limit_stub(v_unit)); }
  634. /* Returns boolean indicating use of stack recursion */
  635. CAMLprim intnat pcre2_config_stackrecurse_stub(value __unused v_unit)
  636. { return Val_bool(pcre2_config_int(PCRE2_CONFIG_STACKRECURSE)); }