pcre_stubs.c 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737
  1. /*
  2. PCRE-OCAML - Perl Compatibility Regular Expressions for OCaml
  3. Copyright (C) 1999- Markus Mottl
  4. email: [email protected]
  5. WWW: http://www.ocaml.info
  6. This library is free software; you can redistribute it and/or
  7. modify it under the terms of the GNU Lesser General Public
  8. License as published by the Free Software Foundation; either
  9. version 2 of the License, or (at your option) any later version.
  10. This library is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. Lesser General Public License for more details.
  14. You should have received a copy of the GNU Lesser General Public
  15. License along with this library; if not, write to the Free Software
  16. Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  17. */
  18. #if defined(_WIN32)
  19. # define snprintf _snprintf
  20. # if defined(_DLL)
  21. # define PCREextern __declspec(dllexport)
  22. # else
  23. # define PCREextern
  24. # endif
  25. #endif
  26. #if _WIN64
  27. typedef long long *ovec_dst_ptr;
  28. #else
  29. typedef long *ovec_dst_ptr;
  30. #endif
  31. #if __GNUC__ >= 3
  32. # define inline inline __attribute__ ((always_inline))
  33. # define __unused __attribute__ ((unused))
  34. #else
  35. # define __unused
  36. # define inline
  37. #endif
  38. #include <ctype.h>
  39. #include <string.h>
  40. #include <stdio.h>
  41. #include <caml/mlvalues.h>
  42. #include <caml/alloc.h>
  43. #include <caml/memory.h>
  44. #include <caml/fail.h>
  45. #include <caml/callback.h>
  46. #include "pcre.h"
  47. /* Error codes as defined for pcre 7.9, undefined in pcre 4.5 */
  48. #ifndef PCRE_ERROR_PARTIAL
  49. #define PCRE_ERROR_PARTIAL (-12)
  50. #endif
  51. #ifndef PCRE_ERROR_BADPARTIAL
  52. #define PCRE_ERROR_BADPARTIAL (-13)
  53. #endif
  54. #ifndef PCRE_ERROR_RECURSIONLIMIT
  55. #define PCRE_ERROR_RECURSIONLIMIT (-21)
  56. #endif
  57. typedef const unsigned char *chartables; /* Type of chartable sets */
  58. /* Contents of callout data */
  59. struct cod {
  60. long subj_start; /* Start of subject string */
  61. value *v_substrings_p; /* Pointer to substrings matched so far */
  62. value *v_cof_p; /* Pointer to callout function */
  63. value v_exn; /* Possible exception raised by callout function */
  64. };
  65. /* Cache for exceptions */
  66. static value *pcre_exc_Error = NULL; /* Exception [Error] */
  67. static value *pcre_exc_Backtrack = NULL; /* Exception [Backtrack] */
  68. /* Cache for polymorphic variants */
  69. static value var_Start_only; /* Variant [`Start_only] */
  70. static value var_ANCHORED; /* Variant [`ANCHORED] */
  71. static value var_Char; /* Variant [`Char char] */
  72. static value var_Not_studied; /* Variant [`Not_studied] */
  73. static value var_Studied; /* Variant [`Studied] */
  74. static value var_Optimal; /* Variant [`Optimal] */
  75. static value None = Val_int(0);
  76. /* Converts subject offsets from C-integers to OCaml-Integers.
  77. This is a bit tricky, because there are 32- and 64-bit platforms around
  78. and OCaml chooses the larger possibility for representing integers when
  79. available (also in arrays) - not so the PCRE!
  80. */
  81. static inline void copy_ovector(
  82. long subj_start, const int *ovec_src, ovec_dst_ptr ovec_dst, int subgroups2)
  83. {
  84. if (subj_start == 0)
  85. while (subgroups2--) {
  86. *ovec_dst = Val_int(*ovec_src);
  87. --ovec_src; --ovec_dst;
  88. }
  89. else
  90. while (subgroups2--) {
  91. *ovec_dst = Val_long(*ovec_src + subj_start);
  92. --ovec_src; --ovec_dst;
  93. }
  94. }
  95. /* Callout handler */
  96. static int pcre_callout_handler(pcre_callout_block* cb)
  97. {
  98. struct cod *cod = (struct cod *) cb->callout_data;
  99. if (cod != NULL) {
  100. /* Callout is available */
  101. value v_res;
  102. /* Set up parameter array */
  103. value v_callout_data = caml_alloc_small(8, 0);
  104. const value v_substrings = *cod->v_substrings_p;
  105. const int capture_top = cb->capture_top;
  106. int subgroups2 = capture_top << 1;
  107. const int subgroups2_1 = subgroups2 - 1;
  108. const int *ovec_src = cb->offset_vector + subgroups2_1;
  109. ovec_dst_ptr ovec_dst = &Field(Field(v_substrings, 1), 0) + subgroups2_1;
  110. long subj_start = cod->subj_start;
  111. copy_ovector(subj_start, ovec_src, ovec_dst, subgroups2);
  112. Field(v_callout_data, 0) = Val_int(cb->callout_number);
  113. Field(v_callout_data, 1) = v_substrings;
  114. Field(v_callout_data, 2) = Val_int(cb->start_match + subj_start);
  115. Field(v_callout_data, 3) = Val_int(cb->current_position + subj_start);
  116. Field(v_callout_data, 4) = Val_int(capture_top);
  117. Field(v_callout_data, 5) = Val_int(cb->capture_last);
  118. Field(v_callout_data, 6) = Val_int(cb->pattern_position);
  119. Field(v_callout_data, 7) = Val_int(cb->next_item_length);
  120. /* Perform callout */
  121. v_res = caml_callback_exn(*cod->v_cof_p, v_callout_data);
  122. if (Is_exception_result(v_res)) {
  123. /* Callout raised an exception */
  124. const value v_exn = Extract_exception(v_res);
  125. if (Field(v_exn, 0) == *pcre_exc_Backtrack) return 1;
  126. cod->v_exn = v_exn;
  127. return PCRE_ERROR_CALLOUT;
  128. }
  129. }
  130. return 0;
  131. }
  132. /* Fetchs the named OCaml-values + caches them and
  133. calculates + caches the variant hash values */
  134. CAMLprim value pcre_ocaml_init(value __unused v_unit)
  135. {
  136. pcre_exc_Error = caml_named_value("Pcre.Error");
  137. pcre_exc_Backtrack = caml_named_value("Pcre.Backtrack");
  138. var_Start_only = caml_hash_variant("Start_only");
  139. var_ANCHORED = caml_hash_variant("ANCHORED");
  140. var_Char = caml_hash_variant("Char");
  141. var_Not_studied = caml_hash_variant("Not_studied");
  142. var_Studied = caml_hash_variant("Studied");
  143. var_Optimal = caml_hash_variant("Optimal");
  144. pcre_callout = &pcre_callout_handler;
  145. return Val_unit;
  146. }
  147. /* Finalizing deallocation function for chartable sets */
  148. static void pcre_dealloc_tables(value v_table)
  149. { (pcre_free)((void *) Field(v_table, 1)); }
  150. /* Finalizing deallocation function for compiled regular expressions */
  151. static void pcre_dealloc_regexp(value v_rex)
  152. {
  153. void *extra = (void *) Field(v_rex, 2);
  154. (pcre_free)((void *) Field(v_rex, 1));
  155. if (extra != NULL)
  156. #ifdef PCRE_STUDY_JIT_COMPILE
  157. pcre_free_study(extra);
  158. #else
  159. pcre_free(extra);
  160. #endif
  161. }
  162. /* Makes OCaml-string from PCRE-version */
  163. CAMLprim value pcre_version_stub(value __unused v_unit)
  164. {
  165. return caml_copy_string((char *) pcre_version());
  166. }
  167. /* Raising exceptions */
  168. static inline void raise_pcre_error(value v_arg) Noreturn;
  169. static inline void raise_partial() Noreturn;
  170. static inline void raise_bad_partial() Noreturn;
  171. static inline void raise_bad_utf8() Noreturn;
  172. static inline void raise_bad_utf8_offset() Noreturn;
  173. static inline void raise_match_limit() Noreturn;
  174. static inline void raise_recursion_limit() Noreturn;
  175. static inline void raise_bad_pattern(const char *msg, int pos) Noreturn;
  176. static inline void raise_internal_error(char *msg) Noreturn;
  177. static inline void raise_pcre_error(value v_arg)
  178. { caml_raise_with_arg(*pcre_exc_Error, v_arg); }
  179. static inline void raise_partial() { raise_pcre_error(Val_int(0)); }
  180. static inline void raise_bad_partial() { raise_pcre_error(Val_int(1)); }
  181. static inline void raise_bad_utf8() { raise_pcre_error(Val_int(2)); }
  182. static inline void raise_bad_utf8_offset() { raise_pcre_error(Val_int(3)); }
  183. static inline void raise_match_limit() { raise_pcre_error(Val_int(4)); }
  184. static inline void raise_recursion_limit() { raise_pcre_error(Val_int(5)); }
  185. static inline void raise_bad_pattern(const char *msg, int pos)
  186. {
  187. CAMLparam0();
  188. CAMLlocal1(v_msg);
  189. value v_arg;
  190. v_msg = caml_copy_string(msg);
  191. v_arg = caml_alloc_small(2, 0);
  192. Field(v_arg, 0) = v_msg;
  193. Field(v_arg, 1) = Val_int(pos);
  194. raise_pcre_error(v_arg);
  195. CAMLnoreturn;
  196. }
  197. static inline void raise_internal_error(char *msg)
  198. {
  199. CAMLparam0();
  200. CAMLlocal1(v_msg);
  201. value v_arg;
  202. v_msg = caml_copy_string(msg);
  203. v_arg = caml_alloc_small(1, 1);
  204. Field(v_arg, 0) = v_msg;
  205. raise_pcre_error(v_arg);
  206. CAMLnoreturn;
  207. }
  208. /* PCRE pattern compilation */
  209. /* Makes compiled regular expression from compilation options, an optional
  210. value of chartables and the pattern string */
  211. CAMLprim value pcre_compile_stub(value v_opt, value v_tables, value v_pat)
  212. {
  213. value v_rex; /* Final result -> value of type [regexp] */
  214. const char *error = NULL; /* pointer to possible error message */
  215. int error_ofs = 0; /* offset in the pattern at which error occurred */
  216. /* If v_tables = [None], then pointer to tables is NULL, otherwise
  217. set it to the appropriate value */
  218. chartables tables =
  219. (v_tables == None) ? NULL : (chartables) Field(Field(v_tables, 0), 1);
  220. /* Compiles the pattern */
  221. pcre *regexp = pcre_compile(String_val(v_pat), Int_val(v_opt), &error,
  222. &error_ofs, tables);
  223. /* Raises appropriate exception with [BadPattern] if the pattern
  224. could not be compiled */
  225. if (regexp == NULL) raise_bad_pattern(error, error_ofs);
  226. /* GC will do a full cycle every 1_000_000 regexp allocations (a typical
  227. regexp probably consumes less than 100 bytes -> maximum of 100_000_000
  228. bytes unreclaimed regexps) */
  229. v_rex = caml_alloc_final(4, pcre_dealloc_regexp, 1, 1000000);
  230. /* Field[1]: compiled regular expression (Field[0] is finalizing
  231. function! See above!) */
  232. Field(v_rex, 1) = (value) regexp;
  233. /* Field[2]: extra information about regexp when it has been studied
  234. successfully */
  235. Field(v_rex, 2) = (value) NULL;
  236. /* Field[3]: If 0 -> regexp has not yet been studied
  237. 1 -> regexp has already been studied */
  238. Field(v_rex, 3) = 0;
  239. return v_rex;
  240. }
  241. /* Studies a regexp */
  242. CAMLprim value pcre_study_stub(value v_rex)
  243. {
  244. /* If it has not yet been studied */
  245. if (! (int) Field(v_rex, 3)) {
  246. const char *error = NULL;
  247. pcre_extra *extra = pcre_study((pcre *) Field(v_rex, 1), 0, &error);
  248. if (error != NULL) caml_invalid_argument((char *) error);
  249. Field(v_rex, 2) = (value) extra;
  250. Field(v_rex, 3) = Val_int(1);
  251. }
  252. return v_rex;
  253. }
  254. /* Sets a match limit recursion for a regular expression imperatively */
  255. CAMLprim value pcre_set_imp_match_limit_recursion_stub(value v_rex, value v_lim)
  256. {
  257. pcre_extra *extra = (pcre_extra *) Field(v_rex, 2);
  258. if (extra == NULL) {
  259. extra = pcre_malloc(sizeof(pcre_extra));
  260. extra->flags = PCRE_EXTRA_MATCH_LIMIT_RECURSION;
  261. Field(v_rex, 2) = (value) extra;
  262. } else {
  263. unsigned long *flags_ptr = &extra->flags;
  264. *flags_ptr = PCRE_EXTRA_MATCH_LIMIT_RECURSION | *flags_ptr;
  265. }
  266. extra->match_limit_recursion = Int_val(v_lim);
  267. return v_rex;
  268. }
  269. /* Gets the match limit recursion of a regular expression if it exists */
  270. CAMLprim value pcre_get_match_limit_recursion_stub(value v_rex)
  271. {
  272. pcre_extra *extra = (pcre_extra *) Field(v_rex, 2);
  273. if (extra == NULL) return None;
  274. if (extra->flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) {
  275. value v_lim = Val_int(extra->match_limit_recursion);
  276. value v_res = caml_alloc_small(1, 0);
  277. Field(v_res, 0) = v_lim;
  278. return v_res;
  279. }
  280. return None;
  281. }
  282. /* Sets a match limit for a regular expression imperatively */
  283. CAMLprim value pcre_set_imp_match_limit_stub(value v_rex, value v_lim)
  284. {
  285. pcre_extra *extra = (pcre_extra *) Field(v_rex, 2);
  286. if (extra == NULL) {
  287. extra = pcre_malloc(sizeof(pcre_extra));
  288. extra->flags = PCRE_EXTRA_MATCH_LIMIT;
  289. Field(v_rex, 2) = (value) extra;
  290. } else {
  291. unsigned long *flags_ptr = &extra->flags;
  292. *flags_ptr = PCRE_EXTRA_MATCH_LIMIT | *flags_ptr;
  293. }
  294. extra->match_limit = Int_val(v_lim);
  295. return v_rex;
  296. }
  297. /* Gets the match limit of a regular expression if it exists */
  298. CAMLprim value pcre_get_match_limit_stub(value v_rex)
  299. {
  300. pcre_extra *extra = (pcre_extra *) Field(v_rex, 2);
  301. if (extra == NULL) return None;
  302. if (extra->flags & PCRE_EXTRA_MATCH_LIMIT) {
  303. value v_lim = Val_int(extra->match_limit);
  304. value v_res = caml_alloc_small(1, 0);
  305. Field(v_res, 0) = v_lim;
  306. return v_res;
  307. }
  308. return None;
  309. }
  310. /* Performs the call to the pcre_fullinfo function */
  311. static inline int pcre_fullinfo_stub(value v_rex, int what, void *where)
  312. {
  313. return pcre_fullinfo((pcre *) Field(v_rex, 1), (pcre_extra *) Field(v_rex, 2),
  314. what, where);
  315. }
  316. /* Some stubs for info-functions */
  317. /* Generic macro for getting integer results from pcre_fullinfo */
  318. #define make_info(tp, cnv, name, option) \
  319. CAMLprim value pcre_##name##_stub(value v_rex) \
  320. { \
  321. tp options; \
  322. const int ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_##option, &options); \
  323. if (ret != 0) raise_internal_error("pcre_##name##_stub"); \
  324. return cnv(options); \
  325. }
  326. make_info(unsigned long, Val_long, options, OPTIONS)
  327. make_info(size_t, Val_long, size, SIZE)
  328. make_info(size_t, Val_long, studysize, STUDYSIZE)
  329. make_info(int, Val_int, capturecount, CAPTURECOUNT)
  330. make_info(int, Val_int, backrefmax, BACKREFMAX)
  331. make_info(int, Val_int, namecount, NAMECOUNT)
  332. make_info(int, Val_int, nameentrysize, NAMEENTRYSIZE)
  333. CAMLprim value pcre_firstbyte_stub(value v_rex)
  334. {
  335. int firstbyte;
  336. const int ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_FIRSTBYTE, &firstbyte);
  337. if (ret != 0) raise_internal_error("pcre_firstbyte_stub");
  338. switch (firstbyte) {
  339. case -1 : return var_Start_only; break; /* [`Start_only] */
  340. case -2 : return var_ANCHORED; break; /* [`ANCHORED] */
  341. default :
  342. if (firstbyte < 0 ) /* Should not happen */
  343. raise_internal_error("pcre_firstbyte_stub");
  344. else {
  345. value v_firstbyte;
  346. /* Allocates the non-constant constructor [`Char of char] and fills
  347. in the appropriate value */
  348. v_firstbyte = caml_alloc_small(2, 0);
  349. Field(v_firstbyte, 0) = var_Char;
  350. Field(v_firstbyte, 1) = Val_int(firstbyte);
  351. return v_firstbyte;
  352. }
  353. }
  354. }
  355. CAMLprim value pcre_firsttable_stub(value v_rex)
  356. {
  357. const unsigned char *ftable;
  358. int ret =
  359. pcre_fullinfo_stub(v_rex, PCRE_INFO_FIRSTTABLE, (void *) &ftable);
  360. if (ret != 0) raise_internal_error("pcre_firsttable_stub");
  361. if (ftable == NULL) return None;
  362. else {
  363. value v_res, v_res_str;
  364. char *ptr;
  365. int i;
  366. Begin_roots1(v_rex);
  367. v_res_str = caml_alloc_string(32);
  368. End_roots();
  369. ptr = String_val(v_res_str);
  370. for (i = 0; i <= 31; ++i) { *ptr = *ftable; ++ptr; ++ftable; }
  371. Begin_roots1(v_res_str);
  372. /* Allocates [Some string] from firsttable */
  373. v_res = caml_alloc_small(1, 0);
  374. End_roots();
  375. Field(v_res, 0) = v_res_str;
  376. return v_res;
  377. }
  378. }
  379. CAMLprim value pcre_lastliteral_stub(value v_rex)
  380. {
  381. int lastliteral;
  382. const int ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_LASTLITERAL,
  383. &lastliteral);
  384. if (ret != 0) raise_internal_error("pcre_lastliteral_stub");
  385. if (lastliteral == -1) return None;
  386. if (lastliteral < 0) raise_internal_error("pcre_lastliteral_stub");
  387. else {
  388. /* Allocates [Some char] */
  389. value v_res = caml_alloc_small(1, 0);
  390. Field(v_res, 0) = Val_int(lastliteral);
  391. return v_res;
  392. }
  393. }
  394. CAMLprim value pcre_study_stat_stub(value v_rex)
  395. {
  396. /* Generates the appropriate constant constructor [`Optimal] or
  397. [`Studied] if regexp has already been studied */
  398. if (Field(v_rex, 3))
  399. return ((pcre_extra *) Field(v_rex, 2) == NULL) ? var_Optimal : var_Studied;
  400. return var_Not_studied; /* otherwise [`Not_studied] */
  401. }
  402. static inline void handle_exec_error(char *loc, const int ret) Noreturn;
  403. static inline void handle_exec_error(char *loc, const int ret)
  404. {
  405. switch (ret) {
  406. /* Dedicated exceptions */
  407. case PCRE_ERROR_NOMATCH : caml_raise_not_found();
  408. case PCRE_ERROR_PARTIAL : raise_partial();
  409. case PCRE_ERROR_MATCHLIMIT : raise_match_limit();
  410. case PCRE_ERROR_BADPARTIAL : raise_bad_partial();
  411. case PCRE_ERROR_BADUTF8 : raise_bad_utf8();
  412. case PCRE_ERROR_BADUTF8_OFFSET : raise_bad_utf8_offset();
  413. case PCRE_ERROR_RECURSIONLIMIT : raise_recursion_limit();
  414. /* Unknown error */
  415. default : {
  416. char err_buf[100];
  417. snprintf(err_buf, 100, "%s: unhandled PCRE error code: %d", loc, ret);
  418. raise_internal_error(err_buf);
  419. }
  420. }
  421. }
  422. static inline void handle_pcre_exec_result(
  423. int *ovec, value v_ovec, long ovec_len, long subj_start, int ret)
  424. {
  425. ovec_dst_ptr ocaml_ovec = (ovec_dst_ptr) &Field(v_ovec, 0);
  426. const int subgroups2 = ret * 2;
  427. const int subgroups2_1 = subgroups2 - 1;
  428. const int *ovec_src = ovec + subgroups2_1;
  429. ovec_dst_ptr ovec_clear_stop = ocaml_ovec + (ovec_len * 2) / 3;
  430. ovec_dst_ptr ovec_dst = ocaml_ovec + subgroups2_1;
  431. copy_ovector(subj_start, ovec_src, ovec_dst, subgroups2);
  432. while (++ovec_dst < ovec_clear_stop) *ovec_dst = -1;
  433. }
  434. /* Executes a pattern match with runtime options, a regular expression, a
  435. matching position, the start of the the subject string, a subject string,
  436. a number of subgroup offsets, an offset vector and an optional callout
  437. function */
  438. CAMLprim value pcre_exec_stub(value v_opt, value v_rex, value v_pos,
  439. value v_subj_start, value v_subj,
  440. value v_ovec, value v_maybe_cof)
  441. {
  442. int ret;
  443. long
  444. pos = Long_val(v_pos),
  445. len = caml_string_length(v_subj),
  446. subj_start = Long_val(v_subj_start);
  447. long ovec_len = Wosize_val(v_ovec);
  448. if (pos > len || pos < subj_start)
  449. caml_invalid_argument("Pcre.pcre_exec_stub: illegal position");
  450. if (subj_start > len || subj_start < 0)
  451. caml_invalid_argument("Pcre.pcre_exec_stub: illegal subject start");
  452. pos -= subj_start;
  453. len -= subj_start;
  454. {
  455. const pcre *code = (pcre *) Field(v_rex, 1); /* Compiled pattern */
  456. const pcre_extra *extra = (pcre_extra *) Field(v_rex, 2); /* Extra info */
  457. const char *ocaml_subj =
  458. String_val(v_subj) + subj_start; /* Subject string */
  459. const int opt = Int_val(v_opt); /* Runtime options */
  460. /* Special case when no callout functions specified */
  461. if (v_maybe_cof == None) {
  462. int *ovec = (int *) &Field(v_ovec, 0);
  463. /* Performs the match */
  464. ret = pcre_exec(code, extra, ocaml_subj, len, pos, opt, ovec, ovec_len);
  465. if (ret < 0) handle_exec_error("pcre_exec_stub", ret);
  466. else handle_pcre_exec_result(ovec, v_ovec, ovec_len, subj_start, ret);
  467. }
  468. /* There are callout functions */
  469. else {
  470. value v_cof = Field(v_maybe_cof, 0);
  471. value v_substrings;
  472. char *subj = caml_stat_alloc(sizeof(char) * len);
  473. int *ovec = caml_stat_alloc(sizeof(int) * ovec_len);
  474. struct cod cod = { 0, (value *) NULL, (value *) NULL, (value) NULL };
  475. struct pcre_extra new_extra =
  476. #ifdef PCRE_EXTRA_MATCH_LIMIT_RECURSION
  477. # ifdef PCRE_EXTRA_MARK
  478. # ifdef PCRE_EXTRA_EXECUTABLE_JIT
  479. { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0, NULL, NULL };
  480. # else
  481. { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0, NULL };
  482. # endif
  483. # else
  484. { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0 };
  485. # endif
  486. #else
  487. { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL };
  488. #endif
  489. cod.subj_start = subj_start;
  490. memcpy(subj, ocaml_subj, len);
  491. Begin_roots4(v_rex, v_cof, v_substrings, v_ovec);
  492. Begin_roots1(v_subj);
  493. v_substrings = caml_alloc_small(2, 0);
  494. End_roots();
  495. Field(v_substrings, 0) = v_subj;
  496. Field(v_substrings, 1) = v_ovec;
  497. cod.v_substrings_p = &v_substrings;
  498. cod.v_cof_p = &v_cof;
  499. new_extra.callout_data = &cod;
  500. if (extra == NULL) {
  501. ret = pcre_exec(code, &new_extra, subj, len, pos, opt, ovec,
  502. ovec_len);
  503. }
  504. else {
  505. new_extra.flags = PCRE_EXTRA_CALLOUT_DATA | extra->flags;
  506. new_extra.study_data = extra->study_data;
  507. new_extra.match_limit = extra->match_limit;
  508. new_extra.tables = extra->tables;
  509. #ifdef PCRE_EXTRA_MATCH_LIMIT_RECURSION
  510. new_extra.match_limit_recursion = extra->match_limit_recursion;
  511. #endif
  512. ret = pcre_exec(code, &new_extra, subj, len, pos, opt, ovec,
  513. ovec_len);
  514. }
  515. caml_stat_free(subj);
  516. End_roots();
  517. if (ret < 0) {
  518. caml_stat_free(ovec);
  519. if (ret == PCRE_ERROR_CALLOUT) caml_raise(cod.v_exn);
  520. else handle_exec_error("pcre_exec_stub(callout)", ret);
  521. } else {
  522. handle_pcre_exec_result(ovec, v_ovec, ovec_len, subj_start, ret);
  523. caml_stat_free(ovec);
  524. }
  525. }
  526. }
  527. return Val_unit;
  528. }
  529. /* Byte-code hook for pcre_exec_stub
  530. Needed, because there are more than 5 arguments */
  531. CAMLprim value pcre_exec_stub_bc(value *argv, int __unused argn)
  532. {
  533. return pcre_exec_stub(argv[0], argv[1], argv[2], argv[3],
  534. argv[4], argv[5], argv[6]);
  535. }
  536. /* Generates a new set of chartables for the current locale (see man
  537. page of PCRE */
  538. CAMLprim value pcre_maketables_stub(value __unused v_unit)
  539. {
  540. /* GC will do a full cycle every 1_000_000 table set allocations (one
  541. table set consumes 864 bytes -> maximum of 864_000_000 bytes unreclaimed
  542. table sets) */
  543. const value v_res = caml_alloc_final(2, pcre_dealloc_tables, 1, 1000000);
  544. Field(v_res, 1) = (value) pcre_maketables();
  545. return v_res;
  546. }
  547. /* Wraps around the isspace-function */
  548. CAMLprim value pcre_isspace_stub(value v_c)
  549. {
  550. return Val_bool(isspace(Int_val(v_c)));
  551. }
  552. /* Returns number of substring associated with a name */
  553. CAMLprim value pcre_get_stringnumber_stub(value v_rex, value v_name)
  554. {
  555. const int ret = pcre_get_stringnumber((pcre *) Field(v_rex, 1),
  556. String_val(v_name));
  557. if (ret == PCRE_ERROR_NOSUBSTRING)
  558. caml_invalid_argument("Named string not found");
  559. return Val_int(ret);
  560. }
  561. /* Returns array of names of named substrings in a regexp */
  562. CAMLprim value pcre_names_stub(value v_rex)
  563. {
  564. CAMLparam0();
  565. CAMLlocal1(v_res);
  566. int name_count;
  567. int entry_size;
  568. const char *tbl_ptr;
  569. int i;
  570. int ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMECOUNT, &name_count);
  571. if (ret != 0) raise_internal_error("pcre_names_stub: namecount");
  572. ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMEENTRYSIZE, &entry_size);
  573. if (ret != 0) raise_internal_error("pcre_names_stub: nameentrysize");
  574. ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMETABLE, &tbl_ptr);
  575. if (ret != 0) raise_internal_error("pcre_names_stub: nametable");
  576. v_res = caml_alloc(name_count, 0);
  577. for (i = 0; i < name_count; ++i) {
  578. value v_name = caml_copy_string(tbl_ptr + 2);
  579. Store_field(v_res, i, v_name);
  580. tbl_ptr += entry_size;
  581. }
  582. CAMLreturn(v_res);
  583. }
  584. /* Generic stub for getting integer results from pcre_config */
  585. static inline int pcre_config_int(int what)
  586. {
  587. int ret;
  588. pcre_config(what, (void *) &ret);
  589. return ret;
  590. }
  591. /* Generic stub for getting long integer results from pcre_config */
  592. static inline int pcre_config_long(int what)
  593. {
  594. long ret;
  595. pcre_config(what, (void *) &ret);
  596. return ret;
  597. }
  598. /* Some stubs for config-functions */
  599. /* Returns boolean indicating UTF8-support */
  600. CAMLprim value pcre_config_utf8_stub(value __unused v_unit)
  601. { return Val_bool(pcre_config_int(PCRE_CONFIG_UTF8)); }
  602. /* Returns character used as newline */
  603. CAMLprim value pcre_config_newline_stub(value __unused v_unit)
  604. { return Val_int(pcre_config_int(PCRE_CONFIG_NEWLINE)); }
  605. /* Returns number of bytes used for internal linkage of regular expressions */
  606. CAMLprim value pcre_config_link_size_stub(value __unused v_unit)
  607. { return Val_int(pcre_config_int(PCRE_CONFIG_LINK_SIZE)); }
  608. /* Returns boolean indicating use of stack recursion */
  609. CAMLprim value pcre_config_stackrecurse_stub(value __unused v_unit)
  610. { return Val_bool(pcre_config_int(PCRE_CONFIG_STACKRECURSE)); }
  611. /* Returns default limit for calls to internal matching function */
  612. CAMLprim value pcre_config_match_limit_stub(value __unused v_unit)
  613. { return Val_long(pcre_config_long(PCRE_CONFIG_MATCH_LIMIT)); }
  614. /* Returns default limit for calls to internal matching function */
  615. CAMLprim value pcre_config_match_limit_recursion_stub(value __unused v_unit)
  616. { return Val_long(pcre_config_long(PCRE_CONFIG_MATCH_LIMIT_RECURSION)); }