pcre_stubs.c 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736
  1. /*
  2. PCRE-OCAML - Perl Compatibility Regular Expressions for OCaml
  3. Copyright (C) 1999- Markus Mottl
  4. email: [email protected]
  5. WWW: http://www.ocaml.info
  6. This library is free software; you can redistribute it and/or
  7. modify it under the terms of the GNU Lesser General Public
  8. License as published by the Free Software Foundation; either
  9. version 2 of the License, or (at your option) any later version.
  10. This library is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. Lesser General Public License for more details.
  14. You should have received a copy of the GNU Lesser General Public
  15. License along with this library; if not, write to the Free Software
  16. Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  17. */
  18. #if defined(_WIN32)
  19. # if defined(_DLL)
  20. # define PCREextern __declspec(dllexport)
  21. # else
  22. # define PCREextern
  23. # endif
  24. #endif
  25. #if _WIN64
  26. typedef long long *ovec_dst_ptr;
  27. #else
  28. typedef long *ovec_dst_ptr;
  29. #endif
  30. #if __GNUC__ >= 3
  31. # define inline inline __attribute__ ((always_inline))
  32. # define __unused __attribute__ ((unused))
  33. #else
  34. # define __unused
  35. # define inline
  36. #endif
  37. #include <ctype.h>
  38. #include <string.h>
  39. #include <stdio.h>
  40. #include <caml/mlvalues.h>
  41. #include <caml/alloc.h>
  42. #include <caml/memory.h>
  43. #include <caml/fail.h>
  44. #include <caml/callback.h>
  45. #include "pcre.h"
  46. /* Error codes as defined for pcre 7.9, undefined in pcre 4.5 */
  47. #ifndef PCRE_ERROR_PARTIAL
  48. #define PCRE_ERROR_PARTIAL (-12)
  49. #endif
  50. #ifndef PCRE_ERROR_BADPARTIAL
  51. #define PCRE_ERROR_BADPARTIAL (-13)
  52. #endif
  53. #ifndef PCRE_ERROR_RECURSIONLIMIT
  54. #define PCRE_ERROR_RECURSIONLIMIT (-21)
  55. #endif
  56. typedef const unsigned char *chartables; /* Type of chartable sets */
  57. /* Contents of callout data */
  58. struct cod {
  59. long subj_start; /* Start of subject string */
  60. value *v_substrings_p; /* Pointer to substrings matched so far */
  61. value *v_cof_p; /* Pointer to callout function */
  62. value v_exn; /* Possible exception raised by callout function */
  63. };
  64. /* Cache for exceptions */
  65. static value *pcre_exc_Error = NULL; /* Exception [Error] */
  66. static value *pcre_exc_Backtrack = NULL; /* Exception [Backtrack] */
  67. /* Cache for polymorphic variants */
  68. static value var_Start_only; /* Variant [`Start_only] */
  69. static value var_ANCHORED; /* Variant [`ANCHORED] */
  70. static value var_Char; /* Variant [`Char char] */
  71. static value var_Not_studied; /* Variant [`Not_studied] */
  72. static value var_Studied; /* Variant [`Studied] */
  73. static value var_Optimal; /* Variant [`Optimal] */
  74. static value None = Val_int(0);
  75. /* Converts subject offsets from C-integers to OCaml-Integers.
  76. This is a bit tricky, because there are 32- and 64-bit platforms around
  77. and OCaml chooses the larger possibility for representing integers when
  78. available (also in arrays) - not so the PCRE!
  79. */
  80. static inline void copy_ovector(
  81. long subj_start, const int *ovec_src, ovec_dst_ptr ovec_dst, int subgroups2)
  82. {
  83. if (subj_start == 0)
  84. while (subgroups2--) {
  85. *ovec_dst = Val_int(*ovec_src);
  86. --ovec_src; --ovec_dst;
  87. }
  88. else
  89. while (subgroups2--) {
  90. *ovec_dst = Val_long(*ovec_src + subj_start);
  91. --ovec_src; --ovec_dst;
  92. }
  93. }
  94. /* Callout handler */
  95. static int pcre_callout_handler(pcre_callout_block* cb)
  96. {
  97. struct cod *cod = (struct cod *) cb->callout_data;
  98. if (cod != NULL) {
  99. /* Callout is available */
  100. value v_res;
  101. /* Set up parameter array */
  102. value v_callout_data = caml_alloc_small(8, 0);
  103. const value v_substrings = *cod->v_substrings_p;
  104. const int capture_top = cb->capture_top;
  105. int subgroups2 = capture_top << 1;
  106. const int subgroups2_1 = subgroups2 - 1;
  107. const int *ovec_src = cb->offset_vector + subgroups2_1;
  108. ovec_dst_ptr ovec_dst = &Field(Field(v_substrings, 1), 0) + subgroups2_1;
  109. long subj_start = cod->subj_start;
  110. copy_ovector(subj_start, ovec_src, ovec_dst, subgroups2);
  111. Field(v_callout_data, 0) = Val_int(cb->callout_number);
  112. Field(v_callout_data, 1) = v_substrings;
  113. Field(v_callout_data, 2) = Val_int(cb->start_match + subj_start);
  114. Field(v_callout_data, 3) = Val_int(cb->current_position + subj_start);
  115. Field(v_callout_data, 4) = Val_int(capture_top);
  116. Field(v_callout_data, 5) = Val_int(cb->capture_last);
  117. Field(v_callout_data, 6) = Val_int(cb->pattern_position);
  118. Field(v_callout_data, 7) = Val_int(cb->next_item_length);
  119. /* Perform callout */
  120. v_res = caml_callback_exn(*cod->v_cof_p, v_callout_data);
  121. if (Is_exception_result(v_res)) {
  122. /* Callout raised an exception */
  123. const value v_exn = Extract_exception(v_res);
  124. if (Field(v_exn, 0) == *pcre_exc_Backtrack) return 1;
  125. cod->v_exn = v_exn;
  126. return PCRE_ERROR_CALLOUT;
  127. }
  128. }
  129. return 0;
  130. }
  131. /* Fetchs the named OCaml-values + caches them and
  132. calculates + caches the variant hash values */
  133. CAMLprim value pcre_ocaml_init(value __unused v_unit)
  134. {
  135. pcre_exc_Error = caml_named_value("Pcre.Error");
  136. pcre_exc_Backtrack = caml_named_value("Pcre.Backtrack");
  137. var_Start_only = caml_hash_variant("Start_only");
  138. var_ANCHORED = caml_hash_variant("ANCHORED");
  139. var_Char = caml_hash_variant("Char");
  140. var_Not_studied = caml_hash_variant("Not_studied");
  141. var_Studied = caml_hash_variant("Studied");
  142. var_Optimal = caml_hash_variant("Optimal");
  143. pcre_callout = &pcre_callout_handler;
  144. return Val_unit;
  145. }
  146. /* Finalizing deallocation function for chartable sets */
  147. static void pcre_dealloc_tables(value v_table)
  148. { (pcre_free)((void *) Field(v_table, 1)); }
  149. /* Finalizing deallocation function for compiled regular expressions */
  150. static void pcre_dealloc_regexp(value v_rex)
  151. {
  152. void *extra = (void *) Field(v_rex, 2);
  153. (pcre_free)((void *) Field(v_rex, 1));
  154. if (extra != NULL)
  155. #ifdef PCRE_STUDY_JIT_COMPILE
  156. pcre_free_study(extra);
  157. #else
  158. pcre_free(extra);
  159. #endif
  160. }
  161. /* Makes OCaml-string from PCRE-version */
  162. CAMLprim value pcre_version_stub(value __unused v_unit)
  163. {
  164. return caml_copy_string((char *) pcre_version());
  165. }
  166. /* Raising exceptions */
  167. static inline void raise_pcre_error(value v_arg) Noreturn;
  168. static inline void raise_partial() Noreturn;
  169. static inline void raise_bad_partial() Noreturn;
  170. static inline void raise_bad_utf8() Noreturn;
  171. static inline void raise_bad_utf8_offset() Noreturn;
  172. static inline void raise_match_limit() Noreturn;
  173. static inline void raise_recursion_limit() Noreturn;
  174. static inline void raise_bad_pattern(const char *msg, int pos) Noreturn;
  175. static inline void raise_internal_error(char *msg) Noreturn;
  176. static inline void raise_pcre_error(value v_arg)
  177. { caml_raise_with_arg(*pcre_exc_Error, v_arg); }
  178. static inline void raise_partial() { raise_pcre_error(Val_int(0)); }
  179. static inline void raise_bad_partial() { raise_pcre_error(Val_int(1)); }
  180. static inline void raise_bad_utf8() { raise_pcre_error(Val_int(2)); }
  181. static inline void raise_bad_utf8_offset() { raise_pcre_error(Val_int(3)); }
  182. static inline void raise_match_limit() { raise_pcre_error(Val_int(4)); }
  183. static inline void raise_recursion_limit() { raise_pcre_error(Val_int(5)); }
  184. static inline void raise_bad_pattern(const char *msg, int pos)
  185. {
  186. CAMLparam0();
  187. CAMLlocal1(v_msg);
  188. value v_arg;
  189. v_msg = caml_copy_string(msg);
  190. v_arg = caml_alloc_small(2, 0);
  191. Field(v_arg, 0) = v_msg;
  192. Field(v_arg, 1) = Val_int(pos);
  193. raise_pcre_error(v_arg);
  194. CAMLnoreturn;
  195. }
  196. static inline void raise_internal_error(char *msg)
  197. {
  198. CAMLparam0();
  199. CAMLlocal1(v_msg);
  200. value v_arg;
  201. v_msg = caml_copy_string(msg);
  202. v_arg = caml_alloc_small(1, 1);
  203. Field(v_arg, 0) = v_msg;
  204. raise_pcre_error(v_arg);
  205. CAMLnoreturn;
  206. }
  207. /* PCRE pattern compilation */
  208. /* Makes compiled regular expression from compilation options, an optional
  209. value of chartables and the pattern string */
  210. CAMLprim value pcre_compile_stub(value v_opt, value v_tables, value v_pat)
  211. {
  212. value v_rex; /* Final result -> value of type [regexp] */
  213. const char *error = NULL; /* pointer to possible error message */
  214. int error_ofs = 0; /* offset in the pattern at which error occurred */
  215. /* If v_tables = [None], then pointer to tables is NULL, otherwise
  216. set it to the appropriate value */
  217. chartables tables =
  218. (v_tables == None) ? NULL : (chartables) Field(Field(v_tables, 0), 1);
  219. /* Compiles the pattern */
  220. pcre *regexp = pcre_compile(String_val(v_pat), Int_val(v_opt), &error,
  221. &error_ofs, tables);
  222. /* Raises appropriate exception with [BadPattern] if the pattern
  223. could not be compiled */
  224. if (regexp == NULL) raise_bad_pattern(error, error_ofs);
  225. /* GC will do a full cycle every 1_000_000 regexp allocations (a typical
  226. regexp probably consumes less than 100 bytes -> maximum of 100_000_000
  227. bytes unreclaimed regexps) */
  228. v_rex = caml_alloc_final(4, pcre_dealloc_regexp, 1, 1000000);
  229. /* Field[1]: compiled regular expression (Field[0] is finalizing
  230. function! See above!) */
  231. Field(v_rex, 1) = (value) regexp;
  232. /* Field[2]: extra information about regexp when it has been studied
  233. successfully */
  234. Field(v_rex, 2) = (value) NULL;
  235. /* Field[3]: If 0 -> regexp has not yet been studied
  236. 1 -> regexp has already been studied */
  237. Field(v_rex, 3) = 0;
  238. return v_rex;
  239. }
  240. /* Studies a regexp */
  241. CAMLprim value pcre_study_stub(value v_rex)
  242. {
  243. /* If it has not yet been studied */
  244. if (! (int) Field(v_rex, 3)) {
  245. const char *error = NULL;
  246. pcre_extra *extra = pcre_study((pcre *) Field(v_rex, 1), 0, &error);
  247. if (error != NULL) caml_invalid_argument((char *) error);
  248. Field(v_rex, 2) = (value) extra;
  249. Field(v_rex, 3) = Val_int(1);
  250. }
  251. return v_rex;
  252. }
  253. /* Sets a match limit recursion for a regular expression imperatively */
  254. CAMLprim value pcre_set_imp_match_limit_recursion_stub(value v_rex, value v_lim)
  255. {
  256. pcre_extra *extra = (pcre_extra *) Field(v_rex, 2);
  257. if (extra == NULL) {
  258. extra = pcre_malloc(sizeof(pcre_extra));
  259. extra->flags = PCRE_EXTRA_MATCH_LIMIT_RECURSION;
  260. Field(v_rex, 2) = (value) extra;
  261. } else {
  262. unsigned long *flags_ptr = &extra->flags;
  263. *flags_ptr = PCRE_EXTRA_MATCH_LIMIT_RECURSION | *flags_ptr;
  264. }
  265. extra->match_limit_recursion = Int_val(v_lim);
  266. return v_rex;
  267. }
  268. /* Gets the match limit recursion of a regular expression if it exists */
  269. CAMLprim value pcre_get_match_limit_recursion_stub(value v_rex)
  270. {
  271. pcre_extra *extra = (pcre_extra *) Field(v_rex, 2);
  272. if (extra == NULL) return None;
  273. if (extra->flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) {
  274. value v_lim = Val_int(extra->match_limit_recursion);
  275. value v_res = caml_alloc_small(1, 0);
  276. Field(v_res, 0) = v_lim;
  277. return v_res;
  278. }
  279. return None;
  280. }
  281. /* Sets a match limit for a regular expression imperatively */
  282. CAMLprim value pcre_set_imp_match_limit_stub(value v_rex, value v_lim)
  283. {
  284. pcre_extra *extra = (pcre_extra *) Field(v_rex, 2);
  285. if (extra == NULL) {
  286. extra = pcre_malloc(sizeof(pcre_extra));
  287. extra->flags = PCRE_EXTRA_MATCH_LIMIT;
  288. Field(v_rex, 2) = (value) extra;
  289. } else {
  290. unsigned long *flags_ptr = &extra->flags;
  291. *flags_ptr = PCRE_EXTRA_MATCH_LIMIT | *flags_ptr;
  292. }
  293. extra->match_limit = Int_val(v_lim);
  294. return v_rex;
  295. }
  296. /* Gets the match limit of a regular expression if it exists */
  297. CAMLprim value pcre_get_match_limit_stub(value v_rex)
  298. {
  299. pcre_extra *extra = (pcre_extra *) Field(v_rex, 2);
  300. if (extra == NULL) return None;
  301. if (extra->flags & PCRE_EXTRA_MATCH_LIMIT) {
  302. value v_lim = Val_int(extra->match_limit);
  303. value v_res = caml_alloc_small(1, 0);
  304. Field(v_res, 0) = v_lim;
  305. return v_res;
  306. }
  307. return None;
  308. }
  309. /* Performs the call to the pcre_fullinfo function */
  310. static inline int pcre_fullinfo_stub(value v_rex, int what, void *where)
  311. {
  312. return pcre_fullinfo((pcre *) Field(v_rex, 1), (pcre_extra *) Field(v_rex, 2),
  313. what, where);
  314. }
  315. /* Some stubs for info-functions */
  316. /* Generic macro for getting integer results from pcre_fullinfo */
  317. #define make_info(tp, cnv, name, option) \
  318. CAMLprim value pcre_##name##_stub(value v_rex) \
  319. { \
  320. tp options; \
  321. const int ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_##option, &options); \
  322. if (ret != 0) raise_internal_error("pcre_##name##_stub"); \
  323. return cnv(options); \
  324. }
  325. make_info(unsigned long, Val_long, options, OPTIONS)
  326. make_info(size_t, Val_long, size, SIZE)
  327. make_info(size_t, Val_long, studysize, STUDYSIZE)
  328. make_info(int, Val_int, capturecount, CAPTURECOUNT)
  329. make_info(int, Val_int, backrefmax, BACKREFMAX)
  330. make_info(int, Val_int, namecount, NAMECOUNT)
  331. make_info(int, Val_int, nameentrysize, NAMEENTRYSIZE)
  332. CAMLprim value pcre_firstbyte_stub(value v_rex)
  333. {
  334. int firstbyte;
  335. const int ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_FIRSTBYTE, &firstbyte);
  336. if (ret != 0) raise_internal_error("pcre_firstbyte_stub");
  337. switch (firstbyte) {
  338. case -1 : return var_Start_only; break; /* [`Start_only] */
  339. case -2 : return var_ANCHORED; break; /* [`ANCHORED] */
  340. default :
  341. if (firstbyte < 0 ) /* Should not happen */
  342. raise_internal_error("pcre_firstbyte_stub");
  343. else {
  344. value v_firstbyte;
  345. /* Allocates the non-constant constructor [`Char of char] and fills
  346. in the appropriate value */
  347. v_firstbyte = caml_alloc_small(2, 0);
  348. Field(v_firstbyte, 0) = var_Char;
  349. Field(v_firstbyte, 1) = Val_int(firstbyte);
  350. return v_firstbyte;
  351. }
  352. }
  353. }
  354. CAMLprim value pcre_firsttable_stub(value v_rex)
  355. {
  356. const unsigned char *ftable;
  357. int ret =
  358. pcre_fullinfo_stub(v_rex, PCRE_INFO_FIRSTTABLE, (void *) &ftable);
  359. if (ret != 0) raise_internal_error("pcre_firsttable_stub");
  360. if (ftable == NULL) return None;
  361. else {
  362. value v_res, v_res_str;
  363. char *ptr;
  364. int i;
  365. Begin_roots1(v_rex);
  366. v_res_str = caml_alloc_string(32);
  367. End_roots();
  368. ptr = String_val(v_res_str);
  369. for (i = 0; i <= 31; ++i) { *ptr = *ftable; ++ptr; ++ftable; }
  370. Begin_roots1(v_res_str);
  371. /* Allocates [Some string] from firsttable */
  372. v_res = caml_alloc_small(1, 0);
  373. End_roots();
  374. Field(v_res, 0) = v_res_str;
  375. return v_res;
  376. }
  377. }
  378. CAMLprim value pcre_lastliteral_stub(value v_rex)
  379. {
  380. int lastliteral;
  381. const int ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_LASTLITERAL,
  382. &lastliteral);
  383. if (ret != 0) raise_internal_error("pcre_lastliteral_stub");
  384. if (lastliteral == -1) return None;
  385. if (lastliteral < 0) raise_internal_error("pcre_lastliteral_stub");
  386. else {
  387. /* Allocates [Some char] */
  388. value v_res = caml_alloc_small(1, 0);
  389. Field(v_res, 0) = Val_int(lastliteral);
  390. return v_res;
  391. }
  392. }
  393. CAMLprim value pcre_study_stat_stub(value v_rex)
  394. {
  395. /* Generates the appropriate constant constructor [`Optimal] or
  396. [`Studied] if regexp has already been studied */
  397. if (Field(v_rex, 3))
  398. return ((pcre_extra *) Field(v_rex, 2) == NULL) ? var_Optimal : var_Studied;
  399. return var_Not_studied; /* otherwise [`Not_studied] */
  400. }
  401. static inline void handle_exec_error(char *loc, const int ret) Noreturn;
  402. static inline void handle_exec_error(char *loc, const int ret)
  403. {
  404. switch (ret) {
  405. /* Dedicated exceptions */
  406. case PCRE_ERROR_NOMATCH : caml_raise_not_found();
  407. case PCRE_ERROR_PARTIAL : raise_partial();
  408. case PCRE_ERROR_MATCHLIMIT : raise_match_limit();
  409. case PCRE_ERROR_BADPARTIAL : raise_bad_partial();
  410. case PCRE_ERROR_BADUTF8 : raise_bad_utf8();
  411. case PCRE_ERROR_BADUTF8_OFFSET : raise_bad_utf8_offset();
  412. case PCRE_ERROR_RECURSIONLIMIT : raise_recursion_limit();
  413. /* Unknown error */
  414. default : {
  415. char err_buf[100];
  416. snprintf(err_buf, 100, "%s: unhandled PCRE error code: %d", loc, ret);
  417. raise_internal_error(err_buf);
  418. }
  419. }
  420. }
  421. static inline void handle_pcre_exec_result(
  422. int *ovec, value v_ovec, long ovec_len, long subj_start, int ret)
  423. {
  424. ovec_dst_ptr ocaml_ovec = (ovec_dst_ptr) &Field(v_ovec, 0);
  425. const int subgroups2 = ret * 2;
  426. const int subgroups2_1 = subgroups2 - 1;
  427. const int *ovec_src = ovec + subgroups2_1;
  428. ovec_dst_ptr ovec_clear_stop = ocaml_ovec + (ovec_len * 2) / 3;
  429. ovec_dst_ptr ovec_dst = ocaml_ovec + subgroups2_1;
  430. copy_ovector(subj_start, ovec_src, ovec_dst, subgroups2);
  431. while (++ovec_dst < ovec_clear_stop) *ovec_dst = -1;
  432. }
  433. /* Executes a pattern match with runtime options, a regular expression, a
  434. matching position, the start of the the subject string, a subject string,
  435. a number of subgroup offsets, an offset vector and an optional callout
  436. function */
  437. CAMLprim value pcre_exec_stub(value v_opt, value v_rex, value v_pos,
  438. value v_subj_start, value v_subj,
  439. value v_ovec, value v_maybe_cof)
  440. {
  441. int ret;
  442. long
  443. pos = Long_val(v_pos),
  444. len = caml_string_length(v_subj),
  445. subj_start = Long_val(v_subj_start);
  446. long ovec_len = Wosize_val(v_ovec);
  447. if (pos > len || pos < subj_start)
  448. caml_invalid_argument("Pcre.pcre_exec_stub: illegal position");
  449. if (subj_start > len || subj_start < 0)
  450. caml_invalid_argument("Pcre.pcre_exec_stub: illegal subject start");
  451. pos -= subj_start;
  452. len -= subj_start;
  453. {
  454. const pcre *code = (pcre *) Field(v_rex, 1); /* Compiled pattern */
  455. const pcre_extra *extra = (pcre_extra *) Field(v_rex, 2); /* Extra info */
  456. const char *ocaml_subj =
  457. String_val(v_subj) + subj_start; /* Subject string */
  458. const int opt = Int_val(v_opt); /* Runtime options */
  459. /* Special case when no callout functions specified */
  460. if (v_maybe_cof == None) {
  461. int *ovec = (int *) &Field(v_ovec, 0);
  462. /* Performs the match */
  463. ret = pcre_exec(code, extra, ocaml_subj, len, pos, opt, ovec, ovec_len);
  464. if (ret < 0) handle_exec_error("pcre_exec_stub", ret);
  465. else handle_pcre_exec_result(ovec, v_ovec, ovec_len, subj_start, ret);
  466. }
  467. /* There are callout functions */
  468. else {
  469. value v_cof = Field(v_maybe_cof, 0);
  470. value v_substrings;
  471. char *subj = caml_stat_alloc(sizeof(char) * len);
  472. int *ovec = caml_stat_alloc(sizeof(int) * ovec_len);
  473. struct cod cod = { 0, (value *) NULL, (value *) NULL, (value) NULL };
  474. struct pcre_extra new_extra =
  475. #ifdef PCRE_EXTRA_MATCH_LIMIT_RECURSION
  476. # ifdef PCRE_EXTRA_MARK
  477. # ifdef PCRE_EXTRA_EXECUTABLE_JIT
  478. { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0, NULL, NULL };
  479. # else
  480. { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0, NULL };
  481. # endif
  482. # else
  483. { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0 };
  484. # endif
  485. #else
  486. { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL };
  487. #endif
  488. cod.subj_start = subj_start;
  489. memcpy(subj, ocaml_subj, len);
  490. Begin_roots4(v_rex, v_cof, v_substrings, v_ovec);
  491. Begin_roots1(v_subj);
  492. v_substrings = caml_alloc_small(2, 0);
  493. End_roots();
  494. Field(v_substrings, 0) = v_subj;
  495. Field(v_substrings, 1) = v_ovec;
  496. cod.v_substrings_p = &v_substrings;
  497. cod.v_cof_p = &v_cof;
  498. new_extra.callout_data = &cod;
  499. if (extra == NULL) {
  500. ret = pcre_exec(code, &new_extra, subj, len, pos, opt, ovec,
  501. ovec_len);
  502. }
  503. else {
  504. new_extra.flags = PCRE_EXTRA_CALLOUT_DATA | extra->flags;
  505. new_extra.study_data = extra->study_data;
  506. new_extra.match_limit = extra->match_limit;
  507. new_extra.tables = extra->tables;
  508. #ifdef PCRE_EXTRA_MATCH_LIMIT_RECURSION
  509. new_extra.match_limit_recursion = extra->match_limit_recursion;
  510. #endif
  511. ret = pcre_exec(code, &new_extra, subj, len, pos, opt, ovec,
  512. ovec_len);
  513. }
  514. caml_stat_free(subj);
  515. End_roots();
  516. if (ret < 0) {
  517. caml_stat_free(ovec);
  518. if (ret == PCRE_ERROR_CALLOUT) caml_raise(cod.v_exn);
  519. else handle_exec_error("pcre_exec_stub(callout)", ret);
  520. } else {
  521. handle_pcre_exec_result(ovec, v_ovec, ovec_len, subj_start, ret);
  522. caml_stat_free(ovec);
  523. }
  524. }
  525. }
  526. return Val_unit;
  527. }
  528. /* Byte-code hook for pcre_exec_stub
  529. Needed, because there are more than 5 arguments */
  530. CAMLprim value pcre_exec_stub_bc(value *argv, int __unused argn)
  531. {
  532. return pcre_exec_stub(argv[0], argv[1], argv[2], argv[3],
  533. argv[4], argv[5], argv[6]);
  534. }
  535. /* Generates a new set of chartables for the current locale (see man
  536. page of PCRE */
  537. CAMLprim value pcre_maketables_stub(value __unused v_unit)
  538. {
  539. /* GC will do a full cycle every 1_000_000 table set allocations (one
  540. table set consumes 864 bytes -> maximum of 864_000_000 bytes unreclaimed
  541. table sets) */
  542. const value v_res = caml_alloc_final(2, pcre_dealloc_tables, 1, 1000000);
  543. Field(v_res, 1) = (value) pcre_maketables();
  544. return v_res;
  545. }
  546. /* Wraps around the isspace-function */
  547. CAMLprim value pcre_isspace_stub(value v_c)
  548. {
  549. return Val_bool(isspace(Int_val(v_c)));
  550. }
  551. /* Returns number of substring associated with a name */
  552. CAMLprim value pcre_get_stringnumber_stub(value v_rex, value v_name)
  553. {
  554. const int ret = pcre_get_stringnumber((pcre *) Field(v_rex, 1),
  555. String_val(v_name));
  556. if (ret == PCRE_ERROR_NOSUBSTRING)
  557. caml_invalid_argument("Named string not found");
  558. return Val_int(ret);
  559. }
  560. /* Returns array of names of named substrings in a regexp */
  561. CAMLprim value pcre_names_stub(value v_rex)
  562. {
  563. CAMLparam0();
  564. CAMLlocal1(v_res);
  565. int name_count;
  566. int entry_size;
  567. const char *tbl_ptr;
  568. int i;
  569. int ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMECOUNT, &name_count);
  570. if (ret != 0) raise_internal_error("pcre_names_stub: namecount");
  571. ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMEENTRYSIZE, &entry_size);
  572. if (ret != 0) raise_internal_error("pcre_names_stub: nameentrysize");
  573. ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMETABLE, &tbl_ptr);
  574. if (ret != 0) raise_internal_error("pcre_names_stub: nametable");
  575. v_res = caml_alloc(name_count, 0);
  576. for (i = 0; i < name_count; ++i) {
  577. value v_name = caml_copy_string(tbl_ptr + 2);
  578. Store_field(v_res, i, v_name);
  579. tbl_ptr += entry_size;
  580. }
  581. CAMLreturn(v_res);
  582. }
  583. /* Generic stub for getting integer results from pcre_config */
  584. static inline int pcre_config_int(int what)
  585. {
  586. int ret;
  587. pcre_config(what, (void *) &ret);
  588. return ret;
  589. }
  590. /* Generic stub for getting long integer results from pcre_config */
  591. static inline int pcre_config_long(int what)
  592. {
  593. long ret;
  594. pcre_config(what, (void *) &ret);
  595. return ret;
  596. }
  597. /* Some stubs for config-functions */
  598. /* Returns boolean indicating UTF8-support */
  599. CAMLprim value pcre_config_utf8_stub(value __unused v_unit)
  600. { return Val_bool(pcre_config_int(PCRE_CONFIG_UTF8)); }
  601. /* Returns character used as newline */
  602. CAMLprim value pcre_config_newline_stub(value __unused v_unit)
  603. { return Val_int(pcre_config_int(PCRE_CONFIG_NEWLINE)); }
  604. /* Returns number of bytes used for internal linkage of regular expressions */
  605. CAMLprim value pcre_config_link_size_stub(value __unused v_unit)
  606. { return Val_int(pcre_config_int(PCRE_CONFIG_LINK_SIZE)); }
  607. /* Returns boolean indicating use of stack recursion */
  608. CAMLprim value pcre_config_stackrecurse_stub(value __unused v_unit)
  609. { return Val_bool(pcre_config_int(PCRE_CONFIG_STACKRECURSE)); }
  610. /* Returns default limit for calls to internal matching function */
  611. CAMLprim value pcre_config_match_limit_stub(value __unused v_unit)
  612. { return Val_long(pcre_config_long(PCRE_CONFIG_MATCH_LIMIT)); }
  613. /* Returns default limit for calls to internal matching function */
  614. CAMLprim value pcre_config_match_limit_recursion_stub(value __unused v_unit)
  615. { return Val_long(pcre_config_long(PCRE_CONFIG_MATCH_LIMIT_RECURSION)); }