|
@@ -1,16 +1,20 @@
|
|
(*
|
|
(*
|
|
- PCRE-OCAML - Perl Compatibility Regular Expressions for OCaml
|
|
|
|
|
|
+ PCRE2-OCAML - Perl Compatibility Regular Expressions for OCaml
|
|
|
|
+
|
|
Copyright (C) 1999- Markus Mottl
|
|
Copyright (C) 1999- Markus Mottl
|
|
email: [email protected]
|
|
email: [email protected]
|
|
WWW: http://www.ocaml.info
|
|
WWW: http://www.ocaml.info
|
|
|
|
+
|
|
This library is free software; you can redistribute it and/or
|
|
This library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
+
|
|
This library is distributed in the hope that it will be useful,
|
|
This library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
Lesser General Public License for more details.
|
|
|
|
+
|
|
You should have received a copy of the GNU Lesser General Public
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with this library; if not, write to the Free Software
|
|
License along with this library; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
@@ -18,17 +22,14 @@
|
|
|
|
|
|
(* Public exceptions and their registration with the C runtime *)
|
|
(* Public exceptions and their registration with the C runtime *)
|
|
|
|
|
|
-let string_copy str = str
|
|
|
|
-let buffer_add_subbytes = Buffer.add_subbytes
|
|
|
|
-
|
|
|
|
type error =
|
|
type error =
|
|
| Partial
|
|
| Partial
|
|
- | BadPartial
|
|
|
|
| BadPattern of string * int
|
|
| BadPattern of string * int
|
|
- | BadUTF8
|
|
|
|
- | BadUTF8Offset
|
|
|
|
|
|
+ | BadUTF
|
|
|
|
+ | BadUTFOffset
|
|
| MatchLimit
|
|
| MatchLimit
|
|
- | RecursionLimit
|
|
|
|
|
|
+ | DepthLimit
|
|
|
|
+ | WorkspaceSize
|
|
| InternalError of string
|
|
| InternalError of string
|
|
|
|
|
|
exception Error of error
|
|
exception Error of error
|
|
@@ -36,87 +37,141 @@ exception Backtrack
|
|
exception Regexp_or of string * error
|
|
exception Regexp_or of string * error
|
|
|
|
|
|
(* Puts exceptions into global C-variables for fast retrieval *)
|
|
(* Puts exceptions into global C-variables for fast retrieval *)
|
|
-external pcre_ocaml_init : unit -> unit = "pcre_ocaml_init"
|
|
|
|
|
|
+external pcre2_ocaml_init : unit -> unit = "pcre2_ocaml_init"
|
|
|
|
|
|
(* Registers exceptions with the C runtime and caches polymorphic variants *)
|
|
(* Registers exceptions with the C runtime and caches polymorphic variants *)
|
|
let () =
|
|
let () =
|
|
- Callback.register_exception "Pcre.Error" (Error (InternalError ""));
|
|
|
|
- Callback.register_exception "Pcre.Backtrack" Backtrack;
|
|
|
|
- pcre_ocaml_init ()
|
|
|
|
|
|
+ Callback.register_exception "Pcre2.Error" (Error (InternalError ""));
|
|
|
|
+ Callback.register_exception "Pcre2.Backtrack" Backtrack;
|
|
|
|
+ pcre2_ocaml_init ()
|
|
|
|
|
|
|
|
|
|
(* Compilation and runtime flags and their conversion functions *)
|
|
(* Compilation and runtime flags and their conversion functions *)
|
|
|
|
|
|
-type icflag = int
|
|
|
|
-type irflag = int
|
|
|
|
|
|
+type icflag = int64
|
|
|
|
+type irflag = int64
|
|
|
|
|
|
(* Compilation flags *)
|
|
(* Compilation flags *)
|
|
|
|
|
|
type cflag =
|
|
type cflag =
|
|
[
|
|
[
|
|
|
|
+ | `ALLOW_EMPTY_CLASS
|
|
|
|
+ | `ALT_BSUX
|
|
|
|
+ | `ALT_CIRCUMFLEX
|
|
|
|
+ | `ALT_VERBNAMES
|
|
|
|
+ | `ANCHORED
|
|
|
|
+ | `AUTO_CALLOUT
|
|
| `CASELESS
|
|
| `CASELESS
|
|
- | `MULTILINE
|
|
|
|
|
|
+ | `DOLLAR_ENDONLY
|
|
| `DOTALL
|
|
| `DOTALL
|
|
|
|
+ | `DUPNAMES
|
|
|
|
+ | `ENDANCHORED
|
|
| `EXTENDED
|
|
| `EXTENDED
|
|
- | `ANCHORED
|
|
|
|
- | `DOLLAR_ENDONLY
|
|
|
|
- | `EXTRA
|
|
|
|
- | `UNGREEDY
|
|
|
|
- | `UTF8
|
|
|
|
- | `NO_UTF8_CHECK
|
|
|
|
- | `NO_AUTO_CAPTURE
|
|
|
|
- | `AUTO_CALLOUT
|
|
|
|
|
|
+ | `EXTENDED_MORE
|
|
| `FIRSTLINE
|
|
| `FIRSTLINE
|
|
|
|
+ | `LITERAL
|
|
|
|
+ | `MATCH_INVALID_UTF
|
|
|
|
+ | `MATCH_UNSET_BACKREF
|
|
|
|
+ | `MULTILINE
|
|
|
|
+ | `NEVER_BACKSLASH_C
|
|
|
|
+ | `NEVER_UCP
|
|
|
|
+ | `NEVER_UTF
|
|
|
|
+ | `NO_AUTO_CAPTURE
|
|
|
|
+ | `NO_AUTO_POSSESS
|
|
|
|
+ | `NO_DOTSTAR_ANCHOR
|
|
|
|
+ | `NO_START_OPTIMIZE
|
|
|
|
+ | `NO_UTF_CHECK
|
|
| `UCP
|
|
| `UCP
|
|
|
|
+ | `UNGREEDY
|
|
|
|
+ | `USE_OFFSET_LIMIT
|
|
|
|
+ | `UTF
|
|
]
|
|
]
|
|
|
|
|
|
let int_of_cflag = function
|
|
let int_of_cflag = function
|
|
- | `CASELESS -> 0x0001
|
|
|
|
- | `MULTILINE -> 0x0002
|
|
|
|
- | `DOTALL -> 0x0004
|
|
|
|
- | `EXTENDED -> 0x0008
|
|
|
|
- | `ANCHORED -> 0x0010
|
|
|
|
- | `DOLLAR_ENDONLY -> 0x0020
|
|
|
|
- | `EXTRA -> 0x0040
|
|
|
|
- | `UNGREEDY -> 0x0200
|
|
|
|
- | `UTF8 -> 0x0800
|
|
|
|
- | `NO_AUTO_CAPTURE -> 0x1000
|
|
|
|
- | `NO_UTF8_CHECK -> 0x2000
|
|
|
|
- | `AUTO_CALLOUT -> 0x4000
|
|
|
|
- | `FIRSTLINE -> 0x40000
|
|
|
|
- | `UCP -> 0x20000000
|
|
|
|
-
|
|
|
|
-let coll_icflag icflag flag = int_of_cflag flag lor icflag
|
|
|
|
-let cflags flags = List.fold_left coll_icflag 0 flags
|
|
|
|
|
|
+ | `ALLOW_EMPTY_CLASS -> 0x00000001L
|
|
|
|
+ | `ALT_BSUX -> 0x00000002L
|
|
|
|
+ | `AUTO_CALLOUT -> 0x00000004L
|
|
|
|
+ | `CASELESS -> 0x00000008L
|
|
|
|
+ | `DOLLAR_ENDONLY -> 0x00000010L
|
|
|
|
+ | `DOTALL -> 0x00000020L
|
|
|
|
+ | `DUPNAMES -> 0x00000040L
|
|
|
|
+ | `EXTENDED -> 0x00000080L
|
|
|
|
+ | `FIRSTLINE -> 0x00000100L
|
|
|
|
+ | `MATCH_UNSET_BACKREF -> 0x00000200L
|
|
|
|
+ | `MULTILINE -> 0x00000400L
|
|
|
|
+ | `NEVER_UCP -> 0x00000800L
|
|
|
|
+ | `NEVER_UTF -> 0x00001000L
|
|
|
|
+ | `NO_AUTO_CAPTURE -> 0x00002000L
|
|
|
|
+ | `NO_AUTO_POSSESS -> 0x00004000L
|
|
|
|
+ | `NO_DOTSTAR_ANCHOR -> 0x00008000L
|
|
|
|
+ | `NO_START_OPTIMIZE -> 0x00010000L
|
|
|
|
+ | `UCP -> 0x00020000L
|
|
|
|
+ | `UNGREEDY -> 0x00040000L
|
|
|
|
+ | `UTF -> 0x00080000L
|
|
|
|
+ | `NEVER_BACKSLASH_C -> 0x00100000L
|
|
|
|
+ | `ALT_CIRCUMFLEX -> 0x00200000L
|
|
|
|
+ | `ALT_VERBNAMES -> 0x00400000L
|
|
|
|
+ | `USE_OFFSET_LIMIT -> 0x00800000L
|
|
|
|
+ | `EXTENDED_MORE -> 0x01000000L
|
|
|
|
+ | `LITERAL -> 0x02000000L
|
|
|
|
+ | `MATCH_INVALID_UTF -> 0x04000000L
|
|
|
|
+ | `ENDANCHORED -> 0x20000000L
|
|
|
|
+ | `NO_UTF_CHECK -> 0x40000000L
|
|
|
|
+ | `ANCHORED -> 0x80000000L
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+let coll_icflag icflag flag = Int64.logor (int_of_cflag flag) icflag
|
|
|
|
+let cflags flags = List.fold_left coll_icflag 0L flags
|
|
|
|
|
|
let cflag_of_int = function
|
|
let cflag_of_int = function
|
|
- | 0x0001 -> `CASELESS
|
|
|
|
- | 0x0002 -> `MULTILINE
|
|
|
|
- | 0x0004 -> `DOTALL
|
|
|
|
- | 0x0008 -> `EXTENDED
|
|
|
|
- | 0x0010 -> `ANCHORED
|
|
|
|
- | 0x0020 -> `DOLLAR_ENDONLY
|
|
|
|
- | 0x0040 -> `EXTRA
|
|
|
|
- | 0x0200 -> `UNGREEDY
|
|
|
|
- | 0x0800 -> `UTF8
|
|
|
|
- | 0x1000 -> `NO_AUTO_CAPTURE
|
|
|
|
- | 0x2000 -> `NO_UTF8_CHECK
|
|
|
|
- | 0x4000 -> `AUTO_CALLOUT
|
|
|
|
- | 0x40000 -> `FIRSTLINE
|
|
|
|
- | 0x20000000 -> `UCP
|
|
|
|
- | _ -> failwith "Pcre.cflag_list: unknown compilation flag"
|
|
|
|
|
|
+ | 0x00000001L -> `ALLOW_EMPTY_CLASS
|
|
|
|
+ | 0x00000002L -> `ALT_BSUX
|
|
|
|
+ | 0x00000004L -> `AUTO_CALLOUT
|
|
|
|
+ | 0x00000008L -> `CASELESS
|
|
|
|
+ | 0x00000010L -> `DOLLAR_ENDONLY
|
|
|
|
+ | 0x00000020L -> `DOTALL
|
|
|
|
+ | 0x00000040L -> `DUPNAMES
|
|
|
|
+ | 0x00000080L -> `EXTENDED
|
|
|
|
+ | 0x00000100L -> `FIRSTLINE
|
|
|
|
+ | 0x00000200L -> `MATCH_UNSET_BACKREF
|
|
|
|
+ | 0x00000400L -> `MULTILINE
|
|
|
|
+ | 0x00000800L -> `NEVER_UCP
|
|
|
|
+ | 0x00001000L -> `NEVER_UTF
|
|
|
|
+ | 0x00002000L -> `NO_AUTO_CAPTURE
|
|
|
|
+ | 0x00004000L -> `NO_AUTO_POSSESS
|
|
|
|
+ | 0x00008000L -> `NO_DOTSTAR_ANCHOR
|
|
|
|
+ | 0x00010000L -> `NO_START_OPTIMIZE
|
|
|
|
+ | 0x00020000L -> `UCP
|
|
|
|
+ | 0x00040000L -> `UNGREEDY
|
|
|
|
+ | 0x00080000L -> `UTF
|
|
|
|
+ | 0x00100000L -> `NEVER_BACKSLASH_C
|
|
|
|
+ | 0x00200000L -> `ALT_CIRCUMFLEX
|
|
|
|
+ | 0x00400000L -> `ALT_VERBNAMES
|
|
|
|
+ | 0x00800000L -> `USE_OFFSET_LIMIT
|
|
|
|
+ | 0x01000000L -> `EXTENDED_MORE
|
|
|
|
+ | 0x02000000L -> `LITERAL
|
|
|
|
+ | 0x04000000L -> `MATCH_INVALID_UTF
|
|
|
|
+ | 0x20000000L -> `ENDANCHORED
|
|
|
|
+ | 0x40000000L -> `NO_UTF_CHECK
|
|
|
|
+ | 0x80000000L -> `ANCHORED
|
|
|
|
+ | _ -> failwith "Pcre2.cflag_list: unknown compilation flag"
|
|
|
|
|
|
let all_cflags =
|
|
let all_cflags =
|
|
[
|
|
[
|
|
- 0x0001; 0x0002; 0x0004; 0x0008; 0x0010; 0x0020;
|
|
|
|
- 0x0040; 0x0200; 0x0800; 0x1000; 0x2000; 0x4000; 0x40000;
|
|
|
|
- 0x20000000
|
|
|
|
|
|
+ 0x00000001L; 0x00000002L; 0x00000004L; 0x00000008L;
|
|
|
|
+ 0x00000010L; 0x00000020L; 0x00000040L; 0x00000080L;
|
|
|
|
+ 0x00000100L; 0x00000200L; 0x00000400L; 0x00000800L;
|
|
|
|
+ 0x00001000L; 0x00002000L; 0x00004000L; 0x00008000L;
|
|
|
|
+ 0x00010000L; 0x00020000L; 0x00040000L; 0x00080000L;
|
|
|
|
+ 0x00100000L; 0x00200000L; 0x00400000L; 0x00800000L;
|
|
|
|
+ 0x01000000L; 0x02000000L; 0x04000000L;
|
|
|
|
+ 0x20000000L; 0x40000000L; 0x80000000L;
|
|
]
|
|
]
|
|
|
|
|
|
let cflag_list icflags =
|
|
let cflag_list icflags =
|
|
let coll flag_list flag =
|
|
let coll flag_list flag =
|
|
- if icflags land flag <> 0 then cflag_of_int flag :: flag_list
|
|
|
|
- else flag_list in
|
|
|
|
|
|
+ if Int64.equal (Int64.logand icflags flag) 0L then flag_list
|
|
|
|
+ else cflag_of_int flag :: flag_list in
|
|
List.fold_left coll [] all_cflags
|
|
List.fold_left coll [] all_cflags
|
|
|
|
|
|
|
|
|
|
@@ -125,144 +180,180 @@ let cflag_list icflags =
|
|
type rflag =
|
|
type rflag =
|
|
[
|
|
[
|
|
| `ANCHORED
|
|
| `ANCHORED
|
|
|
|
+ | `COPY_MATCHED_SUBJECT
|
|
|
|
+ | `DFA_RESTART
|
|
|
|
+ | `DFA_SHORTEST
|
|
|
|
+ | `ENDANCHORED
|
|
| `NOTBOL
|
|
| `NOTBOL
|
|
| `NOTEOL
|
|
| `NOTEOL
|
|
| `NOTEMPTY
|
|
| `NOTEMPTY
|
|
- | `PARTIAL
|
|
|
|
|
|
+ | `NOTEMPTY_ATSTART
|
|
|
|
+ | `NO_JIT
|
|
|
|
+ | `NO_UTF_CHECK
|
|
|
|
+ | `PARTIAL_HARD
|
|
|
|
+ | `PARTIAL_SOFT
|
|
]
|
|
]
|
|
|
|
|
|
let int_of_rflag = function
|
|
let int_of_rflag = function
|
|
- | `ANCHORED -> 0x0010
|
|
|
|
- | `NOTBOL -> 0x0080
|
|
|
|
- | `NOTEOL -> 0x0100
|
|
|
|
- | `NOTEMPTY -> 0x0400
|
|
|
|
- | `PARTIAL -> 0x8000
|
|
|
|
-
|
|
|
|
-let coll_irflag irflag flag = int_of_rflag flag lor irflag
|
|
|
|
-let rflags flags = List.fold_left coll_irflag 0 flags
|
|
|
|
|
|
+ | `NOTBOL -> 0x00000001L
|
|
|
|
+ | `NOTEOL -> 0x00000002L
|
|
|
|
+ | `NOTEMPTY -> 0x00000004L
|
|
|
|
+ | `NOTEMPTY_ATSTART -> 0x00000008L
|
|
|
|
+ | `PARTIAL_SOFT -> 0x00000010L
|
|
|
|
+ | `PARTIAL_HARD -> 0x00000020L
|
|
|
|
+ | `DFA_RESTART -> 0x00000040L
|
|
|
|
+ | `DFA_SHORTEST -> 0x00000080L
|
|
|
|
+ | `NO_JIT -> 0x00002000L
|
|
|
|
+ | `COPY_MATCHED_SUBJECT -> 0x00004000L
|
|
|
|
+ | `ENDANCHORED -> 0x20000000L
|
|
|
|
+ | `NO_UTF_CHECK -> 0x40000000L
|
|
|
|
+ | `ANCHORED -> 0x80000000L
|
|
|
|
+
|
|
|
|
+let coll_irflag irflag flag = Int64.logor (int_of_rflag flag) irflag
|
|
|
|
+let rflags flags = List.fold_left coll_irflag 0L flags
|
|
|
|
|
|
let rflag_of_int = function
|
|
let rflag_of_int = function
|
|
- | 0x0010 -> `ANCHORED
|
|
|
|
- | 0x0080 -> `NOTBOL
|
|
|
|
- | 0x0100 -> `NOTEOL
|
|
|
|
- | 0x0400 -> `NOTEMPTY
|
|
|
|
- | 0x8000 -> `PARTIAL
|
|
|
|
- | _ -> failwith "Pcre.rflag_list: unknown runtime flag"
|
|
|
|
-
|
|
|
|
-let all_rflags = [0x0010; 0x0080; 0x0100; 0x0400; 0x8000]
|
|
|
|
|
|
+ | 0x00000001L -> `NOTBOL
|
|
|
|
+ | 0x00000002L -> `NOTEOL
|
|
|
|
+ | 0x00000004L -> `NOTEMPTY
|
|
|
|
+ | 0x00000008L -> `NOTEMPTY_ATSTART
|
|
|
|
+ | 0x00000010L -> `PARTIAL_SOFT
|
|
|
|
+ | 0x00000020L -> `PARTIAL_HARD
|
|
|
|
+ | 0x00000040L -> `DFA_RESTART
|
|
|
|
+ | 0x00000080L -> `DFA_SHORTEST
|
|
|
|
+ | 0x00002000L -> `NO_JIT
|
|
|
|
+ | 0x00004000L -> `COPY_MATCHED_SUBJECT
|
|
|
|
+ | 0x20000000L -> `ENDANCHORED
|
|
|
|
+ | 0x40000000L -> `NO_UTF_CHECK
|
|
|
|
+ | 0x80000000L -> `ANCHORED
|
|
|
|
+ | _ -> failwith "Pcre2.rflag_list: unknown runtime flag"
|
|
|
|
+
|
|
|
|
+let all_rflags =
|
|
|
|
+ [
|
|
|
|
+ 0x00000001L; 0x00000002L; 0x00000004L; 0x00000008L;
|
|
|
|
+ 0x00000010L; 0x00000020L; 0x00000040L; 0x00000080L;
|
|
|
|
+ 0x00002000L; 0x00004000L;
|
|
|
|
+ 0x20000000L; 0x40000000L; 0x80000000L;
|
|
|
|
+ ]
|
|
|
|
|
|
let rflag_list irflags =
|
|
let rflag_list irflags =
|
|
let coll flag_list flag =
|
|
let coll flag_list flag =
|
|
- if irflags land flag <> 0 then rflag_of_int flag :: flag_list
|
|
|
|
- else flag_list in
|
|
|
|
|
|
+ if Int64.equal (Int64.logand irflags flag) 0L then flag_list
|
|
|
|
+ else rflag_of_int flag :: flag_list in
|
|
List.fold_left coll [] all_rflags
|
|
List.fold_left coll [] all_rflags
|
|
|
|
|
|
|
|
|
|
-(* Information on the PCRE-configuration (build-time options) *)
|
|
|
|
|
|
+(* Information on the PCRE2-configuration (build-time options) *)
|
|
|
|
|
|
-external pcre_version : unit -> string = "pcre_version_stub"
|
|
|
|
|
|
+external pcre2_version : unit -> string = "pcre2_version_stub"
|
|
|
|
|
|
-external pcre_config_utf8 : unit -> bool = "pcre_config_utf8_stub" [@@noalloc]
|
|
|
|
|
|
+external pcre2_config_unicode : unit -> bool
|
|
|
|
+ = "pcre2_config_unicode_stub" [@@noalloc]
|
|
|
|
|
|
-external pcre_config_newline :
|
|
|
|
- unit -> char = "pcre_config_newline_stub" [@@noalloc]
|
|
|
|
|
|
+external pcre2_config_newline : unit -> char
|
|
|
|
+ = "pcre2_config_newline_stub" [@@noalloc]
|
|
|
|
|
|
-external pcre_config_link_size :
|
|
|
|
- unit -> int = "pcre_config_link_size_stub" [@@noalloc]
|
|
|
|
|
|
+external pcre2_config_link_size : unit -> (int [@untagged])
|
|
|
|
+ = "pcre2_config_link_size_stub_bc" "pcre2_config_link_size_stub" [@@noalloc]
|
|
|
|
|
|
-external pcre_config_match_limit :
|
|
|
|
- unit -> int = "pcre_config_match_limit_stub" [@@noalloc]
|
|
|
|
|
|
+external pcre2_config_match_limit : unit -> (int [@untagged])
|
|
|
|
+ = "pcre2_config_match_limit_stub_bc" "pcre2_config_match_limit_stub"
|
|
|
|
+ [@@noalloc]
|
|
|
|
|
|
-external pcre_config_match_limit_recursion :
|
|
|
|
- unit -> int = "pcre_config_match_limit_recursion_stub" [@@noalloc]
|
|
|
|
|
|
+external pcre2_config_depth_limit : unit -> (int [@untagged])
|
|
|
|
+ = "pcre2_config_depth_limit_stub_bc" "pcre2_config_depth_limit_stub"
|
|
|
|
+ [@@noalloc]
|
|
|
|
|
|
-external pcre_config_stackrecurse :
|
|
|
|
- unit -> bool = "pcre_config_stackrecurse_stub" [@@noalloc]
|
|
|
|
|
|
+external pcre2_config_stackrecurse :
|
|
|
|
+ unit -> bool = "pcre2_config_stackrecurse_stub" [@@noalloc]
|
|
|
|
|
|
-let version = pcre_version ()
|
|
|
|
-let config_utf8 = pcre_config_utf8 ()
|
|
|
|
-let config_newline = pcre_config_newline ()
|
|
|
|
-let config_link_size = pcre_config_link_size ()
|
|
|
|
-let config_match_limit = pcre_config_match_limit ()
|
|
|
|
-let config_match_limit_recursion = pcre_config_match_limit_recursion ()
|
|
|
|
-let config_stackrecurse = pcre_config_stackrecurse ()
|
|
|
|
|
|
+let version = pcre2_version ()
|
|
|
|
+let config_unicode = pcre2_config_unicode ()
|
|
|
|
+let config_newline = pcre2_config_newline ()
|
|
|
|
+let config_link_size = pcre2_config_link_size ()
|
|
|
|
+let config_match_limit = pcre2_config_match_limit ()
|
|
|
|
+let config_depth_limit = pcre2_config_depth_limit ()
|
|
|
|
+let config_stackrecurse = pcre2_config_stackrecurse ()
|
|
|
|
|
|
|
|
|
|
(* Information on patterns *)
|
|
(* Information on patterns *)
|
|
|
|
|
|
-type firstbyte_info =
|
|
|
|
|
|
+type firstcodeunit_info =
|
|
[ `Char of char
|
|
[ `Char of char
|
|
| `Start_only
|
|
| `Start_only
|
|
| `ANCHORED ]
|
|
| `ANCHORED ]
|
|
|
|
|
|
-type study_stat =
|
|
|
|
- [ `Not_studied
|
|
|
|
- | `Studied
|
|
|
|
- | `Optimal ]
|
|
|
|
-
|
|
|
|
type regexp
|
|
type regexp
|
|
|
|
|
|
-external options : regexp -> icflag = "pcre_options_stub"
|
|
|
|
-external size : regexp -> int = "pcre_size_stub"
|
|
|
|
-external studysize : regexp -> int = "pcre_studysize_stub"
|
|
|
|
-external capturecount : regexp -> int = "pcre_capturecount_stub"
|
|
|
|
-external backrefmax : regexp -> int = "pcre_backrefmax_stub"
|
|
|
|
-external namecount : regexp -> int = "pcre_namecount_stub"
|
|
|
|
-external names : regexp -> string array = "pcre_names_stub"
|
|
|
|
-external nameentrysize : regexp -> int = "pcre_nameentrysize_stub"
|
|
|
|
-external firstbyte : regexp -> firstbyte_info = "pcre_firstbyte_stub"
|
|
|
|
-external firsttable : regexp -> string option = "pcre_firsttable_stub"
|
|
|
|
-external lastliteral : regexp -> char option = "pcre_lastliteral_stub"
|
|
|
|
-external study_stat : regexp -> study_stat = "pcre_study_stat_stub" [@@noalloc]
|
|
|
|
|
|
+external options : regexp -> (icflag [@unboxed])
|
|
|
|
+ = "pcre2_argoptions_stub_bc" "pcre2_argoptions_stub"
|
|
|
|
+
|
|
|
|
+external size : regexp -> (int [@untagged])
|
|
|
|
+ = "pcre2_size_stub_bc" "pcre2_size_stub"
|
|
|
|
+
|
|
|
|
+external capturecount : regexp -> (int [@untagged])
|
|
|
|
+ = "pcre2_capturecount_stub_bc" "pcre2_capturecount_stub"
|
|
|
|
|
|
|
|
+external backrefmax : regexp -> (int [@untagged])
|
|
|
|
+ = "pcre2_backrefmax_stub_bc" "pcre2_backrefmax_stub"
|
|
|
|
+
|
|
|
|
+external namecount : regexp -> (int [@untagged])
|
|
|
|
+ = "pcre2_namecount_stub_bc" "pcre2_namecount_stub"
|
|
|
|
+
|
|
|
|
+external nameentrysize : regexp -> (int [@untagged])
|
|
|
|
+ = "pcre2_nameentrysize_stub_bc" "pcre2_nameentrysize_stub"
|
|
|
|
+
|
|
|
|
+external names : regexp -> string array = "pcre2_names_stub"
|
|
|
|
+external firstcodeunit : regexp -> firstcodeunit_info = "pcre2_firstcodeunit_stub"
|
|
|
|
+external lastcodeunit : regexp -> char option = "pcre2_lastcodeunit_stub"
|
|
|
|
|
|
(* Compilation of patterns *)
|
|
(* Compilation of patterns *)
|
|
|
|
|
|
type chtables
|
|
type chtables
|
|
|
|
|
|
-external maketables : unit -> chtables = "pcre_maketables_stub"
|
|
|
|
|
|
+external maketables : unit -> chtables = "pcre2_maketables_stub"
|
|
|
|
|
|
-(* Internal use only! *)
|
|
|
|
-external pcre_study : regexp -> unit = "pcre_study_stub"
|
|
|
|
|
|
+external compile : (icflag [@unboxed]) -> chtables option -> string -> regexp
|
|
|
|
+ = "pcre2_compile_stub_bc" "pcre2_compile_stub"
|
|
|
|
|
|
-external compile :
|
|
|
|
- icflag -> chtables option -> string -> regexp = "pcre_compile_stub"
|
|
|
|
-
|
|
|
|
-external get_match_limit : regexp -> int option = "pcre_get_match_limit_stub"
|
|
|
|
|
|
+(* external get_match_limit : regexp -> int option = "pcre2_get_match_limit_stub" *)
|
|
|
|
|
|
(* Internal use only! *)
|
|
(* Internal use only! *)
|
|
-external set_imp_match_limit :
|
|
|
|
- regexp -> int -> regexp = "pcre_set_imp_match_limit_stub" [@@noalloc]
|
|
|
|
|
|
+external set_imp_match_limit : regexp -> (int [@untagged]) -> regexp
|
|
|
|
+ = "pcre2_set_imp_match_limit_stub_bc" "pcre2_set_imp_match_limit_stub"
|
|
|
|
+ [@@noalloc]
|
|
|
|
|
|
-external get_match_limit_recursion :
|
|
|
|
- regexp -> int option = "pcre_get_match_limit_recursion_stub"
|
|
|
|
|
|
+(* external get_depth_limit :
|
|
|
|
+ regexp -> int option = "pcre2_get_depth_limit_stub" *)
|
|
|
|
|
|
(* Internal use only! *)
|
|
(* Internal use only! *)
|
|
-external set_imp_match_limit_recursion :
|
|
|
|
- regexp -> int -> regexp = "pcre_set_imp_match_limit_recursion_stub" [@@noalloc]
|
|
|
|
|
|
+external set_imp_depth_limit : regexp -> (int [@untagged]) -> regexp
|
|
|
|
+ = "pcre2_set_imp_depth_limit_stub_bc" "pcre2_set_imp_depth_limit_stub"
|
|
|
|
+ [@@noalloc]
|
|
|
|
|
|
|
|
+(* TODO implement jit using new pcre2_jit_compile api *)
|
|
let regexp
|
|
let regexp
|
|
- ?(study = true) ?limit ?limit_recursion
|
|
|
|
- ?(iflags = 0) ?flags ?chtables pat =
|
|
|
|
|
|
+ (* ?(jit_compile = false) *)
|
|
|
|
+ ?limit ?depth_limit
|
|
|
|
+ ?(iflags = 0L) ?flags ?chtables pat =
|
|
let rex =
|
|
let rex =
|
|
match flags with
|
|
match flags with
|
|
| Some flag_list -> compile (cflags flag_list) chtables pat
|
|
| Some flag_list -> compile (cflags flag_list) chtables pat
|
|
| _ -> compile iflags chtables pat
|
|
| _ -> compile iflags chtables pat
|
|
in
|
|
in
|
|
- if study then pcre_study rex;
|
|
|
|
let rex =
|
|
let rex =
|
|
match limit with
|
|
match limit with
|
|
| None -> rex
|
|
| None -> rex
|
|
| Some lim -> set_imp_match_limit rex lim
|
|
| Some lim -> set_imp_match_limit rex lim
|
|
in
|
|
in
|
|
- match limit_recursion with
|
|
|
|
|
|
+ match depth_limit with
|
|
| None -> rex
|
|
| None -> rex
|
|
- | Some lim -> set_imp_match_limit_recursion rex lim
|
|
|
|
|
|
+ | Some lim -> set_imp_depth_limit rex lim
|
|
|
|
|
|
let regexp_or
|
|
let regexp_or
|
|
- ?study ?limit ?limit_recursion ?(iflags = 0) ?flags ?chtables pats =
|
|
|
|
|
|
+ (* ?jit_compile *) ?limit ?depth_limit ?(iflags = 0L) ?flags ?chtables pats =
|
|
let check pat =
|
|
let check pat =
|
|
- try ignore (regexp ~study:false ~iflags ?flags ?chtables pat)
|
|
|
|
|
|
+ try ignore (regexp ~iflags ?flags ?chtables pat)
|
|
with Error error -> raise (Regexp_or (pat, error))
|
|
with Error error -> raise (Regexp_or (pat, error))
|
|
in
|
|
in
|
|
List.iter check pats;
|
|
List.iter check pats;
|
|
@@ -270,7 +361,7 @@ let regexp_or
|
|
let cnv pat = "(?:" ^ pat ^ ")" in
|
|
let cnv pat = "(?:" ^ pat ^ ")" in
|
|
String.concat "|" (List.rev (List.rev_map cnv pats))
|
|
String.concat "|" (List.rev (List.rev_map cnv pats))
|
|
in
|
|
in
|
|
- regexp ?study ?limit ?limit_recursion ~iflags ?flags ?chtables big_pat
|
|
|
|
|
|
+ regexp (* ?jit_compile *) ?limit ?depth_limit ~iflags ?flags ?chtables big_pat
|
|
|
|
|
|
let bytes_unsafe_blit_string str str_ofs bts bts_ofs len =
|
|
let bytes_unsafe_blit_string str str_ofs bts bts_ofs len =
|
|
let str_bts = Bytes.unsafe_of_string str in
|
|
let str_bts = Bytes.unsafe_of_string str in
|
|
@@ -301,7 +392,7 @@ let quote s =
|
|
(* Matching of patterns and subpattern extraction *)
|
|
(* Matching of patterns and subpattern extraction *)
|
|
|
|
|
|
(* Default regular expression when none is provided by the user *)
|
|
(* Default regular expression when none is provided by the user *)
|
|
-let def_rex = regexp "\\s+"
|
|
|
|
|
|
+let def_rex = regexp (* ~jit_compile:true *) "\\s+"
|
|
|
|
|
|
type substrings = string * int array
|
|
type substrings = string * int array
|
|
|
|
|
|
@@ -325,7 +416,7 @@ let num_of_subs (_, ovector) = Array.length ovector / 3
|
|
|
|
|
|
let get_offset_start ovector str_num =
|
|
let get_offset_start ovector str_num =
|
|
if str_num < 0 || str_num >= Array.length ovector / 3 then
|
|
if str_num < 0 || str_num >= Array.length ovector / 3 then
|
|
- invalid_arg "Pcre.get_offset_start: illegal offset";
|
|
|
|
|
|
+ invalid_arg "Pcre2.get_offset_start: illegal offset";
|
|
let offset = str_num lsl 1 in
|
|
let offset = str_num lsl 1 in
|
|
offset, Array.unsafe_get ovector offset
|
|
offset, Array.unsafe_get ovector offset
|
|
|
|
|
|
@@ -370,8 +461,10 @@ let get_opt_substrings ?(full_match = true) (_, ovector as substrings) =
|
|
let len = (Array.length ovector / 3) - 1 in
|
|
let len = (Array.length ovector / 3) - 1 in
|
|
Array.init len (fun n -> unsafe_get_opt_substring substrings (n + 1))
|
|
Array.init len (fun n -> unsafe_get_opt_substring substrings (n + 1))
|
|
|
|
|
|
-external get_stringnumber :
|
|
|
|
- regexp -> string -> int = "pcre_get_stringnumber_stub"
|
|
|
|
|
|
+external get_stringnumber : regexp -> string -> (int [@untagged])
|
|
|
|
+ =
|
|
|
|
+ "pcre2_substring_number_from_name_stub_bc"
|
|
|
|
+ "pcre2_substring_number_from_name_stub"
|
|
|
|
|
|
let get_named_substring rex name substrings =
|
|
let get_named_substring rex name substrings =
|
|
get_substring substrings (get_stringnumber rex name)
|
|
get_substring substrings (get_stringnumber rex name)
|
|
@@ -379,48 +472,68 @@ let get_named_substring rex name substrings =
|
|
let get_named_substring_ofs rex name substrings =
|
|
let get_named_substring_ofs rex name substrings =
|
|
get_substring_ofs substrings (get_stringnumber rex name)
|
|
get_substring_ofs substrings (get_stringnumber rex name)
|
|
|
|
|
|
-external unsafe_pcre_exec :
|
|
|
|
- irflag ->
|
|
|
|
|
|
+external unsafe_pcre2_match :
|
|
|
|
+ (irflag [@unboxed]) ->
|
|
regexp ->
|
|
regexp ->
|
|
- pos : int ->
|
|
|
|
- subj_start : int ->
|
|
|
|
|
|
+ pos : (int [@untagged]) ->
|
|
|
|
+ subj_start : (int [@untagged]) ->
|
|
subj : string ->
|
|
subj : string ->
|
|
int array ->
|
|
int array ->
|
|
callout option ->
|
|
callout option ->
|
|
- unit = "pcre_exec_stub_bc" "pcre_exec_stub"
|
|
|
|
|
|
+ unit = "pcre2_match_stub_bc" "pcre2_match_stub"
|
|
|
|
|
|
let make_ovector rex =
|
|
let make_ovector rex =
|
|
let subgroups1 = capturecount rex + 1 in
|
|
let subgroups1 = capturecount rex + 1 in
|
|
let subgroups2 = subgroups1 lsl 1 in
|
|
let subgroups2 = subgroups1 lsl 1 in
|
|
subgroups2, Array.make (subgroups1 + subgroups2) 0
|
|
subgroups2, Array.make (subgroups1 + subgroups2) 0
|
|
|
|
|
|
-let pcre_exec ?(iflags = 0) ?flags ?(rex = def_rex) ?pat ?(pos = 0)
|
|
|
|
|
|
+external unsafe_pcre2_dfa_match :
|
|
|
|
+ (irflag [@unboxed]) ->
|
|
|
|
+ regexp ->
|
|
|
|
+ pos : (int [@untagged]) ->
|
|
|
|
+ subj_start : (int [@untagged]) ->
|
|
|
|
+ subj : string ->
|
|
|
|
+ int array ->
|
|
|
|
+ callout option ->
|
|
|
|
+ workspace : int array ->
|
|
|
|
+ unit = "pcre2_dfa_match_stub_bc" "pcre2_match_stub0"
|
|
|
|
+
|
|
|
|
+let pcre2_dfa_match ?(iflags = 0L) ?flags ?(rex = def_rex) ?pat ?(pos = 0)
|
|
|
|
+ ?callout ?(workspace = Array.make 20 0) subj =
|
|
|
|
+ let rex = match pat with Some str -> regexp str | _ -> rex in
|
|
|
|
+ let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
|
|
|
|
+ let _, ovector = make_ovector rex in
|
|
|
|
+ unsafe_pcre2_dfa_match
|
|
|
|
+ iflags rex ~pos ~subj_start:0 ~subj ovector callout ~workspace;
|
|
|
|
+ ovector
|
|
|
|
+
|
|
|
|
+let pcre2_match ?(iflags = 0L) ?flags ?(rex = def_rex) ?pat ?(pos = 0)
|
|
?callout subj =
|
|
?callout subj =
|
|
let rex = match pat with Some str -> regexp str | _ -> rex in
|
|
let rex = match pat with Some str -> regexp str | _ -> rex in
|
|
let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
|
|
let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
|
|
let _, ovector = make_ovector rex in
|
|
let _, ovector = make_ovector rex in
|
|
- unsafe_pcre_exec iflags rex ~pos ~subj_start:0 ~subj ovector callout;
|
|
|
|
|
|
+ unsafe_pcre2_match iflags rex ~pos ~subj_start:0 ~subj ovector callout;
|
|
ovector
|
|
ovector
|
|
|
|
|
|
let exec ?iflags ?flags ?rex ?pat ?pos ?callout subj =
|
|
let exec ?iflags ?flags ?rex ?pat ?pos ?callout subj =
|
|
- subj, pcre_exec ?iflags ?flags ?rex ?pat ?pos ?callout subj
|
|
|
|
|
|
+ subj, pcre2_match ?iflags ?flags ?rex ?pat ?pos ?callout subj
|
|
|
|
|
|
let next_match ?iflags ?flags ?rex ?pat ?(pos = 0) ?callout (subj, ovector) =
|
|
let next_match ?iflags ?flags ?rex ?pat ?(pos = 0) ?callout (subj, ovector) =
|
|
let pos = Array.unsafe_get ovector 1 + pos in
|
|
let pos = Array.unsafe_get ovector 1 + pos in
|
|
let subj_len = String.length subj in
|
|
let subj_len = String.length subj in
|
|
if pos < 0 || pos > subj_len then
|
|
if pos < 0 || pos > subj_len then
|
|
- invalid_arg "Pcre.next_match: illegal offset";
|
|
|
|
- subj, pcre_exec ?iflags ?flags ?rex ?pat ~pos ?callout subj
|
|
|
|
|
|
+ invalid_arg "Pcre2.next_match: illegal offset";
|
|
|
|
+ subj, pcre2_match ?iflags ?flags ?rex ?pat ~pos ?callout subj
|
|
|
|
|
|
let rec copy_lst ar n = function
|
|
let rec copy_lst ar n = function
|
|
| [] -> ar
|
|
| [] -> ar
|
|
| h :: t -> Array.unsafe_set ar n h; copy_lst ar (n - 1) t
|
|
| h :: t -> Array.unsafe_set ar n h; copy_lst ar (n - 1) t
|
|
|
|
|
|
-let exec_all ?(iflags = 0) ?flags ?(rex = def_rex) ?pat ?pos ?callout subj =
|
|
|
|
|
|
+let exec_all ?(iflags = 0L) ?flags ?(rex = def_rex) ?pat ?pos ?callout subj =
|
|
let rex = match pat with Some str -> regexp str | _ -> rex in
|
|
let rex = match pat with Some str -> regexp str | _ -> rex in
|
|
let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
|
|
let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
|
|
let (_, ovector as sstrs) = exec ~iflags ~rex ?pos ?callout subj in
|
|
let (_, ovector as sstrs) = exec ~iflags ~rex ?pos ?callout subj in
|
|
- let null_flags = iflags lor 0x0400 in
|
|
|
|
|
|
+ let null_flags = Int64.logor iflags 0x00000004L in (* `NOTEMPTY *)
|
|
let subj_len = String.length subj in
|
|
let subj_len = String.length subj in
|
|
let rec loop pos (subj, ovector as sstrs) n lst =
|
|
let rec loop pos (subj, ovector as sstrs) n lst =
|
|
let maybe_ovector =
|
|
let maybe_ovector =
|
|
@@ -428,8 +541,8 @@ let exec_all ?(iflags = 0) ?flags ?(rex = def_rex) ?pat ?pos ?callout subj =
|
|
let first = Array.unsafe_get ovector 0 in
|
|
let first = Array.unsafe_get ovector 0 in
|
|
if first = pos && Array.unsafe_get ovector 1 = pos then
|
|
if first = pos && Array.unsafe_get ovector 1 = pos then
|
|
if pos = subj_len then None
|
|
if pos = subj_len then None
|
|
- else Some (pcre_exec ~iflags:null_flags ~rex ~pos ?callout subj)
|
|
|
|
- else Some (pcre_exec ~iflags ~rex ~pos ?callout subj)
|
|
|
|
|
|
+ else Some (pcre2_match ~iflags:null_flags ~rex ~pos ?callout subj)
|
|
|
|
+ else Some (pcre2_match ~iflags ~rex ~pos ?callout subj)
|
|
with Not_found -> None in
|
|
with Not_found -> None in
|
|
match maybe_ovector with
|
|
match maybe_ovector with
|
|
| Some ovector ->
|
|
| Some ovector ->
|
|
@@ -454,7 +567,7 @@ let extract_all_opt ?iflags ?flags ?rex ?pat ?pos ?full_match ?callout subj =
|
|
Array.map (get_opt_substrings ?full_match) many_sstrs
|
|
Array.map (get_opt_substrings ?full_match) many_sstrs
|
|
|
|
|
|
let pmatch ?iflags ?flags ?rex ?pat ?pos ?callout subj =
|
|
let pmatch ?iflags ?flags ?rex ?pat ?pos ?callout subj =
|
|
- try ignore (pcre_exec ?iflags ?flags ?rex ?pat ?pos ?callout subj); true
|
|
|
|
|
|
+ try ignore (pcre2_match ?iflags ?flags ?rex ?pat ?pos ?callout subj); true
|
|
with Not_found -> false
|
|
with Not_found -> false
|
|
|
|
|
|
|
|
|
|
@@ -559,7 +672,7 @@ let calc_trans_lst subgroups2 ovector subj templ subst_lst =
|
|
return_lst (subj, !ix, Array.unsafe_get ovector (!pos + 1) - !ix) in
|
|
return_lst (subj, !ix, Array.unsafe_get ovector (!pos + 1) - !ix) in
|
|
List.fold_left coll (0, []) subst_lst
|
|
List.fold_left coll (0, []) subst_lst
|
|
|
|
|
|
-let replace ?(iflags = 0) ?flags ?(rex = def_rex) ?pat
|
|
|
|
|
|
+let replace ?(iflags = 0L) ?flags ?(rex = def_rex) ?pat
|
|
?(pos = 0) ?(itempl = def_subst) ?templ ?callout subj =
|
|
?(pos = 0) ?(itempl = def_subst) ?templ ?callout subj =
|
|
let rex = match pat with Some str -> regexp str | _ -> rex in
|
|
let rex = match pat with Some str -> regexp str | _ -> rex in
|
|
let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
|
|
let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
|
|
@@ -568,17 +681,17 @@ let replace ?(iflags = 0) ?flags ?(rex = def_rex) ?pat
|
|
| Some str -> subst str
|
|
| Some str -> subst str
|
|
| _ -> itempl in
|
|
| _ -> itempl in
|
|
let subj_len = String.length subj in
|
|
let subj_len = String.length subj in
|
|
- if pos < 0 || pos > subj_len then invalid_arg "Pcre.replace: illegal offset";
|
|
|
|
|
|
+ if pos < 0 || pos > subj_len then invalid_arg "Pcre2.replace: illegal offset";
|
|
let subgroups2, ovector = make_ovector rex in
|
|
let subgroups2, ovector = make_ovector rex in
|
|
let nsubs = (subgroups2 lsr 1) - 1 in
|
|
let nsubs = (subgroups2 lsr 1) - 1 in
|
|
if max_br > nsubs then
|
|
if max_br > nsubs then
|
|
- failwith "Pcre.replace: backreference denotes nonexistent subpattern";
|
|
|
|
- if with_lp && nsubs = 0 then failwith "Pcre.replace: no backreferences";
|
|
|
|
|
|
+ failwith "Pcre2.replace: backreference denotes nonexistent subpattern";
|
|
|
|
+ if with_lp && nsubs = 0 then failwith "Pcre2.replace: no backreferences";
|
|
let rec loop full_len trans_lsts cur_pos =
|
|
let rec loop full_len trans_lsts cur_pos =
|
|
if
|
|
if
|
|
cur_pos > subj_len ||
|
|
cur_pos > subj_len ||
|
|
try
|
|
try
|
|
- unsafe_pcre_exec
|
|
|
|
|
|
+ unsafe_pcre2_match
|
|
iflags rex ~pos:cur_pos ~subj_start:0 ~subj
|
|
iflags rex ~pos:cur_pos ~subj_start:0 ~subj
|
|
ovector callout;
|
|
ovector callout;
|
|
false
|
|
false
|
|
@@ -617,19 +730,19 @@ let replace ?(iflags = 0) ?flags ?(rex = def_rex) ?pat
|
|
else loop full_len trans_lsts last in
|
|
else loop full_len trans_lsts last in
|
|
loop 0 [] pos
|
|
loop 0 [] pos
|
|
|
|
|
|
-let qreplace ?(iflags = 0) ?flags ?(rex = def_rex) ?pat
|
|
|
|
|
|
+let qreplace ?(iflags = 0L) ?flags ?(rex = def_rex) ?pat
|
|
?(pos = 0) ?(templ = "") ?callout subj =
|
|
?(pos = 0) ?(templ = "") ?callout subj =
|
|
let rex = match pat with Some str -> regexp str | _ -> rex in
|
|
let rex = match pat with Some str -> regexp str | _ -> rex in
|
|
let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
|
|
let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
|
|
let subj_len = String.length subj in
|
|
let subj_len = String.length subj in
|
|
- if pos < 0 || pos > subj_len then invalid_arg "Pcre.qreplace: illegal offset";
|
|
|
|
|
|
+ if pos < 0 || pos > subj_len then invalid_arg "Pcre2.qreplace: illegal offset";
|
|
let templ_len = String.length templ in
|
|
let templ_len = String.length templ in
|
|
let _, ovector = make_ovector rex in
|
|
let _, ovector = make_ovector rex in
|
|
let rec loop full_len subst_lst cur_pos =
|
|
let rec loop full_len subst_lst cur_pos =
|
|
if
|
|
if
|
|
cur_pos > subj_len ||
|
|
cur_pos > subj_len ||
|
|
try
|
|
try
|
|
- unsafe_pcre_exec
|
|
|
|
|
|
+ unsafe_pcre2_match
|
|
iflags rex ~pos:cur_pos ~subj_start:0 ~subj ovector callout;
|
|
iflags rex ~pos:cur_pos ~subj_start:0 ~subj ovector callout;
|
|
false
|
|
false
|
|
with Not_found -> true
|
|
with Not_found -> true
|
|
@@ -666,18 +779,18 @@ let qreplace ?(iflags = 0) ?flags ?(rex = def_rex) ?pat
|
|
else loop full_len subst_lst last in
|
|
else loop full_len subst_lst last in
|
|
loop 0 [] pos
|
|
loop 0 [] pos
|
|
|
|
|
|
-let substitute_substrings ?(iflags = 0) ?flags ?(rex = def_rex) ?pat
|
|
|
|
|
|
+let substitute_substrings ?(iflags = 0L) ?flags ?(rex = def_rex) ?pat
|
|
?(pos = 0) ?callout ~subst subj =
|
|
?(pos = 0) ?callout ~subst subj =
|
|
let rex = match pat with Some str -> regexp str | _ -> rex in
|
|
let rex = match pat with Some str -> regexp str | _ -> rex in
|
|
let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
|
|
let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
|
|
let subj_len = String.length subj in
|
|
let subj_len = String.length subj in
|
|
- if pos < 0 || pos > subj_len then invalid_arg "Pcre.substitute: illegal offset";
|
|
|
|
|
|
+ if pos < 0 || pos > subj_len then invalid_arg "Pcre2.substitute: illegal offset";
|
|
let _, ovector = make_ovector rex in
|
|
let _, ovector = make_ovector rex in
|
|
let rec loop full_len subst_lst cur_pos =
|
|
let rec loop full_len subst_lst cur_pos =
|
|
if
|
|
if
|
|
cur_pos > subj_len ||
|
|
cur_pos > subj_len ||
|
|
try
|
|
try
|
|
- unsafe_pcre_exec
|
|
|
|
|
|
+ unsafe_pcre2_match
|
|
iflags rex ~pos:cur_pos ~subj_start:0 ~subj ovector callout;
|
|
iflags rex ~pos:cur_pos ~subj_start:0 ~subj ovector callout;
|
|
false
|
|
false
|
|
with Not_found -> true
|
|
with Not_found -> true
|
|
@@ -719,7 +832,7 @@ let substitute ?iflags ?flags ?rex ?pat ?pos ?callout ~subst:str_subst subj =
|
|
str_subst (string_unsafe_sub subj first (last - first)) in
|
|
str_subst (string_unsafe_sub subj first (last - first)) in
|
|
substitute_substrings ?iflags ?flags ?rex ?pat ?pos ?callout ~subst subj
|
|
substitute_substrings ?iflags ?flags ?rex ?pat ?pos ?callout ~subst subj
|
|
|
|
|
|
-let replace_first ?(iflags = 0) ?flags ?(rex = def_rex) ?pat ?(pos = 0)
|
|
|
|
|
|
+let replace_first ?(iflags = 0L) ?flags ?(rex = def_rex) ?pat ?(pos = 0)
|
|
?(itempl = def_subst) ?templ ?callout subj =
|
|
?(itempl = def_subst) ?templ ?callout subj =
|
|
let rex = match pat with Some str -> regexp str | _ -> rex in
|
|
let rex = match pat with Some str -> regexp str | _ -> rex in
|
|
let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
|
|
let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
|
|
@@ -730,10 +843,10 @@ let replace_first ?(iflags = 0) ?flags ?(rex = def_rex) ?pat ?(pos = 0)
|
|
let subgroups2, ovector = make_ovector rex in
|
|
let subgroups2, ovector = make_ovector rex in
|
|
let nsubs = (subgroups2 lsr 1) - 1 in
|
|
let nsubs = (subgroups2 lsr 1) - 1 in
|
|
if max_br > nsubs then
|
|
if max_br > nsubs then
|
|
- failwith "Pcre.replace_first: backreference denotes nonexistent subpattern";
|
|
|
|
- if with_lp && nsubs = 0 then failwith "Pcre.replace_first: no backreferences";
|
|
|
|
|
|
+ failwith "Pcre2.replace_first: backreference denotes nonexistent subpattern";
|
|
|
|
+ if with_lp && nsubs = 0 then failwith "Pcre2.replace_first: no backreferences";
|
|
try
|
|
try
|
|
- unsafe_pcre_exec iflags rex ~pos ~subj_start:0 ~subj ovector callout;
|
|
|
|
|
|
+ unsafe_pcre2_match iflags rex ~pos ~subj_start:0 ~subj ovector callout;
|
|
let res_len, trans_lst =
|
|
let res_len, trans_lst =
|
|
calc_trans_lst subgroups2 ovector subj templ subst_lst in
|
|
calc_trans_lst subgroups2 ovector subj templ subst_lst in
|
|
let first = Array.unsafe_get ovector 0 in
|
|
let first = Array.unsafe_get ovector 0 in
|
|
@@ -746,15 +859,15 @@ let replace_first ?(iflags = 0) ?flags ?(rex = def_rex) ?pat ?(pos = 0)
|
|
let ofs = List.fold_left coll first trans_lst in
|
|
let ofs = List.fold_left coll first trans_lst in
|
|
bytes_unsafe_blit_string subj last res ofs rest;
|
|
bytes_unsafe_blit_string subj last res ofs rest;
|
|
Bytes.unsafe_to_string res
|
|
Bytes.unsafe_to_string res
|
|
- with Not_found -> string_copy subj
|
|
|
|
|
|
+ with Not_found -> subj
|
|
|
|
|
|
-let qreplace_first ?(iflags = 0) ?flags ?(rex = def_rex) ?pat
|
|
|
|
|
|
+let qreplace_first ?(iflags = 0L) ?flags ?(rex = def_rex) ?pat
|
|
?(pos = 0) ?(templ = "") ?callout subj =
|
|
?(pos = 0) ?(templ = "") ?callout subj =
|
|
let rex = match pat with Some str -> regexp str | _ -> rex in
|
|
let rex = match pat with Some str -> regexp str | _ -> rex in
|
|
let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
|
|
let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
|
|
let _, ovector = make_ovector rex in
|
|
let _, ovector = make_ovector rex in
|
|
try
|
|
try
|
|
- unsafe_pcre_exec iflags rex ~pos ~subj_start:0 ~subj ovector callout;
|
|
|
|
|
|
+ unsafe_pcre2_match iflags rex ~pos ~subj_start:0 ~subj ovector callout;
|
|
let first = Array.unsafe_get ovector 0 in
|
|
let first = Array.unsafe_get ovector 0 in
|
|
let last = Array.unsafe_get ovector 1 in
|
|
let last = Array.unsafe_get ovector 1 in
|
|
let len = String.length templ in
|
|
let len = String.length templ in
|
|
@@ -765,15 +878,15 @@ let qreplace_first ?(iflags = 0) ?flags ?(rex = def_rex) ?pat
|
|
bytes_unsafe_blit_string templ 0 res first len;
|
|
bytes_unsafe_blit_string templ 0 res first len;
|
|
bytes_unsafe_blit_string subj last res postfix_start rest;
|
|
bytes_unsafe_blit_string subj last res postfix_start rest;
|
|
Bytes.unsafe_to_string res
|
|
Bytes.unsafe_to_string res
|
|
- with Not_found -> string_copy subj
|
|
|
|
|
|
+ with Not_found -> subj
|
|
|
|
|
|
-let substitute_substrings_first ?(iflags = 0) ?flags ?(rex = def_rex) ?pat
|
|
|
|
|
|
+let substitute_substrings_first ?(iflags = 0L) ?flags ?(rex = def_rex) ?pat
|
|
?(pos = 0) ?callout ~subst subj =
|
|
?(pos = 0) ?callout ~subst subj =
|
|
let rex = match pat with Some str -> regexp str | _ -> rex in
|
|
let rex = match pat with Some str -> regexp str | _ -> rex in
|
|
let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
|
|
let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
|
|
let _, ovector = make_ovector rex in
|
|
let _, ovector = make_ovector rex in
|
|
try
|
|
try
|
|
- unsafe_pcre_exec iflags rex ~pos ~subj_start:0 ~subj ovector callout;
|
|
|
|
|
|
+ unsafe_pcre2_match iflags rex ~pos ~subj_start:0 ~subj ovector callout;
|
|
let subj_len = String.length subj in
|
|
let subj_len = String.length subj in
|
|
let prefix_len = Array.unsafe_get ovector 0 in
|
|
let prefix_len = Array.unsafe_get ovector 0 in
|
|
let last = Array.unsafe_get ovector 1 in
|
|
let last = Array.unsafe_get ovector 1 in
|
|
@@ -786,7 +899,7 @@ let substitute_substrings_first ?(iflags = 0) ?flags ?(rex = def_rex) ?pat
|
|
bytes_unsafe_blit_string templ 0 res prefix_len templ_len;
|
|
bytes_unsafe_blit_string templ 0 res prefix_len templ_len;
|
|
bytes_unsafe_blit_string subj last res postfix_start postfix_len;
|
|
bytes_unsafe_blit_string subj last res postfix_start postfix_len;
|
|
Bytes.unsafe_to_string res
|
|
Bytes.unsafe_to_string res
|
|
- with Not_found -> string_copy subj
|
|
|
|
|
|
+ with Not_found -> subj
|
|
|
|
|
|
let substitute_first ?iflags ?flags ?rex ?pat ?pos
|
|
let substitute_first ?iflags ?flags ?rex ?pat ?pos
|
|
?callout ~subst:str_subst subj =
|
|
?callout ~subst:str_subst subj =
|
|
@@ -803,7 +916,7 @@ let substitute_first ?iflags ?flags ?rex ?pat ?pos
|
|
let internal_psplit flags rex max pos callout subj =
|
|
let internal_psplit flags rex max pos callout subj =
|
|
let subj_len = String.length subj in
|
|
let subj_len = String.length subj in
|
|
if subj_len = 0 then []
|
|
if subj_len = 0 then []
|
|
- else if max = 1 then [string_copy subj]
|
|
|
|
|
|
+ else if max = 1 then [subj]
|
|
else
|
|
else
|
|
let subgroups2, ovector = make_ovector rex in
|
|
let subgroups2, ovector = make_ovector rex in
|
|
|
|
|
|
@@ -831,7 +944,7 @@ let internal_psplit flags rex max pos callout subj =
|
|
if cnt = 0 then
|
|
if cnt = 0 then
|
|
if prematch &&
|
|
if prematch &&
|
|
try
|
|
try
|
|
- unsafe_pcre_exec
|
|
|
|
|
|
+ unsafe_pcre2_match
|
|
flags rex ~pos ~subj_start:pos ~subj ovector callout;
|
|
flags rex ~pos ~subj_start:pos ~subj ovector callout;
|
|
true
|
|
true
|
|
with Not_found -> false
|
|
with Not_found -> false
|
|
@@ -845,7 +958,7 @@ let internal_psplit flags rex max pos callout subj =
|
|
else
|
|
else
|
|
if
|
|
if
|
|
try
|
|
try
|
|
- unsafe_pcre_exec
|
|
|
|
|
|
+ unsafe_pcre2_match
|
|
flags rex ~pos ~subj_start:pos ~subj ovector callout;
|
|
flags rex ~pos ~subj_start:pos ~subj ovector callout;
|
|
false
|
|
false
|
|
with Not_found -> true
|
|
with Not_found -> true
|
|
@@ -859,8 +972,9 @@ let internal_psplit flags rex max pos callout subj =
|
|
if len = 0 then "" :: strs
|
|
if len = 0 then "" :: strs
|
|
else if
|
|
else if
|
|
try
|
|
try
|
|
- unsafe_pcre_exec
|
|
|
|
- (flags lor 0x0410) rex ~pos ~subj_start:pos ~subj
|
|
|
|
|
|
+ unsafe_pcre2_match
|
|
|
|
+ (* `ANCHORED | `NOTEMPTY *)
|
|
|
|
+ (Int64.logor flags 0x80000004L) rex ~pos ~subj_start:pos ~subj
|
|
ovector callout;
|
|
ovector callout;
|
|
true
|
|
true
|
|
with Not_found -> false
|
|
with Not_found -> false
|
|
@@ -880,13 +994,13 @@ let internal_psplit flags rex max pos callout subj =
|
|
|
|
|
|
let rec strip_all_empty = function "" :: t -> strip_all_empty t | l -> l
|
|
let rec strip_all_empty = function "" :: t -> strip_all_empty t | l -> l
|
|
|
|
|
|
-external isspace : char -> bool = "pcre_isspace_stub" [@@noalloc]
|
|
|
|
|
|
+external isspace : char -> bool = "pcre2_isspace_stub" [@@noalloc]
|
|
|
|
|
|
let rec find_no_space ix len str =
|
|
let rec find_no_space ix len str =
|
|
if ix = len || not (isspace (String.unsafe_get str ix)) then ix
|
|
if ix = len || not (isspace (String.unsafe_get str ix)) then ix
|
|
else find_no_space (ix + 1) len str
|
|
else find_no_space (ix + 1) len str
|
|
|
|
|
|
-let split ?(iflags = 0) ?flags ?rex ?pat ?(pos = 0) ?(max = 0) ?callout subj =
|
|
|
|
|
|
+let split ?(iflags = 0L) ?flags ?rex ?pat ?(pos = 0) ?(max = 0) ?callout subj =
|
|
let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
|
|
let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
|
|
let res =
|
|
let res =
|
|
match pat, rex with
|
|
match pat, rex with
|
|
@@ -895,7 +1009,7 @@ let split ?(iflags = 0) ?flags ?rex ?pat ?(pos = 0) ?(max = 0) ?callout subj =
|
|
| _ ->
|
|
| _ ->
|
|
(* special case for Perl-splitting semantics *)
|
|
(* special case for Perl-splitting semantics *)
|
|
let len = String.length subj in
|
|
let len = String.length subj in
|
|
- if pos > len || pos < 0 then failwith "Pcre.split: illegal offset";
|
|
|
|
|
|
+ if pos > len || pos < 0 then failwith "Pcre2.split: illegal offset";
|
|
let new_pos = find_no_space pos len subj in
|
|
let new_pos = find_no_space pos len subj in
|
|
internal_psplit iflags def_rex max new_pos callout subj in
|
|
internal_psplit iflags def_rex max new_pos callout subj in
|
|
List.rev (if max = 0 then strip_all_empty res else res)
|
|
List.rev (if max = 0 then strip_all_empty res else res)
|
|
@@ -915,13 +1029,13 @@ let rec strip_all_empty_full = function
|
|
| Delim _ :: rest -> strip_all_empty_full rest
|
|
| Delim _ :: rest -> strip_all_empty_full rest
|
|
| l -> l
|
|
| l -> l
|
|
|
|
|
|
-let full_split ?(iflags = 0) ?flags ?(rex = def_rex) ?pat
|
|
|
|
|
|
+let full_split ?(iflags = 0L) ?flags ?(rex = def_rex) ?pat
|
|
?(pos = 0) ?(max = 0) ?callout subj =
|
|
?(pos = 0) ?(max = 0) ?callout subj =
|
|
let rex = match pat with Some str -> regexp str | _ -> rex in
|
|
let rex = match pat with Some str -> regexp str | _ -> rex in
|
|
let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
|
|
let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
|
|
let subj_len = String.length subj in
|
|
let subj_len = String.length subj in
|
|
if subj_len = 0 then []
|
|
if subj_len = 0 then []
|
|
- else if max = 1 then [Text (string_copy subj)]
|
|
|
|
|
|
+ else if max = 1 then [Text (subj)]
|
|
else
|
|
else
|
|
let subgroups2, ovector = make_ovector rex in
|
|
let subgroups2, ovector = make_ovector rex in
|
|
|
|
|
|
@@ -952,7 +1066,7 @@ let full_split ?(iflags = 0) ?flags ?(rex = def_rex) ?pat
|
|
if cnt = 0 then
|
|
if cnt = 0 then
|
|
if prematch &&
|
|
if prematch &&
|
|
try
|
|
try
|
|
- unsafe_pcre_exec
|
|
|
|
|
|
+ unsafe_pcre2_match
|
|
iflags rex ~pos ~subj_start:pos ~subj ovector callout;
|
|
iflags rex ~pos ~subj_start:pos ~subj ovector callout;
|
|
true
|
|
true
|
|
with Not_found -> false
|
|
with Not_found -> false
|
|
@@ -970,7 +1084,7 @@ let full_split ?(iflags = 0) ?flags ?(rex = def_rex) ?pat
|
|
else
|
|
else
|
|
if
|
|
if
|
|
try
|
|
try
|
|
- unsafe_pcre_exec
|
|
|
|
|
|
+ unsafe_pcre2_match
|
|
iflags rex ~pos ~subj_start:pos ~subj ovector callout;
|
|
iflags rex ~pos ~subj_start:pos ~subj ovector callout;
|
|
false
|
|
false
|
|
with Not_found -> true
|
|
with Not_found -> true
|
|
@@ -987,8 +1101,9 @@ let full_split ?(iflags = 0) ?flags ?(rex = def_rex) ?pat
|
|
let empty_groups = handle_subgroups [] in
|
|
let empty_groups = handle_subgroups [] in
|
|
if
|
|
if
|
|
try
|
|
try
|
|
- unsafe_pcre_exec
|
|
|
|
- (iflags lor 0x0410) rex ~pos ~subj_start:pos ~subj
|
|
|
|
|
|
+ unsafe_pcre2_match
|
|
|
|
+ (* `ANCHORED | `NOTEMPTY *)
|
|
|
|
+ (Int64.logor iflags 0x80000004L) rex ~pos ~subj_start:pos ~subj
|
|
ovector callout;
|
|
ovector callout;
|
|
true
|
|
true
|
|
with Not_found -> false
|
|
with Not_found -> false
|
|
@@ -1031,4 +1146,4 @@ let foreach_file filenames f =
|
|
let file = open_in filename in
|
|
let file = open_in filename in
|
|
try f filename file; close_in file
|
|
try f filename file; close_in file
|
|
with exn -> close_in file; raise exn in
|
|
with exn -> close_in file; raise exn in
|
|
- List.iter do_with_file filenames
|
|
|
|
|
|
+ List.iter do_with_file filenames
|