Переглянути джерело

[eval] Migrate to pcre2 (#11032)

* Update conf-libpcre to conf-libpcre2-8

* Rename pcre package to pcre2

* Update ocaml pcre bindings to pcre2

* Apply old patches from pcre-ocaml

Now is mostly up to date with the old pcre-ocaml, aside from some
patches that don't work on ocaml 4.07.1:

https://github.com/mmottl/pcre-ocaml/commit/b3a998c821f7cdba9cffc688fba1c651387338c0
https://github.com/mmottl/pcre-ocaml/commit/24cb1577aa085c15550ee5a63c83555f3598e97a
https://github.com/mmottl/pcre-ocaml/commit/5cd23963e25ffe843dc50cfabf53b709865ef9f3

* [ci] Update all builds to use pcre2

* Fix pcre 32-bit compilation error

* Fix compilation error with pcre2 10.31

Fixes builds on Ubuntu 18.04
tobil4sk 2 роки тому
батько
коміт
962ccade18

+ 8 - 8
.github/workflows/main.yml

@@ -64,7 +64,7 @@ jobs:
         run: |
           Set-PSDebug -Trace 1
           curl.exe -fsSL -o cygwin-setup.exe --retry 3 $($env:CYGWIN_SETUP)
-          Start-Process -FilePath "cygwin-setup.exe" -ArgumentList "-B -q -R $($env:CYG_ROOT) -l C:/tmp -s $($env:CYG_MIRROR) -P default -P make -P git -P zlib-devel -P rsync -P patch -P diffutils -P curl -P unzip -P tar -P m4 -P perl -P libpcre-devel -P mbedtls-devel -P mingw64-$($env:MINGW_ARCH)-zlib -P mingw64-$($env:MINGW_ARCH)-gcc-core -P mingw64-$($env:MINGW_ARCH)-pcre" -Wait -RedirectStandardOutput cygwin-setup-out.txt
+          Start-Process -FilePath "cygwin-setup.exe" -ArgumentList "-B -q -R $($env:CYG_ROOT) -l C:/tmp -s $($env:CYG_MIRROR) -P default -P make -P git -P zlib-devel -P rsync -P patch -P diffutils -P curl -P unzip -P tar -P m4 -P perl -P libpcre2-devel -P mbedtls-devel -P mingw64-$($env:MINGW_ARCH)-zlib -P mingw64-$($env:MINGW_ARCH)-gcc-core -P mingw64-$($env:MINGW_ARCH)-pcre2" -Wait -RedirectStandardOutput cygwin-setup-out.txt
           Get-Content cygwin-setup-out.txt
           curl.exe -fsSL -o "opam.tar.xz" --retry 3 https://github.com/fdopen/opam-repository-mingw/releases/download/0.0.0.2/opam$($env:ARCH).tar.xz
           curl.exe -fsSL -o "libmbedtls.tar.xz" --retry 3 https://github.com/Simn/mingw64-mbedtls/releases/download/2.16.3/mingw64-$($env:MINGW_ARCH)-mbedtls-2.16.3-1.tar.xz
@@ -182,7 +182,7 @@ jobs:
         run: |
           Set-PSDebug -Trace 1
           curl.exe -fsSL -o cygwin-setup.exe --retry 3 $($env:CYGWIN_SETUP)
-          Start-Process -FilePath "cygwin-setup.exe" -ArgumentList "-B -q -R $($env:CYG_ROOT) -l C:/tmp -s $($env:CYG_MIRROR) -P default -P make -P git -P zlib-devel -P rsync -P patch -P diffutils -P curl -P unzip -P tar -P m4 -P perl -P libpcre-devel -P mbedtls-devel -P mingw64-$($env:MINGW_ARCH)-zlib -P mingw64-$($env:MINGW_ARCH)-gcc-core -P mingw64-$($env:MINGW_ARCH)-pcre --allow-unsupported-windows" -Wait -RedirectStandardOutput cygwin-setup-out.txt
+          Start-Process -FilePath "cygwin-setup.exe" -ArgumentList "-B -q -R $($env:CYG_ROOT) -l C:/tmp -s $($env:CYG_MIRROR) -P default -P make -P git -P zlib-devel -P rsync -P patch -P diffutils -P curl -P unzip -P tar -P m4 -P perl -P libpcre2-devel -P mbedtls-devel -P mingw64-$($env:MINGW_ARCH)-zlib -P mingw64-$($env:MINGW_ARCH)-gcc-core -P mingw64-$($env:MINGW_ARCH)-pcre2 --allow-unsupported-windows" -Wait -RedirectStandardOutput cygwin-setup-out.txt
           Get-Content cygwin-setup-out.txt
           & "$($env:CYG_ROOT)/bin/bash.exe" @('--norc', '--noprofile', '/etc/postinstall/ca-certificates.sh')
           curl.exe -fsSL -o "opam.tar.xz" --retry 3 https://github.com/fdopen/opam-repository-mingw/releases/download/0.0.0.2/opam$($env:ARCH).tar.xz
@@ -283,7 +283,7 @@ jobs:
           sudo add-apt-repository ppa:avsm/ppa -y # provides OPAM 2
           sudo add-apt-repository ppa:haxe/ocaml -y # provides newer version of mbedtls
           sudo apt-get update -qqy
-          sudo apt-get install -qqy ocaml-nox camlp5 opam libpcre3-dev zlib1g-dev libgtk2.0-dev libmbedtls-dev ninja-build libstring-shellquote-perl libipc-system-simple-perl
+          sudo apt-get install -qqy ocaml-nox camlp5 opam libpcre2-dev zlib1g-dev libgtk2.0-dev libmbedtls-dev ninja-build libstring-shellquote-perl libipc-system-simple-perl
 
       - name: Install OCaml libraries
         if: steps.cache-opam.outputs.cache-hit != 'true'
@@ -596,7 +596,7 @@ jobs:
           # For compatibility with macOS 10.13
           ZLIB_VERSION: 1.2.13
           MBEDTLS_VERSION: 2.25.0
-          PCRE_VERSION: 8.44
+          PCRE2_VERSION: 10.42
         run: |
           set -ex
           brew uninstall openssl@1.0.2t || echo
@@ -619,9 +619,9 @@ jobs:
           cd mbedtls-$MBEDTLS_VERSION
           make && make install
           cd ..
-          curl -L https://downloads.sourceforge.net/project/pcre/pcre/$PCRE_VERSION/pcre-$PCRE_VERSION.tar.gz | tar xz
-          cd pcre-$PCRE_VERSION
-          ./configure --enable-utf8 --enable-pcre8 --enable-pcre16 --enable-pcre32 --enable-unicode-properties --enable-pcregrep-libz --enable-pcregrep-libbz2 --enable-jit
+          curl -L https://github.com/PCRE2Project/pcre2/releases/download/pcre2-$PCRE2_VERSION/pcre2-$PCRE2_VERSION.tar.gz | tar xz
+          cd pcre2-$PCRE2_VERSION
+          ./configure --enable-unicode --enable-pcre2-8 --enable-pcre2-16 --enable-pcre2-32 --enable-unicode-properties --enable-pcre2grep-libz --enable-pcre2grep-libbz2 --enable-jit
           make && make install
           cd ..
 
@@ -649,7 +649,7 @@ jobs:
         run: |
           set -ex
           eval $(opam env)
-          opam config exec -- make -s -j`sysctl -n hw.ncpu` STATICLINK=1 "LIB_PARAMS=/usr/local/lib/libz.a /usr/local/lib/libpcre.a /usr/local/lib/libmbedtls.a /usr/local/lib/libmbedcrypto.a /usr/local/lib/libmbedx509.a -cclib '-framework Security -framework CoreFoundation'" haxe
+          opam config exec -- make -s -j`sysctl -n hw.ncpu` STATICLINK=1 "LIB_PARAMS=/usr/local/lib/libz.a /usr/local/lib/libpcre2-8.a /usr/local/lib/libmbedtls.a /usr/local/lib/libmbedcrypto.a /usr/local/lib/libmbedx509.a -cclib '-framework Security -framework CoreFoundation'" haxe
           opam config exec -- make -s haxelib
           make -s package_unix package_installer_mac
           ls -l out

+ 1 - 1
Earthfile

@@ -63,7 +63,7 @@ devcontainer:
             ocaml-nox \
             camlp5 \
             opam \
-            libpcre3-dev \
+            libpcre2-dev \
             zlib1g-dev \
             libgtk2.0-dev \
             libmbedtls-dev \

+ 2 - 2
Makefile

@@ -60,9 +60,9 @@ HAXE_VERSION=$(shell $(CURDIR)/$(HAXE_OUTPUT) -version 2>&1 | awk '{print $$1;}'
 HAXE_VERSION_SHORT=$(shell echo "$(HAXE_VERSION)" | grep -oE "^[0-9]+\.[0-9]+\.[0-9]+")
 
 ifneq ($(STATICLINK),0)
-	LIB_PARAMS= -cclib '-Wl,-Bstatic -lpcre -lz -lmbedtls -lmbedx509 -lmbedcrypto -Wl,-Bdynamic '
+	LIB_PARAMS= -cclib '-Wl,-Bstatic -lpcre2-8 -lz -lmbedtls -lmbedx509 -lmbedcrypto -Wl,-Bdynamic '
 else
-	LIB_PARAMS?= -cclib -lpcre -cclib -lz -cclib -lmbedtls -cclib -lmbedx509 -cclib -lmbedcrypto
+	LIB_PARAMS?= -cclib -lpcre2-8 -cclib -lz -cclib -lmbedtls -cclib -lmbedx509 -cclib -lmbedcrypto
 endif
 ifeq ($(SYSTEM_NAME),Mac)
 	LIB_PARAMS+= -cclib '-framework Security -framework CoreFoundation'

+ 2 - 2
Makefile.win

@@ -43,12 +43,12 @@ CC_CMD=($(COMPILER) $(ALL_CFLAGS) -c $< 2>tmp.cmi && $(FILTER)) || ($(FILTER) &&
 endif
 
 ifeq ($(STATICLINK),0)
-	LIB_PARAMS = -cclib -lpcre -cclib -lz -cclib -lcrypt32 -cclib -lmbedtls -cclib -lmbedcrypto -cclib -lmbedx509
+	LIB_PARAMS = -cclib -lpcre2-8 -cclib -lz -cclib -lcrypt32 -cclib -lmbedtls -cclib -lmbedcrypto -cclib -lmbedx509
 endif
 
 PACKAGE_FILES=$(HAXE_OUTPUT) $(HAXELIB_OUTPUT) std \
 	"$$(cygcheck $(CURDIR)/$(HAXE_OUTPUT) | grep zlib1.dll | sed -e 's/^\s*//')" \
-	"$$(cygcheck $(CURDIR)/$(HAXE_OUTPUT) | grep libpcre-1.dll | sed -e 's/^\s*//')" \
+	"$$(cygcheck $(CURDIR)/$(HAXE_OUTPUT) | grep libpcre2-8-0.dll | sed -e 's/^\s*//')" \
 	"$$(cygcheck $(CURDIR)/$(HAXE_OUTPUT) | grep libmbedcrypto.dll | sed -e 's/^\s*//')" \
 	"$$(cygcheck $(CURDIR)/$(HAXE_OUTPUT) | grep libmbedtls.dll | sed -e 's/^\s*//')" \
 	"$$(cygcheck $(CURDIR)/$(HAXE_OUTPUT) | grep libmbedx509.dll | sed -e 's/^\s*//')"

+ 3 - 3
extra/BUILDING.md

@@ -36,11 +36,11 @@ You need to install some native libraries as well as some OCaml libraries.
 To install the native libraries, use the appropriate system package manager.
 
  * Mac OS X
-    * Use [Homebrew](https://brew.sh/), `brew install zlib pcre`.
+    * Use [Homebrew](https://brew.sh/), `brew install zlib pcre2`.
  * Debian / Ubuntu
-    * `sudo apt install libpcre3-dev zlib1g-dev libmbedtls-dev`.
+    * `sudo apt install libpcre2-dev zlib1g-dev libmbedtls-dev`.
  * Windows (Cygwin)
-    * Run the Cygwin [setup-x86_64.exe](https://cygwin.com/install.html) against the Cygwin installation directory. Install `make`, `git`, `zlib-devel`, `libpcre-devel`, `mingw64-x86_64-gcc-core`, `mingw64-x86_64-zlib`, and `mingw64-x86_64-pcre`. You may need to select "Not Installed" in the dropdown list to see the packages. Copy `zlib1.dll` and `libpcre-1.dll` from `path/to/cygwin/usr/x86_64-w64-mingw32/sys-root/mingw/bin` to the checked out Haxe source directory.
+    * Run the Cygwin [setup-x86_64.exe](https://cygwin.com/install.html) against the Cygwin installation directory. Install `make`, `git`, `zlib-devel`, `libpcre2-devel`, `mingw64-x86_64-gcc-core`, `mingw64-x86_64-zlib`, and `mingw64-x86_64-pcre2`. You may need to select "Not Installed" in the dropdown list to see the packages. Copy `zlib1.dll` and `libpcre2-8-0.dll` from `path/to/cygwin/usr/x86_64-w64-mingw32/sys-root/mingw/bin` to the checked out Haxe source directory.
     * Install Neko by either
       * Download the [Neko binaries](https://nekovm.org/download/), and add the extracted directory to the beginning of PATH.
       * Install the [Chocolatey Neko package](https://chocolatey.org/packages/neko).

+ 5 - 5
extra/github-actions/build-mac.yml

@@ -3,7 +3,7 @@
     # For compatibility with macOS 10.13
     ZLIB_VERSION: 1.2.13
     MBEDTLS_VERSION: 2.25.0
-    PCRE_VERSION: 8.44
+    PCRE2_VERSION: 10.42
   run: |
     set -ex
     brew uninstall openssl@1.0.2t || echo
@@ -26,9 +26,9 @@
     cd mbedtls-$MBEDTLS_VERSION
     make && make install
     cd ..
-    curl -L https://downloads.sourceforge.net/project/pcre/pcre/$PCRE_VERSION/pcre-$PCRE_VERSION.tar.gz | tar xz
-    cd pcre-$PCRE_VERSION
-    ./configure --enable-utf8 --enable-pcre8 --enable-pcre16 --enable-pcre32 --enable-unicode-properties --enable-pcregrep-libz --enable-pcregrep-libbz2 --enable-jit
+    curl -L https://github.com/PCRE2Project/pcre2/releases/download/pcre2-$PCRE2_VERSION/pcre2-$PCRE2_VERSION.tar.gz | tar xz
+    cd pcre2-$PCRE2_VERSION
+    ./configure --enable-unicode --enable-pcre2-8 --enable-pcre2-16 --enable-pcre2-32 --enable-unicode-properties --enable-pcre2grep-libz --enable-pcre2grep-libbz2 --enable-jit
     make && make install
     cd ..
 
@@ -56,7 +56,7 @@
   run: |
     set -ex
     eval $(opam env)
-    opam config exec -- make -s -j`sysctl -n hw.ncpu` STATICLINK=1 "LIB_PARAMS=/usr/local/lib/libz.a /usr/local/lib/libpcre.a /usr/local/lib/libmbedtls.a /usr/local/lib/libmbedcrypto.a /usr/local/lib/libmbedx509.a -cclib '-framework Security -framework CoreFoundation'" haxe
+    opam config exec -- make -s -j`sysctl -n hw.ncpu` STATICLINK=1 "LIB_PARAMS=/usr/local/lib/libz.a /usr/local/lib/libpcre2-8.a /usr/local/lib/libmbedtls.a /usr/local/lib/libmbedcrypto.a /usr/local/lib/libmbedx509.a -cclib '-framework Security -framework CoreFoundation'" haxe
     opam config exec -- make -s haxelib
     make -s package_unix package_installer_mac
     ls -l out

+ 1 - 1
extra/github-actions/install-ocaml-windows.yml

@@ -3,7 +3,7 @@
   run: |
     Set-PSDebug -Trace 1
     curl.exe -fsSL -o cygwin-setup.exe --retry 3 $($env:CYGWIN_SETUP)
-    Start-Process -FilePath "cygwin-setup.exe" -ArgumentList "-B -q -R $($env:CYG_ROOT) -l C:/tmp -s $($env:CYG_MIRROR) -P default -P make -P git -P zlib-devel -P rsync -P patch -P diffutils -P curl -P unzip -P tar -P m4 -P perl -P libpcre-devel -P mbedtls-devel -P mingw64-$($env:MINGW_ARCH)-zlib -P mingw64-$($env:MINGW_ARCH)-gcc-core -P mingw64-$($env:MINGW_ARCH)-pcre --allow-unsupported-windows" -Wait -RedirectStandardOutput cygwin-setup-out.txt
+    Start-Process -FilePath "cygwin-setup.exe" -ArgumentList "-B -q -R $($env:CYG_ROOT) -l C:/tmp -s $($env:CYG_MIRROR) -P default -P make -P git -P zlib-devel -P rsync -P patch -P diffutils -P curl -P unzip -P tar -P m4 -P perl -P libpcre2-devel -P mbedtls-devel -P mingw64-$($env:MINGW_ARCH)-zlib -P mingw64-$($env:MINGW_ARCH)-gcc-core -P mingw64-$($env:MINGW_ARCH)-pcre2 --allow-unsupported-windows" -Wait -RedirectStandardOutput cygwin-setup-out.txt
     Get-Content cygwin-setup-out.txt
     & "$($env:CYG_ROOT)/bin/bash.exe" @('--norc', '--noprofile', '/etc/postinstall/ca-certificates.sh')
     curl.exe -fsSL -o "opam.tar.xz" --retry 3 https://github.com/fdopen/opam-repository-mingw/releases/download/0.0.0.2/opam$($env:ARCH).tar.xz

+ 1 - 1
extra/github-actions/install-ocaml-windows64.yml

@@ -3,7 +3,7 @@
   run: |
     Set-PSDebug -Trace 1
     curl.exe -fsSL -o cygwin-setup.exe --retry 3 $($env:CYGWIN_SETUP)
-    Start-Process -FilePath "cygwin-setup.exe" -ArgumentList "-B -q -R $($env:CYG_ROOT) -l C:/tmp -s $($env:CYG_MIRROR) -P default -P make -P git -P zlib-devel -P rsync -P patch -P diffutils -P curl -P unzip -P tar -P m4 -P perl -P libpcre-devel -P mbedtls-devel -P mingw64-$($env:MINGW_ARCH)-zlib -P mingw64-$($env:MINGW_ARCH)-gcc-core -P mingw64-$($env:MINGW_ARCH)-pcre" -Wait -RedirectStandardOutput cygwin-setup-out.txt
+    Start-Process -FilePath "cygwin-setup.exe" -ArgumentList "-B -q -R $($env:CYG_ROOT) -l C:/tmp -s $($env:CYG_MIRROR) -P default -P make -P git -P zlib-devel -P rsync -P patch -P diffutils -P curl -P unzip -P tar -P m4 -P perl -P libpcre2-devel -P mbedtls-devel -P mingw64-$($env:MINGW_ARCH)-zlib -P mingw64-$($env:MINGW_ARCH)-gcc-core -P mingw64-$($env:MINGW_ARCH)-pcre2" -Wait -RedirectStandardOutput cygwin-setup-out.txt
     Get-Content cygwin-setup-out.txt
     curl.exe -fsSL -o "opam.tar.xz" --retry 3 https://github.com/fdopen/opam-repository-mingw/releases/download/0.0.0.2/opam$($env:ARCH).tar.xz
     curl.exe -fsSL -o "libmbedtls.tar.xz" --retry 3 https://github.com/Simn/mingw64-mbedtls/releases/download/2.16.3/mingw64-$($env:MINGW_ARCH)-mbedtls-2.16.3-1.tar.xz

+ 1 - 1
extra/github-actions/workflows/main.yml

@@ -93,7 +93,7 @@ jobs:
           sudo add-apt-repository ppa:avsm/ppa -y # provides OPAM 2
           sudo add-apt-repository ppa:haxe/ocaml -y # provides newer version of mbedtls
           sudo apt-get update -qqy
-          sudo apt-get install -qqy ocaml-nox camlp5 opam libpcre3-dev zlib1g-dev libgtk2.0-dev libmbedtls-dev ninja-build libstring-shellquote-perl libipc-system-simple-perl
+          sudo apt-get install -qqy ocaml-nox camlp5 opam libpcre2-dev zlib1g-dev libgtk2.0-dev libmbedtls-dev ninja-build libstring-shellquote-perl libipc-system-simple-perl
 
       - name: Install OCaml libraries
         if: steps.cache-opam.outputs.cache-hit != 'true'

+ 2 - 2
libs/Makefile

@@ -1,7 +1,7 @@
 OCAMLOPT = ocamlopt
 OCAMLC = ocamlc
 TARGET_FLAG = all
-LIBS=extlib-leftovers extc neko javalib ilib swflib ttflib objsize pcre ziplib
+LIBS=extlib-leftovers extc neko javalib ilib swflib ttflib objsize pcre2 ziplib
 
 all: $(LIBS)
 $(LIBS):
@@ -16,7 +16,7 @@ clean:
 	$(MAKE) -C swflib clean
 	$(MAKE) -C ttflib clean
 	$(MAKE) -C objsize clean
-	$(MAKE) -C pcre clean
+	$(MAKE) -C pcre2 clean
 	$(MAKE) -C ziplib clean
 
 .PHONY: all clean $(LIBS)

+ 0 - 28
libs/pcre/Makefile

@@ -1,28 +0,0 @@
-ALL_CFLAGS = $(CFLAGS) -I pcre
-LIBS =
-OCAMLOPT=ocamlopt
-OCAMLC=ocamlc
-SRC = pcre.ml pcre_stubs.c
-
-all: bytecode native
-
-bytecode: pcre.cma
-
-native: pcre.cmxa
-
-pcre.cma: pcre_stubs.o pcre.ml
-	$(OCAMLC) -safe-string -a -o pcre.cma $(LIBS) pcre.ml
-
-pcre.cmxa: pcre.ml pcre_stubs.o
-	$(OCAMLOPT) -safe-string -a -o pcre.cmxa $(LIBS) pcre.ml
-
-pcre_stubs.o: pcre_stubs.c
-	$(OCAMLC) -safe-string $(ALL_CFLAGS) pcre_stubs.c
-
-clean:
-	rm -f pcre.cma pcre.cmi pcre.cmx pcre.cmxa pcre.o pcre.obj pcre_stubs.obj pcre_stubs.o
-	rm -f pcre.a libpcre.a libpcre.lib pcre.cmo
-
-.PHONY: all bytecode native clean
-Makefile: ;
-$(SRC): ;

+ 0 - 736
libs/pcre/pcre_stubs.c

@@ -1,736 +0,0 @@
-/*
-   PCRE-OCAML - Perl Compatibility Regular Expressions for OCaml
-
-   Copyright (C) 1999-  Markus Mottl
-   email: markus.mottl@gmail.com
-   WWW:   http://www.ocaml.info
-
-   This library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2 of the License, or (at your option) any later version.
-
-   This library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this library; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-*/
-
-#if defined(_WIN32)
-#  if defined(_DLL)
-#    define PCREextern __declspec(dllexport)
-#  else
-#    define PCREextern
-#  endif
-#endif
-
-#if _WIN64
-  typedef long long *ovec_dst_ptr;
-#else
-  typedef long *ovec_dst_ptr;
-#endif
-
-#if __GNUC__ >= 3
-# define inline inline __attribute__ ((always_inline))
-# define __unused __attribute__ ((unused))
-#else
-# define __unused
-# define inline
-#endif
-
-#include <ctype.h>
-#include <string.h>
-#include <stdio.h>
-
-#include <caml/mlvalues.h>
-#include <caml/alloc.h>
-#include <caml/memory.h>
-#include <caml/fail.h>
-#include <caml/callback.h>
-
-#include "pcre.h"
-
-/* Error codes as defined for pcre 7.9, undefined in pcre 4.5 */
-#ifndef PCRE_ERROR_PARTIAL
-#define PCRE_ERROR_PARTIAL        (-12)
-#endif
-#ifndef PCRE_ERROR_BADPARTIAL
-#define PCRE_ERROR_BADPARTIAL     (-13)
-#endif
-#ifndef PCRE_ERROR_RECURSIONLIMIT
-#define PCRE_ERROR_RECURSIONLIMIT (-21)
-#endif
-
-typedef const unsigned char *chartables;  /* Type of chartable sets */
-
-/* Contents of callout data */
-struct cod {
-  long subj_start;        /* Start of subject string */
-  value *v_substrings_p;  /* Pointer to substrings matched so far */
-  value *v_cof_p;         /* Pointer to callout function */
-  value v_exn;            /* Possible exception raised by callout function */
-};
-
-/* Cache for exceptions */
-static value *pcre_exc_Error         = NULL;  /* Exception [Error] */
-static value *pcre_exc_Backtrack     = NULL;  /* Exception [Backtrack] */
-
-/* Cache for polymorphic variants */
-static value var_Start_only;   /* Variant [`Start_only] */
-static value var_ANCHORED;     /* Variant [`ANCHORED] */
-static value var_Char;         /* Variant [`Char char] */
-static value var_Not_studied;  /* Variant [`Not_studied] */
-static value var_Studied;      /* Variant [`Studied] */
-static value var_Optimal;      /* Variant [`Optimal] */
-
-static value None = Val_int(0);
-
-/* Converts subject offsets from C-integers to OCaml-Integers.
-
-   This is a bit tricky, because there are 32- and 64-bit platforms around
-   and OCaml chooses the larger possibility for representing integers when
-   available (also in arrays) - not so the PCRE!
-*/
-static inline void copy_ovector(
-  long subj_start, const int *ovec_src, ovec_dst_ptr ovec_dst, int subgroups2)
-{
-  if (subj_start == 0)
-    while (subgroups2--) {
-      *ovec_dst = Val_int(*ovec_src);
-      --ovec_src; --ovec_dst;
-    }
-  else
-    while (subgroups2--) {
-      *ovec_dst = Val_long(*ovec_src + subj_start);
-      --ovec_src; --ovec_dst;
-    }
-}
-
-/* Callout handler */
-static int pcre_callout_handler(pcre_callout_block* cb)
-{
-  struct cod *cod = (struct cod *) cb->callout_data;
-
-  if (cod != NULL) {
-    /* Callout is available */
-    value v_res;
-
-    /* Set up parameter array */
-    value v_callout_data = caml_alloc_small(8, 0);
-
-    const value v_substrings = *cod->v_substrings_p;
-
-    const int capture_top = cb->capture_top;
-    int subgroups2 = capture_top << 1;
-    const int subgroups2_1 = subgroups2 - 1;
-
-    const int *ovec_src = cb->offset_vector + subgroups2_1;
-    ovec_dst_ptr ovec_dst = &Field(Field(v_substrings, 1), 0) + subgroups2_1;
-    long subj_start = cod->subj_start;
-
-    copy_ovector(subj_start, ovec_src, ovec_dst, subgroups2);
-
-    Field(v_callout_data, 0) = Val_int(cb->callout_number);
-    Field(v_callout_data, 1) = v_substrings;
-    Field(v_callout_data, 2) = Val_int(cb->start_match + subj_start);
-    Field(v_callout_data, 3) = Val_int(cb->current_position + subj_start);
-    Field(v_callout_data, 4) = Val_int(capture_top);
-    Field(v_callout_data, 5) = Val_int(cb->capture_last);
-    Field(v_callout_data, 6) = Val_int(cb->pattern_position);
-    Field(v_callout_data, 7) = Val_int(cb->next_item_length);
-
-    /* Perform callout */
-    v_res = caml_callback_exn(*cod->v_cof_p, v_callout_data);
-
-    if (Is_exception_result(v_res)) {
-      /* Callout raised an exception */
-      const value v_exn = Extract_exception(v_res);
-      if (Field(v_exn, 0) == *pcre_exc_Backtrack) return 1;
-      cod->v_exn = v_exn;
-      return PCRE_ERROR_CALLOUT;
-    }
-  }
-
-  return 0;
-}
-
-/* Fetchs the named OCaml-values + caches them and
-   calculates + caches the variant hash values */
-CAMLprim value pcre_ocaml_init(value __unused v_unit)
-{
-  pcre_exc_Error     = caml_named_value("Pcre.Error");
-  pcre_exc_Backtrack = caml_named_value("Pcre.Backtrack");
-
-  var_Start_only  = caml_hash_variant("Start_only");
-  var_ANCHORED    = caml_hash_variant("ANCHORED");
-  var_Char        = caml_hash_variant("Char");
-  var_Not_studied = caml_hash_variant("Not_studied");
-  var_Studied     = caml_hash_variant("Studied");
-  var_Optimal     = caml_hash_variant("Optimal");
-
-  pcre_callout = &pcre_callout_handler;
-
-  return Val_unit;
-}
-
-/* Finalizing deallocation function for chartable sets */
-static void pcre_dealloc_tables(value v_table)
-{ (pcre_free)((void *) Field(v_table, 1)); }
-
-/* Finalizing deallocation function for compiled regular expressions */
-static void pcre_dealloc_regexp(value v_rex)
-{
-  void *extra = (void *) Field(v_rex, 2);
-  (pcre_free)((void *) Field(v_rex, 1));
-  if (extra != NULL)
-#ifdef PCRE_STUDY_JIT_COMPILE
-    pcre_free_study(extra);
-#else
-    pcre_free(extra);
-#endif
-}
-
-/* Makes OCaml-string from PCRE-version */
-CAMLprim value pcre_version_stub(value __unused v_unit)
-{
-  return caml_copy_string((char *) pcre_version());
-}
-
-
-/* Raising exceptions */
-
-static inline void raise_pcre_error(value v_arg) Noreturn;
-static inline void raise_partial() Noreturn;
-static inline void raise_bad_partial() Noreturn;
-static inline void raise_bad_utf8() Noreturn;
-static inline void raise_bad_utf8_offset() Noreturn;
-static inline void raise_match_limit() Noreturn;
-static inline void raise_recursion_limit() Noreturn;
-static inline void raise_bad_pattern(const char *msg, int pos) Noreturn;
-static inline void raise_internal_error(char *msg) Noreturn;
-
-static inline void raise_pcre_error(value v_arg)
-{ caml_raise_with_arg(*pcre_exc_Error, v_arg); }
-
-static inline void raise_partial() { raise_pcre_error(Val_int(0)); }
-static inline void raise_bad_partial() { raise_pcre_error(Val_int(1)); }
-static inline void raise_bad_utf8() { raise_pcre_error(Val_int(2)); }
-static inline void raise_bad_utf8_offset() { raise_pcre_error(Val_int(3)); }
-static inline void raise_match_limit() { raise_pcre_error(Val_int(4)); }
-static inline void raise_recursion_limit() { raise_pcre_error(Val_int(5)); }
-
-static inline void raise_bad_pattern(const char *msg, int pos)
-{
-  CAMLparam0();
-  CAMLlocal1(v_msg);
-  value v_arg;
-  v_msg = caml_copy_string(msg);
-  v_arg = caml_alloc_small(2, 0);
-  Field(v_arg, 0) = v_msg;
-  Field(v_arg, 1) = Val_int(pos);
-  raise_pcre_error(v_arg);
-  CAMLnoreturn;
-}
-
-static inline void raise_internal_error(char *msg)
-{
-  CAMLparam0();
-  CAMLlocal1(v_msg);
-  value v_arg;
-  v_msg = caml_copy_string(msg);
-  v_arg = caml_alloc_small(1, 1);
-  Field(v_arg, 0) = v_msg;
-  raise_pcre_error(v_arg);
-  CAMLnoreturn;
-}
-
-/* PCRE pattern compilation */
-
-/* Makes compiled regular expression from compilation options, an optional
-   value of chartables and the pattern string */
-CAMLprim value pcre_compile_stub(value v_opt, value v_tables, value v_pat)
-{
-  value v_rex;  /* Final result -> value of type [regexp] */
-  const char *error = NULL;  /* pointer to possible error message */
-  int error_ofs = 0;  /* offset in the pattern at which error occurred */
-
-  /* If v_tables = [None], then pointer to tables is NULL, otherwise
-     set it to the appropriate value */
-  chartables tables =
-    (v_tables == None) ? NULL : (chartables) Field(Field(v_tables, 0), 1);
-
-  /* Compiles the pattern */
-  pcre *regexp = pcre_compile(String_val(v_pat), Int_val(v_opt), &error,
-                              &error_ofs, tables);
-
-  /* Raises appropriate exception with [BadPattern] if the pattern
-     could not be compiled */
-  if (regexp == NULL) raise_bad_pattern(error, error_ofs);
-
-  /* GC will do a full cycle every 1_000_000 regexp allocations (a typical
-     regexp probably consumes less than 100 bytes -> maximum of 100_000_000
-     bytes unreclaimed regexps) */
-  v_rex = caml_alloc_final(4, pcre_dealloc_regexp, 1, 1000000);
-
-  /* Field[1]: compiled regular expression (Field[0] is finalizing
-     function! See above!) */
-  Field(v_rex, 1) = (value) regexp;
-
-  /* Field[2]: extra information about regexp when it has been studied
-     successfully */
-  Field(v_rex, 2) = (value) NULL;
-
-  /* Field[3]: If 0 -> regexp has not yet been studied
-                  1 -> regexp has already been studied */
-  Field(v_rex, 3) = 0;
-
-  return v_rex;
-}
-
-/* Studies a regexp */
-CAMLprim value pcre_study_stub(value v_rex)
-{
-  /* If it has not yet been studied */
-  if (! (int) Field(v_rex, 3)) {
-    const char *error = NULL;
-    pcre_extra *extra = pcre_study((pcre *) Field(v_rex, 1), 0, &error);
-    if (error != NULL) caml_invalid_argument((char *) error);
-    Field(v_rex, 2) = (value) extra;
-    Field(v_rex, 3) = Val_int(1);
-  }
-  return v_rex;
-}
-
-/* Sets a match limit recursion for a regular expression imperatively */
-CAMLprim value pcre_set_imp_match_limit_recursion_stub(value v_rex, value v_lim)
-{
-  pcre_extra *extra = (pcre_extra *) Field(v_rex, 2);
-  if (extra == NULL) {
-    extra = pcre_malloc(sizeof(pcre_extra));
-    extra->flags = PCRE_EXTRA_MATCH_LIMIT_RECURSION;
-    Field(v_rex, 2) = (value) extra;
-  } else {
-    unsigned long *flags_ptr = &extra->flags;
-    *flags_ptr = PCRE_EXTRA_MATCH_LIMIT_RECURSION | *flags_ptr;
-  }
-  extra->match_limit_recursion = Int_val(v_lim);
-  return v_rex;
-}
-
-/* Gets the match limit recursion of a regular expression if it exists */
-CAMLprim value pcre_get_match_limit_recursion_stub(value v_rex)
-{
-  pcre_extra *extra = (pcre_extra *) Field(v_rex, 2);
-  if (extra == NULL) return None;
-  if (extra->flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) {
-    value v_lim = Val_int(extra->match_limit_recursion);
-    value v_res = caml_alloc_small(1, 0);
-    Field(v_res, 0) = v_lim;
-    return v_res;
-  }
-  return None;
-}
-
-/* Sets a match limit for a regular expression imperatively */
-CAMLprim value pcre_set_imp_match_limit_stub(value v_rex, value v_lim)
-{
-  pcre_extra *extra = (pcre_extra *) Field(v_rex, 2);
-  if (extra == NULL) {
-    extra = pcre_malloc(sizeof(pcre_extra));
-    extra->flags = PCRE_EXTRA_MATCH_LIMIT;
-    Field(v_rex, 2) = (value) extra;
-  } else {
-    unsigned long *flags_ptr = &extra->flags;
-    *flags_ptr = PCRE_EXTRA_MATCH_LIMIT | *flags_ptr;
-  }
-  extra->match_limit = Int_val(v_lim);
-  return v_rex;
-}
-
-/* Gets the match limit of a regular expression if it exists */
-CAMLprim value pcre_get_match_limit_stub(value v_rex)
-{
-  pcre_extra *extra = (pcre_extra *) Field(v_rex, 2);
-  if (extra == NULL) return None;
-  if (extra->flags & PCRE_EXTRA_MATCH_LIMIT) {
-    value v_lim = Val_int(extra->match_limit);
-    value v_res = caml_alloc_small(1, 0);
-    Field(v_res, 0) = v_lim;
-    return v_res;
-  }
-  return None;
-}
-
-/* Performs the call to the pcre_fullinfo function */
-static inline int pcre_fullinfo_stub(value v_rex, int what, void *where)
-{
-  return pcre_fullinfo((pcre *) Field(v_rex, 1), (pcre_extra *) Field(v_rex, 2),
-                       what, where);
-}
-
-/* Some stubs for info-functions */
-
-/* Generic macro for getting integer results from pcre_fullinfo */
-#define make_info(tp, cnv, name, option) \
-  CAMLprim value pcre_##name##_stub(value v_rex) \
-  { \
-    tp options; \
-    const int ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_##option, &options); \
-    if (ret != 0) raise_internal_error("pcre_##name##_stub"); \
-    return cnv(options); \
-  }
-
-make_info(unsigned long, Val_long, options, OPTIONS)
-make_info(size_t, Val_long, size, SIZE)
-make_info(size_t, Val_long, studysize, STUDYSIZE)
-make_info(int, Val_int, capturecount, CAPTURECOUNT)
-make_info(int, Val_int, backrefmax, BACKREFMAX)
-make_info(int, Val_int, namecount, NAMECOUNT)
-make_info(int, Val_int, nameentrysize, NAMEENTRYSIZE)
-
-CAMLprim value pcre_firstbyte_stub(value v_rex)
-{
-  int firstbyte;
-  const int ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_FIRSTBYTE, &firstbyte);
-
-  if (ret != 0) raise_internal_error("pcre_firstbyte_stub");
-
-  switch (firstbyte) {
-    case -1 : return var_Start_only; break;  /* [`Start_only] */
-    case -2 : return var_ANCHORED; break;    /* [`ANCHORED] */
-    default :
-      if (firstbyte < 0 )  /* Should not happen */
-        raise_internal_error("pcre_firstbyte_stub");
-      else {
-        value v_firstbyte;
-        /* Allocates the non-constant constructor [`Char of char] and fills
-           in the appropriate value */
-        v_firstbyte = caml_alloc_small(2, 0);
-        Field(v_firstbyte, 0) = var_Char;
-        Field(v_firstbyte, 1) = Val_int(firstbyte);
-        return v_firstbyte;
-      }
-  }
-}
-
-CAMLprim value pcre_firsttable_stub(value v_rex)
-{
-  const unsigned char *ftable;
-
-  int ret =
-    pcre_fullinfo_stub(v_rex, PCRE_INFO_FIRSTTABLE, (void *) &ftable);
-
-  if (ret != 0) raise_internal_error("pcre_firsttable_stub");
-
-  if (ftable == NULL) return None;
-  else {
-    value v_res, v_res_str;
-    char *ptr;
-    int i;
-
-    Begin_roots1(v_rex);
-      v_res_str = caml_alloc_string(32);
-    End_roots();
-
-    ptr = String_val(v_res_str);
-    for (i = 0; i <= 31; ++i) { *ptr = *ftable; ++ptr; ++ftable; }
-
-    Begin_roots1(v_res_str);
-      /* Allocates [Some string] from firsttable */
-      v_res = caml_alloc_small(1, 0);
-    End_roots();
-
-    Field(v_res, 0) = v_res_str;
-
-    return v_res;
-  }
-}
-
-CAMLprim value pcre_lastliteral_stub(value v_rex)
-{
-  int lastliteral;
-  const int ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_LASTLITERAL,
-                                        &lastliteral);
-
-  if (ret != 0) raise_internal_error("pcre_lastliteral_stub");
-
-  if (lastliteral == -1) return None;
-  if (lastliteral < 0) raise_internal_error("pcre_lastliteral_stub");
-  else {
-    /* Allocates [Some char] */
-    value v_res = caml_alloc_small(1, 0);
-    Field(v_res, 0) = Val_int(lastliteral);
-    return v_res;
-  }
-}
-
-CAMLprim value pcre_study_stat_stub(value v_rex)
-{
-  /* Generates the appropriate constant constructor [`Optimal] or
-     [`Studied] if regexp has already been studied */
-  if (Field(v_rex, 3))
-    return ((pcre_extra *) Field(v_rex, 2) == NULL) ? var_Optimal : var_Studied;
-
-  return var_Not_studied;  /* otherwise [`Not_studied] */
-}
-
-static inline void handle_exec_error(char *loc, const int ret) Noreturn;
-
-static inline void handle_exec_error(char *loc, const int ret)
-{
-  switch (ret) {
-    /* Dedicated exceptions */
-    case PCRE_ERROR_NOMATCH : caml_raise_not_found();
-    case PCRE_ERROR_PARTIAL : raise_partial();
-    case PCRE_ERROR_MATCHLIMIT : raise_match_limit();
-    case PCRE_ERROR_BADPARTIAL : raise_bad_partial();
-    case PCRE_ERROR_BADUTF8 : raise_bad_utf8();
-    case PCRE_ERROR_BADUTF8_OFFSET : raise_bad_utf8_offset();
-    case PCRE_ERROR_RECURSIONLIMIT : raise_recursion_limit();
-    /* Unknown error */
-    default : {
-      char err_buf[100];
-      snprintf(err_buf, 100, "%s: unhandled PCRE error code: %d", loc, ret);
-      raise_internal_error(err_buf);
-    }
-  }
-}
-
-static inline void handle_pcre_exec_result(
-  int *ovec, value v_ovec, long ovec_len, long subj_start, int ret)
-{
-  ovec_dst_ptr ocaml_ovec = (ovec_dst_ptr) &Field(v_ovec, 0);
-  const int subgroups2 = ret * 2;
-  const int subgroups2_1 = subgroups2 - 1;
-  const int *ovec_src = ovec + subgroups2_1;
-  ovec_dst_ptr ovec_clear_stop = ocaml_ovec + (ovec_len * 2) / 3;
-  ovec_dst_ptr ovec_dst = ocaml_ovec + subgroups2_1;
-  copy_ovector(subj_start, ovec_src, ovec_dst, subgroups2);
-  while (++ovec_dst < ovec_clear_stop) *ovec_dst = -1;
-}
-
-/* Executes a pattern match with runtime options, a regular expression, a
-   matching position, the start of the the subject string, a subject string,
-   a number of subgroup offsets, an offset vector and an optional callout
-   function */
-CAMLprim value pcre_exec_stub(value v_opt, value v_rex, value v_pos,
-                              value v_subj_start, value v_subj,
-                              value v_ovec, value v_maybe_cof)
-{
-  int ret;
-  long
-    pos = Long_val(v_pos),
-    len = caml_string_length(v_subj),
-    subj_start = Long_val(v_subj_start);
-  long ovec_len = Wosize_val(v_ovec);
-
-  if (pos > len || pos < subj_start)
-    caml_invalid_argument("Pcre.pcre_exec_stub: illegal position");
-
-  if (subj_start > len || subj_start < 0)
-    caml_invalid_argument("Pcre.pcre_exec_stub: illegal subject start");
-
-  pos -= subj_start;
-  len -= subj_start;
-
-  {
-    const pcre *code = (pcre *) Field(v_rex, 1);  /* Compiled pattern */
-    const pcre_extra *extra = (pcre_extra *) Field(v_rex, 2);  /* Extra info */
-    const char *ocaml_subj =
-      String_val(v_subj) + subj_start;  /* Subject string */
-    const int opt = Int_val(v_opt);  /* Runtime options */
-
-    /* Special case when no callout functions specified */
-    if (v_maybe_cof == None) {
-      int *ovec = (int *) &Field(v_ovec, 0);
-
-      /* Performs the match */
-      ret = pcre_exec(code, extra, ocaml_subj, len, pos, opt, ovec, ovec_len);
-
-      if (ret < 0) handle_exec_error("pcre_exec_stub", ret);
-      else handle_pcre_exec_result(ovec, v_ovec, ovec_len, subj_start, ret);
-    }
-
-    /* There are callout functions */
-    else {
-      value v_cof = Field(v_maybe_cof, 0);
-      value v_substrings;
-      char *subj = caml_stat_alloc(sizeof(char) * len);
-      int *ovec = caml_stat_alloc(sizeof(int) * ovec_len);
-      struct cod cod = { 0, (value *) NULL, (value *) NULL, (value) NULL };
-      struct pcre_extra new_extra =
-#ifdef PCRE_EXTRA_MATCH_LIMIT_RECURSION
-# ifdef PCRE_EXTRA_MARK
-#  ifdef PCRE_EXTRA_EXECUTABLE_JIT
-        { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0, NULL, NULL };
-#  else
-        { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0, NULL };
-#  endif
-# else
-        { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL, 0 };
-# endif
-#else
-        { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL, NULL };
-#endif
-
-      cod.subj_start = subj_start;
-      memcpy(subj, ocaml_subj, len);
-
-      Begin_roots4(v_rex, v_cof, v_substrings, v_ovec);
-        Begin_roots1(v_subj);
-          v_substrings = caml_alloc_small(2, 0);
-        End_roots();
-
-        Field(v_substrings, 0) = v_subj;
-        Field(v_substrings, 1) = v_ovec;
-
-        cod.v_substrings_p = &v_substrings;
-        cod.v_cof_p = &v_cof;
-        new_extra.callout_data = &cod;
-
-        if (extra == NULL) {
-          ret = pcre_exec(code, &new_extra, subj, len, pos, opt, ovec,
-                          ovec_len);
-        }
-        else {
-          new_extra.flags = PCRE_EXTRA_CALLOUT_DATA | extra->flags;
-          new_extra.study_data = extra->study_data;
-          new_extra.match_limit = extra->match_limit;
-          new_extra.tables = extra->tables;
-#ifdef PCRE_EXTRA_MATCH_LIMIT_RECURSION
-          new_extra.match_limit_recursion = extra->match_limit_recursion;
-#endif
-
-          ret = pcre_exec(code, &new_extra, subj, len, pos, opt, ovec,
-                          ovec_len);
-        }
-
-        caml_stat_free(subj);
-      End_roots();
-
-      if (ret < 0) {
-        caml_stat_free(ovec);
-        if (ret == PCRE_ERROR_CALLOUT) caml_raise(cod.v_exn);
-        else handle_exec_error("pcre_exec_stub(callout)", ret);
-      } else {
-        handle_pcre_exec_result(ovec, v_ovec, ovec_len, subj_start, ret);
-        caml_stat_free(ovec);
-      }
-    }
-  }
-
-  return Val_unit;
-}
-
-/* Byte-code hook for pcre_exec_stub
-   Needed, because there are more than 5 arguments */
-CAMLprim value pcre_exec_stub_bc(value *argv, int __unused argn)
-{
-  return pcre_exec_stub(argv[0], argv[1], argv[2], argv[3],
-                        argv[4], argv[5], argv[6]);
-}
-
-/* Generates a new set of chartables for the current locale (see man
-   page of PCRE */
-CAMLprim value pcre_maketables_stub(value __unused v_unit)
-{
-  /* GC will do a full cycle every 1_000_000 table set allocations (one
-     table set consumes 864 bytes -> maximum of 864_000_000 bytes unreclaimed
-     table sets) */
-  const value v_res = caml_alloc_final(2, pcre_dealloc_tables, 1, 1000000);
-  Field(v_res, 1) = (value) pcre_maketables();
-  return v_res;
-}
-
-/* Wraps around the isspace-function */
-CAMLprim value pcre_isspace_stub(value v_c)
-{
-  return Val_bool(isspace(Int_val(v_c)));
-}
-
-/* Returns number of substring associated with a name */
-CAMLprim value pcre_get_stringnumber_stub(value v_rex, value v_name)
-{
-  const int ret = pcre_get_stringnumber((pcre *) Field(v_rex, 1),
-                                        String_val(v_name));
-  if (ret == PCRE_ERROR_NOSUBSTRING)
-    caml_invalid_argument("Named string not found");
-
-  return Val_int(ret);
-}
-
-/* Returns array of names of named substrings in a regexp */
-CAMLprim value pcre_names_stub(value v_rex)
-{
-  CAMLparam0();
-  CAMLlocal1(v_res);
-  int name_count;
-  int entry_size;
-  const char *tbl_ptr;
-  int i;
-
-  int ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMECOUNT, &name_count);
-  if (ret != 0) raise_internal_error("pcre_names_stub: namecount");
-
-  ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMEENTRYSIZE, &entry_size);
-  if (ret != 0) raise_internal_error("pcre_names_stub: nameentrysize");
-
-  ret = pcre_fullinfo_stub(v_rex, PCRE_INFO_NAMETABLE, &tbl_ptr);
-  if (ret != 0) raise_internal_error("pcre_names_stub: nametable");
-
-  v_res = caml_alloc(name_count, 0);
-
-  for (i = 0; i < name_count; ++i) {
-    value v_name = caml_copy_string(tbl_ptr + 2);
-    Store_field(v_res, i, v_name);
-    tbl_ptr += entry_size;
-  }
-
-  CAMLreturn(v_res);
-}
-
-/* Generic stub for getting integer results from pcre_config */
-static inline int pcre_config_int(int what)
-{
-  int ret;
-  pcre_config(what, (void *) &ret);
-  return ret;
-}
-
-/* Generic stub for getting long integer results from pcre_config */
-static inline int pcre_config_long(int what)
-{
-  long ret;
-  pcre_config(what, (void *) &ret);
-  return ret;
-}
-
-/* Some stubs for config-functions */
-
-/* Returns boolean indicating UTF8-support */
-CAMLprim value pcre_config_utf8_stub(value __unused v_unit)
-{ return Val_bool(pcre_config_int(PCRE_CONFIG_UTF8)); }
-
-/* Returns character used as newline */
-CAMLprim value pcre_config_newline_stub(value __unused v_unit)
-{ return Val_int(pcre_config_int(PCRE_CONFIG_NEWLINE)); }
-
-/* Returns number of bytes used for internal linkage of regular expressions */
-CAMLprim value pcre_config_link_size_stub(value __unused v_unit)
-{ return Val_int(pcre_config_int(PCRE_CONFIG_LINK_SIZE)); }
-
-/* Returns boolean indicating use of stack recursion */
-CAMLprim value pcre_config_stackrecurse_stub(value __unused v_unit)
-{ return Val_bool(pcre_config_int(PCRE_CONFIG_STACKRECURSE)); }
-
-/* Returns default limit for calls to internal matching function */
-CAMLprim value pcre_config_match_limit_stub(value __unused v_unit)
-{ return Val_long(pcre_config_long(PCRE_CONFIG_MATCH_LIMIT)); }
-
-/* Returns default limit for calls to internal matching function */
-CAMLprim value pcre_config_match_limit_recursion_stub(value __unused v_unit)
-{ return Val_long(pcre_config_long(PCRE_CONFIG_MATCH_LIMIT_RECURSION)); }

+ 28 - 0
libs/pcre2/Makefile

@@ -0,0 +1,28 @@
+ALL_CFLAGS = $(CFLAGS) -I pcre2
+LIBS =
+OCAMLOPT=ocamlopt
+OCAMLC=ocamlc
+SRC = pcre2.ml pcre2_stubs.c
+
+all: bytecode native
+
+bytecode: pcre2.cma
+
+native: pcre2.cmxa
+
+pcre2.cma: pcre2_stubs.o pcre2.ml
+	$(OCAMLC) -safe-string -a -o pcre2.cma $(LIBS) pcre2.ml
+
+pcre2.cmxa: pcre2.ml pcre2_stubs.o
+	$(OCAMLOPT) -safe-string -a -o pcre2.cmxa $(LIBS) pcre2.ml
+
+pcre2_stubs.o: pcre2_stubs.c
+	$(OCAMLC) -safe-string $(ALL_CFLAGS) pcre2_stubs.c
+
+clean:
+	rm -f pcre2.cma pcre2.cmi pcre2.cmx pcre2.cmxa pcre2.o pcre2.obj pcre2_stubs.obj pcre2_stubs.o
+	rm -f pcre2.a libpcre2-8.a libpcre2-8.lib pcre2.cmo
+
+.PHONY: all bytecode native clean
+Makefile: ;
+$(SRC): ;

+ 2 - 2
libs/pcre/dune → libs/pcre2/dune

@@ -1,9 +1,9 @@
 (include_subdirs no)
 
 (library
-	(name pcre)
+	(name pcre2)
 	(foreign_stubs
 		(language c)
-		(names pcre_stubs))
+		(names pcre2_stubs))
 	(wrapped false)
 )

+ 309 - 194
libs/pcre/pcre.ml → libs/pcre2/pcre2.ml

@@ -1,16 +1,20 @@
 (*
-   PCRE-OCAML - Perl Compatibility Regular Expressions for OCaml
+   PCRE2-OCAML - Perl Compatible Regular Expressions for OCaml
+
    Copyright (C) 1999-  Markus Mottl
    email: [email protected]
    WWW:   http://www.ocaml.info
+
    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.
+
    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.
+
    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
@@ -18,17 +22,14 @@
 
 (* Public exceptions and their registration with the C runtime *)
 
-let string_copy str = str
-let buffer_add_subbytes = Buffer.add_subbytes
-
 type error =
   | Partial
-  | BadPartial
   | BadPattern of string * int
-  | BadUTF8
-  | BadUTF8Offset
+  | BadUTF
+  | BadUTFOffset
   | MatchLimit
-  | RecursionLimit
+  | DepthLimit
+  | WorkspaceSize
   | InternalError of string
 
 exception Error of error
@@ -36,87 +37,141 @@ exception Backtrack
 exception Regexp_or of string * error
 
 (* Puts exceptions into global C-variables for fast retrieval *)
-external pcre_ocaml_init : unit -> unit = "pcre_ocaml_init"
+external pcre2_ocaml_init : unit -> unit = "pcre2_ocaml_init"
 
 (* Registers exceptions with the C runtime and caches polymorphic variants *)
 let () =
-  Callback.register_exception "Pcre.Error" (Error (InternalError ""));
-  Callback.register_exception "Pcre.Backtrack" Backtrack;
-  pcre_ocaml_init ()
+  Callback.register_exception "Pcre2.Error" (Error (InternalError ""));
+  Callback.register_exception "Pcre2.Backtrack" Backtrack;
+  pcre2_ocaml_init ()
 
 
 (* Compilation and runtime flags and their conversion functions *)
 
-type icflag = int
-type irflag = int
+type icflag = int64
+type irflag = int64
 
 (* Compilation flags *)
 
 type cflag =
   [
+  | `ALLOW_EMPTY_CLASS
+  | `ALT_BSUX
+  | `ALT_CIRCUMFLEX
+  | `ALT_VERBNAMES
+  | `ANCHORED
+  | `AUTO_CALLOUT
   | `CASELESS
-  | `MULTILINE
+  | `DOLLAR_ENDONLY
   | `DOTALL
+  | `DUPNAMES
+  | `ENDANCHORED
   | `EXTENDED
-  | `ANCHORED
-  | `DOLLAR_ENDONLY
-  | `EXTRA
-  | `UNGREEDY
-  | `UTF8
-  | `NO_UTF8_CHECK
-  | `NO_AUTO_CAPTURE
-  | `AUTO_CALLOUT
+  | `EXTENDED_MORE
   | `FIRSTLINE
+  | `LITERAL
+  | `MATCH_INVALID_UTF
+  | `MATCH_UNSET_BACKREF
+  | `MULTILINE
+  | `NEVER_BACKSLASH_C
+  | `NEVER_UCP
+  | `NEVER_UTF
+  | `NO_AUTO_CAPTURE
+  | `NO_AUTO_POSSESS
+  | `NO_DOTSTAR_ANCHOR
+  | `NO_START_OPTIMIZE
+  | `NO_UTF_CHECK
   | `UCP
+  | `UNGREEDY
+  | `USE_OFFSET_LIMIT
+  | `UTF
   ]
 
 let int_of_cflag = function
-  | `CASELESS -> 0x0001
-  | `MULTILINE -> 0x0002
-  | `DOTALL -> 0x0004
-  | `EXTENDED -> 0x0008
-  | `ANCHORED -> 0x0010
-  | `DOLLAR_ENDONLY -> 0x0020
-  | `EXTRA -> 0x0040
-  | `UNGREEDY -> 0x0200
-  | `UTF8 -> 0x0800
-  | `NO_AUTO_CAPTURE -> 0x1000
-  | `NO_UTF8_CHECK -> 0x2000
-  | `AUTO_CALLOUT -> 0x4000
-  | `FIRSTLINE -> 0x40000
-  | `UCP -> 0x20000000
-
-let coll_icflag icflag flag = int_of_cflag flag lor icflag
-let cflags flags = List.fold_left coll_icflag 0 flags
+  | `ALLOW_EMPTY_CLASS -> 0x00000001L
+  | `ALT_BSUX -> 0x00000002L
+  | `AUTO_CALLOUT -> 0x00000004L
+  | `CASELESS -> 0x00000008L
+  | `DOLLAR_ENDONLY -> 0x00000010L
+  | `DOTALL -> 0x00000020L
+  | `DUPNAMES -> 0x00000040L
+  | `EXTENDED -> 0x00000080L
+  | `FIRSTLINE -> 0x00000100L
+  | `MATCH_UNSET_BACKREF -> 0x00000200L
+  | `MULTILINE -> 0x00000400L
+  | `NEVER_UCP -> 0x00000800L
+  | `NEVER_UTF -> 0x00001000L
+  | `NO_AUTO_CAPTURE -> 0x00002000L
+  | `NO_AUTO_POSSESS -> 0x00004000L
+  | `NO_DOTSTAR_ANCHOR -> 0x00008000L
+  | `NO_START_OPTIMIZE -> 0x00010000L
+  | `UCP -> 0x00020000L
+  | `UNGREEDY -> 0x00040000L
+  | `UTF -> 0x00080000L
+  | `NEVER_BACKSLASH_C -> 0x00100000L
+  | `ALT_CIRCUMFLEX -> 0x00200000L
+  | `ALT_VERBNAMES -> 0x00400000L
+  | `USE_OFFSET_LIMIT -> 0x00800000L
+  | `EXTENDED_MORE -> 0x01000000L
+  | `LITERAL -> 0x02000000L
+  | `MATCH_INVALID_UTF -> 0x04000000L
+  | `ENDANCHORED -> 0x20000000L
+  | `NO_UTF_CHECK -> 0x40000000L
+  | `ANCHORED -> 0x80000000L
+
+
+let coll_icflag icflag flag = Int64.logor (int_of_cflag flag) icflag
+let cflags flags = List.fold_left coll_icflag 0L flags
 
 let cflag_of_int = function
-  | 0x0001 -> `CASELESS
-  | 0x0002 -> `MULTILINE
-  | 0x0004 -> `DOTALL
-  | 0x0008 -> `EXTENDED
-  | 0x0010 -> `ANCHORED
-  | 0x0020 -> `DOLLAR_ENDONLY
-  | 0x0040 -> `EXTRA
-  | 0x0200 -> `UNGREEDY
-  | 0x0800 -> `UTF8
-  | 0x1000 -> `NO_AUTO_CAPTURE
-  | 0x2000 -> `NO_UTF8_CHECK
-  | 0x4000 -> `AUTO_CALLOUT
-  | 0x40000 -> `FIRSTLINE
-  | 0x20000000 -> `UCP
-  | _ -> failwith "Pcre.cflag_list: unknown compilation flag"
+  | 0x00000001L -> `ALLOW_EMPTY_CLASS
+  | 0x00000002L -> `ALT_BSUX
+  | 0x00000004L -> `AUTO_CALLOUT
+  | 0x00000008L -> `CASELESS
+  | 0x00000010L -> `DOLLAR_ENDONLY
+  | 0x00000020L -> `DOTALL
+  | 0x00000040L -> `DUPNAMES
+  | 0x00000080L -> `EXTENDED
+  | 0x00000100L -> `FIRSTLINE
+  | 0x00000200L -> `MATCH_UNSET_BACKREF
+  | 0x00000400L -> `MULTILINE
+  | 0x00000800L -> `NEVER_UCP
+  | 0x00001000L -> `NEVER_UTF
+  | 0x00002000L -> `NO_AUTO_CAPTURE
+  | 0x00004000L -> `NO_AUTO_POSSESS
+  | 0x00008000L -> `NO_DOTSTAR_ANCHOR
+  | 0x00010000L -> `NO_START_OPTIMIZE
+  | 0x00020000L -> `UCP
+  | 0x00040000L -> `UNGREEDY
+  | 0x00080000L -> `UTF
+  | 0x00100000L -> `NEVER_BACKSLASH_C
+  | 0x00200000L -> `ALT_CIRCUMFLEX
+  | 0x00400000L -> `ALT_VERBNAMES
+  | 0x00800000L -> `USE_OFFSET_LIMIT
+  | 0x01000000L -> `EXTENDED_MORE
+  | 0x02000000L -> `LITERAL
+  | 0x04000000L -> `MATCH_INVALID_UTF
+  | 0x20000000L -> `ENDANCHORED
+  | 0x40000000L -> `NO_UTF_CHECK
+  | 0x80000000L -> `ANCHORED
+  | _ -> failwith "Pcre2.cflag_list: unknown compilation flag"
 
 let all_cflags =
   [
-    0x0001; 0x0002; 0x0004; 0x0008; 0x0010; 0x0020;
-    0x0040; 0x0200; 0x0800; 0x1000; 0x2000; 0x4000; 0x40000;
-	0x20000000
+    0x00000001L; 0x00000002L; 0x00000004L; 0x00000008L;
+    0x00000010L; 0x00000020L; 0x00000040L; 0x00000080L;
+    0x00000100L; 0x00000200L; 0x00000400L; 0x00000800L;
+    0x00001000L; 0x00002000L; 0x00004000L; 0x00008000L;
+    0x00010000L; 0x00020000L; 0x00040000L; 0x00080000L;
+    0x00100000L; 0x00200000L; 0x00400000L; 0x00800000L;
+    0x01000000L; 0x02000000L; 0x04000000L;
+    0x20000000L; 0x40000000L; 0x80000000L;
   ]
 
 let cflag_list icflags =
   let coll flag_list flag =
-    if icflags land flag <> 0 then cflag_of_int flag :: flag_list
-    else flag_list in
+    if Int64.equal (Int64.logand icflags flag) 0L then flag_list
+    else cflag_of_int flag :: flag_list in
   List.fold_left coll [] all_cflags
 
 
@@ -125,144 +180,180 @@ let cflag_list icflags =
 type rflag =
   [
   | `ANCHORED
+  | `COPY_MATCHED_SUBJECT
+  | `DFA_RESTART
+  | `DFA_SHORTEST
+  | `ENDANCHORED
   | `NOTBOL
   | `NOTEOL
   | `NOTEMPTY
-  | `PARTIAL
+  | `NOTEMPTY_ATSTART
+  | `NO_JIT
+  | `NO_UTF_CHECK
+  | `PARTIAL_HARD
+  | `PARTIAL_SOFT
   ]
 
 let int_of_rflag = function
-  | `ANCHORED -> 0x0010
-  | `NOTBOL -> 0x0080
-  | `NOTEOL -> 0x0100
-  | `NOTEMPTY -> 0x0400
-  | `PARTIAL -> 0x8000
-
-let coll_irflag irflag flag = int_of_rflag flag lor irflag
-let rflags flags = List.fold_left coll_irflag 0 flags
+  | `NOTBOL -> 0x00000001L
+  | `NOTEOL -> 0x00000002L
+  | `NOTEMPTY ->  0x00000004L
+  | `NOTEMPTY_ATSTART -> 0x00000008L
+  | `PARTIAL_SOFT -> 0x00000010L
+  | `PARTIAL_HARD -> 0x00000020L
+  | `DFA_RESTART -> 0x00000040L
+  | `DFA_SHORTEST -> 0x00000080L
+  | `NO_JIT -> 0x00002000L
+  | `COPY_MATCHED_SUBJECT -> 0x00004000L
+  | `ENDANCHORED -> 0x20000000L
+  | `NO_UTF_CHECK -> 0x40000000L
+  | `ANCHORED -> 0x80000000L
+
+let coll_irflag irflag flag = Int64.logor (int_of_rflag flag) irflag
+let rflags flags = List.fold_left coll_irflag 0L flags
 
 let rflag_of_int = function
-  | 0x0010 -> `ANCHORED
-  | 0x0080 -> `NOTBOL
-  | 0x0100 -> `NOTEOL
-  | 0x0400 -> `NOTEMPTY
-  | 0x8000 -> `PARTIAL
-  | _ -> failwith "Pcre.rflag_list: unknown runtime flag"
-
-let all_rflags = [0x0010; 0x0080; 0x0100; 0x0400; 0x8000]
+  | 0x00000001L -> `NOTBOL
+  | 0x00000002L -> `NOTEOL
+  | 0x00000004L -> `NOTEMPTY
+  | 0x00000008L -> `NOTEMPTY_ATSTART
+  | 0x00000010L -> `PARTIAL_SOFT
+  | 0x00000020L -> `PARTIAL_HARD
+  | 0x00000040L -> `DFA_RESTART
+  | 0x00000080L -> `DFA_SHORTEST
+  | 0x00002000L -> `NO_JIT
+  | 0x00004000L -> `COPY_MATCHED_SUBJECT
+  | 0x20000000L -> `ENDANCHORED
+  | 0x40000000L -> `NO_UTF_CHECK
+  | 0x80000000L -> `ANCHORED
+  | _ -> failwith "Pcre2.rflag_list: unknown runtime flag"
+
+let all_rflags =
+  [
+    0x00000001L; 0x00000002L; 0x00000004L; 0x00000008L;
+    0x00000010L; 0x00000020L; 0x00000040L; 0x00000080L;
+    0x00002000L; 0x00004000L;
+    0x20000000L; 0x40000000L; 0x80000000L;
+  ]
 
 let rflag_list irflags =
   let coll flag_list flag =
-    if irflags land flag <> 0 then rflag_of_int flag :: flag_list
-    else flag_list in
+    if Int64.equal (Int64.logand irflags flag) 0L then flag_list
+    else rflag_of_int flag :: flag_list in
   List.fold_left coll [] all_rflags
 
 
-(* Information on the PCRE-configuration (build-time options) *)
+(* Information on the PCRE2-configuration (build-time options) *)
 
-external pcre_version : unit -> string = "pcre_version_stub"
+external pcre2_version : unit -> string = "pcre2_version_stub"
 
-external pcre_config_utf8 : unit -> bool = "pcre_config_utf8_stub" [@@noalloc]
+external pcre2_config_unicode : unit -> bool
+  = "pcre2_config_unicode_stub" [@@noalloc]
 
-external pcre_config_newline :
-  unit -> char = "pcre_config_newline_stub" [@@noalloc]
+external pcre2_config_newline : unit -> char
+  = "pcre2_config_newline_stub" [@@noalloc]
 
-external pcre_config_link_size :
-  unit -> int = "pcre_config_link_size_stub" [@@noalloc]
+external pcre2_config_link_size : unit -> (int [@untagged])
+  = "pcre2_config_link_size_stub_bc" "pcre2_config_link_size_stub" [@@noalloc]
 
-external pcre_config_match_limit :
-  unit -> int = "pcre_config_match_limit_stub" [@@noalloc]
+external pcre2_config_match_limit : unit -> (int [@untagged])
+  = "pcre2_config_match_limit_stub_bc" "pcre2_config_match_limit_stub"
+  [@@noalloc]
 
-external pcre_config_match_limit_recursion :
-  unit -> int = "pcre_config_match_limit_recursion_stub" [@@noalloc]
+external pcre2_config_depth_limit : unit -> (int [@untagged])
+  = "pcre2_config_depth_limit_stub_bc" "pcre2_config_depth_limit_stub"
+  [@@noalloc]
 
-external pcre_config_stackrecurse :
-  unit -> bool = "pcre_config_stackrecurse_stub" [@@noalloc]
+external pcre2_config_stackrecurse :
+  unit -> bool = "pcre2_config_stackrecurse_stub" [@@noalloc]
 
-let version = pcre_version ()
-let config_utf8 = pcre_config_utf8 ()
-let config_newline = pcre_config_newline ()
-let config_link_size = pcre_config_link_size ()
-let config_match_limit = pcre_config_match_limit ()
-let config_match_limit_recursion = pcre_config_match_limit_recursion ()
-let config_stackrecurse = pcre_config_stackrecurse ()
+let version = pcre2_version ()
+let config_unicode = pcre2_config_unicode ()
+let config_newline = pcre2_config_newline ()
+let config_link_size = pcre2_config_link_size ()
+let config_match_limit = pcre2_config_match_limit ()
+let config_depth_limit = pcre2_config_depth_limit ()
+let config_stackrecurse = pcre2_config_stackrecurse ()
 
 
 (* Information on patterns *)
 
-type firstbyte_info =
+type firstcodeunit_info =
   [ `Char of char
   | `Start_only
   | `ANCHORED ]
 
-type study_stat =
-  [ `Not_studied
-  | `Studied
-  | `Optimal ]
-
 type regexp
 
-external options : regexp -> icflag = "pcre_options_stub"
-external size : regexp -> int = "pcre_size_stub"
-external studysize : regexp -> int = "pcre_studysize_stub"
-external capturecount : regexp -> int = "pcre_capturecount_stub"
-external backrefmax : regexp -> int = "pcre_backrefmax_stub"
-external namecount : regexp -> int = "pcre_namecount_stub"
-external names : regexp -> string array = "pcre_names_stub"
-external nameentrysize : regexp -> int = "pcre_nameentrysize_stub"
-external firstbyte : regexp -> firstbyte_info = "pcre_firstbyte_stub"
-external firsttable : regexp -> string option = "pcre_firsttable_stub"
-external lastliteral : regexp -> char option = "pcre_lastliteral_stub"
-external study_stat : regexp -> study_stat = "pcre_study_stat_stub" [@@noalloc]
+external options : regexp -> (icflag [@unboxed])
+  = "pcre2_argoptions_stub_bc" "pcre2_argoptions_stub"
+
+external size : regexp -> (int [@untagged])
+  = "pcre2_size_stub_bc" "pcre2_size_stub"
+
+external capturecount : regexp -> (int [@untagged])
+  = "pcre2_capturecount_stub_bc" "pcre2_capturecount_stub"
 
+external backrefmax : regexp -> (int [@untagged])
+  = "pcre2_backrefmax_stub_bc" "pcre2_backrefmax_stub"
+
+external namecount : regexp -> (int [@untagged])
+  = "pcre2_namecount_stub_bc" "pcre2_namecount_stub"
+
+external nameentrysize : regexp -> (int [@untagged])
+  = "pcre2_nameentrysize_stub_bc" "pcre2_nameentrysize_stub"
+
+external names : regexp -> string array = "pcre2_names_stub"
+external firstcodeunit : regexp -> firstcodeunit_info = "pcre2_firstcodeunit_stub"
+external lastcodeunit : regexp -> char option = "pcre2_lastcodeunit_stub"
 
 (* Compilation of patterns *)
 
 type chtables
 
-external maketables : unit -> chtables = "pcre_maketables_stub"
+external maketables : unit -> chtables = "pcre2_maketables_stub"
 
-(*  Internal use only! *)
-external pcre_study : regexp -> unit = "pcre_study_stub"
+external compile : (icflag [@unboxed]) -> chtables option -> string -> regexp
+  = "pcre2_compile_stub_bc" "pcre2_compile_stub"
 
-external compile :
-  icflag -> chtables option -> string -> regexp = "pcre_compile_stub"
-
-external get_match_limit : regexp -> int option = "pcre_get_match_limit_stub"
+(* external get_match_limit : regexp -> int option = "pcre2_get_match_limit_stub" *)
 
 (* Internal use only! *)
-external set_imp_match_limit :
-  regexp -> int -> regexp = "pcre_set_imp_match_limit_stub" [@@noalloc]
+external set_imp_match_limit : regexp -> (int [@untagged]) -> regexp
+  = "pcre2_set_imp_match_limit_stub_bc" "pcre2_set_imp_match_limit_stub"
+  [@@noalloc]
 
-external get_match_limit_recursion :
-  regexp -> int option = "pcre_get_match_limit_recursion_stub"
+(* external get_depth_limit :
+  regexp -> int option = "pcre2_get_depth_limit_stub" *)
 
 (* Internal use only! *)
-external set_imp_match_limit_recursion :
-  regexp -> int -> regexp = "pcre_set_imp_match_limit_recursion_stub" [@@noalloc]
+external set_imp_depth_limit : regexp -> (int [@untagged]) -> regexp
+  = "pcre2_set_imp_depth_limit_stub_bc" "pcre2_set_imp_depth_limit_stub"
+  [@@noalloc]
 
+(* TODO: implement JIT compilation using the new pcre2_jit_compile API *)
 let regexp
-      ?(study = true) ?limit ?limit_recursion
-      ?(iflags = 0) ?flags ?chtables pat =
+      (* ?(jit_compile = false) *)
+      ?limit ?depth_limit
+      ?(iflags = 0L) ?flags ?chtables pat =
   let rex =
     match flags with
     | Some flag_list -> compile (cflags flag_list) chtables pat
     | _ -> compile iflags chtables pat
   in
-  if study then pcre_study rex;
   let rex =
     match limit with
     | None -> rex
     | Some lim -> set_imp_match_limit rex lim
   in
-  match limit_recursion with
+  match depth_limit with
   | None -> rex
-  | Some lim -> set_imp_match_limit_recursion rex lim
+  | Some lim -> set_imp_depth_limit rex lim
 
 let regexp_or
-      ?study ?limit ?limit_recursion ?(iflags = 0) ?flags ?chtables pats =
+      (* ?jit_compile *) ?limit ?depth_limit ?(iflags = 0L) ?flags ?chtables pats =
   let check pat =
-    try ignore (regexp ~study:false ~iflags ?flags ?chtables pat)
+    try ignore (regexp ~iflags ?flags ?chtables pat)
     with Error error -> raise (Regexp_or (pat, error))
   in
   List.iter check pats;
@@ -270,7 +361,7 @@ let regexp_or
     let cnv pat = "(?:" ^ pat ^ ")" in
     String.concat "|" (List.rev (List.rev_map cnv pats))
   in
-  regexp ?study ?limit ?limit_recursion ~iflags ?flags ?chtables big_pat
+  regexp (* ?jit_compile *) ?limit ?depth_limit ~iflags ?flags ?chtables big_pat
 
 let bytes_unsafe_blit_string str str_ofs bts bts_ofs len =
   let str_bts = Bytes.unsafe_of_string str in
@@ -301,7 +392,7 @@ let quote s =
 (* Matching of patterns and subpattern extraction *)
 
 (* Default regular expression when none is provided by the user *)
-let def_rex = regexp "\\s+"
+let def_rex = regexp (* ~jit_compile:true *) "\\s+"
 
 type substrings = string * int array
 
@@ -325,7 +416,7 @@ let num_of_subs (_, ovector) = Array.length ovector / 3
 
 let get_offset_start ovector str_num =
   if str_num < 0 || str_num >= Array.length ovector / 3 then
-    invalid_arg "Pcre.get_offset_start: illegal offset";
+    invalid_arg "Pcre2.get_offset_start: illegal offset";
   let offset = str_num lsl 1 in
   offset, Array.unsafe_get ovector offset
 
@@ -370,8 +461,10 @@ let get_opt_substrings ?(full_match = true) (_, ovector as substrings) =
     let len = (Array.length ovector / 3) - 1 in
     Array.init len (fun n -> unsafe_get_opt_substring substrings (n + 1))
 
-external get_stringnumber :
-  regexp -> string -> int = "pcre_get_stringnumber_stub"
+external get_stringnumber : regexp -> string -> (int [@untagged])
+  =
+  "pcre2_substring_number_from_name_stub_bc"
+  "pcre2_substring_number_from_name_stub"
 
 let get_named_substring rex name substrings =
   get_substring substrings (get_stringnumber rex name)
@@ -379,48 +472,68 @@ let get_named_substring rex name substrings =
 let get_named_substring_ofs rex name substrings =
   get_substring_ofs substrings (get_stringnumber rex name)
 
-external unsafe_pcre_exec :
-  irflag ->
+external unsafe_pcre2_match :
+  (irflag [@unboxed]) ->
   regexp ->
-  pos : int ->
-  subj_start : int ->
+  pos : (int [@untagged]) ->
+  subj_start : (int [@untagged]) ->
   subj : string ->
   int array ->
   callout option ->
-  unit = "pcre_exec_stub_bc" "pcre_exec_stub"
+  unit = "pcre2_match_stub_bc" "pcre2_match_stub"
 
 let make_ovector rex =
   let subgroups1 = capturecount rex + 1 in
   let subgroups2 = subgroups1 lsl 1 in
   subgroups2, Array.make (subgroups1 + subgroups2) 0
 
-let pcre_exec ?(iflags = 0) ?flags ?(rex = def_rex) ?pat ?(pos = 0)
+external unsafe_pcre2_dfa_match :
+  (irflag [@unboxed]) ->
+  regexp ->
+  pos : (int [@untagged]) ->
+  subj_start : (int [@untagged]) ->
+  subj : string ->
+  int array ->
+  callout option ->
+  workspace : int array ->
+  unit = "pcre2_dfa_match_stub_bc" "pcre2_match_stub0"
+
+let pcre2_dfa_match ?(iflags = 0L) ?flags ?(rex = def_rex) ?pat ?(pos = 0)
+                  ?callout ?(workspace = Array.make 20 0) subj =
+  let rex = match pat with Some str -> regexp str | _ -> rex in
+  let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
+  let _, ovector = make_ovector rex in
+  unsafe_pcre2_dfa_match
+    iflags rex ~pos ~subj_start:0 ~subj ovector callout ~workspace;
+  ovector
+
+let pcre2_match ?(iflags = 0L) ?flags ?(rex = def_rex) ?pat ?(pos = 0)
               ?callout subj =
   let rex = match pat with Some str -> regexp str | _ -> rex in
   let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
   let _, ovector = make_ovector rex in
-  unsafe_pcre_exec iflags rex ~pos ~subj_start:0 ~subj ovector callout;
+  unsafe_pcre2_match iflags rex ~pos ~subj_start:0 ~subj ovector callout;
   ovector
 
 let exec ?iflags ?flags ?rex ?pat ?pos ?callout subj =
-  subj, pcre_exec ?iflags ?flags ?rex ?pat ?pos ?callout subj
+  subj, pcre2_match ?iflags ?flags ?rex ?pat ?pos ?callout subj
 
 let next_match ?iflags ?flags ?rex ?pat ?(pos = 0) ?callout (subj, ovector) =
   let pos = Array.unsafe_get ovector 1 + pos in
   let subj_len = String.length subj in
   if pos < 0 || pos > subj_len then
-    invalid_arg "Pcre.next_match: illegal offset";
-  subj, pcre_exec ?iflags ?flags ?rex ?pat ~pos ?callout subj
+    invalid_arg "Pcre2.next_match: illegal offset";
+  subj, pcre2_match ?iflags ?flags ?rex ?pat ~pos ?callout subj
 
 let rec copy_lst ar n = function
   | [] -> ar
   | h :: t -> Array.unsafe_set ar n h; copy_lst ar (n - 1) t
 
-let exec_all ?(iflags = 0) ?flags ?(rex = def_rex) ?pat ?pos ?callout subj =
+let exec_all ?(iflags = 0L) ?flags ?(rex = def_rex) ?pat ?pos ?callout subj =
   let rex = match pat with Some str -> regexp str | _ -> rex in
   let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
   let (_, ovector as sstrs) = exec ~iflags ~rex ?pos ?callout subj in
-  let null_flags = iflags lor 0x0400 in
+  let null_flags = Int64.logor iflags 0x00000004L in (* `NOTEMPTY *)
   let subj_len = String.length subj in
   let rec loop pos (subj, ovector as sstrs) n lst =
     let maybe_ovector =
@@ -428,8 +541,8 @@ let exec_all ?(iflags = 0) ?flags ?(rex = def_rex) ?pat ?pos ?callout subj =
         let first = Array.unsafe_get ovector 0 in
         if first = pos && Array.unsafe_get ovector 1 = pos then
           if pos = subj_len then None
-          else Some (pcre_exec ~iflags:null_flags ~rex ~pos ?callout subj)
-        else Some (pcre_exec ~iflags ~rex ~pos ?callout subj)
+          else Some (pcre2_match ~iflags:null_flags ~rex ~pos ?callout subj)
+        else Some (pcre2_match ~iflags ~rex ~pos ?callout subj)
       with Not_found -> None in
     match maybe_ovector with
     | Some ovector ->
@@ -454,7 +567,7 @@ let extract_all_opt ?iflags ?flags ?rex ?pat ?pos ?full_match ?callout subj =
   Array.map (get_opt_substrings ?full_match) many_sstrs
 
 let pmatch ?iflags ?flags ?rex ?pat ?pos ?callout subj =
-  try ignore (pcre_exec ?iflags ?flags ?rex ?pat ?pos ?callout subj); true
+  try ignore (pcre2_match ?iflags ?flags ?rex ?pat ?pos ?callout subj); true
   with Not_found -> false
 
 
@@ -559,7 +672,7 @@ let calc_trans_lst subgroups2 ovector subj templ subst_lst =
         return_lst (subj, !ix, Array.unsafe_get ovector (!pos + 1) - !ix) in
   List.fold_left coll (0, []) subst_lst
 
-let replace ?(iflags = 0) ?flags ?(rex = def_rex) ?pat
+let replace ?(iflags = 0L) ?flags ?(rex = def_rex) ?pat
             ?(pos = 0) ?(itempl = def_subst) ?templ ?callout subj =
   let rex = match pat with Some str -> regexp str | _ -> rex in
   let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
@@ -568,17 +681,17 @@ let replace ?(iflags = 0) ?flags ?(rex = def_rex) ?pat
     | Some str -> subst str
     | _ -> itempl in
   let subj_len = String.length subj in
-  if pos < 0 || pos > subj_len then invalid_arg "Pcre.replace: illegal offset";
+  if pos < 0 || pos > subj_len then invalid_arg "Pcre2.replace: illegal offset";
   let subgroups2, ovector = make_ovector rex in
   let nsubs = (subgroups2 lsr 1) - 1 in
   if max_br > nsubs then
-    failwith "Pcre.replace: backreference denotes nonexistent subpattern";
-  if with_lp && nsubs = 0 then failwith "Pcre.replace: no backreferences";
+    failwith "Pcre2.replace: backreference denotes nonexistent subpattern";
+  if with_lp && nsubs = 0 then failwith "Pcre2.replace: no backreferences";
   let rec loop full_len trans_lsts cur_pos =
     if
       cur_pos > subj_len ||
       try
-        unsafe_pcre_exec
+        unsafe_pcre2_match
           iflags rex ~pos:cur_pos ~subj_start:0 ~subj
           ovector callout;
         false
@@ -617,19 +730,19 @@ let replace ?(iflags = 0) ?flags ?(rex = def_rex) ?pat
       else loop full_len trans_lsts last in
   loop 0 [] pos
 
-let qreplace ?(iflags = 0) ?flags ?(rex = def_rex) ?pat
+let qreplace ?(iflags = 0L) ?flags ?(rex = def_rex) ?pat
              ?(pos = 0) ?(templ = "") ?callout subj =
   let rex = match pat with Some str -> regexp str | _ -> rex in
   let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
   let subj_len = String.length subj in
-  if pos < 0 || pos > subj_len then invalid_arg "Pcre.qreplace: illegal offset";
+  if pos < 0 || pos > subj_len then invalid_arg "Pcre2.qreplace: illegal offset";
   let templ_len = String.length templ in
   let _, ovector = make_ovector rex in
   let rec loop full_len subst_lst cur_pos =
     if
       cur_pos > subj_len ||
       try
-        unsafe_pcre_exec
+        unsafe_pcre2_match
           iflags rex ~pos:cur_pos ~subj_start:0 ~subj ovector callout;
         false
       with Not_found -> true
@@ -666,18 +779,18 @@ let qreplace ?(iflags = 0) ?flags ?(rex = def_rex) ?pat
       else loop full_len subst_lst last in
   loop 0 [] pos
 
-let substitute_substrings ?(iflags = 0) ?flags ?(rex = def_rex) ?pat
+let substitute_substrings ?(iflags = 0L) ?flags ?(rex = def_rex) ?pat
                           ?(pos = 0) ?callout ~subst subj =
   let rex = match pat with Some str -> regexp str | _ -> rex in
   let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
   let subj_len = String.length subj in
-  if pos < 0 || pos > subj_len then invalid_arg "Pcre.substitute: illegal offset";
+  if pos < 0 || pos > subj_len then invalid_arg "Pcre2.substitute: illegal offset";
   let _, ovector = make_ovector rex in
   let rec loop full_len subst_lst cur_pos =
     if
       cur_pos > subj_len ||
       try
-        unsafe_pcre_exec
+        unsafe_pcre2_match
           iflags rex ~pos:cur_pos ~subj_start:0 ~subj ovector callout;
         false
       with Not_found -> true
@@ -719,7 +832,7 @@ let substitute ?iflags ?flags ?rex ?pat ?pos ?callout ~subst:str_subst subj =
     str_subst (string_unsafe_sub subj first (last - first)) in
   substitute_substrings ?iflags ?flags ?rex ?pat ?pos ?callout ~subst subj
 
-let replace_first ?(iflags = 0) ?flags ?(rex = def_rex) ?pat ?(pos = 0)
+let replace_first ?(iflags = 0L) ?flags ?(rex = def_rex) ?pat ?(pos = 0)
                   ?(itempl = def_subst) ?templ ?callout subj =
   let rex = match pat with Some str -> regexp str | _ -> rex in
   let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
@@ -730,10 +843,10 @@ let replace_first ?(iflags = 0) ?flags ?(rex = def_rex) ?pat ?(pos = 0)
   let subgroups2, ovector = make_ovector rex in
   let nsubs = (subgroups2 lsr 1) - 1 in
   if max_br > nsubs then
-    failwith "Pcre.replace_first: backreference denotes nonexistent subpattern";
-  if with_lp && nsubs = 0 then failwith "Pcre.replace_first: no backreferences";
+    failwith "Pcre2.replace_first: backreference denotes nonexistent subpattern";
+  if with_lp && nsubs = 0 then failwith "Pcre2.replace_first: no backreferences";
   try
-    unsafe_pcre_exec iflags rex ~pos ~subj_start:0 ~subj ovector callout;
+    unsafe_pcre2_match iflags rex ~pos ~subj_start:0 ~subj ovector callout;
     let res_len, trans_lst =
       calc_trans_lst subgroups2 ovector subj templ subst_lst in
     let first = Array.unsafe_get ovector 0 in
@@ -746,15 +859,15 @@ let replace_first ?(iflags = 0) ?flags ?(rex = def_rex) ?pat ?(pos = 0)
     let ofs = List.fold_left coll first trans_lst in
     bytes_unsafe_blit_string subj last res ofs rest;
     Bytes.unsafe_to_string res
-  with Not_found -> string_copy subj
+  with Not_found -> subj
 
-let qreplace_first ?(iflags = 0) ?flags ?(rex = def_rex) ?pat
+let qreplace_first ?(iflags = 0L) ?flags ?(rex = def_rex) ?pat
                    ?(pos = 0) ?(templ = "") ?callout subj =
   let rex = match pat with Some str -> regexp str | _ -> rex in
   let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
   let _, ovector = make_ovector rex in
   try
-    unsafe_pcre_exec iflags rex ~pos ~subj_start:0 ~subj ovector callout;
+    unsafe_pcre2_match iflags rex ~pos ~subj_start:0 ~subj ovector callout;
     let first = Array.unsafe_get ovector 0 in
     let last = Array.unsafe_get ovector 1 in
     let len = String.length templ in
@@ -765,15 +878,15 @@ let qreplace_first ?(iflags = 0) ?flags ?(rex = def_rex) ?pat
     bytes_unsafe_blit_string templ 0 res first len;
     bytes_unsafe_blit_string subj last res postfix_start rest;
     Bytes.unsafe_to_string res
-  with Not_found -> string_copy subj
+  with Not_found -> subj
 
-let substitute_substrings_first ?(iflags = 0) ?flags ?(rex = def_rex) ?pat
+let substitute_substrings_first ?(iflags = 0L) ?flags ?(rex = def_rex) ?pat
                                 ?(pos = 0) ?callout ~subst subj =
   let rex = match pat with Some str -> regexp str | _ -> rex in
   let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
   let _, ovector = make_ovector rex in
   try
-    unsafe_pcre_exec iflags rex ~pos ~subj_start:0 ~subj ovector callout;
+    unsafe_pcre2_match iflags rex ~pos ~subj_start:0 ~subj ovector callout;
     let subj_len = String.length subj in
     let prefix_len = Array.unsafe_get ovector 0 in
     let last = Array.unsafe_get ovector 1 in
@@ -786,7 +899,7 @@ let substitute_substrings_first ?(iflags = 0) ?flags ?(rex = def_rex) ?pat
     bytes_unsafe_blit_string templ 0 res prefix_len templ_len;
     bytes_unsafe_blit_string subj last res postfix_start postfix_len;
     Bytes.unsafe_to_string res
-  with Not_found -> string_copy subj
+  with Not_found -> subj
 
 let substitute_first ?iflags ?flags ?rex ?pat ?pos
                      ?callout ~subst:str_subst subj =
@@ -803,7 +916,7 @@ let substitute_first ?iflags ?flags ?rex ?pat ?pos
 let internal_psplit flags rex max pos callout subj =
   let subj_len = String.length subj in
   if subj_len = 0 then []
-  else if max = 1 then [string_copy subj]
+  else if max = 1 then [subj]
   else
     let subgroups2, ovector = make_ovector rex in
 
@@ -831,7 +944,7 @@ let internal_psplit flags rex max pos callout subj =
         if cnt = 0 then
           if prematch &&
             try
-              unsafe_pcre_exec
+              unsafe_pcre2_match
                 flags rex ~pos ~subj_start:pos ~subj ovector callout;
               true
             with Not_found -> false
@@ -845,7 +958,7 @@ let internal_psplit flags rex max pos callout subj =
         else
           if
             try
-              unsafe_pcre_exec
+              unsafe_pcre2_match
                 flags rex ~pos ~subj_start:pos ~subj ovector callout;
               false
             with Not_found -> true
@@ -859,8 +972,9 @@ let internal_psplit flags rex max pos callout subj =
                 if len = 0 then "" :: strs
                 else if
                   try
-                    unsafe_pcre_exec
-                      (flags lor 0x0410) rex ~pos ~subj_start:pos ~subj
+                    unsafe_pcre2_match
+                      (* `ANCHORED | `NOTEMPTY *)
+                      (Int64.logor flags 0x80000004L) rex ~pos ~subj_start:pos ~subj
                       ovector callout;
                     true
                   with Not_found -> false
@@ -880,13 +994,13 @@ let internal_psplit flags rex max pos callout subj =
 
 let rec strip_all_empty = function "" :: t -> strip_all_empty t | l -> l
 
-external isspace : char -> bool = "pcre_isspace_stub" [@@noalloc]
+external isspace : char -> bool = "pcre2_isspace_stub" [@@noalloc]
 
 let rec find_no_space ix len str =
   if ix = len || not (isspace (String.unsafe_get str ix)) then ix
   else find_no_space (ix + 1) len str
 
-let split ?(iflags = 0) ?flags ?rex ?pat ?(pos = 0) ?(max = 0) ?callout subj =
+let split ?(iflags = 0L) ?flags ?rex ?pat ?(pos = 0) ?(max = 0) ?callout subj =
   let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
   let res =
     match pat, rex with
@@ -895,7 +1009,7 @@ let split ?(iflags = 0) ?flags ?rex ?pat ?(pos = 0) ?(max = 0) ?callout subj =
     | _ ->
         (* special case for Perl-splitting semantics *)
         let len = String.length subj in
-        if pos > len || pos < 0 then failwith "Pcre.split: illegal offset";
+        if pos > len || pos < 0 then failwith "Pcre2.split: illegal offset";
         let new_pos = find_no_space pos len subj in
         internal_psplit iflags def_rex max new_pos callout subj in
   List.rev (if max = 0 then strip_all_empty res else res)
@@ -915,13 +1029,13 @@ let rec strip_all_empty_full = function
   | Delim _ :: rest -> strip_all_empty_full rest
   | l -> l
 
-let full_split ?(iflags = 0) ?flags ?(rex = def_rex) ?pat
+let full_split ?(iflags = 0L) ?flags ?(rex = def_rex) ?pat
                ?(pos = 0) ?(max = 0) ?callout subj =
   let rex = match pat with Some str -> regexp str | _ -> rex in
   let iflags = match flags with Some flags -> rflags flags | _ -> iflags in
   let subj_len = String.length subj in
   if subj_len = 0 then []
-  else if max = 1 then [Text (string_copy subj)]
+  else if max = 1 then [Text (subj)]
   else
     let subgroups2, ovector = make_ovector rex in
 
@@ -952,7 +1066,7 @@ let full_split ?(iflags = 0) ?flags ?(rex = def_rex) ?pat
         if cnt = 0 then
           if prematch &&
             try
-              unsafe_pcre_exec
+              unsafe_pcre2_match
                 iflags rex ~pos ~subj_start:pos ~subj ovector callout;
                true
             with Not_found -> false
@@ -970,7 +1084,7 @@ let full_split ?(iflags = 0) ?flags ?(rex = def_rex) ?pat
         else
           if
             try
-              unsafe_pcre_exec
+              unsafe_pcre2_match
                 iflags rex ~pos ~subj_start:pos ~subj ovector callout;
               false
             with Not_found -> true
@@ -987,8 +1101,9 @@ let full_split ?(iflags = 0) ?flags ?(rex = def_rex) ?pat
                   let empty_groups = handle_subgroups [] in
                   if
                     try
-                      unsafe_pcre_exec
-                        (iflags lor 0x0410) rex ~pos ~subj_start:pos ~subj
+                      unsafe_pcre2_match
+                        (* `ANCHORED | `NOTEMPTY *)
+                        (Int64.logor iflags 0x80000004L) rex ~pos ~subj_start:pos ~subj
                         ovector callout;
                       true
                     with Not_found -> false
@@ -1031,4 +1146,4 @@ let foreach_file filenames f =
     let file = open_in filename in
     try f filename file; close_in file
     with exn -> close_in file; raise exn in
-  List.iter do_with_file filenames
+  List.iter do_with_file filenames

+ 791 - 0
libs/pcre2/pcre2_stubs.c

@@ -0,0 +1,791 @@
+/*
+   PCRE2-OCAML - Perl Compatibility Regular Expressions for OCaml
+
+   Copyright (C) 1999-  Markus Mottl
+   email: [email protected]
+   WWW:   http://www.ocaml.info
+
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   This library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with this library; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+#if defined(_WIN32)
+#  define snprintf _snprintf
+#  if defined(_DLL)
+#    define PCREextern __declspec(dllexport)
+#  else
+#    define PCREextern
+#  endif
+#endif
+
+#if _WIN64
+  typedef long long *caml_int_ptr;
+#else
+  typedef long *caml_int_ptr;
+#endif
+
+#if __GNUC__ >= 3
+# define __unused __attribute__ ((unused))
+#else
+# define __unused
+#endif
+
+#include <ctype.h>
+#include <string.h>
+#include <stdio.h>
+
+#include <caml/mlvalues.h>
+#include <caml/alloc.h>
+#include <caml/memory.h>
+#include <caml/fail.h>
+#include <caml/callback.h>
+#include <caml/custom.h>
+
+#define PCRE2_CODE_UNIT_WIDTH 8
+
+#include <pcre2.h>
+
+typedef const unsigned char *chartables;  /* Type of chartable sets */
+
+/* Contents of callout data: the per-match state that pcre2_match_stub0
+   registers through pcre2_set_callout and that pcre2_callout_handler
+   receives as its second argument */
+struct cod {
+  long subj_start;        /* Start of subject string */
+  value *v_substrings_p;  /* Pointer to substrings matched so far */
+  value *v_cof_p;         /* Pointer to callout function */
+  value v_exn;            /* Possible exception raised by callout function */
+};
+
+/* Cache for exceptions; filled in by pcre2_ocaml_init */
+static const value *pcre2_exc_Error     = NULL;  /* Exception [Error] */
+static const value *pcre2_exc_Backtrack = NULL;  /* Exception [Backtrack] */
+
+/* Cache for polymorphic variants; filled in by pcre2_ocaml_init */
+static value var_Start_only;   /* Variant [`Start_only] */
+static value var_ANCHORED;     /* Variant [`ANCHORED] */
+static value var_Char;         /* Variant [`Char char] */
+
+static value None = Val_int(0);  /* Compared against optional arguments to detect [None] */
+
+/* Data associated with OCaml values of PCRE regular expression: the
+   compiled pattern plus the match context that carries its limits.  Both
+   are released by pcre2_dealloc_regexp */
+struct pcre2_ocaml_regexp { pcre2_code *rex; pcre2_match_context *mcontext; };
+
+#define Pcre2_ocaml_regexp_val(v) \
+  ((struct pcre2_ocaml_regexp *) Data_custom_val(v))
+
+#define get_rex(v) Pcre2_ocaml_regexp_val(v)->rex
+#define get_mcontext(v) Pcre2_ocaml_regexp_val(v)->mcontext
+
+#define set_rex(v, r) Pcre2_ocaml_regexp_val(v)->rex = r
+#define set_mcontext(v, c) Pcre2_ocaml_regexp_val(v)->mcontext = c
+
+/* Data associated with OCaml values of PCRE tables; the table pointer is
+   released by pcre2_dealloc_tables */
+struct pcre2_ocaml_tables { chartables tables; };
+
+#define Pcre2_ocaml_tables_val(v) \
+  ((struct pcre2_ocaml_tables *) Data_custom_val(v))
+
+#define get_tables(v) Pcre2_ocaml_tables_val(v)->tables
+#define set_tables(v, t) Pcre2_ocaml_tables_val(v)->tables = t
+
+/* Converts subject offsets from C-integers to OCaml-Integers.
+
+   This is a bit tricky, because there are 32- and 64-bit platforms around
+   and OCaml chooses the larger possibility for representing integers when
+   available (also in arrays) - not so the PCRE!
+
+   [ovec_src] and [ovec_dst] both point at the LAST entry to convert and are
+   walked backwards for [subgroups2] entries.  [subj_start] is added to
+   every real offset so that the stored positions are relative to the
+   beginning of the whole subject string.
+
+   Entries of groups that did not take part in the match hold PCRE2_UNSET.
+   They are always stored as -1: adding [subj_start] to them would wrap
+   around and produce something that looks like a valid offset.
+*/
+static inline void copy_ovector(
+  long subj_start, const size_t* ovec_src, caml_int_ptr ovec_dst, uint32_t subgroups2)
+{
+  while (subgroups2--) {
+    const size_t ofs = *ovec_src;
+    *ovec_dst =
+      (ofs == PCRE2_UNSET) ? Val_long(-1) : Val_long(ofs + subj_start);
+    --ovec_src; --ovec_dst;
+  }
+}
+
+/* Callout handler.
+
+   Builds an 8-field OCaml block describing the callout, refreshes the
+   OCaml offset vector from the offsets PCRE2 has collected so far and
+   calls the OCaml callout function stored in [cod].
+
+   Returns 0 when [cod] is NULL or the callout returned normally, 1 when
+   the callout raised [Backtrack], and PCRE2_ERROR_CALLOUT (after saving
+   the exception in [cod->v_exn]) for any other exception. */
+static int pcre2_callout_handler(pcre2_callout_block* cb, struct cod* cod)
+{
+  if (cod != NULL) {
+    /* Callout is available */
+    value v_res;
+
+    /* Set up parameter array.  The allocation comes first; the OCaml
+       values below are only read from [cod] after it. */
+    value v_callout_data = caml_alloc_small(8, 0);
+
+    const value v_substrings = *cod->v_substrings_p;
+
+    const uint32_t capture_top = cb->capture_top;
+    uint32_t subgroups2 = capture_top << 1;  /* two offsets per group */
+    const uint32_t subgroups2_1 = subgroups2 - 1;
+
+    const size_t *ovec_src = cb->offset_vector + subgroups2_1;  /* last entry to copy */
+    caml_int_ptr ovec_dst = &Field(Field(v_substrings, 1), 0) + subgroups2_1;
+    long subj_start = cod->subj_start;
+
+    copy_ovector(subj_start, ovec_src, ovec_dst, subgroups2);
+
+    Field(v_callout_data, 0) = Val_int(cb->callout_number);
+    Field(v_callout_data, 1) = v_substrings;
+    Field(v_callout_data, 2) = Val_int(cb->start_match + subj_start);
+    Field(v_callout_data, 3) = Val_int(cb->current_position + subj_start);
+    Field(v_callout_data, 4) = Val_int(capture_top);
+    Field(v_callout_data, 5) = Val_int(cb->capture_last);
+    Field(v_callout_data, 6) = Val_int(cb->pattern_position);
+    Field(v_callout_data, 7) = Val_int(cb->next_item_length);
+
+    /* Perform callout; the _exn variant hands an exception back as a
+       result instead of unwinding through PCRE2 */
+    v_res = caml_callback_exn(*cod->v_cof_p, v_callout_data);
+
+    if (Is_exception_result(v_res)) {
+      /* Callout raised an exception */
+      const value v_exn = Extract_exception(v_res);
+      if (Field(v_exn, 0) == *pcre2_exc_Backtrack) return 1;
+      cod->v_exn = v_exn;  /* re-raised by pcre2_match_stub0 once the match has returned */
+      return PCRE2_ERROR_CALLOUT;
+    }
+  }
+
+  return 0;
+}
+
+/* Fetches the named OCaml-values + caches them and
+   calculates + caches the variant hash values.
+
+   NOTE(review): the results of caml_named_value are stored without a NULL
+   check, while the raising helpers and the callout handler dereference
+   them unconditionally - presumably the ML side registers both names
+   before calling this; confirm. */
+CAMLprim value pcre2_ocaml_init(value __unused v_unit)
+{
+  pcre2_exc_Error     = caml_named_value("Pcre2.Error");
+  pcre2_exc_Backtrack = caml_named_value("Pcre2.Backtrack");
+
+  var_Start_only  = caml_hash_variant("Start_only");
+  var_ANCHORED    = caml_hash_variant("ANCHORED");
+  var_Char        = caml_hash_variant("Char");
+
+  return Val_unit;
+}
+
+/* Finalizing deallocation function for chartable sets.
+   pcre2_maketables_free is only used when PCRE2_MINOR >= 34; otherwise
+   the tables are released with plain free().
+   NOTE(review): only the minor version is tested, which assumes
+   PCRE2_MAJOR is 10 - confirm. */
+static void pcre2_dealloc_tables(value v_tables)
+{
+#if PCRE2_MINOR >= 34
+  pcre2_maketables_free(NULL, get_tables(v_tables));
+#else
+  free((void*)get_tables(v_tables));
+#endif
+}
+
+/* Finalizing deallocation function for compiled regular expressions:
+   frees both the compiled pattern and the match context stored next to
+   it by pcre2_compile_stub */
+static void pcre2_dealloc_regexp(value v_rex)
+{
+  pcre2_code_free(get_rex(v_rex));
+  pcre2_match_context_free(get_mcontext(v_rex));
+}
+
+
+/* Raising exceptions */
+
+CAMLnoreturn_start
+static inline void raise_pcre2_error(value v_arg)
+CAMLnoreturn_end;
+
+CAMLnoreturn_start
+static inline void raise_partial()
+CAMLnoreturn_end;
+
+CAMLnoreturn_start
+static inline void raise_bad_utf()
+CAMLnoreturn_end;
+
+CAMLnoreturn_start
+static inline void raise_bad_utf_offset()
+CAMLnoreturn_end;
+
+CAMLnoreturn_start
+static inline void raise_match_limit()
+CAMLnoreturn_end;
+
+CAMLnoreturn_start
+static inline void raise_depth_limit()
+CAMLnoreturn_end;
+
+CAMLnoreturn_start
+static inline void raise_workspace_size()
+CAMLnoreturn_end;
+
+CAMLnoreturn_start
+static inline void raise_bad_pattern(int code, size_t pos)
+CAMLnoreturn_end;
+
+CAMLnoreturn_start
+static inline void raise_internal_error(char *msg)
+CAMLnoreturn_end;
+
+static inline void raise_pcre2_error(value v_arg)  /* Raises the cached
+   [Error] exception with [v_arg] as its argument */
+{ caml_raise_with_arg(*pcre2_exc_Error, v_arg); }  /* NOTE(review): the
+   immediates 0-5 used below presumably follow the order of the constant
+   constructors of the OCaml error type - confirm against the ML side */
+
+static inline void raise_partial() { raise_pcre2_error(Val_int(0)); }
+static inline void raise_bad_utf() { raise_pcre2_error(Val_int(1)); }
+static inline void raise_bad_utf_offset() { raise_pcre2_error(Val_int(2)); }
+static inline void raise_match_limit() { raise_pcre2_error(Val_int(3)); }
+static inline void raise_depth_limit() { raise_pcre2_error(Val_int(4)); }
+static inline void raise_workspace_size() { raise_pcre2_error(Val_int(5)); }
+
+static inline void raise_bad_pattern(int code, size_t pos)
+{
+  CAMLparam0();
+  CAMLlocal1(v_msg);
+  value v_arg;
+  v_msg = caml_alloc_string(128);
+  pcre2_get_error_message(code, (PCRE2_UCHAR *)String_val(v_msg), 128);
+  v_arg = caml_alloc_small(2, 0);
+  Field(v_arg, 0) = v_msg;
+  Field(v_arg, 1) = Val_int(pos);
+  raise_pcre2_error(v_arg);
+  CAMLnoreturn;
+}
+
+static inline void raise_internal_error(char *msg)  /* Raises [Error] with
+   a one-field block of tag 1 that holds a copy of the C string [msg] */
+{
+  CAMLparam0();
+  CAMLlocal1(v_msg);  /* rooted: the block allocation below may run the GC */
+  value v_arg;
+  v_msg = caml_copy_string(msg);
+  v_arg = caml_alloc_small(1, 1);
+  Field(v_arg, 0) = v_msg;
+  raise_pcre2_error(v_arg);
+  CAMLnoreturn;
+}
+
+/* PCRE pattern compilation */
+
+static struct custom_operations regexp_ops = {  /* Custom-block operations
+   for compiled patterns: only the finalizer is specific, every other
+   slot uses the runtime default */
+  "pcre2_ocaml_regexp",
+  pcre2_dealloc_regexp,
+  custom_compare_default,
+  custom_hash_default,
+  custom_serialize_default,
+  custom_deserialize_default,
+  custom_compare_ext_default
+};
+
+/* Makes compiled regular expression from compilation options, an optional
+   value of chartables and the pattern string.
+
+   [v_opt] is received as a plain C integer rather than as an OCaml value;
+   pcre2_compile_stub_bc is the wrapper that accepts it as a [value].
+   Raises through raise_bad_pattern when PCRE2 rejects the pattern. */
+
+CAMLprim value pcre2_compile_stub(int64_t v_opt, value v_tables, value v_pat)
+{
+  value v_rex;  /* Final result -> value of type [regexp] */
+  int error_code = 0;  /* error code for potential error */
+  size_t error_ofs = 0;  /* offset in the pattern at which error occurred */
+  size_t length = caml_string_length(v_pat);
+
+  pcre2_compile_context* ccontext = NULL;
+  /* If v_tables = [None], then pointer to tables is NULL, otherwise
+     set it to the appropriate value.
+     NOTE(review): the result of pcre2_compile_context_create is not
+     checked before it is used. */
+  if (v_tables != None) {
+    ccontext = pcre2_compile_context_create(NULL);
+    pcre2_set_character_tables(ccontext, get_tables(Field(v_tables, 0)));
+  }
+
+  /* Compiles the pattern */
+  pcre2_code* regexp = pcre2_compile((PCRE2_SPTR)String_val(v_pat), length, v_opt,
+                                     &error_code, &error_ofs, ccontext);
+
+  pcre2_compile_context_free(ccontext);  /* called for the NULL (no tables) case as well */
+
+  /* Raises appropriate exception with [BadPattern] if the pattern
+     could not be compiled */
+  if (regexp == NULL) raise_bad_pattern(error_code, error_ofs);
+
+  /* GC will do a full cycle every 1_000_000 regexp allocations (a typical
+     regexp probably consumes less than 100 bytes -> maximum of 100_000_000
+     bytes unreclaimed regexps) */
+  v_rex =
+    caml_alloc_custom(&regexp_ops,
+      sizeof(struct pcre2_ocaml_regexp), 1, 1000000);
+
+  set_rex(v_rex, regexp);
+  set_mcontext(v_rex, pcre2_match_context_create(NULL));  /* NOTE(review): result not checked for NULL */
+
+  return v_rex;
+}
+
+/* Byte-code hook for pcre2_compile_stub.
+
+   The native stub takes the options as a raw int64_t, i.e. the OCaml side
+   passes an unboxed int64.  Byte-code always passes such an argument as a
+   boxed Int64 block, so it must be read with Int64_val; Long_val would
+   reinterpret the block pointer as an integer. */
+CAMLprim value pcre2_compile_stub_bc(value v_opt, value v_tables, value v_pat)
+{
+  return pcre2_compile_stub(Int64_val(v_opt), v_tables, v_pat);
+}
+
+/* Gets the depth limit of a regular expression if it exists */
+/* CAMLprim value pcre2_get_depth_limit_stub(value v_rex); */
+
+/* Gets the match limit of a regular expression if it exists */
+/* CAMLprim value pcre2_get_match_limit_stub(value v_rex); */
+
+
+/* Sets a match limit for a regular expression imperatively.
+
+   The native stub receives the limit as an untagged integer (the byte-code
+   wrapper below untags it with Int_val), so the parameter is declared as
+   [intnat], exactly like in pcre2_set_imp_depth_limit_stub.  Returns the
+   regular expression itself. */
+
+CAMLprim value pcre2_set_imp_match_limit_stub(value v_rex, intnat v_lim) {
+  pcre2_match_context* mcontext = get_mcontext(v_rex);
+  pcre2_set_match_limit(mcontext, v_lim);
+  return v_rex;
+}
+
+/* Byte-code hook: untags the limit and defers to the native stub */
+CAMLprim value pcre2_set_imp_match_limit_stub_bc(value v_rex, value v_lim)
+{
+  return pcre2_set_imp_match_limit_stub(v_rex, Int_val(v_lim));
+}
+
+
+/* Sets a depth limit for a regular expression imperatively.
+   The limit is stored in the match context kept next to the compiled
+   pattern; the regular expression itself is returned. */
+
+CAMLprim value pcre2_set_imp_depth_limit_stub(value v_rex, intnat v_lim) {
+  pcre2_match_context* mcontext = get_mcontext(v_rex);
+  pcre2_set_depth_limit(mcontext, v_lim);
+  return v_rex;
+}
+
+CAMLprim value pcre2_set_imp_depth_limit_stub_bc(value v_rex, value v_lim)
+{  /* Byte-code hook: untags the limit and defers to the native stub */
+  return pcre2_set_imp_depth_limit_stub(v_rex, Int_val(v_lim));
+}
+
+
+/* Performs the call to the pcre2_pattern_info function on the compiled
+   pattern stored in [v_rex] and returns its result code unchanged; the
+   callers in this file treat any non-zero result as an internal error */
+static inline int pcre2_pattern_info_stub(value v_rex, int what, void* where)
+{
+  return pcre2_pattern_info(get_rex(v_rex), what, where);
+}
+
+/* Some stubs for info-functions */
+
+/* Generic macro for getting integer results from pcre2_pattern_info.
+
+   Expands to a native stub pcre2_<name>_stub returning the raw integer and
+   a byte-code stub pcre2_<name>_stub_bc returning it as a tagged value.
+   [tp] must be the C type PCRE2 writes for PCRE2_INFO_<option>.
+
+   The stub name for the error message is assembled with the stringizing
+   operator: "##" is not expanded inside a string literal. */
+#define make_intnat_info(tp, name, option) \
+  CAMLprim intnat pcre2_##name##_stub(value v_rex) \
+  { \
+    tp options; \
+    const int ret = pcre2_pattern_info_stub(v_rex, PCRE2_INFO_##option, &options); \
+    if (ret != 0) raise_internal_error("pcre2_" #name "_stub"); \
+    return options; \
+  } \
+  \
+  CAMLprim value pcre2_##name##_stub_bc(value v_rex) \
+  { return Val_int(pcre2_##name##_stub(v_rex)); }
+
+/* PCRE2 reports the size as size_t and the other four items as uint32_t */
+make_intnat_info(size_t, size, SIZE)
+make_intnat_info(uint32_t, capturecount, CAPTURECOUNT)
+make_intnat_info(uint32_t, backrefmax, BACKREFMAX)
+make_intnat_info(uint32_t, namecount, NAMECOUNT)
+make_intnat_info(uint32_t, nameentrysize, NAMEENTRYSIZE)
+
+/* Returns the options the pattern in [v_rex] was compiled with
+   (PCRE2_INFO_ARGOPTIONS) as a raw 64-bit integer */
+CAMLprim int64_t pcre2_argoptions_stub(value v_rex)
+{
+  uint32_t options;
+  const int ret = pcre2_pattern_info_stub(v_rex, PCRE2_INFO_ARGOPTIONS, &options);
+  /* Report the real stub name, not the macro template text */
+  if (ret != 0) raise_internal_error("pcre2_argoptions_stub");
+  return (int64_t)options;
+}
+
+/* Byte-code hook for pcre2_argoptions_stub.  The native stub returns a raw
+   int64_t, i.e. an unboxed OCaml int64; byte-code expects that result as
+   a boxed Int64 block rather than as a tagged integer. */
+CAMLprim value pcre2_argoptions_stub_bc(value v_rex)
+{ return caml_copy_int64(pcre2_argoptions_stub(v_rex)); }
+
+/* Describes how a match of the pattern in [v_rex] has to begin, as one of
+   the variants [`Start_only], [`ANCHORED] or [`Char c] */
+CAMLprim value pcre2_firstcodeunit_stub(value v_rex)
+{
+  uint32_t kind;
+
+  if (pcre2_pattern_info_stub(v_rex, PCRE2_INFO_FIRSTCODETYPE, &kind) != 0)
+    raise_internal_error("pcre2_firstcodeunit_stub");
+
+  if (kind == 2) return var_Start_only;  /* [`Start_only] */
+  if (kind == 0) return var_ANCHORED;    /* [`ANCHORED] */
+
+  if (kind == 1) {
+    uint32_t unit;
+    value v_char;
+
+    if (pcre2_pattern_info_stub(v_rex, PCRE2_INFO_FIRSTCODEUNIT, &unit) != 0)
+      raise_internal_error("pcre2_firstcodeunit_stub");
+
+    /* Build the non-constant constructor [`Char of char]: the variant
+       hash goes first, the code unit second */
+    v_char = caml_alloc_small(2, 0);
+    Field(v_char, 0) = var_Char;
+    Field(v_char, 1) = Val_int(unit);
+    return v_char;
+  }
+
+  /* Should not happen */
+  raise_internal_error("pcre2_firstcodeunit_stub");
+}
+
+/* Returns [None] when PCRE2 recorded no last code unit for the pattern in
+   [v_rex], and [Some c] when it did */
+CAMLprim value pcre2_lastcodeunit_stub(value v_rex)
+{
+  uint32_t kind;
+  uint32_t unit;
+  value v_some;
+
+  if (pcre2_pattern_info_stub(v_rex, PCRE2_INFO_LASTCODETYPE, &kind) != 0)
+    raise_internal_error("pcre2_lastcodeunit_stub");
+
+  switch (kind) {
+    case 0: return None;
+    case 1: break;
+    default: raise_internal_error("pcre2_lastcodeunit_stub");
+  }
+
+  if (pcre2_pattern_info_stub(v_rex, PCRE2_INFO_LASTCODEUNIT, &unit) != 0)
+    raise_internal_error("pcre2_lastcodeunit_stub");
+
+  /* Allocates [Some char] */
+  v_some = caml_alloc_small(1, 0);
+  Field(v_some, 0) = Val_int(unit);
+  return v_some;
+}
+
+CAMLnoreturn_start
+static inline void handle_match_error(char *loc, const int ret)
+CAMLnoreturn_end;
+
+static inline void handle_match_error(char *loc, const int ret)  /* Turns
+   the negative result [ret] of a match call into an OCaml exception;
+   [loc] names the calling stub and only appears in the message used for
+   unhandled codes.  Never returns. */
+{
+  switch (ret) {
+    /* Dedicated exceptions.  Every helper below is declared noreturn,
+       which is why the cases carry no break. */
+    case PCRE2_ERROR_NOMATCH : caml_raise_not_found();
+    case PCRE2_ERROR_PARTIAL : raise_partial();
+    case PCRE2_ERROR_MATCHLIMIT : raise_match_limit();
+    case PCRE2_ERROR_BADUTFOFFSET : raise_bad_utf_offset();
+    case PCRE2_ERROR_DEPTHLIMIT : raise_depth_limit();
+    case PCRE2_ERROR_DFA_WSSIZE : raise_workspace_size();
+    default : {
+      if (PCRE2_ERROR_UTF8_ERR21 <= ret && ret <= PCRE2_ERROR_UTF8_ERR1)  /* any code in the UTF-8 error range */
+        raise_bad_utf();
+      /* Unknown error */
+      char err_buf[100];
+      snprintf(err_buf, 100, "%s: unhandled PCRE2 error code: %d", loc, ret);
+      raise_internal_error(err_buf);
+    }
+  }
+}
+
+/* Copies the offsets of a successful match into the OCaml offset vector.
+
+   [ret] is the result of the match call, i.e. the number of offset pairs
+   PCRE2 filled in.  Those [ret * 2] entries are converted from [ovec] into
+   [v_ovec]; every remaining entry up to two thirds of [ovec_len] is reset
+   to -1.
+
+   The arithmetic avoids "count - 1" on the unsigned count: for [ret] = 0
+   that would wrap around, push the destination pointer far outside the
+   array and skip the clearing loop, leaving stale offsets behind. */
+static inline void handle_pcre2_match_result(
+  size_t *ovec, value v_ovec, size_t ovec_len, long subj_start, uint32_t ret)
+{
+  caml_int_ptr ocaml_ovec = (caml_int_ptr) &Field(v_ovec, 0);
+  const uint32_t subgroups2 = ret * 2;
+  caml_int_ptr ovec_clear_stop = ocaml_ovec + (ovec_len * 2) / 3;
+  /* First entry that does not belong to the match */
+  caml_int_ptr ovec_dst = ocaml_ovec + subgroups2;
+
+  /* copy_ovector walks backwards from the last entry of both vectors */
+  if (subgroups2 > 0)
+    copy_ovector(subj_start, ovec + (subgroups2 - 1), ovec_dst - 1, subgroups2);
+
+  for (; ovec_dst < ovec_clear_stop; ++ovec_dst) *ovec_dst = -1;
+}
+
+/* Executes a pattern match with runtime options, a regular expression, a
+   matching position, the start of the subject string, a subject string,
+   an offset vector, an optional callout function and an optional DFA
+   workspace.
+
+   A [v_workspace] of (value) NULL selects pcre2_match, anything else
+   selects pcre2_dfa_match.  On success the offsets are written to
+   [v_ovec] and unit is returned; a negative result is turned into an
+   OCaml exception (see handle_match_error), and an exception saved by the
+   callout handler is re-raised. */
+
+CAMLprim value pcre2_match_stub0(
+    int64_t v_opt, value v_rex, intnat v_pos, intnat v_subj_start, value v_subj,
+    value v_ovec, value v_maybe_cof, value v_workspace)
+{
+  int ret;
+  int is_dfa = v_workspace != (value) NULL;  /* a workspace selects the DFA matcher */
+  long
+    pos = v_pos,
+    subj_start = v_subj_start;
+  size_t
+    ovec_len = Wosize_val(v_ovec),
+    len = caml_string_length(v_subj);
+
+  if (pos > (long)len || pos < subj_start)
+    caml_invalid_argument("Pcre2.pcre2_match_stub: illegal position");
+
+  if (subj_start > (long)len || subj_start < 0)
+    caml_invalid_argument("Pcre2.pcre2_match_stub: illegal subject start");
+
+  pos -= subj_start;  /* from here on relative to the subject start */
+  len -= subj_start;
+
+  {
+    const pcre2_code *code = get_rex(v_rex);  /* Compiled pattern */
+    pcre2_match_context* mcontext = get_mcontext(v_rex);  /* Match context */
+    PCRE2_SPTR ocaml_subj = (PCRE2_SPTR)String_val(v_subj) + subj_start;  /* Subject string */
+
+    pcre2_match_data* match_data = pcre2_match_data_create_from_pattern(code, NULL);  /* NOTE(review): not checked for NULL */
+
+    /* Special case when no callout functions specified */
+    if (v_maybe_cof == None) {
+      /* Performs the match directly on the bytes of the OCaml string */
+      if (is_dfa)
+        ret =
+          pcre2_dfa_match(code, ocaml_subj, len, pos, v_opt, match_data, mcontext,
+              (int *) &Field(v_workspace, 0), Wosize_val(v_workspace));
+      else
+        ret = pcre2_match(code, ocaml_subj, len, pos, v_opt, match_data, mcontext);
+
+      size_t *ovec = pcre2_get_ovector_pointer(match_data);
+
+      if (ret < 0) {
+        pcre2_match_data_free(match_data);  /* freed here because handle_match_error does not return */
+        handle_match_error("pcre2_match_stub", ret);
+      } else {
+        handle_pcre2_match_result(ovec, v_ovec, ovec_len, subj_start, ret);
+      }
+    }
+
+    /* There are callout functions: the match runs on a C copy of the
+       subject and with a private copy of the match context that has the
+       callout handler installed */
+    else {
+      value v_cof = Field(v_maybe_cof, 0);
+      value v_substrings;
+      PCRE2_UCHAR* subj = caml_stat_alloc(sizeof(char) * len);
+      int workspace_len;
+      int *workspace;
+      struct cod cod = { 0, (value *) NULL, (value *) NULL, (value) NULL };
+      pcre2_match_context* new_mcontext = pcre2_match_context_copy(mcontext);  /* NOTE(review): not checked for NULL */
+
+      pcre2_set_callout(new_mcontext, (int (*)(pcre2_callout_block_8*, void*))&pcre2_callout_handler, &cod);
+
+      cod.subj_start = subj_start;
+      memcpy(subj, ocaml_subj, len);  /* presumably because the callout runs OCaml code that may let
+         the GC move v_subj during the match - confirm */
+
+      Begin_roots4(v_rex, v_cof, v_substrings, v_ovec);
+        Begin_roots1(v_subj);
+          v_substrings = caml_alloc_small(2, 0);
+        End_roots();
+
+        Field(v_substrings, 0) = v_subj;
+        Field(v_substrings, 1) = v_ovec;
+
+        cod.v_substrings_p = &v_substrings;
+        cod.v_cof_p = &v_cof;
+
+        if (is_dfa) {
+          workspace_len = Wosize_val(v_workspace);
+          workspace = caml_stat_alloc(sizeof(int) * workspace_len);  /* NOTE(review): this buffer is
+             never handed to PCRE2 - the call below still passes the OCaml
+             array - yet on success it is copied back over v_workspace
+             further down.  v_workspace is also not among the registered
+             roots.  Looks unintended; confirm before relying on the DFA
+             path with callouts. */
+          ret =
+            pcre2_dfa_match(code, subj, len, pos, v_opt, match_data, new_mcontext,
+                (int *) &Field(v_workspace, 0), workspace_len);
+        } else
+          ret =
+            pcre2_match(code, subj, len, pos, v_opt, match_data, new_mcontext);
+
+        caml_stat_free(subj);
+      End_roots();
+
+      pcre2_match_context_free(new_mcontext);
+      size_t* ovec = pcre2_get_ovector_pointer(match_data);
+      if (ret < 0) {
+        if (is_dfa) caml_stat_free(workspace);
+        pcre2_match_data_free(match_data);  /* neither branch below returns */
+        if (ret == PCRE2_ERROR_CALLOUT) caml_raise(cod.v_exn);  /* exception saved by the callout handler */
+        else handle_match_error("pcre2_match_stub(callout)", ret);
+      } else {
+        handle_pcre2_match_result(ovec, v_ovec, ovec_len, subj_start, ret);
+        if (is_dfa) {
+          caml_int_ptr ocaml_workspace_dst =
+            (caml_int_ptr) &Field(v_workspace, 0);
+          const int *workspace_src = workspace;
+          const int *workspace_src_stop = workspace + workspace_len;
+          while (workspace_src != workspace_src_stop) {
+            *ocaml_workspace_dst = *workspace_src;
+            ocaml_workspace_dst++;
+            workspace_src++;
+          }
+          caml_stat_free(workspace);
+        }
+      }
+    }
+    pcre2_match_data_free(match_data);  /* reached on the success paths only */
+  }
+
+  return Val_unit;
+}
+
+/* Native-code entry point for matching: forwards all seven arguments
+   unchanged to pcre2_match_stub0 and supplies NULL as its final argument
+   (presumably the DFA workspace, which this entry point does not use). */
+CAMLprim value pcre2_match_stub(
+    int64_t v_opt, value v_rex, intnat v_pos, intnat v_subj_start, value v_subj,
+    value v_ovec, value v_maybe_cof)
+{
+  return pcre2_match_stub0(v_opt, v_rex, v_pos, v_subj_start, v_subj,
+                         v_ovec, v_maybe_cof, (value) NULL);
+}
+
+/* Byte-code hook for pcre2_match_stub.
+   Needed because there are more than 5 arguments, so they arrive packed in
+   argv. Unpacks argv[0..6] and calls pcre2_match_stub0 with a NULL final
+   argument, mirroring the native entry point.
+   NOTE(review): argv[0] is decoded with Long_val while the native stub takes
+   an int64_t - confirm this matches the OCaml external declaration. */
+CAMLprim value pcre2_match_stub_bc(value *argv, int __unused argn)
+{
+  return
+    pcre2_match_stub0(
+        Long_val(argv[0]), argv[1], Int_val(argv[2]), Int_val(argv[3]),
+        argv[4], argv[5], argv[6], (value) NULL);
+}
+
+/* Byte-code hook for pcre2_dfa_match_stub.
+   Needed because there are more than 5 arguments, so they arrive packed in
+   argv. Unpacks argv[0..6] like pcre2_match_stub_bc and additionally passes
+   argv[7] through as the final argument of pcre2_match_stub0.
+   NOTE(review): argv[0] is decoded with Long_val - confirm this matches the
+   OCaml external declaration of the option flags. */
+CAMLprim value pcre2_dfa_match_stub_bc(value *argv, int __unused argn)
+{
+  return
+    pcre2_match_stub0(
+        Long_val(argv[0]), argv[1], Int_val(argv[2]), Int_val(argv[3]),
+        argv[4], argv[5], argv[6], argv[7]);
+}
+
+/* Custom-block operations for the chartable values allocated by
+   pcre2_maketables_stub. Only pcre2_dealloc_tables is specific to this
+   type; every other slot uses the OCaml runtime's default handler. */
+static struct custom_operations tables_ops = {
+  "pcre2_ocaml_tables",
+  pcre2_dealloc_tables,
+  custom_compare_default,
+  custom_hash_default,
+  custom_serialize_default,
+  custom_deserialize_default,
+  custom_compare_ext_default
+};
+
+/* Generates a new set of chartables for the current locale (see the
+   pcre2_maketables man page) and wraps it in an OCaml custom block. */
+CAMLprim value pcre2_maketables_stub(value __unused v_unit)
+{
+  /* The 1 / 1000000 ratio makes the GC do a full cycle every 1_000_000
+     table set allocations (the original note quotes 864 bytes per table
+     set, i.e. at most 864_000_000 bytes of unreclaimed table sets) */
+  const value v_res =
+    caml_alloc_custom(
+      &tables_ops, sizeof(struct pcre2_ocaml_tables), 1, 1000000);
+
+  /* Build the tables only after the custom block exists, then attach them */
+  const uint8_t *new_tables = pcre2_maketables(NULL);
+  set_tables(v_res, new_tables);
+
+  return v_res;
+}
+
+/* Thin wrapper exposing the C library's isspace to OCaml: decodes the
+   argument as an integer and returns the answer as an OCaml bool */
+CAMLprim value pcre2_isspace_stub(value v_c)
+{
+  const int ch = Int_val(v_c);
+  return Val_bool(isspace(ch));
+}
+
+
+/* Returns number of substring associated with a name.
+   Raises Invalid_argument when the name does not exist in the pattern or,
+   for patterns compiled with duplicate names, when it is not unique. */
+
+CAMLprim intnat pcre2_substring_number_from_name_stub(value v_rex, value v_name)
+{
+  const int ret = pcre2_substring_number_from_name(get_rex(v_rex), (PCRE2_SPTR)String_val(v_name));
+  if (ret == PCRE2_ERROR_NOSUBSTRING)
+    caml_invalid_argument("Named string not found");
+
+  /* PCRE2 reports an ambiguous (duplicated) name with a second negative
+     code; without this check it would leak out as a bogus group number */
+  if (ret == PCRE2_ERROR_NOUNIQUESUBSTRING)
+    caml_invalid_argument("Named string not unique");
+
+  return ret;
+}
+
+/* Byte-code variant of pcre2_substring_number_from_name_stub: calls the
+   native stub and tags its integer result with Val_int */
+CAMLprim value pcre2_substring_number_from_name_stub_bc(value v_rex, value v_name)
+{
+  return Val_int(pcre2_substring_number_from_name_stub(v_rex, v_name));
+}
+
+
+/* Returns array of names of named substrings in a regexp.
+   Reads the name count, the entry size and the name table from the pattern,
+   then copies one string per table entry into a fresh OCaml array. */
+CAMLprim value pcre2_names_stub(value v_rex)
+{
+  CAMLparam1(v_rex);
+  CAMLlocal1(v_res);
+  uint32_t name_count;
+  uint32_t entry_size;
+  const char *tbl_ptr;
+  uint32_t idx = 0;
+
+  const int rc_count =
+    pcre2_pattern_info_stub(v_rex, PCRE2_INFO_NAMECOUNT, &name_count);
+  if (rc_count != 0) raise_internal_error("pcre2_names_stub: namecount");
+
+  const int rc_size =
+    pcre2_pattern_info_stub(v_rex, PCRE2_INFO_NAMEENTRYSIZE, &entry_size);
+  if (rc_size != 0) raise_internal_error("pcre2_names_stub: nameentrysize");
+
+  const int rc_table =
+    pcre2_pattern_info_stub(v_rex, PCRE2_INFO_NAMETABLE, &tbl_ptr);
+  if (rc_table != 0) raise_internal_error("pcre2_names_stub: nametable");
+
+  v_res = caml_alloc(name_count, 0);
+
+  while (idx < name_count) {
+    /* The name text starts two bytes into each fixed-size table entry */
+    value v_name = caml_copy_string(tbl_ptr + 2);
+    Store_field(v_res, idx, v_name);
+    tbl_ptr += entry_size;
+    ++idx;
+  }
+
+  CAMLreturn(v_res);
+}
+
+/* Generic stub for getting integer results from pcre2_config.
+   PCRE2 writes these results as uint32_t, so the buffer has exactly that
+   type. It is zero-initialised so that a rejected option yields 0 rather
+   than an indeterminate value. */
+static inline int pcre2_config_int(int what)
+{
+  uint32_t ret = 0;
+  pcre2_config(what, (void *) &ret);
+  return (int) ret;
+}
+
+/* Generic stub for getting long integer results from pcre2_config.
+   PCRE2 stores these results (e.g. the match and depth limits) as uint32_t.
+   Handing it the address of a long left the remaining bytes of the long
+   uninitialised wherever long is wider than 32 bits, so the value is read
+   into a uint32_t and widened afterwards. */
+static inline long pcre2_config_long(int what)
+{
+  uint32_t ret = 0;
+  pcre2_config(what, (void *) &ret);
+  return (long) ret;
+}
+
+
+/* Some stubs for config-functions */
+
+/* Makes OCaml-string from PCRE-version.
+   The result has exactly the length of the version text; previously a fixed
+   32-byte OCaml string was returned, so it carried the terminating zero and
+   uninitialised bytes after the text. */
+CAMLprim value pcre2_version_stub(value __unused v_unit) {
+  CAMLparam1(v_unit);
+  CAMLlocal1(v_version);
+  /* With a NULL buffer pcre2_config only reports the space required, in
+     code units and including the terminating zero */
+  const int len = pcre2_config(PCRE2_CONFIG_VERSION, NULL);
+  char *buf;
+
+  if (len <= 0) raise_internal_error("pcre2_version_stub");
+
+  buf = caml_stat_alloc((size_t) len);
+  pcre2_config(PCRE2_CONFIG_VERSION, (void *) buf);
+
+  /* caml_copy_string stops at the terminating zero */
+  v_version = caml_copy_string(buf);
+  caml_stat_free(buf);
+
+  CAMLreturn(v_version);
+}
+
+/* Reports as an OCaml bool whether PCRE2_CONFIG_UNICODE is non-zero */
+CAMLprim value pcre2_config_unicode_stub(value __unused v_unit)
+{
+  const int has_unicode = pcre2_config_int(PCRE2_CONFIG_UNICODE);
+  return Val_bool(has_unicode);
+}
+
+
+/* Returns the value pcre2_config reports for PCRE2_CONFIG_NEWLINE as an
+   OCaml int. NOTE(review): per the PCRE2 documentation this is a
+   PCRE2_NEWLINE_* convention code rather than a raw character - confirm the
+   OCaml side decodes it accordingly. */
+CAMLprim value pcre2_config_newline_stub(value __unused v_unit)
+{ return Val_int(pcre2_config_int(PCRE2_CONFIG_NEWLINE)); }
+
+
+/* Returns number of bytes used for internal linkage of regular expressions
+   (native-code variant: the result is an untagged integer) */
+
+CAMLprim intnat pcre2_config_link_size_stub(value __unused v_unit)
+{
+  const int link_size = pcre2_config_int(PCRE2_CONFIG_LINKSIZE);
+  return link_size;
+}
+
+/* Byte-code variant: tags the native stub's result with Val_int */
+CAMLprim value pcre2_config_link_size_stub_bc(value v_unit)
+{ return Val_int(pcre2_config_link_size_stub(v_unit)); }
+
+
+/* Returns default limit for calls to internal matching function
+   (native-code variant: the result is an untagged integer) */
+
+CAMLprim intnat pcre2_config_match_limit_stub(value __unused v_unit)
+{
+  const long match_limit = pcre2_config_long(PCRE2_CONFIG_MATCHLIMIT);
+  return match_limit;
+}
+
+/* Byte-code variant: tags the native stub's result with Val_int */
+CAMLprim value pcre2_config_match_limit_stub_bc(value v_unit)
+{ return Val_int(pcre2_config_match_limit_stub(v_unit)); }
+
+
+/* Returns default limit for depth of nested backtracking
+   (native-code variant: the result is an untagged integer) */
+
+CAMLprim intnat pcre2_config_depth_limit_stub(value __unused v_unit)
+{
+  const long depth_limit = pcre2_config_long(PCRE2_CONFIG_DEPTHLIMIT);
+  return depth_limit;
+}
+
+/* Byte-code variant: tags the native stub's result with Val_int */
+CAMLprim value pcre2_config_depth_limit_stub_bc(value v_unit)
+{ return Val_int(pcre2_config_depth_limit_stub(v_unit)); }
+
+
+/* Returns boolean indicating use of stack recursion.
+   The result is a tagged OCaml bool produced by Val_bool, so the return
+   type is `value` like the other boolean config stubs, not the untagged
+   `intnat` used by the integer-returning stubs. */
+CAMLprim value pcre2_config_stackrecurse_stub(value __unused v_unit)
+{ return Val_bool(pcre2_config_int(PCRE2_CONFIG_STACKRECURSE)); }

+ 1 - 1
opam

@@ -28,7 +28,7 @@ depends: [
   "extlib" {>= "1.7.8"}
   "ptmap" {>= "2.0.0"}
   "sha"
-  "conf-libpcre"
+  "conf-libpcre2-8"
   "conf-zlib"
   "conf-neko"
   "luv"

+ 1 - 1
src/dune

@@ -9,7 +9,7 @@
 (library
 	(name haxe)
 	(libraries
-		extc extproc extlib_leftovers ilib javalib mbedtls neko objsize pcre swflib ttflib ziplib
+		extc extproc extlib_leftovers ilib javalib mbedtls neko objsize pcre2 swflib ttflib ziplib
 		json
 		unix str bigarray threads dynlink
 		xml-light extlib ptmap sha

+ 15 - 15
src/macro/eval/evalStdLib.ml

@@ -759,18 +759,18 @@ module StdDeque = struct
 end
 
 module StdEReg = struct
-	open Pcre
+	open Pcre2
 
 	let create r opt =
-		let open Pcre in
+		let open Pcre2 in
 		let string_of_pcre_error = function
 			| BadPattern(s,i) -> Printf.sprintf "at %i: %s" i s
 			| Partial -> "Partial"
-			| BadPartial -> "BadPartial"
-			| BadUTF8 -> "BadUTF8"
-			| BadUTF8Offset -> "BadUTF8Offset"
+			| BadUTF -> "BadUTF"
+			| BadUTFOffset -> "BadUTFOffset"
 			| MatchLimit -> "MatchLimit"
-			| RecursionLimit -> "RecursionLimit"
+			| DepthLimit -> "DepthLimit"
+			| WorkspaceSize -> "WorkspaceSize"
 			| InternalError s -> "InternalError: " ^ s
 		in
 		let global = ref false in
@@ -782,7 +782,7 @@ module StdEReg = struct
 			| 'g' -> global := true; None
 			| c -> failwith ("Unsupported regexp option '" ^ String.make 1 c ^ "'")
 		) (ExtString.String.explode opt) in
-		let flags = `UTF8 :: `UCP :: flags in
+		let flags = `UTF :: `UCP :: flags in
 		let rex = try regexp ~flags r with Error error -> failwith (string_of_pcre_error error) in
 		let pcre = {
 			r = rex;
@@ -849,17 +849,17 @@ module StdEReg = struct
 
 	let match' = vifun1 (fun vthis s ->
 		let this = this vthis in
-		let open Pcre in
+		let open Pcre2 in
 		let s = decode_string s in
 		this.r_string <- s;
 		try
-			let a = exec_all ~iflags:0x2000 ~rex:this.r s in
+			let a = exec_all ~flags:[`NO_UTF_CHECK] ~rex:this.r s in
 			this.r_groups <- a;
 			vtrue
 		with Not_found ->
 			this.r_groups <- [||];
 			vfalse
-		| Pcre.Error _ ->
+		| Pcre2.Error _ ->
 			exc_string "PCRE Error (invalid unicode string?)"
 	)
 
@@ -913,7 +913,7 @@ module StdEReg = struct
 		begin try
 			if pos + len > String.length s then raise Not_found;
 			let str = String.sub s 0 (pos + len) in
-			let a = Pcre.exec_all ~iflags:0x2000 ~rex:this.r ~pos str in
+			let a = Pcre2.exec_all ~flags:[`NO_UTF_CHECK] ~rex:this.r ~pos str in
 			this.r_string <- s;
 			this.r_groups <- a;
 			vtrue
@@ -926,7 +926,7 @@ module StdEReg = struct
 		let this = this vthis in
 		let s = decode_string s in
 		let by = decode_string by in
-		let s = (if this.r_global then Pcre.replace else Pcre.replace_first) ~iflags:0x2000 ~rex:this.r ~templ:by s in
+		let s = (if this.r_global then Pcre2.replace else Pcre2.replace_first) ~flags:[`NO_UTF_CHECK] ~rex:this.r ~templ:by s in
 		create_unknown s
 	)
 
@@ -943,11 +943,11 @@ module StdEReg = struct
 				let sub = String.sub s first (last - first) in
 				DynArray.add acc (create_unknown sub)
 			in
-			let exec = Pcre.exec ~iflags:0x2000 ~rex:this.r in
+			let exec = Pcre2.exec ~flags:[`NO_UTF_CHECK] ~rex:this.r in
 			let step pos =
 				try
 					let substrings = exec ~pos s in
-					let (first,last) = Pcre.get_substring_ofs substrings 0 in
+					let (first,last) = Pcre2.get_substring_ofs substrings 0 in
 					add !copy_offset first;
 					copy_offset := last;
 					let next_start = if pos = last then last + 1 else last in
@@ -3348,7 +3348,7 @@ let init_empty_constructors builtins =
 	Hashtbl.add h key_Array (fun () -> encode_array_instance (EvalArray.create [||]));
 	Hashtbl.add h key_eval_Vector (fun () -> encode_vector_instance (Array.make 0 vnull));
 	Hashtbl.add h key_Date (fun () -> encode_instance key_Date ~kind:(IDate 0.));
-	Hashtbl.add h key_EReg (fun () -> encode_instance key_EReg ~kind:(IRegex {r = Pcre.regexp ""; r_rex_string = create_ascii "~//"; r_global = false; r_string = ""; r_groups = [||]}));
+	Hashtbl.add h key_EReg (fun () -> encode_instance key_EReg ~kind:(IRegex {r = Pcre2.regexp ""; r_rex_string = create_ascii "~//"; r_global = false; r_string = ""; r_groups = [||]}));
 	Hashtbl.add h key_String (fun () -> v_empty_string);
 	Hashtbl.add h key_haxe_ds_StringMap (fun () -> encode_instance key_haxe_ds_StringMap ~kind:(IStringMap (StringHashtbl.create ())));
 	Hashtbl.add h key_haxe_ds_IntMap (fun () -> encode_instance key_haxe_ds_IntMap ~kind:(IIntMap (IntHashtbl.create ())));

+ 2 - 2
src/macro/eval/evalValue.ml

@@ -75,11 +75,11 @@ module IntHashtbl = struct
 end
 
 type vregex = {
-	r : Pcre.regexp;
+	r : Pcre2.regexp;
 	r_rex_string : vstring;
 	r_global : bool;
 	mutable r_string : string;
-	mutable r_groups : Pcre.substrings array;
+	mutable r_groups : Pcre2.substrings array;
 }
 
 type vzlib = {