Browse Source

Revert "pcre2: Update to upstream version 10.42"

This reverts commit 62c3e4ab9c87689ff0b7d27350bde176981daf1b.

Needs more work, see comments about `_regex_free` errors in #70447.
Rémi Verschelde 2 years ago
parent
commit
d0398f62f0
41 changed files with 5261 additions and 8395 deletions
  1. 1 1
      modules/regex/config.py
  2. 1 1
      thirdparty/README.md
  3. 3 9
      thirdparty/pcre2/src/config.h
  4. 34 34
      thirdparty/pcre2/src/pcre2.h
  5. 2 10
      thirdparty/pcre2/src/pcre2_compile.c
  6. 3 9
      thirdparty/pcre2/src/pcre2_context.c
  7. 3 2
      thirdparty/pcre2/src/pcre2_convert.c
  8. 3 9
      thirdparty/pcre2/src/pcre2_dfa_match.c
  9. 9 8
      thirdparty/pcre2/src/pcre2_internal.h
  10. 17 17
      thirdparty/pcre2/src/pcre2_intmodedep.h
  11. 79 77
      thirdparty/pcre2/src/pcre2_jit_compile.c
  12. 1 3
      thirdparty/pcre2/src/pcre2_jit_misc.c
  13. 1 3
      thirdparty/pcre2/src/pcre2_jit_neon_inc.h
  14. 1 1
      thirdparty/pcre2/src/pcre2_jit_simd_inc.h
  15. 86 94
      thirdparty/pcre2/src/pcre2_match.c
  16. 2 9
      thirdparty/pcre2/src/pcre2_match_data.c
  17. 11 14
      thirdparty/pcre2/src/pcre2_substitute.c
  18. 12 2
      thirdparty/pcre2/src/sljit/sljitConfig.h
  19. 80 54
      thirdparty/pcre2/src/sljit/sljitConfigInternal.h
  20. 3 0
      thirdparty/pcre2/src/sljit/sljitExecAllocator.c
  21. 179 439
      thirdparty/pcre2/src/sljit/sljitLir.c
  22. 222 454
      thirdparty/pcre2/src/sljit/sljitLir.h
  23. 193 413
      thirdparty/pcre2/src/sljit/sljitNativeARM_32.c
  24. 84 315
      thirdparty/pcre2/src/sljit/sljitNativeARM_64.c
  25. 174 397
      thirdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c
  26. 387 43
      thirdparty/pcre2/src/sljit/sljitNativeMIPS_32.c
  27. 423 37
      thirdparty/pcre2/src/sljit/sljitNativeMIPS_64.c
  28. 275 993
      thirdparty/pcre2/src/sljit/sljitNativeMIPS_common.c
  29. 18 74
      thirdparty/pcre2/src/sljit/sljitNativePPC_32.c
  30. 48 118
      thirdparty/pcre2/src/sljit/sljitNativePPC_64.c
  31. 120 382
      thirdparty/pcre2/src/sljit/sljitNativePPC_common.c
  32. 0 73
      thirdparty/pcre2/src/sljit/sljitNativeRISCV_32.c
  33. 0 183
      thirdparty/pcre2/src/sljit/sljitNativeRISCV_64.c
  34. 0 2762
      thirdparty/pcre2/src/sljit/sljitNativeRISCV_common.c
  35. 142 504
      thirdparty/pcre2/src/sljit/sljitNativeS390X.c
  36. 283 0
      thirdparty/pcre2/src/sljit/sljitNativeSPARC_32.c
  37. 1673 0
      thirdparty/pcre2/src/sljit/sljitNativeSPARC_common.c
  38. 468 247
      thirdparty/pcre2/src/sljit/sljitNativeX86_32.c
  39. 63 171
      thirdparty/pcre2/src/sljit/sljitNativeX86_64.c
  40. 118 419
      thirdparty/pcre2/src/sljit/sljitNativeX86_common.c
  41. 39 14
      thirdparty/pcre2/src/sljit/sljitWXExecAllocator.c

+ 1 - 1
modules/regex/config.py

@@ -1,5 +1,5 @@
 def can_build(env, platform):
-    return True
+    return not env["arch"].startswith("rv")
 
 
 def configure(env):

+ 1 - 1
thirdparty/README.md

@@ -570,7 +570,7 @@ Exclude:
 ## pcre2
 
 - Upstream: http://www.pcre.org
-- Version: 10.42 (52c08847921a324c804cabf2814549f50bce1265, 2022)
+- Version: 10.40 (3103b8f20a3b9944b177e812fde29fbfb8b90558, 2022)
 - License: BSD-3-Clause
 
 Files extracted from upstream source:

+ 3 - 9
thirdparty/pcre2/src/config.h

@@ -236,7 +236,7 @@ sure both macros are undefined; an emulation function will then be used. */
 #define PACKAGE_NAME "PCRE2"
 
 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "PCRE2 10.42"
+#define PACKAGE_STRING "PCRE2 10.40"
 
 /* Define to the one symbol short name of this package. */
 #define PACKAGE_TARNAME "pcre2"
@@ -245,7 +245,7 @@ sure both macros are undefined; an emulation function will then be used. */
 #define PACKAGE_URL ""
 #define PACKAGE_URL ""
 
 
 /* Define to the version of this package. */
 /* Define to the version of this package. */
-#define PACKAGE_VERSION "10.42"
+#define PACKAGE_VERSION "10.40"
 
 
 /* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
 /* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
    parentheses (of any kind) in a pattern. This limits the amount of system
    parentheses (of any kind) in a pattern. This limits the amount of system
@@ -438,13 +438,7 @@ sure both macros are undefined; an emulation function will then be used. */
 #endif
 #endif
 
 
 /* Version number of package */
 /* Version number of package */
-#define VERSION "10.42"
-
-/* Number of bits in a file offset, on hosts where this is settable. */
-/* #undef _FILE_OFFSET_BITS */
-
-/* Define for large files, on AIX-style hosts. */
-/* #undef _LARGE_FILES */
+#define VERSION "10.40"
 
 
 /* Define to empty if `const' does not conform to ANSI C. */
 /* Define to empty if `const' does not conform to ANSI C. */
 /* #undef const */
 /* #undef const */

+ 34 - 34
thirdparty/pcre2/src/pcre2.h

@@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
 /* The current PCRE version information. */
 /* The current PCRE version information. */
 
 
 #define PCRE2_MAJOR           10
 #define PCRE2_MAJOR           10
-#define PCRE2_MINOR           42
+#define PCRE2_MINOR           40
 #define PCRE2_PRERELEASE      
 #define PCRE2_PRERELEASE      
-#define PCRE2_DATE            2022-12-11
+#define PCRE2_DATE            2022-04-14
 
 
 /* When an application links to a PCRE DLL in Windows, the symbols that are
 /* When an application links to a PCRE DLL in Windows, the symbols that are
 imported have to be identified as such. When building PCRE2, the appropriate
 imported have to be identified as such. When building PCRE2, the appropriate
@@ -572,19 +572,19 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION pcre2_config(uint32_t, void *);
 /* Functions for manipulating contexts. */
 /* Functions for manipulating contexts. */
 
 
 #define PCRE2_GENERAL_CONTEXT_FUNCTIONS \
 #define PCRE2_GENERAL_CONTEXT_FUNCTIONS \
-PCRE2_EXP_DECL pcre2_general_context *PCRE2_CALL_CONVENTION \
-  pcre2_general_context_copy(pcre2_general_context *); \
-PCRE2_EXP_DECL pcre2_general_context *PCRE2_CALL_CONVENTION \
-  pcre2_general_context_create(void *(*)(PCRE2_SIZE, void *), \
+PCRE2_EXP_DECL pcre2_general_context PCRE2_CALL_CONVENTION \
+  *pcre2_general_context_copy(pcre2_general_context *); \
+PCRE2_EXP_DECL pcre2_general_context PCRE2_CALL_CONVENTION \
+  *pcre2_general_context_create(void *(*)(PCRE2_SIZE, void *), \
     void (*)(void *, void *), void *); \
     void (*)(void *, void *), void *); \
 PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
 PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
   pcre2_general_context_free(pcre2_general_context *);
   pcre2_general_context_free(pcre2_general_context *);
 
 
 #define PCRE2_COMPILE_CONTEXT_FUNCTIONS \
 #define PCRE2_COMPILE_CONTEXT_FUNCTIONS \
-PCRE2_EXP_DECL pcre2_compile_context *PCRE2_CALL_CONVENTION \
-  pcre2_compile_context_copy(pcre2_compile_context *); \
-PCRE2_EXP_DECL pcre2_compile_context *PCRE2_CALL_CONVENTION \
-  pcre2_compile_context_create(pcre2_general_context *);\
+PCRE2_EXP_DECL pcre2_compile_context PCRE2_CALL_CONVENTION \
+  *pcre2_compile_context_copy(pcre2_compile_context *); \
+PCRE2_EXP_DECL pcre2_compile_context PCRE2_CALL_CONVENTION \
+  *pcre2_compile_context_create(pcre2_general_context *);\
 PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
 PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
   pcre2_compile_context_free(pcre2_compile_context *); \
   pcre2_compile_context_free(pcre2_compile_context *); \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
@@ -604,10 +604,10 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
     int (*)(uint32_t, void *), void *);
     int (*)(uint32_t, void *), void *);
 
 
 #define PCRE2_MATCH_CONTEXT_FUNCTIONS \
 #define PCRE2_MATCH_CONTEXT_FUNCTIONS \
-PCRE2_EXP_DECL pcre2_match_context *PCRE2_CALL_CONVENTION \
-  pcre2_match_context_copy(pcre2_match_context *); \
-PCRE2_EXP_DECL pcre2_match_context *PCRE2_CALL_CONVENTION \
-  pcre2_match_context_create(pcre2_general_context *); \
+PCRE2_EXP_DECL pcre2_match_context PCRE2_CALL_CONVENTION \
+  *pcre2_match_context_copy(pcre2_match_context *); \
+PCRE2_EXP_DECL pcre2_match_context PCRE2_CALL_CONVENTION \
+  *pcre2_match_context_create(pcre2_general_context *); \
 PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
 PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
   pcre2_match_context_free(pcre2_match_context *); \
   pcre2_match_context_free(pcre2_match_context *); \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
@@ -631,10 +631,10 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
     void *(*)(PCRE2_SIZE, void *), void (*)(void *, void *), void *);
     void *(*)(PCRE2_SIZE, void *), void (*)(void *, void *), void *);
 
 
 #define PCRE2_CONVERT_CONTEXT_FUNCTIONS \
 #define PCRE2_CONVERT_CONTEXT_FUNCTIONS \
-PCRE2_EXP_DECL pcre2_convert_context *PCRE2_CALL_CONVENTION \
-  pcre2_convert_context_copy(pcre2_convert_context *); \
-PCRE2_EXP_DECL pcre2_convert_context *PCRE2_CALL_CONVENTION \
-  pcre2_convert_context_create(pcre2_general_context *); \
+PCRE2_EXP_DECL pcre2_convert_context PCRE2_CALL_CONVENTION \
+  *pcre2_convert_context_copy(pcre2_convert_context *); \
+PCRE2_EXP_DECL pcre2_convert_context PCRE2_CALL_CONVENTION \
+  *pcre2_convert_context_create(pcre2_general_context *); \
 PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
 PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
   pcre2_convert_context_free(pcre2_convert_context *); \
   pcre2_convert_context_free(pcre2_convert_context *); \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
@@ -646,15 +646,15 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
 /* Functions concerned with compiling a pattern to PCRE internal code. */
 /* Functions concerned with compiling a pattern to PCRE internal code. */
 
 
 #define PCRE2_COMPILE_FUNCTIONS \
 #define PCRE2_COMPILE_FUNCTIONS \
-PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \
-  pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, int *, PCRE2_SIZE *, \
+PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
+  *pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, int *, PCRE2_SIZE *, \
     pcre2_compile_context *); \
     pcre2_compile_context *); \
 PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
 PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
   pcre2_code_free(pcre2_code *); \
   pcre2_code_free(pcre2_code *); \
-PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \
-  pcre2_code_copy(const pcre2_code *); \
-PCRE2_EXP_DECL pcre2_code *PCRE2_CALL_CONVENTION \
-  pcre2_code_copy_with_tables(const pcre2_code *);
+PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
+  *pcre2_code_copy(const pcre2_code *); \
+PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
+  *pcre2_code_copy_with_tables(const pcre2_code *);
 
 
 
 
 /* Functions that give information about a compiled pattern. */
 /* Functions that give information about a compiled pattern. */
@@ -670,10 +670,10 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
 /* Functions for running a match and inspecting the result. */
 /* Functions for running a match and inspecting the result. */
 
 
 #define PCRE2_MATCH_FUNCTIONS \
 #define PCRE2_MATCH_FUNCTIONS \
-PCRE2_EXP_DECL pcre2_match_data *PCRE2_CALL_CONVENTION \
-  pcre2_match_data_create(uint32_t, pcre2_general_context *); \
-PCRE2_EXP_DECL pcre2_match_data *PCRE2_CALL_CONVENTION \
-  pcre2_match_data_create_from_pattern(const pcre2_code *, \
+PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \
+  *pcre2_match_data_create(uint32_t, pcre2_general_context *); \
+PCRE2_EXP_DECL pcre2_match_data PCRE2_CALL_CONVENTION \
+  *pcre2_match_data_create_from_pattern(const pcre2_code *, \
     pcre2_general_context *); \
     pcre2_general_context *); \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
   pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
   pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, \
@@ -689,8 +689,8 @@ PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
   pcre2_get_match_data_size(pcre2_match_data *); \
   pcre2_get_match_data_size(pcre2_match_data *); \
 PCRE2_EXP_DECL uint32_t PCRE2_CALL_CONVENTION \
 PCRE2_EXP_DECL uint32_t PCRE2_CALL_CONVENTION \
   pcre2_get_ovector_count(pcre2_match_data *); \
   pcre2_get_ovector_count(pcre2_match_data *); \
-PCRE2_EXP_DECL PCRE2_SIZE *PCRE2_CALL_CONVENTION \
-  pcre2_get_ovector_pointer(pcre2_match_data *); \
+PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
+  *pcre2_get_ovector_pointer(pcre2_match_data *); \
 PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
 PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
   pcre2_get_startchar(pcre2_match_data *);
   pcre2_get_startchar(pcre2_match_data *);
 
 
@@ -770,8 +770,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
     uint32_t, pcre2_match_data *, pcre2_match_context *); \
     uint32_t, pcre2_match_data *, pcre2_match_context *); \
 PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
 PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
   pcre2_jit_free_unused_memory(pcre2_general_context *); \
   pcre2_jit_free_unused_memory(pcre2_general_context *); \
-PCRE2_EXP_DECL pcre2_jit_stack *PCRE2_CALL_CONVENTION \
-  pcre2_jit_stack_create(PCRE2_SIZE, PCRE2_SIZE, pcre2_general_context *); \
+PCRE2_EXP_DECL pcre2_jit_stack PCRE2_CALL_CONVENTION \
+  *pcre2_jit_stack_create(PCRE2_SIZE, PCRE2_SIZE, pcre2_general_context *); \
 PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
 PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
   pcre2_jit_stack_assign(pcre2_match_context *, pcre2_jit_callback, void *); \
   pcre2_jit_stack_assign(pcre2_match_context *, pcre2_jit_callback, void *); \
 PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
 PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
@@ -783,8 +783,8 @@ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
 #define PCRE2_OTHER_FUNCTIONS \
 #define PCRE2_OTHER_FUNCTIONS \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
   pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \
   pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \
-PCRE2_EXP_DECL const uint8_t *PCRE2_CALL_CONVENTION \
-  pcre2_maketables(pcre2_general_context *); \
+PCRE2_EXP_DECL const uint8_t PCRE2_CALL_CONVENTION \
+  *pcre2_maketables(pcre2_general_context *); \
 PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
 PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
   pcre2_maketables_free(pcre2_general_context *, const uint8_t *);
   pcre2_maketables_free(pcre2_general_context *, const uint8_t *);
 
 

+ 2 - 10
thirdparty/pcre2/src/pcre2_compile.c

@@ -1266,10 +1266,8 @@ PCRE2_SIZE* ref_count;
 
 
 if (code != NULL)
 if (code != NULL)
   {
   {
-#ifdef SUPPORT_JIT
   if (code->executable_jit != NULL)
   if (code->executable_jit != NULL)
     PRIV(jit_free)(code->executable_jit, &code->memctl);
     PRIV(jit_free)(code->executable_jit, &code->memctl);
-#endif
 
 
   if ((code->flags & PCRE2_DEREF_TABLES) != 0)
   if ((code->flags & PCRE2_DEREF_TABLES) != 0)
     {
     {
@@ -2689,7 +2687,7 @@ if ((options & PCRE2_EXTENDED_MORE) != 0) options |= PCRE2_EXTENDED;
 while (ptr < ptrend)
 while (ptr < ptrend)
   {
   {
   int prev_expect_cond_assert;
   int prev_expect_cond_assert;
-  uint32_t min_repeat = 0, max_repeat = 0;
+  uint32_t min_repeat, max_repeat;
   uint32_t set, unset, *optset;
   uint32_t set, unset, *optset;
   uint32_t terminator;
   uint32_t terminator;
   uint32_t prev_meta_quantifier;
   uint32_t prev_meta_quantifier;
@@ -8554,7 +8552,7 @@ do {
             op == OP_SCBRA || op == OP_SCBRAPOS)
             op == OP_SCBRA || op == OP_SCBRAPOS)
      {
      {
      int n = GET2(scode, 1+LINK_SIZE);
      int n = GET2(scode, 1+LINK_SIZE);
-     unsigned int new_map = bracket_map | ((n < 32)? (1u << n) : 1);
+     int new_map = bracket_map | ((n < 32)? (1u << n) : 1);
      if (!is_startline(scode, new_map, cb, atomcount, inassert)) return FALSE;
      if (!is_startline(scode, new_map, cb, atomcount, inassert)) return FALSE;
      }
      }
 
 
@@ -10622,10 +10620,4 @@ re = NULL;
 goto EXIT;
 goto EXIT;
 }
 }
 
 
-/* These #undefs are here to enable unity builds with CMake. */
-
-#undef NLBLOCK /* Block containing newline information */
-#undef PSSTART /* Field containing processed string start */
-#undef PSEND   /* Field containing processed string end */
-
 /* End of pcre2_compile.c */
 /* End of pcre2_compile.c */

+ 3 - 9
thirdparty/pcre2/src/pcre2_context.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
 
                        Written by Philip Hazel
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2022 University of Cambridge
+          New API code Copyright (c) 2016-2018 University of Cambridge
 
 
 -----------------------------------------------------------------------------
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
 Redistribution and use in source and binary forms, with or without
@@ -443,11 +443,8 @@ mcontext->offset_limit = limit;
 return 0;
 return 0;
 }
 }
 
 
-/* These functions became obsolete at release 10.30. The first is kept as a
-synonym for backwards compatibility. The second now does nothing. Exclude both
-from coverage reports. */
-
-/* LCOV_EXCL_START */
+/* This function became obsolete at release 10.30. It is kept as a synonym for
+backwards compatibility. */
 
 
 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
 pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit)
 pcre2_set_recursion_limit(pcre2_match_context *mcontext, uint32_t limit)
@@ -467,9 +464,6 @@ pcre2_set_recursion_memory_management(pcre2_match_context *mcontext,
 return 0;
 return 0;
 }
 }
 
 
-/* LCOV_EXCL_STOP */
-
-
 /* ------------ Convert context ------------ */
 /* ------------ Convert context ------------ */
 
 
 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION

+ 3 - 2
thirdparty/pcre2/src/pcre2_convert.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
 
                        Written by Philip Hazel
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2022 University of Cambridge
+          New API code Copyright (c) 2016-2018 University of Cambridge
 
 
 -----------------------------------------------------------------------------
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
 Redistribution and use in source and binary forms, with or without
@@ -65,8 +65,9 @@ POSSIBILITY OF SUCH DAMAGE.
 #define STR_QUERY_s STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_s STR_RIGHT_PARENTHESIS
 #define STR_QUERY_s STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_s STR_RIGHT_PARENTHESIS
 #define STR_STAR_NUL STR_LEFT_PARENTHESIS STR_ASTERISK STR_N STR_U STR_L STR_RIGHT_PARENTHESIS
 #define STR_STAR_NUL STR_LEFT_PARENTHESIS STR_ASTERISK STR_N STR_U STR_L STR_RIGHT_PARENTHESIS
 
 
-/* States for POSIX processing */
+/* States for range and POSIX processing */
 
 
+enum { RANGE_NOT_STARTED, RANGE_STARTING, RANGE_STARTED };
 enum { POSIX_START_REGEX, POSIX_ANCHORED, POSIX_NOT_BRACKET,
 enum { POSIX_START_REGEX, POSIX_ANCHORED, POSIX_NOT_BRACKET,
        POSIX_CLASS_NOT_STARTED, POSIX_CLASS_STARTING, POSIX_CLASS_STARTED };
        POSIX_CLASS_NOT_STARTED, POSIX_CLASS_STARTING, POSIX_CLASS_STARTED };
 
 

+ 3 - 9
thirdparty/pcre2/src/pcre2_dfa_match.c

@@ -350,7 +350,7 @@ Returns:            the return from the callout
 */
 */
 
 
 static int
 static int
-do_callout_dfa(PCRE2_SPTR code, PCRE2_SIZE *offsets, PCRE2_SPTR current_subject,
+do_callout(PCRE2_SPTR code, PCRE2_SIZE *offsets, PCRE2_SPTR current_subject,
   PCRE2_SPTR ptr, dfa_match_block *mb, PCRE2_SIZE extracode,
   PCRE2_SPTR ptr, dfa_match_block *mb, PCRE2_SIZE extracode,
   PCRE2_SIZE *lengthptr)
   PCRE2_SIZE *lengthptr)
 {
 {
@@ -2799,7 +2799,7 @@ for (;;)
             || code[LINK_SIZE + 1] == OP_CALLOUT_STR)
             || code[LINK_SIZE + 1] == OP_CALLOUT_STR)
           {
           {
           PCRE2_SIZE callout_length;
           PCRE2_SIZE callout_length;
-          rrc = do_callout_dfa(code, offsets, current_subject, ptr, mb,
+          rrc = do_callout(code, offsets, current_subject, ptr, mb,
             1 + LINK_SIZE, &callout_length);
             1 + LINK_SIZE, &callout_length);
           if (rrc < 0) return rrc;                 /* Abandon */
           if (rrc < 0) return rrc;                 /* Abandon */
           if (rrc > 0) break;                      /* Fail this thread */
           if (rrc > 0) break;                      /* Fail this thread */
@@ -3196,7 +3196,7 @@ for (;;)
       case OP_CALLOUT_STR:
       case OP_CALLOUT_STR:
         {
         {
         PCRE2_SIZE callout_length;
         PCRE2_SIZE callout_length;
-        rrc = do_callout_dfa(code, offsets, current_subject, ptr, mb, 0,
+        rrc = do_callout(code, offsets, current_subject, ptr, mb, 0,
           &callout_length);
           &callout_length);
         if (rrc < 0) return rrc;   /* Abandon */
         if (rrc < 0) return rrc;   /* Abandon */
         if (rrc == 0)
         if (rrc == 0)
@@ -4057,10 +4057,4 @@ while (rws->next != NULL)
 return rc;
 return rc;
 }
 }
 
 
-/* These #undefs are here to enable unity builds with CMake. */
-
-#undef NLBLOCK /* Block containing newline information */
-#undef PSSTART /* Field containing processed string start */
-#undef PSEND   /* Field containing processed string end */
-
 /* End of pcre2_dfa_match.c */
 /* End of pcre2_dfa_match.c */

+ 9 - 8
thirdparty/pcre2/src/pcre2_internal.h

@@ -220,17 +220,18 @@ not rely on this. */
 
 
 #define COMPILE_ERROR_BASE 100
 #define COMPILE_ERROR_BASE 100
 
 
-/* The initial frames vector for remembering pcre2_match() backtracking points
-is allocated on the heap, of this size (bytes) or ten times the frame size if
-larger, unless the heap limit is smaller. Typical frame sizes are a few hundred
-bytes (it depends on the number of capturing parentheses) so 20KiB handles
-quite a few frames. A larger vector on the heap is obtained for matches that
-need more frames, subject to the heap limit. */
+/* The initial frames vector for remembering backtracking points in
+pcre2_match() is allocated on the system stack, of this size (bytes). The size
+must be a multiple of sizeof(PCRE2_SPTR) in all environments, so making it a
+multiple of 8 is best. Typical frame sizes are a few hundred bytes (it depends
+on the number of capturing parentheses) so 20KiB handles quite a few frames. A
+larger vector on the heap is obtained for patterns that need more frames. The
+maximum size of this can be limited. */
 
 
 #define START_FRAMES_SIZE 20480
 #define START_FRAMES_SIZE 20480
 
 
-/* For DFA matching, an initial internal workspace vector is allocated on the
-stack. The heap is used only if this turns out to be too small. */
+/* Similarly, for DFA matching, an initial internal workspace vector is
+allocated on the stack. */
 
 
 #define DFA_START_RWS_SIZE 30720
 #define DFA_START_RWS_SIZE 30720
 
 

+ 17 - 17
thirdparty/pcre2/src/pcre2_intmodedep.h

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
 
                        Written by Philip Hazel
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2022 University of Cambridge
+          New API code Copyright (c) 2016-2018 University of Cambridge
 
 
 -----------------------------------------------------------------------------
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
 Redistribution and use in source and binary forms, with or without
@@ -649,23 +649,19 @@ the size varies from call to call. As the maximum number of capturing
 subpatterns is 65535 we must allow for 65536 strings to include the overall
 subpatterns is 65535 we must allow for 65536 strings to include the overall
 match. (See also the heapframe structure below.) */
 match. (See also the heapframe structure below.) */
 
 
-struct heapframe;  /* Forward reference */
-
 typedef struct pcre2_real_match_data {
 typedef struct pcre2_real_match_data {
-  pcre2_memctl     memctl;           /* Memory control fields */
-  const pcre2_real_code *code;       /* The pattern used for the match */
-  PCRE2_SPTR       subject;          /* The subject that was matched */
-  PCRE2_SPTR       mark;             /* Pointer to last mark */
-  struct heapframe *heapframes;      /* Backtracking frames heap memory */
-  PCRE2_SIZE       heapframes_size;  /* Malloc-ed size */
-  PCRE2_SIZE       leftchar;         /* Offset to leftmost code unit */
-  PCRE2_SIZE       rightchar;        /* Offset to rightmost code unit */
-  PCRE2_SIZE       startchar;        /* Offset to starting code unit */
-  uint8_t          matchedby;        /* Type of match (normal, JIT, DFA) */
-  uint8_t          flags;            /* Various flags */
-  uint16_t         oveccount;        /* Number of pairs */
-  int              rc;               /* The return code from the match */
-  PCRE2_SIZE       ovector[131072];  /* Must be last in the structure */
+  pcre2_memctl     memctl;
+  const pcre2_real_code *code;    /* The pattern used for the match */
+  PCRE2_SPTR       subject;       /* The subject that was matched */
+  PCRE2_SPTR       mark;          /* Pointer to last mark */
+  PCRE2_SIZE       leftchar;      /* Offset to leftmost code unit */
+  PCRE2_SIZE       rightchar;     /* Offset to rightmost code unit */
+  PCRE2_SIZE       startchar;     /* Offset to starting code unit */
+  uint8_t          matchedby;     /* Type of match (normal, JIT, DFA) */
+  uint8_t          flags;         /* Various flags */
+  uint16_t         oveccount;     /* Number of pairs */
+  int              rc;            /* The return code from the match */
+  PCRE2_SIZE       ovector[131072]; /* Must be last in the structure */
 } pcre2_real_match_data;
 } pcre2_real_match_data;
 
 
 
 
@@ -858,6 +854,10 @@ doing traditional NFA matching (pcre2_match() and friends). */
 
 
 typedef struct match_block {
 typedef struct match_block {
   pcre2_memctl memctl;            /* For general use */
   pcre2_memctl memctl;            /* For general use */
+  PCRE2_SIZE frame_vector_size;   /* Size of a backtracking frame */
+  heapframe *match_frames;        /* Points to vector of frames */
+  heapframe *match_frames_top;    /* Points after the end of the vector */
+  heapframe *stack_frames;        /* The original vector on the stack */
   PCRE2_SIZE heap_limit;          /* As it says */
   PCRE2_SIZE heap_limit;          /* As it says */
   uint32_t match_limit;           /* As it says */
   uint32_t match_limit;           /* As it says */
   uint32_t match_limit_depth;     /* As it says */
   uint32_t match_limit_depth;     /* As it says */

+ 79 - 77
thirdparty/pcre2/src/pcre2_jit_compile.c

@@ -542,7 +542,7 @@ typedef struct compare_context {
 #undef CMP
 #undef CMP
 
 
 /* Used for accessing the elements of the stack. */
 /* Used for accessing the elements of the stack. */
-#define STACK(i)      ((i) * SSIZE_OF(sw))
+#define STACK(i)      ((i) * (int)sizeof(sljit_sw))
 
 
 #ifdef SLJIT_PREF_SHIFT_REG
 #ifdef SLJIT_PREF_SHIFT_REG
 #if SLJIT_PREF_SHIFT_REG == SLJIT_R2
 #if SLJIT_PREF_SHIFT_REG == SLJIT_R2
@@ -590,8 +590,8 @@ to characters. The vector data is divided into two groups: the first
 group contains the start / end character pointers, and the second is
 group contains the start / end character pointers, and the second is
 the start pointers when the end of the capturing group has not yet reached. */
 the start pointers when the end of the capturing group has not yet reached. */
 #define OVECTOR_START    (common->ovector_start)
 #define OVECTOR_START    (common->ovector_start)
-#define OVECTOR(i)       (OVECTOR_START + (i) * SSIZE_OF(sw))
-#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * SSIZE_OF(sw))
+#define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
+#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
 
 
 #if PCRE2_CODE_UNIT_WIDTH == 8
 #if PCRE2_CODE_UNIT_WIDTH == 8
@@ -2151,9 +2151,9 @@ while (cc < ccend)
       {
       {
       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
-      stackpos -= SSIZE_OF(sw);
+      stackpos -= (int)sizeof(sljit_sw);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
-      stackpos -= SSIZE_OF(sw);
+      stackpos -= (int)sizeof(sljit_sw);
       setsom_found = TRUE;
       setsom_found = TRUE;
       }
       }
     cc += 1;
     cc += 1;
@@ -2168,9 +2168,9 @@ while (cc < ccend)
       {
       {
       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
-      stackpos -= SSIZE_OF(sw);
+      stackpos -= (int)sizeof(sljit_sw);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
-      stackpos -= SSIZE_OF(sw);
+      stackpos -= (int)sizeof(sljit_sw);
       setmark_found = TRUE;
       setmark_found = TRUE;
       }
       }
     cc += 1 + 2 + cc[1];
     cc += 1 + 2 + cc[1];
@@ -2181,27 +2181,27 @@ while (cc < ccend)
       {
       {
       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
-      stackpos -= SSIZE_OF(sw);
+      stackpos -= (int)sizeof(sljit_sw);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
-      stackpos -= SSIZE_OF(sw);
+      stackpos -= (int)sizeof(sljit_sw);
       setsom_found = TRUE;
       setsom_found = TRUE;
       }
       }
     if (common->mark_ptr != 0 && !setmark_found)
     if (common->mark_ptr != 0 && !setmark_found)
       {
       {
       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
-      stackpos -= SSIZE_OF(sw);
+      stackpos -= (int)sizeof(sljit_sw);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
-      stackpos -= SSIZE_OF(sw);
+      stackpos -= (int)sizeof(sljit_sw);
       setmark_found = TRUE;
       setmark_found = TRUE;
       }
       }
     if (common->capture_last_ptr != 0 && !capture_last_found)
     if (common->capture_last_ptr != 0 && !capture_last_found)
       {
       {
       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
-      stackpos -= SSIZE_OF(sw);
+      stackpos -= (int)sizeof(sljit_sw);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
-      stackpos -= SSIZE_OF(sw);
+      stackpos -= (int)sizeof(sljit_sw);
       capture_last_found = TRUE;
       capture_last_found = TRUE;
       }
       }
     cc += 1 + LINK_SIZE;
     cc += 1 + LINK_SIZE;
@@ -2215,20 +2215,20 @@ while (cc < ccend)
       {
       {
       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
-      stackpos -= SSIZE_OF(sw);
+      stackpos -= (int)sizeof(sljit_sw);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
       OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
-      stackpos -= SSIZE_OF(sw);
+      stackpos -= (int)sizeof(sljit_sw);
       capture_last_found = TRUE;
       capture_last_found = TRUE;
       }
       }
     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
-    stackpos -= SSIZE_OF(sw);
+    stackpos -= (int)sizeof(sljit_sw);
     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
-    stackpos -= SSIZE_OF(sw);
+    stackpos -= (int)sizeof(sljit_sw);
     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
-    stackpos -= SSIZE_OF(sw);
+    stackpos -= (int)sizeof(sljit_sw);
 
 
     cc += 1 + LINK_SIZE + IMM2_SIZE;
     cc += 1 + LINK_SIZE + IMM2_SIZE;
     break;
     break;
@@ -3144,7 +3144,7 @@ static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
 DEFINE_COMPILER;
 DEFINE_COMPILER;
 
 
 SLJIT_ASSERT(size > 0);
 SLJIT_ASSERT(size > 0);
-OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));
+OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
 #ifdef DESTROY_REGISTERS
 #ifdef DESTROY_REGISTERS
 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
@@ -3160,7 +3160,7 @@ static SLJIT_INLINE void free_stack(compiler_common *common, int size)
 DEFINE_COMPILER;
 DEFINE_COMPILER;
 
 
 SLJIT_ASSERT(size > 0);
 SLJIT_ASSERT(size > 0);
-OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));
+OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
 }
 }
 
 
 static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
 static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
@@ -3200,12 +3200,12 @@ if (length < 8)
   }
   }
 else
 else
   {
   {
-  if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
+  if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
     {
     {
     GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
     GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
     loop = LABEL();
     loop = LABEL();
-    sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
+    sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
     OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
     JUMPTO(SLJIT_NOT_ZERO, loop);
     JUMPTO(SLJIT_NOT_ZERO, loop);
     }
     }
@@ -3261,8 +3261,8 @@ OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size);
 loop = LABEL();
 loop = LABEL();
 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
-OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * SSIZE_OF(sw), src, 0);
-OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * SSIZE_OF(sw), src, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * (sljit_sw)sizeof(sljit_sw), src, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * (sljit_sw)sizeof(sljit_sw), src, 0);
 CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);
 CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);
 
 
 if (uncleared_size >= sizeof(sljit_sw))
 if (uncleared_size >= sizeof(sljit_sw))
@@ -3289,12 +3289,12 @@ if (length < 8)
   }
   }
 else
 else
   {
   {
-  if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
+  if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
     {
     {
     GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
     GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
     loop = LABEL();
     loop = LABEL();
-    sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
+    sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
     OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
     OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
     JUMPTO(SLJIT_NOT_ZERO, loop);
     JUMPTO(SLJIT_NOT_ZERO, loop);
     }
     }
@@ -3386,7 +3386,7 @@ else
   OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
   OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
   }
   }
 
 
-has_pre = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
+has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
 
 
 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
@@ -3394,7 +3394,7 @@ OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUME
 loop = LABEL();
 loop = LABEL();
 
 
 if (has_pre)
 if (has_pre)
-  sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
+  sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
 else
 else
   {
   {
   OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
   OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
@@ -3417,14 +3417,14 @@ JUMPTO(SLJIT_NOT_ZERO, loop);
 /* Calculate the return value, which is the maximum ovector value. */
 /* Calculate the return value, which is the maximum ovector value. */
 if (topbracket > 1)
 if (topbracket > 1)
   {
   {
-  if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw))) == SLJIT_SUCCESS)
+  if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
     {
     {
     GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
     GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
     OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
     OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
 
 
     /* OVECTOR(0) is never equal to SLJIT_S2. */
     /* OVECTOR(0) is never equal to SLJIT_S2. */
     loop = LABEL();
     loop = LABEL();
-    sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw)));
+    sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
     OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
     OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
     CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
     CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
     OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
     OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
@@ -3437,7 +3437,7 @@ if (topbracket > 1)
     /* OVECTOR(0) is never equal to SLJIT_S2. */
     /* OVECTOR(0) is never equal to SLJIT_S2. */
     loop = LABEL();
     loop = LABEL();
     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
     OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
-    OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
+    OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
     OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
     OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
     CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
     CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
     OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
     OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
@@ -4652,8 +4652,8 @@ if (common->nltype != NLTYPE_ANY)
   /* All newlines are ascii, just skip intermediate octets. */
   /* All newlines are ascii, just skip intermediate octets. */
   jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
   jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
   loop = LABEL();
   loop = LABEL();
-  if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
-    sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+  if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
+    sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
   else
   else
     {
     {
     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
@@ -5886,7 +5886,7 @@ static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forw
       while (j < i)
       while (j < i)
         {
         {
         b_pri = chars[j].last_count;
         b_pri = chars[j].last_count;
-        if (b_pri > 2 && (sljit_u32)a_pri + (sljit_u32)b_pri >= max_pri)
+        if (b_pri > 2 && a_pri + b_pri >= max_pri)
           {
           {
           b1 = chars[j].chars[0];
           b1 = chars[j].chars[0];
           b2 = chars[j].chars[1];
           b2 = chars[j].chars[1];
@@ -6572,21 +6572,21 @@ GET_LOCAL_BASE(TMP1, 0, 0);
 
 
 /* Drop frames until we reach STACK_TOP. */
 /* Drop frames until we reach STACK_TOP. */
 mainloop = LABEL();
 mainloop = LABEL();
-OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -SSIZE_OF(sw));
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw));
 jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);
 jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);
 
 
 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
 if (HAS_VIRTUAL_REGISTERS)
 if (HAS_VIRTUAL_REGISTERS)
   {
   {
-  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
-  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
-  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
+  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
+  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
+  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
   }
   }
 else
 else
   {
   {
-  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
-  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
-  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
+  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
+  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
   OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
   OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
   GET_LOCAL_BASE(TMP1, 0, 0);
   GET_LOCAL_BASE(TMP1, 0, 0);
   OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
   OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
@@ -6603,13 +6603,13 @@ OP2(SLJIT_SUB, TMP2, 0, SLJIT_IMM, 0, TMP2, 0);
 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
 if (HAS_VIRTUAL_REGISTERS)
 if (HAS_VIRTUAL_REGISTERS)
   {
   {
-  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
-  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
+  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
+  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
   }
   }
 else
 else
   {
   {
-  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
-  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
+  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
+  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
   OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
   OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
   }
   }
 JUMPTO(SLJIT_JUMP, mainloop);
 JUMPTO(SLJIT_JUMP, mainloop);
@@ -7159,11 +7159,11 @@ if (char1_reg == STR_END)
   OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
   OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
   }
   }
 
 
-if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
+if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
   {
   {
   label = LABEL();
   label = LABEL();
-  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
-  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
+  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
   jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
   jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
   OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
   OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
   JUMPTO(SLJIT_NOT_ZERO, label);
   JUMPTO(SLJIT_NOT_ZERO, label);
@@ -7171,14 +7171,14 @@ if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST,
   JUMPHERE(jump);
   JUMPHERE(jump);
   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
   OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
   }
   }
-else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
+else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
   {
   {
   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
   OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
 
 
   label = LABEL();
   label = LABEL();
-  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
-  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
+  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
   jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
   jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
   OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
   OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
   JUMPTO(SLJIT_NOT_ZERO, label);
   JUMPTO(SLJIT_NOT_ZERO, label);
@@ -7232,9 +7232,9 @@ else
   lcc_table = TMP3;
   lcc_table = TMP3;
   }
   }
 
 
-if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
+if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
   opt_type = 1;
   opt_type = 1;
-else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
+else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
   opt_type = 2;
   opt_type = 2;
 
 
 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
@@ -7253,8 +7253,8 @@ OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);
 if (opt_type == 1)
 if (opt_type == 1)
   {
   {
   label = LABEL();
   label = LABEL();
-  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
-  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
+  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
   }
   }
 else if (opt_type == 2)
 else if (opt_type == 2)
   {
   {
@@ -7262,8 +7262,8 @@ else if (opt_type == 2)
   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
   OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
 
 
   label = LABEL();
   label = LABEL();
-  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
-  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
+  sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
   }
   }
 else
 else
   {
   {
@@ -9689,7 +9689,7 @@ BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
 return cc + 1 + LINK_SIZE;
 return cc + 1 + LINK_SIZE;
 }
 }
 
 
-static sljit_s32 SLJIT_FUNC do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
+static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
 {
 {
 PCRE2_SPTR begin;
 PCRE2_SPTR begin;
 PCRE2_SIZE *ovector;
 PCRE2_SIZE *ovector;
@@ -9756,7 +9756,7 @@ unsigned int callout_length = (*cc == OP_CALLOUT)
 sljit_sw value1;
 sljit_sw value1;
 sljit_sw value2;
 sljit_sw value2;
 sljit_sw value3;
 sljit_sw value3;
-sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * SSIZE_OF(sw);
+sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * sizeof(sljit_sw);
 
 
 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
 
 
@@ -9806,7 +9806,7 @@ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
 /* SLJIT_R0 = arguments */
 /* SLJIT_R0 = arguments */
 OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
 OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
 GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
-sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout_jit));
+sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout));
 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
 free_stack(common, callout_arg_size);
 free_stack(common, callout_arg_size);
 
 
@@ -11451,7 +11451,7 @@ struct sljit_label *label;
 int private_data_ptr = PRIVATE_DATA(cc);
 int private_data_ptr = PRIVATE_DATA(cc);
 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
-int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw);
+int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
 int tmp_base, tmp_offset;
 int tmp_base, tmp_offset;
 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
 BOOL use_tmp;
 BOOL use_tmp;
@@ -11517,19 +11517,19 @@ if (exact > 1)
     }
     }
   }
   }
 else if (exact == 1)
 else if (exact == 1)
+  {
   compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
   compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
 
 
-if (early_fail_type == type_fail_range)
-  {
-  /* Range end first, followed by range start. */
-  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
-  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw));
-  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
-  OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
-  add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
+  if (early_fail_type == type_fail_range)
+    {
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw));
+    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
+    OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
+    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
 
 
-  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
-  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw), STR_PTR, 0);
+    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw), STR_PTR, 0);
+    }
   }
   }
 
 
 switch(opcode)
 switch(opcode)
@@ -12428,7 +12428,7 @@ PCRE2_SPTR end;
 int private_data_ptr = PRIVATE_DATA(cc);
 int private_data_ptr = PRIVATE_DATA(cc);
 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
-int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw);
+int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
 
 
 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
 cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
 
 
@@ -14148,7 +14148,7 @@ quit_label = common->quit_label;
 if (common->currententry != NULL)
 if (common->currententry != NULL)
   {
   {
   /* A free bit for each private data. */
   /* A free bit for each private data. */
-  common->recurse_bitset_size = ((private_data_size / SSIZE_OF(sw)) + 7) >> 3;
+  common->recurse_bitset_size = ((private_data_size / (int)sizeof(sljit_sw)) + 7) >> 3;
   SLJIT_ASSERT(common->recurse_bitset_size > 0);
   SLJIT_ASSERT(common->recurse_bitset_size > 0);
   common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);;
   common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);;
 
 
@@ -14384,7 +14384,7 @@ pcre2_jit_compile(pcre2_code *code, uint32_t options)
 pcre2_real_code *re = (pcre2_real_code *)code;
 pcre2_real_code *re = (pcre2_real_code *)code;
 #ifdef SUPPORT_JIT
 #ifdef SUPPORT_JIT
 executable_functions *functions;
 executable_functions *functions;
-static int executable_allocator_is_working = -1;
+static int executable_allocator_is_working = 0;
 #endif
 #endif
 
 
 if (code == NULL)
 if (code == NULL)
@@ -14447,21 +14447,23 @@ return PCRE2_ERROR_JIT_BADOPTION;
 
 
 if ((re->flags & PCRE2_NOJIT) != 0) return 0;
 if ((re->flags & PCRE2_NOJIT) != 0) return 0;
 
 
-if (executable_allocator_is_working == -1)
+if (executable_allocator_is_working == 0)
   {
   {
   /* Checks whether the executable allocator is working. This check
   /* Checks whether the executable allocator is working. This check
      might run multiple times in multi-threaded environments, but the
      might run multiple times in multi-threaded environments, but the
      result should not be affected by it. */
      result should not be affected by it. */
   void *ptr = SLJIT_MALLOC_EXEC(32, NULL);
   void *ptr = SLJIT_MALLOC_EXEC(32, NULL);
+
+  executable_allocator_is_working = -1;
+
   if (ptr != NULL)
   if (ptr != NULL)
     {
     {
     SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL);
     SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL);
     executable_allocator_is_working = 1;
     executable_allocator_is_working = 1;
     }
     }
-  else executable_allocator_is_working = 0;
   }
   }
 
 
-if (!executable_allocator_is_working)
+if (executable_allocator_is_working < 0)
   return PCRE2_ERROR_NOMEMORY;
   return PCRE2_ERROR_NOMEMORY;
 
 
 if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
 if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)

+ 1 - 3
thirdparty/pcre2/src/pcre2_jit_misc.c

@@ -110,10 +110,8 @@ pcre2_jit_free_unused_memory(pcre2_general_context *gcontext)
 (void)gcontext;     /* Suppress warning */
 (void)gcontext;     /* Suppress warning */
 #else  /* SUPPORT_JIT */
 #else  /* SUPPORT_JIT */
 SLJIT_UNUSED_ARG(gcontext);
 SLJIT_UNUSED_ARG(gcontext);
-#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
 sljit_free_unused_memory_exec();
 sljit_free_unused_memory_exec();
-#endif /* SLJIT_EXECUTABLE_ALLOCATOR */
-#endif /* SUPPORT_JIT */
+#endif  /* SUPPORT_JIT */
 }
 }
 
 
 
 

+ 1 - 3
thirdparty/pcre2/src/pcre2_jit_neon_inc.h

@@ -183,8 +183,6 @@ restart:;
 #endif
 #endif
 
 
 #if defined(FFCPS)
 #if defined(FFCPS)
-if (str_ptr >= str_end)
-  return NULL;
 sljit_u8 *p1 = str_ptr - diff;
 sljit_u8 *p1 = str_ptr - diff;
 #endif
 #endif
 sljit_s32 align_offset = ((uint64_t)str_ptr & 0xf);
 sljit_s32 align_offset = ((uint64_t)str_ptr & 0xf);
@@ -329,7 +327,7 @@ match:;
     return NULL;
     return NULL;
 
 
 #if defined(FF_UTF)
 #if defined(FF_UTF)
-  if (utf_continue((PCRE2_SPTR)str_ptr - offs1))
+  if (utf_continue(str_ptr + IN_UCHARS(-offs1)))
     {
     {
     /* Not a match. */
     /* Not a match. */
     str_ptr += IN_UCHARS(1);
     str_ptr += IN_UCHARS(1);

+ 1 - 1
thirdparty/pcre2/src/pcre2_jit_simd_inc.h

@@ -776,7 +776,7 @@ typedef union {
 } int_char;
 } int_char;
 
 
 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
-static SLJIT_INLINE int utf_continue(PCRE2_SPTR s)
+static SLJIT_INLINE int utf_continue(sljit_u8 *s)
 {
 {
 #if PCRE2_CODE_UNIT_WIDTH == 8
 #if PCRE2_CODE_UNIT_WIDTH == 8
 return (*s & 0xc0) == 0x80;
 return (*s & 0xc0) == 0x80;

+ 86 - 94
thirdparty/pcre2/src/pcre2_match.c

@@ -204,7 +204,6 @@ Arguments:
   P           a previous frame of interest
   P           a previous frame of interest
   frame_size  the frame size
   frame_size  the frame size
   mb          points to the match block
   mb          points to the match block
-  match_data  points to the match data block
   s           identification text
   s           identification text
 
 
 Returns:    nothing
 Returns:    nothing
@@ -212,7 +211,7 @@ Returns:    nothing
 
 
 static void
 static void
 display_frames(FILE *f, heapframe *F, heapframe *P, PCRE2_SIZE frame_size,
 display_frames(FILE *f, heapframe *F, heapframe *P, PCRE2_SIZE frame_size,
-  match_block *mb, pcre2_match_data *match_data, const char *s, ...)
+  match_block *mb, const char *s, ...)
 {
 {
 uint32_t i;
 uint32_t i;
 heapframe *Q;
 heapframe *Q;
@@ -224,10 +223,10 @@ vfprintf(f, s, ap);
 va_end(ap);
 va_end(ap);
 
 
 if (P != NULL) fprintf(f, " P=%lu",
 if (P != NULL) fprintf(f, " P=%lu",
-  ((char *)P - (char *)(match_data->heapframes))/frame_size);
+  ((char *)P - (char *)(mb->match_frames))/frame_size);
 fprintf(f, "\n");
 fprintf(f, "\n");
 
 
-for (i = 0, Q = match_data->heapframes;
+for (i = 0, Q = mb->match_frames;
      Q <= F;
      Q <= F;
      i++, Q = (heapframe *)((char *)Q + frame_size))
      i++, Q = (heapframe *)((char *)Q + frame_size))
   {
   {
@@ -491,16 +490,10 @@ A version did exist that used individual frames on the heap instead of calling
 match() recursively, but this ran substantially slower. The current version is
 match() recursively, but this ran substantially slower. The current version is
 a refactoring that uses a vector of frames to remember backtracking points.
 a refactoring that uses a vector of frames to remember backtracking points.
 This runs no slower, and possibly even a bit faster than the original recursive
 This runs no slower, and possibly even a bit faster than the original recursive
-implementation.
-
-At first, an initial vector of size START_FRAMES_SIZE (enough for maybe 50
-frames) was allocated on the system stack. If this was not big enough, the heap
-was used for a larger vector. However, it turns out that there are environments
-where taking as little as 20KiB from the system stack is an embarrassment.
-After another refactoring, the heap is used exclusively, but a pointer to the
-frames vector and its size are cached in the match_data block, so that there is
-no new memory allocation if the same match_data block is used for multiple
-matches (unless the frames vector has to be extended).
+implementation. An initial vector of size START_FRAMES_SIZE (enough for maybe
+50 frames) is allocated on the system stack. If this is not big enough, the
+heap is used for a larger vector.
+
 *******************************************************************************
 *******************************************************************************
 ******************************************************************************/
 ******************************************************************************/
 
 
@@ -573,9 +566,10 @@ made performance worse.
 Arguments:
 Arguments:
    start_eptr   starting character in subject
    start_eptr   starting character in subject
    start_ecode  starting position in compiled code
    start_ecode  starting position in compiled code
+   ovector      pointer to the final output vector
+   oveccount    number of pairs in ovector
    top_bracket  number of capturing parentheses in the pattern
    top_bracket  number of capturing parentheses in the pattern
    frame_size   size of each backtracking frame
    frame_size   size of each backtracking frame
-   match_data   pointer to the match_data block
    mb           pointer to "static" variables block
    mb           pointer to "static" variables block
 
 
 Returns:        MATCH_MATCH if matched            )  these values are >= 0
 Returns:        MATCH_MATCH if matched            )  these values are >= 0
@@ -586,19 +580,17 @@ Returns:        MATCH_MATCH if matched            )  these values are >= 0
 */
 */
 
 
 static int
 static int
-match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket,
-  PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
+match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, PCRE2_SIZE *ovector,
+  uint16_t oveccount, uint16_t top_bracket, PCRE2_SIZE frame_size,
+  match_block *mb)
 {
 {
 /* Frame-handling variables */
 /* Frame-handling variables */
 
 
 heapframe *F;           /* Current frame pointer */
 heapframe *F;           /* Current frame pointer */
 heapframe *N = NULL;    /* Temporary frame pointers */
 heapframe *N = NULL;    /* Temporary frame pointers */
 heapframe *P = NULL;
 heapframe *P = NULL;
-
-heapframe *frames_top;  /* End of frames vector */
 heapframe *assert_accept_frame = NULL;  /* For passing back a frame with captures */
 heapframe *assert_accept_frame = NULL;  /* For passing back a frame with captures */
-PCRE2_SIZE heapframes_size;   /* Usable size of frames vector */
-PCRE2_SIZE frame_copy_size;   /* Amount to copy when creating a new frame */
+PCRE2_SIZE frame_copy_size;     /* Amount to copy when creating a new frame */
 
 
 /* Local variables that do not need to be preserved over calls to RRMATCH(). */
 /* Local variables that do not need to be preserved over calls to RRMATCH(). */
 
 
@@ -635,14 +627,10 @@ copied when a new frame is created. */
 
 
 frame_copy_size = frame_size - offsetof(heapframe, eptr);
 frame_copy_size = frame_size - offsetof(heapframe, eptr);
 
 
-/* Set up the first frame and the end of the frames vector. We set the local
-heapframes_size to the usable amount of the vector, that is, a whole number of
-frames. */
-
-F = match_data->heapframes;
-heapframes_size = (match_data->heapframes_size / frame_size) * frame_size;
-frames_top = (heapframe *)((char *)F + heapframes_size);
+/* Set up the first current frame at the start of the vector, and initialize
+fields that are not reset for new frames. */
 
 
+F = mb->match_frames;
 Frdepth = 0;                        /* "Recursion" depth */
 Frdepth = 0;                        /* "Recursion" depth */
 Fcapture_last = 0;                  /* Number of most recent capture */
 Fcapture_last = 0;                  /* Number of most recent capture */
 Fcurrent_recurse = RECURSE_UNSET;   /* Not pattern recursing. */
 Fcurrent_recurse = RECURSE_UNSET;   /* Not pattern recursing. */
@@ -658,35 +646,34 @@ backtracking point. */
 
 
 MATCH_RECURSE:
 MATCH_RECURSE:
 
 
-/* Set up a new backtracking frame. If the vector is full, get a new one,
-doubling the size, but constrained by the heap limit (which is in KiB). */
+/* Set up a new backtracking frame. If the vector is full, get a new one
+on the heap, doubling the size, but constrained by the heap limit. */
 
 
 N = (heapframe *)((char *)F + frame_size);
 N = (heapframe *)((char *)F + frame_size);
-if (N >= frames_top)
+if (N >= mb->match_frames_top)
   {
   {
+  PCRE2_SIZE newsize = mb->frame_vector_size * 2;
   heapframe *new;
   heapframe *new;
-  PCRE2_SIZE newsize = match_data->heapframes_size * 2;
 
 
-  if (newsize > mb->heap_limit)
+  if ((newsize / 1024) > mb->heap_limit)
     {
     {
-    PCRE2_SIZE maxsize = (mb->heap_limit/frame_size) * frame_size;
-    if (match_data->heapframes_size >= maxsize) return PCRE2_ERROR_HEAPLIMIT;
+    PCRE2_SIZE maxsize = ((mb->heap_limit * 1024)/frame_size) * frame_size;
+    if (mb->frame_vector_size >= maxsize) return PCRE2_ERROR_HEAPLIMIT;
     newsize = maxsize;
     newsize = maxsize;
     }
     }
 
 
-  new = match_data->memctl.malloc(newsize, match_data->memctl.memory_data);
+  new = mb->memctl.malloc(newsize, mb->memctl.memory_data);
   if (new == NULL) return PCRE2_ERROR_NOMEMORY;
   if (new == NULL) return PCRE2_ERROR_NOMEMORY;
-  memcpy(new, match_data->heapframes, heapframes_size);
+  memcpy(new, mb->match_frames, mb->frame_vector_size);
 
 
-  F = (heapframe *)((char *)new + ((char *)F - (char *)match_data->heapframes));
+  F = (heapframe *)((char *)new + ((char *)F - (char *)mb->match_frames));
   N = (heapframe *)((char *)F + frame_size);
   N = (heapframe *)((char *)F + frame_size);
 
 
-  match_data->memctl.free(match_data->heapframes, match_data->memctl.memory_data);
-  match_data->heapframes = new;
-  match_data->heapframes_size = newsize;
-
-  heapframes_size = (newsize / frame_size) * frame_size;
-  frames_top = (heapframe *)((char *)new + heapframes_size);
+  if (mb->match_frames != mb->stack_frames)
+    mb->memctl.free(mb->match_frames, mb->memctl.memory_data);
+  mb->match_frames = new;
+  mb->match_frames_top = (heapframe *)((char *)mb->match_frames + newsize);
+  mb->frame_vector_size = newsize;
   }
   }
 
 
 #ifdef DEBUG_SHOW_RMATCH
 #ifdef DEBUG_SHOW_RMATCH
@@ -744,7 +731,7 @@ recursion value. */
 
 
 if (group_frame_type != 0)
 if (group_frame_type != 0)
   {
   {
-  Flast_group_offset = (char *)F - (char *)match_data->heapframes;
+  Flast_group_offset = (char *)F - (char *)mb->match_frames;
   if (GF_IDMASK(group_frame_type) == GF_RECURSE)
   if (GF_IDMASK(group_frame_type) == GF_RECURSE)
     Fcurrent_recurse = GF_DATAMASK(group_frame_type);
     Fcurrent_recurse = GF_DATAMASK(group_frame_type);
   group_frame_type = 0;
   group_frame_type = 0;
@@ -786,7 +773,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
       for(;;)
       for(;;)
         {
         {
         if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
         if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
-        N = (heapframe *)((char *)match_data->heapframes + offset);
+        N = (heapframe *)((char *)mb->match_frames + offset);
         P = (heapframe *)((char *)N - frame_size);
         P = (heapframe *)((char *)N - frame_size);
         if (N->group_frame_type == (GF_CAPTURE | number)) break;
         if (N->group_frame_type == (GF_CAPTURE | number)) break;
         offset = P->last_group_offset;
         offset = P->last_group_offset;
@@ -824,7 +811,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
       for(;;)
       for(;;)
         {
         {
         if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
         if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
-        N = (heapframe *)((char *)match_data->heapframes + offset);
+        N = (heapframe *)((char *)mb->match_frames + offset);
         P = (heapframe *)((char *)N - frame_size);
         P = (heapframe *)((char *)N - frame_size);
         if (GF_IDMASK(N->group_frame_type) == GF_RECURSE) break;
         if (GF_IDMASK(N->group_frame_type) == GF_RECURSE) break;
         offset = P->last_group_offset;
         offset = P->last_group_offset;
@@ -877,15 +864,14 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
     mb->mark = Fmark;                    /* and the last success mark */
     mb->mark = Fmark;                    /* and the last success mark */
     if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
     if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
 
 
-    match_data->ovector[0] = Fstart_match - mb->start_subject;
-    match_data->ovector[1] = Feptr - mb->start_subject;
+    ovector[0] = Fstart_match - mb->start_subject;
+    ovector[1] = Feptr - mb->start_subject;
 
 
     /* Set i to the smaller of the sizes of the external and frame ovectors. */
     /* Set i to the smaller of the sizes of the external and frame ovectors. */
 
 
-    i = 2 * ((top_bracket + 1 > match_data->oveccount)?
-      match_data->oveccount : top_bracket + 1);
-    memcpy(match_data->ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE));
-    while (--i >= Foffset_top + 2) match_data->ovector[i] = PCRE2_UNSET;
+    i = 2 * ((top_bracket + 1 > oveccount)? oveccount : top_bracket + 1);
+    memcpy(ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE));
+    while (--i >= Foffset_top + 2) ovector[i] = PCRE2_UNSET;
     return MATCH_MATCH;  /* Note: NOT RRETURN */
     return MATCH_MATCH;  /* Note: NOT RRETURN */
 
 
 
 
@@ -5342,7 +5328,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
       offset = Flast_group_offset;
       offset = Flast_group_offset;
       while (offset != PCRE2_UNSET)
       while (offset != PCRE2_UNSET)
         {
         {
-        N = (heapframe *)((char *)match_data->heapframes + offset);
+        N = (heapframe *)((char *)mb->match_frames + offset);
         P = (heapframe *)((char *)N - frame_size);
         P = (heapframe *)((char *)N - frame_size);
         if (N->group_frame_type == (GF_RECURSE | number))
         if (N->group_frame_type == (GF_RECURSE | number))
           {
           {
@@ -5743,7 +5729,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
 
 
     if (*bracode != OP_BRA && *bracode != OP_COND)
     if (*bracode != OP_BRA && *bracode != OP_COND)
       {
       {
-      N = (heapframe *)((char *)match_data->heapframes + Flast_group_offset);
+      N = (heapframe *)((char *)mb->match_frames + Flast_group_offset);
       P = (heapframe *)((char *)N - frame_size);
       P = (heapframe *)((char *)N - frame_size);
       Flast_group_offset = P->last_group_offset;
       Flast_group_offset = P->last_group_offset;
 
 
@@ -6360,7 +6346,6 @@ BOOL jit_checked_utf = FALSE;
 #endif  /* SUPPORT_UNICODE */
 #endif  /* SUPPORT_UNICODE */
 
 
 PCRE2_SIZE frame_size;
 PCRE2_SIZE frame_size;
-PCRE2_SIZE heapframes_size;
 
 
 /* We need to have mb as a pointer to a match block, because the IS_NEWLINE
 /* We need to have mb as a pointer to a match block, because the IS_NEWLINE
 macro is used below, and it expects NLBLOCK to be defined as a pointer. */
 macro is used below, and it expects NLBLOCK to be defined as a pointer. */
@@ -6369,6 +6354,15 @@ pcre2_callout_block cb;
 match_block actual_match_block;
 match_block actual_match_block;
 match_block *mb = &actual_match_block;
 match_block *mb = &actual_match_block;
 
 
+/* Allocate an initial vector of backtracking frames on the stack. If this
+proves to be too small, it is replaced by a larger one on the heap. To get a
+vector of the size required that is aligned for pointers, allocate it as a
+vector of pointers. */
+
+PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)]
+    PCRE2_KEEP_UNINITIALIZED;
+mb->stack_frames = (heapframe *)stack_frames_vector;
+
 /* Recognize NULL, length 0 as an empty string. */
 /* Recognize NULL, length 0 as an empty string. */
 
 
 if (subject == NULL && length == 0) subject = (PCRE2_SPTR)"";
 if (subject == NULL && length == 0) subject = (PCRE2_SPTR)"";
@@ -6799,11 +6793,15 @@ switch(re->newline_convention)
 vector at the end, whose size depends on the number of capturing parentheses in
 vector at the end, whose size depends on the number of capturing parentheses in
 the pattern. It is not used at all if there are no capturing parentheses.
 the pattern. It is not used at all if there are no capturing parentheses.
 
 
-  frame_size                   is the total size of each frame
-  match_data->heapframes       is the pointer to the frames vector
-  match_data->heapframes_size  is the total size of the vector
+  frame_size             is the total size of each frame
+  mb->frame_vector_size  is the total usable size of the vector (rounded down
+                           to a whole number of frames)
+
+The last of these is changed within the match() function if the frame vector
+has to be expanded. We therefore put it into the match block so that it is
+correct when calling match() more than once for non-anchored patterns.
 
 
-We must pad the frame_size for alignment to ensure subsequent frames are as
+We must also pad frame_size for alignment to ensure subsequent frames are as
 aligned as heapframe. Whilst ovector is word-aligned due to being a PCRE2_SIZE
 aligned as heapframe. Whilst ovector is word-aligned due to being a PCRE2_SIZE
 array, that does not guarantee it is suitably aligned for pointers, as some
 array, that does not guarantee it is suitably aligned for pointers, as some
 architectures have pointers that are larger than a size_t. */
 architectures have pointers that are larger than a size_t. */
@@ -6815,8 +6813,8 @@ frame_size = (offsetof(heapframe, ovector) +
 /* Limits set in the pattern override the match context only if they are
 /* Limits set in the pattern override the match context only if they are
 smaller. */
 smaller. */
 
 
-mb->heap_limit = ((mcontext->heap_limit < re->limit_heap)?
-  mcontext->heap_limit : re->limit_heap) * 1024;
+mb->heap_limit = (mcontext->heap_limit < re->limit_heap)?
+  mcontext->heap_limit : re->limit_heap;
 
 
 mb->match_limit = (mcontext->match_limit < re->limit_match)?
 mb->match_limit = (mcontext->match_limit < re->limit_match)?
   mcontext->match_limit : re->limit_match;
   mcontext->match_limit : re->limit_match;
@@ -6825,40 +6823,35 @@ mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
   mcontext->depth_limit : re->limit_depth;
   mcontext->depth_limit : re->limit_depth;
 
 
 /* If a pattern has very many capturing parentheses, the frame size may be very
 /* If a pattern has very many capturing parentheses, the frame size may be very
-large. Set the initial frame vector size to ensure that there are at least 10
-available frames, but enforce a minimum of START_FRAMES_SIZE. If this is
-greater than the heap limit, get as large a vector as possible. Always round
-the size to a multiple of the frame size. */
-
-heapframes_size = frame_size * 10;
-if (heapframes_size < START_FRAMES_SIZE) heapframes_size = START_FRAMES_SIZE;
-if (heapframes_size > mb->heap_limit)
+large. Ensure that there are at least 10 available frames by getting an initial
+vector on the heap if necessary, except when the heap limit prevents this. Get
+fewer if possible. (The heap limit is in kibibytes.) */
+
+if (frame_size <= START_FRAMES_SIZE/10)
   {
   {
-  if (frame_size > mb->heap_limit ) return PCRE2_ERROR_HEAPLIMIT;
-  heapframes_size = mb->heap_limit;
+  mb->match_frames = mb->stack_frames;   /* Initial frame vector on the stack */
+  mb->frame_vector_size = ((START_FRAMES_SIZE/frame_size) * frame_size);
   }
   }
-
-/* If an existing frame vector in the match_data block is large enough, we can
-use it. Otherwise, free any pre-existing vector and get a new one. */
-
-if (match_data->heapframes_size < heapframes_size)
+else
   {
   {
-  match_data->memctl.free(match_data->heapframes,
-    match_data->memctl.memory_data);
-  match_data->heapframes = match_data->memctl.malloc(heapframes_size,
-    match_data->memctl.memory_data);
-  if (match_data->heapframes == NULL)
+  mb->frame_vector_size = frame_size * 10;
+  if ((mb->frame_vector_size / 1024) > mb->heap_limit)
     {
     {
-    match_data->heapframes_size = 0;
-    return PCRE2_ERROR_NOMEMORY;
+    if (frame_size > mb->heap_limit * 1024) return PCRE2_ERROR_HEAPLIMIT;
+    mb->frame_vector_size = ((mb->heap_limit * 1024)/frame_size) * frame_size;
     }
     }
-  match_data->heapframes_size = heapframes_size;
+  mb->match_frames = mb->memctl.malloc(mb->frame_vector_size,
+    mb->memctl.memory_data);
+  if (mb->match_frames == NULL) return PCRE2_ERROR_NOMEMORY;
   }
   }
 
 
+mb->match_frames_top =
+  (heapframe *)((char *)mb->match_frames + mb->frame_vector_size);
+
 /* Write to the ovector within the first frame to mark every capture unset and
 /* Write to the ovector within the first frame to mark every capture unset and
 to avoid uninitialized memory read errors when it is copied to a new frame. */
 to avoid uninitialized memory read errors when it is copied to a new frame. */
 
 
-memset((char *)(match_data->heapframes) + offsetof(heapframe, ovector), 0xff,
+memset((char *)(mb->match_frames) + offsetof(heapframe, ovector), 0xff,
   frame_size - offsetof(heapframe, ovector));
   frame_size - offsetof(heapframe, ovector));
 
 
 /* Pointers to the individual character tables */
 /* Pointers to the individual character tables */
@@ -7286,8 +7279,8 @@ for(;;)
   mb->end_offset_top = 0;
   mb->end_offset_top = 0;
   mb->skip_arg_count = 0;
   mb->skip_arg_count = 0;
 
 
-  rc = match(start_match, mb->start_code, re->top_bracket, frame_size,
-    match_data, mb);
+  rc = match(start_match, mb->start_code, match_data->ovector,
+    match_data->oveccount, re->top_bracket, frame_size, mb);
 
 
   if (mb->hitend && start_partial == NULL)
   if (mb->hitend && start_partial == NULL)
     {
     {
@@ -7470,6 +7463,11 @@ if (utf && end_subject != true_end_subject &&
   }
   }
 #endif  /* SUPPORT_UNICODE */
 #endif  /* SUPPORT_UNICODE */
 
 
+/* Release an enlarged frame vector that is on the heap. */
+
+if (mb->match_frames != mb->stack_frames)
+  mb->memctl.free(mb->match_frames, mb->memctl.memory_data);
+
 /* Fill in fields that are always returned in the match data. */
 /* Fill in fields that are always returned in the match data. */
 
 
 match_data->code = re;
 match_data->code = re;
@@ -7535,10 +7533,4 @@ else match_data->rc = PCRE2_ERROR_NOMATCH;
 return match_data->rc;
 return match_data->rc;
 }
 }
 
 
-/* These #undefs are here to enable unity builds with CMake. */
-
-#undef NLBLOCK /* Block containing newline information */
-#undef PSSTART /* Field containing processed string start */
-#undef PSEND   /* Field containing processed string end */
-
 /* End of pcre2_match.c */
 /* End of pcre2_match.c */

+ 2 - 9
thirdparty/pcre2/src/pcre2_match_data.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
 
                        Written by Philip Hazel
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2022 University of Cambridge
+          New API code Copyright (c) 2016-2019 University of Cambridge
 
 
 -----------------------------------------------------------------------------
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
 Redistribution and use in source and binary forms, with or without
@@ -51,23 +51,19 @@ POSSIBILITY OF SUCH DAMAGE.
 *  Create a match data block given ovector size  *
 *  Create a match data block given ovector size  *
 *************************************************/
 *************************************************/
 
 
-/* A minimum of 1 is imposed on the number of ovector pairs. A maximum is also
-imposed because the oveccount field in a match data block is uint16_t. */
+/* A minimum of 1 is imposed on the number of ovector pairs. */
 
 
 PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
 PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
 pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext)
 pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext)
 {
 {
 pcre2_match_data *yield;
 pcre2_match_data *yield;
 if (oveccount < 1) oveccount = 1;
 if (oveccount < 1) oveccount = 1;
-if (oveccount > UINT16_MAX) oveccount = UINT16_MAX;
 yield = PRIV(memctl_malloc)(
 yield = PRIV(memctl_malloc)(
   offsetof(pcre2_match_data, ovector) + 2*oveccount*sizeof(PCRE2_SIZE),
   offsetof(pcre2_match_data, ovector) + 2*oveccount*sizeof(PCRE2_SIZE),
   (pcre2_memctl *)gcontext);
   (pcre2_memctl *)gcontext);
 if (yield == NULL) return NULL;
 if (yield == NULL) return NULL;
 yield->oveccount = oveccount;
 yield->oveccount = oveccount;
 yield->flags = 0;
 yield->flags = 0;
-yield->heapframes = NULL;
-yield->heapframes_size = 0;
 return yield;
 return yield;
 }
 }
 
 
@@ -99,9 +95,6 @@ pcre2_match_data_free(pcre2_match_data *match_data)
 {
 {
 if (match_data != NULL)
 if (match_data != NULL)
   {
   {
-  if (match_data->heapframes != NULL)
-    match_data->memctl.free(match_data->heapframes,
-      match_data->memctl.memory_data);
   if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
   if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
     match_data->memctl.free((void *)match_data->subject,
     match_data->memctl.free((void *)match_data->subject,
       match_data->memctl.memory_data);
       match_data->memctl.memory_data);

+ 11 - 14
thirdparty/pcre2/src/pcre2_substitute.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
 
                        Written by Philip Hazel
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2022 University of Cambridge
+          New API code Copyright (c) 2016-2021 University of Cambridge
 
 
 -----------------------------------------------------------------------------
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
 Redistribution and use in source and binary forms, with or without
@@ -259,16 +259,16 @@ PCRE2_UNSET, so as not to imply an offset in the replacement. */
 
 
 if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
 if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
   return PCRE2_ERROR_BADOPTION;
   return PCRE2_ERROR_BADOPTION;
-
-/* Validate length and find the end of the replacement. A NULL replacement of
+  
+/* Validate length and find the end of the replacement. A NULL replacement of 
 zero length is interpreted as an empty string. */
 zero length is interpreted as an empty string. */
 
 
-if (replacement == NULL)
+if (replacement == NULL) 
   {
   {
   if (rlength != 0) return PCRE2_ERROR_NULL;
   if (rlength != 0) return PCRE2_ERROR_NULL;
-  replacement = (PCRE2_SPTR)"";
-  }
-
+  replacement = (PCRE2_SPTR)""; 
+  } 
+   
 if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
 if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
 repend = replacement + rlength;
 repend = replacement + rlength;
 
 
@@ -282,9 +282,8 @@ replacement_only = ((options & PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) != 0);
 match data block. We create an internal match_data block in two cases: (a) an
 match data block. We create an internal match_data block in two cases: (a) an
 external one is not supplied (and we are not starting from an existing match);
 external one is not supplied (and we are not starting from an existing match);
 (b) an existing match is to be used for the first substitution. In the latter
 (b) an existing match is to be used for the first substitution. In the latter
-case, we copy the existing match into the internal block, except for any cached
-heap frame size and pointer. This ensures that no changes are made to the
-external match data block. */
+case, we copy the existing match into the internal block. This ensures that no
+changes are made to the existing match data block. */
 
 
 if (match_data == NULL)
 if (match_data == NULL)
   {
   {
@@ -310,8 +309,6 @@ else if (use_existing_match)
   if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
   if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
   memcpy(internal_match_data, match_data, offsetof(pcre2_match_data, ovector)
   memcpy(internal_match_data, match_data, offsetof(pcre2_match_data, ovector)
     + 2*pairs*sizeof(PCRE2_SIZE));
     + 2*pairs*sizeof(PCRE2_SIZE));
-  internal_match_data->heapframes = NULL;
-  internal_match_data->heapframes_size = 0;
   match_data = internal_match_data;
   match_data = internal_match_data;
   }
   }
 
 
@@ -331,9 +328,9 @@ scb.ovector = ovector;
 
 
 if (subject == NULL)
 if (subject == NULL)
   {
   {
-  if (length != 0) return PCRE2_ERROR_NULL;
+  if (length != 0) return PCRE2_ERROR_NULL; 
   subject = (PCRE2_SPTR)"";
   subject = (PCRE2_SPTR)"";
-  }
+  } 
 
 
 /* Find length of zero-terminated subject */
 /* Find length of zero-terminated subject */
 
 

+ 12 - 2
thirdparty/pcre2/src/sljit/sljitConfig.h

@@ -53,8 +53,7 @@ extern "C" {
 /* #define SLJIT_CONFIG_PPC_64 1 */
 /* #define SLJIT_CONFIG_PPC_64 1 */
 /* #define SLJIT_CONFIG_MIPS_32 1 */
 /* #define SLJIT_CONFIG_MIPS_32 1 */
 /* #define SLJIT_CONFIG_MIPS_64 1 */
 /* #define SLJIT_CONFIG_MIPS_64 1 */
-/* #define SLJIT_CONFIG_RISCV_32 1 */
-/* #define SLJIT_CONFIG_RISCV_64 1 */
+/* #define SLJIT_CONFIG_SPARC_32 1 */
 /* #define SLJIT_CONFIG_S390X 1 */
 /* #define SLJIT_CONFIG_S390X 1 */
 
 
 /* #define SLJIT_CONFIG_AUTO 1 */
 /* #define SLJIT_CONFIG_AUTO 1 */
@@ -128,6 +127,17 @@ extern "C" {
 
 
 #endif /* !SLJIT_EXECUTABLE_ALLOCATOR */
 #endif /* !SLJIT_EXECUTABLE_ALLOCATOR */
 
 
+/* Force cdecl calling convention even if a better calling
+   convention (e.g. fastcall) is supported by the C compiler.
+   If this option is disabled (this is the default), functions
+   called from JIT should be defined with SLJIT_FUNC attribute.
+   Standard C functions can still be called by using the
+   SLJIT_CALL_CDECL jump type. */
+#ifndef SLJIT_USE_CDECL_CALLING_CONVENTION
+/* Disabled by default */
+#define SLJIT_USE_CDECL_CALLING_CONVENTION 0
+#endif
+
 /* Return with error when an invalid argument is passed. */
 /* Return with error when an invalid argument is passed. */
 #ifndef SLJIT_ARGUMENT_CHECKS
 #ifndef SLJIT_ARGUMENT_CHECKS
 /* Disabled by default */
 /* Disabled by default */

+ 80 - 54
thirdparty/pcre2/src/sljit/sljitConfigInternal.h

@@ -59,8 +59,7 @@ extern "C" {
      SLJIT_64BIT_ARCHITECTURE : 64 bit architecture
      SLJIT_64BIT_ARCHITECTURE : 64 bit architecture
      SLJIT_LITTLE_ENDIAN : little endian architecture
      SLJIT_LITTLE_ENDIAN : little endian architecture
      SLJIT_BIG_ENDIAN : big endian architecture
      SLJIT_BIG_ENDIAN : big endian architecture
-     SLJIT_UNALIGNED : unaligned memory accesses for non-fpu operations are supported
-     SLJIT_FPU_UNALIGNED : unaligned memory accesses for fpu operations are supported
+     SLJIT_UNALIGNED : allows unaligned memory accesses for non-fpu operations (only!)
      SLJIT_INDIRECT_CALL : see SLJIT_FUNC_ADDR() for more information
      SLJIT_INDIRECT_CALL : see SLJIT_FUNC_ADDR() for more information
 
 
    Constants:
    Constants:
@@ -99,8 +98,7 @@ extern "C" {
 	+ (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
 	+ (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
 	+ (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
 	+ (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
 	+ (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \
 	+ (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \
-	+ (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) \
-	+ (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) \
+	+ (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \
 	+ (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \
 	+ (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \
 	+ (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \
 	+ (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \
 	+ (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) >= 2
 	+ (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) >= 2
@@ -117,8 +115,7 @@ extern "C" {
 	&& !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
 	&& !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
 	&& !(defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
 	&& !(defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
 	&& !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \
 	&& !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \
-	&& !(defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) \
-	&& !(defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) \
+	&& !(defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \
 	&& !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \
 	&& !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \
 	&& !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) \
 	&& !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) \
 	&& !(defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO)
 	&& !(defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO)
@@ -159,10 +156,8 @@ extern "C" {
 #define SLJIT_CONFIG_MIPS_32 1
 #define SLJIT_CONFIG_MIPS_32 1
 #elif defined(__mips64)
 #elif defined(__mips64)
 #define SLJIT_CONFIG_MIPS_64 1
 #define SLJIT_CONFIG_MIPS_64 1
-#elif defined (__riscv_xlen) && (__riscv_xlen == 32)
-#define SLJIT_CONFIG_RISCV_32 1
-#elif defined (__riscv_xlen) && (__riscv_xlen == 64)
-#define SLJIT_CONFIG_RISCV_64 1
+#elif (defined(__sparc__) || defined(__sparc)) && !defined(_LP64)
+#define SLJIT_CONFIG_SPARC_32 1
 #elif defined(__s390x__)
 #elif defined(__s390x__)
 #define SLJIT_CONFIG_S390X 1
 #define SLJIT_CONFIG_S390X 1
 #else
 #else
@@ -210,8 +205,8 @@ extern "C" {
 #define SLJIT_CONFIG_PPC 1
 #define SLJIT_CONFIG_PPC 1
 #elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
 #elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
 #define SLJIT_CONFIG_MIPS 1
 #define SLJIT_CONFIG_MIPS 1
-#elif (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) || (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
-#define SLJIT_CONFIG_RISCV 1
+#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) || (defined SLJIT_CONFIG_SPARC_64 && SLJIT_CONFIG_SPARC_64)
+#define SLJIT_CONFIG_SPARC 1
 #endif
 #endif
 
 
 /***********************************************************/
 /***********************************************************/
@@ -335,14 +330,8 @@ extern "C" {
  * older versions are known to abort in some targets
  * older versions are known to abort in some targets
  * https://github.com/PhilipHazel/pcre2/issues/92
  * https://github.com/PhilipHazel/pcre2/issues/92
  *
  *
- * beware some vendors (ex: Microsoft, Apple) are known to have
- * removed the code to support this builtin even if the call for
- * __has_builtin reports it is available.
- *
- * make sure linking doesn't fail because __clear_cache() is
- * missing before changing it or add an exception so that the
- * system provided method that should be defined below is used
- * instead.
+ * beware APPLE is known to have removed the code in iOS so
+ * it will need to be exempted or result in broken builds
  */
  */
 #if (!defined SLJIT_CACHE_FLUSH && defined __has_builtin)
 #if (!defined SLJIT_CACHE_FLUSH && defined __has_builtin)
 #if __has_builtin(__builtin___clear_cache) && !defined(__clang__)
 #if __has_builtin(__builtin___clear_cache) && !defined(__clang__)
@@ -350,9 +339,9 @@ extern "C" {
 /*
 /*
  * https://gcc.gnu.org/bugzilla//show_bug.cgi?id=91248
  * https://gcc.gnu.org/bugzilla//show_bug.cgi?id=91248
  * https://gcc.gnu.org/bugzilla//show_bug.cgi?id=93811
  * https://gcc.gnu.org/bugzilla//show_bug.cgi?id=93811
- * gcc's clear_cache builtin for power is broken
+ * gcc's clear_cache builtin is broken for power and sparc
  */
  */
-#if !defined(SLJIT_CONFIG_PPC)
+#if !defined(SLJIT_CONFIG_PPC) && !defined(SLJIT_CONFIG_SPARC_32)
 #define SLJIT_CACHE_FLUSH(from, to) \
 #define SLJIT_CACHE_FLUSH(from, to) \
 	__builtin___clear_cache((char*)(from), (char*)(to))
 	__builtin___clear_cache((char*)(from), (char*)(to))
 #endif
 #endif
@@ -384,10 +373,12 @@ extern "C" {
 	ppc_cache_flush((from), (to))
 	ppc_cache_flush((from), (to))
 #define SLJIT_CACHE_FLUSH_OWN_IMPL 1
 #define SLJIT_CACHE_FLUSH_OWN_IMPL 1
 
 
-#elif defined(_WIN32)
+#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
 
 
+/* The __clear_cache() implementation of GCC is a dummy function on Sparc. */
 #define SLJIT_CACHE_FLUSH(from, to) \
 #define SLJIT_CACHE_FLUSH(from, to) \
-	FlushInstructionCache(GetCurrentProcess(), (void*)(from), (char*)(to) - (char*)(from))
+	sparc_cache_flush((from), (to))
+#define SLJIT_CACHE_FLUSH_OWN_IMPL 1
 
 
 #elif (defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || defined(__clang__)
 #elif (defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || defined(__clang__)
 
 
@@ -401,6 +392,11 @@ extern "C" {
 #define SLJIT_CACHE_FLUSH(from, to) \
 #define SLJIT_CACHE_FLUSH(from, to) \
 	cacheflush((long)(from), (long)(to), 0)
 	cacheflush((long)(from), (long)(to), 0)
 
 
+#elif defined _WIN32
+
+#define SLJIT_CACHE_FLUSH(from, to) \
+	FlushInstructionCache(GetCurrentProcess(), (void*)(from), (char*)(to) - (char*)(from))
+
 #else
 #else
 
 
 /* Call __ARM_NR_cacheflush on ARM-Linux or the corresponding MIPS syscall. */
 /* Call __ARM_NR_cacheflush on ARM-Linux or the corresponding MIPS syscall. */
@@ -439,7 +435,6 @@ typedef long int sljit_sw;
 	&& !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
 	&& !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
 	&& !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
 	&& !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
 	&& !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \
 	&& !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \
-	&& !(defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) \
 	&& !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
 	&& !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
 #define SLJIT_32BIT_ARCHITECTURE 1
 #define SLJIT_32BIT_ARCHITECTURE 1
 #define SLJIT_WORD_SHIFT 2
 #define SLJIT_WORD_SHIFT 2
@@ -500,7 +495,8 @@ typedef double sljit_f64;
 #if !defined(SLJIT_BIG_ENDIAN) && !defined(SLJIT_LITTLE_ENDIAN)
 #if !defined(SLJIT_BIG_ENDIAN) && !defined(SLJIT_LITTLE_ENDIAN)
 
 
 /* These macros are mostly useful for the applications. */
 /* These macros are mostly useful for the applications. */
-#if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \
+	|| (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
 
 
 #ifdef __LITTLE_ENDIAN__
 #ifdef __LITTLE_ENDIAN__
 #define SLJIT_LITTLE_ENDIAN 1
 #define SLJIT_LITTLE_ENDIAN 1
@@ -508,7 +504,8 @@ typedef double sljit_f64;
 #define SLJIT_BIG_ENDIAN 1
 #define SLJIT_BIG_ENDIAN 1
 #endif
 #endif
 
 
-#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
+#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
+	|| (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
 
 
 #ifdef __MIPSEL__
 #ifdef __MIPSEL__
 #define SLJIT_LITTLE_ENDIAN 1
 #define SLJIT_LITTLE_ENDIAN 1
@@ -535,7 +532,8 @@ typedef double sljit_f64;
 
 
 #endif /* !SLJIT_MIPS_REV */
 #endif /* !SLJIT_MIPS_REV */
 
 
-#elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
+#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \
+	|| (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
 
 
 #define SLJIT_BIG_ENDIAN 1
 #define SLJIT_BIG_ENDIAN 1
 
 
@@ -556,30 +554,19 @@ typedef double sljit_f64;
 
 
 #ifndef SLJIT_UNALIGNED
 #ifndef SLJIT_UNALIGNED
 
 
-#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \
+	|| (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \
 	|| (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \
 	|| (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \
 	|| (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \
 	|| (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \
 	|| (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
 	|| (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
-	|| (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \
-	|| (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \
+	|| (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \
+	|| (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
 	|| (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
 	|| (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
 #define SLJIT_UNALIGNED 1
 #define SLJIT_UNALIGNED 1
 #endif
 #endif
 
 
 #endif /* !SLJIT_UNALIGNED */
 #endif /* !SLJIT_UNALIGNED */
 
 
-#ifndef SLJIT_FPU_UNALIGNED
-
-#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
-	|| (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
-	|| (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \
-	|| (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \
-	|| (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
-#define SLJIT_FPU_UNALIGNED 1
-#endif
-
-#endif /* !SLJIT_FPU_UNALIGNED */
-
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 /* Auto detect SSE2 support using CPUID.
 /* Auto detect SSE2 support using CPUID.
    On 64 bit x86 cpus, sse2 must be present. */
    On 64 bit x86 cpus, sse2 must be present. */
@@ -591,7 +578,38 @@ typedef double sljit_f64;
 /*****************************************************************************************/
 /*****************************************************************************************/
 
 
 #ifndef SLJIT_FUNC
 #ifndef SLJIT_FUNC
+
+#if (defined SLJIT_USE_CDECL_CALLING_CONVENTION && SLJIT_USE_CDECL_CALLING_CONVENTION) \
+	|| !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+
 #define SLJIT_FUNC
 #define SLJIT_FUNC
+
+#elif defined(__GNUC__) && !defined(__APPLE__)
+
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+#define SLJIT_FUNC __attribute__ ((fastcall))
+#define SLJIT_X86_32_FASTCALL 1
+#else
+#define SLJIT_FUNC
+#endif /* gcc >= 3.4 */
+
+#elif defined(_MSC_VER)
+
+#define SLJIT_FUNC __fastcall
+#define SLJIT_X86_32_FASTCALL 1
+
+#elif defined(__BORLANDC__)
+
+#define SLJIT_FUNC __msfastcall
+#define SLJIT_X86_32_FASTCALL 1
+
+#else /* Unknown compiler. */
+
+/* The cdecl calling convention is usually the x86 default. */
+#define SLJIT_FUNC
+
+#endif /* SLJIT_USE_CDECL_CALLING_CONVENTION */
+
 #endif /* !SLJIT_FUNC */
 #endif /* !SLJIT_FUNC */
 
 
 #ifndef SLJIT_INDIRECT_CALL
 #ifndef SLJIT_INDIRECT_CALL
@@ -603,10 +621,14 @@ typedef double sljit_f64;
 #endif
 #endif
 #endif /* SLJIT_INDIRECT_CALL */
 #endif /* SLJIT_INDIRECT_CALL */
 
 
-/* The offset which needs to be subtracted from the return address to
+/* The offset which needs to be subtracted from the return address to
 determine the next executed instruction after return. */
 determine the next executed instruction after return. */
 #ifndef SLJIT_RETURN_ADDRESS_OFFSET
 #ifndef SLJIT_RETURN_ADDRESS_OFFSET
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+#define SLJIT_RETURN_ADDRESS_OFFSET 8
+#else
 #define SLJIT_RETURN_ADDRESS_OFFSET 0
 #define SLJIT_RETURN_ADDRESS_OFFSET 0
+#endif
 #endif /* SLJIT_RETURN_ADDRESS_OFFSET */
 #endif /* SLJIT_RETURN_ADDRESS_OFFSET */
 
 
 /***************************************************/
 /***************************************************/
@@ -644,10 +666,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 
 
 #define SLJIT_NUMBER_OF_REGISTERS 12
 #define SLJIT_NUMBER_OF_REGISTERS 12
-#define SLJIT_NUMBER_OF_SAVED_REGISTERS 7
+#define SLJIT_NUMBER_OF_SAVED_REGISTERS 9
 #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 7
 #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 7
 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
-#define SLJIT_LOCALS_OFFSET_BASE (8 * SSIZE_OF(sw))
+#define SLJIT_LOCALS_OFFSET_BASE (compiler->locals_offset)
 #define SLJIT_PREF_SHIFT_REG SLJIT_R2
 #define SLJIT_PREF_SHIFT_REG SLJIT_R2
 
 
 #elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 #elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
@@ -661,7 +683,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
 #else /* _WIN64 */
 #else /* _WIN64 */
 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 10
 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 10
-#define SLJIT_LOCALS_OFFSET_BASE (4 * SSIZE_OF(sw))
+#define SLJIT_LOCALS_OFFSET_BASE (4 * (sljit_s32)sizeof(sljit_sw))
 #endif /* !_WIN64 */
 #endif /* !_WIN64 */
 #define SLJIT_PREF_SHIFT_REG SLJIT_R3
 #define SLJIT_PREF_SHIFT_REG SLJIT_R3
 
 
@@ -718,13 +740,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8
 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8
 #endif
 #endif
 
 
-#elif (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV)
+#elif (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC)
 
 
-#define SLJIT_NUMBER_OF_REGISTERS 23
-#define SLJIT_NUMBER_OF_SAVED_REGISTERS 12
-#define SLJIT_LOCALS_OFFSET_BASE 0
-#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30
-#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 12
+#define SLJIT_NUMBER_OF_REGISTERS 18
+#define SLJIT_NUMBER_OF_SAVED_REGISTERS 14
+#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 14
+#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+/* saved registers (16), return struct pointer (1), space for 6 argument words (6),
+   4th double arg (2), double alignment (1). */
+#define SLJIT_LOCALS_OFFSET_BASE ((16 + 1 + 6 + 2 + 1) * (sljit_s32)sizeof(sljit_sw))
+#endif
 
 
 #elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
 #elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
 
 
@@ -780,7 +806,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
 #if (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) \
 #if (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) \
 	|| (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \
 	|| (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \
 	|| (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) \
 	|| (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) \
-	|| (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \
+	|| (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC) \
 	|| (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
 	|| (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
 #define SLJIT_HAS_STATUS_FLAGS_STATE 1
 #define SLJIT_HAS_STATUS_FLAGS_STATE 1
 #endif
 #endif

+ 3 - 0
thirdparty/pcre2/src/sljit/sljitExecAllocator.c

@@ -152,6 +152,9 @@ static SLJIT_INLINE void apple_update_wx_flags(sljit_s32 enable_exec)
 {
 {
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= 110000
 #if MAC_OS_X_VERSION_MIN_REQUIRED >= 110000
 	pthread_jit_write_protect_np(enable_exec);
 	pthread_jit_write_protect_np(enable_exec);
+#elif defined(__clang__)
+	if (__builtin_available(macOS 11.0, *))
+		pthread_jit_write_protect_np(enable_exec);
 #else
 #else
 #error "Must target Big Sur or newer"
 #error "Must target Big Sur or newer"
 #endif /* BigSur */
 #endif /* BigSur */

File diff suppressed because it is too large
+ 179 - 439
thirdparty/pcre2/src/sljit/sljitLir.c


File diff suppressed because it is too large
+ 222 - 454
thirdparty/pcre2/src/sljit/sljitLir.h


File diff suppressed because it is too large
+ 193 - 413
thirdparty/pcre2/src/sljit/sljitNativeARM_32.c


+ 84 - 315
thirdparty/pcre2/src/sljit/sljitNativeARM_64.c

@@ -86,7 +86,6 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
 #define CSINC 0x9a800400
 #define CSINC 0x9a800400
 #define EOR 0xca000000
 #define EOR 0xca000000
 #define EORI 0xd2000000
 #define EORI 0xd2000000
-#define EXTR 0x93c00000
 #define FABS 0x1e60c000
 #define FABS 0x1e60c000
 #define FADD 0x1e602800
 #define FADD 0x1e602800
 #define FCMP 0x1e602000
 #define FCMP 0x1e602000
@@ -99,7 +98,6 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
 #define FSUB 0x1e603800
 #define FSUB 0x1e603800
 #define LDRI 0xf9400000
 #define LDRI 0xf9400000
 #define LDRI_F64 0xfd400000
 #define LDRI_F64 0xfd400000
-#define LDRI_POST 0xf8400400
 #define LDP 0xa9400000
 #define LDP 0xa9400000
 #define LDP_F64 0x6d400000
 #define LDP_F64 0x6d400000
 #define LDP_POST 0xa8c00000
 #define LDP_POST 0xa8c00000
@@ -114,9 +112,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
 #define ORN 0xaa200000
 #define ORN 0xaa200000
 #define ORR 0xaa000000
 #define ORR 0xaa000000
 #define ORRI 0xb2000000
 #define ORRI 0xb2000000
-#define RBIT 0xdac00000
 #define RET 0xd65f0000
 #define RET 0xd65f0000
-#define RORV 0x9ac02c00
 #define SBC 0xda000000
 #define SBC 0xda000000
 #define SBFM 0x93000000
 #define SBFM 0x93000000
 #define SCVTF 0x9e620000
 #define SCVTF 0x9e620000
@@ -141,6 +137,8 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
 #define UDIV 0x9ac00800
 #define UDIV 0x9ac00800
 #define UMULH 0x9bc03c00
 #define UMULH 0x9bc03c00
 
 
+/* dest_reg is the absolute name of the register
+   Useful for reordering instructions in the delay slot. */
 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
 {
 {
 	sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
 	sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
@@ -298,8 +296,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 				}
 				}
 				next_addr = compute_next_addr(label, jump, const_, put_label);
 				next_addr = compute_next_addr(label, jump, const_, put_label);
 			}
 			}
-			code_ptr++;
-			word_count++;
+			code_ptr ++;
+			word_count ++;
 		} while (buf_ptr < buf_end);
 		} while (buf_ptr < buf_end);
 
 
 		buf = buf->next;
 		buf = buf->next;
@@ -393,8 +391,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
 #endif
 #endif
 
 
 	case SLJIT_HAS_CLZ:
 	case SLJIT_HAS_CLZ:
-	case SLJIT_HAS_CTZ:
-	case SLJIT_HAS_ROT:
 	case SLJIT_HAS_CMOV:
 	case SLJIT_HAS_CMOV:
 	case SLJIT_HAS_PREFETCH:
 	case SLJIT_HAS_PREFETCH:
 		return 1;
 		return 1;
@@ -635,7 +631,6 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
 		switch (op) {
 		switch (op) {
 		case SLJIT_MUL:
 		case SLJIT_MUL:
 		case SLJIT_CLZ:
 		case SLJIT_CLZ:
-		case SLJIT_CTZ:
 		case SLJIT_ADDC:
 		case SLJIT_ADDC:
 		case SLJIT_SUBC:
 		case SLJIT_SUBC:
 			/* No form with immediate operand (except imm 0, which
 			/* No form with immediate operand (except imm 0, which
@@ -706,50 +701,36 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
 			FAIL_IF(push_inst(compiler, (inst_bits ^ inv_bits) | RD(dst) | RN(reg)));
 			FAIL_IF(push_inst(compiler, (inst_bits ^ inv_bits) | RD(dst) | RN(reg)));
 			goto set_flags;
 			goto set_flags;
 		case SLJIT_SHL:
 		case SLJIT_SHL:
-		case SLJIT_MSHL:
 			if (flags & ARG1_IMM)
 			if (flags & ARG1_IMM)
 				break;
 				break;
-
 			if (flags & INT_OP) {
 			if (flags & INT_OP) {
 				imm &= 0x1f;
 				imm &= 0x1f;
-				inst_bits = (((sljit_ins)-imm & 0x1f) << 16) | ((31 - (sljit_ins)imm) << 10);
-			} else {
+				FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1)
+					| (((sljit_ins)-imm & 0x1f) << 16) | ((31 - (sljit_ins)imm) << 10)));
+			}
+			else {
 				imm &= 0x3f;
 				imm &= 0x3f;
-				inst_bits = ((sljit_ins)1 << 22) | (((sljit_ins)-imm & 0x3f) << 16) | ((63 - (sljit_ins)imm) << 10);
+				FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22)
+					| (((sljit_ins)-imm & 0x3f) << 16) | ((63 - (sljit_ins)imm) << 10)));
 			}
 			}
-
-			FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | inst_bits));
 			goto set_flags;
 			goto set_flags;
 		case SLJIT_LSHR:
 		case SLJIT_LSHR:
-		case SLJIT_MLSHR:
 		case SLJIT_ASHR:
 		case SLJIT_ASHR:
-		case SLJIT_MASHR:
 			if (flags & ARG1_IMM)
 			if (flags & ARG1_IMM)
 				break;
 				break;
-
-			if (op >= SLJIT_ASHR)
+			if (op == SLJIT_ASHR)
 				inv_bits |= 1 << 30;
 				inv_bits |= 1 << 30;
-
 			if (flags & INT_OP) {
 			if (flags & INT_OP) {
 				imm &= 0x1f;
 				imm &= 0x1f;
-				inst_bits = ((sljit_ins)imm << 16) | (31 << 10);
-			} else {
+				FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1)
+					| ((sljit_ins)imm << 16) | (31 << 10)));
+			}
+			else {
 				imm &= 0x3f;
 				imm &= 0x3f;
-				inst_bits = ((sljit_ins)1 << 22) | ((sljit_ins)imm << 16) | (63 << 10);
+				FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1)
+					| (1 << 22) | ((sljit_ins)imm << 16) | (63 << 10)));
 			}
 			}
-
-			FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | inst_bits));
 			goto set_flags;
 			goto set_flags;
-		case SLJIT_ROTL:
-		case SLJIT_ROTR:
-			if (flags & ARG1_IMM)
-				break;
-
-			if (op == SLJIT_ROTL)
-				imm = -imm;
-
-			imm &= (flags & INT_OP) ? 0x1f : 0x3f;
-			return push_inst(compiler, (EXTR ^ (inv_bits | (inv_bits >> 9))) | RD(dst) | RN(arg1) | RM(arg1) | ((sljit_ins)imm << 10));
 		default:
 		default:
 			SLJIT_UNREACHABLE();
 			SLJIT_UNREACHABLE();
 			break;
 			break;
@@ -815,10 +796,6 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
 	case SLJIT_CLZ:
 	case SLJIT_CLZ:
 		SLJIT_ASSERT(arg1 == TMP_REG1);
 		SLJIT_ASSERT(arg1 == TMP_REG1);
 		return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2));
 		return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2));
-	case SLJIT_CTZ:
-		SLJIT_ASSERT(arg1 == TMP_REG1);
-		FAIL_IF(push_inst(compiler, (RBIT ^ inv_bits) | RD(dst) | RN(arg2)));
-		return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(dst));
 	case SLJIT_ADD:
 	case SLJIT_ADD:
 		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
 		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
 		CHECK_FLAGS(1 << 29);
 		CHECK_FLAGS(1 << 29);
@@ -857,23 +834,14 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
 		FAIL_IF(push_inst(compiler, (EOR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
 		FAIL_IF(push_inst(compiler, (EOR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
 		break; /* Set flags. */
 		break; /* Set flags. */
 	case SLJIT_SHL:
 	case SLJIT_SHL:
-	case SLJIT_MSHL:
 		FAIL_IF(push_inst(compiler, (LSLV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
 		FAIL_IF(push_inst(compiler, (LSLV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
 		break; /* Set flags. */
 		break; /* Set flags. */
 	case SLJIT_LSHR:
 	case SLJIT_LSHR:
-	case SLJIT_MLSHR:
 		FAIL_IF(push_inst(compiler, (LSRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
 		FAIL_IF(push_inst(compiler, (LSRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
 		break; /* Set flags. */
 		break; /* Set flags. */
 	case SLJIT_ASHR:
 	case SLJIT_ASHR:
-	case SLJIT_MASHR:
 		FAIL_IF(push_inst(compiler, (ASRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
 		FAIL_IF(push_inst(compiler, (ASRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
 		break; /* Set flags. */
 		break; /* Set flags. */
-	case SLJIT_ROTL:
-		FAIL_IF(push_inst(compiler, (SUB ^ inv_bits) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(arg2)));
-		arg2 = TMP_REG2;
-		/* fallthrough */
-	case SLJIT_ROTR:
-		return push_inst(compiler, (RORV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
 	default:
 	default:
 		SLJIT_UNREACHABLE();
 		SLJIT_UNREACHABLE();
 		return SLJIT_SUCCESS;
 		return SLJIT_SUCCESS;
@@ -927,37 +895,21 @@ static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, s
 		return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10));
 		return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10));
 	}
 	}
 
 
-	if ((argw & ((1 << shift) - 1)) == 0) {
-		if (argw >= 0) {
-			if ((argw >> shift) <= 0xfff)
-				return push_inst(compiler, STRBI | type | RT(reg) | RN(arg) | ((sljit_ins)argw << (10 - shift)));
+	if (argw >= 0 && (argw & ((1 << shift) - 1)) == 0) {
+		if ((argw >> shift) <= 0xfff)
+			return push_inst(compiler, STRBI | type | RT(reg) | RN(arg) | ((sljit_ins)argw << (10 - shift)));
 
 
-			if (argw <= 0xffffff) {
-				FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)argw >> 12) << 10)));
+		if (argw <= 0xffffff) {
+			FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)argw >> 12) << 10)));
 
 
-				argw = ((argw & 0xfff) >> shift);
-				return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10));
-			}
-		} else if (argw < -256 && argw >= -0xfff000) {
-			FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)(-argw + 0xfff) >> 12) << 10)));
-			argw = ((0x1000 + argw) & 0xfff) >> shift;
+			argw = ((argw & 0xfff) >> shift);
 			return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10));
 			return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10));
 		}
 		}
 	}
 	}
 
 
-	if (argw <= 0xff && argw >= -0x100)
+	if (argw <= 255 && argw >= -256)
 		return push_inst(compiler, STURBI | type | RT(reg) | RN(arg) | (((sljit_ins)argw & 0x1ff) << 12));
 		return push_inst(compiler, STURBI | type | RT(reg) | RN(arg) | (((sljit_ins)argw & 0x1ff) << 12));
 
 
-	if (argw >= 0) {
-		if (argw <= 0xfff0ff && ((argw + 0x100) & 0xfff) <= 0x1ff) {
-			FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)argw >> 12) << 10)));
-			return push_inst(compiler, STURBI | type | RT(reg) | RN(tmp_reg) | (((sljit_ins)argw & 0x1ff) << 12));
-		}
-	} else if (argw >= -0xfff100 && ((-argw + 0xff) & 0xfff) <= 0x1ff) {
-		FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)-argw >> 12) << 10)));
-		return push_inst(compiler, STURBI | type | RT(reg) | RN(tmp_reg) | (((sljit_ins)argw & 0x1ff) << 12));
-	}
-
 	FAIL_IF(load_immediate(compiler, tmp_reg, argw));
 	FAIL_IF(load_immediate(compiler, tmp_reg, argw));
 
 
 	return push_inst(compiler, STRB | type | RT(reg) | RN(arg) | RM(tmp_reg));
 	return push_inst(compiler, STRB | type | RT(reg) | RN(arg) | RM(tmp_reg));
@@ -972,14 +924,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
 {
 {
 	sljit_s32 prev, fprev, saved_regs_size, i, tmp;
 	sljit_s32 prev, fprev, saved_regs_size, i, tmp;
-	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
+	sljit_s32 word_arg_count = 0;
 	sljit_ins offs;
 	sljit_ins offs;
 
 
 	CHECK_ERROR();
 	CHECK_ERROR();
 	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
 	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
 	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
 	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
 
 
-	saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 2);
+	saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
 	saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, SSIZE_OF(f64));
 	saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, SSIZE_OF(f64));
 
 
 	local_size = (local_size + saved_regs_size + 0xf) & ~0xf;
 	local_size = (local_size + saved_regs_size + 0xf) & ~0xf;
@@ -1002,7 +954,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 	prev = -1;
 	prev = -1;
 
 
 	tmp = SLJIT_S0 - saveds;
 	tmp = SLJIT_S0 - saveds;
-	for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) {
+	for (i = SLJIT_S0; i > tmp; i--) {
 		if (prev == -1) {
 		if (prev == -1) {
 			prev = i;
 			prev = i;
 			continue;
 			continue;
@@ -1051,27 +1003,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 	if (prev != -1)
 	if (prev != -1)
 		FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5) | ((fprev == -1) ? (1 << 10) : 0)));
 		FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5) | ((fprev == -1) ? (1 << 10) : 0)));
 
 
+	arg_types >>= SLJIT_ARG_SHIFT;
 
 
 #ifdef _WIN32
 #ifdef _WIN32
 	if (local_size > 4096)
 	if (local_size > 4096)
 		FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22)));
 		FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22)));
 #endif /* _WIN32 */
 #endif /* _WIN32 */
 
 
-	if (!(options & SLJIT_ENTER_REG_ARG)) {
-		arg_types >>= SLJIT_ARG_SHIFT;
-		saved_arg_count = 0;
-		tmp = SLJIT_R0;
-
-		while (arg_types) {
-			if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
-				if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
-					FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0 - saved_arg_count) | RN(TMP_ZERO) | RM(tmp)));
-					saved_arg_count++;
-				}
+	tmp = 0;
+	while (arg_types > 0) {
+		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
+			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
+				FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0 - tmp) | RN(TMP_ZERO) | RM(SLJIT_R0 + word_arg_count)));
 				tmp++;
 				tmp++;
 			}
 			}
-			arg_types >>= SLJIT_ARG_SHIFT;
+			word_arg_count++;
 		}
 		}
+		arg_types >>= SLJIT_ARG_SHIFT;
 	}
 	}
 
 
 #ifdef _WIN32
 #ifdef _WIN32
@@ -1152,34 +1100,26 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
 	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
 	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
 	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
 	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
 
 
-	saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 2);
+	saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
 	saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, SSIZE_OF(f64));
 	saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, SSIZE_OF(f64));
 
 
 	compiler->local_size = (local_size + saved_regs_size + 0xf) & ~0xf;
 	compiler->local_size = (local_size + saved_regs_size + 0xf) & ~0xf;
 	return SLJIT_SUCCESS;
 	return SLJIT_SUCCESS;
 }
 }
 
 
-static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)
+static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
 {
 {
 	sljit_s32 local_size, prev, fprev, i, tmp;
 	sljit_s32 local_size, prev, fprev, i, tmp;
 	sljit_ins offs;
 	sljit_ins offs;
 
 
 	local_size = compiler->local_size;
 	local_size = compiler->local_size;
 
 
-	if (!is_return_to) {
-		if (local_size > 512 && local_size <= 512 + 496) {
-			FAIL_IF(push_inst(compiler, LDP_POST | RT(TMP_FP) | RT2(TMP_LR)
-				| RN(SLJIT_SP) | ((sljit_ins)(local_size - 512) << (15 - 3))));
-			local_size = 512;
-		} else
-			FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
-	} else {
-		if (local_size > 512 && local_size <= 512 + 248) {
-			FAIL_IF(push_inst(compiler, LDRI_POST | RT(TMP_FP) | RN(SLJIT_SP) | ((sljit_ins)(local_size - 512) << 12)));
-			local_size = 512;
-		} else
-			FAIL_IF(push_inst(compiler, LDRI | RT(TMP_FP) | RN(SLJIT_SP) | 0));
-	}
+	if (local_size > 512 && local_size <= 512 + 496) {
+		FAIL_IF(push_inst(compiler, LDP_POST | RT(TMP_FP) | RT2(TMP_LR)
+			| RN(SLJIT_SP) | ((sljit_ins)(local_size - 512) << (15 - 3))));
+		local_size = 512;
+	} else
+		FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
 
 
 	if (local_size > 512) {
 	if (local_size > 512) {
 		local_size -= 512;
 		local_size -= 512;
@@ -1197,7 +1137,7 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit
 	prev = -1;
 	prev = -1;
 
 
 	tmp = SLJIT_S0 - compiler->saveds;
 	tmp = SLJIT_S0 - compiler->saveds;
-	for (i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); i > tmp; i--) {
+	for (i = SLJIT_S0; i > tmp; i--) {
 		if (prev == -1) {
 		if (prev == -1) {
 			prev = i;
 			prev = i;
 			continue;
 			continue;
@@ -1255,34 +1195,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler
 	CHECK_ERROR();
 	CHECK_ERROR();
 	CHECK(check_sljit_emit_return_void(compiler));
 	CHECK(check_sljit_emit_return_void(compiler));
 
 
-	FAIL_IF(emit_stack_frame_release(compiler, 0));
+	FAIL_IF(emit_stack_frame_release(compiler));
 
 
 	return push_inst(compiler, RET | RN(TMP_LR));
 	return push_inst(compiler, RET | RN(TMP_LR));
 }
 }
 
 
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
-	sljit_s32 src, sljit_sw srcw)
-{
-	CHECK_ERROR();
-	CHECK(check_sljit_emit_return_to(compiler, src, srcw));
-
-	if (src & SLJIT_MEM) {
-		ADJUST_LOCAL_OFFSET(src, srcw);
-		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
-		src = TMP_REG1;
-		srcw = 0;
-	} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
-		FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(src)));
-		src = TMP_REG1;
-		srcw = 0;
-	}
-
-	FAIL_IF(emit_stack_frame_release(compiler, 1));
-
-	SLJIT_SKIP_CHECKS(compiler);
-	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
-}
-
 /* --------------------------------------------------------------------- */
 /* --------------------------------------------------------------------- */
 /*  Operators                                                            */
 /*  Operators                                                            */
 /* --------------------------------------------------------------------- */
 /* --------------------------------------------------------------------- */
@@ -1475,84 +1392,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
 	CHECK_ERROR();
 	CHECK_ERROR();
 	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
 	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
 
 
-	SLJIT_SKIP_CHECKS(compiler);
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+	compiler->skip_checks = 1;
+#endif
 	return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
 	return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
 }
 }
 
 
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
-	sljit_s32 src_dst,
-	sljit_s32 src1, sljit_sw src1w,
-	sljit_s32 src2, sljit_sw src2w)
-{
-	sljit_ins inv_bits, imm;
-	sljit_s32 is_left;
-	sljit_sw mask;
-
-	CHECK_ERROR();
-	CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w));
-
-	is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);
-
-	if (src_dst == src1) {
-		SLJIT_SKIP_CHECKS(compiler);
-		return sljit_emit_op2(compiler, (is_left ? SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), src_dst, 0, src_dst, 0, src2, src2w);
-	}
-
-	ADJUST_LOCAL_OFFSET(src1, src1w);
-	ADJUST_LOCAL_OFFSET(src2, src2w);
-
-	inv_bits = (op & SLJIT_32) ? W_OP : 0;
-	mask = inv_bits ? 0x1f : 0x3f;
-
-	if (src2 & SLJIT_IMM) {
-		src2w &= mask;
-
-		if (src2w == 0)
-			return SLJIT_SUCCESS;
-	} else if (src2 & SLJIT_MEM) {
-		FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG2, src2, src2w, TMP_REG2));
-		src2 = TMP_REG2;
-	}
-
-	if (src1 & SLJIT_MEM) {
-		FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG1));
-		src1 = TMP_REG1;
-	} else if (src1 & SLJIT_IMM) {
-		FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
-		src1 = TMP_REG1;
-	}
-
-	if (src2 & SLJIT_IMM) {
-		if (is_left)
-			src2w = (src2w ^ mask) + 1;
-
-		return push_inst(compiler, (EXTR ^ (inv_bits | (inv_bits >> 9))) | RD(src_dst)
-			| RN(is_left ? src_dst : src1) | RM(is_left ? src1 : src_dst) | ((sljit_ins)src2w << 10));
-	}
-
-	FAIL_IF(push_inst(compiler, ((is_left ? LSLV : LSRV) ^ inv_bits) | RD(src_dst) | RN(src_dst) | RM(src2)));
-
-	if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {
-		/* Shift left/right by 1. */
-		if (is_left)
-			imm = (sljit_ins)(inv_bits ? ((1 << 16) | (31 << 10)) : ((1 << 16) | (63 << 10) | (1 << 22)));
-		else
-			imm = (sljit_ins)(inv_bits ? ((31 << 16) | (30 << 10)) : ((63 << 16) | (62 << 10) | (1 << 22)));
-
-		FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(TMP_REG1) | RN(src1) | imm));
-
-		/* Set imm to mask. */
-		imm = (sljit_ins)(inv_bits ? (4 << 10) : ((5 << 10) | (1 << 22)));
-		FAIL_IF(push_inst(compiler, (EORI ^ inv_bits) | RD(TMP_REG2) | RN(src2) | imm));
-
-		src1 = TMP_REG1;
-	} else
-		FAIL_IF(push_inst(compiler, (SUB ^ inv_bits) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(src2)));
-
-	FAIL_IF(push_inst(compiler, ((is_left ? LSRV : LSLV) ^ inv_bits) | RD(TMP_REG1) | RN(src1) | RM(TMP_REG2)));
-	return push_inst(compiler, (ORR ^ inv_bits) | RD(src_dst) | RN(src_dst) | RM(TMP_REG1));
-}
-
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
 	sljit_s32 src, sljit_sw srcw)
 	sljit_s32 src, sljit_sw srcw)
 {
 {
@@ -1704,9 +1550,10 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp
 		emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) ? INT_SIZE : WORD_SIZE), TMP_REG1, src, srcw, TMP_REG1);
 		emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) ? INT_SIZE : WORD_SIZE), TMP_REG1, src, srcw, TMP_REG1);
 		src = TMP_REG1;
 		src = TMP_REG1;
 	} else if (src & SLJIT_IMM) {
 	} else if (src & SLJIT_IMM) {
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
 		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
 			srcw = (sljit_s32)srcw;
 			srcw = (sljit_s32)srcw;
-
+#endif
 		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
 		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
 		src = TMP_REG1;
 		src = TMP_REG1;
 	}
 	}
@@ -1852,15 +1699,11 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
 {
 {
 	switch (type) {
 	switch (type) {
 	case SLJIT_EQUAL:
 	case SLJIT_EQUAL:
-	case SLJIT_F_EQUAL:
-	case SLJIT_ORDERED_EQUAL:
-	case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. */
+	case SLJIT_EQUAL_F64:
 		return 0x1;
 		return 0x1;
 
 
 	case SLJIT_NOT_EQUAL:
 	case SLJIT_NOT_EQUAL:
-	case SLJIT_F_NOT_EQUAL:
-	case SLJIT_UNORDERED_OR_NOT_EQUAL:
-	case SLJIT_ORDERED_NOT_EQUAL: /* Not supported. */
+	case SLJIT_NOT_EQUAL_F64:
 		return 0x0;
 		return 0x0;
 
 
 	case SLJIT_CARRY:
 	case SLJIT_CARRY:
@@ -1869,6 +1712,7 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
 		/* fallthrough */
 		/* fallthrough */
 
 
 	case SLJIT_LESS:
 	case SLJIT_LESS:
+	case SLJIT_LESS_F64:
 		return 0x2;
 		return 0x2;
 
 
 	case SLJIT_NOT_CARRY:
 	case SLJIT_NOT_CARRY:
@@ -1877,33 +1721,27 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
 		/* fallthrough */
 		/* fallthrough */
 
 
 	case SLJIT_GREATER_EQUAL:
 	case SLJIT_GREATER_EQUAL:
+	case SLJIT_GREATER_EQUAL_F64:
 		return 0x3;
 		return 0x3;
 
 
 	case SLJIT_GREATER:
 	case SLJIT_GREATER:
-	case SLJIT_UNORDERED_OR_GREATER:
+	case SLJIT_GREATER_F64:
 		return 0x9;
 		return 0x9;
 
 
 	case SLJIT_LESS_EQUAL:
 	case SLJIT_LESS_EQUAL:
-	case SLJIT_F_LESS_EQUAL:
-	case SLJIT_ORDERED_LESS_EQUAL:
+	case SLJIT_LESS_EQUAL_F64:
 		return 0x8;
 		return 0x8;
 
 
 	case SLJIT_SIG_LESS:
 	case SLJIT_SIG_LESS:
-	case SLJIT_UNORDERED_OR_LESS:
 		return 0xa;
 		return 0xa;
 
 
 	case SLJIT_SIG_GREATER_EQUAL:
 	case SLJIT_SIG_GREATER_EQUAL:
-	case SLJIT_F_GREATER_EQUAL:
-	case SLJIT_ORDERED_GREATER_EQUAL:
 		return 0xb;
 		return 0xb;
 
 
 	case SLJIT_SIG_GREATER:
 	case SLJIT_SIG_GREATER:
-	case SLJIT_F_GREATER:
-	case SLJIT_ORDERED_GREATER:
 		return 0xd;
 		return 0xd;
 
 
 	case SLJIT_SIG_LESS_EQUAL:
 	case SLJIT_SIG_LESS_EQUAL:
-	case SLJIT_UNORDERED_OR_LESS_EQUAL:
 		return 0xc;
 		return 0xc;
 
 
 	case SLJIT_OVERFLOW:
 	case SLJIT_OVERFLOW:
@@ -1911,7 +1749,7 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
 			return 0x0;
 			return 0x0;
 		/* fallthrough */
 		/* fallthrough */
 
 
-	case SLJIT_UNORDERED:
+	case SLJIT_UNORDERED_F64:
 		return 0x7;
 		return 0x7;
 
 
 	case SLJIT_NOT_OVERFLOW:
 	case SLJIT_NOT_OVERFLOW:
@@ -1919,16 +1757,9 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
 			return 0x1;
 			return 0x1;
 		/* fallthrough */
 		/* fallthrough */
 
 
-	case SLJIT_ORDERED:
+	case SLJIT_ORDERED_F64:
 		return 0x6;
 		return 0x6;
 
 
-	case SLJIT_F_LESS:
-	case SLJIT_ORDERED_LESS:
-		return 0x5;
-
-	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
-		return 0x4;
-
 	default:
 	default:
 		SLJIT_UNREACHABLE();
 		SLJIT_UNREACHABLE();
 		return 0xe;
 		return 0xe;
@@ -1985,11 +1816,15 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile
 	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
 	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
 
 
 	if (type & SLJIT_CALL_RETURN) {
 	if (type & SLJIT_CALL_RETURN) {
-		PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));
+		PTR_FAIL_IF(emit_stack_frame_release(compiler));
 		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
 		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
 	}
 	}
 
 
-	SLJIT_SKIP_CHECKS(compiler);
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+	compiler->skip_checks = 1;
+#endif
+
 	return sljit_emit_jump(compiler, type);
 	return sljit_emit_jump(compiler, type);
 }
 }
 
 
@@ -2034,10 +1869,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
 
 
 	CHECK_ERROR();
 	CHECK_ERROR();
 	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
 	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
+	ADJUST_LOCAL_OFFSET(src, srcw);
 
 
 	if (!(src & SLJIT_IMM)) {
 	if (!(src & SLJIT_IMM)) {
 		if (src & SLJIT_MEM) {
 		if (src & SLJIT_MEM) {
-			ADJUST_LOCAL_OFFSET(src, srcw);
 			FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
 			FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
 			src = TMP_REG1;
 			src = TMP_REG1;
 		}
 		}
@@ -2062,24 +1897,28 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
 	SLJIT_UNUSED_ARG(arg_types);
 	SLJIT_UNUSED_ARG(arg_types);
 	CHECK_ERROR();
 	CHECK_ERROR();
 	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
 	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
+	ADJUST_LOCAL_OFFSET(src, srcw);
 
 
 	if (src & SLJIT_MEM) {
 	if (src & SLJIT_MEM) {
-		ADJUST_LOCAL_OFFSET(src, srcw);
 		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
 		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
 		src = TMP_REG1;
 		src = TMP_REG1;
 	}
 	}
 
 
 	if (type & SLJIT_CALL_RETURN) {
 	if (type & SLJIT_CALL_RETURN) {
-		if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
+		if (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0) {
 			FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(src)));
 			FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(src)));
 			src = TMP_REG1;
 			src = TMP_REG1;
 		}
 		}
 
 
-		FAIL_IF(emit_stack_frame_release(compiler, 0));
+		FAIL_IF(emit_stack_frame_release(compiler));
 		type = SLJIT_JUMP;
 		type = SLJIT_JUMP;
 	}
 	}
 
 
-	SLJIT_SKIP_CHECKS(compiler);
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+	compiler->skip_checks = 1;
+#endif
+
 	return sljit_emit_ijump(compiler, type, src, srcw);
 	return sljit_emit_ijump(compiler, type, src, srcw);
 }
 }
 
 
@@ -2094,7 +1933,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
 	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
 	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
 	ADJUST_LOCAL_OFFSET(dst, dstw);
 	ADJUST_LOCAL_OFFSET(dst, dstw);
 
 
-	cc = get_cc(compiler, type);
+	cc = get_cc(compiler, type & 0xff);
 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
 
 
 	if (GET_OPCODE(op) < SLJIT_ADD) {
 	if (GET_OPCODE(op) < SLJIT_ADD) {
@@ -2135,21 +1974,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
 	sljit_s32 dst_reg,
 	sljit_s32 dst_reg,
 	sljit_s32 src, sljit_sw srcw)
 	sljit_s32 src, sljit_sw srcw)
 {
 {
-	sljit_ins inv_bits = (type & SLJIT_32) ? W_OP : 0;
+	sljit_ins inv_bits = (dst_reg & SLJIT_32) ? W_OP : 0;
 	sljit_ins cc;
 	sljit_ins cc;
 
 
 	CHECK_ERROR();
 	CHECK_ERROR();
 	CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
 	CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
 
 
 	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
 	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
-		if (type & SLJIT_32)
+		if (dst_reg & SLJIT_32)
 			srcw = (sljit_s32)srcw;
 			srcw = (sljit_s32)srcw;
 		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
 		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
 		src = TMP_REG1;
 		src = TMP_REG1;
 		srcw = 0;
 		srcw = 0;
 	}
 	}
 
 
-	cc = get_cc(compiler, type & ~SLJIT_32);
+	cc = get_cc(compiler, type & 0xff);
+	dst_reg &= ~SLJIT_32;
 
 
 	return push_inst(compiler, (CSEL ^ inv_bits) | (cc << 12) | RD(dst_reg) | RN(dst_reg) | RM(src));
 	return push_inst(compiler, (CSEL ^ inv_bits) | (cc << 12) | RD(dst_reg) | RN(dst_reg) | RM(src));
 }
 }
@@ -2157,82 +1997,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
 	sljit_s32 reg,
 	sljit_s32 reg,
 	sljit_s32 mem, sljit_sw memw)
 	sljit_s32 mem, sljit_sw memw)
-{
-	sljit_u32 inst;
-
-	CHECK_ERROR();
-	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
-
-	if (!(reg & REG_PAIR_MASK))
-		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
-
-	ADJUST_LOCAL_OFFSET(mem, memw);
-
-	if (!(mem & REG_MASK)) {
-		FAIL_IF(load_immediate(compiler, TMP_REG1, memw & ~0x1f8));
-
-		mem = SLJIT_MEM1(TMP_REG1);
-		memw &= 0x1f8;
-	} else if (mem & OFFS_REG_MASK) {
-		FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(mem & REG_MASK) | RM(OFFS_REG(mem)) | ((sljit_ins)(memw & 0x3) << 10)));
-
-		mem = SLJIT_MEM1(TMP_REG1);
-		memw = 0;
-	} else if ((memw & 0x7) != 0 || memw > 0x1f8 || memw < -0x200) {
-		inst = ADDI;
-
-		if (memw < 0) {
-			/* Remains negative for integer min. */
-			memw = -memw;
-			inst = SUBI;
-		} else if ((memw & 0x7) == 0 && memw <= 0x7ff0) {
-			if (!(type & SLJIT_MEM_STORE) && (mem & REG_MASK) == REG_PAIR_FIRST(reg)) {
-				FAIL_IF(push_inst(compiler, LDRI | RD(REG_PAIR_SECOND(reg)) | RN(mem & REG_MASK) | ((sljit_ins)memw << 7)));
-				return push_inst(compiler, LDRI | RD(REG_PAIR_FIRST(reg)) | RN(mem & REG_MASK) | ((sljit_ins)(memw + 0x8) << 7));
-			}
-
-			inst = (type & SLJIT_MEM_STORE) ? STRI : LDRI;
-
-			FAIL_IF(push_inst(compiler, inst | RD(REG_PAIR_FIRST(reg)) | RN(mem & REG_MASK) | ((sljit_ins)memw << 7)));
-			return push_inst(compiler, inst | RD(REG_PAIR_SECOND(reg)) | RN(mem & REG_MASK) | ((sljit_ins)(memw + 0x8) << 7));
-		}
-
-		if ((sljit_uw)memw <= 0xfff) {
-			FAIL_IF(push_inst(compiler, inst | RD(TMP_REG1) | RN(mem & REG_MASK) | ((sljit_ins)memw << 10)));
-			memw = 0;
-		} else if ((sljit_uw)memw <= 0xffffff) {
-			FAIL_IF(push_inst(compiler, inst | (1 << 22) | RD(TMP_REG1) | RN(mem & REG_MASK) | (((sljit_ins)memw >> 12) << 10)));
-
-			if ((memw & 0xe07) != 0) {
-				FAIL_IF(push_inst(compiler, inst | RD(TMP_REG1) | RN(TMP_REG1) | (((sljit_ins)memw & 0xfff) << 10)));
-				memw = 0;
-			} else {
-				memw &= 0xfff;
-			}
-		} else {
-			FAIL_IF(load_immediate(compiler, TMP_REG1, memw));
-			FAIL_IF(push_inst(compiler, (inst == ADDI ? ADD : SUB) | RD(TMP_REG1) | RN(mem & REG_MASK) | RM(TMP_REG1)));
-			memw = 0;
-		}
-
-		mem = SLJIT_MEM1(TMP_REG1);
-
-		if (inst == SUBI)
-			memw = -memw;
-	}
-
-	SLJIT_ASSERT((memw & 0x7) == 0 && memw <= 0x1f8 && memw >= -0x200);
-	return push_inst(compiler, ((type & SLJIT_MEM_STORE) ? STP : LDP) | RT(REG_PAIR_FIRST(reg)) | RT2(REG_PAIR_SECOND(reg)) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x3f8) << 12));
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type,
-	sljit_s32 reg,
-	sljit_s32 mem, sljit_sw memw)
 {
 {
 	sljit_u32 sign = 0, inst;
 	sljit_u32 sign = 0, inst;
 
 
 	CHECK_ERROR();
 	CHECK_ERROR();
-	CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw));
+	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
 
 
 	if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -256))
 	if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -256))
 		return SLJIT_ERR_UNSUPPORTED;
 		return SLJIT_ERR_UNSUPPORTED;
@@ -2273,20 +2042,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *
 	if (!(type & SLJIT_MEM_STORE))
 	if (!(type & SLJIT_MEM_STORE))
 		inst |= sign ? 0x00800000 : 0x00400000;
 		inst |= sign ? 0x00800000 : 0x00400000;
 
 
-	if (!(type & SLJIT_MEM_POST))
+	if (type & SLJIT_MEM_PRE)
 		inst |= 0x800;
 		inst |= 0x800;
 
 
 	return push_inst(compiler, inst | RT(reg) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x1ff) << 12));
 	return push_inst(compiler, inst | RT(reg) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x1ff) << 12));
 }
 }
 
 
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler *compiler, sljit_s32 type,
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type,
 	sljit_s32 freg,
 	sljit_s32 freg,
 	sljit_s32 mem, sljit_sw memw)
 	sljit_s32 mem, sljit_sw memw)
 {
 {
 	sljit_u32 inst;
 	sljit_u32 inst;
 
 
 	CHECK_ERROR();
 	CHECK_ERROR();
-	CHECK(check_sljit_emit_fmem_update(compiler, type, freg, mem, memw));
+	CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw));
 
 
 	if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -256))
 	if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -256))
 		return SLJIT_ERR_UNSUPPORTED;
 		return SLJIT_ERR_UNSUPPORTED;
@@ -2302,7 +2071,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler
 	if (!(type & SLJIT_MEM_STORE))
 	if (!(type & SLJIT_MEM_STORE))
 		inst |= 0x00400000;
 		inst |= 0x00400000;
 
 
-	if (!(type & SLJIT_MEM_POST))
+	if (type & SLJIT_MEM_PRE)
 		inst |= 0x800;
 		inst |= 0x800;
 
 
 	return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x1ff) << 12));
 	return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x1ff) << 12));

File diff suppressed because it is too large
+ 174 - 397
thirdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c


+ 387 - 43
thirdparty/pcre2/src/sljit/sljitNativeMIPS_32.c

@@ -38,6 +38,383 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_a
 	return (imm & 0xffff) ? push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar) : SLJIT_SUCCESS;
 	return (imm & 0xffff) ? push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar) : SLJIT_SUCCESS;
 }
 }
 
 
+#define EMIT_LOGICAL(op_imm, op_norm) \
+	if (flags & SRC2_IMM) { \
+		if (op & SLJIT_SET_Z) \
+			FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \
+		if (!(flags & UNUSED_DEST)) \
+			FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \
+	} \
+	else { \
+		if (op & SLJIT_SET_Z) \
+			FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
+		if (!(flags & UNUSED_DEST)) \
+			FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | D(dst), DR(dst))); \
+	}
+
+#define EMIT_SHIFT(op_imm, op_v) \
+	if (flags & SRC2_IMM) { \
+		if (op & SLJIT_SET_Z) \
+			FAIL_IF(push_inst(compiler, op_imm | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \
+		if (!(flags & UNUSED_DEST)) \
+			FAIL_IF(push_inst(compiler, op_imm | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \
+	} \
+	else { \
+		if (op & SLJIT_SET_Z) \
+			FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
+		if (!(flags & UNUSED_DEST)) \
+			FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | D(dst), DR(dst))); \
+	}
+
+static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
+	sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
+{
+	sljit_s32 is_overflow, is_carry, is_handled;
+
+	switch (GET_OPCODE(op)) {
+	case SLJIT_MOV:
+		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+		if (dst != src2)
+			return push_inst(compiler, ADDU | S(src2) | TA(0) | D(dst), DR(dst));
+		return SLJIT_SUCCESS;
+
+	case SLJIT_MOV_U8:
+		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
+			return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst));
+		SLJIT_ASSERT(dst == src2);
+		return SLJIT_SUCCESS;
+
+	case SLJIT_MOV_S8:
+		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
+			return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst));
+#else /* SLJIT_MIPS_REV < 1 */
+			FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst)));
+			return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst));
+#endif /* SLJIT_MIPS_REV >= 1 */
+		}
+		SLJIT_ASSERT(dst == src2);
+		return SLJIT_SUCCESS;
+
+	case SLJIT_MOV_U16:
+		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
+			return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst));
+		SLJIT_ASSERT(dst == src2);
+		return SLJIT_SUCCESS;
+
+	case SLJIT_MOV_S16:
+		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
+			return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst));
+#else /* SLJIT_MIPS_REV < 1 */
+			FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst)));
+			return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst));
+#endif /* SLJIT_MIPS_REV >= 1 */
+		}
+		SLJIT_ASSERT(dst == src2);
+		return SLJIT_SUCCESS;
+
+	case SLJIT_NOT:
+		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+		if (op & SLJIT_SET_Z)
+			FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+		if (!(flags & UNUSED_DEST))
+			FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst)));
+		return SLJIT_SUCCESS;
+
+	case SLJIT_CLZ:
+		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
+		if (op & SLJIT_SET_Z)
+			FAIL_IF(push_inst(compiler, CLZ | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
+		if (!(flags & UNUSED_DEST))
+			FAIL_IF(push_inst(compiler, CLZ | S(src2) | T(dst) | D(dst), DR(dst)));
+#else /* SLJIT_MIPS_REV < 1 */
+		if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) {
+			FAIL_IF(push_inst(compiler, SRL | T(src2) | DA(EQUAL_FLAG) | SH_IMM(31), EQUAL_FLAG));
+			return push_inst(compiler, XORI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG);
+		}
+		/* Nearly all instructions are unmovable in the following sequence. */
+		FAIL_IF(push_inst(compiler, ADDU | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
+		/* Check zero. */
+		FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(5), UNMOVABLE_INS));
+		FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM(32), UNMOVABLE_INS));
+		FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(dst) | IMM(-1), DR(dst)));
+		/* Loop for searching the highest bit. */
+		FAIL_IF(push_inst(compiler, ADDIU | S(dst) | T(dst) | IMM(1), DR(dst)));
+		FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS));
+		FAIL_IF(push_inst(compiler, SLL | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS));
+#endif /* SLJIT_MIPS_REV >= 1 */
+		return SLJIT_SUCCESS;
+
+	case SLJIT_ADD:
+		is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
+		is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+
+		if (flags & SRC2_IMM) {
+			if (is_overflow) {
+				if (src2 >= 0)
+					FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
+				else
+					FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
+			}
+			else if (op & SLJIT_SET_Z)
+				FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
+
+			if (is_overflow || is_carry) {
+				if (src2 >= 0)
+					FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
+				else {
+					FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
+					FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
+				}
+			}
+			/* dst may be the same as src1 or src2. */
+			if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
+				FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst)));
+		}
+		else {
+			if (is_overflow)
+				FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+			else if (op & SLJIT_SET_Z)
+				FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+
+			if (is_overflow || is_carry)
+				FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
+			/* dst may be the same as src1 or src2. */
+			if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
+				FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst)));
+		}
+
+		/* a + b >= a | b (otherwise, the carry should be set to 1). */
+		if (is_overflow || is_carry)
+			FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
+		if (!is_overflow)
+			return SLJIT_SUCCESS;
+		FAIL_IF(push_inst(compiler, SLL | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
+		FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
+		FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
+		if (op & SLJIT_SET_Z)
+			FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG));
+		return push_inst(compiler, SRL | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG);
+
+	case SLJIT_ADDC:
+		is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+
+		if (flags & SRC2_IMM) {
+			if (is_carry) {
+				if (src2 >= 0)
+					FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
+				else {
+					FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
+					FAIL_IF(push_inst(compiler, OR | S(src1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
+				}
+			}
+			FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst)));
+		} else {
+			if (is_carry)
+				FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+			/* dst may be the same as src1 or src2. */
+			FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst)));
+		}
+		if (is_carry)
+			FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
+
+		FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)));
+		if (!is_carry)
+			return SLJIT_SUCCESS;
+
+		/* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */
+		FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
+		/* Set carry flag. */
+		return push_inst(compiler, OR | SA(OTHER_FLAG) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG);
+
+	case SLJIT_SUB:
+		if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
+			FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
+			src2 = TMP_REG2;
+			flags &= ~SRC2_IMM;
+		}
+
+		is_handled = 0;
+
+		if (flags & SRC2_IMM) {
+			if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) {
+				FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
+				is_handled = 1;
+			}
+			else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) {
+				FAIL_IF(push_inst(compiler, SLTI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
+				is_handled = 1;
+			}
+		}
+
+		if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) {
+			is_handled = 1;
+
+			if (flags & SRC2_IMM) {
+				FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
+				src2 = TMP_REG2;
+				flags &= ~SRC2_IMM;
+			}
+
+			if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) {
+				FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
+			}
+			else if (GET_FLAG_TYPE(op) == SLJIT_GREATER || GET_FLAG_TYPE(op) == SLJIT_LESS_EQUAL)
+			{
+				FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG));
+			}
+			else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) {
+				FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
+			}
+			else if (GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER || GET_FLAG_TYPE(op) == SLJIT_SIG_LESS_EQUAL)
+			{
+				FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG));
+			}
+		}
+
+		if (is_handled) {
+			if (flags & SRC2_IMM) {
+				if (op & SLJIT_SET_Z)
+					FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
+				if (!(flags & UNUSED_DEST))
+					return push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst));
+			}
+			else {
+				if (op & SLJIT_SET_Z)
+					FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+				if (!(flags & UNUSED_DEST))
+					return push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst));
+			}
+			return SLJIT_SUCCESS;
+		}
+
+		is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
+		is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+
+		if (flags & SRC2_IMM) {
+			if (is_overflow) {
+				if (src2 >= 0)
+					FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
+				else
+					FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
+			}
+			else if (op & SLJIT_SET_Z)
+				FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
+
+			if (is_overflow || is_carry)
+				FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
+			/* dst may be the same as src1 or src2. */
+			if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
+				FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst)));
+		}
+		else {
+			if (is_overflow)
+				FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+			else if (op & SLJIT_SET_Z)
+				FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+
+			if (is_overflow || is_carry)
+				FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
+			/* dst may be the same as src1 or src2. */
+			if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
+				FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst)));
+		}
+
+		if (!is_overflow)
+			return SLJIT_SUCCESS;
+		FAIL_IF(push_inst(compiler, SLL | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
+		FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
+		FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
+		if (op & SLJIT_SET_Z)
+			FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG));
+		return push_inst(compiler, SRL | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG);
+
+	case SLJIT_SUBC:
+		if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
+			FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
+			src2 = TMP_REG2;
+			flags &= ~SRC2_IMM;
+		}
+
+		is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+
+		if (flags & SRC2_IMM) {
+			if (is_carry)
+				FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
+			/* dst may be the same as src1 or src2. */
+			FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst)));
+		}
+		else {
+			if (is_carry)
+				FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+			/* dst may be the same as src1 or src2. */
+			FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst)));
+		}
+
+		if (is_carry)
+			FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | D(TMP_REG1), DR(TMP_REG1)));
+
+		FAIL_IF(push_inst(compiler, SUBU | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)));
+		return (is_carry) ? push_inst(compiler, OR | SA(EQUAL_FLAG) | T(TMP_REG1) | DA(OTHER_FLAG), OTHER_FLAG) : SLJIT_SUCCESS;
+
+	case SLJIT_MUL:
+		SLJIT_ASSERT(!(flags & SRC2_IMM));
+
+		if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW) {
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
+			return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst));
+#else /* SLJIT_MIPS_REV < 1 */
+			FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS));
+			return push_inst(compiler, MFLO | D(dst), DR(dst));
+#endif /* SLJIT_MIPS_REV >= 1 */
+		}
+
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
+		FAIL_IF(push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)));
+		FAIL_IF(push_inst(compiler, MUH | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+#else /* SLJIT_MIPS_REV < 6 */
+		FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS));
+		FAIL_IF(push_inst(compiler, MFHI | DA(EQUAL_FLAG), EQUAL_FLAG));
+		FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst)));
+#endif /* SLJIT_MIPS_REV >= 6 */
+		FAIL_IF(push_inst(compiler, SRA | T(dst) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG));
+		return push_inst(compiler, SUBU | SA(EQUAL_FLAG) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG);
+
+	case SLJIT_AND:
+		EMIT_LOGICAL(ANDI, AND);
+		return SLJIT_SUCCESS;
+
+	case SLJIT_OR:
+		EMIT_LOGICAL(ORI, OR);
+		return SLJIT_SUCCESS;
+
+	case SLJIT_XOR:
+		EMIT_LOGICAL(XORI, XOR);
+		return SLJIT_SUCCESS;
+
+	case SLJIT_SHL:
+		EMIT_SHIFT(SLL, SLLV);
+		return SLJIT_SUCCESS;
+
+	case SLJIT_LSHR:
+		EMIT_SHIFT(SRL, SRLV);
+		return SLJIT_SUCCESS;
+
+	case SLJIT_ASHR:
+		EMIT_SHIFT(SRA, SRAV);
+		return SLJIT_SUCCESS;
+	}
+
+	SLJIT_UNREACHABLE();
+	return SLJIT_SUCCESS;
+}
+
 static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value)
 static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value)
 {
 {
 	FAIL_IF(push_inst(compiler, LUI | T(dst) | IMM(init_value >> 16), DR(dst)));
 	FAIL_IF(push_inst(compiler, LUI | T(dst) | IMM(init_value >> 16), DR(dst)));
@@ -196,8 +573,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile
 	sljit_s32 arg_types)
 	sljit_s32 arg_types)
 {
 {
 	struct sljit_jump *jump;
 	struct sljit_jump *jump;
-	sljit_u32 extra_space = 0;
-	sljit_ins ins = NOP;
+	sljit_u32 extra_space = (sljit_u32)type;
+	sljit_ins ins;
 
 
 	CHECK_ERROR_PTR();
 	CHECK_ERROR_PTR();
 	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
 	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
@@ -206,23 +583,14 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile
 	PTR_FAIL_IF(!jump);
 	PTR_FAIL_IF(!jump);
 	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
 	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
 
 
-	if ((type & 0xff) != SLJIT_CALL_REG_ARG) {
-		extra_space = (sljit_u32)type;
-		PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins, &extra_space));
-	} else if (type & SLJIT_CALL_RETURN)
-		PTR_FAIL_IF(emit_stack_frame_release(compiler, 0, &ins));
+	PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins, &extra_space));
 
 
 	SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
 	SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
 
 
-	if (ins == NOP && compiler->delay_slot != UNMOVABLE_INS)
-		jump->flags |= IS_MOVABLE;
+	PTR_FAIL_IF(emit_const(compiler, PIC_ADDR_REG, 0));
 
 
 	if (!(type & SLJIT_CALL_RETURN) || extra_space > 0) {
 	if (!(type & SLJIT_CALL_RETURN) || extra_space > 0) {
-		jump->flags |= IS_JAL;
-
-		if ((type & 0xff) != SLJIT_CALL_REG_ARG)
-			jump->flags |= IS_CALL;
-
+		jump->flags |= IS_JAL | IS_CALL;
 		PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
 		PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
 	} else
 	} else
 		PTR_FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS));
 		PTR_FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS));
@@ -230,9 +598,6 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile
 	jump->addr = compiler->size;
 	jump->addr = compiler->size;
 	PTR_FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS));
 	PTR_FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS));
 
 
-	/* Maximum number of instructions required for generating a constant. */
-	compiler->size += 2;
-
 	if (extra_space == 0)
 	if (extra_space == 0)
 		return jump;
 		return jump;
 
 
@@ -258,37 +623,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
 	CHECK_ERROR();
 	CHECK_ERROR();
 	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
 	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
 
 
-	if (src & SLJIT_MEM) {
-		ADJUST_LOCAL_OFFSET(src, srcw);
-		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw));
-		src = PIC_ADDR_REG;
-		srcw = 0;
-	}
-
-	if ((type & 0xff) == SLJIT_CALL_REG_ARG) {
-		if (type & SLJIT_CALL_RETURN) {
-			if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
-				FAIL_IF(push_inst(compiler, ADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG)));
-				src = PIC_ADDR_REG;
-				srcw = 0;
-			}
-
-			FAIL_IF(emit_stack_frame_release(compiler, 0, &ins));
-
-			if (ins != NOP)
-				FAIL_IF(push_inst(compiler, ins, MOVABLE_INS));
-		}
-
-		SLJIT_SKIP_CHECKS(compiler);
-		return sljit_emit_ijump(compiler, type, src, srcw);
-	}
-
 	SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
 	SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
 
 
 	if (src & SLJIT_IMM)
 	if (src & SLJIT_IMM)
 		FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw));
 		FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw));
-	else if (src != PIC_ADDR_REG)
+	else if (FAST_IS_REG(src))
 		FAIL_IF(push_inst(compiler, ADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG)));
 		FAIL_IF(push_inst(compiler, ADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG)));
+	else if (src & SLJIT_MEM) {
+		ADJUST_LOCAL_OFFSET(src, srcw);
+		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw));
+	}
 
 
 	FAIL_IF(call_with_args(compiler, arg_types, &ins, &extra_space));
 	FAIL_IF(call_with_args(compiler, arg_types, &ins, &extra_space));
 
 

+ 423 - 37
thirdparty/pcre2/src/sljit/sljitNativeMIPS_64.c

@@ -118,6 +118,421 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_a
 	return !(imm & 0xffff) ? SLJIT_SUCCESS : push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar);
 	return !(imm & 0xffff) ? SLJIT_SUCCESS : push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar);
 }
 }
 
 
+#define SELECT_OP(a, b) \
+	(!(op & SLJIT_32) ? a : b)
+
+#define EMIT_LOGICAL(op_imm, op_norm) \
+	if (flags & SRC2_IMM) { \
+		if (op & SLJIT_SET_Z) \
+			FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \
+		if (!(flags & UNUSED_DEST)) \
+			FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \
+	} \
+	else { \
+		if (op & SLJIT_SET_Z) \
+			FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
+		if (!(flags & UNUSED_DEST)) \
+			FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | D(dst), DR(dst))); \
+	}
+
+#define EMIT_SHIFT(op_dimm, op_dimm32, op_imm, op_dv, op_v) \
+	if (flags & SRC2_IMM) { \
+		if (src2 >= 32) { \
+			SLJIT_ASSERT(!(op & SLJIT_32)); \
+			ins = op_dimm32; \
+			src2 -= 32; \
+		} \
+		else \
+			ins = (op & SLJIT_32) ? op_imm : op_dimm; \
+		if (op & SLJIT_SET_Z) \
+			FAIL_IF(push_inst(compiler, ins | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \
+		if (!(flags & UNUSED_DEST)) \
+			FAIL_IF(push_inst(compiler, ins | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \
+	} \
+	else { \
+		ins = (op & SLJIT_32) ? op_v : op_dv; \
+		if (op & SLJIT_SET_Z) \
+			FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
+		if (!(flags & UNUSED_DEST)) \
+			FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | D(dst), DR(dst))); \
+	}
+
+static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
+	sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
+{
+	sljit_ins ins;
+	sljit_s32 is_overflow, is_carry, is_handled;
+
+	switch (GET_OPCODE(op)) {
+	case SLJIT_MOV:
+		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+		if (dst != src2)
+			return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(dst), DR(dst));
+		return SLJIT_SUCCESS;
+
+	case SLJIT_MOV_U8:
+		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
+			return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst));
+		SLJIT_ASSERT(dst == src2);
+		return SLJIT_SUCCESS;
+
+	case SLJIT_MOV_S8:
+		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
+			if (op & SLJIT_32)
+				return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst));
+#endif /* SLJIT_MIPS_REV >= 1 */
+			FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(24), DR(dst)));
+			return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(24), DR(dst));
+		}
+		SLJIT_ASSERT(dst == src2);
+		return SLJIT_SUCCESS;
+
+	case SLJIT_MOV_U16:
+		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
+			return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst));
+		SLJIT_ASSERT(dst == src2);
+		return SLJIT_SUCCESS;
+
+	case SLJIT_MOV_S16:
+		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
+			if (op & SLJIT_32)
+				return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst));
+#endif /* SLJIT_MIPS_REV >= 1 */
+			FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(16), DR(dst)));
+			return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(16), DR(dst));
+		}
+		SLJIT_ASSERT(dst == src2);
+		return SLJIT_SUCCESS;
+
+	case SLJIT_MOV_U32:
+		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && !(op & SLJIT_32));
+		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2)
+			if (dst == src2)
+				return push_inst(compiler, DINSU | T(src2) | SA(0) | (31 << 11) | (0 << 11), DR(dst));
+#endif /* SLJIT_MIPS_REV >= 2 */
+			FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(0), DR(dst)));
+			return push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst));
+		}
+		SLJIT_ASSERT(dst == src2);
+		return SLJIT_SUCCESS;
+
+	case SLJIT_MOV_S32:
+		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && !(op & SLJIT_32));
+		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+			return push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(0), DR(dst));
+		}
+		SLJIT_ASSERT(dst == src2);
+		return SLJIT_SUCCESS;
+
+	case SLJIT_NOT:
+		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+		if (op & SLJIT_SET_Z)
+			FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+		if (!(flags & UNUSED_DEST))
+			FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst)));
+		return SLJIT_SUCCESS;
+
+	case SLJIT_CLZ:
+		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
+		if (op & SLJIT_SET_Z)
+			FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
+		if (!(flags & UNUSED_DEST))
+			FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | T(dst) | D(dst), DR(dst)));
+#else /* SLJIT_MIPS_REV < 1 */
+		if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) {
+			FAIL_IF(push_inst(compiler, SELECT_OP(DSRL32, SRL) | T(src2) | DA(EQUAL_FLAG) | SH_IMM(31), EQUAL_FLAG));
+			return push_inst(compiler, XORI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG);
+		}
+		/* Nearly all instructions are unmovable in the following sequence. */
+		FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
+		/* Check zero. */
+		FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(5), UNMOVABLE_INS));
+		FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM((op & SLJIT_32) ? 32 : 64), UNMOVABLE_INS));
+		FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | T(dst) | IMM(-1), DR(dst)));
+		/* Loop for searching the highest bit. */
+		FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(dst) | T(dst) | IMM(1), DR(dst)));
+		FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS));
+		FAIL_IF(push_inst(compiler, SELECT_OP(DSLL, SLL) | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS));
+#endif /* SLJIT_MIPS_REV >= 1 */
+		return SLJIT_SUCCESS;
+
+	case SLJIT_ADD:
+		is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
+		is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+
+		if (flags & SRC2_IMM) {
+			if (is_overflow) {
+				if (src2 >= 0)
+					FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
+				else
+					FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
+			}
+			else if (op & SLJIT_SET_Z)
+				FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
+
+			if (is_overflow || is_carry) {
+				if (src2 >= 0)
+					FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
+				else {
+					FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
+					FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
+				}
+			}
+			/* dst may be the same as src1 or src2. */
+			if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
+				FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst)));
+		}
+		else {
+			if (is_overflow)
+				FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+			else if (op & SLJIT_SET_Z)
+				FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+
+			if (is_overflow || is_carry)
+				FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
+			/* dst may be the same as src1 or src2. */
+			if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
+				FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst)));
+		}
+
+		/* a + b >= a | b (otherwise, the carry should be set to 1). */
+		if (is_overflow || is_carry)
+			FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
+		if (!is_overflow)
+			return SLJIT_SUCCESS;
+		FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
+		FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
+		FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
+		if (op & SLJIT_SET_Z)
+			FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG));
+		return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG);
+
+	case SLJIT_ADDC:
+		is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+
+		if (flags & SRC2_IMM) {
+			if (is_carry) {
+				if (src2 >= 0)
+					FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
+				else {
+					FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
+					FAIL_IF(push_inst(compiler, OR | S(src1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
+				}
+			}
+			FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst)));
+		} else {
+			if (is_carry)
+				FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+			/* dst may be the same as src1 or src2. */
+			FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst)));
+		}
+		if (is_carry)
+			FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
+
+		FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)));
+		if (!is_carry)
+			return SLJIT_SUCCESS;
+
+		/* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */
+		FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
+		/* Set carry flag. */
+		return push_inst(compiler, OR | SA(OTHER_FLAG) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG);
+
+	case SLJIT_SUB:
+		if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
+			FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
+			src2 = TMP_REG2;
+			flags &= ~SRC2_IMM;
+		}
+
+		is_handled = 0;
+
+		if (flags & SRC2_IMM) {
+			if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) {
+				FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
+				is_handled = 1;
+			}
+			else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) {
+				FAIL_IF(push_inst(compiler, SLTI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
+				is_handled = 1;
+			}
+		}
+
+		if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) {
+			is_handled = 1;
+
+			if (flags & SRC2_IMM) {
+				FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
+				src2 = TMP_REG2;
+				flags &= ~SRC2_IMM;
+			}
+
+			if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) {
+				FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
+			}
+			else if (GET_FLAG_TYPE(op) == SLJIT_GREATER || GET_FLAG_TYPE(op) == SLJIT_LESS_EQUAL)
+			{
+				FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG));
+			}
+			else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) {
+				FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
+			}
+			else if (GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER || GET_FLAG_TYPE(op) == SLJIT_SIG_LESS_EQUAL)
+			{
+				FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG));
+			}
+		}
+
+		if (is_handled) {
+			if (flags & SRC2_IMM) {
+				if (op & SLJIT_SET_Z)
+					FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
+				if (!(flags & UNUSED_DEST))
+					return push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst));
+			}
+			else {
+				if (op & SLJIT_SET_Z)
+					FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+				if (!(flags & UNUSED_DEST))
+					return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst));
+			}
+			return SLJIT_SUCCESS;
+		}
+
+		is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
+		is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+
+		if (flags & SRC2_IMM) {
+			if (is_overflow) {
+				if (src2 >= 0)
+					FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
+				else
+					FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
+			}
+			else if (op & SLJIT_SET_Z)
+				FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
+
+			if (is_overflow || is_carry)
+				FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
+			/* dst may be the same as src1 or src2. */
+			if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
+				FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst)));
+		}
+		else {
+			if (is_overflow)
+				FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+			else if (op & SLJIT_SET_Z)
+				FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+
+			if (is_overflow || is_carry)
+				FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
+			/* dst may be the same as src1 or src2. */
+			if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
+				FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst)));
+		}
+
+		if (!is_overflow)
+			return SLJIT_SUCCESS;
+		FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
+		FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
+		FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
+		if (op & SLJIT_SET_Z)
+			FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG));
+		return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG);
+
+	case SLJIT_SUBC:
+		if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
+			FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
+			src2 = TMP_REG2;
+			flags &= ~SRC2_IMM;
+		}
+
+		is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
+
+		if (flags & SRC2_IMM) {
+			if (is_carry)
+				FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
+			/* dst may be the same as src1 or src2. */
+			FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst)));
+		}
+		else {
+			if (is_carry)
+				FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+			/* dst may be the same as src1 or src2. */
+			FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst)));
+		}
+
+		if (is_carry)
+			FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | D(TMP_REG1), DR(TMP_REG1)));
+
+		FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)));
+		return (is_carry) ? push_inst(compiler, OR | SA(EQUAL_FLAG) | T(TMP_REG1) | DA(OTHER_FLAG), OTHER_FLAG) : SLJIT_SUCCESS;
+
+	case SLJIT_MUL:
+		SLJIT_ASSERT(!(flags & SRC2_IMM));
+
+		if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW) {
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
+			return push_inst(compiler, SELECT_OP(DMUL, MUL) | S(src1) | T(src2) | D(dst), DR(dst));
+#elif (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
+			if (op & SLJIT_32)
+				return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst));
+			FAIL_IF(push_inst(compiler, DMULT | S(src1) | T(src2), MOVABLE_INS));
+			return push_inst(compiler, MFLO | D(dst), DR(dst));
+#else /* SLJIT_MIPS_REV < 1 */
+			FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS));
+			return push_inst(compiler, MFLO | D(dst), DR(dst));
+#endif /* SLJIT_MIPS_REV >= 6 */
+		}
+
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
+		FAIL_IF(push_inst(compiler, SELECT_OP(DMUL, MUL) | S(src1) | T(src2) | D(dst), DR(dst)));
+		FAIL_IF(push_inst(compiler, SELECT_OP(DMUH, MUH) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+#else /* SLJIT_MIPS_REV < 6 */
+		FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS));
+		FAIL_IF(push_inst(compiler, MFHI | DA(EQUAL_FLAG), EQUAL_FLAG));
+		FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst)));
+#endif /* SLJIT_MIPS_REV >= 6 */
+		FAIL_IF(push_inst(compiler, SELECT_OP(DSRA32, SRA) | T(dst) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG));
+		return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(EQUAL_FLAG) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG);
+
+	case SLJIT_AND:
+		EMIT_LOGICAL(ANDI, AND);
+		return SLJIT_SUCCESS;
+
+	case SLJIT_OR:
+		EMIT_LOGICAL(ORI, OR);
+		return SLJIT_SUCCESS;
+
+	case SLJIT_XOR:
+		EMIT_LOGICAL(XORI, XOR);
+		return SLJIT_SUCCESS;
+
+	case SLJIT_SHL:
+		EMIT_SHIFT(DSLL, DSLL32, SLL, DSLLV, SLLV);
+		return SLJIT_SUCCESS;
+
+	case SLJIT_LSHR:
+		EMIT_SHIFT(DSRL, DSRL32, SRL, DSRLV, SRLV);
+		return SLJIT_SUCCESS;
+
+	case SLJIT_ASHR:
+		EMIT_SHIFT(DSRA, DSRA32, SRA, DSRAV, SRAV);
+		return SLJIT_SUCCESS;
+	}
+
+	SLJIT_UNREACHABLE();
+	return SLJIT_SUCCESS;
+}
+
 static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value)
 static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value)
 {
 {
 	FAIL_IF(push_inst(compiler, LUI | T(dst) | IMM(init_value >> 48), DR(dst)));
 	FAIL_IF(push_inst(compiler, LUI | T(dst) | IMM(init_value >> 48), DR(dst)));
@@ -238,20 +653,14 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile
 	if (type & SLJIT_CALL_RETURN)
 	if (type & SLJIT_CALL_RETURN)
 		PTR_FAIL_IF(emit_stack_frame_release(compiler, 0, &ins));
 		PTR_FAIL_IF(emit_stack_frame_release(compiler, 0, &ins));
 
 
-	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
-		PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins));
+	PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins));
 
 
 	SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
 	SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
 
 
-	if (ins == NOP && compiler->delay_slot != UNMOVABLE_INS)
-		jump->flags |= IS_MOVABLE;
+	PTR_FAIL_IF(emit_const(compiler, PIC_ADDR_REG, 0));
 
 
 	if (!(type & SLJIT_CALL_RETURN)) {
 	if (!(type & SLJIT_CALL_RETURN)) {
-		jump->flags |= IS_JAL;
-
-		if ((type & 0xff) != SLJIT_CALL_REG_ARG)
-			jump->flags |= IS_CALL;
-
+		jump->flags |= IS_JAL | IS_CALL;
 		PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
 		PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
 	} else
 	} else
 		PTR_FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS));
 		PTR_FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS));
@@ -259,8 +668,6 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile
 	jump->addr = compiler->size;
 	jump->addr = compiler->size;
 	PTR_FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS));
 	PTR_FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS));
 
 
-	/* Maximum number of instructions required for generating a constant. */
-	compiler->size += 6;
 	return jump;
 	return jump;
 }
 }
 
 
@@ -273,37 +680,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
 	CHECK_ERROR();
 	CHECK_ERROR();
 	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
 	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
 
 
-	if (src & SLJIT_MEM) {
-		ADJUST_LOCAL_OFFSET(src, srcw);
-		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw));
-		src = PIC_ADDR_REG;
-		srcw = 0;
-	}
-
-	if ((type & 0xff) == SLJIT_CALL_REG_ARG) {
-		if (type & SLJIT_CALL_RETURN) {
-			if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
-				FAIL_IF(push_inst(compiler, DADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG)));
-				src = PIC_ADDR_REG;
-				srcw = 0;
-			}
-
-			FAIL_IF(emit_stack_frame_release(compiler, 0, &ins));
-
-			if (ins != NOP)
-				FAIL_IF(push_inst(compiler, ins, MOVABLE_INS));
-		}
-
-		SLJIT_SKIP_CHECKS(compiler);
-		return sljit_emit_ijump(compiler, type, src, srcw);
-	}
-
 	SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
 	SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
 
 
 	if (src & SLJIT_IMM)
 	if (src & SLJIT_IMM)
 		FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw));
 		FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw));
-	else if (src != PIC_ADDR_REG)
+	else if (FAST_IS_REG(src))
 		FAIL_IF(push_inst(compiler, DADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG)));
 		FAIL_IF(push_inst(compiler, DADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG)));
+	else if (src & SLJIT_MEM) {
+		ADJUST_LOCAL_OFFSET(src, srcw);
+		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw));
+	}
 
 
 	if (type & SLJIT_CALL_RETURN)
 	if (type & SLJIT_CALL_RETURN)
 		FAIL_IF(emit_stack_frame_release(compiler, 0, &ins));
 		FAIL_IF(emit_stack_frame_release(compiler, 0, &ins));

File diff suppressed because it is too large
+ 275 - 993
thirdparty/pcre2/src/sljit/sljitNativeMIPS_common.c


+ 18 - 74
thirdparty/pcre2/src/sljit/sljitNativePPC_32.c

@@ -38,15 +38,12 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg,
 	return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS;
 	return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS;
 }
 }
 
 
-/* Simplified mnemonics: clrlwi. */
 #define INS_CLEAR_LEFT(dst, src, from) \
 #define INS_CLEAR_LEFT(dst, src, from) \
-	(RLWINM | S(src) | A(dst) | RLWI_MBE(from, 31))
+	(RLWINM | S(src) | A(dst) | ((from) << 6) | (31 << 1))
 
 
 static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
 static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
 	sljit_s32 dst, sljit_s32 src1, sljit_s32 src2)
 	sljit_s32 dst, sljit_s32 src1, sljit_s32 src2)
 {
 {
-	sljit_u32 imm;
-
 	switch (op) {
 	switch (op) {
 	case SLJIT_MOV:
 	case SLJIT_MOV:
 	case SLJIT_MOV_U32:
 	case SLJIT_MOV_U32:
@@ -93,16 +90,6 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 		SLJIT_ASSERT(src1 == TMP_REG1);
 		SLJIT_ASSERT(src1 == TMP_REG1);
 		return push_inst(compiler, CNTLZW | S(src2) | A(dst));
 		return push_inst(compiler, CNTLZW | S(src2) | A(dst));
 
 
-	case SLJIT_CTZ:
-		SLJIT_ASSERT(src1 == TMP_REG1);
-		FAIL_IF(push_inst(compiler, NEG | D(TMP_REG1) | A(src2)));
-		FAIL_IF(push_inst(compiler, AND | S(src2) | A(dst) | B(TMP_REG1)));
-		FAIL_IF(push_inst(compiler, CNTLZW | S(dst) | A(dst)));
-		FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG1) | A(dst) | IMM(-32)));
-		/* The highest bits are set, if dst < 32, zero otherwise. */
-		FAIL_IF(push_inst(compiler, SRWI(27) | S(TMP_REG1) | A(TMP_REG1)));
-		return push_inst(compiler, XOR | S(dst) | A(dst) | B(TMP_REG1));
-
 	case SLJIT_ADD:
 	case SLJIT_ADD:
 		if (flags & ALT_FORM1) {
 		if (flags & ALT_FORM1) {
 			/* Setting XER SO is not enough, CR SO is also needed. */
 			/* Setting XER SO is not enough, CR SO is also needed. */
@@ -116,14 +103,12 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 			if (flags & ALT_FORM3)
 			if (flags & ALT_FORM3)
 				return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm);
 				return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm);
 
 
-			imm = compiler->imm;
-
 			if (flags & ALT_FORM4) {
 			if (flags & ALT_FORM4) {
-				FAIL_IF(push_inst(compiler, ADDIS | D(dst) | A(src1) | (((imm >> 16) & 0xffff) + ((imm >> 15) & 0x1))));
+				FAIL_IF(push_inst(compiler, ADDIS | D(dst) | A(src1) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1))));
 				src1 = dst;
 				src1 = dst;
 			}
 			}
 
 
-			return push_inst(compiler, ADDI | D(dst) | A(src1) | (imm & 0xffff));
+			return push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff));
 		}
 		}
 		if (flags & ALT_FORM3) {
 		if (flags & ALT_FORM3) {
 			SLJIT_ASSERT(src2 == TMP_REG2);
 			SLJIT_ASSERT(src2 == TMP_REG2);
@@ -223,10 +208,8 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 		}
 		}
 		if (flags & ALT_FORM3) {
 		if (flags & ALT_FORM3) {
 			SLJIT_ASSERT(src2 == TMP_REG2);
 			SLJIT_ASSERT(src2 == TMP_REG2);
-			imm = compiler->imm;
-
-			FAIL_IF(push_inst(compiler, ORI | S(src1) | A(dst) | IMM(imm)));
-			return push_inst(compiler, ORIS | S(dst) | A(dst) | IMM(imm >> 16));
+			FAIL_IF(push_inst(compiler, ORI | S(src1) | A(dst) | IMM(compiler->imm)));
+			return push_inst(compiler, ORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16));
 		}
 		}
 		return push_inst(compiler, OR | RC(flags) | S(src1) | A(dst) | B(src2));
 		return push_inst(compiler, OR | RC(flags) | S(src1) | A(dst) | B(src2));
 
 
@@ -241,78 +224,34 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 		}
 		}
 		if (flags & ALT_FORM3) {
 		if (flags & ALT_FORM3) {
 			SLJIT_ASSERT(src2 == TMP_REG2);
 			SLJIT_ASSERT(src2 == TMP_REG2);
-			imm = compiler->imm;
-
-			FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(imm)));
-			return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(imm >> 16));
+			FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(compiler->imm)));
+			return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16));
 		}
 		}
 		return push_inst(compiler, XOR | RC(flags) | S(src1) | A(dst) | B(src2));
 		return push_inst(compiler, XOR | RC(flags) | S(src1) | A(dst) | B(src2));
 
 
 	case SLJIT_SHL:
 	case SLJIT_SHL:
-	case SLJIT_MSHL:
 		if (flags & ALT_FORM1) {
 		if (flags & ALT_FORM1) {
 			SLJIT_ASSERT(src2 == TMP_REG2);
 			SLJIT_ASSERT(src2 == TMP_REG2);
-			imm = compiler->imm & 0x1f;
-			return push_inst(compiler, SLWI(imm) | RC(flags) | S(src1) | A(dst));
-		}
-
-		if (op == SLJIT_MSHL) {
-			FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | 0x1f));
-			src2 = TMP_REG2;
+			compiler->imm &= 0x1f;
+			return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11) | ((31 - compiler->imm) << 1));
 		}
 		}
-
 		return push_inst(compiler, SLW | RC(flags) | S(src1) | A(dst) | B(src2));
 		return push_inst(compiler, SLW | RC(flags) | S(src1) | A(dst) | B(src2));
 
 
 	case SLJIT_LSHR:
 	case SLJIT_LSHR:
-	case SLJIT_MLSHR:
 		if (flags & ALT_FORM1) {
 		if (flags & ALT_FORM1) {
 			SLJIT_ASSERT(src2 == TMP_REG2);
 			SLJIT_ASSERT(src2 == TMP_REG2);
-			imm = compiler->imm & 0x1f;
-			/* Since imm can be 0, SRWI() cannot be used. */
-			return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | RLWI_SH((32 - imm) & 0x1f) | RLWI_MBE(imm, 31));
-		}
-
-		if (op == SLJIT_MLSHR) {
-			FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | 0x1f));
-			src2 = TMP_REG2;
+			compiler->imm &= 0x1f;
+			return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (((32 - compiler->imm) & 0x1f) << 11) | (compiler->imm << 6) | (31 << 1));
 		}
 		}
-
 		return push_inst(compiler, SRW | RC(flags) | S(src1) | A(dst) | B(src2));
 		return push_inst(compiler, SRW | RC(flags) | S(src1) | A(dst) | B(src2));
 
 
 	case SLJIT_ASHR:
 	case SLJIT_ASHR:
-	case SLJIT_MASHR:
 		if (flags & ALT_FORM1) {
 		if (flags & ALT_FORM1) {
 			SLJIT_ASSERT(src2 == TMP_REG2);
 			SLJIT_ASSERT(src2 == TMP_REG2);
-			imm = compiler->imm & 0x1f;
-			return push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (imm << 11));
-		}
-
-		if (op == SLJIT_MASHR) {
-			FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | 0x1f));
-			src2 = TMP_REG2;
+			compiler->imm &= 0x1f;
+			return push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11));
 		}
 		}
-
 		return push_inst(compiler, SRAW | RC(flags) | S(src1) | A(dst) | B(src2));
 		return push_inst(compiler, SRAW | RC(flags) | S(src1) | A(dst) | B(src2));
-
-	case SLJIT_ROTL:
-	case SLJIT_ROTR:
-		if (flags & ALT_FORM1) {
-			SLJIT_ASSERT(src2 == TMP_REG2);
-			imm = compiler->imm;
-
-			if (op == SLJIT_ROTR)
-				imm = (sljit_u32)(-(sljit_s32)imm);
-
-			imm &= 0x1f;
-			return push_inst(compiler, RLWINM | S(src1) | A(dst) | RLWI_SH(imm) | RLWI_MBE(0, 31));
-		}
-
-		if (op == SLJIT_ROTR) {
-			FAIL_IF(push_inst(compiler, SUBFIC | D(TMP_REG2) | A(src2) | 0));
-			src2 = TMP_REG2;
-		}
-
-		return push_inst(compiler, RLWNM | S(src1) | A(dst) | B(src2) | RLWI_MBE(0, 31));
 	}
 	}
 
 
 	SLJIT_UNREACHABLE();
 	SLJIT_UNREACHABLE();
@@ -338,3 +277,8 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta
 	inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
 	inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
 	SLJIT_CACHE_FLUSH(inst, inst + 2);
 	SLJIT_CACHE_FLUSH(inst, inst + 2);
 }
 }
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
+{
+	sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
+}

+ 48 - 118
thirdparty/pcre2/src/sljit/sljitNativePPC_64.c

@@ -35,9 +35,8 @@
 #error "Must implement count leading zeroes"
 #error "Must implement count leading zeroes"
 #endif
 #endif
 
 
-/* Computes SLDI(63 - shift). */
-#define PUSH_SLDI_NEG(reg, shift) \
-	push_inst(compiler, RLDICR | S(reg) | A(reg) | RLDI_SH(63 - shift) | RLDI_ME(shift))
+#define PUSH_RLDICR(reg, shift) \
+	push_inst(compiler, RLDI(reg, reg, 63 - shift, shift, 1))
 
 
 static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
 static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
 {
 {
@@ -67,14 +66,14 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg,
 	if ((tmp & ~0xffff000000000000ul) == 0) {
 	if ((tmp & ~0xffff000000000000ul) == 0) {
 		FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | (sljit_ins)(tmp >> 48)));
 		FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | (sljit_ins)(tmp >> 48)));
 		shift += 15;
 		shift += 15;
-		return PUSH_SLDI_NEG(reg, shift);
+		return PUSH_RLDICR(reg, shift);
 	}
 	}
 
 
 	if ((tmp & ~0xffffffff00000000ul) == 0) {
 	if ((tmp & ~0xffffffff00000000ul) == 0) {
 		FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | (sljit_ins)(tmp >> 48)));
 		FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | (sljit_ins)(tmp >> 48)));
 		FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp >> 32)));
 		FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp >> 32)));
 		shift += 31;
 		shift += 31;
-		return PUSH_SLDI_NEG(reg, shift);
+		return PUSH_RLDICR(reg, shift);
 	}
 	}
 
 
 	/* Cut out the 16 bit from immediate. */
 	/* Cut out the 16 bit from immediate. */
@@ -83,13 +82,13 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg,
 
 
 	if (tmp2 <= 0xffff) {
 	if (tmp2 <= 0xffff) {
 		FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | (sljit_ins)(tmp >> 48)));
 		FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | (sljit_ins)(tmp >> 48)));
-		FAIL_IF(PUSH_SLDI_NEG(reg, shift));
+		FAIL_IF(PUSH_RLDICR(reg, shift));
 		return push_inst(compiler, ORI | S(reg) | A(reg) | (sljit_ins)tmp2);
 		return push_inst(compiler, ORI | S(reg) | A(reg) | (sljit_ins)tmp2);
 	}
 	}
 
 
 	if (tmp2 <= 0xffffffff) {
 	if (tmp2 <= 0xffffffff) {
 		FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48)));
 		FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48)));
-		FAIL_IF(PUSH_SLDI_NEG(reg, shift));
+		FAIL_IF(PUSH_RLDICR(reg, shift));
 		FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | (sljit_ins)(tmp2 >> 16)));
 		FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | (sljit_ins)(tmp2 >> 16)));
 		return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp2)) : SLJIT_SUCCESS;
 		return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp2)) : SLJIT_SUCCESS;
 	}
 	}
@@ -101,23 +100,22 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg,
 		FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | (sljit_ins)(tmp >> 48)));
 		FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | (sljit_ins)(tmp >> 48)));
 		shift2 += 15;
 		shift2 += 15;
 		shift += (63 - shift2);
 		shift += (63 - shift2);
-		FAIL_IF(PUSH_SLDI_NEG(reg, shift));
+		FAIL_IF(PUSH_RLDICR(reg, shift));
 		FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | (sljit_ins)(tmp2 >> 48)));
 		FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | (sljit_ins)(tmp2 >> 48)));
-		return PUSH_SLDI_NEG(reg, shift2);
+		return PUSH_RLDICR(reg, shift2);
 	}
 	}
 
 
 	/* The general version. */
 	/* The general version. */
 	FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | (sljit_ins)((sljit_uw)imm >> 48)));
 	FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | (sljit_ins)((sljit_uw)imm >> 48)));
 	FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm >> 32)));
 	FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm >> 32)));
-	FAIL_IF(PUSH_SLDI_NEG(reg, 31));
+	FAIL_IF(PUSH_RLDICR(reg, 31));
 	FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(imm >> 16)));
 	FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(imm >> 16)));
 	return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm));
 	return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm));
 }
 }
 
 
-#undef PUSH_SLDI_NEG
-
-#define CLRLDI(dst, src, n) \
-	(RLDICL | S(src) | A(dst) | RLDI_SH(0) | RLDI_MB(n))
+/* Simplified mnemonics: clrldi. */
+#define INS_CLEAR_LEFT(dst, src, from) \
+	(RLDICL | S(src) | A(dst) | ((from) << 6) | (1 << 5))
 
 
 /* Sign extension for integer operations. */
 /* Sign extension for integer operations. */
 #define UN_EXTS() \
 #define UN_EXTS() \
@@ -147,8 +145,6 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg,
 static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
 static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
 	sljit_s32 dst, sljit_s32 src1, sljit_s32 src2)
 	sljit_s32 dst, sljit_s32 src1, sljit_s32 src2)
 {
 {
-	sljit_u32 imm;
-
 	switch (op) {
 	switch (op) {
 	case SLJIT_MOV:
 	case SLJIT_MOV:
 	case SLJIT_MOV_P:
 	case SLJIT_MOV_P:
@@ -163,7 +159,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
 			if (op == SLJIT_MOV_S32)
 			if (op == SLJIT_MOV_S32)
 				return push_inst(compiler, EXTSW | S(src2) | A(dst));
 				return push_inst(compiler, EXTSW | S(src2) | A(dst));
-			return push_inst(compiler, CLRLDI(dst, src2, 32));
+			return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 0));
 		}
 		}
 		else {
 		else {
 			SLJIT_ASSERT(dst == src2);
 			SLJIT_ASSERT(dst == src2);
@@ -176,7 +172,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
 			if (op == SLJIT_MOV_S8)
 			if (op == SLJIT_MOV_S8)
 				return push_inst(compiler, EXTSB | S(src2) | A(dst));
 				return push_inst(compiler, EXTSB | S(src2) | A(dst));
-			return push_inst(compiler, CLRLDI(dst, src2, 56));
+			return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24));
 		}
 		}
 		else if ((flags & REG_DEST) && op == SLJIT_MOV_S8)
 		else if ((flags & REG_DEST) && op == SLJIT_MOV_S8)
 			return push_inst(compiler, EXTSB | S(src2) | A(dst));
 			return push_inst(compiler, EXTSB | S(src2) | A(dst));
@@ -191,7 +187,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
 			if (op == SLJIT_MOV_S16)
 			if (op == SLJIT_MOV_S16)
 				return push_inst(compiler, EXTSH | S(src2) | A(dst));
 				return push_inst(compiler, EXTSH | S(src2) | A(dst));
-			return push_inst(compiler, CLRLDI(dst, src2, 48));
+			return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16));
 		}
 		}
 		else {
 		else {
 			SLJIT_ASSERT(dst == src2);
 			SLJIT_ASSERT(dst == src2);
@@ -205,30 +201,22 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 
 
 	case SLJIT_CLZ:
 	case SLJIT_CLZ:
 		SLJIT_ASSERT(src1 == TMP_REG1);
 		SLJIT_ASSERT(src1 == TMP_REG1);
-		return push_inst(compiler, ((flags & ALT_FORM1) ? CNTLZW : CNTLZD) | S(src2) | A(dst));
-
-	case SLJIT_CTZ:
-		SLJIT_ASSERT(src1 == TMP_REG1);
-		FAIL_IF(push_inst(compiler, NEG | D(TMP_REG1) | A(src2)));
-		FAIL_IF(push_inst(compiler, AND | S(src2) | A(dst) | B(TMP_REG1)));
-		FAIL_IF(push_inst(compiler, ((flags & ALT_FORM1) ? CNTLZW : CNTLZD) | S(dst) | A(dst)));
-		FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG1) | A(dst) | IMM((flags & ALT_FORM1) ? -32 : -64)));
-		/* The highest bits are set, if dst < bit width, zero otherwise. */
-		FAIL_IF(push_inst(compiler, ((flags & ALT_FORM1) ? SRWI(27) : SRDI(58)) | S(TMP_REG1) | A(TMP_REG1)));
-		return push_inst(compiler, XOR | S(dst) | A(dst) | B(TMP_REG1));
+		if (flags & ALT_FORM1)
+			return push_inst(compiler, CNTLZW | S(src2) | A(dst));
+		return push_inst(compiler, CNTLZD | S(src2) | A(dst));
 
 
 	case SLJIT_ADD:
 	case SLJIT_ADD:
 		if (flags & ALT_FORM1) {
 		if (flags & ALT_FORM1) {
 			if (flags & ALT_SIGN_EXT) {
 			if (flags & ALT_SIGN_EXT) {
-				FAIL_IF(push_inst(compiler, SLDI(32) | S(src1) | A(TMP_REG1)));
+				FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, src1, 32, 31, 1)));
 				src1 = TMP_REG1;
 				src1 = TMP_REG1;
-				FAIL_IF(push_inst(compiler, SLDI(32) | S(src2) | A(TMP_REG2)));
+				FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1)));
 				src2 = TMP_REG2;
 				src2 = TMP_REG2;
 			}
 			}
 			/* Setting XER SO is not enough, CR SO is also needed. */
 			/* Setting XER SO is not enough, CR SO is also needed. */
 			FAIL_IF(push_inst(compiler, ADD | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)));
 			FAIL_IF(push_inst(compiler, ADD | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)));
 			if (flags & ALT_SIGN_EXT)
 			if (flags & ALT_SIGN_EXT)
-				return push_inst(compiler, SRDI(32) | S(dst) | A(dst));
+				return push_inst(compiler, RLDI(dst, dst, 32, 32, 0));
 			return SLJIT_SUCCESS;
 			return SLJIT_SUCCESS;
 		}
 		}
 
 
@@ -239,14 +227,12 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 			if (flags & ALT_FORM3)
 			if (flags & ALT_FORM3)
 				return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm);
 				return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm);
 
 
-			imm = compiler->imm;
-
 			if (flags & ALT_FORM4) {
 			if (flags & ALT_FORM4) {
-				FAIL_IF(push_inst(compiler, ADDIS | D(dst) | A(src1) | (((imm >> 16) & 0xffff) + ((imm >> 15) & 0x1))));
+				FAIL_IF(push_inst(compiler, ADDIS | D(dst) | A(src1) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1))));
 				src1 = dst;
 				src1 = dst;
 			}
 			}
 
 
-			return push_inst(compiler, ADDI | D(dst) | A(src1) | (imm & 0xffff));
+			return push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff));
 		}
 		}
 		if (flags & ALT_FORM3) {
 		if (flags & ALT_FORM3) {
 			SLJIT_ASSERT(src2 == TMP_REG2);
 			SLJIT_ASSERT(src2 == TMP_REG2);
@@ -301,11 +287,11 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 		if (flags & ALT_FORM3) {
 		if (flags & ALT_FORM3) {
 			if (flags & ALT_SIGN_EXT) {
 			if (flags & ALT_SIGN_EXT) {
 				if (src1 != TMP_ZERO) {
 				if (src1 != TMP_ZERO) {
-					FAIL_IF(push_inst(compiler, SLDI(32) | S(src1) | A(TMP_REG1)));
+					FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, src1, 32, 31, 1)));
 					src1 = TMP_REG1;
 					src1 = TMP_REG1;
 				}
 				}
 				if (src2 != TMP_ZERO) {
 				if (src2 != TMP_ZERO) {
-					FAIL_IF(push_inst(compiler, SLDI(32) | S(src2) | A(TMP_REG2)));
+					FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1)));
 					src2 = TMP_REG2;
 					src2 = TMP_REG2;
 				}
 				}
 			}
 			}
@@ -317,7 +303,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 				FAIL_IF(push_inst(compiler, NEG | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2)));
 				FAIL_IF(push_inst(compiler, NEG | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2)));
 
 
 			if (flags & ALT_SIGN_EXT)
 			if (flags & ALT_SIGN_EXT)
-				return push_inst(compiler, SRDI(32) | S(dst) | A(dst));
+				return push_inst(compiler, RLDI(dst, dst, 32, 32, 0));
 			return SLJIT_SUCCESS;
 			return SLJIT_SUCCESS;
 		}
 		}
 
 
@@ -376,10 +362,8 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 		}
 		}
 		if (flags & ALT_FORM3) {
 		if (flags & ALT_FORM3) {
 			SLJIT_ASSERT(src2 == TMP_REG2);
 			SLJIT_ASSERT(src2 == TMP_REG2);
-			imm = compiler->imm;
-
-			FAIL_IF(push_inst(compiler, ORI | S(src1) | A(dst) | IMM(imm)));
-			return push_inst(compiler, ORIS | S(dst) | A(dst) | IMM(imm >> 16));
+			FAIL_IF(push_inst(compiler, ORI | S(src1) | A(dst) | IMM(compiler->imm)));
+			return push_inst(compiler, ORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16));
 		}
 		}
 		return push_inst(compiler, OR | RC(flags) | S(src1) | A(dst) | B(src2));
 		return push_inst(compiler, OR | RC(flags) | S(src1) | A(dst) | B(src2));
 
 
@@ -394,105 +378,46 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 		}
 		}
 		if (flags & ALT_FORM3) {
 		if (flags & ALT_FORM3) {
 			SLJIT_ASSERT(src2 == TMP_REG2);
 			SLJIT_ASSERT(src2 == TMP_REG2);
-			imm = compiler->imm;
-
-			FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(imm)));
-			return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(imm >> 16));
+			FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(compiler->imm)));
+			return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16));
 		}
 		}
 		return push_inst(compiler, XOR | RC(flags) | S(src1) | A(dst) | B(src2));
 		return push_inst(compiler, XOR | RC(flags) | S(src1) | A(dst) | B(src2));
 
 
 	case SLJIT_SHL:
 	case SLJIT_SHL:
-	case SLJIT_MSHL:
 		if (flags & ALT_FORM1) {
 		if (flags & ALT_FORM1) {
 			SLJIT_ASSERT(src2 == TMP_REG2);
 			SLJIT_ASSERT(src2 == TMP_REG2);
-			imm = compiler->imm;
-
 			if (flags & ALT_FORM2) {
 			if (flags & ALT_FORM2) {
-				imm &= 0x1f;
-				return push_inst(compiler, SLWI(imm) | RC(flags) | S(src1) | A(dst));
+				compiler->imm &= 0x1f;
+				return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11) | ((31 - compiler->imm) << 1));
 			}
 			}
-
-			imm &= 0x3f;
-			return push_inst(compiler, SLDI(imm) | RC(flags) | S(src1) | A(dst));
+			compiler->imm &= 0x3f;
+			return push_inst(compiler, RLDI(dst, src1, compiler->imm, 63 - compiler->imm, 1) | RC(flags));
 		}
 		}
-
-		if (op == SLJIT_MSHL) {
-			FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | ((flags & ALT_FORM2) ? 0x1f : 0x3f)));
-			src2 = TMP_REG2;
-		}
-
 		return push_inst(compiler, ((flags & ALT_FORM2) ? SLW : SLD) | RC(flags) | S(src1) | A(dst) | B(src2));
 		return push_inst(compiler, ((flags & ALT_FORM2) ? SLW : SLD) | RC(flags) | S(src1) | A(dst) | B(src2));
 
 
 	case SLJIT_LSHR:
 	case SLJIT_LSHR:
-	case SLJIT_MLSHR:
 		if (flags & ALT_FORM1) {
 		if (flags & ALT_FORM1) {
 			SLJIT_ASSERT(src2 == TMP_REG2);
 			SLJIT_ASSERT(src2 == TMP_REG2);
-			imm = compiler->imm;
-
 			if (flags & ALT_FORM2) {
 			if (flags & ALT_FORM2) {
-				imm &= 0x1f;
-				/* Since imm can be 0, SRWI() cannot be used. */
-				return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | RLWI_SH((32 - imm) & 0x1f) | RLWI_MBE(imm, 31));
+				compiler->imm &= 0x1f;
+				return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (((32 - compiler->imm) & 0x1f) << 11) | (compiler->imm << 6) | (31 << 1));
 			}
 			}
-
-			imm &= 0x3f;
-			/* Since imm can be 0, SRDI() cannot be used. */
-			return push_inst(compiler, RLDICL | RC(flags) | S(src1) | A(dst) | RLDI_SH((64 - imm) & 0x3f) | RLDI_MB(imm));
+			compiler->imm &= 0x3f;
+			return push_inst(compiler, RLDI(dst, src1, 64 - compiler->imm, compiler->imm, 0) | RC(flags));
 		}
 		}
-
-		if (op == SLJIT_MLSHR) {
-			FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | ((flags & ALT_FORM2) ? 0x1f : 0x3f)));
-			src2 = TMP_REG2;
-		}
-
 		return push_inst(compiler, ((flags & ALT_FORM2) ? SRW : SRD) | RC(flags) | S(src1) | A(dst) | B(src2));
 		return push_inst(compiler, ((flags & ALT_FORM2) ? SRW : SRD) | RC(flags) | S(src1) | A(dst) | B(src2));
 
 
 	case SLJIT_ASHR:
 	case SLJIT_ASHR:
-	case SLJIT_MASHR:
 		if (flags & ALT_FORM1) {
 		if (flags & ALT_FORM1) {
 			SLJIT_ASSERT(src2 == TMP_REG2);
 			SLJIT_ASSERT(src2 == TMP_REG2);
-			imm = compiler->imm;
-
 			if (flags & ALT_FORM2) {
 			if (flags & ALT_FORM2) {
-				imm &= 0x1f;
-				return push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (imm << 11));
+				compiler->imm &= 0x1f;
+				return push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11));
 			}
 			}
-
-			imm &= 0x3f;
-			return push_inst(compiler, SRADI | RC(flags) | S(src1) | A(dst) | RLDI_SH(imm));
+			compiler->imm &= 0x3f;
+			return push_inst(compiler, SRADI | RC(flags) | S(src1) | A(dst) | ((compiler->imm & 0x1f) << 11) | ((compiler->imm & 0x20) >> 4));
 		}
 		}
-
-		if (op == SLJIT_MASHR) {
-			FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | ((flags & ALT_FORM2) ? 0x1f : 0x3f)));
-			src2 = TMP_REG2;
-		}
-
 		return push_inst(compiler, ((flags & ALT_FORM2) ? SRAW : SRAD) | RC(flags) | S(src1) | A(dst) | B(src2));
 		return push_inst(compiler, ((flags & ALT_FORM2) ? SRAW : SRAD) | RC(flags) | S(src1) | A(dst) | B(src2));
-
-	case SLJIT_ROTL:
-	case SLJIT_ROTR:
-		if (flags & ALT_FORM1) {
-			SLJIT_ASSERT(src2 == TMP_REG2);
-			imm = compiler->imm;
-
-			if (op == SLJIT_ROTR)
-				imm = (sljit_u32)(-(sljit_s32)imm);
-
-			if (flags & ALT_FORM2) {
-				imm &= 0x1f;
-				return push_inst(compiler, RLWINM | S(src1) | A(dst) | RLWI_SH(imm) | RLWI_MBE(0, 31));
-			}
-
-			imm &= 0x3f;
-			return push_inst(compiler, RLDICL | S(src1) | A(dst) | RLDI_SH(imm));
-		}
-
-		if (op == SLJIT_ROTR) {
-			FAIL_IF(push_inst(compiler, SUBFIC | D(TMP_REG2) | A(src2) | 0));
-			src2 = TMP_REG2;
-		}
-
-		return push_inst(compiler, ((flags & ALT_FORM2) ? (RLWNM | RLWI_MBE(0, 31)) : (RLDCL | RLDI_MB(0))) | S(src1) | A(dst) | B(src2));
 	}
 	}
 
 
 	SLJIT_UNREACHABLE();
 	SLJIT_UNREACHABLE();
@@ -558,7 +483,7 @@ static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_
 {
 {
 	FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(init_value >> 48)));
 	FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(init_value >> 48)));
 	FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value >> 32)));
 	FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value >> 32)));
-	FAIL_IF(push_inst(compiler, SLDI(32) | S(reg) | A(reg)));
+	FAIL_IF(PUSH_RLDICR(reg, 31));
 	FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(init_value >> 16)));
 	FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(init_value >> 16)));
 	return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value));
 	return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value));
 }
 }
@@ -577,3 +502,8 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta
 	inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
 	inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
 	SLJIT_CACHE_FLUSH(inst, inst + 5);
 	SLJIT_CACHE_FLUSH(inst, inst + 5);
 }
 }
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
+{
+	sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
+}

+ 120 - 382
thirdparty/pcre2/src/sljit/sljitNativePPC_common.c

@@ -203,13 +203,8 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
 #define OR		(HI(31) | LO(444))
 #define OR		(HI(31) | LO(444))
 #define ORI		(HI(24))
 #define ORI		(HI(24))
 #define ORIS		(HI(25))
 #define ORIS		(HI(25))
-#define RLDCL		(HI(30) | LO(8))
-#define RLDICL		(HI(30) | LO(0 << 1))
-#define RLDICR		(HI(30) | LO(1 << 1))
-#define RLDIMI		(HI(30) | LO(3 << 1))
-#define RLWIMI		(HI(20))
+#define RLDICL		(HI(30))
 #define RLWINM		(HI(21))
 #define RLWINM		(HI(21))
-#define RLWNM		(HI(23))
 #define SLD		(HI(31) | LO(27))
 #define SLD		(HI(31) | LO(27))
 #define SLW		(HI(31) | LO(24))
 #define SLW		(HI(31) | LO(24))
 #define SRAD		(HI(31) | LO(794))
 #define SRAD		(HI(31) | LO(794))
@@ -238,24 +233,9 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
 #define SIMM_MIN	(-0x8000)
 #define SIMM_MIN	(-0x8000)
 #define UIMM_MAX	(0xffff)
 #define UIMM_MAX	(0xffff)
 
 
-/* Shift helpers. */
-#define RLWI_SH(sh) ((sljit_ins)(sh) << 11)
-#define RLWI_MBE(mb, me) (((sljit_ins)(mb) << 6) | ((sljit_ins)(me) << 1))
-#define RLDI_SH(sh) ((((sljit_ins)(sh) & 0x1f) << 11) | (((sljit_ins)(sh) & 0x20) >> 4))
-#define RLDI_MB(mb) ((((sljit_ins)(mb) & 0x1f) << 6) | ((sljit_ins)(mb) & 0x20))
-#define RLDI_ME(me) RLDI_MB(me)
-
-#define SLWI(shift) (RLWINM | RLWI_SH(shift) | RLWI_MBE(0, 31 - (shift)))
-#define SLDI(shift) (RLDICR | RLDI_SH(shift) | RLDI_ME(63 - (shift)))
-/* shift > 0 */
-#define SRWI(shift) (RLWINM | RLWI_SH(32 - (shift)) | RLWI_MBE((shift), 31))
-#define SRDI(shift) (RLDICL | RLDI_SH(64 - (shift)) | RLDI_MB(shift))
-
-#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
-#define SLWI_W(shift) SLWI(shift)
-#else /* !SLJIT_CONFIG_PPC_32 */
-#define SLWI_W(shift) SLDI(shift)
-#endif /* SLJIT_CONFIG_PPC_32 */
+#define RLDI(dst, src, sh, mb, type) \
+	(HI(30) | S(src) | A(dst) | ((sljit_ins)(type) << 2) | (((sljit_ins)(sh) & 0x1f) << 11) \
+	| (((sljit_ins)(sh) & 0x20) >> 4) | (((sljit_ins)(mb) & 0x1f) << 6) | ((sljit_ins)(mb) & 0x20))
 
 
 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_uw addr, void* func)
 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_uw addr, void* func)
@@ -388,10 +368,10 @@ static SLJIT_INLINE void put_label_set(struct sljit_put_label *put_label)
 		else {
 		else {
 			inst[0] = ORIS | S(TMP_ZERO) | A(reg) | IMM(addr >> 48);
 			inst[0] = ORIS | S(TMP_ZERO) | A(reg) | IMM(addr >> 48);
 			inst[1] = ORI | S(reg) | A(reg) | IMM((addr >> 32) & 0xffff);
 			inst[1] = ORI | S(reg) | A(reg) | IMM((addr >> 32) & 0xffff);
-			inst++;
+			inst ++;
 		}
 		}
 
 
-		inst[1] = SLDI(32) | S(reg) | A(reg);
+		inst[1] = RLDI(reg, reg, 32, 31, 1);
 		inst[2] = ORIS | S(reg) | A(reg) | IMM((addr >> 16) & 0xffff);
 		inst[2] = ORIS | S(reg) | A(reg) | IMM((addr >> 16) & 0xffff);
 		inst += 2;
 		inst += 2;
 	}
 	}
@@ -399,7 +379,7 @@ static SLJIT_INLINE void put_label_set(struct sljit_put_label *put_label)
 	inst[1] = ORI | S(reg) | A(reg) | IMM(addr & 0xffff);
 	inst[1] = ORI | S(reg) | A(reg) | IMM(addr & 0xffff);
 }
 }
 
 
-#endif /* SLJIT_CONFIG_PPC_64 */
+#endif
 
 
 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
 {
 {
@@ -517,8 +497,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 				}
 				}
 				next_addr = compute_next_addr(label, jump, const_, put_label);
 				next_addr = compute_next_addr(label, jump, const_, put_label);
 			}
 			}
-			code_ptr++;
-			word_count++;
+			code_ptr ++;
+			word_count ++;
 		} while (buf_ptr < buf_end);
 		} while (buf_ptr < buf_end);
 
 
 		buf = buf->next;
 		buf = buf->next;
@@ -661,23 +641,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
 	/* A saved register is set to a zero value. */
 	/* A saved register is set to a zero value. */
 	case SLJIT_HAS_ZERO_REGISTER:
 	case SLJIT_HAS_ZERO_REGISTER:
 	case SLJIT_HAS_CLZ:
 	case SLJIT_HAS_CLZ:
-	case SLJIT_HAS_ROT:
 	case SLJIT_HAS_PREFETCH:
 	case SLJIT_HAS_PREFETCH:
 		return 1;
 		return 1;
 
 
-	case SLJIT_HAS_CTZ:
-		return 2;
-
 	default:
 	default:
 		return 0;
 		return 0;
 	}
 	}
 }
 }
 
 
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
-{
-	return (type >= SLJIT_UNORDERED && type <= SLJIT_ORDERED_LESS_EQUAL);
-}
-
 /* --------------------------------------------------------------------- */
 /* --------------------------------------------------------------------- */
 /*  Entry, exit                                                          */
 /*  Entry, exit                                                          */
 /* --------------------------------------------------------------------- */
 /* --------------------------------------------------------------------- */
@@ -744,16 +715,13 @@ ALT_FORM5		0x010000 */
 
 
 #define STACK_MAX_DISTANCE	(0x8000 - SSIZE_OF(sw) - LR_SAVE_OFFSET)
 #define STACK_MAX_DISTANCE	(0x8000 - SSIZE_OF(sw) - LR_SAVE_OFFSET)
 
 
-static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg,
-	sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg);
-
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
 {
 {
 	sljit_s32 i, tmp, base, offset;
 	sljit_s32 i, tmp, base, offset;
 	sljit_s32 word_arg_count = 0;
 	sljit_s32 word_arg_count = 0;
-	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
+	sljit_s32 saved_arg_count = 0;
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
 	sljit_s32 arg_count = 0;
 	sljit_s32 arg_count = 0;
 #endif
 #endif
@@ -762,12 +730,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
 	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
 	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
 	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
 
 
-	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 0)
+	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1)
 		+ GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64));
 		+ GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64));
-
-	if (!(options & SLJIT_ENTER_REG_ARG))
-		local_size += SSIZE_OF(sw);
-
 	local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;
 	local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;
 	compiler->local_size = local_size;
 	compiler->local_size = local_size;
 
 
@@ -806,13 +770,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 		FAIL_IF(push_inst(compiler, STFD | FS(i) | A(base) | IMM(offset)));
 		FAIL_IF(push_inst(compiler, STFD | FS(i) | A(base) | IMM(offset)));
 	}
 	}
 
 
-	if (!(options & SLJIT_ENTER_REG_ARG)) {
-		offset -= SSIZE_OF(sw);
-		FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(base) | IMM(offset)));
-	}
+	offset -= SSIZE_OF(sw);
+	FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(base) | IMM(offset)));
 
 
 	tmp = SLJIT_S0 - saveds;
 	tmp = SLJIT_S0 - saveds;
-	for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) {
+	for (i = SLJIT_S0; i > tmp; i--) {
 		offset -= SSIZE_OF(sw);
 		offset -= SSIZE_OF(sw);
 		FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(base) | IMM(offset)));
 		FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(base) | IMM(offset)));
 	}
 	}
@@ -823,14 +785,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 	}
 	}
 
 
 	FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(base) | IMM(local_size + LR_SAVE_OFFSET)));
 	FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(base) | IMM(local_size + LR_SAVE_OFFSET)));
-
-	if (options & SLJIT_ENTER_REG_ARG)
-		return SLJIT_SUCCESS;
-
 	FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0));
 	FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0));
 
 
 	arg_types >>= SLJIT_ARG_SHIFT;
 	arg_types >>= SLJIT_ARG_SHIFT;
-	saved_arg_count = 0;
 
 
 	while (arg_types > 0) {
 	while (arg_types > 0) {
 		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
 		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
@@ -872,17 +829,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
 	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
 	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
 	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
 	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
 
 
-	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 0)
+	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1)
 		+ GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64));
 		+ GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64));
-
-	if (!(options & SLJIT_ENTER_REG_ARG))
-		local_size += SSIZE_OF(sw);
-
 	compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;
 	compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;
 	return SLJIT_SUCCESS;
 	return SLJIT_SUCCESS;
 }
 }
 
 
-static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)
+
+static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
 {
 {
 	sljit_s32 i, tmp, base, offset;
 	sljit_s32 i, tmp, base, offset;
 	sljit_s32 local_size = compiler->local_size;
 	sljit_s32 local_size = compiler->local_size;
@@ -900,8 +854,7 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit
 	}
 	}
 
 
 	offset = local_size;
 	offset = local_size;
-	if (!is_return_to)
-		FAIL_IF(push_inst(compiler, STACK_LOAD | S(0) | A(base) | IMM(offset + LR_SAVE_OFFSET)));
+	FAIL_IF(push_inst(compiler, STACK_LOAD | S(0) | A(base) | IMM(offset + LR_SAVE_OFFSET)));
 
 
 	tmp = SLJIT_FS0 - compiler->fsaveds;
 	tmp = SLJIT_FS0 - compiler->fsaveds;
 	for (i = SLJIT_FS0; i > tmp; i--) {
 	for (i = SLJIT_FS0; i > tmp; i--) {
@@ -914,13 +867,11 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit
 		FAIL_IF(push_inst(compiler, LFD | FS(i) | A(base) | IMM(offset)));
 		FAIL_IF(push_inst(compiler, LFD | FS(i) | A(base) | IMM(offset)));
 	}
 	}
 
 
-	if (!(compiler->options & SLJIT_ENTER_REG_ARG)) {
-		offset -= SSIZE_OF(sw);
-		FAIL_IF(push_inst(compiler, STACK_LOAD | S(TMP_ZERO) | A(base) | IMM(offset)));
-	}
+	offset -= SSIZE_OF(sw);
+	FAIL_IF(push_inst(compiler, STACK_LOAD | S(TMP_ZERO) | A(base) | IMM(offset)));
 
 
 	tmp = SLJIT_S0 - compiler->saveds;
 	tmp = SLJIT_S0 - compiler->saveds;
-	for (i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); i > tmp; i--) {
+	for (i = SLJIT_S0; i > tmp; i--) {
 		offset -= SSIZE_OF(sw);
 		offset -= SSIZE_OF(sw);
 		FAIL_IF(push_inst(compiler, STACK_LOAD | S(i) | A(base) | IMM(offset)));
 		FAIL_IF(push_inst(compiler, STACK_LOAD | S(i) | A(base) | IMM(offset)));
 	}
 	}
@@ -930,8 +881,7 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit
 		FAIL_IF(push_inst(compiler, STACK_LOAD | S(i) | A(base) | IMM(offset)));
 		FAIL_IF(push_inst(compiler, STACK_LOAD | S(i) | A(base) | IMM(offset)));
 	}
 	}
 
 
-	if (!is_return_to)
-		push_inst(compiler, MTLR | S(0));
+	push_inst(compiler, MTLR | S(0));
 
 
 	if (local_size > 0)
 	if (local_size > 0)
 		return push_inst(compiler, ADDI | D(SLJIT_SP) | A(base) | IMM(local_size));
 		return push_inst(compiler, ADDI | D(SLJIT_SP) | A(base) | IMM(local_size));
@@ -940,40 +890,17 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit
 	return push_inst(compiler, OR | S(base) | A(SLJIT_SP) | B(base));
 	return push_inst(compiler, OR | S(base) | A(SLJIT_SP) | B(base));
 }
 }
 
 
-#undef STACK_STORE
-#undef STACK_LOAD
-
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
 {
 {
 	CHECK_ERROR();
 	CHECK_ERROR();
 	CHECK(check_sljit_emit_return_void(compiler));
 	CHECK(check_sljit_emit_return_void(compiler));
 
 
-	FAIL_IF(emit_stack_frame_release(compiler, 0));
+	FAIL_IF(emit_stack_frame_release(compiler));
 	return push_inst(compiler, BLR);
 	return push_inst(compiler, BLR);
 }
 }
 
 
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
-	sljit_s32 src, sljit_sw srcw)
-{
-	CHECK_ERROR();
-	CHECK(check_sljit_emit_return_to(compiler, src, srcw));
-
-	if (src & SLJIT_MEM) {
-		ADJUST_LOCAL_OFFSET(src, srcw);
-		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_CALL_REG, src, srcw, TMP_CALL_REG));
-		src = TMP_CALL_REG;
-		srcw = 0;
-	} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
-		FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src)));
-		src = TMP_CALL_REG;
-		srcw = 0;
-	}
-
-	FAIL_IF(emit_stack_frame_release(compiler, 1));
-
-	SLJIT_SKIP_CHECKS(compiler);
-	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
-}
+#undef STACK_STORE
+#undef STACK_LOAD
 
 
 /* --------------------------------------------------------------------- */
 /* --------------------------------------------------------------------- */
 /*  Operators                                                            */
 /*  Operators                                                            */
@@ -1139,6 +1066,7 @@ static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 inp_flag
 {
 {
 	sljit_ins inst;
 	sljit_ins inst;
 	sljit_s32 offs_reg;
 	sljit_s32 offs_reg;
+	sljit_sw high_short;
 
 
 	/* Should work when (arg & REG_MASK) == 0. */
 	/* Should work when (arg & REG_MASK) == 0. */
 	SLJIT_ASSERT(A(0) == 0);
 	SLJIT_ASSERT(A(0) == 0);
@@ -1149,7 +1077,11 @@ static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 inp_flag
 		offs_reg = OFFS_REG(arg);
 		offs_reg = OFFS_REG(arg);
 
 
 		if (argw != 0) {
 		if (argw != 0) {
-			FAIL_IF(push_inst(compiler, SLWI_W(argw) | S(OFFS_REG(arg)) | A(tmp_reg)));
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+			FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(arg)) | A(tmp_reg) | ((sljit_ins)argw << 11) | ((31 - (sljit_ins)argw) << 1)));
+#else
+			FAIL_IF(push_inst(compiler, RLDI(tmp_reg, OFFS_REG(arg), argw, 63 - argw, 1)));
+#endif
 			offs_reg = tmp_reg;
 			offs_reg = tmp_reg;
 		}
 		}
 
 
@@ -1157,7 +1089,7 @@ static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 inp_flag
 
 
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
 		SLJIT_ASSERT(!(inst & INT_ALIGNED));
 		SLJIT_ASSERT(!(inst & INT_ALIGNED));
-#endif /* SLJIT_CONFIG_PPC_64 */
+#endif
 
 
 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(offs_reg));
 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(offs_reg));
 	}
 	}
@@ -1172,24 +1104,36 @@ static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 inp_flag
 		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
 		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg) | B(tmp_reg));
 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg) | B(tmp_reg));
 	}
 	}
-#endif /* SLJIT_CONFIG_PPC_64 */
+#endif
 
 
 	if (argw <= SIMM_MAX && argw >= SIMM_MIN)
 	if (argw <= SIMM_MAX && argw >= SIMM_MIN)
 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg) | IMM(argw));
 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg) | IMM(argw));
 
 
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
 	if (argw <= 0x7fff7fffl && argw >= -0x80000000l) {
 	if (argw <= 0x7fff7fffl && argw >= -0x80000000l) {
-#endif /* SLJIT_CONFIG_PPC_64 */
-		FAIL_IF(push_inst(compiler, ADDIS | D(tmp_reg) | A(arg) | IMM((argw + 0x8000) >> 16)));
+#endif
+
+		high_short = (sljit_s32)(argw + ((argw & 0x8000) << 1)) & ~0xffff;
+
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+		SLJIT_ASSERT(high_short && high_short <= 0x7fffffffl && high_short >= -0x80000000l);
+#else
+		SLJIT_ASSERT(high_short);
+#endif
+
+		FAIL_IF(push_inst(compiler, ADDIS | D(tmp_reg) | A(arg) | IMM(high_short >> 16)));
 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_reg) | IMM(argw));
 		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_reg) | IMM(argw));
+
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
 	}
 	}
 
 
+	/* The rest is PPC-64 only. */
+
 	FAIL_IF(load_immediate(compiler, tmp_reg, argw));
 	FAIL_IF(load_immediate(compiler, tmp_reg, argw));
 
 
 	inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
 	inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
 	return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg) | B(tmp_reg));
 	return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg) | B(tmp_reg));
-#endif /* SLJIT_CONFIG_PPC_64 */
+#endif
 }
 }
 
 
 static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 input_flags,
 static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 input_flags,
@@ -1329,7 +1273,11 @@ static sljit_s32 emit_prefetch(struct sljit_compiler *compiler,
 	if (srcw == 0)
 	if (srcw == 0)
 		return push_inst(compiler, DCBT | A(src & REG_MASK) | B(OFFS_REG(src)));
 		return push_inst(compiler, DCBT | A(src & REG_MASK) | B(OFFS_REG(src)));
 
 
-	FAIL_IF(push_inst(compiler, SLWI_W(srcw) | S(OFFS_REG(src)) | A(TMP_REG1)));
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+	FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(src)) | A(TMP_REG1) | ((sljit_ins)srcw << 11) | ((31 - (sljit_ins)srcw) << 1)));
+#else
+	FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(src), srcw, 63 - srcw, 1)));
+#endif
 	return push_inst(compiler, DCBT | A(src & REG_MASK) | B(TMP_REG1));
 	return push_inst(compiler, DCBT | A(src & REG_MASK) | B(TMP_REG1));
 }
 }
 
 
@@ -1414,11 +1362,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
 		return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw);
 		return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw);
 
 
 	case SLJIT_CLZ:
 	case SLJIT_CLZ:
-	case SLJIT_CTZ:
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
-		return emit_op(compiler, op, flags | (!(op_flags & SLJIT_32) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw);
+		return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_32) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw);
 #else
 #else
-		return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw);
+		return emit_op(compiler, SLJIT_CLZ, flags, dst, dstw, TMP_REG1, 0, src, srcw);
 #endif
 #endif
 	}
 	}
 
 
@@ -1679,7 +1626,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
 			}
 			}
 		}
 		}
-		if (!HAS_FLAGS(op) && GET_OPCODE(op) != SLJIT_AND) {
+		if (GET_OPCODE(op) != SLJIT_AND) {
 			/* Unlike or and xor, the and resets unwanted bits as well. */
 			/* Unlike or and xor, the and resets unwanted bits as well. */
 			if (TEST_UI_IMM(src2, src2w)) {
 			if (TEST_UI_IMM(src2, src2w)) {
 				compiler->imm = (sljit_ins)src2w;
 				compiler->imm = (sljit_ins)src2w;
@@ -1693,13 +1640,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
 		return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
 		return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
 
 
 	case SLJIT_SHL:
 	case SLJIT_SHL:
-	case SLJIT_MSHL:
 	case SLJIT_LSHR:
 	case SLJIT_LSHR:
-	case SLJIT_MLSHR:
 	case SLJIT_ASHR:
 	case SLJIT_ASHR:
-	case SLJIT_MASHR:
-	case SLJIT_ROTL:
-	case SLJIT_ROTR:
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
 		if (op & SLJIT_32)
 		if (op & SLJIT_32)
 			flags |= ALT_FORM2;
 			flags |= ALT_FORM2;
@@ -1721,7 +1663,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
 	CHECK_ERROR();
 	CHECK_ERROR();
 	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
 	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
 
 
-	SLJIT_SKIP_CHECKS(compiler);
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+	compiler->skip_checks = 1;
+#endif
 	return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w);
 	return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w);
 }
 }
 
 
@@ -1729,102 +1674,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
 #undef TEST_SUB_FORM2
 #undef TEST_SUB_FORM2
 #undef TEST_SUB_FORM3
 #undef TEST_SUB_FORM3
 
 
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
-	sljit_s32 src_dst,
-	sljit_s32 src1, sljit_sw src1w,
-	sljit_s32 src2, sljit_sw src2w)
-{
-	sljit_s32 is_right;
-#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
-	sljit_s32 inp_flags = ((op & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
-	sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;
-#else /* !SLJIT_CONFIG_PPC_64 */
-	sljit_s32 inp_flags = WORD_DATA | LOAD_DATA;
-	sljit_sw bit_length = 32;
-#endif /* SLJIT_CONFIG_PPC_64 */
-
-	CHECK_ERROR();
-	CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w));
-
-	is_right = (GET_OPCODE(op) == SLJIT_LSHR || GET_OPCODE(op) == SLJIT_MLSHR);
-
-	if (src_dst == src1) {
-		SLJIT_SKIP_CHECKS(compiler);
-		return sljit_emit_op2(compiler, (is_right ? SLJIT_ROTR : SLJIT_ROTL) | (op & SLJIT_32), src_dst, 0, src_dst, 0, src2, src2w);
-	}
-
-	ADJUST_LOCAL_OFFSET(src1, src1w);
-	ADJUST_LOCAL_OFFSET(src2, src2w);
-
-	if (src2 & SLJIT_IMM) {
-		src2w &= bit_length - 1;
-
-		if (src2w == 0)
-			return SLJIT_SUCCESS;
-	} else if (src2 & SLJIT_MEM) {
-		FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG2, src2, src2w, TMP_REG2));
-		src2 = TMP_REG2;
-	}
-
-	if (src1 & SLJIT_MEM) {
-		FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG1, src1, src1w, TMP_REG1));
-		src1 = TMP_REG1;
-	} else if (src1 & SLJIT_IMM) {
-		FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
-		src1 = TMP_REG1;
-	}
-
-	if (src2 & SLJIT_IMM) {
-#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
-		if (!(op & SLJIT_32)) {
-			if (is_right) {
-				FAIL_IF(push_inst(compiler, SRDI(src2w) | S(src_dst) | A(src_dst)));
-				return push_inst(compiler, RLDIMI | S(src1) | A(src_dst) | RLDI_SH(64 - src2w) | RLDI_MB(0));
-			}
-
-			FAIL_IF(push_inst(compiler, SLDI(src2w) | S(src_dst) | A(src_dst)));
-			/* Computes SRDI(64 - src2w). */
-			FAIL_IF(push_inst(compiler, RLDICL | S(src1) | A(TMP_REG1) | RLDI_SH(src2w) | RLDI_MB(64 - src2w)));
-			return push_inst(compiler, OR | S(src_dst) | A(src_dst) | B(TMP_REG1));
-		}
-#endif /* SLJIT_CONFIG_PPC_64 */
-
-		if (is_right) {
-			FAIL_IF(push_inst(compiler, SRWI(src2w) | S(src_dst) | A(src_dst)));
-			return push_inst(compiler, RLWIMI | S(src1) | A(src_dst) | RLWI_SH(32 - src2w) | RLWI_MBE(0, src2w - 1));
-		}
-
-		FAIL_IF(push_inst(compiler, SLWI(src2w) | S(src_dst) | A(src_dst)));
-		return push_inst(compiler, RLWIMI | S(src1) | A(src_dst) | RLWI_SH(src2w) | RLWI_MBE(32 - src2w, 31));
-	}
-
-#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
-	if (!(op & SLJIT_32)) {
-		if (GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR) {
-			FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | 0x3f));
-			src2 = TMP_REG2;
-		}
-
-		FAIL_IF(push_inst(compiler, (is_right ? SRD : SLD) | S(src_dst) | A(src_dst) | B(src2)));
-		FAIL_IF(push_inst(compiler, (is_right ? SLDI(1) : SRDI(1)) | S(src1) | A(TMP_REG1)));
-		FAIL_IF(push_inst(compiler, XORI | S(src2) | A(TMP_REG2) | 0x3f));
-		FAIL_IF(push_inst(compiler, (is_right ? SLD : SRD) | S(TMP_REG1) | A(TMP_REG1) | B(TMP_REG2)));
-		return push_inst(compiler, OR | S(src_dst) | A(src_dst) | B(TMP_REG1));
-	}
-#endif /* SLJIT_CONFIG_PPC_64 */
-
-	if (GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR) {
-		FAIL_IF(push_inst(compiler, ANDI | S(src2) | A(TMP_REG2) | 0x1f));
-		src2 = TMP_REG2;
-	}
-
-	FAIL_IF(push_inst(compiler, (is_right ? SRW : SLW) | S(src_dst) | A(src_dst) | B(src2)));
-	FAIL_IF(push_inst(compiler, (is_right ? SLWI(1) : SRWI(1)) | S(src1) | A(TMP_REG1)));
-	FAIL_IF(push_inst(compiler, XORI | S(src2) | A(TMP_REG2) | 0x1f));
-	FAIL_IF(push_inst(compiler, (is_right ? SLW : SRW) | S(TMP_REG1) | A(TMP_REG1) | B(TMP_REG2)));
-	return push_inst(compiler, OR | S(src_dst) | A(src_dst) | B(TMP_REG1));
-}
-
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
 	sljit_s32 src, sljit_sw srcw)
 	sljit_s32 src, sljit_sw srcw)
 {
 {
@@ -1837,7 +1686,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *comp
 		if (FAST_IS_REG(src))
 		if (FAST_IS_REG(src))
 			FAIL_IF(push_inst(compiler, MTLR | S(src)));
 			FAIL_IF(push_inst(compiler, MTLR | S(src)));
 		else {
 		else {
-			FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, TMP_REG2));
+			FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
 			FAIL_IF(push_inst(compiler, MTLR | S(TMP_REG2)));
 			FAIL_IF(push_inst(compiler, MTLR | S(TMP_REG2)));
 		}
 		}
 
 
@@ -1933,7 +1782,11 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
 	if (dst & OFFS_REG_MASK) {
 	if (dst & OFFS_REG_MASK) {
 		dstw &= 0x3;
 		dstw &= 0x3;
 		if (dstw) {
 		if (dstw) {
-			FAIL_IF(push_inst(compiler, SLWI_W(dstw) | S(OFFS_REG(dst)) | A(TMP_REG1)));
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+			FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(dst)) | A(TMP_REG1) | ((sljit_ins)dstw << 11) | ((31 - (sljit_ins)dstw) << 1)));
+#else
+			FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(dst), dstw, 63 - dstw, 1)));
+#endif
 			dstw = TMP_REG1;
 			dstw = TMP_REG1;
 		}
 		}
 		else
 		else
@@ -1965,7 +1818,6 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp
 	if (src & SLJIT_IMM) {
 	if (src & SLJIT_IMM) {
 		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
 		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
 			srcw = (sljit_s32)srcw;
 			srcw = (sljit_s32)srcw;
-
 		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
 		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
 		src = TMP_REG1;
 		src = TMP_REG1;
 	}
 	}
@@ -2011,7 +1863,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp
 	   The double precision format has exactly 53 bit precision, so the lower 32 bit represents
 	   The double precision format has exactly 53 bit precision, so the lower 32 bit represents
 	   the lower 32 bit of such value. The result of xor 2^31 is the same as adding 0x80000000
 	   the lower 32 bit of such value. The result of xor 2^31 is the same as adding 0x80000000
 	   to the input, which shifts it into the 0 - 0xffffffff range. To get the converted floating
 	   to the input, which shifts it into the 0 - 0xffffffff range. To get the converted floating
-	   point value, we need to subtract 2^53 + 2^31 from the constructed value. */
+	   point value, we need to substract 2^53 + 2^31 from the constructed value. */
 	FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330));
 	FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330));
 	if (invert_sign)
 	if (invert_sign)
 		FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000));
 		FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000));
@@ -2047,21 +1899,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile
 		src2 = TMP_FREG2;
 		src2 = TMP_FREG2;
 	}
 	}
 
 
-	FAIL_IF(push_inst(compiler, FCMPU | CRD(4) | FA(src1) | FB(src2)));
-
-	switch (GET_FLAG_TYPE(op)) {
-	case SLJIT_UNORDERED_OR_EQUAL:
-	case SLJIT_ORDERED_NOT_EQUAL:
-		return push_inst(compiler, CROR | ((4 + 2) << 21) | ((4 + 2) << 16) | ((4 + 3) << 11));
-	case SLJIT_UNORDERED_OR_LESS:
-	case SLJIT_ORDERED_GREATER_EQUAL:
-		return push_inst(compiler, CROR | ((4 + 0) << 21) | ((4 + 0) << 16) | ((4 + 3) << 11));
-	case SLJIT_UNORDERED_OR_GREATER:
-	case SLJIT_ORDERED_LESS_EQUAL:
-		return push_inst(compiler, CROR | ((4 + 1) << 21) | ((4 + 1) << 16) | ((4 + 3) << 11));
-	}
-
-	return SLJIT_SUCCESS;
+	return push_inst(compiler, FCMPU | CRD(4) | FA(src1) | FB(src2));
 }
 }
 
 
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
@@ -2238,50 +2076,38 @@ static sljit_ins get_bo_bi_flags(struct sljit_compiler *compiler, sljit_s32 type
 	case SLJIT_SIG_LESS_EQUAL:
 	case SLJIT_SIG_LESS_EQUAL:
 		return (4 << 21) | (1 << 16);
 		return (4 << 21) | (1 << 16);
 
 
-	case SLJIT_OVERFLOW:
-		return (12 << 21) | (3 << 16);
-
-	case SLJIT_NOT_OVERFLOW:
-		return (4 << 21) | (3 << 16);
-
-	case SLJIT_F_LESS:
-	case SLJIT_ORDERED_LESS:
-	case SLJIT_UNORDERED_OR_LESS:
+	case SLJIT_LESS_F64:
 		return (12 << 21) | ((4 + 0) << 16);
 		return (12 << 21) | ((4 + 0) << 16);
 
 
-	case SLJIT_F_GREATER_EQUAL:
-	case SLJIT_ORDERED_GREATER_EQUAL:
-	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
+	case SLJIT_GREATER_EQUAL_F64:
 		return (4 << 21) | ((4 + 0) << 16);
 		return (4 << 21) | ((4 + 0) << 16);
 
 
-	case SLJIT_F_GREATER:
-	case SLJIT_ORDERED_GREATER:
-	case SLJIT_UNORDERED_OR_GREATER:
+	case SLJIT_GREATER_F64:
 		return (12 << 21) | ((4 + 1) << 16);
 		return (12 << 21) | ((4 + 1) << 16);
 
 
-	case SLJIT_F_LESS_EQUAL:
-	case SLJIT_ORDERED_LESS_EQUAL:
-	case SLJIT_UNORDERED_OR_LESS_EQUAL:
+	case SLJIT_LESS_EQUAL_F64:
 		return (4 << 21) | ((4 + 1) << 16);
 		return (4 << 21) | ((4 + 1) << 16);
 
 
-	case SLJIT_F_EQUAL:
-	case SLJIT_ORDERED_EQUAL:
-	case SLJIT_UNORDERED_OR_EQUAL:
+	case SLJIT_OVERFLOW:
+		return (12 << 21) | (3 << 16);
+
+	case SLJIT_NOT_OVERFLOW:
+		return (4 << 21) | (3 << 16);
+
+	case SLJIT_EQUAL_F64:
 		return (12 << 21) | ((4 + 2) << 16);
 		return (12 << 21) | ((4 + 2) << 16);
 
 
-	case SLJIT_F_NOT_EQUAL:
-	case SLJIT_ORDERED_NOT_EQUAL:
-	case SLJIT_UNORDERED_OR_NOT_EQUAL:
+	case SLJIT_NOT_EQUAL_F64:
 		return (4 << 21) | ((4 + 2) << 16);
 		return (4 << 21) | ((4 + 2) << 16);
 
 
-	case SLJIT_UNORDERED:
+	case SLJIT_UNORDERED_F64:
 		return (12 << 21) | ((4 + 3) << 16);
 		return (12 << 21) | ((4 + 3) << 16);
 
 
-	case SLJIT_ORDERED:
+	case SLJIT_ORDERED_F64:
 		return (4 << 21) | ((4 + 3) << 16);
 		return (4 << 21) | ((4 + 3) << 16);
 
 
 	default:
 	default:
-		SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL_REG_ARG);
+		SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL_CDECL);
 		return (20 << 21);
 		return (20 << 21);
 	}
 	}
 }
 }
@@ -2328,16 +2154,19 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile
 	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
 	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
 
 
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
-	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
-		PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL));
+	PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL));
 #endif
 #endif
 
 
 	if (type & SLJIT_CALL_RETURN) {
 	if (type & SLJIT_CALL_RETURN) {
-		PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));
+		PTR_FAIL_IF(emit_stack_frame_release(compiler));
 		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
 		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
 	}
 	}
 
 
-	SLJIT_SKIP_CHECKS(compiler);
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+	compiler->skip_checks = 1;
+#endif
+
 	return sljit_emit_jump(compiler, type);
 	return sljit_emit_jump(compiler, type);
 }
 }
 
 
@@ -2348,6 +2177,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
 
 
 	CHECK_ERROR();
 	CHECK_ERROR();
 	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
 	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
+	ADJUST_LOCAL_OFFSET(src, srcw);
 
 
 	if (FAST_IS_REG(src)) {
 	if (FAST_IS_REG(src)) {
 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
@@ -2374,9 +2204,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
 
 
 		FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
 		FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
 		src_r = TMP_CALL_REG;
 		src_r = TMP_CALL_REG;
-	} else {
-		ADJUST_LOCAL_OFFSET(src, srcw);
-		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_CALL_REG, src, srcw, TMP_CALL_REG));
+	}
+	else {
+		FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw));
 		src_r = TMP_CALL_REG;
 		src_r = TMP_CALL_REG;
 	}
 	}
 
 
@@ -2395,26 +2225,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
 
 
 	if (src & SLJIT_MEM) {
 	if (src & SLJIT_MEM) {
 		ADJUST_LOCAL_OFFSET(src, srcw);
 		ADJUST_LOCAL_OFFSET(src, srcw);
-		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_CALL_REG, src, srcw, TMP_CALL_REG));
+		FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw));
 		src = TMP_CALL_REG;
 		src = TMP_CALL_REG;
 	}
 	}
 
 
 	if (type & SLJIT_CALL_RETURN) {
 	if (type & SLJIT_CALL_RETURN) {
-		if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
+		if (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0) {
 			FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src)));
 			FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src)));
 			src = TMP_CALL_REG;
 			src = TMP_CALL_REG;
 		}
 		}
 
 
-		FAIL_IF(emit_stack_frame_release(compiler, 0));
+		FAIL_IF(emit_stack_frame_release(compiler));
 		type = SLJIT_JUMP;
 		type = SLJIT_JUMP;
 	}
 	}
 
 
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
-	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
-		FAIL_IF(call_with_args(compiler, arg_types, &src));
+	FAIL_IF(call_with_args(compiler, arg_types, &src));
+#endif
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+	compiler->skip_checks = 1;
 #endif
 #endif
 
 
-	SLJIT_SKIP_CHECKS(compiler);
 	return sljit_emit_ijump(compiler, type, src, srcw);
 	return sljit_emit_ijump(compiler, type, src, srcw);
 }
 }
 
 
@@ -2446,7 +2279,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
 	bit = 0;
 	bit = 0;
 	from_xer = 0;
 	from_xer = 0;
 
 
-	switch (type) {
+	switch (type & 0xff) {
 	case SLJIT_LESS:
 	case SLJIT_LESS:
 	case SLJIT_SIG_LESS:
 	case SLJIT_SIG_LESS:
 		break;
 		break;
@@ -2499,50 +2332,38 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
 		invert = (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) != 0;
 		invert = (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) != 0;
 		break;
 		break;
 
 
-	case SLJIT_F_LESS:
-	case SLJIT_ORDERED_LESS:
-	case SLJIT_UNORDERED_OR_LESS:
+	case SLJIT_LESS_F64:
 		bit = 4 + 0;
 		bit = 4 + 0;
 		break;
 		break;
 
 
-	case SLJIT_F_GREATER_EQUAL:
-	case SLJIT_ORDERED_GREATER_EQUAL:
-	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
+	case SLJIT_GREATER_EQUAL_F64:
 		bit = 4 + 0;
 		bit = 4 + 0;
 		invert = 1;
 		invert = 1;
 		break;
 		break;
 
 
-	case SLJIT_F_GREATER:
-	case SLJIT_ORDERED_GREATER:
-	case SLJIT_UNORDERED_OR_GREATER:
+	case SLJIT_GREATER_F64:
 		bit = 4 + 1;
 		bit = 4 + 1;
 		break;
 		break;
 
 
-	case SLJIT_F_LESS_EQUAL:
-	case SLJIT_ORDERED_LESS_EQUAL:
-	case SLJIT_UNORDERED_OR_LESS_EQUAL:
+	case SLJIT_LESS_EQUAL_F64:
 		bit = 4 + 1;
 		bit = 4 + 1;
 		invert = 1;
 		invert = 1;
 		break;
 		break;
 
 
-	case SLJIT_F_EQUAL:
-	case SLJIT_ORDERED_EQUAL:
-	case SLJIT_UNORDERED_OR_EQUAL:
+	case SLJIT_EQUAL_F64:
 		bit = 4 + 2;
 		bit = 4 + 2;
 		break;
 		break;
 
 
-	case SLJIT_F_NOT_EQUAL:
-	case SLJIT_ORDERED_NOT_EQUAL:
-	case SLJIT_UNORDERED_OR_NOT_EQUAL:
+	case SLJIT_NOT_EQUAL_F64:
 		bit = 4 + 2;
 		bit = 4 + 2;
 		invert = 1;
 		invert = 1;
 		break;
 		break;
 
 
-	case SLJIT_UNORDERED:
+	case SLJIT_UNORDERED_F64:
 		bit = 4 + 3;
 		bit = 4 + 3;
 		break;
 		break;
 
 
-	case SLJIT_ORDERED:
+	case SLJIT_ORDERED_F64:
 		bit = 4 + 3;
 		bit = 4 + 3;
 		invert = 1;
 		invert = 1;
 		break;
 		break;
@@ -2553,8 +2374,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
 	}
 	}
 
 
 	FAIL_IF(push_inst(compiler, (from_xer ? MFXER : MFCR) | D(reg)));
 	FAIL_IF(push_inst(compiler, (from_xer ? MFXER : MFCR) | D(reg)));
-	/* Simplified mnemonics: extrwi. */
-	FAIL_IF(push_inst(compiler, RLWINM | S(reg) | A(reg) | RLWI_SH(1 + bit) | RLWI_MBE(31, 31)));
+	FAIL_IF(push_inst(compiler, RLWINM | S(reg) | A(reg) | ((1 + bit) << 11) | (31 << 6) | (31 << 1)));
 
 
 	if (invert)
 	if (invert)
 		FAIL_IF(push_inst(compiler, XORI | S(reg) | A(reg) | 0x1));
 		FAIL_IF(push_inst(compiler, XORI | S(reg) | A(reg) | 0x1));
@@ -2565,8 +2385,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
 		return emit_op_mem(compiler, input_flags, reg, dst, dstw, TMP_REG1);
 		return emit_op_mem(compiler, input_flags, reg, dst, dstw, TMP_REG1);
 	}
 	}
 
 
-	SLJIT_SKIP_CHECKS(compiler);
-
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+	compiler->skip_checks = 1;
+#endif
 	if (dst & SLJIT_MEM)
 	if (dst & SLJIT_MEM)
 		return sljit_emit_op2(compiler, saved_op, dst, saved_dstw, TMP_REG1, 0, TMP_REG2, 0);
 		return sljit_emit_op2(compiler, saved_op, dst, saved_dstw, TMP_REG1, 0, TMP_REG2, 0);
 	return sljit_emit_op2(compiler, saved_op, dst, 0, dst, 0, TMP_REG2, 0);
 	return sljit_emit_op2(compiler, saved_op, dst, 0, dst, 0, TMP_REG2, 0);
@@ -2582,94 +2404,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
 	return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);;
 }
 
-#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
-
-#define EMIT_MEM_LOAD_IMM(inst, mem, memw) \
-	((sljit_s16)(memw) > SIMM_MAX - SSIZE_OF(sw))
-
-#else /* !SLJIT_CONFIG_PPC_32 */
-
-#define EMIT_MEM_LOAD_IMM(inst, mem, memw) \
-	((((inst) & INT_ALIGNED) && ((memw) & 0x3) != 0) \
-		|| ((sljit_s16)(memw) > SIMM_MAX - SSIZE_OF(sw)) \
-		|| ((memw) > 0x7fff7fffl || (memw) < -0x80000000l)) \
-
-#endif /* SLJIT_CONFIG_PPC_32 */
-
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
 	sljit_s32 reg,
 	sljit_s32 mem, sljit_sw memw)
-{
-	sljit_ins inst;
-
-	CHECK_ERROR();
-	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
-
-	if (!(reg & REG_PAIR_MASK))
-		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
-
-	ADJUST_LOCAL_OFFSET(mem, memw);
-
-	inst = data_transfer_insts[WORD_DATA | ((type & SLJIT_MEM_STORE) ? 0 : LOAD_DATA)];
-
-	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
-		memw &= 0x3;
-
-		if (memw != 0) {
-			FAIL_IF(push_inst(compiler, SLWI_W(memw) | S(OFFS_REG(mem)) | A(TMP_REG1)));
-			FAIL_IF(push_inst(compiler, ADD | D(TMP_REG1) | A(TMP_REG1) | B(mem & REG_MASK)));
-		} else
-			FAIL_IF(push_inst(compiler, ADD | D(TMP_REG1) | A(mem & REG_MASK) | B(OFFS_REG(mem))));
-
-		mem = TMP_REG1;
-		memw = 0;
-	} else {
-		if (EMIT_MEM_LOAD_IMM(inst, mem, memw)) {
-			if ((mem & REG_MASK) != 0) {
-				SLJIT_SKIP_CHECKS(compiler);
-				FAIL_IF(sljit_emit_op2(compiler, SLJIT_ADD, TMP_REG1, 0, mem & REG_MASK, 0, SLJIT_IMM, memw));
-			} else
-				FAIL_IF(load_immediate(compiler, TMP_REG1, memw));
-
-			memw = 0;
-			mem = TMP_REG1;
-		} else if (memw > SIMM_MAX || memw < SIMM_MIN) {
-			FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(mem & REG_MASK) | IMM((memw + 0x8000) >> 16)));
-
-			memw &= 0xffff;
-			mem = TMP_REG1;
-		} else {
-			memw &= 0xffff;
-			mem &= REG_MASK;
-		}
-	}
-
-	SLJIT_ASSERT((memw >= 0 && memw <= SIMM_MAX - SSIZE_OF(sw)) || (memw >= 0x8000 && memw <= 0xffff));
-
-#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
-	inst &= (sljit_ins)~INT_ALIGNED;
-#endif /* SLJIT_CONFIG_PPC_64 */
-
-	if (!(type & SLJIT_MEM_STORE) && mem == REG_PAIR_FIRST(reg)) {
-		FAIL_IF(push_inst(compiler, inst | D(REG_PAIR_SECOND(reg)) | A(mem) | IMM(memw + SSIZE_OF(sw))));
-		return push_inst(compiler, inst | D(REG_PAIR_FIRST(reg)) | A(mem) | IMM(memw));
-	}
-
-	FAIL_IF(push_inst(compiler, inst | D(REG_PAIR_FIRST(reg)) | A(mem) | IMM(memw)));
-	return push_inst(compiler, inst | D(REG_PAIR_SECOND(reg)) | A(mem) | IMM(memw + SSIZE_OF(sw)));
-}
-
-#undef EMIT_MEM_LOAD_IMM
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type,
-	sljit_s32 reg,
-	sljit_s32 mem, sljit_sw memw)
 {
 	sljit_s32 mem_flags;
 	sljit_ins inst;
 
 	CHECK_ERROR();
-	CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw));
+	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
 
 	if (type & SLJIT_MEM_POST)
 		return SLJIT_ERR_UNSUPPORTED;
@@ -2757,7 +2500,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *
 	return SLJIT_SUCCESS;
 }
 
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler *compiler, sljit_s32 type,
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type,
 	sljit_s32 freg,
 	sljit_s32 mem, sljit_sw memw)
 {
@@ -2765,7 +2508,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler
 	sljit_ins inst;
 
 	CHECK_ERROR();
-	CHECK(check_sljit_emit_fmem_update(compiler, type, freg, mem, memw));
+	CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw));
 
 	if (type & SLJIT_MEM_POST)
 		return SLJIT_ERR_UNSUPPORTED;
@@ -2844,8 +2587,3 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct slj
 
 	return put_label;
 }
-
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
-{
-	sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
-}

+ 0 - 73
thirdparty/pcre2/src/sljit/sljitNativeRISCV_32.c

@@ -1,73 +0,0 @@
-/*
- *    Stack-less Just-In-Time compiler
- *
- *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without modification, are
- * permitted provided that the following conditions are met:
- *
- *   1. Redistributions of source code must retain the above copyright notice, this list of
- *      conditions and the following disclaimer.
- *
- *   2. Redistributions in binary form must reproduce the above copyright notice, this list
- *      of conditions and the following disclaimer in the documentation and/or other materials
- *      provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
- * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r, sljit_sw imm, sljit_s32 tmp_r)
-{
-	SLJIT_UNUSED_ARG(tmp_r);
-	SLJIT_ASSERT(dst_r != tmp_r);
-
-	if (imm <= SIMM_MAX && imm >= SIMM_MIN)
-		return push_inst(compiler, ADDI | RD(dst_r) | RS1(TMP_ZERO) | IMM_I(imm));
-
-	if (imm & 0x800)
-		imm += 0x1000;
-
-	FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(imm & ~0xfff)));
-
-	if ((imm & 0xfff) == 0)
-		return SLJIT_SUCCESS;
-
-	return push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(imm));
-}
-
-static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value, sljit_ins last_ins)
-{
-	if ((init_value & 0x800) != 0)
-		init_value += 0x1000;
-
-	FAIL_IF(push_inst(compiler, LUI | RD(dst) | (sljit_ins)(init_value & ~0xfff)));
-	return push_inst(compiler, last_ins | RS1(dst) | IMM_I(init_value));
-}
-
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
-{
-	sljit_ins *inst = (sljit_ins*)addr;
-	SLJIT_UNUSED_ARG(executable_offset);
-
-	if ((new_target & 0x800) != 0)
-		new_target += 0x1000;
-
-	SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 0);
-
-	SLJIT_ASSERT((inst[0] & 0x7f) == LUI);
-	inst[0] = (inst[0] & 0xfff) | (sljit_ins)((sljit_sw)new_target & ~0xfff);
-	SLJIT_ASSERT((inst[1] & 0x707f) == ADDI || (inst[1] & 0x707f) == JALR);
-	inst[1] = (inst[1] & 0xfffff) | IMM_I(new_target);
-
-	SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 1);
-	inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
-	SLJIT_CACHE_FLUSH(inst, inst + 5);
-}

+ 0 - 183
thirdparty/pcre2/src/sljit/sljitNativeRISCV_64.c

@@ -1,183 +0,0 @@
-/*
- *    Stack-less Just-In-Time compiler
- *
- *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without modification, are
- * permitted provided that the following conditions are met:
- *
- *   1. Redistributions of source code must retain the above copyright notice, this list of
- *      conditions and the following disclaimer.
- *
- *   2. Redistributions in binary form must reproduce the above copyright notice, this list
- *      of conditions and the following disclaimer in the documentation and/or other materials
- *      provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
- * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r, sljit_sw imm, sljit_s32 tmp_r)
-{
-	sljit_sw high;
-
-	SLJIT_ASSERT(dst_r != tmp_r);
-
-	if (imm <= SIMM_MAX && imm >= SIMM_MIN)
-		return push_inst(compiler, ADDI | RD(dst_r) | RS1(TMP_ZERO) | IMM_I(imm));
-
-	if (imm <= 0x7fffffffl && imm >= S32_MIN) {
-		if (imm > S32_MAX) {
-			SLJIT_ASSERT((imm & 0x800) != 0);
-			FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)0x80000000u));
-			return push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(imm));
-		}
-
-		if ((imm & 0x800) != 0)
-			imm += 0x1000;
-
-		FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(imm & ~0xfff)));
-
-		if ((imm & 0xfff) == 0)
-			return SLJIT_SUCCESS;
-
-		return push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(imm));
-	}
-
-	/* Trailing zeroes could be used to produce shifted immediates. */
-
-	if (imm <= 0x7ffffffffffl && imm >= -0x80000000000l) {
-		high = imm >> 12;
-
-		if (imm & 0x800)
-			high = ~high;
-
-		if (high > S32_MAX) {
-			SLJIT_ASSERT((high & 0x800) != 0);
-			FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)0x80000000u));
-			FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(high)));
-		} else {
-			if ((high & 0x800) != 0)
-				high += 0x1000;
-
-			FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(high & ~0xfff)));
-
-			if ((high & 0xfff) != 0)
-				FAIL_IF(push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(high)));
-		}
-
-		FAIL_IF(push_inst(compiler, SLLI | RD(dst_r) | RS1(dst_r) | IMM_I(12)));
-
-		if ((imm & 0xfff) != 0)
-			return push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(imm));
-
-		return SLJIT_SUCCESS;
-	}
-
-	high = imm >> 32;
-	imm = (sljit_s32)imm;
-
-	if ((imm & 0x80000000l) != 0)
-		high = ~high;
-
-	if (high <= 0x7ffff && high >= -0x80000) {
-		FAIL_IF(push_inst(compiler, LUI | RD(tmp_r) | (sljit_ins)(high << 12)));
-		high = 0x1000;
-	} else {
-		if ((high & 0x800) != 0)
-			high += 0x1000;
-
-		FAIL_IF(push_inst(compiler, LUI | RD(tmp_r) | (sljit_ins)(high & ~0xfff)));
-		high &= 0xfff;
-	}
-
-	if (imm <= SIMM_MAX && imm >= SIMM_MIN) {
-		FAIL_IF(push_inst(compiler, ADDI | RD(dst_r) | RS1(TMP_ZERO) | IMM_I(imm)));
-		imm = 0;
-	} else if (imm > S32_MAX) {
-		SLJIT_ASSERT((imm & 0x800) != 0);
-
-		FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)0x80000000u));
-		imm = 0x1000 | (imm & 0xfff);
-	} else {
-		if ((imm & 0x800) != 0)
-			imm += 0x1000;
-
-		FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(imm & ~0xfff)));
-		imm &= 0xfff;
-	}
-
-	if ((high & 0xfff) != 0)
-		FAIL_IF(push_inst(compiler, ADDI | RD(tmp_r) | RS1(tmp_r) | IMM_I(high)));
-
-	if (imm & 0x1000)
-		FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(imm)));
-	else if (imm != 0)
-		FAIL_IF(push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(imm)));
-
-	FAIL_IF(push_inst(compiler, SLLI | RD(tmp_r) | RS1(tmp_r) | IMM_I((high & 0x1000) ? 20 : 32)));
-	return push_inst(compiler, XOR | RD(dst_r) | RS1(dst_r) | RS2(tmp_r));
-}
-
-static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value, sljit_ins last_ins)
-{
-	sljit_sw high;
-
-	if ((init_value & 0x800) != 0)
-		init_value += 0x1000;
-
-	high = init_value >> 32;
-
-	if ((init_value & 0x80000000l) != 0)
-		high = ~high;
-
-	if ((high & 0x800) != 0)
-		high += 0x1000;
-
-	FAIL_IF(push_inst(compiler, LUI | RD(TMP_REG3) | (sljit_ins)(high & ~0xfff)));
-	FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG3) | RS1(TMP_REG3) | IMM_I(high)));
-	FAIL_IF(push_inst(compiler, LUI | RD(dst) | (sljit_ins)(init_value & ~0xfff)));
-	FAIL_IF(push_inst(compiler, SLLI | RD(TMP_REG3) | RS1(TMP_REG3) | IMM_I(32)));
-	FAIL_IF(push_inst(compiler, XOR | RD(dst) | RS1(dst) | RS2(TMP_REG3)));
-	return push_inst(compiler, last_ins | RS1(dst) | IMM_I(init_value));
-}
-
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
-{
-	sljit_ins *inst = (sljit_ins*)addr;
-	sljit_sw high;
-	SLJIT_UNUSED_ARG(executable_offset);
-
-	if ((new_target & 0x800) != 0)
-		new_target += 0x1000;
-
-	high = (sljit_sw)new_target >> 32;
-
-	if ((new_target & 0x80000000l) != 0)
-		high = ~high;
-
-	if ((high & 0x800) != 0)
-		high += 0x1000;
-
-	SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 0);
-
-	SLJIT_ASSERT((inst[0] & 0x7f) == LUI);
-	inst[0] = (inst[0] & 0xfff) | (sljit_ins)(high & ~0xfff);
-	SLJIT_ASSERT((inst[1] & 0x707f) == ADDI);
-	inst[1] = (inst[1] & 0xfffff) | IMM_I(high);
-	SLJIT_ASSERT((inst[2] & 0x7f) == LUI);
-	inst[2] = (inst[2] & 0xfff) | (sljit_ins)((sljit_sw)new_target & ~0xfff);
-	SLJIT_ASSERT((inst[5] & 0x707f) == ADDI || (inst[5] & 0x707f) == JALR);
-	inst[5] = (inst[5] & 0xfffff) | IMM_I(new_target);
-	SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 1);
-
-	inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
-	SLJIT_CACHE_FLUSH(inst, inst + 5);
-}

+ 0 - 2762
thirdparty/pcre2/src/sljit/sljitNativeRISCV_common.c

@@ -1,2762 +0,0 @@
-/*
- *    Stack-less Just-In-Time compiler
- *
- *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without modification, are
- * permitted provided that the following conditions are met:
- *
- *   1. Redistributions of source code must retain the above copyright notice, this list of
- *      conditions and the following disclaimer.
- *
- *   2. Redistributions in binary form must reproduce the above copyright notice, this list
- *      of conditions and the following disclaimer in the documentation and/or other materials
- *      provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
- * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
-{
-#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
-	return "RISC-V-32" SLJIT_CPUINFO;
-#else /* !SLJIT_CONFIG_RISCV_32 */
-	return "RISC-V-64" SLJIT_CPUINFO;
-#endif /* SLJIT_CONFIG_RISCV_32 */
-}
-
-/* Length of an instruction word
-   Both for riscv-32 and riscv-64 */
-typedef sljit_u32 sljit_ins;
-
-#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
-#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
-#define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)
-#define TMP_ZERO	0
-
-/* Flags are kept in volatile registers. */
-#define EQUAL_FLAG	(SLJIT_NUMBER_OF_REGISTERS + 5)
-#define RETURN_ADDR_REG	TMP_REG2
-#define OTHER_FLAG	(SLJIT_NUMBER_OF_REGISTERS + 6)
-
-#define TMP_FREG1	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
-#define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
-
-static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
-	0, 10, 11, 12, 13, 14, 15, 16, 17, 29, 30, 31, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 9, 8, 2, 6, 1, 7, 5, 28
-};
-
-static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
-	0, 10, 11, 12, 13, 14, 15, 16, 17, 2, 3, 4, 5, 6, 7, 28, 29, 30, 31, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 9, 8, 0, 1,
-};
-
-/* --------------------------------------------------------------------- */
-/*  Instrucion forms                                                     */
-/* --------------------------------------------------------------------- */
-
-#define RD(rd)		((sljit_ins)reg_map[rd] << 7)
-#define RS1(rs1)	((sljit_ins)reg_map[rs1] << 15)
-#define RS2(rs2)	((sljit_ins)reg_map[rs2] << 20)
-#define FRD(rd)		((sljit_ins)freg_map[rd] << 7)
-#define FRS1(rs1)	((sljit_ins)freg_map[rs1] << 15)
-#define FRS2(rs2)	((sljit_ins)freg_map[rs2] << 20)
-#define IMM_I(imm)	((sljit_ins)(imm) << 20)
-#define IMM_S(imm)	((((sljit_ins)(imm) & 0xfe0) << 20) | (((sljit_ins)(imm) & 0x1f) << 7))
-
-/* Represents funct(i) parts of the instructions. */
-#define OPC(o)		((sljit_ins)(o))
-#define F3(f)		((sljit_ins)(f) << 12)
-#define F12(f)		((sljit_ins)(f) << 20)
-#define F7(f)		((sljit_ins)(f) << 25)
-
-#define ADD		(F7(0x0) | F3(0x0) | OPC(0x33))
-#define ADDI		(F3(0x0) | OPC(0x13))
-#define AND		(F7(0x0) | F3(0x7) | OPC(0x33))
-#define ANDI		(F3(0x7) | OPC(0x13))
-#define AUIPC		(OPC(0x17))
-#define BEQ		(F3(0x0) | OPC(0x63))
-#define BNE		(F3(0x1) | OPC(0x63))
-#define BLT		(F3(0x4) | OPC(0x63))
-#define BGE		(F3(0x5) | OPC(0x63))
-#define BLTU		(F3(0x6) | OPC(0x63))
-#define BGEU		(F3(0x7) | OPC(0x63))
-#define DIV		(F7(0x1) | F3(0x4) | OPC(0x33))
-#define DIVU		(F7(0x1) | F3(0x5) | OPC(0x33))
-#define EBREAK		(F12(0x1) | F3(0x0) | OPC(0x73))
-#define FADD_S		(F7(0x0) | F3(0x7) | OPC(0x53))
-#define FDIV_S		(F7(0xc) | F3(0x7) | OPC(0x53))
-#define FEQ_S		(F7(0x50) | F3(0x2) | OPC(0x53))
-#define FLD		(F3(0x3) | OPC(0x7))
-#define FLE_S		(F7(0x50) | F3(0x0) | OPC(0x53))
-#define FLT_S		(F7(0x50) | F3(0x1) | OPC(0x53))
-#define FSD		(F3(0x3) | OPC(0x27))
-/* These conversion opcodes are partly defined. */
-#define FCVT_S_D	(F7(0x20) | OPC(0x53))
-#define FCVT_S_W	(F7(0x68) | OPC(0x53))
-#define FCVT_W_S	(F7(0x60) | F3(0x1) | OPC(0x53))
-#define FMUL_S		(F7(0x8) | F3(0x7) | OPC(0x53))
-#define FSGNJ_S		(F7(0x10) | F3(0x0) | OPC(0x53))
-#define FSGNJN_S	(F7(0x10) | F3(0x1) | OPC(0x53))
-#define FSGNJX_S	(F7(0x10) | F3(0x2) | OPC(0x53))
-#define FSUB_S		(F7(0x4) | F3(0x7) | OPC(0x53))
-#define JAL		(OPC(0x6f))
-#define JALR		(F3(0x0) | OPC(0x67))
-#define LD		(F3(0x3) | OPC(0x3))
-#define LUI		(OPC(0x37))
-#define LW		(F3(0x2) | OPC(0x3))
-#define MUL		(F7(0x1) | F3(0x0) | OPC(0x33))
-#define MULH		(F7(0x1) | F3(0x1) | OPC(0x33))
-#define MULHU		(F7(0x1) | F3(0x3) | OPC(0x33))
-#define OR		(F7(0x0) | F3(0x6) | OPC(0x33))
-#define ORI		(F3(0x6) | OPC(0x13))
-#define REM		(F7(0x1) | F3(0x6) | OPC(0x33))
-#define REMU		(F7(0x1) | F3(0x7) | OPC(0x33))
-#define SD		(F3(0x3) | OPC(0x23))
-#define SLL		(F7(0x0) | F3(0x1) | OPC(0x33))
-#define SLLI		(IMM_I(0x0) | F3(0x1) | OPC(0x13))
-#define SLT		(F7(0x0) | F3(0x2) | OPC(0x33))
-#define SLTI		(F3(0x2) | OPC(0x13))
-#define SLTU		(F7(0x0) | F3(0x3) | OPC(0x33))
-#define SLTUI		(F3(0x3) | OPC(0x13))
-#define SRL		(F7(0x0) | F3(0x5) | OPC(0x33))
-#define SRLI		(IMM_I(0x0) | F3(0x5) | OPC(0x13))
-#define SRA		(F7(0x20) | F3(0x5) | OPC(0x33))
-#define SRAI		(IMM_I(0x400) | F3(0x5) | OPC(0x13))
-#define SUB		(F7(0x20) | F3(0x0) | OPC(0x33))
-#define SW		(F3(0x2) | OPC(0x23))
-#define XOR		(F7(0x0) | F3(0x4) | OPC(0x33))
-#define XORI		(F3(0x4) | OPC(0x13))
-
-#define SIMM_MAX	(0x7ff)
-#define SIMM_MIN	(-0x800)
-#define BRANCH_MAX	(0xfff)
-#define BRANCH_MIN	(-0x1000)
-#define JUMP_MAX	(0xfffff)
-#define JUMP_MIN	(-0x100000)
-
-#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
-#define S32_MAX		(0x7ffff7ffl)
-#define S32_MIN		(-0x80000000l)
-#define S44_MAX		(0x7fffffff7ffl)
-#define S52_MAX		(0x7ffffffffffffl)
-#endif
-
-static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
-{
-	sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
-	FAIL_IF(!ptr);
-	*ptr = ins;
-	compiler->size++;
-	return SLJIT_SUCCESS;
-}
-
-static sljit_s32 push_imm_s_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_sw imm)
-{
-	return push_inst(compiler, ins | IMM_S(imm));
-}
-
-static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code, sljit_sw executable_offset)
-{
-	sljit_sw diff;
-	sljit_uw target_addr;
-	sljit_ins *inst;
-
-	inst = (sljit_ins *)jump->addr;
-
-	if (jump->flags & SLJIT_REWRITABLE_JUMP)
-		goto exit;
-
-	if (jump->flags & JUMP_ADDR)
-		target_addr = jump->u.target;
-	else {
-		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
-		target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
-	}
-
-	diff = (sljit_sw)target_addr - (sljit_sw)inst - executable_offset;
-
-	if (jump->flags & IS_COND) {
-		inst--;
-		diff += SSIZE_OF(ins);
-
-		if (diff >= BRANCH_MIN && diff <= BRANCH_MAX) {
-			jump->flags |= PATCH_B;
-			inst[0] = (inst[0] & 0x1fff07f) ^ 0x1000;
-			jump->addr = (sljit_uw)inst;
-			return inst;
-		}
-
-		inst++;
-		diff -= SSIZE_OF(ins);
-	}
-
-	if (diff >= JUMP_MIN && diff <= JUMP_MAX) {
-		if (jump->flags & IS_COND) {
-#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
-			inst[-1] -= (sljit_ins)(1 * sizeof(sljit_ins)) << 7;
-#else
-			inst[-1] -= (sljit_ins)(5 * sizeof(sljit_ins)) << 7;
-#endif
-		}
-
-		jump->flags |= PATCH_J;
-		return inst;
-	}
-
-#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
-	if (diff >= S32_MIN && diff <= S32_MAX) {
-		if (jump->flags & IS_COND)
-			inst[-1] -= (sljit_ins)(4 * sizeof(sljit_ins)) << 7;
-
-		jump->flags |= PATCH_REL32;
-		inst[1] = inst[0];
-		return inst + 1;
-	}
-
-	if (target_addr <= (sljit_uw)S32_MAX) {
-		if (jump->flags & IS_COND)
-			inst[-1] -= (sljit_ins)(4 * sizeof(sljit_ins)) << 7;
-
-		jump->flags |= PATCH_ABS32;
-		inst[1] = inst[0];
-		return inst + 1;
-	}
-
-	if (target_addr <= S44_MAX) {
-		if (jump->flags & IS_COND)
-			inst[-1] -= (sljit_ins)(2 * sizeof(sljit_ins)) << 7;
-
-		jump->flags |= PATCH_ABS44;
-		inst[3] = inst[0];
-		return inst + 3;
-	}
-
-	if (target_addr <= S52_MAX) {
-		if (jump->flags & IS_COND)
-			inst[-1] -= (sljit_ins)(1 * sizeof(sljit_ins)) << 7;
-
-		jump->flags |= PATCH_ABS52;
-		inst[4] = inst[0];
-		return inst + 4;
-	}
-#endif
-
-exit:
-#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
-	inst[1] = inst[0];
-	return inst + 1;
-#else
-	inst[5] = inst[0];
-	return inst + 5;
-#endif
-}
-
-#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
-
-static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label)
-{
-	if (max_label <= (sljit_uw)S32_MAX) {
-		put_label->flags = PATCH_ABS32;
-		return 1;
-	}
-
-	if (max_label <= S44_MAX) {
-		put_label->flags = PATCH_ABS44;
-		return 3;
-	}
-
-	if (max_label <= S52_MAX) {
-		put_label->flags = PATCH_ABS52;
-		return 4;
-	}
-
-	put_label->flags = 0;
-	return 5;
-}
-
-#endif /* SLJIT_CONFIG_RISCV_64 */
-
-static SLJIT_INLINE void load_addr_to_reg(void *dst, sljit_u32 reg)
-{
-	struct sljit_jump *jump = NULL;
-	struct sljit_put_label *put_label;
-	sljit_uw flags;
-	sljit_ins *inst;
-#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
-	sljit_sw high;
-#endif
-	sljit_uw addr;
-
-	if (reg != 0) {
-		jump = (struct sljit_jump*)dst;
-		flags = jump->flags;
-		inst = (sljit_ins*)jump->addr;
-		addr = (flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
-	} else {
-		put_label = (struct sljit_put_label*)dst;
-#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
-		flags = put_label->flags;
-#endif
-		inst = (sljit_ins*)put_label->addr;
-		addr = put_label->label->addr;
-		reg = *inst;
-	}
-
-	if ((addr & 0x800) != 0)
-		addr += 0x1000;
-
-#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
-	inst[0] = LUI | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff);
-#else /* !SLJIT_CONFIG_RISCV_32 */
-
-	if (flags & PATCH_ABS32) {
-		SLJIT_ASSERT(addr <= S32_MAX);
-		inst[0] = LUI | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff);
-	} else if (flags & PATCH_ABS44) {
-		high = (sljit_sw)addr >> 12;
-		SLJIT_ASSERT((sljit_uw)high <= 0x7fffffff);
-
-		if (high > S32_MAX) {
-			SLJIT_ASSERT((high & 0x800) != 0);
-			inst[0] = LUI | RD(reg) | (sljit_ins)0x80000000u;
-			inst[1] = XORI | RD(reg) | RS1(reg) | IMM_I(high);
-		} else {
-			if ((high & 0x800) != 0)
-				high += 0x1000;
-
-			inst[0] = LUI | RD(reg) | (sljit_ins)(high & ~0xfff);
-			inst[1] = ADDI | RD(reg) | RS1(reg) | IMM_I(high);
-		}
-
-		inst[2] = SLLI | RD(reg) | RS1(reg) | IMM_I(12);
-		inst += 2;
-	} else {
-		high = (sljit_sw)addr >> 32;
-
-		if ((addr & 0x80000000l) != 0)
-			high = ~high;
-
-		if ((high & 0x800) != 0)
-			high += 0x1000;
-
-		if (flags & PATCH_ABS52) {
-			SLJIT_ASSERT(addr <= S52_MAX);
-			inst[0] = LUI | RD(TMP_REG3) | (sljit_ins)(high << 12);
-		} else {
-			inst[0] = LUI | RD(TMP_REG3) | (sljit_ins)(high & ~0xfff);
-			inst[1] = ADDI | RD(TMP_REG3) | RS1(TMP_REG3) | IMM_I(high);
-			inst++;
-		}
-
-		inst[1] = LUI | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff);
-		inst[2] = SLLI | RD(TMP_REG3) | RS1(TMP_REG3) | IMM_I((flags & PATCH_ABS52) ? 20 : 32);
-		inst[3] = XOR | RD(reg) | RS1(reg) | RS2(TMP_REG3);
-		inst += 3;
-	}
-#endif /* !SLJIT_CONFIG_RISCV_32 */
-
-	if (jump != NULL) {
-		SLJIT_ASSERT((inst[1] & 0x707f) == JALR);
-		inst[1] = (inst[1] & 0xfffff) | IMM_I(addr);
-	} else
-		inst[1] = ADDI | RD(reg) | RS1(reg) | IMM_I(addr);
-}
-
-SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
-{
-	struct sljit_memory_fragment *buf;
-	sljit_ins *code;
-	sljit_ins *code_ptr;
-	sljit_ins *buf_ptr;
-	sljit_ins *buf_end;
-	sljit_uw word_count;
-	sljit_uw next_addr;
-	sljit_sw executable_offset;
-	sljit_uw addr;
-
-	struct sljit_label *label;
-	struct sljit_jump *jump;
-	struct sljit_const *const_;
-	struct sljit_put_label *put_label;
-
-	CHECK_ERROR_PTR();
-	CHECK_PTR(check_sljit_generate_code(compiler));
-	reverse_buf(compiler);
-
-	code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins), compiler->exec_allocator_data);
-	PTR_FAIL_WITH_EXEC_IF(code);
-	buf = compiler->buf;
-
-	code_ptr = code;
-	word_count = 0;
-	next_addr = 0;
-	executable_offset = SLJIT_EXEC_OFFSET(code);
-
-	label = compiler->labels;
-	jump = compiler->jumps;
-	const_ = compiler->consts;
-	put_label = compiler->put_labels;
-
-	do {
-		buf_ptr = (sljit_ins*)buf->memory;
-		buf_end = buf_ptr + (buf->used_size >> 2);
-		do {
-			*code_ptr = *buf_ptr++;
-			if (next_addr == word_count) {
-				SLJIT_ASSERT(!label || label->size >= word_count);
-				SLJIT_ASSERT(!jump || jump->addr >= word_count);
-				SLJIT_ASSERT(!const_ || const_->addr >= word_count);
-				SLJIT_ASSERT(!put_label || put_label->addr >= word_count);
-
-				/* These structures are ordered by their address. */
-				if (label && label->size == word_count) {
-					label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
-					label->size = (sljit_uw)(code_ptr - code);
-					label = label->next;
-				}
-				if (jump && jump->addr == word_count) {
-#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
-					word_count += 1;
-#else
-					word_count += 5;
-#endif
-					jump->addr = (sljit_uw)code_ptr;
-					code_ptr = detect_jump_type(jump, code, executable_offset);
-					jump = jump->next;
-				}
-				if (const_ && const_->addr == word_count) {
-					const_->addr = (sljit_uw)code_ptr;
-					const_ = const_->next;
-				}
-				if (put_label && put_label->addr == word_count) {
-					SLJIT_ASSERT(put_label->label);
-					put_label->addr = (sljit_uw)code_ptr;
-#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
-					code_ptr += 1;
-					word_count += 1;
-#else
-					code_ptr += put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size));
-					word_count += 5;
-#endif
-					put_label = put_label->next;
-				}
-				next_addr = compute_next_addr(label, jump, const_, put_label);
-			}
-			code_ptr++;
-			word_count++;
-		} while (buf_ptr < buf_end);
-
-		buf = buf->next;
-	} while (buf);
-
-	if (label && label->size == word_count) {
-		label->addr = (sljit_uw)code_ptr;
-		label->size = (sljit_uw)(code_ptr - code);
-		label = label->next;
-	}
-
-	SLJIT_ASSERT(!label);
-	SLJIT_ASSERT(!jump);
-	SLJIT_ASSERT(!const_);
-	SLJIT_ASSERT(!put_label);
-	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
-
-	jump = compiler->jumps;
-	while (jump) {
-		do {
-			if (!(jump->flags & (PATCH_B | PATCH_J | PATCH_REL32))) {
-				load_addr_to_reg(jump, TMP_REG1);
-				break;
-			}
-
-			addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
-			buf_ptr = (sljit_ins *)jump->addr;
-			addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset);
-
-			if (jump->flags & PATCH_B) {
-				SLJIT_ASSERT((sljit_sw)addr >= BRANCH_MIN && (sljit_sw)addr <= BRANCH_MAX);
-				addr = ((addr & 0x800) >> 4) | ((addr & 0x1e) << 7) | ((addr & 0x7e0) << 20) | ((addr & 0x1000) << 19);
-				buf_ptr[0] |= (sljit_ins)addr;
-				break;
-			}
-
-#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
-			if (jump->flags & PATCH_REL32) {
-				SLJIT_ASSERT((sljit_sw)addr >= S32_MIN && (sljit_sw)addr <= S32_MAX);
-
-				if ((addr & 0x800) != 0)
-					addr += 0x1000;
-
-				buf_ptr[0] = AUIPC | RD(TMP_REG1) | (sljit_ins)((sljit_sw)addr & ~0xfff);
-				SLJIT_ASSERT((buf_ptr[1] & 0x707f) == JALR);
-				buf_ptr[1] |= IMM_I(addr);
-				break;
-			}
-#endif
-
-			SLJIT_ASSERT((sljit_sw)addr >= JUMP_MIN && (sljit_sw)addr <= JUMP_MAX);
-			addr = (addr & 0xff000) | ((addr & 0x800) << 9) | ((addr & 0x7fe) << 20) | ((addr & 0x100000) << 11);
-			buf_ptr[0] = JAL | RD((jump->flags & IS_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | (sljit_ins)addr;
-		} while (0);
-		jump = jump->next;
-	}
-
-	put_label = compiler->put_labels;
-	while (put_label) {
-		load_addr_to_reg(put_label, 0);
-		put_label = put_label->next;
-	}
-
-	compiler->error = SLJIT_ERR_COMPILED;
-	compiler->executable_offset = executable_offset;
-	compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins);
-
-	code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
-	code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
-
-	SLJIT_CACHE_FLUSH(code, code_ptr);
-	SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
-	return code;
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
-{
-	switch (feature_type) {
-	case SLJIT_HAS_FPU:
-	case SLJIT_HAS_ZERO_REGISTER:
-		return 1;
-	default:
-		return 0;
-	}
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
-{
-	return (type >= SLJIT_ORDERED_EQUAL && type <= SLJIT_ORDERED_LESS_EQUAL);
-}
-
-/* --------------------------------------------------------------------- */
-/*  Entry, exit                                                          */
-/* --------------------------------------------------------------------- */
-
-/* Creates an index in data_transfer_insts array.
-   The bits covered by MEM_MASK select one table entry: LOAD_DATA picks the
-   load (odd) or store (even) slot, BYTE/HALF/INT_DATA the access width,
-   SIGNED_DATA the second (sign-extending) group of eight entries, and
-   DOUBLE_DATA / SINGLE_DATA the floating point entries at the end. */
-#define LOAD_DATA	0x01
-#define WORD_DATA	0x00
-#define BYTE_DATA	0x02
-#define HALF_DATA	0x04
-#define INT_DATA	0x06
-#define SIGNED_DATA	0x08
-/* Separates integer and floating point registers:
-   push_mem_inst treats (flags & MEM_MASK) <= GPR_REG as an integer register
-   transfer and anything above it as a floating point register transfer. */
-#define GPR_REG		0x0f
-#define DOUBLE_DATA	0x10
-#define SINGLE_DATA	0x12
-
-#define MEM_MASK	0x1f	/* bits of the flags word used to index data_transfer_insts */
-
-#define ARG_TEST	0x00020	/* getput_arg_fast: only report feasibility, emit nothing */
-#define ALT_KEEP_CACHE	0x00040
-#define CUMULATIVE_OP	0x00080
-#define IMM_OP		0x00100
-#define MOVE_OP		0x00200
-#define SRC2_IMM	0x00400	/* the second source operand is an immediate, not a register */
-
-#define UNUSED_DEST	0x00800	/* the result register is not needed; only flags may be produced */
-#define REG_DEST	0x01000
-#define REG1_SOURCE	0x02000
-#define REG2_SOURCE	0x04000
-#define SLOW_SRC1	0x08000
-#define SLOW_SRC2	0x10000
-#define SLOW_DEST	0x20000
-
-#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
-#define STACK_STORE	SW	/* store used for saving registers into the stack frame */
-#define STACK_LOAD	LW	/* load used for restoring registers from the stack frame */
-#else
-#define STACK_STORE	SD	/* store used for saving registers into the stack frame */
-#define STACK_LOAD	LD	/* load used for restoring registers from the stack frame */
-#endif
-
-#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
-#include "sljitNativeRISCV_32.c"
-#else
-#include "sljitNativeRISCV_64.c"
-#endif
-
-#define STACK_MAX_DISTANCE (-SIMM_MIN)	/* largest stack adjustment sljit_emit_enter performs with a single ADDI */
-
-static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw);	/* forward declaration: called by sljit_emit_return_to before the definition */
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
-	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
-	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
-{	/* Emits the function prologue.
-	 *
-	 * Computes the total frame size (rounded up to a multiple of 16) and
-	 * records it in compiler->local_size, lowers SLJIT_SP, then stores the
-	 * return address, the saved / upper scratch integer registers and the
-	 * saved / upper scratch float registers at decreasing offsets from the
-	 * top of the frame. Unless SLJIT_ENTER_REG_ARG is set in options, the
-	 * incoming arguments whose type is below SLJIT_ARG_TYPE_F64 and which are
-	 * not flagged SLJIT_ARG_TYPE_SCRATCH_REG are then copied from consecutive
-	 * registers starting at SLJIT_R0 into SLJIT_S0, SLJIT_S0 - 1, ...
-	 *
-	 * Returns SLJIT_SUCCESS, or leaves early through CHECK_ERROR / CHECK /
-	 * FAIL_IF when an earlier or current step failed.
-	 */
-	sljit_s32 i, tmp, offset;
-	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
-
-	CHECK_ERROR();
-	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
-	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
-
-	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);
-#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
-	if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
-		if ((local_size & SSIZE_OF(sw)) != 0)	/* presumably pads one word so the f64 save area is 8-byte aligned - TODO confirm */
-			local_size += SSIZE_OF(sw);
-		local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64));
-	}
-#else
-	local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64));
-#endif
-	local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;	/* round the frame up to a multiple of 16 */
-	compiler->local_size = local_size;
-
-	if (local_size <= STACK_MAX_DISTANCE) {
-		/* Frequent case. The whole frame is allocated by one ADDI;
-		   local_size is cleared so no second adjustment is emitted below. */
-		FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(-local_size)));
-		offset = local_size - SSIZE_OF(sw);
-		local_size = 0;
-	} else {
-		FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(STACK_MAX_DISTANCE)));	/* NOTE(review): the positive value is passed to IMM_I; presumably it is truncated to the immediate field and so encodes -STACK_MAX_DISTANCE - confirm against IMM_I */
-		local_size -= STACK_MAX_DISTANCE;
-
-		if (local_size > STACK_MAX_DISTANCE)	/* remainder is loaded into TMP_REG1 for the SUB emitted after the register stores */
-			FAIL_IF(load_immediate(compiler, TMP_REG1, local_size, TMP_REG3));
-		offset = STACK_MAX_DISTANCE - SSIZE_OF(sw);
-	}
-
-	FAIL_IF(push_imm_s_inst(compiler, STACK_STORE | RS1(SLJIT_SP) | RS2(RETURN_ADDR_REG), offset));	/* return address goes into the highest word of the area just allocated */
-
-	tmp = SLJIT_S0 - saveds;
-	for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) {	/* saved registers, skipping the ones kept by the caller */
-		offset -= SSIZE_OF(sw);
-		FAIL_IF(push_imm_s_inst(compiler, STACK_STORE | RS1(SLJIT_SP) | RS2(i), offset));
-	}
-
-	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {	/* scratch registers numbered SLJIT_FIRST_SAVED_REG and above */
-		offset -= SSIZE_OF(sw);
-		FAIL_IF(push_imm_s_inst(compiler, STACK_STORE | RS1(SLJIT_SP) | RS2(i), offset));
-	}
-
-#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
-	/* This alignment is valid because offset is not used after storing FPU regs. */
-	if ((offset & SSIZE_OF(sw)) != 0)
-		offset -= SSIZE_OF(sw);
-#endif
-
-	tmp = SLJIT_FS0 - fsaveds;
-	for (i = SLJIT_FS0; i > tmp; i--) {
-		offset -= SSIZE_OF(f64);
-		FAIL_IF(push_imm_s_inst(compiler, FSD | RS1(SLJIT_SP) | FRS2(i), offset));
-	}
-
-	for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
-		offset -= SSIZE_OF(f64);
-		FAIL_IF(push_imm_s_inst(compiler, FSD | RS1(SLJIT_SP) | FRS2(i), offset));
-	}
-
-	if (local_size > STACK_MAX_DISTANCE)	/* large frame: subtract the remainder preloaded into TMP_REG1 */
-		FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RS1(SLJIT_SP) | RS2(TMP_REG1)));
-	else if (local_size > 0)	/* medium frame: the remainder still fits an ADDI immediate */
-		FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(-local_size)));
-
-	if (options & SLJIT_ENTER_REG_ARG)	/* no argument moves are emitted in this mode */
-		return SLJIT_SUCCESS;
-
-	arg_types >>= SLJIT_ARG_SHIFT;	/* drop the lowest field (presumably the return type - TODO confirm) */
-	saved_arg_count = 0;
-	tmp = SLJIT_R0;
-
-	while (arg_types > 0) {
-		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
-			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
-				FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_S0 - saved_arg_count) | RS1(tmp) | IMM_I(0)));	/* register move written as ADDI with a zero immediate */
-				saved_arg_count++;
-			}
-			tmp++;
-		}
-
-		arg_types >>= SLJIT_ARG_SHIFT;
-	}
-
-	return SLJIT_SUCCESS;
-}
-
-#undef STACK_MAX_DISTANCE
-
-/* Records the frame description of a function without emitting code.
-   The frame size is computed with the same formula as sljit_emit_enter
-   (saved integer registers, optional float save area, locals offset,
-   rounded up to a multiple of 16) and stored in compiler->local_size.
-   Returns SLJIT_SUCCESS unless CHECK_ERROR / CHECK leave early. */
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
-	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
-	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
-{
-	sljit_s32 frame_size;
-
-	CHECK_ERROR();
-	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
-	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
-
-	/* Start from the requested locals and add the integer register save area. */
-	frame_size = local_size;
-	frame_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
-
-#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
-	if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
-		/* Insert one padding word when the size so far has the word bit set. */
-		if ((frame_size & SSIZE_OF(sw)) != 0)
-			frame_size += SSIZE_OF(sw);
-
-		frame_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64));
-	}
-#else
-	frame_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64));
-#endif
-
-	/* Round up to a multiple of 16. */
-	compiler->local_size = (frame_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;
-
-	return SLJIT_SUCCESS;
-}
-
-#define STACK_MAX_DISTANCE (-SIMM_MIN - 16)
-
-static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)
-{	/* Emits the function epilogue up to, but not including, the final jump.
-	 *
-	 * Frames larger than STACK_MAX_DISTANCE are first shrunk so that the
-	 * register save area lies within STACK_MAX_DISTANCE of SLJIT_SP. The
-	 * return address (skipped when is_return_to is non-zero), the integer
-	 * registers and the float registers are then reloaded in the same order
-	 * and at the same decreasing offsets used by sljit_emit_enter, and the
-	 * rest of the frame is released with a final ADDI.
-	 *
-	 * Returns the result of the last push_inst, or leaves early via FAIL_IF.
-	 */
-	sljit_s32 i, tmp, offset;
-	sljit_s32 local_size = compiler->local_size;
-
-	if (local_size > STACK_MAX_DISTANCE) {
-		local_size -= STACK_MAX_DISTANCE;
-
-		if (local_size > STACK_MAX_DISTANCE) {	/* excess does not fit an ADDI immediate: go through TMP_REG2 */
-			FAIL_IF(load_immediate(compiler, TMP_REG2, local_size, TMP_REG3));
-			FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RS1(SLJIT_SP) | RS2(TMP_REG2)));
-		} else
-			FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(local_size)));
-
-		local_size = STACK_MAX_DISTANCE;	/* amount still to be released by the final ADDI */
-	}
-
-	SLJIT_ASSERT(local_size > 0);
-
-	offset = local_size - SSIZE_OF(sw);
-	if (!is_return_to)	/* sljit_emit_return_to jumps to its own target, so the saved return address is not reloaded */
-		FAIL_IF(push_inst(compiler, STACK_LOAD | RD(RETURN_ADDR_REG) | RS1(SLJIT_SP) | IMM_I(offset)));
-
-	tmp = SLJIT_S0 - compiler->saveds;
-	for (i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); i > tmp; i--) {
-		offset -= SSIZE_OF(sw);
-		FAIL_IF(push_inst(compiler, STACK_LOAD | RD(i) | RS1(SLJIT_SP) | IMM_I(offset)));
-	}
-
-	for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
-		offset -= SSIZE_OF(sw);
-		FAIL_IF(push_inst(compiler, STACK_LOAD | RD(i) | RS1(SLJIT_SP) | IMM_I(offset)));
-	}
-
-#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
-	/* This alignment is valid because offset is not used after loading the FPU regs
-	   (mirrors the padding step in sljit_emit_enter). */
-	if ((offset & SSIZE_OF(sw)) != 0)
-		offset -= SSIZE_OF(sw);
-#endif
-
-	tmp = SLJIT_FS0 - compiler->fsaveds;
-	for (i = SLJIT_FS0; i > tmp; i--) {
-		offset -= SSIZE_OF(f64);
-		FAIL_IF(push_inst(compiler, FLD | FRD(i) | RS1(SLJIT_SP) | IMM_I(offset)));
-	}
-
-	for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
-		offset -= SSIZE_OF(f64);
-		FAIL_IF(push_inst(compiler, FLD | FRD(i) | RS1(SLJIT_SP) | IMM_I(offset)));
-	}
-
-	return push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(local_size));
-}
-
-#undef STACK_MAX_DISTANCE
-
-/* Emits a return without a value: the stack frame is released (reloading
-   the return address) and control jumps to RETURN_ADDR_REG.
-   Returns the result of the final push_inst, or leaves early on failure. */
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
-{
-	sljit_ins jump_back;
-
-	CHECK_ERROR();
-	CHECK(check_sljit_emit_return_void(compiler));
-
-	FAIL_IF(emit_stack_frame_release(compiler, 0));
-
-	/* JALR with TMP_ZERO as destination: the link value is discarded. */
-	jump_back = JALR | RD(TMP_ZERO) | RS1(RETURN_ADDR_REG) | IMM_I(0);
-	return push_inst(compiler, jump_back);
-}
-
-/* Releases the stack frame and jumps to the address given by src / srcw.
-   The target is first moved into TMP_REG1 when it is a memory operand or a
-   register that the frame release is about to reload (SLJIT_FIRST_SAVED_REG
-   up to the first non-kept saved register); otherwise it is used as is.
-   Returns the result of sljit_emit_ijump, or leaves early on failure. */
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
-	sljit_s32 src, sljit_sw srcw)
-{
-	sljit_s32 target_in_tmp = 0;
-
-	CHECK_ERROR();
-	CHECK(check_sljit_emit_return_to(compiler, src, srcw));
-
-	if (src & SLJIT_MEM) {
-		/* Load the target before the frame (and possibly its base) goes away. */
-		ADJUST_LOCAL_OFFSET(src, srcw);
-		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
-		target_in_tmp = 1;
-	} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
-		/* The register is restored by the epilogue, so keep a copy. */
-		FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(src) | IMM_I(0)));
-		target_in_tmp = 1;
-	}
-
-	if (target_in_tmp) {
-		src = TMP_REG1;
-		srcw = 0;
-	}
-
-	FAIL_IF(emit_stack_frame_release(compiler, 1));
-
-	SLJIT_SKIP_CHECKS(compiler);
-	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
-}
-
-/* --------------------------------------------------------------------- */
-/*  Operators                                                            */
-/* --------------------------------------------------------------------- */
-
-#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
-#define ARCH_32_64(a, b)	a	/* 32-bit build: keep the first alternative */
-#else
-#define ARCH_32_64(a, b)	b	/* otherwise: keep the second alternative */
-#endif
-
-static const sljit_ins data_transfer_insts[16 + 4] = {	/* Load/store opcode templates, indexed by (flags & MEM_MASK) in push_mem_inst.
-	 * Row labels: first column u/s = unsigned/signed group, second column
-	 * w/b/h/i = word/byte/half/int access, last column s/l = store/load.
-	 * The final four rows are the double (d) and single (s) precision
-	 * floating point stores and loads.
-	 */
-/* u w s */ ARCH_32_64(F3(0x2) | OPC(0x23) /* sw */, F3(0x3) | OPC(0x23) /* sd */),
-/* u w l */ ARCH_32_64(F3(0x2) | OPC(0x3) /* lw */, F3(0x3) | OPC(0x3) /* ld */),
-/* u b s */ F3(0x0) | OPC(0x23) /* sb */,
-/* u b l */ F3(0x4) | OPC(0x3) /* lbu */,
-/* u h s */ F3(0x1) | OPC(0x23) /* sh */,
-/* u h l */ F3(0x5) | OPC(0x3) /* lhu */,
-/* u i s */ F3(0x2) | OPC(0x23) /* sw */,
-/* u i l */ ARCH_32_64(F3(0x2) | OPC(0x3) /* lw */, F3(0x6) | OPC(0x3) /* lwu */),
-
-/* s w s */ ARCH_32_64(F3(0x2) | OPC(0x23) /* sw */, F3(0x3) | OPC(0x23) /* sd */),
-/* s w l */ ARCH_32_64(F3(0x2) | OPC(0x3) /* lw */, F3(0x3) | OPC(0x3) /* ld */),
-/* s b s */ F3(0x0) | OPC(0x23) /* sb */,
-/* s b l */ F3(0x0) | OPC(0x3) /* lb */,
-/* s h s */ F3(0x1) | OPC(0x23) /* sh */,
-/* s h l */ F3(0x1) | OPC(0x3) /* lh */,
-/* s i s */ F3(0x2) | OPC(0x23) /* sw */,
-/* s i l */ F3(0x2) | OPC(0x3) /* lw */,
-
-/* d   s */ F3(0x3) | OPC(0x27) /* fsd */,
-/* d   l */ F3(0x3) | OPC(0x7) /* fld */,
-/* s   s */ F3(0x2) | OPC(0x27) /* fsw */,
-/* s   l */ F3(0x2) | OPC(0x7) /* flw */,
-};
-
-#undef ARCH_32_64
-
-/* Emits a single load or store of `reg` at address base + offset.
-   flags & MEM_MASK selects the opcode template from data_transfer_insts;
-   LOAD_DATA selects the load encoding (destination register, IMM_I) over
-   the store encoding (second source register, IMM_S). Entries up to
-   GPR_REG use integer register fields, the others floating point ones.
-   Returns the result of push_inst. */
-static sljit_s32 push_mem_inst(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 base, sljit_sw offset)
-{
-	sljit_ins encoded;
-	sljit_s32 slot = flags & MEM_MASK;
-
-	SLJIT_ASSERT(FAST_IS_REG(base) && offset <= 0xfff && offset >= SIMM_MIN);
-
-	encoded = data_transfer_insts[slot] | RS1(base);
-
-	if (!(flags & LOAD_DATA)) {
-		/* Store: the data register is the second source operand. */
-		if (slot <= GPR_REG)
-			encoded |= RS2(reg) | IMM_S(offset);
-		else
-			encoded |= FRS2(reg) | IMM_S(offset);
-	} else {
-		/* Load: the data register is the destination operand. */
-		if (slot <= GPR_REG)
-			encoded |= RD(reg) | IMM_I(offset);
-		else
-			encoded |= FRD(reg) | IMM_I(offset);
-	}
-
-	return push_inst(compiler, encoded);
-}
-
-/* Handles memory operands that need at most one instruction.
-   Return value:
-     0  - the operand needs the slow path (it has an offset register, or
-          argw is outside [SIMM_MIN, SIMM_MAX]); nothing was emitted
-     1  - ARG_TEST was set: the operand qualifies, nothing was emitted
-    -1  - the access was emitted (or FAIL_IF left early on failure) */
-static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
-{
-	SLJIT_ASSERT(arg & SLJIT_MEM);
-
-	if ((arg & OFFS_REG_MASK) || argw > SIMM_MAX || argw < SIMM_MIN)
-		return 0;
-
-	/* From here on it works for both absolute and relative addresses. */
-	if (SLJIT_UNLIKELY(flags & ARG_TEST))
-		return 1;
-
-	FAIL_IF(push_mem_inst(compiler, flags, reg, arg & REG_MASK, argw));
-	return -1;
-}
-
-#define TO_ARGW_HI(argw) (((argw) & ~0xfff) + (((argw) & 0x800) ? 0x1000 : 0))
-
-/* Tells whether the address computed for (arg, argw) can be reused for
-   (next_arg, next_argw); returns 1 if so and 0 otherwise. See getput_arg below.
-   Note: can_cache is called only for binary operators. */
-static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
-{
-	sljit_sw distance;
-
-	SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
-
-	/* Simple operation except for updates. */
-	if (arg & OFFS_REG_MASK) {
-		argw &= 0x3;
-		next_argw &= 0x3;
-
-		/* Only a non-zero shift that both operands share is worth caching. */
-		if (argw == 0 || argw != next_argw)
-			return 0;
-
-		if (arg == next_arg)
-			return 1;
-
-		return ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK)) ? 1 : 0;
-	}
-
-	if (arg != next_arg)
-		return 0;
-
-	/* Same operand: reusable when the displacements are close enough
-	   or share the same upper part. */
-	distance = next_argw - argw;
-	if (distance <= SIMM_MAX && distance >= SIMM_MIN)
-		return 1;
-
-	return (TO_ARGW_HI(argw) == TO_ARGW_HI(next_argw)) ? 1 : 0;
-}
-
-/* Emit the necessary instructions. See can_cache above.
-   Slow path of a memory access: the effective address is built in a
-   temporary register and the load/store itself is emitted by push_mem_inst.
-   compiler->cache_arg / compiler->cache_argw describe the value left in
-   TMP_REG3 so that a later access can reuse it; next_arg / next_argw
-   describe the following memory operand (ignored unless it has SLJIT_MEM)
-   and decide whether the address is kept in TMP_REG3.
-   Returns the result of push_mem_inst, or leaves early via FAIL_IF. */
-static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
-{
-	sljit_s32 base = arg & REG_MASK;
-	sljit_s32 tmp_r = TMP_REG1;
-	sljit_sw offset, argw_hi;
-
-	SLJIT_ASSERT(arg & SLJIT_MEM);
-	if (!(next_arg & SLJIT_MEM)) {
-		next_arg = 0;
-		next_argw = 0;
-	}
-
-	/* Since tmp can be the same as base or offset registers,
-	 * these might be unavailable after modifying tmp.
-	 * For integer loads the destination register itself is used as the
-	 * address temporary. */
-	if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA))
-		tmp_r = reg;
-
-	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
-		argw &= 0x3;	/* only the low two bits are used, as the shift amount of the offset register */
-
-		/* Using the cache. */
-		if (argw == compiler->cache_argw) {
-			if (arg == compiler->cache_arg)	/* TMP_REG3 already holds the complete address */
-				return push_mem_inst(compiler, flags, reg, TMP_REG3, 0);
-
-			if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) {	/* TMP_REG3 holds only the shifted offset register */
-				if (arg == next_arg && argw == (next_argw & 0x3)) {
-					compiler->cache_arg = arg;
-					compiler->cache_argw = argw;
-					FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG3) | RS1(TMP_REG3) | RS2(base)));
-					return push_mem_inst(compiler, flags, reg, TMP_REG3, 0);
-				}
-				FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RS1(base) | RS2(TMP_REG3)));
-				return push_mem_inst(compiler, flags, reg, tmp_r, 0);
-			}
-		}
-
-		if (SLJIT_UNLIKELY(argw)) {
-			compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
-			compiler->cache_argw = argw;
-			FAIL_IF(push_inst(compiler, SLLI | RD(TMP_REG3) | RS1(OFFS_REG(arg)) | IMM_I(argw)));
-		}
-
-		if (arg == next_arg && argw == (next_argw & 0x3)) {	/* the next operand is identical: keep the full address in TMP_REG3 */
-			compiler->cache_arg = arg;
-			compiler->cache_argw = argw;
-			FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG3) | RS1(base) | RS2(!argw ? OFFS_REG(arg) : TMP_REG3)));
-			tmp_r = TMP_REG3;
-		}
-		else
-			FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RS1(base) | RS2(!argw ? OFFS_REG(arg) : TMP_REG3)));
-		return push_mem_inst(compiler, flags, reg, tmp_r, 0);
-	}
-
-	if (compiler->cache_arg == arg && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN)	/* cached address for the same operand is within immediate range */
-		return push_mem_inst(compiler, flags, reg, TMP_REG3, argw - compiler->cache_argw);
-
-	if (compiler->cache_arg == SLJIT_MEM && (argw - compiler->cache_argw <= SIMM_MAX) && (argw - compiler->cache_argw >= SIMM_MIN)) {	/* a cached constant (no base added) is close enough */
-		offset = argw - compiler->cache_argw;
-	} else {
-		compiler->cache_arg = SLJIT_MEM;
-
-		argw_hi = TO_ARGW_HI(argw);
-
-		if (next_arg && next_argw - argw <= SIMM_MAX && next_argw - argw >= SIMM_MIN && argw_hi != TO_ARGW_HI(next_argw)) {	/* load the exact value so the next displacement stays reachable from it */
-			FAIL_IF(load_immediate(compiler, TMP_REG3, argw, tmp_r));
-			compiler->cache_argw = argw;
-			offset = 0;
-		} else {
-			FAIL_IF(load_immediate(compiler, TMP_REG3, argw_hi, tmp_r));
-			compiler->cache_argw = argw_hi;
-			offset = argw & 0xfff;
-			argw = argw_hi;
-		}
-	}
-
-	if (!base)	/* absolute address: TMP_REG3 plus the immediate is the full address */
-		return push_mem_inst(compiler, flags, reg, TMP_REG3, offset);
-
-	if (arg == next_arg && next_argw - argw <= SIMM_MAX && next_argw - argw >= SIMM_MIN) {
-		compiler->cache_arg = arg;
-		FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG3) | RS1(TMP_REG3) | RS2(base)));
-		return push_mem_inst(compiler, flags, reg, TMP_REG3, offset);
-	}
-
-	FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RS1(TMP_REG3) | RS2(base)));
-	return push_mem_inst(compiler, flags, reg, tmp_r, offset);
-}
-
-/* Emits one load or store for the memory operand (arg, argw) without using
-   the address cache. Operands accepted by getput_arg_fast take a single
-   instruction; otherwise the address is built in a temporary register
-   (the destination register itself for integer loads, TMP_REG1 otherwise).
-   Returns compiler->error after the fast path, else the result of
-   push_mem_inst, or leaves early via FAIL_IF. */
-static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
-{
-	sljit_s32 base_reg = arg & REG_MASK;
-	sljit_s32 addr_reg = TMP_REG1;
-	sljit_sw shift;
-
-	if (getput_arg_fast(compiler, flags, reg, arg, argw))
-		return compiler->error;
-
-	if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA))
-		addr_reg = reg;
-
-	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
-		/* base + (offset register << shift), no displacement. */
-		shift = argw & 0x3;
-
-		if (SLJIT_UNLIKELY(shift)) {
-			FAIL_IF(push_inst(compiler, SLLI | RD(addr_reg) | RS1(OFFS_REG(arg)) | IMM_I(shift)));
-			FAIL_IF(push_inst(compiler, ADD | RD(addr_reg) | RS1(addr_reg) | RS2(base_reg)));
-		} else {
-			FAIL_IF(push_inst(compiler, ADD | RD(addr_reg) | RS1(base_reg) | RS2(OFFS_REG(arg))));
-		}
-
-		return push_mem_inst(compiler, flags, reg, addr_reg, 0);
-	}
-
-	/* Large displacement: load its upper part, add the base register if
-	   there is one, and leave the low 12 bits to the access itself. */
-	FAIL_IF(load_immediate(compiler, addr_reg, TO_ARGW_HI(argw), TMP_REG3));
-
-	if (base_reg != 0) {
-		FAIL_IF(push_inst(compiler, ADD | RD(addr_reg) | RS1(addr_reg) | RS2(base_reg)));
-	}
-
-	return push_mem_inst(compiler, flags, reg, addr_reg, argw & 0xfff);
-}
-
-/* Emits a load or store for (arg1, arg1w): tries the single-instruction
-   form first and otherwise uses getput_arg, passing (arg2, arg2w) as the
-   following operand so the address cache can be primed for it. */
-static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
-{
-	sljit_s32 fast_result = getput_arg_fast(compiler, flags, reg, arg1, arg1w);
-
-	if (fast_result == 0)
-		return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
-
-	return compiler->error;
-}
-
-#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
-#define WORD 0	/* nothing is OR-ed into the opcode in the 32-bit build */
-#define IMM_EXTEND(v) (IMM_I(v))	/* shift amount is used unchanged */
-#else /* !SLJIT_CONFIG_RISCV_32 */
-#define WORD word	/* expands to the local variable `word`, which users set to (op & SLJIT_32) >> 5 and assert to be 0 or 0x8 */
-#define IMM_EXTEND(v) (IMM_I((op & SLJIT_32) ? (v) : (32 + (v))))	/* reads the local `op`: adds 32 to the shift amount unless SLJIT_32 is set */
-#endif /* SLJIT_CONFIG_RISCV_32 */
-
-static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw src)
-{	/* Emits an inline instruction sequence that counts the leading zero bits
-	 * of src when the opcode is SLJIT_CLZ, and the trailing zero bits
-	 * otherwise (SLJIT_CTZ in the visible caller), writing the result to dst.
-	 * OTHER_FLAG, EQUAL_FLAG, TMP_REG1 and TMP_REG2 are overwritten.
-	 *
-	 * NOTE(review): the branch displacements below are hand-encoded from
-	 * instruction counts (multiples of SSIZE_OF(ins)) or raw constants, so
-	 * they depend on the exact number and order of the instructions emitted
-	 * here - re-derive them if the sequence is changed.
-	 *
-	 * Returns the result of the final push_inst, or leaves early via FAIL_IF.
-	 */
-	sljit_s32 is_clz = (GET_OPCODE(op) == SLJIT_CLZ);
-#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
-	sljit_ins word = (op & SLJIT_32) >> 5;
-	sljit_ins max = (op & SLJIT_32) ? 32 : 64;
-#else /* !SLJIT_CONFIG_RISCV_64 */
-	sljit_ins max = 32;
-#endif /* SLJIT_CONFIG_RISCV_64 */
-
-	SLJIT_ASSERT(WORD == 0 || WORD == 0x8);
-
-	/* The OTHER_FLAG is the counter. It starts at the operand width,
-	   which is what remains in it if the zero-input branch below is taken. */
-	FAIL_IF(push_inst(compiler, ADDI | WORD | RD(OTHER_FLAG) | RS1(TMP_ZERO) | IMM_I(max)));
-
-	/* The TMP_REG2 is the next value. */
-	if (src != TMP_REG2)
-		FAIL_IF(push_inst(compiler, ADDI | WORD | RD(TMP_REG2) | RS1(src) | IMM_I(0)));
-
-	FAIL_IF(push_inst(compiler, BEQ | RS1(TMP_REG2) | RS2(TMP_ZERO) | ((sljit_ins)((is_clz ? 4 : 5) * SSIZE_OF(ins)) << 7) | ((sljit_ins)(8 * SSIZE_OF(ins)) << 20)));	/* zero input: forward branch (target presumably the final move to dst; CTZ emits one more instruction, hence 5 instead of 4) */
-
-	FAIL_IF(push_inst(compiler, ADDI | WORD | RD(OTHER_FLAG) | RS1(TMP_ZERO) | IMM_I(0)));	/* non-zero input: restart the counter from 0 */
-	if (!is_clz) {
-		FAIL_IF(push_inst(compiler, ANDI | RD(TMP_REG1) | RS1(TMP_REG2) | IMM_I(1)));	/* CTZ: test the lowest bit */
-		FAIL_IF(push_inst(compiler, BNE | RS1(TMP_REG1) | RS2(TMP_ZERO) | ((sljit_ins)(2 * SSIZE_OF(ins)) << 7) | ((sljit_ins)(8 * SSIZE_OF(ins)) << 20)));
-	} else
-		FAIL_IF(push_inst(compiler, BLT | RS1(TMP_REG2) | RS2(TMP_ZERO) | ((sljit_ins)(2 * SSIZE_OF(ins)) << 7) | ((sljit_ins)(8 * SSIZE_OF(ins)) << 20)));	/* CLZ: a negative value has its top bit set */
-
-	/* The TMP_REG1 is the next shift. */
-	FAIL_IF(push_inst(compiler, ADDI | WORD | RD(TMP_REG1) | RS1(TMP_ZERO) | IMM_I(max)));
-
-	FAIL_IF(push_inst(compiler, ADDI | WORD | RD(EQUAL_FLAG) | RS1(TMP_REG2) | IMM_I(0)));	/* EQUAL_FLAG keeps the current value while TMP_REG2 receives the shifted one */
-	FAIL_IF(push_inst(compiler, SRLI | WORD | RD(TMP_REG1) | RS1(TMP_REG1) | IMM_I(1)));	/* halve the shift amount */
-
-	FAIL_IF(push_inst(compiler, (is_clz ? SRL : SLL) | WORD | RD(TMP_REG2) | RS1(EQUAL_FLAG) | RS2(TMP_REG1)));
-	FAIL_IF(push_inst(compiler, BNE | RS1(TMP_REG2) | RS2(TMP_ZERO) | ((sljit_ins)0xfe000e80 - ((2 * SSIZE_OF(ins)) << 7))));	/* NOTE(review): raw constant presumably encodes a backward displacement - confirm against the B-type immediate layout */
-	FAIL_IF(push_inst(compiler, ADDI | WORD | RD(TMP_REG2) | RS1(TMP_REG1) | IMM_I(-1)));
-	FAIL_IF(push_inst(compiler, (is_clz ? SRL : SLL) | WORD | RD(TMP_REG2) | RS1(EQUAL_FLAG) | RS2(TMP_REG2)));
-	FAIL_IF(push_inst(compiler, OR | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | RS2(TMP_REG1)));	/* accumulate the current shift amount into the counter */
-	FAIL_IF(push_inst(compiler, BEQ | RS1(TMP_REG2) | RS2(TMP_ZERO) | ((sljit_ins)0xfe000e80 - ((5 * SSIZE_OF(ins)) << 7))));	/* NOTE(review): raw constant presumably encodes a backward displacement - confirm against the B-type immediate layout */
-
-	return push_inst(compiler, ADDI | WORD | RD(dst) | RS1(OTHER_FLAG) | IMM_I(0));	/* move the counter into dst */
-}
-
-#define EMIT_LOGICAL(op_imm, op_reg) /* emits op_imm (when flags has SRC2_IMM) or op_reg: into EQUAL_FLAG if op has SLJIT_SET_Z, and into dst unless flags has UNUSED_DEST; uses compiler, op, flags, dst, src1, src2 from the expansion site; expands to a bare if/else, not a do-while(0) */ \
-	if (flags & SRC2_IMM) { \
-		if (op & SLJIT_SET_Z) \
-			FAIL_IF(push_inst(compiler, op_imm | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(src2))); \
-		if (!(flags & UNUSED_DEST)) \
-			FAIL_IF(push_inst(compiler, op_imm | RD(dst) | RS1(src1) | IMM_I(src2))); \
-	} \
-	else { \
-		if (op & SLJIT_SET_Z) \
-			FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); \
-		if (!(flags & UNUSED_DEST)) \
-			FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RS1(src1) | RS2(src2))); \
-	}
-
-#define EMIT_SHIFT(imm, reg) /* emits nothing: only records the immediate and register opcode forms in the locals op_imm and op_reg; expands to two statements */ \
-	op_imm = (imm); \
-	op_reg = (reg);
-
-static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
-	sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
-{
-	sljit_s32 is_overflow, is_carry, carry_src_r, is_handled;
-	sljit_ins op_imm, op_reg;
-#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
-	sljit_ins word = (op & SLJIT_32) >> 5;
-#endif /* SLJIT_CONFIG_RISCV_64 */
-
-	SLJIT_ASSERT(WORD == 0 || WORD == 0x8);
-
-	switch (GET_OPCODE(op)) {
-	case SLJIT_MOV:
-		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
-		if (dst != src2)
-			return push_inst(compiler, ADDI | RD(dst) | RS1(src2) | IMM_I(0));
-		return SLJIT_SUCCESS;
-
-	case SLJIT_MOV_U8:
-		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
-		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
-			return push_inst(compiler, ANDI | RD(dst) | RS1(src2) | IMM_I(0xff));
-		SLJIT_ASSERT(dst == src2);
-		return SLJIT_SUCCESS;
-
-	case SLJIT_MOV_S8:
-		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
-		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
-			FAIL_IF(push_inst(compiler, SLLI | WORD | RD(dst) | RS1(src2) | IMM_EXTEND(24)));
-			return push_inst(compiler, SRAI | WORD | RD(dst) | RS1(dst) | IMM_EXTEND(24));
-		}
-		SLJIT_ASSERT(dst == src2);
-		return SLJIT_SUCCESS;
-
-	case SLJIT_MOV_U16:
-		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
-		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
-			FAIL_IF(push_inst(compiler, SLLI | WORD | RD(dst) | RS1(src2) | IMM_EXTEND(16)));
-			return push_inst(compiler, SRLI | WORD | RD(dst) | RS1(dst) | IMM_EXTEND(16));
-		}
-		SLJIT_ASSERT(dst == src2);
-		return SLJIT_SUCCESS;
-
-	case SLJIT_MOV_S16:
-		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
-		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
-			FAIL_IF(push_inst(compiler, SLLI | WORD | RD(dst) | RS1(src2) | IMM_EXTEND(16)));
-			return push_inst(compiler, SRAI | WORD | RD(dst) | RS1(dst) | IMM_EXTEND(16));
-		}
-		SLJIT_ASSERT(dst == src2);
-		return SLJIT_SUCCESS;
-
-#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
-	case SLJIT_MOV_U32:
-		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
-		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
-			FAIL_IF(push_inst(compiler, SLLI | RD(dst) | RS1(src2) | IMM_I(32)));
-			return push_inst(compiler, SRLI | RD(dst) | RS1(dst) | IMM_I(32));
-		}
-		SLJIT_ASSERT(dst == src2);
-		return SLJIT_SUCCESS;
-
-	case SLJIT_MOV_S32:
-		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
-		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
-			return push_inst(compiler, ADDI | 0x8 | RD(dst) | RS1(src2) | IMM_I(0));
-		SLJIT_ASSERT(dst == src2);
-		return SLJIT_SUCCESS;
-#endif /* SLJIT_CONFIG_RISCV_64 */
-
-	case SLJIT_CLZ:
-	case SLJIT_CTZ:
-		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
-		return emit_clz_ctz(compiler, op, dst, src2);
-
-	case SLJIT_ADD:
-		/* Overflow computation (both add and sub): overflow = src1_sign ^ src2_sign ^ result_sign ^ carry_flag */
-		is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
-		carry_src_r = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
-
-		if (flags & SRC2_IMM) {
-			if (is_overflow) {
-				if (src2 >= 0)
-					FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(0)));
-				else
-					FAIL_IF(push_inst(compiler, XORI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(-1)));
-			}
-			else if (op & SLJIT_SET_Z)
-				FAIL_IF(push_inst(compiler, ADDI | WORD | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(src2)));
-
-			/* Only the zero flag is needed. */
-			if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
-				FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(src2)));
-		}
-		else {
-			if (is_overflow)
-				FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2)));
-			else if (op & SLJIT_SET_Z)
-				FAIL_IF(push_inst(compiler, ADD | WORD | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2)));
-
-			if (is_overflow || carry_src_r != 0) {
-				if (src1 != dst)
-					carry_src_r = (sljit_s32)src1;
-				else if (src2 != dst)
-					carry_src_r = (sljit_s32)src2;
-				else {
-					FAIL_IF(push_inst(compiler, ADDI | RD(OTHER_FLAG) | RS1(src1) | IMM_I(0)));
-					carry_src_r = OTHER_FLAG;
-				}
-			}
-
-			/* Only the zero flag is needed. */
-			if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
-				FAIL_IF(push_inst(compiler, ADD | WORD | RD(dst) | RS1(src1) | RS2(src2)));
-		}
-
-		/* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */
-		if (is_overflow || carry_src_r != 0) {
-			if (flags & SRC2_IMM)
-				FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RS1(dst) | IMM_I(src2)));
-			else
-				FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RS1(dst) | RS2(carry_src_r)));
-		}
-
-		if (!is_overflow)
-			return SLJIT_SUCCESS;
-
-		FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RS1(dst) | RS2(EQUAL_FLAG)));
-		if (op & SLJIT_SET_Z)
-			FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(dst) | IMM_I(0)));
-		FAIL_IF(push_inst(compiler, SRLI | WORD | RD(TMP_REG1) | RS1(TMP_REG1) | IMM_EXTEND(31)));
-		return push_inst(compiler, XOR | RD(OTHER_FLAG) | RS1(TMP_REG1) | RS2(OTHER_FLAG));
-
-	case SLJIT_ADDC:
-		carry_src_r = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
-
-		if (flags & SRC2_IMM) {
-			FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(src2)));
-		} else {
-			if (carry_src_r != 0) {
-				if (src1 != dst)
-					carry_src_r = (sljit_s32)src1;
-				else if (src2 != dst)
-					carry_src_r = (sljit_s32)src2;
-				else {
-					FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(0)));
-					carry_src_r = EQUAL_FLAG;
-				}
-			}
-
-			FAIL_IF(push_inst(compiler, ADD | WORD | RD(dst) | RS1(src1) | RS2(src2)));
-		}
-
-		/* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */
-		if (carry_src_r != 0) {
-			if (flags & SRC2_IMM)
-				FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RS1(dst) | IMM_I(src2)));
-			else
-				FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RS1(dst) | RS2(carry_src_r)));
-		}
-
-		FAIL_IF(push_inst(compiler, ADD | WORD | RD(dst) | RS1(dst) | RS2(OTHER_FLAG)));
-
-		if (carry_src_r == 0)
-			return SLJIT_SUCCESS;
-
-		/* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */
-		FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RS1(dst) | RS2(OTHER_FLAG)));
-		/* Set carry flag. */
-		return push_inst(compiler, OR | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | RS2(EQUAL_FLAG));
-
-	case SLJIT_SUB:
-		if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
-			FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG2) | RS1(TMP_ZERO) | IMM_I(src2)));
-			src2 = TMP_REG2;
-			flags &= ~SRC2_IMM;
-		}
-
-		is_handled = 0;
-
-		if (flags & SRC2_IMM) {
-			if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) {
-				FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RS1(src1) | IMM_I(src2)));
-				is_handled = 1;
-			}
-			else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) {
-				FAIL_IF(push_inst(compiler, SLTI | RD(OTHER_FLAG) | RS1(src1) | IMM_I(src2)));
-				is_handled = 1;
-			}
-		}
-
-		if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) {
-			is_handled = 1;
-
-			if (flags & SRC2_IMM) {
-				FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG2) | RS1(TMP_ZERO) | IMM_I(src2)));
-				src2 = TMP_REG2;
-				flags &= ~SRC2_IMM;
-			}
-
-			switch (GET_FLAG_TYPE(op)) {
-			case SLJIT_LESS:
-			case SLJIT_GREATER_EQUAL:
-				FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RS1(src1) | RS2(src2)));
-				break;
-			case SLJIT_GREATER:
-			case SLJIT_LESS_EQUAL:
-				FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RS1(src2) | RS2(src1)));
-				break;
-			case SLJIT_SIG_LESS:
-			case SLJIT_SIG_GREATER_EQUAL:
-				FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RS1(src1) | RS2(src2)));
-				break;
-			case SLJIT_SIG_GREATER:
-			case SLJIT_SIG_LESS_EQUAL:
-				FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RS1(src2) | RS2(src1)));
-				break;
-			}
-		}
-
-		if (is_handled) {
-			if (flags & SRC2_IMM) {
-				if (op & SLJIT_SET_Z)
-					FAIL_IF(push_inst(compiler, ADDI | WORD | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(-src2)));
-				if (!(flags & UNUSED_DEST))
-					return push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(-src2));
-			}
-			else {
-				if (op & SLJIT_SET_Z)
-					FAIL_IF(push_inst(compiler, SUB | WORD | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2)));
-				if (!(flags & UNUSED_DEST))
-					return push_inst(compiler, SUB | WORD | RD(dst) | RS1(src1) | RS2(src2));
-			}
-			return SLJIT_SUCCESS;
-		}
-
-		is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
-		is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
-
-		if (flags & SRC2_IMM) {
-			if (is_overflow) {
-				if (src2 >= 0)
-					FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(0)));
-				else
-					FAIL_IF(push_inst(compiler, XORI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(-1)));
-			}
-			else if (op & SLJIT_SET_Z)
-				FAIL_IF(push_inst(compiler, ADDI | WORD | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(-src2)));
-
-			if (is_overflow || is_carry)
-				FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RS1(src1) | IMM_I(src2)));
-
-			/* Only the zero flag is needed. */
-			if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
-				FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(-src2)));
-		}
-		else {
-			if (is_overflow)
-				FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2)));
-			else if (op & SLJIT_SET_Z)
-				FAIL_IF(push_inst(compiler, SUB | WORD | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2)));
-
-			if (is_overflow || is_carry)
-				FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RS1(src1) | RS2(src2)));
-
-			/* Only the zero flag is needed. */
-			if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
-				FAIL_IF(push_inst(compiler, SUB | WORD | RD(dst) | RS1(src1) | RS2(src2)));
-		}
-
-		if (!is_overflow)
-			return SLJIT_SUCCESS;
-
-		FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RS1(dst) | RS2(EQUAL_FLAG)));
-		if (op & SLJIT_SET_Z)
-			FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(dst) | IMM_I(0)));
-		FAIL_IF(push_inst(compiler, SRLI | WORD | RD(TMP_REG1) | RS1(TMP_REG1) | IMM_EXTEND(31)));
-		return push_inst(compiler, XOR | RD(OTHER_FLAG) | RS1(TMP_REG1) | RS2(OTHER_FLAG));
-
-	case SLJIT_SUBC:
-		if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
-			FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG2) | RS1(TMP_ZERO) | IMM_I(src2)));
-			src2 = TMP_REG2;
-			flags &= ~SRC2_IMM;
-		}
-
-		is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
-
-		if (flags & SRC2_IMM) {
-			if (is_carry)
-				FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(src2)));
-
-			FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(-src2)));
-		}
-		else {
-			if (is_carry)
-				FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2)));
-
-			FAIL_IF(push_inst(compiler, SUB | WORD | RD(dst) | RS1(src1) | RS2(src2)));
-		}
-
-		if (is_carry)
-			FAIL_IF(push_inst(compiler, SLTU | RD(TMP_REG1) | RS1(dst) | RS2(OTHER_FLAG)));
-
-		FAIL_IF(push_inst(compiler, SUB | WORD | RD(dst) | RS1(dst) | RS2(OTHER_FLAG)));
-
-		if (!is_carry)
-			return SLJIT_SUCCESS;
-
-		return push_inst(compiler, OR | RD(OTHER_FLAG) | RS1(EQUAL_FLAG) | RS2(TMP_REG1));
-
-	case SLJIT_MUL:
-		SLJIT_ASSERT(!(flags & SRC2_IMM));
-
-		if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW)
-			return push_inst(compiler, MUL | WORD | RD(dst) | RS1(src1) | RS2(src2));
-
-#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
-		if (word) {
-			FAIL_IF(push_inst(compiler, MUL | RD(OTHER_FLAG) | RS1(src1) | RS2(src2)));
-			FAIL_IF(push_inst(compiler, MUL | 0x8 | RD(dst) | RS1(src1) | RS2(src2)));
-			return push_inst(compiler, SUB | RD(OTHER_FLAG) | RS1(dst) | RS2(OTHER_FLAG));
-		}
-#endif /* SLJIT_CONFIG_RISCV_64 */
-
-		FAIL_IF(push_inst(compiler, MULH | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2)));
-		FAIL_IF(push_inst(compiler, MUL | RD(dst) | RS1(src1) | RS2(src2)));
-#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
-		FAIL_IF(push_inst(compiler, SRAI | RD(OTHER_FLAG) | RS1(dst) | IMM_I(31)));
-#else /* !SLJIT_CONFIG_RISCV_32 */
-		FAIL_IF(push_inst(compiler, SRAI | RD(OTHER_FLAG) | RS1(dst) | IMM_I(63)));
-#endif /* SLJIT_CONFIG_RISCV_32 */
-		return push_inst(compiler, SUB | RD(OTHER_FLAG) | RS1(EQUAL_FLAG) | RS2(OTHER_FLAG));
-
-	case SLJIT_AND:
-		EMIT_LOGICAL(ANDI, AND);
-		return SLJIT_SUCCESS;
-
-	case SLJIT_OR:
-		EMIT_LOGICAL(ORI, OR);
-		return SLJIT_SUCCESS;
-
-	case SLJIT_XOR:
-		EMIT_LOGICAL(XORI, XOR);
-		return SLJIT_SUCCESS;
-
-	case SLJIT_SHL:
-	case SLJIT_MSHL:
-		EMIT_SHIFT(SLLI, SLL);
-		break;
-
-	case SLJIT_LSHR:
-	case SLJIT_MLSHR:
-		EMIT_SHIFT(SRLI, SRL);
-		break;
-
-	case SLJIT_ASHR:
-	case SLJIT_MASHR:
-		EMIT_SHIFT(SRAI, SRA);
-		break;
-
-	case SLJIT_ROTL:
-	case SLJIT_ROTR:
-		if (flags & SRC2_IMM) {
-			SLJIT_ASSERT(src2 != 0);
-
-			op_imm = (GET_OPCODE(op) == SLJIT_ROTL) ? SLLI : SRLI;
-			FAIL_IF(push_inst(compiler, op_imm | WORD | RD(OTHER_FLAG) | RS1(src1) | IMM_I(src2)));
-
-#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
-			src2 = ((op & SLJIT_32) ? 32 : 64) - src2;
-#else /* !SLJIT_CONFIG_RISCV_64 */
-			src2 = 32 - src2;
-#endif /* SLJIT_CONFIG_RISCV_64 */
-			op_imm = (GET_OPCODE(op) == SLJIT_ROTL) ? SRLI : SLLI;
-			FAIL_IF(push_inst(compiler, op_imm | WORD | RD(dst) | RS1(src1) | IMM_I(src2)));
-			return push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(OTHER_FLAG));
-		}
-
-		if (src2 == TMP_ZERO) {
-			if (dst != src1)
-				return push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(0));
-			return SLJIT_SUCCESS;
-		}
-
-		FAIL_IF(push_inst(compiler, SUB | WORD | RD(EQUAL_FLAG) | RS1(TMP_ZERO) | RS2(src2)));
-		op_reg = (GET_OPCODE(op) == SLJIT_ROTL) ? SLL : SRL;
-		FAIL_IF(push_inst(compiler, op_reg | WORD | RD(OTHER_FLAG) | RS1(src1) | RS2(src2)));
-		op_reg = (GET_OPCODE(op) == SLJIT_ROTL) ? SRL : SLL;
-		FAIL_IF(push_inst(compiler, op_reg | WORD | RD(dst) | RS1(src1) | RS2(EQUAL_FLAG)));
-		return push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(OTHER_FLAG));
-
-	default:
-		SLJIT_UNREACHABLE();
-		return SLJIT_SUCCESS;
-	}
-
-	if (flags & SRC2_IMM) {
-		if (op & SLJIT_SET_Z)
-			FAIL_IF(push_inst(compiler, op_imm | WORD | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(src2)));
-
-		if (flags & UNUSED_DEST)
-			return SLJIT_SUCCESS;
-		return push_inst(compiler, op_imm | WORD | RD(dst) | RS1(src1) | IMM_I(src2));
-	}
-
-	if (op & SLJIT_SET_Z)
-		FAIL_IF(push_inst(compiler, op_reg | WORD | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2)));
-
-	if (flags & UNUSED_DEST)
-		return SLJIT_SUCCESS;
-	return push_inst(compiler, op_reg | WORD | RD(dst) | RS1(src1) | RS2(src2));
-}
-
-#undef IMM_EXTEND
-/* Common operand handling for sljit_emit_op1() and sljit_emit_op2(): resolves
- * src1 to a register and src2 to a register or an inline immediate, calls
- * emit_single_op() once, then stores the result if dst is a memory operand.
- *
- * flags arrives with the data-size and *_OP bits chosen by the caller; this
- * function ORs in UNUSED_DEST, REG_DEST, SRC2_IMM, REG1_SOURCE, REG2_SOURCE and
- * the SLOW_* markers while classifying the operands.
- *
- * Returns SLJIT_SUCCESS, compiler->error, or the result of the final
- * getput_arg() store; FAIL_IF presumably returns early when a step fails. */
-static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
-	sljit_s32 dst, sljit_sw dstw,
-	sljit_s32 src1, sljit_sw src1w,
-	sljit_s32 src2, sljit_sw src2w)
-{
-	/* arg1 goes to TMP_REG1 or src reg
-	   arg2 goes to TMP_REG2, imm or src reg
-	   TMP_REG3 can be used for caching
-	   result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
-	sljit_s32 dst_r = TMP_REG2;
-	sljit_s32 src1_r;
-	sljit_sw src2_r = 0;
-	sljit_s32 sugg_src2_r = TMP_REG2;
-
-	if (!(flags & ALT_KEEP_CACHE)) { /* Reset the cached argument unless the caller asked to keep it. */
-		compiler->cache_arg = 0;
-		compiler->cache_argw = 0;
-	}
-
-	if (dst == TMP_REG2) { /* TMP_REG2 as dst means only the flags are wanted (see sljit_emit_op2u). */
-		SLJIT_ASSERT(HAS_FLAGS(op));
-		flags |= UNUSED_DEST;
-	}
-	else if (FAST_IS_REG(dst)) {
-		dst_r = dst;
-		flags |= REG_DEST;
-		if (flags & MOVE_OP)
-			sugg_src2_r = dst_r; /* Load a moved value straight into the destination register. */
-	}
-	else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw))
-		flags |= SLOW_DEST; /* Fast path not usable for dst: the final store goes through getput_arg(). */
-
-	if (flags & IMM_OP) {
-		if ((src2 & SLJIT_IMM) && src2w != 0 && src2w <= SIMM_MAX && src2w >= SIMM_MIN) {
-			flags |= SRC2_IMM; /* Non-zero and within [SIMM_MIN, SIMM_MAX]: pass src2 as an inline immediate. */
-			src2_r = src2w;
-		}
-		else if ((flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w != 0 && src1w <= SIMM_MAX && src1w >= SIMM_MIN) {
-			flags |= SRC2_IMM; /* Same test on src1; only taken for CUMULATIVE_OP because the operands get swapped. */
-			src2_r = src1w;
-
-			/* And swap arguments. */
-			src1 = src2;
-			src1w = src2w;
-			src2 = SLJIT_IMM;
-			/* src2w = src2_r unneeded. */
-		}
-	}
-
-	/* Source 1. */
-	if (FAST_IS_REG(src1)) {
-		src1_r = src1;
-		flags |= REG1_SOURCE;
-	}
-	else if (src1 & SLJIT_IMM) {
-		if (src1w) {
-			FAIL_IF(load_immediate(compiler, TMP_REG1, src1w, TMP_REG3));
-			src1_r = TMP_REG1;
-		}
-		else
-			src1_r = TMP_ZERO; /* Immediate 0 needs no load. */
-	}
-	else {
-		if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w))
-			FAIL_IF(compiler->error);
-		else
-			flags |= SLOW_SRC1; /* Loaded later, once both sources are classified. */
-		src1_r = TMP_REG1;
-	}
-
-	/* Source 2. */
-	if (FAST_IS_REG(src2)) {
-		src2_r = src2;
-		flags |= REG2_SOURCE;
-		if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP)
-			dst_r = (sljit_s32)src2_r; /* Move whose dst is not a fast register: the source register holds the value to store. */
-	}
-	else if (src2 & SLJIT_IMM) {
-		if (!(flags & SRC2_IMM)) {
-			if (src2w) {
-				FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w, TMP_REG3));
-				src2_r = sugg_src2_r;
-			}
-			else {
-				src2_r = TMP_ZERO;
-				if (flags & MOVE_OP) {
-					if (dst & SLJIT_MEM)
-						dst_r = 0; /* NOTE(review): register 0 - presumably the same as TMP_ZERO; confirm. */
-					else
-						op = SLJIT_MOV; /* Moving 0: the width-specific move is replaced by a plain SLJIT_MOV. */
-				}
-			}
-		}
-	}
-	else {
-		if (getput_arg_fast(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w))
-			FAIL_IF(compiler->error);
-		else
-			flags |= SLOW_SRC2; /* Loaded later, once both sources are classified. */
-		src2_r = sugg_src2_r;
-	}
-
-	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { /* Neither load could use the fast path. */
-		SLJIT_ASSERT(src2_r == TMP_REG2);
-		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { /* Load order follows what can_cache() reports. */
-			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
-			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
-		}
-		else {
-			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
-			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
-		}
-	}
-	else if (flags & SLOW_SRC1)
-		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
-	else if (flags & SLOW_SRC2)
-		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
-
-	FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
-
-	if (dst & SLJIT_MEM) {
-		if (!(flags & SLOW_DEST)) {
-			getput_arg_fast(compiler, flags, dst_r, dst, dstw); /* Return value ignored; a failure is read back from compiler->error. */
-			return compiler->error;
-		}
-		return getput_arg(compiler, flags, dst_r, dst, dstw, 0, 0);
-	}
-
-	return SLJIT_SUCCESS;
-}
-/* Emits an operation that takes no explicit operands. The multiply and divide
- * forms work on SLJIT_R0 / SLJIT_R1 in place. SLJIT_ENDBR,
- * SLJIT_SKIP_FRAMES_BEFORE_RETURN and any opcode not listed in the switch emit
- * nothing and return SLJIT_SUCCESS. */
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
-{
-#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
-	sljit_ins word = (op & SLJIT_32) >> 5; /* 0 or 0x8 (asserted below); presumably what the WORD macro expands to - confirm. */
-
-	SLJIT_ASSERT(word == 0 || word == 0x8);
-#endif /* SLJIT_CONFIG_RISCV_64 */
-
-	CHECK_ERROR();
-	CHECK(check_sljit_emit_op0(compiler, op));
-
-	switch (GET_OPCODE(op)) {
-	case SLJIT_BREAKPOINT:
-		return push_inst(compiler, EBREAK);
-	case SLJIT_NOP:
-		return push_inst(compiler, ADDI | RD(TMP_ZERO) | RS1(TMP_ZERO) | IMM_I(0)); /* ADDI on TMP_ZERO with immediate 0. */
-	case SLJIT_LMUL_UW:
-		FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(SLJIT_R1) | IMM_I(0))); /* Keep R1: MULHU overwrites it before MUL reads it. */
-		FAIL_IF(push_inst(compiler, MULHU | RD(SLJIT_R1) | RS1(SLJIT_R0) | RS2(SLJIT_R1)));
-		return push_inst(compiler, MUL | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(TMP_REG1));
-	case SLJIT_LMUL_SW:
-		FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(SLJIT_R1) | IMM_I(0))); /* Keep R1: MULH overwrites it before MUL reads it. */
-		FAIL_IF(push_inst(compiler, MULH | RD(SLJIT_R1) | RS1(SLJIT_R0) | RS2(SLJIT_R1)));
-		return push_inst(compiler, MUL | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(TMP_REG1));
-	case SLJIT_DIVMOD_UW:
-		FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(SLJIT_R0) | IMM_I(0))); /* Keep R0: DIVU overwrites it before REMU reads it. */
-		FAIL_IF(push_inst(compiler, DIVU | WORD | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(SLJIT_R1)));
-		return push_inst(compiler, REMU | WORD | RD(SLJIT_R1) | RS1(TMP_REG1) | RS2(SLJIT_R1));
-	case SLJIT_DIVMOD_SW:
-		FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(SLJIT_R0) | IMM_I(0))); /* Keep R0: DIV overwrites it before REM reads it. */
-		FAIL_IF(push_inst(compiler, DIV | WORD | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(SLJIT_R1)));
-		return push_inst(compiler, REM | WORD | RD(SLJIT_R1) | RS1(TMP_REG1) | RS2(SLJIT_R1));
-	case SLJIT_DIV_UW:
-		return push_inst(compiler, DIVU | WORD | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(SLJIT_R1));
-	case SLJIT_DIV_SW:
-		return push_inst(compiler, DIV | WORD | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(SLJIT_R1));
-	case SLJIT_ENDBR:
-	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
-		return SLJIT_SUCCESS; /* Nothing is emitted for these two. */
-	}
-
-	return SLJIT_SUCCESS;
-}
-/* Emits a single-source operation (the SLJIT_MOV* family, SLJIT_NOT, SLJIT_CLZ,
- * SLJIT_CTZ) by forwarding to emit_op(). Except for SLJIT_NOT, the operand is
- * passed in the second-source slot with TMP_REG1 in the first-source slot.
- * Immediate sources of the narrow moves are cast to the move's type before the
- * call. Reaches SLJIT_UNREACHABLE() if the opcode matches none of the cases. */
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
-	sljit_s32 dst, sljit_sw dstw,
-	sljit_s32 src, sljit_sw srcw)
-{
-	sljit_s32 flags = 0;
-
-	CHECK_ERROR();
-	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
-	ADJUST_LOCAL_OFFSET(dst, dstw);
-	ADJUST_LOCAL_OFFSET(src, srcw);
-
-#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
-	if (op & SLJIT_32)
-		flags = INT_DATA | SIGNED_DATA; /* Only read by the NOT / CLZ / CTZ cases; the moves pass their own flags. */
-#endif
-
-	switch (GET_OPCODE(op)) {
-	case SLJIT_MOV:
-#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
-	case SLJIT_MOV_U32:
-	case SLJIT_MOV_S32:
-	case SLJIT_MOV32:
-#endif
-	case SLJIT_MOV_P:
-		return emit_op(compiler, SLJIT_MOV, WORD_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, srcw); /* On RISCV_32 the 32-bit moves land here as plain word moves. */
-
-#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
-	case SLJIT_MOV_U32:
-		return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u32)srcw : srcw);
-
-	case SLJIT_MOV_S32:
-	/* Logical operators have no W variant, so sign extended input is necessary for them. */
-	case SLJIT_MOV32:
-		return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s32)srcw : srcw);
-#endif
-
-	case SLJIT_MOV_U8:
-		return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw);
-
-	case SLJIT_MOV_S8:
-		return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw);
-
-	case SLJIT_MOV_U16:
-		return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw);
-
-	case SLJIT_MOV_S16:
-		return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw);
-
-	case SLJIT_NOT:
-		return emit_op(compiler, SLJIT_XOR | (op & (SLJIT_32 | SLJIT_SET_Z)), flags, dst, dstw, src, srcw, SLJIT_IMM, -1); /* NOT is emitted as XOR with immediate -1; only the SLJIT_32 and SLJIT_SET_Z bits of op are kept. */
-
-	case SLJIT_CLZ:
-	case SLJIT_CTZ:
-		return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw);
-	}
-
-	SLJIT_UNREACHABLE();
-	return SLJIT_SUCCESS;
-}
-/* Emits a two-source integer operation into dst by forwarding to emit_op() with
- * flags that depend on the opcode: CUMULATIVE_OP for ADD / ADDC / MUL / AND /
- * OR / XOR, IMM_OP for every opcode except MUL. Immediate shift and rotate
- * counts are masked to the operand width before the call. Reaches
- * SLJIT_UNREACHABLE() if the opcode matches none of the cases. */
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
-	sljit_s32 dst, sljit_sw dstw,
-	sljit_s32 src1, sljit_sw src1w,
-	sljit_s32 src2, sljit_sw src2w)
-{
-	sljit_s32 flags = 0;
-
-	CHECK_ERROR();
-	CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
-	ADJUST_LOCAL_OFFSET(dst, dstw);
-	ADJUST_LOCAL_OFFSET(src1, src1w);
-	ADJUST_LOCAL_OFFSET(src2, src2w);
-
-#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
-	if (op & SLJIT_32) {
-		flags |= INT_DATA | SIGNED_DATA;
-		if (src1 & SLJIT_IMM)
-			src1w = (sljit_s32)src1w; /* Keep only the low 32 bits of the immediate, sign-extended back to sljit_sw. */
-		if (src2 & SLJIT_IMM)
-			src2w = (sljit_s32)src2w;
-	}
-#endif
-
-	switch (GET_OPCODE(op)) {
-	case SLJIT_ADD:
-	case SLJIT_ADDC:
-		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
-		return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
-
-	case SLJIT_SUB:
-	case SLJIT_SUBC:
-		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
-		return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); /* No CUMULATIVE_OP: emit_op() must not swap the operands. */
-
-	case SLJIT_MUL:
-		compiler->status_flags_state = 0;
-		return emit_op(compiler, op, flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w); /* No IMM_OP: immediates are always loaded into a register. */
-
-	case SLJIT_AND:
-	case SLJIT_OR:
-	case SLJIT_XOR:
-		return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
-
-	case SLJIT_SHL:
-	case SLJIT_MSHL:
-	case SLJIT_LSHR:
-	case SLJIT_MLSHR:
-	case SLJIT_ASHR:
-	case SLJIT_MASHR:
-	case SLJIT_ROTL:
-	case SLJIT_ROTR:
-		if (src2 & SLJIT_IMM) { /* Reduce an immediate count to 5 bits (32-bit operation) or 6 bits (64-bit operation). */
-#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
-			src2w &= 0x1f;
-#else /* !SLJIT_CONFIG_RISCV_32 */
-			if (op & SLJIT_32)
-				src2w &= 0x1f;
-			else
-				src2w &= 0x3f;
-#endif /* SLJIT_CONFIG_RISCV_32 */
-		}
-
-		return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
-	}
-
-	SLJIT_UNREACHABLE();
-	return SLJIT_SUCCESS;
-}
-/* Variant of sljit_emit_op2() without a destination operand: after running its
- * own argument check it calls sljit_emit_op2() with TMP_REG2 as dst, which
- * emit_op() treats as "result unused" (it sets UNUSED_DEST for that register). */
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
-	sljit_s32 src1, sljit_sw src1w,
-	sljit_s32 src2, sljit_sw src2w)
-{
-	CHECK_ERROR();
-	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
-
-	SLJIT_SKIP_CHECKS(compiler); /* Presumably suppresses the repeated check inside sljit_emit_op2() - confirm. */
-	return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w);
-}
-/* Shifts src_dst by src2 and ORs in src1 shifted the opposite way, so the
- * vacated bits of src_dst are taken from src1. The direction is left for
- * SLJIT_SHL / SLJIT_MSHL and right for every other opcode.
- *
- * When src_dst == src1 the operation is emitted as a rotate through
- * sljit_emit_op2(). An immediate count is masked to bit_length - 1, and a
- * masked count of 0 emits nothing. TMP_REG1 and TMP_REG2 are used as scratch. */
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
-	sljit_s32 src_dst,
-	sljit_s32 src1, sljit_sw src1w,
-	sljit_s32 src2, sljit_sw src2w)
-{
-	sljit_s32 is_left;
-	sljit_ins ins1, ins2, ins3;
-#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
-	sljit_ins word = (op & SLJIT_32) >> 5;
-	sljit_s32 inp_flags = ((op & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
-	sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;
-#else /* !SLJIT_CONFIG_RISCV_64 */
-	sljit_s32 inp_flags = WORD_DATA | LOAD_DATA;
-	sljit_sw bit_length = 32;
-#endif /* SLJIT_CONFIG_RISCV_64 */
-
-	SLJIT_ASSERT(WORD == 0 || WORD == 0x8);
-
-	CHECK_ERROR();
-	CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w));
-
-	is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);
-
-	if (src_dst == src1) { /* Both parts come from the same register: emit a rotate instead. */
-		SLJIT_SKIP_CHECKS(compiler);
-		return sljit_emit_op2(compiler, (is_left ? SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), src_dst, 0, src_dst, 0, src2, src2w);
-	}
-
-	ADJUST_LOCAL_OFFSET(src1, src1w);
-	ADJUST_LOCAL_OFFSET(src2, src2w);
-
-	if (src2 & SLJIT_IMM) {
-		src2w &= bit_length - 1;
-
-		if (src2w == 0)
-			return SLJIT_SUCCESS; /* Shift by 0: src_dst is left unchanged and nothing is emitted. */
-	} else if (src2 & SLJIT_MEM) {
-		FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG2, src2, src2w));
-		src2 = TMP_REG2;
-	}
-
-	if (src1 & SLJIT_MEM) {
-		FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG1, src1, src1w));
-		src1 = TMP_REG1;
-	} else if (src1 & SLJIT_IMM) {
-		FAIL_IF(load_immediate(compiler, TMP_REG1, src1w, TMP_REG3));
-		src1 = TMP_REG1;
-	}
-
-	if (src2 & SLJIT_IMM) { /* Constant count: two immediate shifts and an OR. */
-		if (is_left) {
-			ins1 = SLLI | WORD | IMM_I(src2w);
-			src2w = bit_length - src2w; /* Complementary count for the bits taken from src1. */
-			ins2 = SRLI | WORD | IMM_I(src2w);
-		} else {
-			ins1 = SRLI | WORD | IMM_I(src2w);
-			src2w = bit_length - src2w; /* Complementary count for the bits taken from src1. */
-			ins2 = SLLI | WORD | IMM_I(src2w);
-		}
-
-		FAIL_IF(push_inst(compiler, ins1 | RD(src_dst) | RS1(src_dst)));
-		FAIL_IF(push_inst(compiler, ins2 | RD(TMP_REG1) | RS1(src1)));
-		return push_inst(compiler, OR | RD(src_dst) | RS1(src_dst) | RS2(TMP_REG1));
-	}
-
-	if (is_left) { /* ins1 shifts src_dst; ins2 (by immediate 1) and ins3 (by register) shift src1 the other way. */
-		ins1 = SLL;
-		ins2 = SRLI;
-		ins3 = SRL;
-	} else {
-		ins1 = SRL;
-		ins2 = SLLI;
-		ins3 = SLL;
-	}
-
-	FAIL_IF(push_inst(compiler, ins1 | WORD | RD(src_dst) | RS1(src_dst) | RS2(src2)));
-
-	if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) { /* src1 is shifted by 1 and then by (count XOR (bit_length - 1)); presumably this covers a count of 0 without a shift by bit_length - confirm. */
-		FAIL_IF(push_inst(compiler, ins2 | WORD | RD(TMP_REG1) | RS1(src1) | IMM_I(1)));
-		FAIL_IF(push_inst(compiler, XORI | RD(TMP_REG2) | RS1(src2) | IMM_I((sljit_ins)bit_length - 1)));
-		src1 = TMP_REG1;
-	} else
-		FAIL_IF(push_inst(compiler, SUB | WORD | RD(TMP_REG2) | RS1(TMP_ZERO) | RS2(src2))); /* TMP_REG2 = 0 - count. */
-
-	FAIL_IF(push_inst(compiler, ins3 | WORD | RD(TMP_REG1) | RS1(src1) | RS2(TMP_REG2)));
-	return push_inst(compiler, OR | RD(src_dst) | RS1(src_dst) | RS2(TMP_REG1));
-}
-
-#undef WORD
-/* Emits an operation that has one source and no destination. Only
- * SLJIT_FAST_RETURN generates code: the source is copied (register) or loaded
- * (memory) into RETURN_ADDR_REG and a JALR through that register is emitted
- * with TMP_ZERO as its destination. SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN, the
- * four SLJIT_PREFETCH_* values and any other op return SLJIT_SUCCESS without
- * emitting anything. */
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
-	sljit_s32 src, sljit_sw srcw)
-{
-	CHECK_ERROR();
-	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
-	ADJUST_LOCAL_OFFSET(src, srcw);
-
-	switch (op) {
-	case SLJIT_FAST_RETURN:
-		if (FAST_IS_REG(src))
-			FAIL_IF(push_inst(compiler, ADDI | RD(RETURN_ADDR_REG) | RS1(src) | IMM_I(0))); /* Register copy written as ADDI with immediate 0. */
-		else
-			FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, src, srcw));
-
-		return push_inst(compiler, JALR | RD(TMP_ZERO) | RS1(RETURN_ADDR_REG) | IMM_I(0));
-	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
-		return SLJIT_SUCCESS;
-	case SLJIT_PREFETCH_L1:
-	case SLJIT_PREFETCH_L2:
-	case SLJIT_PREFETCH_L3:
-	case SLJIT_PREFETCH_ONCE:
-		return SLJIT_SUCCESS; /* No instruction is emitted for prefetch requests. */
-	}
-
-	return SLJIT_SUCCESS;
-}
-/* Returns the reg_map[] entry for the SLJIT register reg (presumably the
- * machine register number used in instruction encodings - confirm against the
- * table definition). CHECK_REG_INDEX wraps the argument check. */
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
-{
-	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
-	return reg_map[reg];
-}
-/* Floating point counterpart of sljit_get_register_index(): returns the
- * freg_map[] entry for the SLJIT float register reg. CHECK_REG_INDEX wraps the
- * argument check. */
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
-{
-	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
-	return freg_map[reg];
-}
-/* Appends one caller-supplied instruction word to the generated code.
- * Exactly one sljit_ins is read from instruction; size is only handed to the
- * check function (presumably it verifies size there - confirm).
- * NOTE(review): the pointer cast assumes instruction is suitably aligned for
- * sljit_ins - verify against callers. */
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
-	void *instruction, sljit_u32 size)
-{
-	CHECK_ERROR();
-	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
-
-	return push_inst(compiler, *(sljit_ins*)instruction);
-}
-
-/* --------------------------------------------------------------------- */
-/*  Floating point operators                                             */
-/* --------------------------------------------------------------------- */
-
-#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_32) >> 7))
-#define FMT(op) ((sljit_ins)((op & SLJIT_32) ^ SLJIT_32) << 17)
-/* Emits a floating point to integer conversion: FCVT_W_S combined with FMT(op)
- * and flags. On RISCV_32 flags is a macro for 0; otherwise bit 21 is set when
- * the opcode is SLJIT_CONV_SW_FROM_F64, and the same value selects a WORD_DATA
- * instead of an INT_DATA store. A memory source is first loaded into TMP_FREG1;
- * a memory destination is written from TMP_REG2 after the conversion. */
-static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
-	sljit_s32 dst, sljit_sw dstw,
-	sljit_s32 src, sljit_sw srcw)
-{
-#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
-#	define flags (sljit_u32)0
-#else
-	sljit_u32 flags = ((sljit_u32)(GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)) << 21;
-#endif
-	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; /* Convert into TMP_REG2 when dst is not a fast register. */
-
-	if (src & SLJIT_MEM) {
-		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
-		src = TMP_FREG1;
-	}
-
-	FAIL_IF(push_inst(compiler, FCVT_W_S | FMT(op) | flags | RD(dst_r) | FRS1(src)));
-
-	/* Store the converted integer, which is in TMP_REG2, to the memory destination. */
-	if (dst & SLJIT_MEM) {
-#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
-		return emit_op_mem2(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0);
-#else
-		return emit_op_mem2(compiler, flags ? WORD_DATA : INT_DATA, TMP_REG2, dst, dstw, 0, 0);
-#endif
-	}
-	return SLJIT_SUCCESS;
-
-#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
-#	undef flags
-#endif
-}
-/* Emits an integer to floating point conversion: FCVT_S_W combined with
- * FMT(op), plus (on RISCV_64) bit 21 when the opcode is
- * SLJIT_CONV_F64_FROM_SW. A memory or immediate source is first brought into
- * TMP_REG1; when dst is memory the result is produced in TMP_FREG1 and stored
- * afterwards. */
-static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
-	sljit_s32 dst, sljit_sw dstw,
-	sljit_s32 src, sljit_sw srcw)
-{
-	sljit_ins inst;
-#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
-	sljit_u32 flags = ((sljit_u32)(GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)) << 21;
-#endif
-
-	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
-
-	if (src & SLJIT_MEM) {
-#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
-		FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
-#else
-		FAIL_IF(emit_op_mem2(compiler, (flags ? WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
-#endif
-		src = TMP_REG1;
-	} else if (src & SLJIT_IMM) {
-#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
-		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
-			srcw = (sljit_s32)srcw; /* A 32-bit source: keep only the low 32 bits of the immediate, sign-extended. */
-#endif
-
-		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw, TMP_REG3));
-		src = TMP_REG1;
-	}
-
-	inst = FCVT_S_W | FMT(op) | FRD(dst_r) | RS1(src);
-
-#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
-	if (op & SLJIT_32)
-		inst |= F3(0x7); /* NOTE(review): presumably the rounding-mode field - confirm against the ISA manual. */
-#else
-	inst |= flags;
-
-	if (op != SLJIT_CONV_F64_FROM_S32) /* Compares the whole op value, including any SLJIT_32 bit. */
-		inst |= F3(0x7);
-#endif
-
-	FAIL_IF(push_inst(compiler, inst));
-
-	if (dst & SLJIT_MEM)
-		return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
-	return SLJIT_SUCCESS;
-}
-/* Emits a floating point comparison whose result is written to OTHER_FLAG.
- * Memory operands are first loaded into TMP_FREG1 / TMP_FREG2. The instruction
- * (FEQ_S, FLT_S or FLE_S, in some cases with the operands swapped) is chosen
- * from GET_FLAG_TYPE(op); sljit_emit_jump() later tests OTHER_FLAG against
- * TMP_ZERO with BEQ or BNE for these flag types. */
-static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
-	sljit_s32 src1, sljit_sw src1w,
-	sljit_s32 src2, sljit_sw src2w)
-{
-	sljit_ins inst;
-
-	if (src1 & SLJIT_MEM) {
-		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
-		src1 = TMP_FREG1;
-	}
-
-	if (src2 & SLJIT_MEM) {
-		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
-		src2 = TMP_FREG2;
-	}
-
-	switch (GET_FLAG_TYPE(op)) {
-	case SLJIT_F_EQUAL:
-	case SLJIT_F_NOT_EQUAL:
-	case SLJIT_ORDERED_EQUAL:
-	case SLJIT_UNORDERED_OR_NOT_EQUAL:
-		inst = FEQ_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src2);
-		break;
-	case SLJIT_F_LESS:
-	case SLJIT_F_GREATER_EQUAL:
-	case SLJIT_ORDERED_LESS:
-	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
-		inst = FLT_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src2);
-		break;
-	case SLJIT_ORDERED_GREATER:
-	case SLJIT_UNORDERED_OR_LESS_EQUAL:
-		inst = FLT_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src2) | FRS2(src1); /* Operands swapped: src2 < src1. */
-		break;
-	case SLJIT_F_GREATER:
-	case SLJIT_F_LESS_EQUAL:
-	case SLJIT_UNORDERED_OR_GREATER:
-	case SLJIT_ORDERED_LESS_EQUAL:
-		inst = FLE_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src2);
-		break;
-	case SLJIT_UNORDERED_OR_LESS:
-	case SLJIT_ORDERED_GREATER_EQUAL:
-		inst = FLE_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src2) | FRS2(src1); /* Operands swapped: src2 <= src1. */
-		break;
-	case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. */
-	case SLJIT_ORDERED_NOT_EQUAL: /* Not supported. */
-		FAIL_IF(push_inst(compiler, FLT_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src2)));
-		FAIL_IF(push_inst(compiler, FLT_S | FMT(op) | RD(TMP_REG1) | FRS1(src2) | FRS2(src1)));
-		inst = OR | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | RS2(TMP_REG1); /* OTHER_FLAG = (src1 < src2) | (src2 < src1). */
-		break;
-	default: /* SLJIT_UNORDERED, SLJIT_ORDERED */
-		FAIL_IF(push_inst(compiler, FADD_S | FMT(op) | FRD(TMP_FREG1) | FRS1(src1) | FRS2(src2)));
-		inst = FEQ_S | FMT(op) | RD(OTHER_FLAG) | FRS1(TMP_FREG1) | FRS2(TMP_FREG1); /* Compares the sum with itself; presumably this fails exactly when an input is NaN - confirm. */
-		break;
-	}
-
-	return push_inst(compiler, inst);
-}
-/* Emits a single-source floating point operation. The switch handles
- * SLJIT_MOV_F64, SLJIT_NEG_F64, SLJIT_ABS_F64 and SLJIT_CONV_F64_FROM_F32;
- * SELECT_FOP1_OPERATION_WITH_CHECKS presumably routes the integer conversions
- * and the compare to the helpers above (confirm against the macro). A memory
- * source is loaded into dst_r first, and a memory destination is stored from
- * dst_r at the end. */
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
-	sljit_s32 dst, sljit_sw dstw,
-	sljit_s32 src, sljit_sw srcw)
-{
-	sljit_s32 dst_r;
-
-	CHECK_ERROR();
-	compiler->cache_arg = 0;
-	compiler->cache_argw = 0;
-
-	SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error);
-	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
-
-	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32)
-		op ^= SLJIT_32; /* Flipped so FLOAT_DATA(op) matches the source for the load below; flipped back in the switch. */
-
-	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
-
-	if (src & SLJIT_MEM) {
-		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
-		src = dst_r;
-	}
-
-	switch (GET_OPCODE(op)) {
-	case SLJIT_MOV_F64:
-		if (src != dst_r) {
-			if (dst_r != TMP_FREG1)
-				FAIL_IF(push_inst(compiler, FSGNJ_S | FMT(op) | FRD(dst_r) | FRS1(src) | FRS2(src)));
-			else
-				dst_r = src; /* dst is not a register: skip the copy and store directly from src. */
-		}
-		break;
-	case SLJIT_NEG_F64:
-		FAIL_IF(push_inst(compiler, FSGNJN_S | FMT(op) | FRD(dst_r) | FRS1(src) | FRS2(src)));
-		break;
-	case SLJIT_ABS_F64:
-		FAIL_IF(push_inst(compiler, FSGNJX_S | FMT(op) | FRD(dst_r) | FRS1(src) | FRS2(src)));
-		break;
-	case SLJIT_CONV_F64_FROM_F32:
-		/* The SLJIT_32 bit is inverted because sljit_f32 needs to be loaded from the memory. */
-		FAIL_IF(push_inst(compiler, FCVT_S_D | ((op & SLJIT_32) ? (1 << 25) : ((1 << 20) | F3(7))) | FRD(dst_r) | FRS1(src)));
-		op ^= SLJIT_32; /* Restore op so FLOAT_DATA(op) describes the result for the store below. */
-		break;
-	}
-
-	if (dst & SLJIT_MEM)
-		return emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0);
-	return SLJIT_SUCCESS;
-}
-/* Emits a two-source floating point operation (SLJIT_ADD_F64, SLJIT_SUB_F64,
- * SLJIT_MUL_F64, SLJIT_DIV_F64; other opcodes emit no arithmetic instruction).
- * Memory sources are loaded into TMP_FREG1 / TMP_FREG2, trying
- * getput_arg_fast() first and falling back to getput_arg(). When dst is not a
- * fast register the result is produced in TMP_FREG2 and then stored to dst. */
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
-	sljit_s32 dst, sljit_sw dstw,
-	sljit_s32 src1, sljit_sw src1w,
-	sljit_s32 src2, sljit_sw src2w)
-{
-	sljit_s32 dst_r, flags = 0;
-
-	CHECK_ERROR();
-	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
-	ADJUST_LOCAL_OFFSET(dst, dstw);
-	ADJUST_LOCAL_OFFSET(src1, src1w);
-	ADJUST_LOCAL_OFFSET(src2, src2w);
-
-	compiler->cache_arg = 0;
-	compiler->cache_argw = 0;
-
-	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;
-
-	if (src1 & SLJIT_MEM) {
-		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
-			FAIL_IF(compiler->error);
-			src1 = TMP_FREG1;
-		} else
-			flags |= SLOW_SRC1; /* src1 stays a memory operand until the getput_arg() load below. */
-	}
-
-	if (src2 & SLJIT_MEM) {
-		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
-			FAIL_IF(compiler->error);
-			src2 = TMP_FREG2;
-		} else
-			flags |= SLOW_SRC2; /* src2 stays a memory operand until the getput_arg() load below. */
-	}
-
-	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
-		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { /* Load order follows what can_cache() reports. */
-			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
-			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
-		}
-		else {
-			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
-			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
-		}
-	}
-	else if (flags & SLOW_SRC1)
-		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
-	else if (flags & SLOW_SRC2)
-		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
-
-	if (flags & SLOW_SRC1)
-		src1 = TMP_FREG1; /* From here on the operand is the register it was loaded into. */
-	if (flags & SLOW_SRC2)
-		src2 = TMP_FREG2;
-
-	switch (GET_OPCODE(op)) {
-	case SLJIT_ADD_F64:
-		FAIL_IF(push_inst(compiler, FADD_S | FMT(op) | FRD(dst_r) | FRS1(src1) | FRS2(src2)));
-		break;
-
-	case SLJIT_SUB_F64:
-		FAIL_IF(push_inst(compiler, FSUB_S | FMT(op) | FRD(dst_r) | FRS1(src1) | FRS2(src2)));
-		break;
-
-	case SLJIT_MUL_F64:
-		FAIL_IF(push_inst(compiler, FMUL_S | FMT(op) | FRD(dst_r) | FRS1(src1) | FRS2(src2)));
-		break;
-
-	case SLJIT_DIV_F64:
-		FAIL_IF(push_inst(compiler, FDIV_S | FMT(op) | FRD(dst_r) | FRS1(src1) | FRS2(src2)));
-		break;
-	}
-
-	if (dst_r == TMP_FREG2) /* dst was not a fast register: write the result out. */
-		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));
-
-	return SLJIT_SUCCESS;
-}
-
-#undef FLOAT_DATA
-#undef FMT
-
-/* --------------------------------------------------------------------- */
-/*  Other instructions                                                   */
-/* --------------------------------------------------------------------- */
-/* Saves the value of RETURN_ADDR_REG into dst: a register destination gets it
- * through ADDI with immediate 0, any other destination through a WORD_DATA
- * store via emit_op_mem(). */
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
-{
-	CHECK_ERROR();
-	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
-	ADJUST_LOCAL_OFFSET(dst, dstw);
-
-	if (FAST_IS_REG(dst))
-		return push_inst(compiler, ADDI | RD(dst) | RS1(RETURN_ADDR_REG) | IMM_I(0));
-
-	/* Memory. */
-	return emit_op_mem(compiler, WORD_DATA, RETURN_ADDR_REG, dst, dstw);
-}
-
-/* --------------------------------------------------------------------- */
-/*  Conditional instructions                                             */
-/* --------------------------------------------------------------------- */
-/* Returns a label for the current position. If the most recent label was
- * recorded at the same compiler->size it is returned again; otherwise a new
- * sljit_label is taken from ensure_abuf() and initialised with set_label().
- * PTR_FAIL_IF handles a failed allocation (presumably by returning NULL -
- * confirm). */
-SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
-{
-	struct sljit_label *label;
-
-	CHECK_ERROR_PTR();
-	CHECK_PTR(check_sljit_emit_label(compiler));
-
-	if (compiler->last_label && compiler->last_label->size == compiler->size)
-		return compiler->last_label; /* Same size as the last label: reuse it instead of allocating. */
-
-	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
-	PTR_FAIL_IF(!label);
-	set_label(label, compiler);
-	return label;
-}
-
-#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
-#define BRANCH_LENGTH	((sljit_ins)(3 * sizeof(sljit_ins)) << 7)
-#else
-#define BRANCH_LENGTH	((sljit_ins)(7 * sizeof(sljit_ins)) << 7)
-#endif
-/* Emits a jump and returns its sljit_jump record. For a conditional type a
- * branch on EQUAL_FLAG or OTHER_FLAG is emitted first, using the opposite
- * condition and BRANCH_LENGTH as its offset field (presumably it skips the
- * jump sequence when the condition does not hold - confirm). It is followed by
- * a JALR through TMP_REG1, which links into RETURN_ADDR_REG for types
- * >= SLJIT_FAST_CALL. compiler->size is then advanced by 1 (RISCV_32) or 5
- * words that are not emitted here. */
-SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
-{
-	struct sljit_jump *jump;
-	sljit_ins inst;
-
-	CHECK_ERROR_PTR();
-	CHECK_PTR(check_sljit_emit_jump(compiler, type));
-
-	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
-	PTR_FAIL_IF(!jump);
-	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
-	type &= 0xff; /* Only the low byte of type is examined below. */
-
-	switch (type) {
-	case SLJIT_EQUAL:
-		inst = BNE | RS1(EQUAL_FLAG) | RS2(TMP_ZERO) | BRANCH_LENGTH;
-		break;
-	case SLJIT_NOT_EQUAL:
-		inst = BEQ | RS1(EQUAL_FLAG) | RS2(TMP_ZERO) | BRANCH_LENGTH;
-		break;
-	case SLJIT_LESS:
-	case SLJIT_GREATER:
-	case SLJIT_SIG_LESS:
-	case SLJIT_SIG_GREATER:
-	case SLJIT_OVERFLOW:
-	case SLJIT_CARRY:
-	case SLJIT_F_EQUAL:
-	case SLJIT_ORDERED_EQUAL:
-	case SLJIT_ORDERED_NOT_EQUAL: /* Not supported. */
-	case SLJIT_F_LESS:
-	case SLJIT_ORDERED_LESS:
-	case SLJIT_ORDERED_GREATER:
-	case SLJIT_F_LESS_EQUAL:
-	case SLJIT_ORDERED_LESS_EQUAL:
-	case SLJIT_ORDERED_GREATER_EQUAL:
-	case SLJIT_ORDERED:
-		inst = BEQ | RS1(OTHER_FLAG) | RS2(TMP_ZERO) | BRANCH_LENGTH; /* Branch taken when OTHER_FLAG equals TMP_ZERO. */
-		break;
-	case SLJIT_GREATER_EQUAL:
-	case SLJIT_LESS_EQUAL:
-	case SLJIT_SIG_GREATER_EQUAL:
-	case SLJIT_SIG_LESS_EQUAL:
-	case SLJIT_NOT_OVERFLOW:
-	case SLJIT_NOT_CARRY:
-	case SLJIT_F_NOT_EQUAL:
-	case SLJIT_UNORDERED_OR_NOT_EQUAL:
-	case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. */
-	case SLJIT_F_GREATER_EQUAL:
-	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
-	case SLJIT_UNORDERED_OR_LESS_EQUAL:
-	case SLJIT_F_GREATER:
-	case SLJIT_UNORDERED_OR_GREATER:
-	case SLJIT_UNORDERED_OR_LESS:
-	case SLJIT_UNORDERED:
-		inst = BNE | RS1(OTHER_FLAG) | RS2(TMP_ZERO) | BRANCH_LENGTH; /* Branch taken when OTHER_FLAG differs from TMP_ZERO. */
-		break;
-	default:
-		/* Not conditional branch. */
-		inst = 0;
-		break;
-	}
-
-	if (inst != 0) {
-		PTR_FAIL_IF(push_inst(compiler, inst));
-		jump->flags |= IS_COND;
-	}
-
-	jump->addr = compiler->size; /* Recorded after the optional conditional branch, before the JALR. */
-	inst = JALR | RS1(TMP_REG1) | IMM_I(0);
-
-	if (type >= SLJIT_FAST_CALL) {
-		jump->flags |= IS_CALL;
-		inst |= RD(RETURN_ADDR_REG);
-	}
-
-	PTR_FAIL_IF(push_inst(compiler, inst));
-
-	/* Maximum number of instructions required for generating a constant. */
-#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
-	compiler->size += 1;
-#else
-	compiler->size += 5;
-#endif
-	return jump;
-}
-/* Emits a call by delegating to sljit_emit_jump(). arg_types is only passed to
- * the check function. When SLJIT_CALL_RETURN is set, the stack frame is
- * released first and the type is replaced by SLJIT_JUMP, keeping only the
- * SLJIT_REWRITABLE_JUMP bit of the original type. */
-SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
-	sljit_s32 arg_types)
-{
-	SLJIT_UNUSED_ARG(arg_types);
-	CHECK_ERROR_PTR();
-	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
-
-	if (type & SLJIT_CALL_RETURN) {
-		PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));
-		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
-	}
-
-	SLJIT_SKIP_CHECKS(compiler);
-	return sljit_emit_jump(compiler, type);
-}
-/* Emits a compare-and-jump on two integer operands and returns the jump
- * record. Memory operands are loaded into TMP_REG1 / TMP_REG2, non-zero
- * immediates are materialised into the same registers, and a zero immediate is
- * replaced by TMP_ZERO. The emitted branch tests the opposite of the requested
- * condition with BRANCH_LENGTH as its offset field, and is followed by a JALR
- * through TMP_REG1 plus 1 (RISCV_32) or 5 reserved words.
- *
- * NOTE(review): the switch has no default, so inst is assigned only for the
- * ten listed types; presumably check_sljit_emit_cmp() rejects everything
- * else - confirm. */
-SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type,
-	sljit_s32 src1, sljit_sw src1w,
-	sljit_s32 src2, sljit_sw src2w)
-{
-	struct sljit_jump *jump;
-	sljit_s32 flags;
-	sljit_ins inst;
-
-	CHECK_ERROR_PTR();
-	CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w));
-	ADJUST_LOCAL_OFFSET(src1, src1w);
-	ADJUST_LOCAL_OFFSET(src2, src2w);
-
-	compiler->cache_arg = 0;
-	compiler->cache_argw = 0;
-#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
-	flags = WORD_DATA | LOAD_DATA;
-#else /* !SLJIT_CONFIG_RISCV_32 */
-	flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
-#endif /* SLJIT_CONFIG_RISCV_32 */
-
-	if (src1 & SLJIT_MEM) {
-		PTR_FAIL_IF(emit_op_mem2(compiler, flags, TMP_REG1, src1, src1w, src2, src2w));
-		src1 = TMP_REG1;
-	}
-
-	if (src2 & SLJIT_MEM) {
-		PTR_FAIL_IF(emit_op_mem2(compiler, flags, TMP_REG2, src2, src2w, 0, 0));
-		src2 = TMP_REG2;
-	}
-
-	if (src1 & SLJIT_IMM) {
-		if (src1w != 0) {
-			PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, src1w, TMP_REG3));
-			src1 = TMP_REG1;
-		}
-		else
-			src1 = TMP_ZERO; /* Immediate 0 needs no load. */
-	}
-
-	if (src2 & SLJIT_IMM) {
-		if (src2w != 0) {
-			PTR_FAIL_IF(load_immediate(compiler, TMP_REG2, src2w, TMP_REG3));
-			src2 = TMP_REG2;
-		}
-		else
-			src2 = TMP_ZERO; /* Immediate 0 needs no load. */
-	}
-
-	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
-	PTR_FAIL_IF(!jump);
-	set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | IS_COND));
-	type &= 0xff; /* Only the low byte of type is examined below. */
-
-	switch (type) {
-	case SLJIT_EQUAL:
-		inst = BNE | RS1(src1) | RS2(src2) | BRANCH_LENGTH;
-		break;
-	case SLJIT_NOT_EQUAL:
-		inst = BEQ | RS1(src1) | RS2(src2) | BRANCH_LENGTH;
-		break;
-	case SLJIT_LESS:
-		inst = BGEU | RS1(src1) | RS2(src2) | BRANCH_LENGTH;
-		break;
-	case SLJIT_GREATER_EQUAL:
-		inst = BLTU | RS1(src1) | RS2(src2) | BRANCH_LENGTH;
-		break;
-	case SLJIT_GREATER:
-		inst = BGEU | RS1(src2) | RS2(src1) | BRANCH_LENGTH; /* Operands swapped. */
-		break;
-	case SLJIT_LESS_EQUAL:
-		inst = BLTU | RS1(src2) | RS2(src1) | BRANCH_LENGTH; /* Operands swapped. */
-		break;
-	case SLJIT_SIG_LESS:
-		inst = BGE | RS1(src1) | RS2(src2) | BRANCH_LENGTH;
-		break;
-	case SLJIT_SIG_GREATER_EQUAL:
-		inst = BLT | RS1(src1) | RS2(src2) | BRANCH_LENGTH;
-		break;
-	case SLJIT_SIG_GREATER:
-		inst = BGE | RS1(src2) | RS2(src1) | BRANCH_LENGTH; /* Operands swapped. */
-		break;
-	case SLJIT_SIG_LESS_EQUAL:
-		inst = BLT | RS1(src2) | RS2(src1) | BRANCH_LENGTH; /* Operands swapped. */
-		break;
-	}
-
-	PTR_FAIL_IF(push_inst(compiler, inst));
-
-	jump->addr = compiler->size; /* Recorded after the conditional branch, before the JALR. */
-	PTR_FAIL_IF(push_inst(compiler, JALR | RD(TMP_ZERO) | RS1(TMP_REG1) | IMM_I(0)));
-
-	/* Maximum number of instructions required for generating a constant. */
-#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
-	compiler->size += 1;
-#else
-	compiler->size += 5;
-#endif
-	return jump;
-}
-
-#undef BRANCH_LENGTH
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
-{
-	struct sljit_jump *jump;
-
-	CHECK_ERROR();
-	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
-
-	if (!(src & SLJIT_IMM)) {
-		if (src & SLJIT_MEM) {
-			ADJUST_LOCAL_OFFSET(src, srcw);
-			FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
-			src = TMP_REG1;
-		}
-		return push_inst(compiler, JALR | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RS1(src) | IMM_I(0));
-	}
-
-	/* These jumps are converted to jump/call instructions when possible. */
-	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
-	FAIL_IF(!jump);
-	set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_CALL : 0));
-	jump->u.target = (sljit_uw)srcw;
-
-	jump->addr = compiler->size;
-	FAIL_IF(push_inst(compiler, JALR | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RS1(TMP_REG1) | IMM_I(0)));
-
-	/* Maximum number of instructions required for generating a constant. */
-#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
-	compiler->size += 1;
-#else
-	compiler->size += 5;
-#endif
-	return SLJIT_SUCCESS;
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
-	sljit_s32 arg_types,
-	sljit_s32 src, sljit_sw srcw)
-{
-	SLJIT_UNUSED_ARG(arg_types);
-	CHECK_ERROR();
-	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
-
-	if (src & SLJIT_MEM) {
-		ADJUST_LOCAL_OFFSET(src, srcw);
-		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
-		src = TMP_REG1;
-	}
-
-	if (type & SLJIT_CALL_RETURN) {
-		if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
-			FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(src) | IMM_I(0)));
-			src = TMP_REG1;
-		}
-
-		FAIL_IF(emit_stack_frame_release(compiler, 0));
-		type = SLJIT_JUMP;
-	}
-
-	SLJIT_SKIP_CHECKS(compiler);
-	return sljit_emit_ijump(compiler, type, src, srcw);
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
-	sljit_s32 dst, sljit_sw dstw,
-	sljit_s32 type)
-{
-	sljit_s32 src_r, dst_r, invert;
-	sljit_s32 saved_op = op;
-#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
-	sljit_s32 mem_type = WORD_DATA;
-#else
-	sljit_s32 mem_type = ((op & SLJIT_32) || op == SLJIT_MOV32) ? (INT_DATA | SIGNED_DATA) : WORD_DATA;
-#endif
-
-	CHECK_ERROR();
-	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
-	ADJUST_LOCAL_OFFSET(dst, dstw);
-
-	op = GET_OPCODE(op);
-	dst_r = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;
-
-	compiler->cache_arg = 0;
-	compiler->cache_argw = 0;
-
-	if (op >= SLJIT_ADD && (dst & SLJIT_MEM))
-		FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, TMP_REG1, dst, dstw, dst, dstw));
-
-	if (type < SLJIT_F_EQUAL) {
-		src_r = OTHER_FLAG;
-		invert = type & 0x1;
-
-		switch (type) {
-		case SLJIT_EQUAL:
-		case SLJIT_NOT_EQUAL:
-			FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RS1(EQUAL_FLAG) | IMM_I(1)));
-			src_r = dst_r;
-			break;
-		case SLJIT_OVERFLOW:
-		case SLJIT_NOT_OVERFLOW:
-			if (compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) {
-				src_r = OTHER_FLAG;
-				break;
-			}
-			FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RS1(OTHER_FLAG) | IMM_I(1)));
-			src_r = dst_r;
-			invert ^= 0x1;
-			break;
-		}
-	} else {
-		invert = 0;
-		src_r = OTHER_FLAG;
-
-		switch (type) {
-		case SLJIT_F_NOT_EQUAL:
-		case SLJIT_UNORDERED_OR_NOT_EQUAL:
-		case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. */
-		case SLJIT_F_GREATER_EQUAL:
-		case SLJIT_UNORDERED_OR_GREATER_EQUAL:
-		case SLJIT_UNORDERED_OR_LESS_EQUAL:
-		case SLJIT_F_GREATER:
-		case SLJIT_UNORDERED_OR_GREATER:
-		case SLJIT_UNORDERED_OR_LESS:
-		case SLJIT_UNORDERED:
-			invert = 1;
-			break;
-		}
-	}
-
-	if (invert) {
-		FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RS1(src_r) | IMM_I(1)));
-		src_r = dst_r;
-	}
-
-	if (op < SLJIT_ADD) {
-		if (dst & SLJIT_MEM)
-			return emit_op_mem(compiler, mem_type, src_r, dst, dstw);
-
-		if (src_r != dst_r)
-			return push_inst(compiler, ADDI | RD(dst_r) | RS1(src_r) | IMM_I(0));
-		return SLJIT_SUCCESS;
-	}
-
-	mem_type |= CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE;
-
-	if (dst & SLJIT_MEM)
-		return emit_op(compiler, saved_op, mem_type, dst, dstw, TMP_REG1, 0, src_r, 0);
-	return emit_op(compiler, saved_op, mem_type, dst, dstw, dst, dstw, src_r, 0);
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
-	sljit_s32 dst_reg,
-	sljit_s32 src, sljit_sw srcw)
-{
-	CHECK_ERROR();
-	CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
-
-	return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);;
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
-	sljit_s32 reg,
-	sljit_s32 mem, sljit_sw memw)
-{
-	sljit_s32 flags;
-
-	CHECK_ERROR();
-	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
-
-	if (!(reg & REG_PAIR_MASK))
-		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
-
-	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
-		memw &= 0x3;
-
-		if (SLJIT_UNLIKELY(memw != 0)) {
-			FAIL_IF(push_inst(compiler, SLLI | RD(TMP_REG1) | RS1(OFFS_REG(mem)) | IMM_I(memw)));
-			FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RS1(TMP_REG1) | RS2(mem & REG_MASK)));
-		} else
-			FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RS1(mem & REG_MASK) | RS2(OFFS_REG(mem))));
-
-		mem = TMP_REG1;
-		memw = 0;
-	} else if (memw > SIMM_MAX - SSIZE_OF(sw) || memw < SIMM_MIN) {
-		if (((memw + 0x800) & 0xfff) <= 0xfff - SSIZE_OF(sw)) {
-			FAIL_IF(load_immediate(compiler, TMP_REG1, TO_ARGW_HI(memw), TMP_REG3));
-			memw &= 0xfff;
-		} else {
-			FAIL_IF(load_immediate(compiler, TMP_REG1, memw, TMP_REG3));
-			memw = 0;
-		}
-
-		if (mem & REG_MASK)
-			FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RS1(TMP_REG1) | RS2(mem & REG_MASK)));
-
-		mem = TMP_REG1;
-	} else {
-		mem &= REG_MASK;
-		memw &= 0xfff;
-	}
-
-	SLJIT_ASSERT((memw >= 0 && memw <= SIMM_MAX - SSIZE_OF(sw)) || (memw > SIMM_MAX && memw <= 0xfff));
-
-	if (!(type & SLJIT_MEM_STORE) && mem == REG_PAIR_FIRST(reg)) {
-		FAIL_IF(push_mem_inst(compiler, WORD_DATA | LOAD_DATA, REG_PAIR_SECOND(reg), mem, (memw + SSIZE_OF(sw)) & 0xfff));
-		return push_mem_inst(compiler, WORD_DATA | LOAD_DATA, REG_PAIR_FIRST(reg), mem, memw);
-	}
-
-	flags = WORD_DATA | (!(type & SLJIT_MEM_STORE) ? LOAD_DATA : 0);
-
-	FAIL_IF(push_mem_inst(compiler, flags, REG_PAIR_FIRST(reg), mem, memw));
-	return push_mem_inst(compiler, flags, REG_PAIR_SECOND(reg), mem, (memw + SSIZE_OF(sw)) & 0xfff);
-}
-
-#undef TO_ARGW_HI
-
-SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
-{
-	struct sljit_const *const_;
-	sljit_s32 dst_r;
-
-	CHECK_ERROR_PTR();
-	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
-	ADJUST_LOCAL_OFFSET(dst, dstw);
-
-	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
-	PTR_FAIL_IF(!const_);
-	set_const(const_, compiler);
-
-	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
-	PTR_FAIL_IF(emit_const(compiler, dst_r, init_value, ADDI | RD(dst_r)));
-
-	if (dst & SLJIT_MEM)
-		PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
-
-	return const_;
-}
-
-SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
-{
-	struct sljit_put_label *put_label;
-	sljit_s32 dst_r;
-
-	CHECK_ERROR_PTR();
-	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
-	ADJUST_LOCAL_OFFSET(dst, dstw);
-
-	put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
-	PTR_FAIL_IF(!put_label);
-	set_put_label(put_label, compiler, 0);
-
-	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
-	PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r));
-#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
-	compiler->size += 1;
-#else
-	compiler->size += 5;
-#endif
-
-	if (dst & SLJIT_MEM)
-		PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
-
-	return put_label;
-}
-
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
-{
-	sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
-}

+ 142 - 504
thirdparty/pcre2/src/sljit/sljitNativeS390X.c

@@ -103,8 +103,11 @@ static const sljit_gpr r15 = 15;	/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stac
 /* When reg cannot be unused. */
 /* When reg cannot be unused. */
 #define IS_GPR_REG(reg)		((reg > 0) && (reg) <= SLJIT_SP)
 #define IS_GPR_REG(reg)		((reg > 0) && (reg) <= SLJIT_SP)
 
 
-/* Link register. */
+/* Link registers. The normal link register is r14, but since
+   we use that for flags we need to use r0 instead to do fast
+   calls so that flags are preserved. */
 static const sljit_gpr link_r = 14;     /* r14 */
 static const sljit_gpr link_r = 14;     /* r14 */
+static const sljit_gpr fast_link_r = 0; /* r0 */
 
 
 #define TMP_FREG1	(0)
 #define TMP_FREG1	(0)
 
 
@@ -217,8 +220,7 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t
 		}
 		}
 		/* fallthrough */
 		/* fallthrough */
 
 
-	case SLJIT_F_EQUAL:
-	case SLJIT_ORDERED_EQUAL:
+	case SLJIT_EQUAL_F64:
 		return cc0;
 		return cc0;
 
 
 	case SLJIT_NOT_EQUAL:
 	case SLJIT_NOT_EQUAL:
@@ -232,14 +234,13 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t
 		}
 		}
 		/* fallthrough */
 		/* fallthrough */
 
 
-	case SLJIT_UNORDERED_OR_NOT_EQUAL:
+	case SLJIT_NOT_EQUAL_F64:
 		return (cc1 | cc2 | cc3);
 		return (cc1 | cc2 | cc3);
 
 
 	case SLJIT_LESS:
 	case SLJIT_LESS:
 		return cc1;
 		return cc1;
 
 
 	case SLJIT_GREATER_EQUAL:
 	case SLJIT_GREATER_EQUAL:
-	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
 		return (cc0 | cc2 | cc3);
 		return (cc0 | cc2 | cc3);
 
 
 	case SLJIT_GREATER:
 	case SLJIT_GREATER:
@@ -253,8 +254,7 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t
 		return (cc0 | cc1 | cc2);
 		return (cc0 | cc1 | cc2);
 
 
 	case SLJIT_SIG_LESS:
 	case SLJIT_SIG_LESS:
-	case SLJIT_F_LESS:
-	case SLJIT_ORDERED_LESS:
+	case SLJIT_LESS_F64:
 		return cc1;
 		return cc1;
 
 
 	case SLJIT_NOT_CARRY:
 	case SLJIT_NOT_CARRY:
@@ -263,8 +263,7 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t
 		/* fallthrough */
 		/* fallthrough */
 
 
 	case SLJIT_SIG_LESS_EQUAL:
 	case SLJIT_SIG_LESS_EQUAL:
-	case SLJIT_F_LESS_EQUAL:
-	case SLJIT_ORDERED_LESS_EQUAL:
+	case SLJIT_LESS_EQUAL_F64:
 		return (cc0 | cc1);
 		return (cc0 | cc1);
 
 
 	case SLJIT_CARRY:
 	case SLJIT_CARRY:
@@ -273,7 +272,6 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t
 		/* fallthrough */
 		/* fallthrough */
 
 
 	case SLJIT_SIG_GREATER:
 	case SLJIT_SIG_GREATER:
-	case SLJIT_UNORDERED_OR_GREATER:
 		/* Overflow is considered greater, see SLJIT_SUB. */
 		/* Overflow is considered greater, see SLJIT_SUB. */
 		return cc2 | cc3;
 		return cc2 | cc3;
 
 
@@ -285,7 +283,7 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t
 			return (cc2 | cc3);
 			return (cc2 | cc3);
 		/* fallthrough */
 		/* fallthrough */
 
 
-	case SLJIT_UNORDERED:
+	case SLJIT_UNORDERED_F64:
 		return cc3;
 		return cc3;
 
 
 	case SLJIT_NOT_OVERFLOW:
 	case SLJIT_NOT_OVERFLOW:
@@ -293,29 +291,14 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t
 			return (cc0 | cc1);
 			return (cc0 | cc1);
 		/* fallthrough */
 		/* fallthrough */
 
 
-	case SLJIT_ORDERED:
+	case SLJIT_ORDERED_F64:
 		return (cc0 | cc1 | cc2);
 		return (cc0 | cc1 | cc2);
 
 
-	case SLJIT_F_NOT_EQUAL:
-	case SLJIT_ORDERED_NOT_EQUAL:
-		return (cc1 | cc2);
-
-	case SLJIT_F_GREATER:
-	case SLJIT_ORDERED_GREATER:
+	case SLJIT_GREATER_F64:
 		return cc2;
 		return cc2;
 
 
-	case SLJIT_F_GREATER_EQUAL:
-	case SLJIT_ORDERED_GREATER_EQUAL:
+	case SLJIT_GREATER_EQUAL_F64:
 		return (cc0 | cc2);
 		return (cc0 | cc2);
-
-	case SLJIT_UNORDERED_OR_LESS_EQUAL:
-		return (cc0 | cc1 | cc3);
-
-	case SLJIT_UNORDERED_OR_EQUAL:
-		return (cc0 | cc3);
-
-	case SLJIT_UNORDERED_OR_LESS:
-		return (cc1 | cc3);
 	}
 	}
 
 
 	SLJIT_UNREACHABLE();
 	SLJIT_UNREACHABLE();
@@ -995,7 +978,7 @@ static sljit_s32 make_addr_bx(struct sljit_compiler *compiler,
 	(cond) ? EVAL(i1, r, addr) : EVAL(i2, r, addr)
 	(cond) ? EVAL(i1, r, addr) : EVAL(i2, r, addr)
 
 
 /* May clobber tmp1. */
 /* May clobber tmp1. */
-static sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst_r,
+static sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst,
 		sljit_s32 src, sljit_sw srcw,
 		sljit_s32 src, sljit_sw srcw,
 		sljit_s32 is_32bit)
 		sljit_s32 is_32bit)
 {
 {
@@ -1003,36 +986,21 @@ static sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst_r,
 	sljit_ins ins;
 	sljit_ins ins;
 
 
 	SLJIT_ASSERT(src & SLJIT_MEM);
 	SLJIT_ASSERT(src & SLJIT_MEM);
-
-	if (is_32bit && ((src & OFFS_REG_MASK) || is_u12(srcw) || !is_s20(srcw))) {
+	if (have_ldisp() || !is_32bit)
+		FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
+	else
 		FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
 		FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
-		return push_inst(compiler, 0x58000000 /* l */ | R20A(dst_r) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
-	}
 
 
-	FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
-
-	ins = is_32bit ? 0xe30000000058 /* ly */ : 0xe30000000004 /* lg */;
-	return push_inst(compiler, ins | R36A(dst_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
-}
-
-/* May clobber tmp1. */
-static sljit_s32 load_unsigned_word(struct sljit_compiler *compiler, sljit_gpr dst_r,
-		sljit_s32 src, sljit_sw srcw,
-		sljit_s32 is_32bit)
-{
-	struct addr addr;
-	sljit_ins ins;
-
-	SLJIT_ASSERT(src & SLJIT_MEM);
-
-	FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
+	if (is_32bit)
+		ins = WHEN(is_u12(addr.offset), dst, l, ly, addr);
+	else
+		ins = lg(dst, addr.offset, addr.index, addr.base);
 
 
-	ins = is_32bit ? 0xe30000000016 /* llgf */ : 0xe30000000004 /* lg */;
-	return push_inst(compiler, ins | R36A(dst_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
+	return push_inst(compiler, ins);
 }
 }
 
 
 /* May clobber tmp1. */
 /* May clobber tmp1. */
-static sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src_r,
+static sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src,
 		sljit_s32 dst, sljit_sw dstw,
 		sljit_s32 dst, sljit_sw dstw,
 		sljit_s32 is_32bit)
 		sljit_s32 is_32bit)
 {
 {
@@ -1040,16 +1008,17 @@ static sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src_r,
 	sljit_ins ins;
 	sljit_ins ins;
 
 
 	SLJIT_ASSERT(dst & SLJIT_MEM);
 	SLJIT_ASSERT(dst & SLJIT_MEM);
-
-	if (is_32bit && ((dst & OFFS_REG_MASK) || is_u12(dstw) || !is_s20(dstw))) {
+	if (have_ldisp() || !is_32bit)
+		FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp1));
+	else
 		FAIL_IF(make_addr_bx(compiler, &addr, dst, dstw, tmp1));
 		FAIL_IF(make_addr_bx(compiler, &addr, dst, dstw, tmp1));
-		return push_inst(compiler, 0x50000000 /* st */ | R20A(src_r) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
-	}
 
 
-	FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp1));
+	if (is_32bit)
+		ins = WHEN(is_u12(addr.offset), src, st, sty, addr);
+	else
+		ins = stg(src, addr.offset, addr.index, addr.base);
 
 
-	ins = is_32bit ? 0xe30000000050 /* sty */ : 0xe30000000024 /* stg */;
-	return push_inst(compiler, ins | R36A(src_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
+	return push_inst(compiler, ins);
 }
 }
 
 
 #undef WHEN
 #undef WHEN
@@ -1649,24 +1618,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
 {
 {
 	/* TODO(mundaym): implement all */
 	/* TODO(mundaym): implement all */
 	switch (feature_type) {
 	switch (feature_type) {
-	case SLJIT_HAS_FPU:
 	case SLJIT_HAS_CLZ:
 	case SLJIT_HAS_CLZ:
-	case SLJIT_HAS_ROT:
-	case SLJIT_HAS_PREFETCH:
-		return 1;
-	case SLJIT_HAS_CTZ:
-		return 2;
+		return have_eimm() ? 1 : 0; /* FLOGR instruction */
 	case SLJIT_HAS_CMOV:
 	case SLJIT_HAS_CMOV:
 		return have_lscond1() ? 1 : 0;
 		return have_lscond1() ? 1 : 0;
+	case SLJIT_HAS_FPU:
+		return 1;
 	}
 	}
 	return 0;
 	return 0;
 }
 }
 
 
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
-{
-	return (type >= SLJIT_UNORDERED && type <= SLJIT_ORDERED_LESS_EQUAL);
-}
-
 /* --------------------------------------------------------------------- */
 /* --------------------------------------------------------------------- */
 /*  Entry, exit                                                          */
 /*  Entry, exit                                                          */
 /* --------------------------------------------------------------------- */
 /* --------------------------------------------------------------------- */
@@ -1675,7 +1636,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
 {
 {
-	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
+	sljit_s32 word_arg_count = 0;
 	sljit_s32 offset, i, tmp;
 	sljit_s32 offset, i, tmp;
 
 
 	CHECK_ERROR();
 	CHECK_ERROR();
@@ -1687,13 +1648,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 
 
 	offset = 2 * SSIZE_OF(sw);
 	offset = 2 * SSIZE_OF(sw);
 	if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
 	if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
-		if (saved_arg_count == 0) {
-			FAIL_IF(push_inst(compiler, stmg(r6, r14, offset, r15)));
-			offset += 9 * SSIZE_OF(sw);
-		} else {
-			FAIL_IF(push_inst(compiler, stmg(r6, r13 - (sljit_gpr)saved_arg_count, offset, r15)));
-			offset += (8 - saved_arg_count) * SSIZE_OF(sw);
-		}
+		FAIL_IF(push_inst(compiler, stmg(r6, r14, offset, r15))); /* save registers TODO(MGM): optimize */
+		offset += 9 * SSIZE_OF(sw);
 	} else {
 	} else {
 		if (scratches == SLJIT_FIRST_SAVED_REG) {
 		if (scratches == SLJIT_FIRST_SAVED_REG) {
 			FAIL_IF(push_inst(compiler, stg(r6, offset, 0, r15)));
 			FAIL_IF(push_inst(compiler, stg(r6, offset, 0, r15)));
@@ -1703,30 +1659,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 			offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
 			offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
 		}
 		}
 
 
-		if (saved_arg_count == 0) {
-			if (saveds == 0) {
-				FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));
-				offset += SSIZE_OF(sw);
-			} else {
-				FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r14, offset, r15)));
-				offset += (saveds + 1) * SSIZE_OF(sw);
-			}
-		} else if (saveds > saved_arg_count) {
-			if (saveds == saved_arg_count + 1) {
-				FAIL_IF(push_inst(compiler, stg(r14 - (sljit_gpr)saveds, offset, 0, r15)));
-				offset += SSIZE_OF(sw);
-			} else {
-				FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)saved_arg_count, offset, r15)));
-				offset += (saveds - saved_arg_count) * SSIZE_OF(sw);
-			}
+		if (saveds == 0) {
+			FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));
+			offset += SSIZE_OF(sw);
+		} else {
+			FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r14, offset, r15)));
+			offset += (saveds + 1) * SSIZE_OF(sw);
 		}
 		}
 	}
 	}
 
 
-	if (saved_arg_count > 0) {
-		FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));
-		offset += SSIZE_OF(sw);
-	}
-
 	tmp = SLJIT_FS0 - fsaveds;
 	tmp = SLJIT_FS0 - fsaveds;
 	for (i = SLJIT_FS0; i > tmp; i--) {
 	for (i = SLJIT_FS0; i > tmp; i--) {
 		FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));
 		FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));
@@ -1743,19 +1684,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 
 
 	FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size)));
 	FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size)));
 
 
-	if (options & SLJIT_ENTER_REG_ARG)
-		return SLJIT_SUCCESS;
-
 	arg_types >>= SLJIT_ARG_SHIFT;
 	arg_types >>= SLJIT_ARG_SHIFT;
-	saved_arg_count = 0;
 	tmp = 0;
 	tmp = 0;
 	while (arg_types > 0) {
 	while (arg_types > 0) {
 		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
 		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
 			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
 			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
-				FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0 - saved_arg_count), gpr(SLJIT_R0 + tmp))));
-				saved_arg_count++;
+				FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0 - tmp), gpr(SLJIT_R0 + word_arg_count))));
+				tmp++;
 			}
 			}
-			tmp++;
+			word_arg_count++;
 		}
 		}
 
 
 		arg_types >>= SLJIT_ARG_SHIFT;
 		arg_types >>= SLJIT_ARG_SHIFT;
@@ -1776,13 +1713,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
 	return SLJIT_SUCCESS;
 	return SLJIT_SUCCESS;
 }
 }
 
 
-static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_gpr last_reg)
+static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
 {
 {
 	sljit_s32 offset, i, tmp;
 	sljit_s32 offset, i, tmp;
 	sljit_s32 local_size = compiler->local_size;
 	sljit_s32 local_size = compiler->local_size;
 	sljit_s32 saveds = compiler->saveds;
 	sljit_s32 saveds = compiler->saveds;
 	sljit_s32 scratches = compiler->scratches;
 	sljit_s32 scratches = compiler->scratches;
-	sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
 
 
 	if (is_u12(local_size))
 	if (is_u12(local_size))
 		FAIL_IF(push_inst(compiler, 0x41000000 /* ly */ | R20A(r15) | R12A(r15) | (sljit_ins)local_size));
 		FAIL_IF(push_inst(compiler, 0x41000000 /* ly */ | R20A(r15) | R12A(r15) | (sljit_ins)local_size));
@@ -1791,13 +1727,8 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit
 
 
 	offset = 2 * SSIZE_OF(sw);
 	offset = 2 * SSIZE_OF(sw);
 	if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
 	if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
-		if (kept_saveds_count == 0) {
-			FAIL_IF(push_inst(compiler, lmg(r6, last_reg, offset, r15)));
-			offset += 9 * SSIZE_OF(sw);
-		} else {
-			FAIL_IF(push_inst(compiler, lmg(r6, r13 - (sljit_gpr)kept_saveds_count, offset, r15)));
-			offset += (8 - kept_saveds_count) * SSIZE_OF(sw);
-		}
+		FAIL_IF(push_inst(compiler, lmg(r6, r14, offset, r15))); /* save registers TODO(MGM): optimize */
+		offset += 9 * SSIZE_OF(sw);
 	} else {
 	} else {
 		if (scratches == SLJIT_FIRST_SAVED_REG) {
 		if (scratches == SLJIT_FIRST_SAVED_REG) {
 			FAIL_IF(push_inst(compiler, lg(r6, offset, 0, r15)));
 			FAIL_IF(push_inst(compiler, lg(r6, offset, 0, r15)));
@@ -1807,33 +1738,13 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit
 			offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
 			offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
 		}
 		}
 
 
-		if (kept_saveds_count == 0) {
-			if (saveds == 0) {
-				if (last_reg == r14)
-					FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));
-				offset += SSIZE_OF(sw);
-			} else if (saveds == 1 && last_reg == r13) {
-				FAIL_IF(push_inst(compiler, lg(r13, offset, 0, r15)));
-				offset += 2 * SSIZE_OF(sw);
-			} else {
-				FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, last_reg, offset, r15)));
-				offset += (saveds + 1) * SSIZE_OF(sw);
-			}
-		} else if (saveds > kept_saveds_count) {
-			if (saveds == kept_saveds_count + 1) {
-				FAIL_IF(push_inst(compiler, lg(r14 - (sljit_gpr)saveds, offset, 0, r15)));
-				offset += SSIZE_OF(sw);
-			} else {
-				FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)kept_saveds_count, offset, r15)));
-				offset += (saveds - kept_saveds_count) * SSIZE_OF(sw);
-			}
-		}
-	}
-
-	if (kept_saveds_count > 0) {
-		if (last_reg == r14)
+		if (saveds == 0) {
 			FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));
 			FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));
-		offset += SSIZE_OF(sw);
+			offset += SSIZE_OF(sw);
+		} else {
+			FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, r14, offset, r15)));
+			offset += (saveds + 1) * SSIZE_OF(sw);
+		}
 	}
 	}
 
 
 	tmp = SLJIT_FS0 - compiler->fsaveds;
 	tmp = SLJIT_FS0 - compiler->fsaveds;
@@ -1855,33 +1766,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler
 	CHECK_ERROR();
 	CHECK_ERROR();
 	CHECK(check_sljit_emit_return_void(compiler));
 	CHECK(check_sljit_emit_return_void(compiler));
 
 
-	FAIL_IF(emit_stack_frame_release(compiler, r14));
+	FAIL_IF(emit_stack_frame_release(compiler));
 	return push_inst(compiler, br(r14)); /* return */
 	return push_inst(compiler, br(r14)); /* return */
 }
 }
 
 
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
-	sljit_s32 src, sljit_sw srcw)
-{
-	CHECK_ERROR();
-	CHECK(check_sljit_emit_return_to(compiler, src, srcw));
-
-	if (src & SLJIT_MEM) {
-		ADJUST_LOCAL_OFFSET(src, srcw);
-		FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));
-		src = TMP_REG2;
-		srcw = 0;
-	} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
-		FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));
-		src = TMP_REG2;
-		srcw = 0;
-	}
-
-	FAIL_IF(emit_stack_frame_release(compiler, r13));
-
-	SLJIT_SKIP_CHECKS(compiler);
-	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
-}
-
 /* --------------------------------------------------------------------- */
 /* --------------------------------------------------------------------- */
 /*  Operators                                                            */
 /*  Operators                                                            */
 /* --------------------------------------------------------------------- */
 /* --------------------------------------------------------------------- */
@@ -1970,47 +1858,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
 	return push_inst(compiler, lgr(arg1, tmp0));
 	return push_inst(compiler, lgr(arg1, tmp0));
 }
 }
 
 
-static sljit_s32 sljit_emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r, sljit_gpr src_r)
-{
-	sljit_s32 is_ctz = (GET_OPCODE(op) == SLJIT_CTZ);
-
-	if ((op & SLJIT_32) && src_r != tmp0) {
-		FAIL_IF(push_inst(compiler, 0xb9160000 /* llgfr */ | R4A(tmp0) | R0A(src_r)));
-		src_r = tmp0;
-	}
-
-	if (is_ctz) {
-		FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */) | R4A(tmp1) | R0A(src_r)));
-
-		if (src_r == tmp0)
-			FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 0x1400 /* nr */ : 0xb9800000 /* ngr */) | R4A(tmp0) | R0A(tmp1)));
-		else
-			FAIL_IF(push_inst(compiler, 0xb9e40000 /* ngrk */ | R12A(tmp1) | R4A(tmp0) | R0A(src_r)));
-
-		src_r = tmp0;
-	}
-
-	FAIL_IF(push_inst(compiler, 0xb9830000 /* flogr */ | R4A(tmp0) | R0A(src_r)));
-
-	if (is_ctz)
-		FAIL_IF(push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(tmp1) | R32A(tmp0) | ((sljit_ins)(-64 & 0xffff) << 16)));
-
-	if (op & SLJIT_32) {
-		if (!is_ctz && dst_r != tmp0)
-			return push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(dst_r) | R32A(tmp0) | ((sljit_ins)(-32 & 0xffff) << 16));
-
-		FAIL_IF(push_inst(compiler, 0xc20800000000 /* agfi */ | R36A(tmp0) | (sljit_u32)-32));
-	}
-
-	if (is_ctz)
-		FAIL_IF(push_inst(compiler, 0xec0000000057 /* rxsbg */ | R36A(tmp0) | R32A(tmp1) | ((sljit_ins)((op & SLJIT_32) ? 59 : 58) << 24) | (63 << 16) | ((sljit_ins)((op & SLJIT_32) ? 5 : 6) << 8)));
-
-	if (dst_r == tmp0)
-		return SLJIT_SUCCESS;
-
-	return push_inst(compiler, ((op & SLJIT_32) ? 0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(tmp0));
-}
-
 /* LEVAL will be defined later with different parameters as needed */
 /* LEVAL will be defined later with different parameters as needed */
 #define WHEN2(cond, i1, i2) (cond) ? LEVAL(i1) : LEVAL(i2)
 #define WHEN2(cond, i1, i2) (cond) ? LEVAL(i1) : LEVAL(i2)
 
 
@@ -2244,25 +2091,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
 
 
 	dst_r = FAST_IS_REG(dst) ? gpr(REG_MASK & dst) : tmp0;
 	dst_r = FAST_IS_REG(dst) ? gpr(REG_MASK & dst) : tmp0;
 	src_r = FAST_IS_REG(src) ? gpr(REG_MASK & src) : tmp0;
 	src_r = FAST_IS_REG(src) ? gpr(REG_MASK & src) : tmp0;
+	if (src & SLJIT_MEM)
+		FAIL_IF(load_word(compiler, src_r, src, srcw, src & SLJIT_32));
 
 
 	compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
 	compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
 
 
 	/* TODO(mundaym): optimize loads and stores */
 	/* TODO(mundaym): optimize loads and stores */
-	switch (opcode) {
+	switch (opcode | (op & SLJIT_32)) {
 	case SLJIT_NOT:
 	case SLJIT_NOT:
-		if (src & SLJIT_MEM)
-			FAIL_IF(load_word(compiler, src_r, src, srcw, op & SLJIT_32));
-
 		/* emulate ~x with x^-1 */
 		/* emulate ~x with x^-1 */
-		if (!(op & SLJIT_32)) {
-			FAIL_IF(push_load_imm_inst(compiler, tmp1, -1));
-			if (src_r != dst_r)
-				FAIL_IF(push_inst(compiler, lgr(dst_r, src_r)));
-
-			FAIL_IF(push_inst(compiler, xgr(dst_r, tmp1)));
-			break;
-		}
+		FAIL_IF(push_load_imm_inst(compiler, tmp1, -1));
+		if (src_r != dst_r)
+			FAIL_IF(push_inst(compiler, lgr(dst_r, src_r)));
 
 
+		FAIL_IF(push_inst(compiler, xgr(dst_r, tmp1)));
+		break;
+	case SLJIT_NOT32:
+		/* emulate ~x with x^-1 */
 		if (have_eimm())
 		if (have_eimm())
 			FAIL_IF(push_inst(compiler, xilf(dst_r, 0xffffffff)));
 			FAIL_IF(push_inst(compiler, xilf(dst_r, 0xffffffff)));
 		else {
 		else {
@@ -2274,11 +2119,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
 		}
 		}
 		break;
 		break;
 	case SLJIT_CLZ:
 	case SLJIT_CLZ:
-	case SLJIT_CTZ:
-		if (src & SLJIT_MEM)
-			FAIL_IF(load_unsigned_word(compiler, src_r, src, srcw, op & SLJIT_32));
-
-		FAIL_IF(sljit_emit_clz_ctz(compiler, op, dst_r, src_r));
+		if (have_eimm()) {
+			FAIL_IF(push_inst(compiler, flogr(tmp0, src_r))); /* clobbers tmp1 */
+			if (dst_r != tmp0)
+				FAIL_IF(push_inst(compiler, lgr(dst_r, tmp0)));
+		} else {
+			abort(); /* TODO(mundaym): no eimm (?) */
+		}
+		break;
+	case SLJIT_CLZ32:
+		if (have_eimm()) {
+			FAIL_IF(push_inst(compiler, sllg(tmp1, src_r, 32, 0)));
+			FAIL_IF(push_inst(compiler, iilf(tmp1, 0xffffffff)));
+			FAIL_IF(push_inst(compiler, flogr(tmp0, tmp1))); /* clobbers tmp1 */
+			if (dst_r != tmp0)
+				FAIL_IF(push_inst(compiler, lr(dst_r, tmp0)));
+		} else {
+			abort(); /* TODO(mundaym): no eimm (?) */
+		}
 		break;
 		break;
 	default:
 	default:
 		SLJIT_UNREACHABLE();
 		SLJIT_UNREACHABLE();
@@ -2287,8 +2145,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
 	if ((op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW))
 	if ((op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW))
 		FAIL_IF(update_zero_overflow(compiler, op, dst_r));
 		FAIL_IF(update_zero_overflow(compiler, op, dst_r));
 
 
+	/* TODO(carenas): doesn't need FAIL_IF */
 	if (dst & SLJIT_MEM)
 	if (dst & SLJIT_MEM)
-		return store_word(compiler, dst_r, dst, dstw, op & SLJIT_32);
+		FAIL_IF(store_word(compiler, dst_r, dst, dstw, op & SLJIT_32));
 
 
 	return SLJIT_SUCCESS;
 	return SLJIT_SUCCESS;
 }
 }
@@ -2307,6 +2166,11 @@ static SLJIT_INLINE int is_commutative(sljit_s32 op)
 	return 0;
 	return 0;
 }
 }
 
 
+static SLJIT_INLINE int is_shift(sljit_s32 op) {
+	sljit_s32 v = GET_OPCODE(op);
+	return (v == SLJIT_SHL || v == SLJIT_ASHR || v == SLJIT_LSHR) ? 1 : 0;
+}
+
 static const struct ins_forms add_forms = {
 static const struct ins_forms add_forms = {
 	0x1a00, /* ar */
 	0x1a00, /* ar */
 	0xb9080000, /* agr */
 	0xb9080000, /* agr */
@@ -2740,41 +2604,33 @@ static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op,
 	sljit_ins ins;
 	sljit_ins ins;
 
 
 	if (FAST_IS_REG(src1))
 	if (FAST_IS_REG(src1))
-		src_r = gpr(src1);
+		src_r = gpr(src1 & REG_MASK);
 	else
 	else
 		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
 		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
 
 
-	if (!(src2 & SLJIT_IMM)) {
-		if (FAST_IS_REG(src2))
-			base_r = gpr(src2);
-		else {
-			FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
-			base_r = tmp1;
-		}
-
-		if ((op & SLJIT_32) && (type == SLJIT_MSHL || type == SLJIT_MLSHR || type == SLJIT_MASHR)) {
-			if (base_r != tmp1) {
-				FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(base_r) | (59 << 24) | (1 << 23) | (63 << 16)));
-				base_r = tmp1;
-			} else
-				FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f));
-		}
-	} else
+	if (src2 & SLJIT_IMM)
 		imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f));
 		imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f));
+	else if (FAST_IS_REG(src2))
+		base_r = gpr(src2 & REG_MASK);
+	else {
+		FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
+		base_r = tmp1;
+	}
 
 
 	if ((op & SLJIT_32) && dst_r == src_r) {
 	if ((op & SLJIT_32) && dst_r == src_r) {
-		if (type == SLJIT_SHL || type == SLJIT_MSHL)
+		if (type == SLJIT_SHL)
 			ins = 0x89000000 /* sll */;
 			ins = 0x89000000 /* sll */;
-		else if (type == SLJIT_LSHR || type == SLJIT_MLSHR)
+		else if (type == SLJIT_LSHR)
 			ins = 0x88000000 /* srl */;
 			ins = 0x88000000 /* srl */;
 		else
 		else
 			ins = 0x8a000000 /* sra */;
 			ins = 0x8a000000 /* sra */;
 
 
 		FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(base_r) | imm));
 		FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(base_r) | imm));
-	} else {
-		if (type == SLJIT_SHL || type == SLJIT_MSHL)
+	}
+	else {
+		if (type == SLJIT_SHL)
 			ins = (op & SLJIT_32) ? 0xeb00000000df /* sllk */ : 0xeb000000000d /* sllg */;
 			ins = (op & SLJIT_32) ? 0xeb00000000df /* sllk */ : 0xeb000000000d /* sllg */;
-		else if (type == SLJIT_LSHR || type == SLJIT_MLSHR)
+		else if (type == SLJIT_LSHR)
 			ins = (op & SLJIT_32) ? 0xeb00000000de /* srlk */ : 0xeb000000000c /* srlg */;
 			ins = (op & SLJIT_32) ? 0xeb00000000de /* srlk */ : 0xeb000000000c /* srlg */;
 		else
 		else
 			ins = (op & SLJIT_32) ? 0xeb00000000dc /* srak */ : 0xeb000000000a /* srag */;
 			ins = (op & SLJIT_32) ? 0xeb00000000dc /* srak */ : 0xeb000000000a /* srag */;
@@ -2788,47 +2644,6 @@ static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op,
 	return SLJIT_SUCCESS;
 	return SLJIT_SUCCESS;
 }
 }
 
 
-static sljit_s32 sljit_emit_rotate(struct sljit_compiler *compiler, sljit_s32 op,
-	sljit_s32 dst,
-	sljit_s32 src1, sljit_sw src1w,
-	sljit_s32 src2, sljit_sw src2w)
-{
-	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
-	sljit_gpr src_r = tmp0;
-	sljit_gpr base_r = tmp0;
-	sljit_ins imm = 0;
-	sljit_ins ins;
-
-	if (FAST_IS_REG(src1))
-		src_r = gpr(src1);
-	else
-		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
-
-	if (!(src2 & SLJIT_IMM)) {
-		if (FAST_IS_REG(src2))
-			base_r = gpr(src2);
-		else {
-			FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
-			base_r = tmp1;
-		}
-	}
-
-	if (GET_OPCODE(op) == SLJIT_ROTR) {
-		if (!(src2 & SLJIT_IMM)) {
-			ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;
-			FAIL_IF(push_inst(compiler, ins | R4A(tmp1) | R0A(base_r)));
-			base_r = tmp1;
-		} else
-			src2w = -src2w;
-	}
-
-	if (src2 & SLJIT_IMM)
-		imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f));
-
-	ins = (op & SLJIT_32) ? 0xeb000000001d /* rll */ : 0xeb000000001c /* rllg */;
-	return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16));
-}
-
 static const struct ins_forms addc_forms = {
 static const struct ins_forms addc_forms = {
 	0xb9980000, /* alcr */
 	0xb9980000, /* alcr */
 	0xb9880000, /* alcgr */
 	0xb9880000, /* alcgr */
@@ -2901,17 +2716,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
 		FAIL_IF(sljit_emit_bitwise(compiler, op, dst, src1, src1w, src2, src2w));
 		FAIL_IF(sljit_emit_bitwise(compiler, op, dst, src1, src1w, src2, src2w));
 		break;
 		break;
 	case SLJIT_SHL:
 	case SLJIT_SHL:
-	case SLJIT_MSHL:
 	case SLJIT_LSHR:
 	case SLJIT_LSHR:
-	case SLJIT_MLSHR:
 	case SLJIT_ASHR:
 	case SLJIT_ASHR:
-	case SLJIT_MASHR:
 		FAIL_IF(sljit_emit_shift(compiler, op, dst, src1, src1w, src2, src2w));
 		FAIL_IF(sljit_emit_shift(compiler, op, dst, src1, src1w, src2, src2w));
 		break;
 		break;
-	case SLJIT_ROTL:
-	case SLJIT_ROTR:
-		FAIL_IF(sljit_emit_rotate(compiler, op, dst, src1, src1w, src2, src2w));
-		break;
 	}
 	}
 
 
 	if (dst & SLJIT_MEM)
 	if (dst & SLJIT_MEM)
@@ -2926,130 +2734,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
 	CHECK_ERROR();
 	CHECK_ERROR();
 	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
 	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
 
 
-	SLJIT_SKIP_CHECKS(compiler);
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+	compiler->skip_checks = 1;
+#endif
 	return sljit_emit_op2(compiler, op, (sljit_s32)tmp0, 0, src1, src1w, src2, src2w);
 	return sljit_emit_op2(compiler, op, (sljit_s32)tmp0, 0, src1, src1w, src2, src2w);
 }
 }
 
 
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
-	sljit_s32 src_dst,
-	sljit_s32 src1, sljit_sw src1w,
-	sljit_s32 src2, sljit_sw src2w)
-{
-	sljit_s32 is_right;
-	sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;
-	sljit_gpr src_dst_r = gpr(src_dst);
-	sljit_gpr src1_r = tmp0;
-	sljit_gpr src2_r = tmp1;
-	sljit_ins ins;
-
-	CHECK_ERROR();
-	CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w));
-
-	is_right = (GET_OPCODE(op) == SLJIT_LSHR || GET_OPCODE(op) == SLJIT_MLSHR);
-
-	if (src_dst == src1) {
-		SLJIT_SKIP_CHECKS(compiler);
-		return sljit_emit_op2(compiler, (is_right ? SLJIT_ROTR : SLJIT_ROTL) | (op & SLJIT_32), src_dst, 0, src_dst, 0, src2, src2w);
-	}
-
-	ADJUST_LOCAL_OFFSET(src1, src1w);
-	ADJUST_LOCAL_OFFSET(src2, src2w);
-
-	if (src1 & SLJIT_MEM)
-		FAIL_IF(load_word(compiler, tmp0, src1, src1w, op & SLJIT_32));
-	else if (src1 & SLJIT_IMM)
-		FAIL_IF(push_load_imm_inst(compiler, tmp0, src1w));
-	else
-		src1_r = gpr(src1);
-
-	if (src2 & SLJIT_IMM) {
-		src2w &= bit_length - 1;
-
-		if (src2w == 0)
-			return SLJIT_SUCCESS;
-	} else if (!(src2 & SLJIT_MEM))
-		src2_r = gpr(src2);
-	else
-		FAIL_IF(load_word(compiler, tmp1, src2, src2w, op & SLJIT_32));
-
-	if (src2 & SLJIT_IMM) {
-		if (op & SLJIT_32) {
-			ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
-			FAIL_IF(push_inst(compiler, ins | R20A(src_dst_r) | (sljit_ins)src2w));
-		} else {
-			ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;
-			FAIL_IF(push_inst(compiler, ins | R36A(src_dst_r) | R32A(src_dst_r) | ((sljit_ins)src2w << 16)));
-		}
-
-		ins = 0xec0000000055 /* risbg */;
-
-		if (is_right) {
-			src2w = bit_length - src2w;
-			ins |= ((sljit_ins)(64 - bit_length) << 24) | ((sljit_ins)(63 - src2w) << 16) | ((sljit_ins)src2w << 8);
-		} else
-			ins |= ((sljit_ins)(64 - src2w) << 24) | ((sljit_ins)63 << 16) | ((sljit_ins)src2w << 8);
-
-		return push_inst(compiler, ins | R36A(src_dst_r) | R32A(src1_r));
-	}
-
-	if (op & SLJIT_32) {
-		if (GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR) {
-			if (src2_r != tmp1) {
-				FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(src2_r) | (59 << 24) | (1 << 23) | (63 << 16)));
-				src2_r = tmp1;
-			} else
-				FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f));
-		}
-
-		ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
-		FAIL_IF(push_inst(compiler, ins | R20A(src_dst_r) | R12A(src2_r)));
-
-		if (src2_r != tmp1) {
-			FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x1f));
-			FAIL_IF(push_inst(compiler, 0x1700 /* xr */ | R4A(tmp1) | R0A(src2_r)));
-		} else
-			FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x1f));
-
-		if (src1_r == tmp0) {
-			ins = is_right ? 0x89000000 /* sll */ : 0x88000000 /* srl */;
-			FAIL_IF(push_inst(compiler, ins | R20A(tmp0) | R12A(tmp1) | 0x1));
-		} else {
-			ins = is_right ? 0xeb00000000df /* sllk */ : 0xeb00000000de /* srlk */;
-			FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src1_r) | R28A(tmp1) | (0x1 << 16)));
-		}
-
-		return push_inst(compiler, 0x1600 /* or */ | R4A(src_dst_r) | R0A(tmp0));
-	}
-
-	ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;
-	FAIL_IF(push_inst(compiler, ins | R36A(src_dst_r) | R32A(src_dst_r) | R28A(src2_r)));
-
-	ins = is_right ? 0xeb000000000d /* sllg */ : 0xeb000000000c /* srlg */;
-
-	if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {
-		if (src2_r != tmp1)
-			FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x3f));
-
-		FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src1_r) | (0x1 << 16)));
-		src1_r = tmp0;
-
-		if (src2_r != tmp1)
-			FAIL_IF(push_inst(compiler, 0xb9820000 /* xgr */ | R4A(tmp1) | R0A(src2_r)));
-		else
-			FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x3f));
-	} else
-		FAIL_IF(push_inst(compiler, 0xb9030000 /* lcgr */ | R4A(tmp1) | R0A(src2_r)));
-
-	FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src1_r) | R28A(tmp1)));
-	return push_inst(compiler, 0xb9810000 /* ogr */ | R4A(src_dst_r) | R0A(tmp0));
-}
-
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(
 	struct sljit_compiler *compiler,
 	struct sljit_compiler *compiler,
 	sljit_s32 op, sljit_s32 src, sljit_sw srcw)
 	sljit_s32 op, sljit_s32 src, sljit_sw srcw)
 {
 {
 	sljit_gpr src_r;
 	sljit_gpr src_r;
-	struct addr addr;
 
 
 	CHECK_ERROR();
 	CHECK_ERROR();
 	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
 	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
@@ -3063,14 +2759,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(
 
 
 		return push_inst(compiler, br(src_r));
 		return push_inst(compiler, br(src_r));
 	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
 	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
+		/* TODO(carenas): implement? */
 		return SLJIT_SUCCESS;
 		return SLJIT_SUCCESS;
 	case SLJIT_PREFETCH_L1:
 	case SLJIT_PREFETCH_L1:
 	case SLJIT_PREFETCH_L2:
 	case SLJIT_PREFETCH_L2:
 	case SLJIT_PREFETCH_L3:
 	case SLJIT_PREFETCH_L3:
 	case SLJIT_PREFETCH_ONCE:
 	case SLJIT_PREFETCH_ONCE:
-		FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
-		return push_inst(compiler, 0xe31000000036 /* pfd */ | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
+		/* TODO(carenas): implement */
+		return SLJIT_SUCCESS;
 	default:
 	default:
+                /* TODO(carenas): probably should not success by default */
 		return SLJIT_SUCCESS;
 		return SLJIT_SUCCESS;
 	}
 	}
 
 
@@ -3366,10 +3064,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
 	ADJUST_LOCAL_OFFSET(dst, dstw);
 	ADJUST_LOCAL_OFFSET(dst, dstw);
 
 
 	if (FAST_IS_REG(dst))
 	if (FAST_IS_REG(dst))
-		return push_inst(compiler, lgr(gpr(dst), link_r));
+		return push_inst(compiler, lgr(gpr(dst), fast_link_r));
 
 
 	/* memory */
 	/* memory */
-	return store_word(compiler, link_r, dst, dstw, 0);
+	return store_word(compiler, fast_link_r, dst, dstw, 0);
 }
 }
 
 
 /* --------------------------------------------------------------------- */
 /* --------------------------------------------------------------------- */
@@ -3409,7 +3107,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
 	/* emit jump instruction */
 	/* emit jump instruction */
 	type &= 0xff;
 	type &= 0xff;
 	if (type >= SLJIT_FAST_CALL)
 	if (type >= SLJIT_FAST_CALL)
-		PTR_FAIL_IF(push_inst(compiler, brasl(link_r, 0)));
+		PTR_FAIL_IF(push_inst(compiler, brasl(type == SLJIT_FAST_CALL ? fast_link_r : link_r, 0)));
 	else
 	else
 		PTR_FAIL_IF(push_inst(compiler, brcl(mask, 0)));
 		PTR_FAIL_IF(push_inst(compiler, brcl(mask, 0)));
 
 
@@ -3419,16 +3117,19 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
 	sljit_s32 arg_types)
 	sljit_s32 arg_types)
 {
 {
-	SLJIT_UNUSED_ARG(arg_types);
 	CHECK_ERROR_PTR();
 	CHECK_ERROR_PTR();
 	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
 	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
 
 
 	if (type & SLJIT_CALL_RETURN) {
 	if (type & SLJIT_CALL_RETURN) {
-		PTR_FAIL_IF(emit_stack_frame_release(compiler, r14));
+		PTR_FAIL_IF(emit_stack_frame_release(compiler));
 		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
 		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
 	}
 	}
 
 
-	SLJIT_SKIP_CHECKS(compiler);
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+	compiler->skip_checks = 1;
+#endif
+
 	return sljit_emit_jump(compiler, type);
 	return sljit_emit_jump(compiler, type);
 }
 }
 
 
@@ -3450,7 +3151,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
 
 
 	/* emit jump instruction */
 	/* emit jump instruction */
 	if (type >= SLJIT_FAST_CALL)
 	if (type >= SLJIT_FAST_CALL)
-		return push_inst(compiler, basr(link_r, src_r));
+		return push_inst(compiler, basr(type == SLJIT_FAST_CALL ? fast_link_r : link_r, src_r));
 
 
 	return push_inst(compiler, br(src_r));
 	return push_inst(compiler, br(src_r));
 }
 }
@@ -3468,21 +3169,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
 		ADJUST_LOCAL_OFFSET(src, srcw);
 		ADJUST_LOCAL_OFFSET(src, srcw);
 		FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));
 		FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));
 		src = TMP_REG2;
 		src = TMP_REG2;
-		srcw = 0;
 	}
 	}
 
 
 	if (type & SLJIT_CALL_RETURN) {
 	if (type & SLJIT_CALL_RETURN) {
-		if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
+		if (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0) {
 			FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));
 			FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));
 			src = TMP_REG2;
 			src = TMP_REG2;
-			srcw = 0;
 		}
 		}
 
 
-		FAIL_IF(emit_stack_frame_release(compiler, r14));
+		FAIL_IF(emit_stack_frame_release(compiler));
 		type = SLJIT_JUMP;
 		type = SLJIT_JUMP;
 	}
 	}
 
 
-	SLJIT_SKIP_CHECKS(compiler);
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+	compiler->skip_checks = 1;
+#endif
+
 	return sljit_emit_ijump(compiler, type, src, srcw);
 	return sljit_emit_ijump(compiler, type, src, srcw);
 }
 }
 
 
@@ -3490,7 +3193,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
 	sljit_s32 dst, sljit_sw dstw,
 	sljit_s32 dst, sljit_sw dstw,
 	sljit_s32 type)
 	sljit_s32 type)
 {
 {
-	sljit_u8 mask = get_cc(compiler, type);
+	sljit_u8 mask = get_cc(compiler, type & 0xff);
 
 
 	CHECK_ERROR();
 	CHECK_ERROR();
 	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
 	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
@@ -3560,92 +3263,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
 	sljit_s32 dst_reg,
 	sljit_s32 dst_reg,
 	sljit_s32 src, sljit_sw srcw)
 	sljit_s32 src, sljit_sw srcw)
 {
 {
-	sljit_ins mask = get_cc(compiler, type & ~SLJIT_32);
-	sljit_gpr src_r;
-	sljit_ins ins;
+	sljit_u8 mask = get_cc(compiler, type & 0xff);
+	sljit_gpr dst_r = gpr(dst_reg & ~SLJIT_32);
+	sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp0;
 
 
 	CHECK_ERROR();
 	CHECK_ERROR();
 	CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
 	CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
 
 
-	if (type & SLJIT_32)
-		srcw = (sljit_s32)srcw;
-
-	if (have_lscond2() && (src & SLJIT_IMM) && is_s16(srcw)) {
-		ins = (type & SLJIT_32) ? 0xec0000000042 /* lochi */ : 0xec0000000046 /* locghi */;
-		return push_inst(compiler, ins | R36A(gpr(dst_reg)) | (mask << 32) | (sljit_ins)(srcw & 0xffff) << 16);
-	}
-
 	if (src & SLJIT_IMM) {
 	if (src & SLJIT_IMM) {
-		FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw));
-		src_r = tmp0;
-	} else
-		src_r = gpr(src);
-
-	if (have_lscond1()) {
-		ins = (type & SLJIT_32) ? 0xb9f20000 /* locr */ : 0xb9e20000 /* locgr */;
-		return push_inst(compiler, ins | (mask << 12) | R4A(gpr(dst_reg)) | R0A(src_r));
-	}
-
-	return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
-}
-
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
-	sljit_s32 reg,
-	sljit_s32 mem, sljit_sw memw)
-{
-	sljit_ins ins, reg1, reg2, base, offs = 0;
-
-	CHECK_ERROR();
-	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
-
-	if (!(reg & REG_PAIR_MASK))
-		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
-
-	ADJUST_LOCAL_OFFSET(mem, memw);
-
-	base = gpr(mem & REG_MASK);
-	reg1 = gpr(REG_PAIR_FIRST(reg));
-	reg2 = gpr(REG_PAIR_SECOND(reg));
-
-	if (mem & OFFS_REG_MASK) {
-		memw &= 0x3;
-		offs = gpr(OFFS_REG(mem));
-
-		if (memw != 0) {
-			FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp1) | R32A(offs) | ((sljit_ins)memw << 16)));
-			offs = tmp1;
-		} else if (!(type & SLJIT_MEM_STORE) && (base == reg1 || base == reg2) && (offs == reg1 || offs == reg2)) {
-			FAIL_IF(push_inst(compiler, 0xb9f80000 | R12A(tmp1) | R4A(base) | R0A(offs)));
-			base = tmp1;
-			offs = 0;
-		}
-
-		memw = 0;
-	} else if (memw < -0x80000 || memw > 0x7ffff - ((reg2 == reg1 + 1) ? 0 : SSIZE_OF(sw))) {
-		FAIL_IF(push_load_imm_inst(compiler, tmp1, memw));
-
-		if (base == 0)
-			base = tmp1;
-		else
-			offs = tmp1;
-
-		memw = 0;
-	}
-
-	if (offs == 0 && reg2 == (reg1 + 1)) {
-		ins = (type & SLJIT_MEM_STORE) ? 0xeb0000000024 /* stmg */ : 0xeb0000000004 /* lmg */;
-		return push_inst(compiler, ins | R36A(reg1) | R32A(reg2) | R28A(base) | disp_s20((sljit_s32)memw));
+		/* TODO(mundaym): fast path with lscond2 */
+		FAIL_IF(push_load_imm_inst(compiler, src_r, srcw));
 	}
 	}
 
 
-	ins = ((type & SLJIT_MEM_STORE) ? 0xe30000000024 /* stg */ : 0xe30000000004 /* lg */) | R32A(offs) | R28A(base);
+	#define LEVAL(i) i(dst_r, src_r, mask)
+	if (have_lscond1())
+		return push_inst(compiler,
+			WHEN2(dst_reg & SLJIT_32, locr, locgr));
 
 
-	if (!(type & SLJIT_MEM_STORE) && base == reg1) {
-		FAIL_IF(push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw))));
-		return push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw));
-	}
+	#undef LEVAL
 
 
-	FAIL_IF(push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw)));
-	return push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw)));
+	/* TODO(mundaym): implement */
+	return SLJIT_ERR_UNSUPPORTED;
 }
 }
 
 
 /* --------------------------------------------------------------------- */
 /* --------------------------------------------------------------------- */

+ 283 - 0
thirdparty/pcre2/src/sljit/sljitNativeSPARC_32.c

@@ -0,0 +1,283 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw imm)
+{
+	if (imm <= SIMM_MAX && imm >= SIMM_MIN)
+		return push_inst(compiler, OR | D(dst) | S1(0) | IMM(imm), DR(dst));
+
+	FAIL_IF(push_inst(compiler, SETHI | D(dst) | ((imm >> 10) & 0x3fffff), DR(dst)));
+	return (imm & 0x3ff) ? push_inst(compiler, OR | D(dst) | S1(dst) | IMM_ARG | (imm & 0x3ff), DR(dst)) : SLJIT_SUCCESS;
+}
+
+#define ARG2(flags, src2) ((flags & SRC2_IMM) ? IMM(src2) : S2(src2))
+
+static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_u32 flags,
+	sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
+{
+	SLJIT_COMPILE_ASSERT(ICC_IS_SET == SET_FLAGS, icc_is_set_and_set_flags_must_be_the_same);
+
+	switch (op) {
+	case SLJIT_MOV:
+		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+		if (dst != src2)
+			return push_inst(compiler, OR | D(dst) | S1(0) | S2(src2), DR(dst));
+		return SLJIT_SUCCESS;
+
+	case SLJIT_MOV_U8:
+	case SLJIT_MOV_S8:
+		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+			if (op == SLJIT_MOV_U8)
+				return push_inst(compiler, AND | D(dst) | S1(src2) | IMM(0xff), DR(dst));
+			FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(24), DR(dst)));
+			return push_inst(compiler, SRA | D(dst) | S1(dst) | IMM(24), DR(dst));
+		}
+		SLJIT_ASSERT(dst == src2);
+		return SLJIT_SUCCESS;
+
+	case SLJIT_MOV_U16:
+	case SLJIT_MOV_S16:
+		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+			FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(16), DR(dst)));
+			return push_inst(compiler, (op == SLJIT_MOV_S16 ? SRA : SRL) | D(dst) | S1(dst) | IMM(16), DR(dst));
+		}
+		SLJIT_ASSERT(dst == src2);
+		return SLJIT_SUCCESS;
+
+	case SLJIT_NOT:
+		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+		return push_inst(compiler, XNOR | (flags & SET_FLAGS) | D(dst) | S1(0) | S2(src2), DRF(dst, flags));
+
+	case SLJIT_CLZ:
+		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+		FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(src2) | S2(0), SET_FLAGS));
+		FAIL_IF(push_inst(compiler, OR | D(TMP_REG1) | S1(0) | S2(src2), DR(TMP_REG1)));
+		FAIL_IF(push_inst(compiler, BICC | DA(0x1) | (7 & DISP_MASK), UNMOVABLE_INS));
+		FAIL_IF(push_inst(compiler, OR | D(dst) | S1(0) | IMM(32), UNMOVABLE_INS));
+		FAIL_IF(push_inst(compiler, OR | D(dst) | S1(0) | IMM(-1), DR(dst)));
+
+		/* Loop. */
+		FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(0), SET_FLAGS));
+		FAIL_IF(push_inst(compiler, SLL | D(TMP_REG1) | S1(TMP_REG1) | IMM(1), DR(TMP_REG1)));
+		FAIL_IF(push_inst(compiler, BICC | DA(0xe) | ((sljit_ins)-2 & DISP_MASK), UNMOVABLE_INS));
+		return push_inst(compiler, ADD | D(dst) | S1(dst) | IMM(1), UNMOVABLE_INS);
+
+	case SLJIT_ADD:
+		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
+		return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags));
+
+	case SLJIT_ADDC:
+		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
+		return push_inst(compiler, ADDC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags));
+
+	case SLJIT_SUB:
+		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
+		return push_inst(compiler, SUB | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags));
+
+	case SLJIT_SUBC:
+		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
+		return push_inst(compiler, SUBC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags));
+
+	case SLJIT_MUL:
+		compiler->status_flags_state = 0;
+		FAIL_IF(push_inst(compiler, SMUL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst)));
+		if (!(flags & SET_FLAGS))
+			return SLJIT_SUCCESS;
+		FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(dst) | IMM(31), DR(TMP_REG1)));
+		FAIL_IF(push_inst(compiler, RDY | D(TMP_LINK), DR(TMP_LINK)));
+		return push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(TMP_LINK), MOVABLE_INS | SET_FLAGS);
+
+	case SLJIT_AND:
+		return push_inst(compiler, AND | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags));
+
+	case SLJIT_OR:
+		return push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags));
+
+	case SLJIT_XOR:
+		return push_inst(compiler, XOR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags));
+
+	case SLJIT_SHL:
+		FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst)));
+		return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS);
+
+	case SLJIT_LSHR:
+		FAIL_IF(push_inst(compiler, SRL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst)));
+		return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS);
+
+	case SLJIT_ASHR:
+		FAIL_IF(push_inst(compiler, SRA | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst)));
+		return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS);
+	}
+
+	SLJIT_UNREACHABLE();
+	return SLJIT_SUCCESS;
+}
+
+static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src)
+{
+	sljit_s32 reg_index = 8;
+	sljit_s32 word_reg_index = 8;
+	sljit_s32 float_arg_index = 1;
+	sljit_s32 double_arg_count = 0;
+	sljit_u32 float_offset = (16 + 6) * sizeof(sljit_sw);
+	sljit_s32 types = 0;
+	sljit_s32 reg = 0;
+	sljit_s32 move_to_tmp2 = 0;
+
+	if (src)
+		reg = reg_map[*src & REG_MASK];
+
+	arg_types >>= SLJIT_ARG_SHIFT;
+
+	while (arg_types) {
+		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);
+
+		switch (arg_types & SLJIT_ARG_MASK) {
+		case SLJIT_ARG_TYPE_F64:
+			float_arg_index++;
+			double_arg_count++;
+			if (reg_index == reg || reg_index + 1 == reg)
+				move_to_tmp2 = 1;
+			reg_index += 2;
+			break;
+		case SLJIT_ARG_TYPE_F32:
+			float_arg_index++;
+			if (reg_index == reg)
+				move_to_tmp2 = 1;
+			reg_index++;
+			break;
+		default:
+			if (reg_index != word_reg_index && reg_index == reg)
+				move_to_tmp2 = 1;
+			reg_index++;
+			word_reg_index++;
+			break;
+		}
+
+		arg_types >>= SLJIT_ARG_SHIFT;
+	}
+
+	if (move_to_tmp2) {
+		if (reg < 14)
+			FAIL_IF(push_inst(compiler, OR | D(TMP_REG1) | S1(0) | S2A(reg), DR(TMP_REG1)));
+		*src = TMP_REG1;
+	}
+
+	arg_types = types;
+
+	while (arg_types) {
+		switch (arg_types & SLJIT_ARG_MASK) {
+		case SLJIT_ARG_TYPE_F64:
+			float_arg_index--;
+			if (float_arg_index == 4 && double_arg_count == 4) {
+				/* The address is not doubleword aligned, so two instructions are required to store the double. */
+				FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | S1(SLJIT_SP) | IMM((16 + 7) * sizeof(sljit_sw)), MOVABLE_INS));
+				FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | (1 << 25) | S1(SLJIT_SP) | IMM((16 + 8) * sizeof(sljit_sw)), MOVABLE_INS));
+			}
+			else
+				FAIL_IF(push_inst(compiler, STDF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS));
+			float_offset -= sizeof(sljit_f64);
+			break;
+		case SLJIT_ARG_TYPE_F32:
+			float_arg_index--;
+			FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS));
+			float_offset -= sizeof(sljit_f64);
+			break;
+		default:
+			break;
+		}
+
+		arg_types >>= SLJIT_ARG_SHIFT;
+	}
+
+	float_offset = (16 + 6) * sizeof(sljit_sw);
+
+	while (types) {
+		switch (types & SLJIT_ARG_MASK) {
+		case SLJIT_ARG_TYPE_F64:
+			reg_index -= 2;
+			if (reg_index < 14) {
+				if ((reg_index & 0x1) != 0) {
+					FAIL_IF(push_inst(compiler, LDUW | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), reg_index));
+					if (reg_index < 8 + 6 - 1)
+						FAIL_IF(push_inst(compiler, LDUW | DA(reg_index + 1) | S1(SLJIT_SP) | IMM(float_offset + sizeof(sljit_sw)), reg_index + 1));
+				}
+				else
+					FAIL_IF(push_inst(compiler, LDD | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), reg_index));
+			}
+			float_offset -= sizeof(sljit_f64);
+			break;
+		case SLJIT_ARG_TYPE_F32:
+			reg_index--;
+			if (reg_index < 8 + 6)
+				FAIL_IF(push_inst(compiler, LDUW | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), reg_index));
+			float_offset -= sizeof(sljit_f64);
+			break;
+		default:
+			reg_index--;
+			word_reg_index--;
+
+			if (reg_index != word_reg_index) {
+				if (reg_index < 14)
+					FAIL_IF(push_inst(compiler, OR | DA(reg_index) | S1(0) | S2A(word_reg_index), reg_index));
+				else
+					FAIL_IF(push_inst(compiler, STW | DA(word_reg_index) | S1(SLJIT_SP) | IMM(92), word_reg_index));
+			}
+			break;
+		}
+
+		types >>= SLJIT_ARG_SHIFT;
+	}
+
+	return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value)
+{
+	FAIL_IF(push_inst(compiler, SETHI | D(dst) | ((init_value >> 10) & 0x3fffff), DR(dst)));
+	return push_inst(compiler, OR | D(dst) | S1(dst) | IMM_ARG | (init_value & 0x3ff), DR(dst));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
+{
+	sljit_ins *inst = (sljit_ins *)addr;
+	SLJIT_UNUSED_ARG(executable_offset);
+
+	SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0);
+	SLJIT_ASSERT(((inst[0] & 0xc1c00000) == 0x01000000) && ((inst[1] & 0xc1f82000) == 0x80102000));
+	inst[0] = (inst[0] & 0xffc00000) | ((new_target >> 10) & 0x3fffff);
+	inst[1] = (inst[1] & 0xfffffc00) | (new_target & 0x3ff);
+	SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
+	inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
+	SLJIT_CACHE_FLUSH(inst, inst + 2);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
+{
+	sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
+}

+ 1673 - 0
thirdparty/pcre2/src/sljit/sljitNativeSPARC_common.c

@@ -0,0 +1,1673 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright Zoltan Herczeg ([email protected]). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
+{	/* Returns the literal "SPARC" joined at compile time with whatever
+	   SLJIT_CPUINFO expands to. */
+	return "SPARC" SLJIT_CPUINFO;
+}
+
+/* Type of one instruction word: 32 bits wide,
+   for both sparc-32 and sparc-64. */
+typedef sljit_u32 sljit_ins;
+
+#if (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL)
+
+static void sparc_cache_flush(sljit_ins *from, sljit_ins *to)
+{	/* Executes the SPARC "flush" instruction on the instruction words
+	   between from and to, advancing two words (8 bytes) per step.
+	   Does nothing when from == to. */
+#if defined(__SUNPRO_C) && __SUNPRO_C < 0x590
+	__asm ( /* Variant for Sun compilers older than 0x590: the assembly reads
+		   the arguments directly from %i0 (from) and %i1 (to). */
+		/* if (from == to) return */
+		"cmp %i0, %i1\n"
+		"be .leave\n"
+		"nop\n"
+
+		/* loop until from >= to */
+		".mainloop:\n"
+		"flush %i0\n"
+		"add %i0, 8, %i0\n"
+		"cmp %i0, %i1\n"
+		"bcs .mainloop\n"
+		"nop\n"
+
+		/* The comparison was done above. */
+		"bne .leave\n"
+		/* nop is not necessary here, since the
+		   sub operation has no side effect. */
+		"sub %i0, 4, %i0\n"
+		"flush %i0\n"
+		".leave:"
+	);
+#else
+	if (SLJIT_UNLIKELY(from == to))
+		return;
+
+	do {
+		__asm__ volatile (
+			"flush %0\n"
+			: : "r"(from)
+		);
+		/* Operates at least on doubleword. */
+		from += 2;
+	} while (from < to);
+
+	if (from == to) {
+		/* Flush the last word: the loop stopped exactly on "to", so the
+		   word just before it gets its own flush. */
+		from --;
+		__asm__ volatile (
+			"flush %0\n"
+			: : "r"(from)
+		);
+	}
+#endif
+}
+
+#endif /* (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL) */
+
+/* Temporary registers, expressed as indices into reg_map / freg_map that
+   follow the public SLJIT register indices.
+   TMP_REG2 is not used by getput_arg */
+#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
+#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
+#define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)
+/* This register is modified by calls, which affects the instruction
+   in the delay slot if it is used as a source register. */
+#define TMP_LINK	(SLJIT_NUMBER_OF_REGISTERS + 5)
+
+#define TMP_FREG1	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
+#define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
+
+static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { /* SLJIT register
+	index -> number placed into the register fields by D(), S1(), S2() and DR() */
+	0, 8, 9, 10, 11, 23, 22, 21, 20, 19, 18, 17, 16, 29, 28, 27, 26, 25, 24, 14, 1, 12, 13, 15
+};
+
+static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { /* SLJIT float
+	register index -> number used by FD(), FS1() and FS2(); only even numbers
+	are listed, FDN() / FS2N() select the odd partner by setting bit 0 */
+	0, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+};
+
+/* --------------------------------------------------------------------- */
+/*  Instruction forms                                                    */
+/* --------------------------------------------------------------------- */
+
+#define D(d)		((sljit_ins)reg_map[d] << 25) /* destination field, SLJIT register index */
+#define FD(d)		((sljit_ins)freg_map[d] << 25)
+#define FDN(d)		(((sljit_ins)freg_map[d] | 0x1) << 25)
+#define DA(d)		((sljit_ins)(d) << 25) /* destination field, raw number (no reg_map lookup) */
+#define S1(s1)		((sljit_ins)reg_map[s1] << 14) /* first source field */
+#define FS1(s1)		((sljit_ins)freg_map[s1] << 14)
+#define S1A(s1)		((sljit_ins)(s1) << 14) /* first source field, raw number */
+#define S2(s2)		((sljit_ins)reg_map[s2]) /* second source field */
+#define FS2(s2)		((sljit_ins)freg_map[s2])
+#define FS2N(s2)	((sljit_ins)freg_map[s2] | 0x1)
+#define S2A(s2)		((sljit_ins)(s2)) /* second source field, raw number */
+#define IMM_ARG		0x2000 /* bit 13: second operand is an immediate */
+#define DOP(op)		((sljit_ins)(op) << 5)
+#define IMM(imm)	(((sljit_ins)(imm) & 0x1fff) | IMM_ARG) /* low 13 bits of imm plus IMM_ARG */
+
+#define DR(dr)		(reg_map[dr]) /* mapped number, used as the delay_slot argument of push_inst */
+#define DRF(dr, flags)	((sljit_s32)(reg_map[dr] | ((flags) & SET_FLAGS)))
+#define OPC1(opcode)	((sljit_ins)(opcode) << 30)
+#define OPC2(opcode)	((sljit_ins)(opcode) << 22)
+#define OPC3(opcode)	((sljit_ins)(opcode) << 19)
+#define SET_FLAGS	OPC3(0x10)
+
+#define ADD		(OPC1(0x2) | OPC3(0x00))
+#define ADDC		(OPC1(0x2) | OPC3(0x08))
+#define AND		(OPC1(0x2) | OPC3(0x01))
+#define ANDN		(OPC1(0x2) | OPC3(0x05))
+#define CALL		(OPC1(0x1))
+#define FABSS		(OPC1(0x2) | OPC3(0x34) | DOP(0x09))
+#define FADDD		(OPC1(0x2) | OPC3(0x34) | DOP(0x42))
+#define FADDS		(OPC1(0x2) | OPC3(0x34) | DOP(0x41))
+#define FCMPD		(OPC1(0x2) | OPC3(0x35) | DOP(0x52))
+#define FCMPS		(OPC1(0x2) | OPC3(0x35) | DOP(0x51))
+#define FDIVD		(OPC1(0x2) | OPC3(0x34) | DOP(0x4e))
+#define FDIVS		(OPC1(0x2) | OPC3(0x34) | DOP(0x4d))
+#define FDTOI		(OPC1(0x2) | OPC3(0x34) | DOP(0xd2))
+#define FDTOS		(OPC1(0x2) | OPC3(0x34) | DOP(0xc6))
+#define FITOD		(OPC1(0x2) | OPC3(0x34) | DOP(0xc8))
+#define FITOS		(OPC1(0x2) | OPC3(0x34) | DOP(0xc4))
+#define FMOVS		(OPC1(0x2) | OPC3(0x34) | DOP(0x01))
+#define FMULD		(OPC1(0x2) | OPC3(0x34) | DOP(0x4a))
+#define FMULS		(OPC1(0x2) | OPC3(0x34) | DOP(0x49))
+#define FNEGS		(OPC1(0x2) | OPC3(0x34) | DOP(0x05))
+#define FSTOD		(OPC1(0x2) | OPC3(0x34) | DOP(0xc9))
+#define FSTOI		(OPC1(0x2) | OPC3(0x34) | DOP(0xd1))
+#define FSUBD		(OPC1(0x2) | OPC3(0x34) | DOP(0x46))
+#define FSUBS		(OPC1(0x2) | OPC3(0x34) | DOP(0x45))
+#define JMPL		(OPC1(0x2) | OPC3(0x38))
+#define LDD		(OPC1(0x3) | OPC3(0x03))
+#define LDDF		(OPC1(0x3) | OPC3(0x23))
+#define LDF		(OPC1(0x3) | OPC3(0x20))
+#define LDUW		(OPC1(0x3) | OPC3(0x00))
+#define NOP		(OPC1(0x0) | OPC2(0x04)) /* same bits as SETHI with all other fields zero */
+#define OR		(OPC1(0x2) | OPC3(0x02))
+#define ORN		(OPC1(0x2) | OPC3(0x06))
+#define RDY		(OPC1(0x2) | OPC3(0x28) | S1A(0))
+#define RESTORE		(OPC1(0x2) | OPC3(0x3d))
+#define SAVE		(OPC1(0x2) | OPC3(0x3c))
+#define SETHI		(OPC1(0x0) | OPC2(0x04))
+#define SLL		(OPC1(0x2) | OPC3(0x25))
+#define SLLX		(OPC1(0x2) | OPC3(0x25) | (1 << 12))
+#define SRA		(OPC1(0x2) | OPC3(0x27))
+#define SRAX		(OPC1(0x2) | OPC3(0x27) | (1 << 12))
+#define SRL		(OPC1(0x2) | OPC3(0x26))
+#define SRLX		(OPC1(0x2) | OPC3(0x26) | (1 << 12))
+#define STD		(OPC1(0x3) | OPC3(0x07))
+#define STDF		(OPC1(0x3) | OPC3(0x27))
+#define STF		(OPC1(0x3) | OPC3(0x24))
+#define STW		(OPC1(0x3) | OPC3(0x04))
+#define SUB		(OPC1(0x2) | OPC3(0x04))
+#define SUBC		(OPC1(0x2) | OPC3(0x0c))
+#define TA		(OPC1(0x2) | OPC3(0x3a) | (8 << 25))
+#define WRY		(OPC1(0x2) | OPC3(0x30) | DA(0))
+#define XOR		(OPC1(0x2) | OPC3(0x03))
+#define XNOR		(OPC1(0x2) | OPC3(0x07))
+
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+#define MAX_DISP	(0x1fffff) /* branch displacement limits, counted in instruction words */
+#define MIN_DISP	(-0x200000)
+#define DISP_MASK	((sljit_ins)0x3fffff) /* 22-bit displacement field */
+
+#define BICC		(OPC1(0x0) | OPC2(0x2))
+#define FBFCC		(OPC1(0x0) | OPC2(0x6))
+#define SLL_W		SLL
+#define SDIV		(OPC1(0x2) | OPC3(0x0f))
+#define SMUL		(OPC1(0x2) | OPC3(0x0b))
+#define UDIV		(OPC1(0x2) | OPC3(0x0e))
+#define UMUL		(OPC1(0x2) | OPC3(0x0a))
+#else
+#define SLL_W		SLLX
+#endif
+
+#define SIMM_MAX	(0x0fff) /* largest value of the 13-bit signed immediate */
+#define SIMM_MIN	(-0x1000) /* smallest value of the 13-bit signed immediate */
+
+/* Appends one instruction word to the compiler buffer.
+   delay_slot records how the instruction may be treated by later
+   delay-slot reordering: it is either UNMOVABLE_INS, MOVABLE_INS, or the
+   absolute (already mapped) number of the destination register, which
+   must then equal bits 25..29 of ins (checked by the assertion).
+   The value is stored in compiler->delay_slot and compiler->size is
+   incremented. Returns SLJIT_SUCCESS, or leaves through FAIL_IF when
+   ensure_buf cannot provide space. */
+static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 delay_slot)
+{
+	sljit_ins *ptr;
+	SLJIT_ASSERT((delay_slot & DST_INS_MASK) == UNMOVABLE_INS
+		|| (delay_slot & DST_INS_MASK) == MOVABLE_INS
+		|| (delay_slot & DST_INS_MASK) == ((ins >> 25) & 0x1f));
+	ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
+	FAIL_IF(!ptr);
+	*ptr = ins;
+	compiler->size++;
+	compiler->delay_slot = delay_slot;
+	return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
+{	/* Tries to replace the already copied jump sequence by a shorter form.
+	   Returns the position of the last instruction word that is kept, or
+	   code_ptr unchanged when the sequence is left as it is. On success a
+	   PATCH_CALL / PATCH_B flag is set in jump->flags and jump->addr is
+	   updated to the word whose displacement sljit_generate_code fills in
+	   later. jump->addr is expected to be set by the caller before the
+	   call (sljit_generate_code stores code_ptr - 3 there on sparc-32). */
+	sljit_sw diff;
+	sljit_uw target_addr;
+	sljit_ins *inst;
+	sljit_ins saved_inst;
+
+	if (jump->flags & SLJIT_REWRITABLE_JUMP)
+		return code_ptr; /* rewritable jumps keep their full-length form */
+
+	if (jump->flags & JUMP_ADDR)
+		target_addr = jump->u.target;
+	else {
+		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
+		target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
+	}
+	inst = (sljit_ins*)jump->addr;
+
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+	if (jump->flags & IS_CALL) {
+		/* Call is always patchable on sparc 32. */
+		jump->flags |= PATCH_CALL;
+		if (jump->flags & IS_MOVABLE) {
+			/* The instruction before the sequence is moved behind the
+			   CALL and the CALL takes its former place. */
+			inst[0] = inst[-1];
+			inst[-1] = CALL;
+			jump->addr -= sizeof(sljit_ins);
+			return inst;
+		}
+		inst[0] = CALL;
+		inst[1] = NOP;
+		return inst + 1;
+	}
+#else
+	/* Both calls and BPr instructions shall not pass this point. */
+#error "Implementation required"
+#endif
+
+	if (jump->flags & IS_COND)
+		inst--; /* NOTE(review): presumably steps back onto the conditional branch emitted before the sequence - confirm */
+
+	diff = ((sljit_sw)target_addr - (sljit_sw)(inst - 1) - executable_offset) >> 2; /* distance in words, measured from inst - 1 */
+
+	if (jump->flags & IS_MOVABLE) {
+		if (diff <= MAX_DISP && diff >= MIN_DISP) {
+			jump->flags |= PATCH_B;
+			inst--;
+			if (jump->flags & IS_COND) {
+				/* Swap the two words so that the branch comes first, and
+				   toggle bit 28 of the branch word.
+				   NOTE(review): presumably this inverts the branch
+				   condition - confirm against the Bicc encoding. */
+				saved_inst = inst[0];
+				inst[0] = inst[1] ^ (1 << 28);
+				inst[1] = saved_inst;
+			} else {
+				inst[1] = inst[0];
+				inst[0] = BICC | DA(0x8); /* BICC with 0x8 in the field at bit 25 */
+			}
+			jump->addr = (sljit_uw)inst;
+			return inst + 1;
+		}
+	}
+
+	diff += SSIZE_OF(ins); /* the branch stays at inst, so re-check the range for that position */
+
+	if (diff <= MAX_DISP && diff >= MIN_DISP) {
+		jump->flags |= PATCH_B;
+		if (jump->flags & IS_COND)
+			inst[0] ^= (1 << 28);
+		else
+			inst[0] = BICC | DA(0x8);
+		inst[1] = NOP;
+		jump->addr = (sljit_uw)inst;
+		return inst + 1;
+	}
+
+	return code_ptr;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
+{	/* Produces the final machine code from the instruction words collected
+	   in the compiler buffers.
+	   1. Copies every word into freshly allocated executable memory while
+	      recording the addresses of labels, jumps, constants and
+	      put_labels; detect_jump_type may shorten jump sequences.
+	   2. Patches every jump with its final target.
+	   3. Patches every put_label with the address of its label.
+	   4. Stores the resulting sizes in the compiler and flushes the
+	      instruction cache.
+	   Returns the executable address of the code; allocation failure
+	   leaves through PTR_FAIL_WITH_EXEC_IF. */
+	struct sljit_memory_fragment *buf;
+	sljit_ins *code;
+	sljit_ins *code_ptr;
+	sljit_ins *buf_ptr;
+	sljit_ins *buf_end;
+	sljit_uw word_count;
+	sljit_uw next_addr;
+	sljit_sw executable_offset;
+	sljit_sw addr;
+
+	struct sljit_label *label;
+	struct sljit_jump *jump;
+	struct sljit_const *const_;
+	struct sljit_put_label *put_label;
+
+	CHECK_ERROR_PTR();
+	CHECK_PTR(check_sljit_generate_code(compiler));
+	reverse_buf(compiler);
+
+	code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins), compiler->exec_allocator_data); /* one slot per emitted word; the final code may be shorter */
+	PTR_FAIL_WITH_EXEC_IF(code);
+	buf = compiler->buf;
+
+	code_ptr = code;
+	word_count = 0;
+	next_addr = 0;
+	executable_offset = SLJIT_EXEC_OFFSET(code);
+
+	label = compiler->labels;
+	jump = compiler->jumps;
+	const_ = compiler->consts;
+	put_label = compiler->put_labels;
+
+	do {
+		buf_ptr = (sljit_ins*)buf->memory;
+		buf_end = buf_ptr + (buf->used_size >> 2); /* used_size is in bytes, 4 bytes per word */
+		do {
+			*code_ptr = *buf_ptr++;
+			if (next_addr == word_count) { /* a label, jump, const or put_label may be due at this word */
+				SLJIT_ASSERT(!label || label->size >= word_count);
+				SLJIT_ASSERT(!jump || jump->addr >= word_count);
+				SLJIT_ASSERT(!const_ || const_->addr >= word_count);
+				SLJIT_ASSERT(!put_label || put_label->addr >= word_count);
+
+				/* These structures are ordered by their address. */
+				if (label && label->size == word_count) {
+					/* Just recording the address; size becomes the word
+					   offset inside the final code. */
+					label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+					label->size = (sljit_uw)(code_ptr - code);
+					label = label->next;
+				}
+				if (jump && jump->addr == word_count) {
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+					jump->addr = (sljit_uw)(code_ptr - 3); /* NOTE(review): presumably the first word of the jump sequence - confirm */
+#else
+					jump->addr = (sljit_uw)(code_ptr - 6);
+#endif
+					code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset);
+					jump = jump->next;
+				}
+				if (const_ && const_->addr == word_count) {
+					/* Just recording the address. */
+					const_->addr = (sljit_uw)code_ptr;
+					const_ = const_->next;
+				}
+				if (put_label && put_label->addr == word_count) {
+					SLJIT_ASSERT(put_label->label);
+					put_label->addr = (sljit_uw)code_ptr;
+					put_label = put_label->next;
+				}
+				next_addr = compute_next_addr(label, jump, const_, put_label);
+			}
+			code_ptr ++;
+			word_count ++;
+		} while (buf_ptr < buf_end);
+
+		buf = buf->next;
+	} while (buf);
+
+	if (label && label->size == word_count) { /* a label placed after the last instruction */
+		label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+		label->size = (sljit_uw)(code_ptr - code);
+		label = label->next;
+	}
+
+	SLJIT_ASSERT(!label);
+	SLJIT_ASSERT(!jump);
+	SLJIT_ASSERT(!const_);
+	SLJIT_ASSERT(!put_label);
+	SLJIT_ASSERT(code_ptr - code <= (sljit_s32)compiler->size);
+
+	jump = compiler->jumps;
+	while (jump) { /* second pass: all label addresses are known now */
+		do { /* single-iteration loop: "break" skips the remaining patch forms */
+			addr = (sljit_sw)((jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target);
+			buf_ptr = (sljit_ins *)jump->addr;
+
+			if (jump->flags & PATCH_CALL) {
+				addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2;
+				SLJIT_ASSERT(addr <= 0x1fffffff && addr >= -0x20000000);
+				buf_ptr[0] = CALL | ((sljit_ins)addr & 0x3fffffff);
+				break;
+			}
+			if (jump->flags & PATCH_B) {
+				addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2;
+				SLJIT_ASSERT(addr <= MAX_DISP && addr >= MIN_DISP);
+				buf_ptr[0] = (buf_ptr[0] & ~DISP_MASK) | ((sljit_ins)addr & DISP_MASK);
+				break;
+			}
+
+			/* Set the fields of immediate loads: the asserted pattern is
+			   a SETHI / OR-immediate pair whose value fields are zero. */
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+			SLJIT_ASSERT(((buf_ptr[0] & 0xc1cfffff) == 0x01000000) && ((buf_ptr[1] & 0xc1f83fff) == 0x80102000));
+			buf_ptr[0] |= (sljit_ins)(addr >> 10) & 0x3fffff;
+			buf_ptr[1] |= (sljit_ins)addr & 0x3ff;
+#else
+#error "Implementation required"
+#endif
+		} while (0);
+		jump = jump->next;
+	}
+
+	put_label = compiler->put_labels;
+	while (put_label) { /* third pass: store label addresses into their SETHI / OR pairs */
+		addr = (sljit_sw)put_label->label->addr;
+		buf_ptr = (sljit_ins *)put_label->addr;
+
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+		SLJIT_ASSERT(((buf_ptr[0] & 0xc1cfffff) == 0x01000000) && ((buf_ptr[1] & 0xc1f83fff) == 0x80102000));
+		buf_ptr[0] |= (addr >> 10) & 0x3fffff;
+		buf_ptr[1] |= addr & 0x3ff;
+#else
+#error "Implementation required"
+#endif
+		put_label = put_label->next;
+	}
+
+	compiler->error = SLJIT_ERR_COMPILED;
+	compiler->executable_offset = executable_offset;
+	compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins);
+
+	code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
+	code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+
+	SLJIT_CACHE_FLUSH(code, code_ptr);
+	SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
+	return code;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
+{	/* Reports whether feature_type is available on this target.
+	   Features that are not listed in the switch yield 0. */
+	switch (feature_type) {
+	case SLJIT_HAS_FPU:
+#ifdef SLJIT_IS_FPU_AVAILABLE
+		return SLJIT_IS_FPU_AVAILABLE; /* build-time override */
+#else
+		/* Available by default. */
+		return 1;
+#endif
+
+	case SLJIT_HAS_ZERO_REGISTER:
+		return 1;
+
+#if (defined SLJIT_CONFIG_SPARC_64 && SLJIT_CONFIG_SPARC_64)
+	case SLJIT_HAS_CMOV:
+		return 1;
+#endif
+
+	default:
+		return 0;
+	}
+}
+
+/* --------------------------------------------------------------------- */
+/*  Entry, exit                                                          */
+/* --------------------------------------------------------------------- */
+
+/* The values up to MEM_MASK are combined into an index of the
+   data_transfer_insts array: bit 0 selects load, the size and
+   signedness bits select the row. */
+#define LOAD_DATA	0x01
+#define WORD_DATA	0x00
+#define BYTE_DATA	0x02
+#define HALF_DATA	0x04
+#define INT_DATA	0x06
+#define SIGNED_DATA	0x08
+/* Separates integer and floating point registers:
+   (flags & MEM_MASK) <= GPR_REG means an integer register access. */
+#define GPR_REG		0x0f
+#define DOUBLE_DATA	0x10
+#define SINGLE_DATA	0x12
+
+#define MEM_MASK	0x1f
+
+#define ARG_TEST	0x00020 /* getput_arg_fast only reports feasibility, emits nothing */
+#define ALT_KEEP_CACHE	0x00040 /* emit_op keeps the address cache instead of clearing it */
+#define CUMULATIVE_OP	0x00080 /* emit_op may swap the sources to use an immediate */
+#define IMM_OP		0x00100
+#define MOVE_OP		0x00200
+#define SRC2_IMM	0x00400
+
+#define REG_DEST	0x00800
+#define REG2_SOURCE	0x01000
+#define SLOW_SRC1	0x02000
+#define SLOW_SRC2	0x04000
+#define SLOW_DEST	0x08000
+
+/* SET_FLAGS (0x10 << 19) also belongs here! */
+
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+#include "sljitNativeSPARC_32.c"
+#else
+#include "sljitNativeSPARC_64.c"
+#endif
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
+	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
+	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
+{	/* Emits the function prologue: a SAVE that reserves the frame, followed
+	   by the instructions that bring the incoming arguments to the places
+	   described by arg_types. Raw register numbers are used for the
+	   incoming arguments (24..29) and for the base of arguments that were
+	   passed in memory (30). Returns SLJIT_SUCCESS or leaves early through
+	   CHECK / FAIL_IF. */
+	sljit_s32 reg_index, types, tmp;
+	sljit_u32 float_offset, args_offset;
+	sljit_s32 saved_arg_index, scratch_arg_index, float_arg_index;
+
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
+	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
+
+	local_size = (local_size + SLJIT_LOCALS_OFFSET + 7) & ~0x7; /* round up to a multiple of 8 */
+	compiler->local_size = local_size;
+
+	if (local_size <= -SIMM_MIN) { /* -local_size fits the 13-bit signed immediate */
+		FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_SP) | S1(SLJIT_SP) | IMM(-local_size), UNMOVABLE_INS));
+	}
+	else {
+		FAIL_IF(load_immediate(compiler, TMP_REG1, -local_size));
+		FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_SP) | S1(SLJIT_SP) | S2(TMP_REG1), UNMOVABLE_INS));
+	}
+
+	arg_types >>= SLJIT_ARG_SHIFT; /* drops the lowest descriptor; NOTE(review): presumably the return type - confirm */
+
+	types = arg_types;
+	float_offset = 16 * sizeof(sljit_sw);
+	reg_index = 24;
+
+	while (types && reg_index < 24 + 6) { /* pass 1: float arguments that arrived in integer registers are stored to the stack */
+		switch (types & SLJIT_ARG_MASK) {
+		case SLJIT_ARG_TYPE_F64:
+			if (reg_index & 0x1) { /* odd register number: the halves are stored with two STW */
+				FAIL_IF(push_inst(compiler, STW | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS));
+				if (reg_index >= 24 + 6 - 1)
+					break; /* the second half was not passed in a register */
+				FAIL_IF(push_inst(compiler, STW | DA(reg_index + 1) | S1(SLJIT_SP) | IMM(float_offset + sizeof(sljit_sw)), MOVABLE_INS));
+			} else
+				FAIL_IF(push_inst(compiler, STD | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS));
+
+			float_offset += sizeof(sljit_f64);
+			reg_index++; /* a double occupies two registers; the second step follows the switch */
+			break;
+		case SLJIT_ARG_TYPE_F32:
+			FAIL_IF(push_inst(compiler, STW | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS));
+			float_offset += sizeof(sljit_f64); /* every float slot is 8 bytes wide, single precision included */
+			break;
+		}
+
+		reg_index++;
+		types >>= SLJIT_ARG_SHIFT;
+	}
+
+	args_offset = (16 + 1 + 6) * sizeof(sljit_sw);
+	float_offset = 16 * sizeof(sljit_sw);
+	reg_index = 24;
+	saved_arg_index = 24;
+	scratch_arg_index = 8 - 1;
+	float_arg_index = 1;
+
+	while (arg_types) { /* pass 2: load float arguments into float registers and place word arguments */
+		switch (arg_types & SLJIT_ARG_MASK) {
+		case SLJIT_ARG_TYPE_F64:
+			if (reg_index < 24 + 6 - 1) { /* both halves were stored by pass 1 */
+				FAIL_IF(push_inst(compiler, LDDF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS));
+			} else if (reg_index < 24 + 6) { /* first half from the pass 1 slot, second half from the memory arguments */
+				FAIL_IF(push_inst(compiler, LDF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS));
+				FAIL_IF(push_inst(compiler, LDF | FD(float_arg_index) | (1 << 25) | S1A(30) | IMM(args_offset), MOVABLE_INS));
+			} else { /* both halves come from the memory arguments */
+				FAIL_IF(push_inst(compiler, LDF | FD(float_arg_index) | S1A(30) | IMM(args_offset), MOVABLE_INS));
+				FAIL_IF(push_inst(compiler, LDF | FD(float_arg_index) | (1 << 25) | S1A(30) | IMM(args_offset + sizeof(sljit_sw)), MOVABLE_INS));
+			}
+
+			float_arg_index++;
+			float_offset += sizeof(sljit_f64);
+			reg_index++;
+			break;
+		case SLJIT_ARG_TYPE_F32:
+			if (reg_index < 24 + 6)
+				FAIL_IF(push_inst(compiler, LDF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS));
+			else
+				FAIL_IF(push_inst(compiler, LDF | FD(float_arg_index) | S1A(30) | IMM(args_offset), MOVABLE_INS));
+			float_arg_index++;
+			float_offset += sizeof(sljit_f64);
+			break;
+		default:
+			scratch_arg_index++;
+
+			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
+				tmp = saved_arg_index++;
+				if (tmp == reg_index)
+					break; /* the argument is already in its target register */
+			} else
+				tmp = scratch_arg_index;
+
+			if (reg_index < 24 + 6)
+				FAIL_IF(push_inst(compiler, OR | DA(tmp) | S1(0) | S2A(reg_index), tmp));
+			else
+				FAIL_IF(push_inst(compiler, LDUW | DA(tmp) | S1A(30) | IMM(args_offset), tmp));
+			break;
+		}
+
+		reg_index++;
+		arg_types >>= SLJIT_ARG_SHIFT;
+	}
+
+	return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
+	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
+	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
+{	/* Records the same frame description as sljit_emit_enter without
+	   emitting any instruction; compiler->local_size gets the identical
+	   rounded value (multiple of 8, SLJIT_LOCALS_OFFSET included). */
+	CHECK_ERROR();
+	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
+	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
+
+	compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 7) & ~0x7;
+	return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
+{	/* Emits the return sequence: a JMPL to raw register 31 plus 8 whose
+	   link result goes to reg_map[0], immediately followed by a RESTORE.
+	   Both instructions are marked UNMOVABLE_INS. */
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_return_void(compiler));
+
+	FAIL_IF(push_inst(compiler, JMPL | D(0) | S1A(31) | IMM(8), UNMOVABLE_INS));
+	return push_inst(compiler, RESTORE | D(SLJIT_R0) | S1(SLJIT_R0) | S2(0), UNMOVABLE_INS);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
+{	/* Emits the return sequence with a return value. When src is not a
+	   plain register, or op requires a type cast, the value is first moved
+	   by emit_mov_before_return and SLJIT_R0 is used as the source.
+	   The RESTORE that follows the JMPL copies the source register into
+	   SLJIT_R0. */
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_return(compiler, op, src, srcw));
+
+	if (TYPE_CAST_NEEDED(op) || !FAST_IS_REG(src)) {
+		FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
+		src = SLJIT_R0;
+	}
+
+	FAIL_IF(push_inst(compiler, JMPL | D(0) | S1A(31) | IMM(8), UNMOVABLE_INS));
+	return push_inst(compiler, RESTORE | D(SLJIT_R0) | S1(src) | S2(0), UNMOVABLE_INS);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Operators                                                            */
+/* --------------------------------------------------------------------- */
+
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+#define ARCH_32_64(a, b)	a /* selects the sparc-32 variant */
+#else
+#define ARCH_32_64(a, b)	b /* selects the sparc-64 variant */
+#endif
+
+static const sljit_ins data_transfer_insts[16 + 4] = { /* indexed by (flags & MEM_MASK).
+   Row legend: u/s = unsigned/signed, w/b/h/i = word/byte/half/int,
+   s/l = store/load; the last four rows are the double and single
+   precision float forms. */
+/* u w s */ ARCH_32_64(OPC1(3) | OPC3(0x04) /* stw */, OPC1(3) | OPC3(0x0e) /* stx */),
+/* u w l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x0b) /* ldx */),
+/* u b s */ OPC1(3) | OPC3(0x05) /* stb */,
+/* u b l */ OPC1(3) | OPC3(0x01) /* ldub */,
+/* u h s */ OPC1(3) | OPC3(0x06) /* sth */,
+/* u h l */ OPC1(3) | OPC3(0x02) /* lduh */,
+/* u i s */ OPC1(3) | OPC3(0x04) /* stw */,
+/* u i l */ OPC1(3) | OPC3(0x00) /* lduw */,
+
+/* s w s */ ARCH_32_64(OPC1(3) | OPC3(0x04) /* stw */, OPC1(3) | OPC3(0x0e) /* stx */),
+/* s w l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x0b) /* ldx */),
+/* s b s */ OPC1(3) | OPC3(0x05) /* stb */,
+/* s b l */ OPC1(3) | OPC3(0x09) /* ldsb */,
+/* s h s */ OPC1(3) | OPC3(0x06) /* sth */,
+/* s h l */ OPC1(3) | OPC3(0x0a) /* ldsh */,
+/* s i s */ OPC1(3) | OPC3(0x04) /* stw */,
+/* s i l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x08) /* ldsw */),
+
+/* d   s */ OPC1(3) | OPC3(0x27) /* stdf */,
+/* d   l */ OPC1(3) | OPC3(0x23) /* lddf */,
+/* s   s */ OPC1(3) | OPC3(0x24) /* stf */,
+/* s   l */ OPC1(3) | OPC3(0x20) /* ldf */,
+};
+
+#undef ARCH_32_64
+
+/* Performs the memory access described by flags / reg / arg / argw if it
+   can be done with at most 1 instruction. That is the case when there is
+   no offset register and argw fits the 13-bit signed immediate, or when
+   there is an offset register whose shift (argw & 0x3) is zero.
+   Returns 0 when the access needs more instructions (nothing is emitted).
+   With ARG_TEST set it returns 1 without emitting anything; otherwise the
+   instruction is emitted and -1 is returned (a push_inst failure leaves
+   through FAIL_IF). */
+static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_u32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
+{
+	SLJIT_ASSERT(arg & SLJIT_MEM);
+
+	if ((!(arg & OFFS_REG_MASK) && argw <= SIMM_MAX && argw >= SIMM_MIN)
+			|| ((arg & OFFS_REG_MASK) && (argw & 0x3) == 0)) {
+		/* Works for both absolute and relative addresses (immediate case). */
+		if (SLJIT_UNLIKELY(flags & ARG_TEST))
+			return 1;
+		FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK]
+			| ((flags & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg))
+			| S1(arg & REG_MASK) | ((arg & OFFS_REG_MASK) ? S2(OFFS_REG(arg)) : IMM(argw)),
+			((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? DR(reg) : MOVABLE_INS));
+		return -1;
+	}
+	return 0;
+}
+
+/* Tells whether the address value that getput_arg computes for arg can
+   be reused for next_arg (see getput_arg below). Returns 1 if so, else 0.
+   Note: can_cache is called only for binary operators. Those
+   operators always use word arguments without write back. */
+static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
+{
+	SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
+
+	/* Simple operation except for updates. */
+	if (arg & OFFS_REG_MASK) {
+		argw &= 0x3;
+		SLJIT_ASSERT(argw); /* a zero shift is expected to be handled by getput_arg_fast */
+		next_argw &= 0x3;
+		/* Reusable when the same offset register is shifted by the same amount. */
+		if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == next_argw)
+			return 1;
+		return 0;
+	}
+
+	/* Reusable when the next offset is reachable with a 13-bit signed add. */
+	if (((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN))
+		return 1;
+	return 0;
+}
+
+/* Emits the memory access that getput_arg_fast could not do in a single
+   instruction. See can_cache above.
+   The second address operand (shifted offset register or full immediate
+   offset) is first materialised in a register: TMP_REG3 when the value
+   is already cached or worth caching for next_arg / next_argw, otherwise
+   the destination register of a load or TMP_REG1. compiler->cache_arg and
+   compiler->cache_argw describe what TMP_REG3 currently holds.
+   Returns the result of the final push_inst. */
+static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_u32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
+{
+	sljit_s32 base, arg2, delay_slot;
+	sljit_ins dest;
+
+	SLJIT_ASSERT(arg & SLJIT_MEM);
+	if (!(next_arg & SLJIT_MEM)) { /* no following memory access: nothing to cache for */
+		next_arg = 0;
+		next_argw = 0;
+	}
+
+	base = arg & REG_MASK;
+	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { /* base + (offset register << shift) */
+		argw &= 0x3;
+
+		/* Using the cache. */
+		if (((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) && (argw == compiler->cache_argw))
+			arg2 = TMP_REG3;
+		else {
+			if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == (next_argw & 0x3)) {
+				/* The next access uses the same shifted register: cache it. */
+				compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
+				compiler->cache_argw = argw;
+				arg2 = TMP_REG3;
+			}
+			else if ((flags & LOAD_DATA) && ((flags & MEM_MASK) <= GPR_REG) && reg != base && reg != OFFS_REG(arg))
+				arg2 = reg;
+			else /* It must be a mov operation, so tmp1 must be free to use. */
+				arg2 = TMP_REG1;
+			FAIL_IF(push_inst(compiler, SLL_W | D(arg2) | S1(OFFS_REG(arg)) | IMM_ARG | (sljit_ins)argw, DR(arg2)));
+		}
+	}
+	else { /* base + immediate offset that does not fit 13 bits */
+		/* Using the cache. */
+		if ((compiler->cache_arg == SLJIT_MEM) && (argw - compiler->cache_argw) <= SIMM_MAX && (argw - compiler->cache_argw) >= SIMM_MIN) {
+			if (argw != compiler->cache_argw) {
+				/* Adjust the cached value by the difference. */
+				FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | S1(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3)));
+				compiler->cache_argw = argw;
+			}
+			arg2 = TMP_REG3;
+		} else {
+			if ((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN) {
+				compiler->cache_arg = SLJIT_MEM;
+				compiler->cache_argw = argw;
+				arg2 = TMP_REG3;
+			}
+			else if ((flags & LOAD_DATA) && ((flags & MEM_MASK) <= GPR_REG) && reg != base)
+				arg2 = reg;
+			else /* It must be a mov operation, so tmp1 must be free to use. */
+				arg2 = TMP_REG1;
+			FAIL_IF(load_immediate(compiler, arg2, argw));
+		}
+	}
+
+	dest = ((flags & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg));
+	delay_slot = ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? DR(reg) : MOVABLE_INS;
+	if (!base) /* no base register: arg2 alone holds the address */
+		return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(arg2) | IMM(0), delay_slot);
+	return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(base) | S2(arg2), delay_slot);
+}
+
+static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_u32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
+{	/* Emits one memory access. The single-instruction form is tried
+	   first; getput_arg_fast returns non-zero once it has handled the
+	   access, and compiler->error is then the result. Otherwise the
+	   address cache is cleared and getput_arg emits the general sequence
+	   with no follow-up access to cache for. */
+	if (getput_arg_fast(compiler, flags, reg, arg, argw))
+		return compiler->error;
+	compiler->cache_arg = 0;
+	compiler->cache_argw = 0;
+	return getput_arg(compiler, flags, reg, arg, argw, 0, 0);
+}
+
+static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_u32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
+{	/* Same as emit_op_mem, except that the address cache is kept and
+	   arg2 / arg2w are passed to getput_arg as the following access, so
+	   the computed address can be cached for it. */
+	if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
+		return compiler->error;
+	return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
+}
+
+static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_u32 flags,
+	sljit_s32 dst, sljit_sw dstw,
+	sljit_s32 src1, sljit_sw src1w,
+	sljit_s32 src2, sljit_sw src2w)
+{
+	/* Common driver of the one and two operand operations: brings both
+	   sources into registers (or a 13-bit immediate), calls
+	   emit_single_op, and stores the result when dst is a memory operand.
+	   Returns SLJIT_SUCCESS or the error reported by a failed step.
+
+	   arg1 goes to TMP_REG1 or src reg
+	   arg2 goes to TMP_REG2, imm or src reg
+	   TMP_REG3 can be used for caching
+	   result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
+	sljit_s32 dst_r = TMP_REG2;
+	sljit_s32 src1_r;
+	sljit_sw src2_r = 0;
+	sljit_s32 sugg_src2_r = TMP_REG2;
+
+	if (!(flags & ALT_KEEP_CACHE)) {
+		compiler->cache_arg = 0;
+		compiler->cache_argw = 0;
+	}
+
+	if (dst != TMP_REG2) {
+		if (FAST_IS_REG(dst)) {
+			dst_r = dst;
+			flags |= REG_DEST;
+			if (flags & MOVE_OP)
+				sugg_src2_r = dst_r; /* a move can load its source straight into dst */
+		}
+		else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw))
+			flags |= SLOW_DEST; /* the store will need getput_arg */
+	}
+
+	if (flags & IMM_OP) {
+		if ((src2 & SLJIT_IMM) && src2w) {
+			if (src2w <= SIMM_MAX && src2w >= SIMM_MIN) {
+				flags |= SRC2_IMM;
+				src2_r = src2w;
+			}
+		}
+		if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) {
+			if (src1w <= SIMM_MAX && src1w >= SIMM_MIN) {
+				flags |= SRC2_IMM;
+				src2_r = src1w;
+
+				/* And swap arguments. */
+				src1 = src2;
+				src1w = src2w;
+				src2 = SLJIT_IMM;
+				/* src2w = src2_r unneeded. */
+			}
+		}
+	}
+
+	/* Source 1. */
+	if (FAST_IS_REG(src1))
+		src1_r = src1;
+	else if (src1 & SLJIT_IMM) {
+		if (src1w) {
+			FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
+			src1_r = TMP_REG1;
+		}
+		else
+			src1_r = 0; /* index 0 of reg_map stands in for the value zero */
+	}
+	else {
+		if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w))
+			FAIL_IF(compiler->error);
+		else
+			flags |= SLOW_SRC1; /* loaded later, once the caching order is known */
+		src1_r = TMP_REG1;
+	}
+
+	/* Source 2. */
+	if (FAST_IS_REG(src2)) {
+		src2_r = src2;
+		flags |= REG2_SOURCE;
+		if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP)
+			dst_r = src2_r; /* move to memory: store the source register directly */
+	}
+	else if (src2 & SLJIT_IMM) {
+		if (!(flags & SRC2_IMM)) {
+			if (src2w) {
+				FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w));
+				src2_r = sugg_src2_r;
+			}
+			else {
+				src2_r = 0;
+				if (flags & MOVE_OP) {
+					if (dst & SLJIT_MEM)
+						dst_r = 0;
+					else
+						op = SLJIT_MOV;
+				}
+			}
+		}
+	}
+	else {
+		if (getput_arg_fast(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w))
+			FAIL_IF(compiler->error);
+		else
+			flags |= SLOW_SRC2;
+		src2_r = sugg_src2_r;
+	}
+
+	/* Deferred loads: when both sources are slow, the order is chosen so
+	   that the address computed for one access can be cached for the next. */
+	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
+		SLJIT_ASSERT(src2_r == TMP_REG2);
+		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
+			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
+			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
+		}
+		else {
+			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
+			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
+		}
+	}
+	else if (flags & SLOW_SRC1)
+		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
+	else if (flags & SLOW_SRC2)
+		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
+
+	FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
+
+	if (dst & SLJIT_MEM) {
+		if (!(flags & SLOW_DEST)) {
+			/* The ARG_TEST probe above already showed that this succeeds. */
+			getput_arg_fast(compiler, flags, dst_r, dst, dstw);
+			return compiler->error;
+		}
+		return getput_arg(compiler, flags, dst_r, dst, dstw, 0, 0);
+	}
+
+	return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
+{	/* Emits the operations that take no explicit operands. The multiply
+	   and divide forms work on SLJIT_R0 and SLJIT_R1 and are implemented
+	   for sparc-32 only. Opcodes that are not listed emit nothing and
+	   yield SLJIT_SUCCESS. */
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_op0(compiler, op));
+
+	op = GET_OPCODE(op);
+	switch (op) {
+	case SLJIT_BREAKPOINT:
+		return push_inst(compiler, TA, UNMOVABLE_INS);
+	case SLJIT_NOP:
+		return push_inst(compiler, NOP, UNMOVABLE_INS);
+	case SLJIT_LMUL_UW:
+	case SLJIT_LMUL_SW:
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+		/* R0 = R0 * R1, then R1 is read from the Y register. */
+		FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? UMUL : SMUL) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0)));
+		return push_inst(compiler, RDY | D(SLJIT_R1), DR(SLJIT_R1));
+#else
+#error "Implementation required"
+#endif
+	case SLJIT_DIVMOD_UW:
+	case SLJIT_DIVMOD_SW:
+	case SLJIT_DIV_UW:
+	case SLJIT_DIV_SW:
+		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+		/* The Y register is written before the division: zero for the
+		   unsigned forms, R0 shifted right arithmetically by 31 (its sign
+		   bits) for the signed forms. */
+		if ((op | 0x2) == SLJIT_DIV_UW)
+			FAIL_IF(push_inst(compiler, WRY | S1(0), MOVABLE_INS));
+		else {
+			FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(SLJIT_R0) | IMM(31), DR(TMP_REG1)));
+			FAIL_IF(push_inst(compiler, WRY | S1(TMP_REG1), MOVABLE_INS));
+		}
+		if (op <= SLJIT_DIVMOD_SW) /* DIVMOD: keep the dividend for the remainder */
+			FAIL_IF(push_inst(compiler, OR | D(TMP_REG2) | S1(0) | S2(SLJIT_R0), DR(TMP_REG2)));
+		FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? UDIV : SDIV) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0)));
+		if (op >= SLJIT_DIV_UW)
+			return SLJIT_SUCCESS; /* plain division: no remainder requested */
+		/* remainder = dividend - quotient * divisor, stored in R1 */
+		FAIL_IF(push_inst(compiler, SMUL | D(SLJIT_R1) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R1)));
+		return push_inst(compiler, SUB | D(SLJIT_R1) | S1(TMP_REG2) | S2(SLJIT_R1), DR(SLJIT_R1));
+#else
+#error "Implementation required"
+#endif
+	case SLJIT_ENDBR:
+	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
+		return SLJIT_SUCCESS;
+	}
+
+	return SLJIT_SUCCESS;
+}
+
+/* Emits a single-source operation (the move variants, NOT and CLZ) by
+   forwarding to emit_op with the data-size flags that match the opcode.
+   Opcodes not listed in the switch emit nothing and return SLJIT_SUCCESS. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
+	sljit_s32 dst, sljit_sw dstw,
+	sljit_s32 src, sljit_sw srcw)
+{
+	/* SET_FLAGS is requested only when the caller's op carries flag bits. */
+	sljit_u32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
+
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
+	ADJUST_LOCAL_OFFSET(dst, dstw);
+	ADJUST_LOCAL_OFFSET(src, srcw);
+
+	op = GET_OPCODE(op);
+	switch (op) {
+	case SLJIT_MOV:
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+	/* On the 32-bit configuration the 32-bit moves are handled as plain word moves. */
+	case SLJIT_MOV_U32:
+	case SLJIT_MOV_S32:
+	case SLJIT_MOV32:
+#endif
+	case SLJIT_MOV_P:
+		return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, srcw);
+
+	/* For the narrow moves an immediate source is narrowed by the C cast
+	   before emit_op sees it; non-immediate sources are passed through unchanged. */
+	case SLJIT_MOV_U8:
+		return emit_op(compiler, SLJIT_MOV_U8, flags | BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw);
+
+	case SLJIT_MOV_S8:
+		return emit_op(compiler, SLJIT_MOV_S8, flags | BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw);
+
+	case SLJIT_MOV_U16:
+		return emit_op(compiler, SLJIT_MOV_U16, flags | HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw);
+
+	case SLJIT_MOV_S16:
+		return emit_op(compiler, SLJIT_MOV_S16, flags | HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw);
+
+	case SLJIT_NOT:
+	case SLJIT_CLZ:
+		return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw);
+	}
+
+	return SLJIT_SUCCESS;
+}
+
+/* Emits a two-source integer operation by forwarding to emit_op.
+   Opcodes not listed in the switch emit nothing and return SLJIT_SUCCESS. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
+	sljit_s32 dst, sljit_sw dstw,
+	sljit_s32 src1, sljit_sw src1w,
+	sljit_s32 src2, sljit_sw src2w)
+{
+	sljit_u32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
+
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
+	ADJUST_LOCAL_OFFSET(dst, dstw);
+	ADJUST_LOCAL_OFFSET(src1, src1w);
+	ADJUST_LOCAL_OFFSET(src2, src2w);
+
+	op = GET_OPCODE(op);
+	switch (op) {
+	/* These opcodes are passed with CUMULATIVE_OP in addition to IMM_OP. */
+	case SLJIT_ADD:
+	case SLJIT_ADDC:
+	case SLJIT_MUL:
+	case SLJIT_AND:
+	case SLJIT_OR:
+	case SLJIT_XOR:
+		return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+
+	/* Subtraction is passed without CUMULATIVE_OP. */
+	case SLJIT_SUB:
+	case SLJIT_SUBC:
+		return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+
+	case SLJIT_SHL:
+	case SLJIT_LSHR:
+	case SLJIT_ASHR:
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+		/* An immediate shift count is reduced to its low 5 bits (0..31). */
+		if (src2 & SLJIT_IMM)
+			src2w &= 0x1f;
+#else
+		SLJIT_UNREACHABLE();
+#endif
+		return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
+	}
+
+	return SLJIT_SUCCESS;
+}
+
+/* Variant of sljit_emit_op2 without a caller-supplied destination:
+   the operation is forwarded to sljit_emit_op2 with TMP_REG2 as the destination. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
+	sljit_s32 src1, sljit_sw src1w,
+	sljit_s32 src2, sljit_sw src2w)
+{
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+	/* Arguments were checked above; presumably this stops the nested call from checking them again. */
+	compiler->skip_checks = 1;
+#endif
+	return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w);
+}
+
+/* Emits an operation that only consumes a source operand.
+   Only SLJIT_FAST_RETURN produces code on this backend; every other opcode
+   (SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN and the SLJIT_PREFETCH_* hints included)
+   emits nothing and reports success. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
+	sljit_s32 src, sljit_sw srcw)
+{
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
+	ADJUST_LOCAL_OFFSET(src, srcw);
+
+	if (op != SLJIT_FAST_RETURN)
+		return SLJIT_SUCCESS;
+
+	/* Bring the return address into TMP_LINK, from memory or from a register. */
+	if (!(FAST_IS_REG(src))) {
+		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_LINK, src, srcw));
+	}
+	else {
+		FAIL_IF(push_inst(compiler, OR | D(TMP_LINK) | S1(0) | S2(src), DR(TMP_LINK)));
+	}
+
+	/* Jump to TMP_LINK + 8, followed by a NOP. */
+	FAIL_IF(push_inst(compiler, JMPL | D(0) | S1(TMP_LINK) | IMM(8), UNMOVABLE_INS));
+	return push_inst(compiler, NOP, UNMOVABLE_INS);
+}
+
+/* Looks up the reg_map entry for an SLJIT integer register id. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
+{
+	sljit_s32 mapped;
+
+	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
+	mapped = reg_map[reg];
+	return mapped;
+}
+
+/* Looks up the freg_map entry for an SLJIT floating point register id. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
+{
+	sljit_s32 mapped;
+
+	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
+	mapped = freg_map[reg];
+	return mapped;
+}
+
+/* Emits one caller-supplied machine instruction.
+   `instruction` is read as a single sljit_ins value; `size` is only passed
+   to the argument check and is not otherwise used here. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
+	void *instruction, sljit_u32 size)
+{
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
+
+	return push_inst(compiler, *(sljit_ins*)instruction, UNMOVABLE_INS);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Floating point operators                                             */
+/* --------------------------------------------------------------------- */
+
+/* Memory-access flags for a float operand: DOUBLE_DATA combined with the SLJIT_32
+   bit of op shifted right by 7. The compile-time assert in sljit_emit_fop1 pins
+   SLJIT_32 to 0x100, so single-precision ops contribute the 0x2 bit. */
+#define FLOAT_DATA(op) ((sljit_ins)DOUBLE_DATA | (((sljit_ins)(op) & SLJIT_32) >> 7))
+/* Selects `single` when op has SLJIT_32 set, `double` otherwise. */
+#define SELECT_FOP(op, single, double) ((op & SLJIT_32) ? single : double)
+/* Offset (22 machine words) used with SLJIT_MEM1(SLJIT_SP) as a scratch slot
+   when a value is passed between integer and float registers through memory. */
+#define FLOAT_TMP_MEM_OFFSET (22 * sizeof(sljit_sw))
+
+/* Converts a floating point source to an integer destination.
+   The converted value is produced in TMP_FREG1 and reaches an integer
+   register destination through the scratch stack slot. */
+static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
+	sljit_s32 dst, sljit_sw dstw,
+	sljit_s32 src, sljit_sw srcw)
+{
+	/* A memory source is first loaded into TMP_FREG1. */
+	if (src & SLJIT_MEM) {
+		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
+		src = TMP_FREG1;
+	}
+
+	FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOI, FDTOI) | FD(TMP_FREG1) | FS2(src), MOVABLE_INS));
+
+	/* Register destination: store TMP_FREG1 to the scratch slot, then load the word back into dst. */
+	if (FAST_IS_REG(dst)) {
+		FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
+		return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET);
+	}
+
+	/* Store the integer value from the temporary float register. */
+	return emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, dst, dstw, 0, 0);
+}
+
+/* Converts an integer source to a floating point destination.
+   The integer always reaches TMP_FREG1 through memory: immediates and
+   registers are first written to the scratch stack slot. */
+static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
+	sljit_s32 dst, sljit_sw dstw,
+	sljit_s32 src, sljit_sw srcw)
+{
+	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+
+	if (src & SLJIT_IMM) {
+		/* NOTE(review): this guard tests the x86-64 configuration macro inside the
+		   SPARC backend, so it looks like it can never be active here - confirm
+		   whether a SPARC configuration macro was intended. */
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
+			srcw = (sljit_s32)srcw;
+#endif
+		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+		src = TMP_REG1;
+		srcw = 0;
+	}
+
+	/* A register source is spilled to the scratch slot and then treated as a memory source. */
+	if (FAST_IS_REG(src)) {
+		FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
+		src = SLJIT_MEM1(SLJIT_SP);
+		srcw = FLOAT_TMP_MEM_OFFSET;
+	}
+
+	FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
+	FAIL_IF(push_inst(compiler, SELECT_FOP(op, FITOS, FITOD) | FD(dst_r) | FS2(TMP_FREG1), MOVABLE_INS));
+
+	/* For a memory destination dst_r is TMP_FREG1, which is written out here. */
+	if (dst & SLJIT_MEM)
+		return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
+	return SLJIT_SUCCESS;
+}
+
+/* Emits a floating point compare of src1 and src2.
+   Memory operands are loaded into TMP_FREG1 / TMP_FREG2 first; the compare
+   instruction is pushed with FCC_IS_SET recorded alongside MOVABLE_INS. */
+static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
+	sljit_s32 src1, sljit_sw src1w,
+	sljit_s32 src2, sljit_sw src2w)
+{
+	if (src1 & SLJIT_MEM) {
+		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
+		src1 = TMP_FREG1;
+	}
+
+	if (src2 & SLJIT_MEM) {
+		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
+		src2 = TMP_FREG2;
+	}
+
+	return push_inst(compiler, SELECT_FOP(op, FCMPS, FCMPD) | FS1(src1) | FS2(src2), FCC_IS_SET | MOVABLE_INS);
+}
+
+/* Emits a single-source floating point operation (move, negate, absolute
+   value, or conversion between single and double precision).
+   Returns an SLJIT error code. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
+	sljit_s32 dst, sljit_sw dstw,
+	sljit_s32 src, sljit_sw srcw)
+{
+	sljit_s32 dst_r;
+
+	CHECK_ERROR();
+	compiler->cache_arg = 0;
+	compiler->cache_argw = 0;
+
+	/* FLOAT_DATA depends on these two properties. */
+	SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error);
+	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
+
+	/* For the precision conversion the SLJIT_32 bit is flipped while the source
+	   is loaded, and flipped back in the switch before the destination is stored. */
+	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32)
+		op ^= SLJIT_32;
+
+	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
+
+	if (src & SLJIT_MEM) {
+		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
+		src = dst_r;
+	}
+
+	switch (GET_OPCODE(op)) {
+	case SLJIT_MOV_F64:
+		if (src != dst_r) {
+			if (dst_r != TMP_FREG1) {
+				/* Register to register: one FMOVS, plus a second FMOVS on the
+				   FDN/FS2N encodings when the operation is not SLJIT_32. */
+				FAIL_IF(push_inst(compiler, FMOVS | FD(dst_r) | FS2(src), MOVABLE_INS));
+				if (!(op & SLJIT_32))
+					FAIL_IF(push_inst(compiler, FMOVS | FDN(dst_r) | FS2N(src), MOVABLE_INS));
+			}
+			else
+				/* Memory destination: no copy is emitted, src is stored directly below. */
+				dst_r = src;
+		}
+		break;
+	case SLJIT_NEG_F64:
+		FAIL_IF(push_inst(compiler, FNEGS | FD(dst_r) | FS2(src), MOVABLE_INS));
+		/* Non-SLJIT_32 with distinct registers: the FDN/FS2N part is copied unchanged. */
+		if (dst_r != src && !(op & SLJIT_32))
+			FAIL_IF(push_inst(compiler, FMOVS | FDN(dst_r) | FS2N(src), MOVABLE_INS));
+		break;
+	case SLJIT_ABS_F64:
+		FAIL_IF(push_inst(compiler, FABSS | FD(dst_r) | FS2(src), MOVABLE_INS));
+		/* Same second-part copy as for negation. */
+		if (dst_r != src && !(op & SLJIT_32))
+			FAIL_IF(push_inst(compiler, FMOVS | FDN(dst_r) | FS2N(src), MOVABLE_INS));
+		break;
+	case SLJIT_CONV_F64_FROM_F32:
+		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOD, FDTOS) | FD(dst_r) | FS2(src), MOVABLE_INS));
+		op ^= SLJIT_32;
+		break;
+	}
+
+	if (dst & SLJIT_MEM)
+		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0));
+	return SLJIT_SUCCESS;
+}
+
+/* Emits a two-source floating point operation (add, subtract, multiply, divide).
+   Memory sources are loaded into TMP_FREG1 / TMP_FREG2; a memory destination
+   is computed in TMP_FREG2 and stored afterwards. Returns an SLJIT error code. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
+	sljit_s32 dst, sljit_sw dstw,
+	sljit_s32 src1, sljit_sw src1w,
+	sljit_s32 src2, sljit_sw src2w)
+{
+	sljit_s32 dst_r, flags = 0;
+
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
+	ADJUST_LOCAL_OFFSET(dst, dstw);
+	ADJUST_LOCAL_OFFSET(src1, src1w);
+	ADJUST_LOCAL_OFFSET(src2, src2w);
+
+	compiler->cache_arg = 0;
+	compiler->cache_argw = 0;
+
+	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;
+
+	/* Try the fast load path for each memory source; when getput_arg_fast
+	   returns zero the operand is marked SLOW and loaded further below. */
+	if (src1 & SLJIT_MEM) {
+		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
+			FAIL_IF(compiler->error);
+			src1 = TMP_FREG1;
+		} else
+			flags |= SLOW_SRC1;
+	}
+
+	if (src2 & SLJIT_MEM) {
+		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
+			FAIL_IF(compiler->error);
+			src2 = TMP_FREG2;
+		} else
+			flags |= SLOW_SRC2;
+	}
+
+	/* Both sources slow: the load order is chosen from the can_cache results,
+	   and each getput_arg call is given the operand accessed next. */
+	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
+		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
+			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
+			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
+		}
+		else {
+			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
+			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
+		}
+	}
+	else if (flags & SLOW_SRC1)
+		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
+	else if (flags & SLOW_SRC2)
+		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
+
+	/* From here on slow operands are referenced through their temporary registers. */
+	if (flags & SLOW_SRC1)
+		src1 = TMP_FREG1;
+	if (flags & SLOW_SRC2)
+		src2 = TMP_FREG2;
+
+	switch (GET_OPCODE(op)) {
+	case SLJIT_ADD_F64:
+		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADDD) | FD(dst_r) | FS1(src1) | FS2(src2), MOVABLE_INS));
+		break;
+
+	case SLJIT_SUB_F64:
+		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUBD) | FD(dst_r) | FS1(src1) | FS2(src2), MOVABLE_INS));
+		break;
+
+	case SLJIT_MUL_F64:
+		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMULD) | FD(dst_r) | FS1(src1) | FS2(src2), MOVABLE_INS));
+		break;
+
+	case SLJIT_DIV_F64:
+		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIVD) | FD(dst_r) | FS1(src1) | FS2(src2), MOVABLE_INS));
+		break;
+	}
+
+	/* dst_r is TMP_FREG2 exactly when dst was not a register: write the result out. */
+	if (dst_r == TMP_FREG2)
+		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));
+
+	return SLJIT_SUCCESS;
+}
+
+#undef FLOAT_DATA
+#undef SELECT_FOP
+
+/* --------------------------------------------------------------------- */
+/*  Other instructions                                                   */
+/* --------------------------------------------------------------------- */
+
+/* Saves the value of TMP_LINK into dst at the start of a fast-called routine.
+   A register destination gets a single OR-based copy; a memory destination
+   is written with emit_op_mem, after which the delay slot state is reset
+   to UNMOVABLE_INS. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
+{
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
+	ADJUST_LOCAL_OFFSET(dst, dstw);
+
+	if (!(FAST_IS_REG(dst))) {
+		/* Memory destination. */
+		FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_LINK, dst, dstw));
+		compiler->delay_slot = UNMOVABLE_INS;
+		return SLJIT_SUCCESS;
+	}
+
+	/* Register destination. */
+	return push_inst(compiler, OR | D(dst) | S1(0) | S2(TMP_LINK), UNMOVABLE_INS);
+}
+
+/* --------------------------------------------------------------------- */
+/*  Conditional instructions                                             */
+/* --------------------------------------------------------------------- */
+
+/* Creates a label at the current code position.
+   If the most recent label was created at the same compiler->size it is
+   returned again instead of allocating a new one. Returns NULL on failure. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
+{
+	struct sljit_label *label;
+
+	CHECK_ERROR_PTR();
+	CHECK_PTR(check_sljit_emit_label(compiler));
+
+	if (compiler->last_label && compiler->last_label->size == compiler->size)
+		return compiler->last_label;
+
+	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
+	PTR_FAIL_IF(!label);
+	set_label(label, compiler);
+	/* Presumably keeps the previous instruction from being moved past the label into a delay slot. */
+	compiler->delay_slot = UNMOVABLE_INS;
+	return label;
+}
+
+/* Maps an SLJIT condition type to a condition code value wrapped in DA().
+   Integer and floating point conditions that share a code are grouped under
+   one return. `compiler` is consulted only for the overflow conditions, whose
+   code depends on whether the current flags come from an add or a subtract. */
+static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
+{
+	switch (type) {
+	case SLJIT_EQUAL:
+	case SLJIT_NOT_EQUAL_F64: /* Unordered. */
+		return DA(0x1);
+
+	case SLJIT_NOT_EQUAL:
+	case SLJIT_EQUAL_F64:
+		return DA(0x9);
+
+	case SLJIT_LESS:
+	case SLJIT_GREATER_F64: /* Unordered. */
+	case SLJIT_CARRY:
+		return DA(0x5);
+
+	case SLJIT_GREATER_EQUAL:
+	case SLJIT_LESS_EQUAL_F64:
+	case SLJIT_NOT_CARRY:
+		return DA(0xd);
+
+	case SLJIT_GREATER:
+	case SLJIT_GREATER_EQUAL_F64: /* Unordered. */
+		return DA(0xc);
+
+	case SLJIT_LESS_EQUAL:
+	case SLJIT_LESS_F64:
+		return DA(0x4);
+
+	case SLJIT_SIG_LESS:
+		return DA(0x3);
+
+	case SLJIT_SIG_GREATER_EQUAL:
+		return DA(0xb);
+
+	case SLJIT_SIG_GREATER:
+		return DA(0xa);
+
+	case SLJIT_SIG_LESS_EQUAL:
+		return DA(0x2);
+
+	case SLJIT_OVERFLOW:
+		/* Flags not produced by add/sub: use the same code as SLJIT_NOT_EQUAL. */
+		if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
+			return DA(0x9);
+		/* fallthrough */
+
+	case SLJIT_UNORDERED_F64:
+		return DA(0x7);
+
+	case SLJIT_NOT_OVERFLOW:
+		/* Flags not produced by add/sub: use the same code as SLJIT_EQUAL. */
+		if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
+			return DA(0x1);
+		/* fallthrough */
+
+	case SLJIT_ORDERED_F64:
+		return DA(0xf);
+
+	default:
+		SLJIT_UNREACHABLE();
+		return DA(0x8);
+	}
+}
+
+/* Emits a (possibly conditional) jump or call whose target is filled in later.
+   The target address is loaded into TMP_REG1 with emit_const and reached
+   through JMPL; jump->addr records the position just after the JMPL.
+   Returns the new jump record, or NULL on failure. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
+{
+	struct sljit_jump *jump;
+
+	CHECK_ERROR_PTR();
+	CHECK_PTR(check_sljit_emit_jump(compiler, type));
+
+	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+	PTR_FAIL_IF(!jump);
+	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+	type &= 0xff;
+
+	if (type < SLJIT_EQUAL_F64) {
+		/* Integer condition. The jump is marked movable only when the pending
+		   delay-slot instruction is movable and did not set the integer flags. */
+		jump->flags |= IS_COND;
+		if (((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) && !(compiler->delay_slot & ICC_IS_SET))
+			jump->flags |= IS_MOVABLE;
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+		/* Branch on the inverted condition (type ^ 1) with displacement 5,
+		   presumably skipping the address load + JMPL sequence emitted below. */
+		PTR_FAIL_IF(push_inst(compiler, BICC | get_cc(compiler, type ^ 1) | 5, UNMOVABLE_INS));
+#else
+#error "Implementation required"
+#endif
+	}
+	else if (type < SLJIT_JUMP) {
+		/* Floating point condition: same scheme, checked against FCC_IS_SET. */
+		jump->flags |= IS_COND;
+		if (((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) && !(compiler->delay_slot & FCC_IS_SET))
+			jump->flags |= IS_MOVABLE;
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+		PTR_FAIL_IF(push_inst(compiler, FBFCC | get_cc(compiler, type ^ 1) | 5, UNMOVABLE_INS));
+#else
+#error "Implementation required"
+#endif
+	}
+	else {
+		/* Unconditional jump or call. */
+		if ((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS)
+			jump->flags |= IS_MOVABLE;
+		if (type >= SLJIT_FAST_CALL)
+			jump->flags |= IS_CALL;
+	}
+
+	/* Placeholder address (0) in TMP_REG1; calls link through TMP_LINK, plain jumps through register 0. */
+	PTR_FAIL_IF(emit_const(compiler, TMP_REG1, 0));
+	PTR_FAIL_IF(push_inst(compiler, JMPL | D(type >= SLJIT_FAST_CALL ? TMP_LINK : 0) | S1(TMP_REG1) | IMM(0), UNMOVABLE_INS));
+	jump->addr = compiler->size;
+	PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
+
+	return jump;
+}
+
+/* Emits a function call to a target that is set later: the arguments are
+   prepared by call_with_args, then the jump itself is produced by sljit_emit_jump.
+   Returns the jump record, or NULL on failure. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
+	sljit_s32 arg_types)
+{
+	CHECK_ERROR_PTR();
+	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
+
+	PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL));
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+	/* Presumably stops sljit_emit_jump from repeating the argument checks. */
+	compiler->skip_checks = 1;
+#endif
+
+	return sljit_emit_jump(compiler, type);
+}
+
+/* Emits an indirect jump or call to the address given by src.
+   src may be a register, an immediate address (recorded as a jump with
+   JUMP_ADDR so it can be resolved later), or a memory operand.
+   Returns an SLJIT error code. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
+{
+	struct sljit_jump *jump = NULL;
+	sljit_s32 src_r;
+
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
+	ADJUST_LOCAL_OFFSET(src, srcw);
+
+	if (FAST_IS_REG(src))
+		src_r = src;
+	else if (src & SLJIT_IMM) {
+		/* Immediate target: create a jump record holding the absolute address. */
+		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+		FAIL_IF(!jump);
+		set_jump(jump, compiler, JUMP_ADDR);
+		jump->u.target = (sljit_uw)srcw;
+
+		if ((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS)
+			jump->flags |= IS_MOVABLE;
+		if (type >= SLJIT_FAST_CALL)
+			jump->flags |= IS_CALL;
+
+		/* Placeholder constant in TMP_REG1. */
+		FAIL_IF(emit_const(compiler, TMP_REG1, 0));
+		src_r = TMP_REG1;
+	}
+	else {
+		/* Memory operand: load the target address into TMP_REG1. */
+		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
+		src_r = TMP_REG1;
+	}
+
+	FAIL_IF(push_inst(compiler, JMPL | D(type >= SLJIT_FAST_CALL ? TMP_LINK : 0) | S1(src_r) | IMM(0), UNMOVABLE_INS));
+	/* Only the immediate form has a jump record; its address is the position after the JMPL. */
+	if (jump)
+		jump->addr = compiler->size;
+	return push_inst(compiler, NOP, UNMOVABLE_INS);
+}
+
+/* Emits an indirect function call: a memory target is first loaded into
+   TMP_REG1, the arguments are prepared by call_with_args (which receives a
+   pointer to src and may therefore change it), and the call itself is
+   emitted by sljit_emit_ijump. Returns an SLJIT error code. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
+	sljit_s32 arg_types,
+	sljit_s32 src, sljit_sw srcw)
+{
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
+
+	if (src & SLJIT_MEM) {
+		ADJUST_LOCAL_OFFSET(src, srcw);
+		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
+		src = TMP_REG1;
+	}
+
+	FAIL_IF(call_with_args(compiler, arg_types, &src));
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+	/* Presumably stops sljit_emit_ijump from repeating the argument checks. */
+	compiler->skip_checks = 1;
+#endif
+
+	return sljit_emit_ijump(compiler, type, src, srcw);
+}
+
+/* Materialises a condition as 0 or 1 and either stores it in dst (op below
+   SLJIT_ADD) or combines it with dst using the given binary op.
+   Returns an SLJIT error code. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
+	sljit_s32 dst, sljit_sw dstw,
+	sljit_s32 type)
+{
+	sljit_s32 reg;
+	sljit_u32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
+
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
+	ADJUST_LOCAL_OFFSET(dst, dstw);
+
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+	op = GET_OPCODE(op);
+	/* The 0/1 value goes straight into dst only for a move into a register; otherwise into TMP_REG2. */
+	reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;
+
+	compiler->cache_arg = 0;
+	compiler->cache_argw = 0;
+
+	/* For a binary op on a memory destination, load its current value into TMP_REG1 first. */
+	if (op >= SLJIT_ADD && (dst & SLJIT_MEM))
+		FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, dst, dstw, dst, dstw));
+
+	type &= 0xff;
+	/* Conditional branch with displacement 3, followed by "reg = 1" and "reg = 0".
+	   Presumably the first assignment sits in the branch delay slot and the
+	   second is skipped when the branch is taken - confirm against the SPARC manual. */
+	if (type < SLJIT_EQUAL_F64)
+		FAIL_IF(push_inst(compiler, BICC | get_cc(compiler, type) | 3, UNMOVABLE_INS));
+	else
+		FAIL_IF(push_inst(compiler, FBFCC | get_cc(compiler, type) | 3, UNMOVABLE_INS));
+
+	FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(1), UNMOVABLE_INS));
+	FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(0), UNMOVABLE_INS));
+
+	if (op >= SLJIT_ADD) {
+		/* Combine the 0/1 value in TMP_REG2 with the destination's previous value. */
+		flags |= CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE;
+		if (dst & SLJIT_MEM)
+			return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
+		return emit_op(compiler, op, flags, dst, 0, dst, 0, TMP_REG2, 0);
+	}
+
+	if (!(dst & SLJIT_MEM))
+		return SLJIT_SUCCESS;
+
+	/* Move into memory: store the value produced in TMP_REG2. */
+	return emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw);
+#else
+#error "Implementation required"
+#endif
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+	sljit_s32 dst_reg,
+	sljit_s32 src, sljit_sw srcw)
+{
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
+
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+	return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);;
+#else
+#error "Implementation required"
+#endif
+}
+
+/* Emits a constant load whose record is returned to the caller.
+   The value is materialised with emit_const in dst itself when dst is a
+   register, otherwise in TMP_REG2 followed by a store to dst.
+   Returns the constant record, or NULL on failure. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
+{
+	struct sljit_const *record;
+	sljit_s32 target_reg;
+
+	CHECK_ERROR_PTR();
+	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
+	ADJUST_LOCAL_OFFSET(dst, dstw);
+
+	record = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
+	PTR_FAIL_IF(!record);
+	set_const(record, compiler);
+
+	if (FAST_IS_REG(dst))
+		target_reg = dst;
+	else
+		target_reg = TMP_REG2;
+	PTR_FAIL_IF(emit_const(compiler, target_reg, init_value));
+
+	if (!(dst & SLJIT_MEM))
+		return record;
+
+	/* Memory destination: write out the value built in TMP_REG2. */
+	PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
+	return record;
+}
+
+/* Emits a placeholder constant load (value 0) tied to a put_label record.
+   The placeholder is built in dst itself when dst is a register, otherwise
+   in TMP_REG2 followed by a store to dst.
+   Returns the put_label record, or NULL on failure. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
+{
+	struct sljit_put_label *record;
+	sljit_s32 target_reg;
+
+	CHECK_ERROR_PTR();
+	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
+	ADJUST_LOCAL_OFFSET(dst, dstw);
+
+	record = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
+	PTR_FAIL_IF(!record);
+	set_put_label(record, compiler, 0);
+
+	if (FAST_IS_REG(dst))
+		target_reg = dst;
+	else
+		target_reg = TMP_REG2;
+	PTR_FAIL_IF(emit_const(compiler, target_reg, 0));
+
+	if (!(dst & SLJIT_MEM))
+		return record;
+
+	/* Memory destination: write out the placeholder built in TMP_REG2. */
+	PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
+	return record;
+}

File diff suppressed because it is too large
+ 468 - 247
thirdparty/pcre2/src/sljit/sljitNativeX86_32.c


+ 63 - 171
thirdparty/pcre2/src/sljit/sljitNativeX86_64.c

@@ -101,38 +101,34 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw
 	/* Calculate size of b. */
 	/* Calculate size of b. */
 	inst_size += 1; /* mod r/m byte. */
 	inst_size += 1; /* mod r/m byte. */
 	if (b & SLJIT_MEM) {
 	if (b & SLJIT_MEM) {
-		if (!(b & OFFS_REG_MASK) && NOT_HALFWORD(immb)) {
-			PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb));
-			immb = 0;
-			if (b & REG_MASK)
-				b |= TO_OFFS_REG(TMP_REG2);
-			else
-				b |= TMP_REG2;
+		if (!(b & OFFS_REG_MASK)) {
+			if (NOT_HALFWORD(immb)) {
+				PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb));
+				immb = 0;
+				if (b & REG_MASK)
+					b |= TO_OFFS_REG(TMP_REG2);
+				else
+					b |= TMP_REG2;
+			}
+			else if (reg_lmap[b & REG_MASK] == 4)
+				b |= TO_OFFS_REG(SLJIT_SP);
 		}
 		}
 
 
 		if (!(b & REG_MASK))
 		if (!(b & REG_MASK))
 			inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */
 			inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */
 		else {
 		else {
-			if (immb != 0 && !(b & OFFS_REG_MASK)) {
+			if (reg_map[b & REG_MASK] >= 8)
+				rex |= REX_B;
+
+			if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP))) {
 				/* Immediate operand. */
 				/* Immediate operand. */
 				if (immb <= 127 && immb >= -128)
 				if (immb <= 127 && immb >= -128)
 					inst_size += sizeof(sljit_s8);
 					inst_size += sizeof(sljit_s8);
 				else
 				else
 					inst_size += sizeof(sljit_s32);
 					inst_size += sizeof(sljit_s32);
 			}
 			}
-			else if (reg_lmap[b & REG_MASK] == 5) {
-				/* Swap registers if possible. */
-				if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_lmap[OFFS_REG(b)] != 5)
-					b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK);
-				else
-					inst_size += sizeof(sljit_s8);
-			}
-
-			if (reg_map[b & REG_MASK] >= 8)
-				rex |= REX_B;
-
-			if (reg_lmap[b & REG_MASK] == 4 && !(b & OFFS_REG_MASK))
-				b |= TO_OFFS_REG(SLJIT_SP);
+			else if (reg_lmap[b & REG_MASK] == 5)
+				inst_size += sizeof(sljit_s8);
 
 
 			if (b & OFFS_REG_MASK) {
 			if (b & OFFS_REG_MASK) {
 				inst_size += 1; /* SIB byte. */
 				inst_size += 1; /* SIB byte. */
@@ -157,9 +153,9 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw
 				inst_size += 4;
 				inst_size += 4;
 		}
 		}
 		else if (flags & EX86_SHIFT_INS) {
 		else if (flags & EX86_SHIFT_INS) {
-			SLJIT_ASSERT(imma <= (compiler->mode32 ? 0x1f : 0x3f));
+			imma &= compiler->mode32 ? 0x1f : 0x3f;
 			if (imma != 1) {
 			if (imma != 1) {
-				inst_size++;
+				inst_size ++;
 				flags |= EX86_BYTE_ARG;
 				flags |= EX86_BYTE_ARG;
 			}
 			}
 		} else if (flags & EX86_BYTE_ARG)
 		} else if (flags & EX86_BYTE_ARG)
@@ -227,7 +223,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw
 	} else if (b & REG_MASK) {
 	} else if (b & REG_MASK) {
 		reg_lmap_b = reg_lmap[b & REG_MASK];
 		reg_lmap_b = reg_lmap[b & REG_MASK];
 
 
-		if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
+		if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP) || reg_lmap_b == 5) {
 			if (immb != 0 || reg_lmap_b == 5) {
 			if (immb != 0 || reg_lmap_b == 5) {
 				if (immb <= 127 && immb >= -128)
 				if (immb <= 127 && immb >= -128)
 					*buf_ptr |= 0x40;
 					*buf_ptr |= 0x40;
@@ -252,14 +248,8 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw
 			}
 			}
 		}
 		}
 		else {
 		else {
-			if (reg_lmap_b == 5)
-				*buf_ptr |= 0x40;
-
 			*buf_ptr++ |= 0x04;
 			*buf_ptr++ |= 0x04;
 			*buf_ptr++ = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6));
 			*buf_ptr++ = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6));
-
-			if (reg_lmap_b == 5)
-				*buf_ptr++ = 0;
 		}
 		}
 	}
 	}
 	else {
 	else {
@@ -376,7 +366,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 {
 {
 	sljit_uw size;
 	sljit_uw size;
 	sljit_s32 word_arg_count = 0;
 	sljit_s32 word_arg_count = 0;
-	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
+	sljit_s32 saved_arg_count = 0;
 	sljit_s32 saved_regs_size, tmp, i;
 	sljit_s32 saved_regs_size, tmp, i;
 #ifdef _WIN64
 #ifdef _WIN64
 	sljit_s32 saved_float_regs_size;
 	sljit_s32 saved_float_regs_size;
@@ -389,19 +379,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
 	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
 	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
 	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
 
 
-	if (options & SLJIT_ENTER_REG_ARG)
-		arg_types = 0;
-
 	/* Emit ENDBR64 at function entry if needed.  */
 	/* Emit ENDBR64 at function entry if needed.  */
 	FAIL_IF(emit_endbranch(compiler));
 	FAIL_IF(emit_endbranch(compiler));
 
 
 	compiler->mode32 = 0;
 	compiler->mode32 = 0;
 
 
 	/* Including the return address saved by the call instruction. */
 	/* Including the return address saved by the call instruction. */
-	saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);
+	saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
 
 
 	tmp = SLJIT_S0 - saveds;
 	tmp = SLJIT_S0 - saveds;
-	for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) {
+	for (i = SLJIT_S0; i > tmp; i--) {
 		size = reg_map[i] >= 8 ? 2 : 1;
 		size = reg_map[i] >= 8 ? 2 : 1;
 		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
 		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
 		FAIL_IF(!inst);
 		FAIL_IF(!inst);
@@ -574,15 +561,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
 #endif /* _WIN64 */
 #endif /* _WIN64 */
 
 
 	/* Including the return address saved by the call instruction. */
 	/* Including the return address saved by the call instruction. */
-	saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
+	saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
 	compiler->local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size;
 	compiler->local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size;
 	return SLJIT_SUCCESS;
 	return SLJIT_SUCCESS;
 }
 }
 
 
-static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)
+static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
 {
 {
 	sljit_uw size;
 	sljit_uw size;
-	sljit_s32 local_size, i, tmp;
+	sljit_s32 i, tmp;
 	sljit_u8 *inst;
 	sljit_u8 *inst;
 #ifdef _WIN64
 #ifdef _WIN64
 	sljit_s32 saved_float_regs_offset;
 	sljit_s32 saved_float_regs_offset;
@@ -611,21 +598,30 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit
 			*inst = MOVAPS_x_xm;
 			*inst = MOVAPS_x_xm;
 			saved_float_regs_offset += 16;
 			saved_float_regs_offset += 16;
 		}
 		}
-
-		compiler->mode32 = 0;
 	}
 	}
 #endif /* _WIN64 */
 #endif /* _WIN64 */
 
 
-	local_size = compiler->local_size;
-
-	if (is_return_to && compiler->scratches < SLJIT_FIRST_SAVED_REG && (compiler->saveds == SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
-		local_size += SSIZE_OF(sw);
-		is_return_to = 0;
+	if (compiler->local_size > 0) {
+		if (compiler->local_size <= 127) {
+			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
+			FAIL_IF(!inst);
+			INC_SIZE(4);
+			*inst++ = REX_W;
+			*inst++ = GROUP_BINARY_83;
+			*inst++ = MOD_REG | ADD | 4;
+			*inst = U8(compiler->local_size);
+		}
+		else {
+			inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
+			FAIL_IF(!inst);
+			INC_SIZE(7);
+			*inst++ = REX_W;
+			*inst++ = GROUP_BINARY_81;
+			*inst++ = MOD_REG | ADD | 4;
+			sljit_unaligned_store_s32(inst, compiler->local_size);
+		}
 	}
 	}
 
 
-	if (local_size > 0)
-		BINARY_IMM32(ADD, local_size, SLJIT_SP, 0);
-
 	tmp = compiler->scratches;
 	tmp = compiler->scratches;
 	for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
 	for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
 		size = reg_map[i] >= 8 ? 2 : 1;
 		size = reg_map[i] >= 8 ? 2 : 1;
@@ -637,8 +633,8 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit
 		POP_REG(reg_lmap[i]);
 		POP_REG(reg_lmap[i]);
 	}
 	}
 
 
-	tmp = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
-	for (i = SLJIT_S0 + 1 - compiler->saveds; i <= tmp; i++) {
+	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
+	for (i = tmp; i <= SLJIT_S0; i++) {
 		size = reg_map[i] >= 8 ? 2 : 1;
 		size = reg_map[i] >= 8 ? 2 : 1;
 		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
 		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
 		FAIL_IF(!inst);
 		FAIL_IF(!inst);
@@ -648,9 +644,6 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit
 		POP_REG(reg_lmap[i]);
 		POP_REG(reg_lmap[i]);
 	}
 	}
 
 
-	if (is_return_to)
-		BINARY_IMM32(ADD, sizeof(sljit_sw), SLJIT_SP, 0);
-
 	return SLJIT_SUCCESS;
 	return SLJIT_SUCCESS;
 }
 }
 
 
@@ -661,9 +654,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler
 	CHECK_ERROR();
 	CHECK_ERROR();
 	CHECK(check_sljit_emit_return_void(compiler));
 	CHECK(check_sljit_emit_return_void(compiler));
 
 
-	compiler->mode32 = 0;
-
-	FAIL_IF(emit_stack_frame_release(compiler, 0));
+	FAIL_IF(emit_stack_frame_release(compiler));
 
 
 	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
 	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
 	FAIL_IF(!inst);
 	FAIL_IF(!inst);
@@ -672,28 +663,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler
 	return SLJIT_SUCCESS;
 	return SLJIT_SUCCESS;
 }
 }
 
 
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
-	sljit_s32 src, sljit_sw srcw)
-{
-	CHECK_ERROR();
-	CHECK(check_sljit_emit_return_to(compiler, src, srcw));
-
-	compiler->mode32 = 0;
-
-	if ((src & SLJIT_MEM) || (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {
-		ADJUST_LOCAL_OFFSET(src, srcw);
-
-		EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
-		src = TMP_REG2;
-		srcw = 0;
-	}
-
-	FAIL_IF(emit_stack_frame_release(compiler, 1));
-
-	SLJIT_SKIP_CHECKS(compiler);
-	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
-}
-
 /* --------------------------------------------------------------------- */
 /* --------------------------------------------------------------------- */
 /*  Call / return instructions                                           */
 /*  Call / return instructions                                           */
 /* --------------------------------------------------------------------- */
 /* --------------------------------------------------------------------- */
@@ -817,15 +786,17 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile
 
 
 	compiler->mode32 = 0;
 	compiler->mode32 = 0;
 
 
-	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
-		PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL));
+	PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL));
 
 
 	if (type & SLJIT_CALL_RETURN) {
 	if (type & SLJIT_CALL_RETURN) {
-		PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));
+		PTR_FAIL_IF(emit_stack_frame_release(compiler));
 		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
 		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
 	}
 	}
 
 
-	SLJIT_SKIP_CHECKS(compiler);
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+	compiler->skip_checks = 1;
+#endif
 	return sljit_emit_jump(compiler, type);
 	return sljit_emit_jump(compiler, type);
 }
 }
 
 
@@ -845,21 +816,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
 	}
 	}
 
 
 	if (type & SLJIT_CALL_RETURN) {
 	if (type & SLJIT_CALL_RETURN) {
-		if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
+		if (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0) {
 			EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
 			EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
 			src = TMP_REG2;
 			src = TMP_REG2;
 		}
 		}
 
 
-		FAIL_IF(emit_stack_frame_release(compiler, 0));
+		FAIL_IF(emit_stack_frame_release(compiler));
+		type = SLJIT_JUMP;
 	}
 	}
 
 
-	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
-		FAIL_IF(call_with_args(compiler, arg_types, &src));
+	FAIL_IF(call_with_args(compiler, arg_types, &src));
 
 
-	if (type & SLJIT_CALL_RETURN)
-		type = SLJIT_JUMP;
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+	compiler->skip_checks = 1;
+#endif
 
 
-	SLJIT_SKIP_CHECKS(compiler);
 	return sljit_emit_ijump(compiler, type, src, srcw);
 	return sljit_emit_ijump(compiler, type, src, srcw);
 }
 }
 
 
@@ -935,89 +907,9 @@ static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src
 }
 }
 
 
 /* --------------------------------------------------------------------- */
 /* --------------------------------------------------------------------- */
-/*  Other operations                                                     */
+/*  Extend input                                                         */
 /* --------------------------------------------------------------------- */
 /* --------------------------------------------------------------------- */
 
 
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
-	sljit_s32 reg,
-	sljit_s32 mem, sljit_sw memw)
-{
-	sljit_u8* inst;
-	sljit_s32 i, next, reg_idx;
-	sljit_u8 regs[2];
-
-	CHECK_ERROR();
-	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
-
-	if (!(reg & REG_PAIR_MASK))
-		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
-
-	ADJUST_LOCAL_OFFSET(mem, memw);
-
-	compiler->mode32 = 0;
-
-	if ((mem & REG_MASK) == 0) {
-		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, memw);
-
-		mem = SLJIT_MEM1(TMP_REG1);
-		memw = 0;
-	} else if (!(mem & OFFS_REG_MASK) && ((memw < HALFWORD_MIN) || (memw > HALFWORD_MAX - SSIZE_OF(sw)))) {
-		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, memw);
-
-		mem = SLJIT_MEM2(mem & REG_MASK, TMP_REG1);
-		memw = 0;
-	}
-
-	regs[0] = U8(REG_PAIR_FIRST(reg));
-	regs[1] = U8(REG_PAIR_SECOND(reg));
-
-	next = SSIZE_OF(sw);
-
-	if (!(type & SLJIT_MEM_STORE) && (regs[0] == (mem & REG_MASK) || regs[0] == OFFS_REG(mem))) {
-		if (regs[1] == (mem & REG_MASK) || regs[1] == OFFS_REG(mem)) {
-			/* Base and offset cannot be TMP_REG1. */
-			EMIT_MOV(compiler, TMP_REG1, 0, OFFS_REG(mem), 0);
-
-			if (regs[1] == OFFS_REG(mem))
-				next = -SSIZE_OF(sw);
-
-			mem = (mem & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1);
-		} else {
-			next = -SSIZE_OF(sw);
-
-			if (!(mem & OFFS_REG_MASK))
-				memw += SSIZE_OF(sw);
-		}
-	}
-
-	for (i = 0; i < 2; i++) {
-		reg_idx = next > 0 ? i : (i ^ 0x1);
-		reg = regs[reg_idx];
-
-		if ((mem & OFFS_REG_MASK) && (reg_idx == 1)) {
-			inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(1 + 5));
-			FAIL_IF(!inst);
-
-			INC_SIZE(5);
-
-			inst[0] = U8(REX_W | ((reg_map[reg] >= 8) ? REX_R : 0) | ((reg_map[mem & REG_MASK] >= 8) ? REX_B : 0) | ((reg_map[OFFS_REG(mem)] >= 8) ? REX_X : 0));
-			inst[1] = (type & SLJIT_MEM_STORE) ? MOV_rm_r : MOV_r_rm;
-			inst[2] = 0x44 | U8(reg_lmap[reg] << 3);
-			inst[3] = U8(memw << 6) | U8(reg_lmap[OFFS_REG(mem)] << 3) | reg_lmap[mem & REG_MASK];
-			inst[4] = sizeof(sljit_sw);
-		} else if (type & SLJIT_MEM_STORE) {
-			EMIT_MOV(compiler, mem, memw, reg, 0);
-		} else {
-			EMIT_MOV(compiler, reg, 0, mem, memw);
-		}
-
-		if (!(mem & OFFS_REG_MASK))
-			memw += next;
-	}
-
-	return SLJIT_SUCCESS;
-}
-
 static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign,
 static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign,
 	sljit_s32 dst, sljit_sw dstw,
 	sljit_s32 dst, sljit_sw dstw,
 	sljit_s32 src, sljit_sw srcw)
 	sljit_s32 src, sljit_sw srcw)

+ 118 - 419
thirdparty/pcre2/src/sljit/sljitNativeX86_common.c

@@ -26,7 +26,11 @@
 
 
 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
 {
 {
+#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
+	return "x86" SLJIT_CPUINFO " ABI:fastcall";
+#else
 	return "x86" SLJIT_CPUINFO;
 	return "x86" SLJIT_CPUINFO;
+#endif
 }
 }
 
 
 /*
 /*
@@ -74,7 +78,10 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
 
 
 #define CHECK_EXTRA_REGS(p, w, do) \
 #define CHECK_EXTRA_REGS(p, w, do) \
 	if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
 	if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
-		w = (2 * SSIZE_OF(sw)) + ((p) - SLJIT_R3) * SSIZE_OF(sw); \
+		if (p <= compiler->scratches) \
+			w = compiler->scratches_offset + ((p) - SLJIT_R3) * SSIZE_OF(sw); \
+		else \
+			w = compiler->locals_offset + ((p) - SLJIT_S2) * SSIZE_OF(sw); \
 		p = SLJIT_MEM1(SLJIT_SP); \
 		p = SLJIT_MEM1(SLJIT_SP); \
 		do; \
 		do; \
 	}
 	}
@@ -174,7 +181,6 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
 #define AND_rm_r	0x21
 #define AND_rm_r	0x21
 #define ANDPD_x_xm	0x54
 #define ANDPD_x_xm	0x54
 #define BSR_r_rm	(/* GROUP_0F */ 0xbd)
 #define BSR_r_rm	(/* GROUP_0F */ 0xbd)
-#define BSF_r_rm	(/* GROUP_0F */ 0xbc)
 #define CALL_i32	0xe8
 #define CALL_i32	0xe8
 #define CALL_rm		(/* GROUP_FF */ 2 << 3)
 #define CALL_rm		(/* GROUP_FF */ 2 << 3)
 #define CDQ		0x99
 #define CDQ		0x99
@@ -188,8 +194,6 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
 #define CVTTSD2SI_r_xm	0x2c
 #define CVTTSD2SI_r_xm	0x2c
 #define DIV		(/* GROUP_F7 */ 6 << 3)
 #define DIV		(/* GROUP_F7 */ 6 << 3)
 #define DIVSD_x_xm	0x5e
 #define DIVSD_x_xm	0x5e
-#define FLDS		0xd9
-#define FLDL		0xdd
 #define FSTPS		0xd9
 #define FSTPS		0xd9
 #define FSTPD		0xdd
 #define FSTPD		0xdd
 #define INT3		0xcc
 #define INT3		0xcc
@@ -205,7 +209,6 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
 #define JMP_rm		(/* GROUP_FF */ 4 << 3)
 #define JMP_rm		(/* GROUP_FF */ 4 << 3)
 #define LEA_r_m		0x8d
 #define LEA_r_m		0x8d
 #define LOOP_i8		0xe2
 #define LOOP_i8		0xe2
-#define LZCNT_r_rm	(/* GROUP_F3 */ /* GROUP_0F */ 0xbd)
 #define MOV_r_rm	0x8b
 #define MOV_r_rm	0x8b
 #define MOV_r_i32	0xb8
 #define MOV_r_i32	0xb8
 #define MOV_rm_r	0x89
 #define MOV_rm_r	0x89
@@ -239,8 +242,6 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
 #define PUSH_r		0x50
 #define PUSH_r		0x50
 #define PUSH_rm		(/* GROUP_FF */ 6 << 3)
 #define PUSH_rm		(/* GROUP_FF */ 6 << 3)
 #define PUSHF		0x9c
 #define PUSHF		0x9c
-#define ROL		(/* SHIFT */ 0 << 3)
-#define ROR		(/* SHIFT */ 1 << 3)
 #define RET_near	0xc3
 #define RET_near	0xc3
 #define RET_i16		0xc2
 #define RET_i16		0xc2
 #define SBB		(/* BINARY */ 3 << 3)
 #define SBB		(/* BINARY */ 3 << 3)
@@ -249,8 +250,6 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
 #define SBB_rm_r	0x19
 #define SBB_rm_r	0x19
 #define SAR		(/* SHIFT */ 7 << 3)
 #define SAR		(/* SHIFT */ 7 << 3)
 #define SHL		(/* SHIFT */ 4 << 3)
 #define SHL		(/* SHIFT */ 4 << 3)
-#define SHLD		(/* GROUP_0F */ 0xa5)
-#define SHRD		(/* GROUP_0F */ 0xad)
 #define SHR		(/* SHIFT */ 5 << 3)
 #define SHR		(/* SHIFT */ 5 << 3)
 #define SUB		(/* BINARY */ 5 << 3)
 #define SUB		(/* BINARY */ 5 << 3)
 #define SUB_EAX_i32	0x2d
 #define SUB_EAX_i32	0x2d
@@ -259,7 +258,6 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
 #define SUBSD_x_xm	0x5c
 #define SUBSD_x_xm	0x5c
 #define TEST_EAX_i32	0xa9
 #define TEST_EAX_i32	0xa9
 #define TEST_rm_r	0x85
 #define TEST_rm_r	0x85
-#define TZCNT_r_rm	(/* GROUP_F3 */ /* GROUP_0F */ 0xbc)
 #define UCOMISD_x_xm	0x2e
 #define UCOMISD_x_xm	0x2e
 #define UNPCKLPD_x_xm	0x14
 #define UNPCKLPD_x_xm	0x14
 #define XCHG_EAX_r	0x90
 #define XCHG_EAX_r	0x90
@@ -271,7 +269,6 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
 #define XORPD_x_xm	0x57
 #define XORPD_x_xm	0x57
 
 
 #define GROUP_0F	0x0f
 #define GROUP_0F	0x0f
-#define GROUP_F3	0xf3
 #define GROUP_F7	0xf7
 #define GROUP_F7	0xf7
 #define GROUP_FF	0xff
 #define GROUP_FF	0xff
 #define GROUP_BINARY_81	0x81
 #define GROUP_BINARY_81	0x81
@@ -293,15 +290,10 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
 /* Multithreading does not affect these static variables, since they store
 /* Multithreading does not affect these static variables, since they store
    built-in CPU features. Therefore they can be overwritten by different threads
    built-in CPU features. Therefore they can be overwritten by different threads
    if they detect the CPU features in the same time. */
    if they detect the CPU features in the same time. */
-#define CPU_FEATURE_DETECTED		0x001
 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
-#define CPU_FEATURE_SSE2		0x002
+static sljit_s32 cpu_has_sse2 = -1;
 #endif
 #endif
-#define CPU_FEATURE_LZCNT		0x004
-#define CPU_FEATURE_TZCNT		0x008
-#define CPU_FEATURE_CMOV		0x010
-
-static sljit_u32 cpu_feature_list = 0;
+static sljit_s32 cpu_has_cmov = -1;
 
 
 #ifdef _WIN32_WCE
 #ifdef _WIN32_WCE
 #include <cmnintrin.h>
 #include <cmnintrin.h>
@@ -334,64 +326,17 @@ static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
 
 
 static void get_cpu_features(void)
 static void get_cpu_features(void)
 {
 {
-	sljit_u32 feature_list = CPU_FEATURE_DETECTED;
-	sljit_u32 value;
+	sljit_u32 features;
 
 
 #if defined(_MSC_VER) && _MSC_VER >= 1400
 #if defined(_MSC_VER) && _MSC_VER >= 1400
 
 
 	int CPUInfo[4];
 	int CPUInfo[4];
-
-	__cpuid(CPUInfo, 0);
-	if (CPUInfo[0] >= 7) {
-		__cpuidex(CPUInfo, 7, 0);
-		if (CPUInfo[1] & 0x8)
-			feature_list |= CPU_FEATURE_TZCNT;
-	}
-
-	__cpuid(CPUInfo, (int)0x80000001);
-	if (CPUInfo[2] & 0x20)
-		feature_list |= CPU_FEATURE_LZCNT;
-
 	__cpuid(CPUInfo, 1);
 	__cpuid(CPUInfo, 1);
-	value = (sljit_u32)CPUInfo[3];
+	features = (sljit_u32)CPUInfo[3];
 
 
 #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
 #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
 
 
 	/* AT&T syntax. */
 	/* AT&T syntax. */
-	__asm__ (
-		"movl $0x0, %%eax\n"
-		"lzcnt %%eax, %%eax\n"
-		"setnz %%al\n"
-		"movl %%eax, %0\n"
-		: "=g" (value)
-		:
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-		: "eax"
-#else
-		: "rax"
-#endif
-	);
-
-	if (value & 0x1)
-		feature_list |= CPU_FEATURE_LZCNT;
-
-	__asm__ (
-		"movl $0x0, %%eax\n"
-		"tzcnt %%eax, %%eax\n"
-		"setnz %%al\n"
-		"movl %%eax, %0\n"
-		: "=g" (value)
-		:
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-		: "eax"
-#else
-		: "rax"
-#endif
-	);
-
-	if (value & 0x1)
-		feature_list |= CPU_FEATURE_TZCNT;
-
 	__asm__ (
 	__asm__ (
 		"movl $0x1, %%eax\n"
 		"movl $0x1, %%eax\n"
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
@@ -404,7 +349,7 @@ static void get_cpu_features(void)
 		"pop %%ebx\n"
 		"pop %%ebx\n"
 #endif
 #endif
 		"movl %%edx, %0\n"
 		"movl %%edx, %0\n"
-		: "=g" (value)
+		: "=g" (features)
 		:
 		:
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 		: "%eax", "%ecx", "%edx"
 		: "%eax", "%ecx", "%edx"
@@ -416,83 +361,47 @@ static void get_cpu_features(void)
 #else /* _MSC_VER && _MSC_VER >= 1400 */
 #else /* _MSC_VER && _MSC_VER >= 1400 */
 
 
 	/* Intel syntax. */
 	/* Intel syntax. */
-	__asm {
-		mov eax, 0
-		lzcnt eax, eax
-		setnz al
-		mov value, eax
-	}
-
-	if (value & 0x1)
-		feature_list |= CPU_FEATURE_LZCNT;
-
-	__asm {
-		mov eax, 0
-		tzcnt eax, eax
-		setnz al
-		mov value, eax
-	}
-
-	if (value & 0x1)
-		feature_list |= CPU_FEATURE_TZCNT;
-
 	__asm {
 	__asm {
 		mov eax, 1
 		mov eax, 1
 		cpuid
 		cpuid
-		mov value, edx
+		mov features, edx
 	}
 	}
 
 
 #endif /* _MSC_VER && _MSC_VER >= 1400 */
 #endif /* _MSC_VER && _MSC_VER >= 1400 */
 
 
 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
-	if (value & 0x4000000)
-		feature_list |= CPU_FEATURE_SSE2;
+	cpu_has_sse2 = (features >> 26) & 0x1;
 #endif
 #endif
-	if (value & 0x8000)
-		feature_list |= CPU_FEATURE_CMOV;
-
-	cpu_feature_list = feature_list;
+	cpu_has_cmov = (features >> 15) & 0x1;
 }
 }
 
 
 static sljit_u8 get_jump_code(sljit_uw type)
 static sljit_u8 get_jump_code(sljit_uw type)
 {
 {
 	switch (type) {
 	switch (type) {
 	case SLJIT_EQUAL:
 	case SLJIT_EQUAL:
-	case SLJIT_F_EQUAL:
-	case SLJIT_UNORDERED_OR_EQUAL:
-	case SLJIT_ORDERED_EQUAL: /* Not supported. */
+	case SLJIT_EQUAL_F64:
 		return 0x84 /* je */;
 		return 0x84 /* je */;
 
 
 	case SLJIT_NOT_EQUAL:
 	case SLJIT_NOT_EQUAL:
-	case SLJIT_F_NOT_EQUAL:
-	case SLJIT_ORDERED_NOT_EQUAL:
-	case SLJIT_UNORDERED_OR_NOT_EQUAL: /* Not supported. */
+	case SLJIT_NOT_EQUAL_F64:
 		return 0x85 /* jne */;
 		return 0x85 /* jne */;
 
 
 	case SLJIT_LESS:
 	case SLJIT_LESS:
 	case SLJIT_CARRY:
 	case SLJIT_CARRY:
-	case SLJIT_F_LESS:
-	case SLJIT_UNORDERED_OR_LESS:
-	case SLJIT_UNORDERED_OR_GREATER:
+	case SLJIT_LESS_F64:
 		return 0x82 /* jc */;
 		return 0x82 /* jc */;
 
 
 	case SLJIT_GREATER_EQUAL:
 	case SLJIT_GREATER_EQUAL:
 	case SLJIT_NOT_CARRY:
 	case SLJIT_NOT_CARRY:
-	case SLJIT_F_GREATER_EQUAL:
-	case SLJIT_ORDERED_GREATER_EQUAL:
-	case SLJIT_ORDERED_LESS_EQUAL:
+	case SLJIT_GREATER_EQUAL_F64:
 		return 0x83 /* jae */;
 		return 0x83 /* jae */;
 
 
 	case SLJIT_GREATER:
 	case SLJIT_GREATER:
-	case SLJIT_F_GREATER:
-	case SLJIT_ORDERED_LESS:
-	case SLJIT_ORDERED_GREATER:
+	case SLJIT_GREATER_F64:
 		return 0x87 /* jnbe */;
 		return 0x87 /* jnbe */;
 
 
 	case SLJIT_LESS_EQUAL:
 	case SLJIT_LESS_EQUAL:
-	case SLJIT_F_LESS_EQUAL:
-	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
-	case SLJIT_UNORDERED_OR_LESS_EQUAL:
+	case SLJIT_LESS_EQUAL_F64:
 		return 0x86 /* jbe */;
 		return 0x86 /* jbe */;
 
 
 	case SLJIT_SIG_LESS:
 	case SLJIT_SIG_LESS:
@@ -513,10 +422,10 @@ static sljit_u8 get_jump_code(sljit_uw type)
 	case SLJIT_NOT_OVERFLOW:
 	case SLJIT_NOT_OVERFLOW:
 		return 0x81 /* jno */;
 		return 0x81 /* jno */;
 
 
-	case SLJIT_UNORDERED:
+	case SLJIT_UNORDERED_F64:
 		return 0x8a /* jp */;
 		return 0x8a /* jp */;
 
 
-	case SLJIT_ORDERED:
+	case SLJIT_ORDERED_F64:
 		return 0x8b /* jpo */;
 		return 0x8b /* jpo */;
 	}
 	}
 	return 0;
 	return 0;
@@ -540,13 +449,13 @@ static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code
 	else
 	else
 		label_addr = jump->u.target - (sljit_uw)executable_offset;
 		label_addr = jump->u.target - (sljit_uw)executable_offset;
 
 
+	short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
+
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 	if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
 	if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
 		return generate_far_jump_code(jump, code_ptr);
 		return generate_far_jump_code(jump, code_ptr);
 #endif
 #endif
 
 
-	short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
-
 	if (type == SLJIT_JUMP) {
 	if (type == SLJIT_JUMP) {
 		if (short_jump)
 		if (short_jump)
 			*code_ptr++ = JMP_i8;
 			*code_ptr++ = JMP_i8;
@@ -672,33 +581,32 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 
 
 	jump = compiler->jumps;
 	jump = compiler->jumps;
 	while (jump) {
 	while (jump) {
-		if (jump->flags & (PATCH_MB | PATCH_MW)) {
-			if (jump->flags & JUMP_LABEL)
-				jump_addr = jump->u.label->addr;
-			else
-				jump_addr = jump->u.target;
+		jump_addr = jump->addr + (sljit_uw)executable_offset;
 
 
-			jump_addr -= jump->addr + (sljit_uw)executable_offset;
-
-			if (jump->flags & PATCH_MB) {
-				jump_addr -= sizeof(sljit_s8);
-				SLJIT_ASSERT((sljit_sw)jump_addr >= -128 && (sljit_sw)jump_addr <= 127);
-				*(sljit_u8*)jump->addr = U8(jump_addr);
-			} else {
-				jump_addr -= sizeof(sljit_s32);
+		if (jump->flags & PATCH_MB) {
+			SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127);
+			*(sljit_u8*)jump->addr = U8(jump->u.label->addr - (jump_addr + sizeof(sljit_s8)));
+		} else if (jump->flags & PATCH_MW) {
+			if (jump->flags & JUMP_LABEL) {
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-				sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)jump_addr);
+				sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_sw))));
 #else
 #else
-				SLJIT_ASSERT((sljit_sw)jump_addr >= HALFWORD_MIN && (sljit_sw)jump_addr <= HALFWORD_MAX);
-				sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)jump_addr);
+				SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
+				sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))));
+#endif
+			}
+			else {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+				sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_sw))));
+#else
+				SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
+				sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump_addr + sizeof(sljit_s32))));
 #endif
 #endif
 			}
 			}
 		}
 		}
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-		else if (jump->flags & PATCH_MD) {
-				SLJIT_ASSERT(jump->flags & JUMP_LABEL);
-				sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)jump->u.label->addr);
-		}
+		else if (jump->flags & PATCH_MD)
+			sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)jump->u.label->addr);
 #endif
 #endif
 
 
 		jump = jump->next;
 		jump = jump->next;
@@ -739,9 +647,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
 #ifdef SLJIT_IS_FPU_AVAILABLE
 #ifdef SLJIT_IS_FPU_AVAILABLE
 		return SLJIT_IS_FPU_AVAILABLE;
 		return SLJIT_IS_FPU_AVAILABLE;
 #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
 #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
-		if (cpu_feature_list == 0)
+		if (cpu_has_sse2 == -1)
 			get_cpu_features();
 			get_cpu_features();
-		return (cpu_feature_list & CPU_FEATURE_SSE2) != 0;
+		return cpu_has_sse2;
 #else /* SLJIT_DETECT_SSE2 */
 #else /* SLJIT_DETECT_SSE2 */
 		return 1;
 		return 1;
 #endif /* SLJIT_DETECT_SSE2 */
 #endif /* SLJIT_DETECT_SSE2 */
@@ -749,57 +657,31 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 	case SLJIT_HAS_VIRTUAL_REGISTERS:
 	case SLJIT_HAS_VIRTUAL_REGISTERS:
 		return 1;
 		return 1;
-#endif /* SLJIT_CONFIG_X86_32 */
+#endif
 
 
 	case SLJIT_HAS_CLZ:
 	case SLJIT_HAS_CLZ:
-		if (cpu_feature_list == 0)
-			get_cpu_features();
-
-		return (cpu_feature_list & CPU_FEATURE_LZCNT) ? 1 : 2;
-
-	case SLJIT_HAS_CTZ:
-		if (cpu_feature_list == 0)
-			get_cpu_features();
-
-		return (cpu_feature_list & CPU_FEATURE_TZCNT) ? 1 : 2;
-
 	case SLJIT_HAS_CMOV:
 	case SLJIT_HAS_CMOV:
-		if (cpu_feature_list == 0)
+		if (cpu_has_cmov == -1)
 			get_cpu_features();
 			get_cpu_features();
-		return (cpu_feature_list & CPU_FEATURE_CMOV) != 0;
+		return cpu_has_cmov;
 
 
-	case SLJIT_HAS_ROT:
 	case SLJIT_HAS_PREFETCH:
 	case SLJIT_HAS_PREFETCH:
 		return 1;
 		return 1;
 
 
 	case SLJIT_HAS_SSE2:
 	case SLJIT_HAS_SSE2:
 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
-		if (cpu_feature_list == 0)
+		if (cpu_has_sse2 == -1)
 			get_cpu_features();
 			get_cpu_features();
-		return (cpu_feature_list & CPU_FEATURE_SSE2) != 0;
-#else /* !SLJIT_DETECT_SSE2 */
+		return cpu_has_sse2;
+#else
 		return 1;
 		return 1;
-#endif /* SLJIT_DETECT_SSE2 */
+#endif
 
 
 	default:
 	default:
 		return 0;
 		return 0;
 	}
 	}
 }
 }
 
 
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
-{
-	if (type < SLJIT_UNORDERED || type > SLJIT_ORDERED_LESS_EQUAL)
-		return 0;
-
-	switch (type) {
-	case SLJIT_ORDERED_EQUAL:
-	case SLJIT_UNORDERED_OR_NOT_EQUAL:
-		return 0;
-	}
-
-	return 1;
-}
-
 /* --------------------------------------------------------------------- */
 /* --------------------------------------------------------------------- */
 /*  Operators                                                            */
 /*  Operators                                                            */
 /* --------------------------------------------------------------------- */
 /* --------------------------------------------------------------------- */
@@ -1503,75 +1385,47 @@ static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
 
 
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 static const sljit_sw emit_clz_arg = 32 + 31;
 static const sljit_sw emit_clz_arg = 32 + 31;
-static const sljit_sw emit_ctz_arg = 32;
 #endif
 #endif
 
 
-static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 is_clz,
+static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
 	sljit_s32 dst, sljit_sw dstw,
 	sljit_s32 dst, sljit_sw dstw,
 	sljit_s32 src, sljit_sw srcw)
 	sljit_s32 src, sljit_sw srcw)
 {
 {
 	sljit_u8* inst;
 	sljit_u8* inst;
 	sljit_s32 dst_r;
 	sljit_s32 dst_r;
-	sljit_sw max;
 
 
-	if (cpu_feature_list == 0)
+	SLJIT_UNUSED_ARG(op_flags);
+
+	if (cpu_has_cmov == -1)
 		get_cpu_features();
 		get_cpu_features();
 
 
 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
 
 
-	if (is_clz ? (cpu_feature_list & CPU_FEATURE_LZCNT) : (cpu_feature_list & CPU_FEATURE_TZCNT)) {
-		/* Group prefix added separately. */
-		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
-		FAIL_IF(!inst);
-		INC_SIZE(1);
-		*inst++ = GROUP_F3;
-
-		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
-		FAIL_IF(!inst);
-		*inst++ = GROUP_0F;
-		*inst = is_clz ? LZCNT_r_rm : TZCNT_r_rm;
-
-		if (dst & SLJIT_MEM)
-			EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
-		return SLJIT_SUCCESS;
-	}
-
 	inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
 	inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
 	FAIL_IF(!inst);
 	FAIL_IF(!inst);
 	*inst++ = GROUP_0F;
 	*inst++ = GROUP_0F;
-	*inst = is_clz ? BSR_r_rm : BSF_r_rm;
+	*inst = BSR_r_rm;
 
 
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-	max = is_clz ? (32 + 31) : 32;
-
-	if (cpu_feature_list & CPU_FEATURE_CMOV) {
+	if (cpu_has_cmov) {
 		if (dst_r != TMP_REG1) {
 		if (dst_r != TMP_REG1) {
-			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, max);
+			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 32 + 31);
 			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
 			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
 		}
 		}
 		else
 		else
-			inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), is_clz ? (sljit_sw)&emit_clz_arg : (sljit_sw)&emit_ctz_arg);
+			inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), (sljit_sw)&emit_clz_arg);
 
 
 		FAIL_IF(!inst);
 		FAIL_IF(!inst);
 		*inst++ = GROUP_0F;
 		*inst++ = GROUP_0F;
 		*inst = CMOVE_r_rm;
 		*inst = CMOVE_r_rm;
 	}
 	}
 	else
 	else
-		FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max));
+		FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, 32 + 31));
 
 
-	if (is_clz) {
-		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
-		FAIL_IF(!inst);
-		*(inst + 1) |= XOR;
-	}
+	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
 #else
 #else
-	if (is_clz)
-		max = compiler->mode32 ? (32 + 31) : (64 + 63);
-	else
-		max = compiler->mode32 ? 32 : 64;
-
-	if (cpu_feature_list & CPU_FEATURE_CMOV) {
-		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, max);
+	if (cpu_has_cmov) {
+		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, !(op_flags & SLJIT_32) ? (64 + 63) : (32 + 31));
 
 
 		inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
 		inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
 		FAIL_IF(!inst);
 		FAIL_IF(!inst);
@@ -1579,15 +1433,14 @@ static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 is_clz,
 		*inst = CMOVE_r_rm;
 		*inst = CMOVE_r_rm;
 	}
 	}
 	else
 	else
-		FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max));
+		FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, !(op_flags & SLJIT_32) ? (64 + 63) : (32 + 31)));
 
 
-	if (is_clz) {
-		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, max >> 1, dst_r, 0);
-		FAIL_IF(!inst);
-		*(inst + 1) |= XOR;
-	}
+	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_32) ? 63 : 31, dst_r, 0);
 #endif
 #endif
 
 
+	FAIL_IF(!inst);
+	*(inst + 1) |= XOR;
+
 	if (dst & SLJIT_MEM)
 	if (dst & SLJIT_MEM)
 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
 	return SLJIT_SUCCESS;
 	return SLJIT_SUCCESS;
@@ -1725,8 +1578,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
 		return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
 		return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
 
 
 	case SLJIT_CLZ:
 	case SLJIT_CLZ:
-	case SLJIT_CTZ:
-		return emit_clz_ctz(compiler, (op == SLJIT_CLZ), dst, dstw, src, srcw);
+		return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
 	}
 	}
 
 
 	return SLJIT_SUCCESS;
 	return SLJIT_SUCCESS;
@@ -2264,9 +2116,6 @@ static sljit_s32 emit_shift(struct sljit_compiler *compiler,
 	sljit_s32 src1, sljit_sw src1w,
 	sljit_s32 src1, sljit_sw src1w,
 	sljit_s32 src2, sljit_sw src2w)
 	sljit_s32 src2, sljit_sw src2w)
 {
 {
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-	sljit_s32 mode32;
-#endif
 	sljit_u8* inst;
 	sljit_u8* inst;
 
 
 	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
 	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
@@ -2306,61 +2155,40 @@ static sljit_s32 emit_shift(struct sljit_compiler *compiler,
 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
 		FAIL_IF(!inst);
 		FAIL_IF(!inst);
 		*inst |= mode;
 		*inst |= mode;
-		return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
+		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
 	}
 	}
-
-	if (FAST_IS_REG(dst) && dst != src2 && dst != TMP_REG1 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
+	else if (FAST_IS_REG(dst) && dst != src2 && dst != TMP_REG1 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
 		if (src1 != dst)
 		if (src1 != dst)
 			EMIT_MOV(compiler, dst, 0, src1, src1w);
 			EMIT_MOV(compiler, dst, 0, src1, src1w);
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-		mode32 = compiler->mode32;
-		compiler->mode32 = 0;
-#endif
 		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
 		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-		compiler->mode32 = mode32;
-#endif
 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
 		FAIL_IF(!inst);
 		FAIL_IF(!inst);
 		*inst |= mode;
 		*inst |= mode;
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-		compiler->mode32 = 0;
-#endif
 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-		compiler->mode32 = mode32;
-#endif
-		return SLJIT_SUCCESS;
 	}
 	}
-
-	/* This case is complex since ecx itself may be used for
-	   addressing, and this case must be supported as well. */
-	EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-	EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
-#else /* !SLJIT_CONFIG_X86_32 */
-	mode32 = compiler->mode32;
-	compiler->mode32 = 0;
-	EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
-	compiler->mode32 = mode32;
-#endif /* SLJIT_CONFIG_X86_32 */
-
-	EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
-	inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
-	FAIL_IF(!inst);
-	*inst |= mode;
-
+	else {
+		/* This case is complex since ecx itself may be used for
+		   addressing, and this case must be supported as well. */
+		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-	EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
+		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
+		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
+		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
+		FAIL_IF(!inst);
+		*inst |= mode;
+		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
 #else
 #else
-	compiler->mode32 = 0;
-	EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
-	compiler->mode32 = mode32;
-#endif /* SLJIT_CONFIG_X86_32 */
-
-	if (dst != TMP_REG1)
-		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
+		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
+		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
+		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
+		FAIL_IF(!inst);
+		*inst |= mode;
+		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
+#endif
+		if (dst != TMP_REG1)
+			return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
+	}
 
 
 	return SLJIT_SUCCESS;
 	return SLJIT_SUCCESS;
 }
 }
@@ -2374,13 +2202,12 @@ static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
 	/* The CPU does not set flags if the shift count is 0. */
 	/* The CPU does not set flags if the shift count is 0. */
 	if (src2 & SLJIT_IMM) {
 	if (src2 & SLJIT_IMM) {
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-		src2w &= compiler->mode32 ? 0x1f : 0x3f;
-#else /* !SLJIT_CONFIG_X86_64 */
-		src2w &= 0x1f;
-#endif /* SLJIT_CONFIG_X86_64 */
-		if (src2w != 0)
+		if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
 			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
 			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
-
+#else
+		if ((src2w & 0x1f) != 0)
+			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
+#endif
 		if (!set_flags)
 		if (!set_flags)
 			return emit_mov(compiler, dst, dstw, src1, src1w);
 			return emit_mov(compiler, dst, dstw, src1, src1w);
 		/* OR dst, src, 0 */
 		/* OR dst, src, 0 */
@@ -2462,23 +2289,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
 		return emit_cum_binary(compiler, BINARY_OPCODE(XOR),
 		return emit_cum_binary(compiler, BINARY_OPCODE(XOR),
 			dst, dstw, src1, src1w, src2, src2w);
 			dst, dstw, src1, src1w, src2, src2w);
 	case SLJIT_SHL:
 	case SLJIT_SHL:
-	case SLJIT_MSHL:
 		return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
 		return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
 			dst, dstw, src1, src1w, src2, src2w);
 			dst, dstw, src1, src1w, src2, src2w);
 	case SLJIT_LSHR:
 	case SLJIT_LSHR:
-	case SLJIT_MLSHR:
 		return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
 		return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
 			dst, dstw, src1, src1w, src2, src2w);
 			dst, dstw, src1, src1w, src2, src2w);
 	case SLJIT_ASHR:
 	case SLJIT_ASHR:
-	case SLJIT_MASHR:
 		return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
 		return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
 			dst, dstw, src1, src1w, src2, src2w);
 			dst, dstw, src1, src1w, src2, src2w);
-	case SLJIT_ROTL:
-		return emit_shift_with_flags(compiler, ROL, 0,
-			dst, dstw, src1, src1w, src2, src2w);
-	case SLJIT_ROTR:
-		return emit_shift_with_flags(compiler, ROR, 0,
-			dst, dstw, src1, src1w, src2, src2w);
 	}
 	}
 
 
 	return SLJIT_SUCCESS;
 	return SLJIT_SUCCESS;
@@ -2494,7 +2312,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
 	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
 	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
 
 
 	if (opcode != SLJIT_SUB && opcode != SLJIT_AND) {
 	if (opcode != SLJIT_SUB && opcode != SLJIT_AND) {
-		SLJIT_SKIP_CHECKS(compiler);
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+			|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+		compiler->skip_checks = 1;
+#endif
 		return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
 		return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
 	}
 	}
 
 
@@ -2513,122 +2334,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
 	return emit_test_binary(compiler, src1, src1w, src2, src2w);
 	return emit_test_binary(compiler, src1, src1w, src2, src2w);
 }
 }
 
 
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
-	sljit_s32 src_dst,
-	sljit_s32 src1, sljit_sw src1w,
-	sljit_s32 src2, sljit_sw src2w)
-{
-	sljit_s32 restore_ecx = 0;
-	sljit_s32 is_rotate, is_left;
-	sljit_u8* inst;
-	sljit_sw dstw = 0;
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-	sljit_s32 tmp2 = SLJIT_MEM1(SLJIT_SP);
-#else /* !SLJIT_CONFIG_X86_32 */
-	sljit_s32 tmp2 = TMP_REG2;
-#endif /* SLJIT_CONFIG_X86_32 */
-
-	CHECK_ERROR();
-	CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w));
-	ADJUST_LOCAL_OFFSET(src1, src1w);
-	ADJUST_LOCAL_OFFSET(src2, src2w);
-
-	CHECK_EXTRA_REGS(src1, src1w, (void)0);
-	CHECK_EXTRA_REGS(src2, src2w, (void)0);
-
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-	compiler->mode32 = op & SLJIT_32;
-#endif
-
-	if (src2 & SLJIT_IMM) {
-#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-		src2w &= 0x1f;
-#else /* !SLJIT_CONFIG_X86_32 */
-		src2w &= (op & SLJIT_32) ? 0x1f : 0x3f;
-#endif /* SLJIT_CONFIG_X86_32 */
-
-		if (src2w == 0)
-			return SLJIT_SUCCESS;
-	}
-
-	is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);
-
-	is_rotate = (src_dst == src1);
-	CHECK_EXTRA_REGS(src_dst, dstw, (void)0);
-
-	if (is_rotate)
-		return emit_shift(compiler, is_left ? ROL : ROR, src_dst, dstw, src1, src1w, src2, src2w);
-
-	if ((src2 & SLJIT_IMM) || src2 == SLJIT_PREF_SHIFT_REG) {
-		if (!FAST_IS_REG(src1)) {
-			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
-			src1 = TMP_REG1;
-		}
-	} else if (FAST_IS_REG(src1)) {
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-		compiler->mode32 = 0;
-#endif
-		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-		compiler->mode32 = op & SLJIT_32;
-#endif
-		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
-
-		if (src1 == SLJIT_PREF_SHIFT_REG)
-			src1 = TMP_REG1;
-
-		if (src_dst == SLJIT_PREF_SHIFT_REG)
-			src_dst = TMP_REG1;
-
-		restore_ecx = 1;
-	} else {
-		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-		compiler->mode32 = 0;
-#endif
-		EMIT_MOV(compiler, tmp2, 0, SLJIT_PREF_SHIFT_REG, 0);
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-		compiler->mode32 = op & SLJIT_32;
-#endif
-		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
-
-		src1 = TMP_REG1;
-
-		if (src_dst == SLJIT_PREF_SHIFT_REG) {
-			src_dst = tmp2;
-			SLJIT_ASSERT(dstw == 0);
-		}
-
-		restore_ecx = 2;
-	}
-
-	inst = emit_x86_instruction(compiler, 2, src1, 0, src_dst, dstw);
-	FAIL_IF(!inst);
-	inst[0] = GROUP_0F;
-
-	if (src2 & SLJIT_IMM) {
-		inst[1] = U8((is_left ? SHLD : SHRD) - 1);
-
-		/* Immedate argument is added separately. */
-		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
-		FAIL_IF(!inst);
-		INC_SIZE(1);
-		*inst = U8(src2w);
-	} else
-		inst[1] = U8(is_left ? SHLD : SHRD);
-
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-	compiler->mode32 = 0;
-#endif
-
-	if (restore_ecx == 1)
-		return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
-	if (restore_ecx == 2)
-		return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, tmp2, 0);
-
-	return SLJIT_SUCCESS;
-}
-
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
 	sljit_s32 src, sljit_sw srcw)
 	sljit_s32 src, sljit_sw srcw)
 {
 {
@@ -2811,19 +2516,6 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile
 	sljit_s32 src1, sljit_sw src1w,
 	sljit_s32 src1, sljit_sw src1w,
 	sljit_s32 src2, sljit_sw src2w)
 	sljit_s32 src2, sljit_sw src2w)
 {
 {
-	switch (GET_FLAG_TYPE(op)) {
-	case SLJIT_ORDERED_LESS:
-	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
-	case SLJIT_UNORDERED_OR_GREATER:
-	case SLJIT_ORDERED_LESS_EQUAL:
-		if (!FAST_IS_REG(src2)) {
-			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w));
-			src2 = TMP_FREG;
-		}
-
-		return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_32), src2, src1, src1w);
-	}
-
 	if (!FAST_IS_REG(src1)) {
 	if (!FAST_IS_REG(src1)) {
 		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
 		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
 		src1 = TMP_FREG;
 		src1 = TMP_FREG;
@@ -3077,6 +2769,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
 	ADJUST_LOCAL_OFFSET(dst, dstw);
 	ADJUST_LOCAL_OFFSET(dst, dstw);
 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
 
 
+	type &= 0xff;
 	/* setcc = jcc + 0x10. */
 	/* setcc = jcc + 0x10. */
 	cond_set = U8(get_jump_code((sljit_uw)type) + 0x10);
 	cond_set = U8(get_jump_code((sljit_uw)type) + 0x10);
 
 
@@ -3120,7 +2813,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
 		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
 		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
 	}
 	}
 
 
-	SLJIT_SKIP_CHECKS(compiler);
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+	compiler->skip_checks = 1;
+#endif
 	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
 	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
 
 
 #else
 #else
@@ -3143,10 +2839,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
 		}
 		}
 
 
 		/* Low byte is not accessible. */
 		/* Low byte is not accessible. */
-		if (cpu_feature_list == 0)
+		if (cpu_has_cmov == -1)
 			get_cpu_features();
 			get_cpu_features();
 
 
-		if (cpu_feature_list & CPU_FEATURE_CMOV) {
+		if (cpu_has_cmov) {
 			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
 			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
 			/* a xor reg, reg operation would overwrite the flags. */
 			/* a xor reg, reg operation would overwrite the flags. */
 			EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
 			EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
@@ -3231,7 +2927,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
 	if (GET_OPCODE(op) < SLJIT_ADD)
 	if (GET_OPCODE(op) < SLJIT_ADD)
 		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
 		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
 
 
-	SLJIT_SKIP_CHECKS(compiler);
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+	compiler->skip_checks = 1;
+#endif
 	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
 	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
 #endif /* SLJIT_CONFIG_X86_64 */
 #endif /* SLJIT_CONFIG_X86_64 */
 }
 }
@@ -3246,7 +2945,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
 	CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
 	CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
 
 
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-	type &= ~SLJIT_32;
+	dst_reg &= ~SLJIT_32;
 
 
 	if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || (dst_reg >= SLJIT_R3 && dst_reg <= SLJIT_S3))
 	if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || (dst_reg >= SLJIT_R3 && dst_reg <= SLJIT_S3))
 		return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
 		return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
@@ -3259,8 +2958,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
 	CHECK_EXTRA_REGS(src, srcw, (void)0);
 	CHECK_EXTRA_REGS(src, srcw, (void)0);
 
 
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
-	compiler->mode32 = type & SLJIT_32;
-	type &= ~SLJIT_32;
+	compiler->mode32 = dst_reg & SLJIT_32;
+	dst_reg &= ~SLJIT_32;
 #endif
 #endif
 
 
 	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
 	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
@@ -3272,7 +2971,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
 	inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
 	inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
 	FAIL_IF(!inst);
 	FAIL_IF(!inst);
 	*inst++ = GROUP_0F;
 	*inst++ = GROUP_0F;
-	*inst = U8(get_jump_code((sljit_uw)type) - 0x40);
+	*inst = U8(get_jump_code(type & 0xff) - 0x40);
 	return SLJIT_SUCCESS;
 	return SLJIT_SUCCESS;
 }
 }
 
 

+ 39 - 14
thirdparty/pcre2/src/sljit/sljitWXExecAllocator.c

@@ -59,15 +59,38 @@
 #include <sys/mman.h>
 #include <sys/mman.h>
 
 
 #ifdef __NetBSD__
 #ifdef __NetBSD__
-#define SLJIT_PROT_WX PROT_MPROTECT(PROT_EXEC)
+#if defined(PROT_MPROTECT)
 #define check_se_protected(ptr, size) (0)
 #define check_se_protected(ptr, size) (0)
-#else /* POSIX */
-#if !(defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED)
-#include <pthread.h>
-#define SLJIT_SE_LOCK()		pthread_mutex_lock(&se_lock)
-#define SLJIT_SE_UNLOCK()	pthread_mutex_unlock(&se_lock)
-#endif /* !SLJIT_SINGLE_THREADED */
+#define SLJIT_PROT_WX PROT_MPROTECT(PROT_EXEC)
+#else /* !PROT_MPROTECT */
+#ifdef _NETBSD_SOURCE
+#include <sys/param.h>
+#else /* !_NETBSD_SOURCE */
+typedef unsigned int	u_int;
+#define devmajor_t sljit_s32
+#endif /* _NETBSD_SOURCE */
+#include <sys/sysctl.h>
+#include <unistd.h>
+
+#define check_se_protected(ptr, size) netbsd_se_protected()
+
+static SLJIT_INLINE int netbsd_se_protected(void)
+{
+	int mib[3];
+	int paxflags;
+	size_t len = sizeof(paxflags);
+
+	mib[0] = CTL_PROC;
+	mib[1] = getpid();
+	mib[2] = PROC_PID_PAXFLAGS;
+
+	if (SLJIT_UNLIKELY(sysctl(mib, 3, &paxflags, &len, NULL, 0) < 0))
+		return -1;
 
 
+	return (paxflags & CTL_PROC_PAXFLAGS_MPROTECT) ? -1 : 0;
+}
+#endif /* PROT_MPROTECT */
+#else /* POSIX */
 #define check_se_protected(ptr, size) generic_se_protected(ptr, size)
 #define check_se_protected(ptr, size) generic_se_protected(ptr, size)
 
 
 static SLJIT_INLINE int generic_se_protected(void *ptr, sljit_uw size)
 static SLJIT_INLINE int generic_se_protected(void *ptr, sljit_uw size)
@@ -79,20 +102,22 @@ static SLJIT_INLINE int generic_se_protected(void *ptr, sljit_uw size)
 }
 }
 #endif /* NetBSD */
 #endif /* NetBSD */
 
 
-#ifndef SLJIT_SE_LOCK
+#if defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED
 #define SLJIT_SE_LOCK()
 #define SLJIT_SE_LOCK()
-#endif
-#ifndef SLJIT_SE_UNLOCK
 #define SLJIT_SE_UNLOCK()
 #define SLJIT_SE_UNLOCK()
-#endif
+#else /* !SLJIT_SINGLE_THREADED */
+#include <pthread.h>
+#define SLJIT_SE_LOCK()	pthread_mutex_lock(&se_lock)
+#define SLJIT_SE_UNLOCK()	pthread_mutex_unlock(&se_lock)
+#endif /* SLJIT_SINGLE_THREADED */
+
 #ifndef SLJIT_PROT_WX
 #ifndef SLJIT_PROT_WX
 #define SLJIT_PROT_WX 0
 #define SLJIT_PROT_WX 0
-#endif
+#endif /* !SLJIT_PROT_WX */
 
 
 SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size)
 SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size)
 {
 {
-#if !(defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED) \
-	&& !defined(__NetBSD__)
+#if !(defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED)
 	static pthread_mutex_t se_lock = PTHREAD_MUTEX_INITIALIZER;
 	static pthread_mutex_t se_lock = PTHREAD_MUTEX_INITIALIZER;
 #endif
 #endif
 	static int se_protected = !SLJIT_PROT_WX;
 	static int se_protected = !SLJIT_PROT_WX;

Some files were not shown because too many files changed in this diff