|
@@ -39,7 +39,29 @@ POSSIBILITY OF SUCH DAMAGE.
|
|
|
-----------------------------------------------------------------------------
|
|
|
*/
|
|
|
|
|
|
-#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
|
|
|
+#if !(defined SUPPORT_VALGRIND)
|
|
|
+
|
|
|
+#if ((defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
|
|
|
+ || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X))
|
|
|
+
|
|
|
+/* Selects how a loaded data vector is compared against the searched
+character(s). The callers pick the value from the relation of the two
+characters they are given. */
+typedef enum {
|
|
|
+ /* Both characters are equal: a single equality comparison is emitted. */
+ vector_compare_match1,
|
|
|
+ /* The XOR of the two characters is a power of two: the differing bit is
+ ORed into the data first, then a single equality comparison is emitted. */
+ vector_compare_match1i,
|
|
|
+ /* Any other pair: two equality comparisons are emitted and their results
+ are ORed together. */
+ vector_compare_match2,
|
|
|
+} vector_compare_type;
|
|
|
+
|
|
|
+/* Returns the largest offset difference (in code units) accepted by the
+character pair fast forward code: 15, 7 or 3 for 8, 16 or 32 bit wide code
+units respectively. Any other width is rejected at compile time. */
+static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void)
+{
+#if PCRE2_CODE_UNIT_WIDTH != 8 && PCRE2_CODE_UNIT_WIDTH != 16 && PCRE2_CODE_UNIT_WIDTH != 32
+#error "Unsupported unit width"
+#endif
+
+/* 128 / width - 1 yields exactly 15, 7 and 3 for the three valid widths. */
+return (128 / PCRE2_CODE_UNIT_WIDTH) - 1;
+}
|
|
|
|
|
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
|
|
static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg)
|
|
@@ -56,6 +78,10 @@ return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00);
|
|
|
}
|
|
|
#endif
|
|
|
|
|
|
+#endif /* SLJIT_CONFIG_X86 || SLJIT_CONFIG_S390X */
|
|
|
+
|
|
|
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
|
|
|
+
|
|
|
static sljit_s32 character_to_int32(PCRE2_UCHAR chr)
|
|
|
{
|
|
|
sljit_u32 value = chr;
|
|
@@ -97,13 +123,7 @@ instruction[4] = (sljit_u8)offset;
|
|
|
sljit_emit_op_custom(compiler, instruction, 5);
|
|
|
}
|
|
|
|
|
|
-typedef enum {
|
|
|
- sse2_compare_match1,
|
|
|
- sse2_compare_match1i,
|
|
|
- sse2_compare_match2,
|
|
|
-} sse2_compare_type;
|
|
|
-
|
|
|
-static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, sse2_compare_type compare_type,
|
|
|
+static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, vector_compare_type compare_type,
|
|
|
int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
|
|
|
{
|
|
|
sljit_u8 instruction[4];
|
|
@@ -112,11 +132,11 @@ instruction[1] = 0x0f;
|
|
|
|
|
|
SLJIT_ASSERT(step >= 0 && step <= 3);
|
|
|
|
|
|
-if (compare_type != sse2_compare_match2)
|
|
|
+if (compare_type != vector_compare_match2)
|
|
|
{
|
|
|
if (step == 0)
|
|
|
{
|
|
|
- if (compare_type == sse2_compare_match1i)
|
|
|
+ if (compare_type == vector_compare_match1i)
|
|
|
{
|
|
|
/* POR xmm1, xmm2/m128 */
|
|
|
/* instruction[0] = 0x66; */
|
|
@@ -185,14 +205,14 @@ switch (step)
|
|
|
static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
|
|
|
{
|
|
|
DEFINE_COMPILER;
|
|
|
+sljit_u8 instruction[8];
|
|
|
struct sljit_label *start;
|
|
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
|
|
struct sljit_label *restart;
|
|
|
#endif
|
|
|
struct sljit_jump *quit;
|
|
|
struct sljit_jump *partial_quit[2];
|
|
|
-sse2_compare_type compare_type = sse2_compare_match1;
|
|
|
-sljit_u8 instruction[8];
|
|
|
+vector_compare_type compare_type = vector_compare_match1;
|
|
|
sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
|
|
|
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR);
|
|
|
sljit_s32 data_ind = 0;
|
|
@@ -207,12 +227,12 @@ SLJIT_UNUSED_ARG(offset);
|
|
|
if (char1 != char2)
|
|
|
{
|
|
|
bit = char1 ^ char2;
|
|
|
- compare_type = sse2_compare_match1i;
|
|
|
+ compare_type = vector_compare_match1i;
|
|
|
|
|
|
if (!is_powerof2(bit))
|
|
|
{
|
|
|
bit = 0;
|
|
|
- compare_type = sse2_compare_match2;
|
|
|
+ compare_type = vector_compare_match2;
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -349,11 +369,11 @@ if (common->utf && offset > 0)
|
|
|
static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
|
|
|
{
|
|
|
DEFINE_COMPILER;
|
|
|
+sljit_u8 instruction[8];
|
|
|
struct sljit_label *start;
|
|
|
struct sljit_jump *quit;
|
|
|
jump_list *not_found = NULL;
|
|
|
-sse2_compare_type compare_type = sse2_compare_match1;
|
|
|
-sljit_u8 instruction[8];
|
|
|
+vector_compare_type compare_type = vector_compare_match1;
|
|
|
sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
|
|
|
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR);
|
|
|
sljit_s32 data_ind = 0;
|
|
@@ -366,12 +386,12 @@ int i;
|
|
|
if (char1 != char2)
|
|
|
{
|
|
|
bit = char1 ^ char2;
|
|
|
- compare_type = sse2_compare_match1i;
|
|
|
+ compare_type = vector_compare_match1i;
|
|
|
|
|
|
if (!is_powerof2(bit))
|
|
|
{
|
|
|
bit = 0;
|
|
|
- compare_type = sse2_compare_match2;
|
|
|
+ compare_type = vector_compare_match2;
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -476,27 +496,15 @@ return not_found;
|
|
|
|
|
|
#ifndef _WIN64
|
|
|
|
|
|
-static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void)
|
|
|
-{
|
|
|
-#if PCRE2_CODE_UNIT_WIDTH == 8
|
|
|
-return 15;
|
|
|
-#elif PCRE2_CODE_UNIT_WIDTH == 16
|
|
|
-return 7;
|
|
|
-#elif PCRE2_CODE_UNIT_WIDTH == 32
|
|
|
-return 3;
|
|
|
-#else
|
|
|
-#error "Unsupported unit width"
|
|
|
-#endif
|
|
|
-}
|
|
|
-
|
|
|
#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
|
|
|
|
|
|
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
|
|
|
PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
|
|
|
{
|
|
|
DEFINE_COMPILER;
|
|
|
-sse2_compare_type compare1_type = sse2_compare_match1;
|
|
|
-sse2_compare_type compare2_type = sse2_compare_match1;
|
|
|
+sljit_u8 instruction[8];
|
|
|
+vector_compare_type compare1_type = vector_compare_match1;
|
|
|
+vector_compare_type compare2_type = vector_compare_match1;
|
|
|
sljit_u32 bit1 = 0;
|
|
|
sljit_u32 bit2 = 0;
|
|
|
sljit_u32 diff = IN_UCHARS(offs1 - offs2);
|
|
@@ -516,7 +524,6 @@ struct sljit_label *start;
|
|
|
struct sljit_label *restart;
|
|
|
#endif
|
|
|
struct sljit_jump *jump[2];
|
|
|
-sljit_u8 instruction[8];
|
|
|
int i;
|
|
|
|
|
|
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
|
|
@@ -549,13 +556,13 @@ else
|
|
|
bit1 = char1a ^ char1b;
|
|
|
if (is_powerof2(bit1))
|
|
|
{
|
|
|
- compare1_type = sse2_compare_match1i;
|
|
|
+ compare1_type = vector_compare_match1i;
|
|
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a | bit1));
|
|
|
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit1));
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
- compare1_type = sse2_compare_match2;
|
|
|
+ compare1_type = vector_compare_match2;
|
|
|
bit1 = 0;
|
|
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
|
|
|
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char1b));
|
|
@@ -578,13 +585,13 @@ else
|
|
|
bit2 = char2a ^ char2b;
|
|
|
if (is_powerof2(bit2))
|
|
|
{
|
|
|
- compare2_type = sse2_compare_match1i;
|
|
|
+ compare2_type = vector_compare_match1i;
|
|
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a | bit2));
|
|
|
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit2));
|
|
|
}
|
|
|
else
|
|
|
{
|
|
|
- compare2_type = sse2_compare_match2;
|
|
|
+ compare2_type = vector_compare_match2;
|
|
|
bit2 = 0;
|
|
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
|
|
|
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char2b));
|
|
@@ -731,9 +738,6 @@ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
|
|
|
|
|
|
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
|
|
|
|
|
-if (common->match_end_ptr != 0)
|
|
|
- OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
|
|
|
-
|
|
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
|
|
if (common->utf)
|
|
|
{
|
|
@@ -760,7 +764,7 @@ if (common->match_end_ptr != 0)
|
|
|
|
|
|
#undef SSE2_COMPARE_TYPE_INDEX
|
|
|
|
|
|
-#endif /* SLJIT_CONFIG_X86 && !SUPPORT_VALGRIND */
|
|
|
+#endif /* SLJIT_CONFIG_X86 */
|
|
|
|
|
|
#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 && (defined __ARM_NEON || defined __ARM_NEON__))
|
|
|
|
|
@@ -1121,3 +1125,734 @@ JUMPHERE(partial_quit);
|
|
|
}
|
|
|
|
|
|
#endif /* SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 */
|
|
|
+
|
|
|
+#if (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
|
|
|
+
|
|
|
+#if PCRE2_CODE_UNIT_WIDTH == 8
|
|
|
+#define VECTOR_ELEMENT_SIZE 0
|
|
|
+#elif PCRE2_CODE_UNIT_WIDTH == 16
|
|
|
+#define VECTOR_ELEMENT_SIZE 1
|
|
|
+#elif PCRE2_CODE_UNIT_WIDTH == 32
|
|
|
+#define VECTOR_ELEMENT_SIZE 2
|
|
|
+#else
|
|
|
+#error "Unsupported unit width"
|
|
|
+#endif
|
|
|
+
|
|
|
+/* Emits one 6 byte vector load instruction through sljit_emit_op_custom().
+
+   compiler   sljit compiler that receives the instruction
+   vlbb       TRUE selects opcode byte 0x07, FALSE selects 0x06
+              (presumably VLBB versus VL; confirm with the z/Architecture
+              instruction reference)
+   dst_vreg   number placed into the destination vector register field
+   base_reg   machine index of the base register
+   index_reg  machine index of the index register, or 0
+*/
+static void load_from_mem_vector(struct sljit_compiler *compiler, BOOL vlbb, sljit_s32 dst_vreg,
+  sljit_s32 base_reg, sljit_s32 index_reg)
+{
+sljit_u16 insn[3];
+sljit_u16 opcode_low = 0x06;
+
+if (vlbb)
+  opcode_low = 0x07;
+
+/* The three halfwords are filled in instruction order. */
+insn[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | index_reg);
+insn[1] = (sljit_u16)(base_reg << 12);
+insn[2] = (sljit_u16)((0x8 << 8) | opcode_low);
+
+sljit_emit_op_custom(compiler, insn, 6);
+}
|
|
|
+
|
|
|
+#if PCRE2_CODE_UNIT_WIDTH == 32
|
|
|
+
|
|
|
+/* Emits code that fills vector register dst_vreg with copies of chr. The
+callers in this file invoke it once with step 0 and once with step 1:
+
+   chr < 0x7fff:  step 0 emits a single VREPI, step 1 emits nothing
+   otherwise:     step 0 moves chr into tmp_general_reg and emits VLVG,
+                  step 1 emits VREP
+
+tmp_general_reg is an sljit register; it is only written when chr does not
+take the VREPI path. */
+static void replicate_imm_vector(struct sljit_compiler *compiler, int step, sljit_s32 dst_vreg,
+  PCRE2_UCHAR chr, sljit_s32 tmp_general_reg)
+{
+sljit_u16 insn[3];
+
+SLJIT_ASSERT(step >= 0 && step <= 1);
+
+if (chr < 0x7fff)
+  {
+  if (step != 1)
+    {
+    /* VREPI */
+    insn[0] = (sljit_u16)(0xe700 | (dst_vreg << 4));
+    insn[1] = (sljit_u16)chr;
+    insn[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
+    sljit_emit_op_custom(compiler, insn, 6);
+    }
+  return;
+  }
+
+if (step != 0)
+  {
+  /* VREP */
+  insn[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | dst_vreg);
+  insn[1] = 0;
+  insn[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xc << 8) | 0x4d);
+  sljit_emit_op_custom(compiler, insn, 6);
+  return;
+  }
+
+/* Large value, first step: load it into a general register, then VLVG. */
+OP1(SLJIT_MOV, tmp_general_reg, 0, SLJIT_IMM, chr);
+
+insn[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | sljit_get_register_index(tmp_general_reg));
+insn[1] = 0;
+insn[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x22);
+sljit_emit_op_custom(compiler, insn, 6);
+}
|
|
|
+
|
|
|
+#endif
|
|
|
+
|
|
|
+/* Emits one step (0, 1 or 2) of the comparison sequence selected by
+compare_type. Callers run the steps in increasing order.
+
+   step 0:  match1i: VO   dst_ind |= cmp2_ind
+            match2:  VCEQ tmp_ind = (dst_ind == cmp2_ind)
+   step 1:  always:  VCEQ dst_ind = (dst_ind == cmp1_ind)
+   step 2:  match2:  VO   dst_ind |= tmp_ind
+
+Every other (step, compare_type) combination emits nothing. All *_ind
+arguments are vector register numbers. */
+static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, vector_compare_type compare_type,
+  int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
+{
+sljit_u16 insn[3];
+
+SLJIT_ASSERT(step >= 0 && step <= 2);
+
+switch (step)
+  {
+  case 0:
+  if (compare_type == vector_compare_match2)
+    {
+    /* VCEQ */
+    insn[0] = (sljit_u16)(0xe700 | (tmp_ind << 4) | dst_ind);
+    insn[1] = (sljit_u16)(cmp2_ind << 12);
+    insn[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0xf8);
+    sljit_emit_op_custom(compiler, insn, 6);
+    }
+  else if (compare_type == vector_compare_match1i)
+    {
+    /* VO */
+    insn[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind);
+    insn[1] = (sljit_u16)(cmp2_ind << 12);
+    insn[2] = (sljit_u16)((0xe << 8) | 0x6a);
+    sljit_emit_op_custom(compiler, insn, 6);
+    }
+  break;
+
+  case 1:
+  /* VCEQ */
+  insn[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind);
+  insn[1] = (sljit_u16)(cmp1_ind << 12);
+  insn[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0xf8);
+  sljit_emit_op_custom(compiler, insn, 6);
+  break;
+
+  case 2:
+  if (compare_type == vector_compare_match2)
+    {
+    /* VO */
+    insn[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind);
+    insn[1] = (sljit_u16)(tmp_ind << 12);
+    insn[2] = (sljit_u16)((0xe << 8) | 0x6a);
+    sljit_emit_op_custom(compiler, insn, 6);
+    }
+  break;
+
+  default:
+  break;
+  }
+}
|
|
|
+
|
|
|
+#define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1
|
|
|
+
|
|
|
+/* Emits s390x vector code that moves STR_PTR forward to the next code unit
+equal to char1 or char2.
+
+   common  JIT compiler state; common->mode, common->utf and
+           common->failed_match are used
+   char1   first accepted character
+   char2   second accepted character (may be equal to char1)
+   offset  only read in UTF mode: after a hit, the code unit at
+           STR_PTR - offset must start a character, otherwise the search is
+           restarted one code unit later
+
+The subject is processed in 16 byte steps: first one load starting at the
+(possibly unaligned) STR_PTR, then a loop over 16 byte aligned addresses.
+In PCRE2_JIT_COMPLETE mode reaching STR_END jumps to common->failed_match;
+in the other modes STR_PTR is limited to STR_END instead. */
+static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
+{
+DEFINE_COMPILER;
+sljit_u16 instruction[3];
+struct sljit_label *start;
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+struct sljit_label *restart;
+#endif
+struct sljit_jump *quit;
+struct sljit_jump *partial_quit[2];
+vector_compare_type compare_type = vector_compare_match1;
+sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
+sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR);
+/* Vector register numbers used by the generated code. */
+sljit_s32 data_ind = 0;
+sljit_s32 tmp_ind = 1;
+sljit_s32 cmp1_ind = 2;
+sljit_s32 cmp2_ind = 3;
+sljit_s32 zero_ind = 4;
+sljit_u32 bit = 0;
+int i;
+
+SLJIT_UNUSED_ARG(offset);
+
+/* Choose the cheapest comparison that accepts both characters. */
+if (char1 != char2)
+  {
+  bit = char1 ^ char2;
+  compare_type = vector_compare_match1i;
+
+  if (!is_powerof2(bit))
+    {
+    bit = 0;
+    compare_type = vector_compare_match2;
+    }
+  }
+
+partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+if (common->mode == PCRE2_JIT_COMPLETE)
+  add_jump(compiler, &common->failed_match, partial_quit[0]);
+
+/* First part (unaligned start) */
+
+/* TMP2 becomes the next 16 byte boundary after the AND with ~15 below. */
+OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 16);
+
+#if PCRE2_CODE_UNIT_WIDTH != 32
+
+/* VREPI */
+instruction[0] = (sljit_u16)(0xe700 | (cmp1_ind << 4));
+instruction[1] = (sljit_u16)(char1 | bit);
+instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+if (char1 != char2)
+  {
+  /* VREPI */
+  instruction[0] = (sljit_u16)(0xe700 | (cmp2_ind << 4));
+  instruction[1] = (sljit_u16)(bit != 0 ? bit : char2);
+  /* instruction[2] still holds the VREPI encoding stored above. */
+  sljit_emit_op_custom(compiler, instruction, 6);
+  }
+
+#else /* PCRE2_CODE_UNIT_WIDTH == 32 */
+
+/* Reuse the function level counter instead of declaring a shadowing one. */
+for (i = 0; i < 2; i++)
+  {
+  replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP1);
+
+  if (char1 != char2)
+    replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP1);
+  }
+
+#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
+
+if (compare_type == vector_compare_match2)
+  {
+  /* VREPI */
+  instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4));
+  instruction[1] = 0;
+  instruction[2] = (sljit_u16)((0x8 << 8) | 0x45);
+  sljit_emit_op_custom(compiler, instruction, 6);
+  }
+
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+restart = LABEL();
+#endif
+
+load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0);
+OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, ~15);
+
+if (compare_type != vector_compare_match2)
+  {
+  if (compare_type == vector_compare_match1i)
+    fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
+
+  /* VFEE */
+  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
+  instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
+  instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
+  sljit_emit_op_custom(compiler, instruction, 6);
+  }
+else
+  {
+  for (i = 0; i < 3; i++)
+    fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
+
+  /* VFENE */
+  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
+  instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
+  instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
+  sljit_emit_op_custom(compiler, instruction, 6);
+  }
+
+/* VLGVB */
+instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind);
+instruction[1] = 7;
+instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+/* TMP1 holds the value extracted by VLGVB; a result below the boundary in
+TMP2 ends the search. */
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+quit = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
+
+OP2(SLJIT_SUB, STR_PTR, 0, TMP2, 0, SLJIT_IMM, 16);
+
+/* Second part (aligned) */
+start = LABEL();
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
+
+partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+if (common->mode == PCRE2_JIT_COMPLETE)
+  add_jump(compiler, &common->failed_match, partial_quit[1]);
+
+load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0);
+
+if (compare_type != vector_compare_match2)
+  {
+  if (compare_type == vector_compare_match1i)
+    fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
+
+  /* VFEE */
+  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
+  instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
+  instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
+  sljit_emit_op_custom(compiler, instruction, 6);
+  }
+else
+  {
+  for (i = 0; i < 3; i++)
+    fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
+
+  /* VFENE */
+  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
+  instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
+  instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
+  sljit_emit_op_custom(compiler, instruction, 6);
+  }
+
+/* The custom instruction above set the condition code; tell sljit which
+flag the following conditional jump tests. */
+sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW);
+JUMPTO(SLJIT_OVERFLOW, start);
+
+/* VLGVB */
+instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind);
+instruction[1] = 7;
+instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+
+JUMPHERE(quit);
+
+if (common->mode != PCRE2_JIT_COMPLETE)
+  {
+  JUMPHERE(partial_quit[0]);
+  JUMPHERE(partial_quit[1]);
+  /* Never leave STR_PTR beyond STR_END. */
+  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
+  CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
+  }
+else
+  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+if (common->utf && offset > 0)
+  {
+  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
+
+  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
+
+  quit = jump_if_utf_char_start(compiler, TMP1);
+
+  /* Not at a character start: skip one code unit and search again. */
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+
+  OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 16);
+  JUMPTO(SLJIT_JUMP, restart);
+
+  JUMPHERE(quit);
+  }
+#endif
+}
|
|
|
+
|
|
|
+#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD 1
|
|
|
+
|
|
|
+/* Emits s390x vector code that searches forward from the address in TMP1
+for a code unit equal to char1 or char2.
+
+   common  JIT compiler state
+   char1   first accepted character
+   char2   second accepted character (may be equal to char1)
+
+Returns the list of jumps that are taken when TMP1 reaches STR_END without a
+hit; the caller must bind them. When the emitted code falls through, TMP1
+has been advanced by the value extracted from the comparison result. TMP2
+and TMP3 are overwritten by the generated code. */
+static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
+{
+DEFINE_COMPILER;
+sljit_u16 instruction[3];
+struct sljit_label *start;
+struct sljit_jump *quit;
+jump_list *not_found = NULL;
+vector_compare_type compare_type = vector_compare_match1;
+sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
+sljit_s32 tmp3_reg_ind = sljit_get_register_index(TMP3);
+/* Vector register numbers used by the generated code. */
+sljit_s32 data_ind = 0;
+sljit_s32 tmp_ind = 1;
+sljit_s32 cmp1_ind = 2;
+sljit_s32 cmp2_ind = 3;
+sljit_s32 zero_ind = 4;
+sljit_u32 bit = 0;
+int i;
+
+/* Choose the cheapest comparison that accepts both characters. */
+if (char1 != char2)
+  {
+  bit = char1 ^ char2;
+  compare_type = vector_compare_match1i;
+
+  if (!is_powerof2(bit))
+    {
+    bit = 0;
+    compare_type = vector_compare_match2;
+    }
+  }
+
+add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
+
+/* First part (unaligned start) */
+
+/* TMP2 becomes the next 16 byte boundary after the AND with ~15 below. */
+OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, 16);
+
+#if PCRE2_CODE_UNIT_WIDTH != 32
+
+/* VREPI */
+instruction[0] = (sljit_u16)(0xe700 | (cmp1_ind << 4));
+instruction[1] = (sljit_u16)(char1 | bit);
+instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+if (char1 != char2)
+  {
+  /* VREPI */
+  instruction[0] = (sljit_u16)(0xe700 | (cmp2_ind << 4));
+  instruction[1] = (sljit_u16)(bit != 0 ? bit : char2);
+  /* instruction[2] still holds the VREPI encoding stored above. */
+  sljit_emit_op_custom(compiler, instruction, 6);
+  }
+
+#else /* PCRE2_CODE_UNIT_WIDTH == 32 */
+
+/* Reuse the function level counter instead of declaring a shadowing one. */
+for (i = 0; i < 2; i++)
+  {
+  replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP3);
+
+  if (char1 != char2)
+    replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP3);
+  }
+
+#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
+
+if (compare_type == vector_compare_match2)
+  {
+  /* VREPI */
+  instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4));
+  instruction[1] = 0;
+  instruction[2] = (sljit_u16)((0x8 << 8) | 0x45);
+  sljit_emit_op_custom(compiler, instruction, 6);
+  }
+
+load_from_mem_vector(compiler, TRUE, data_ind, tmp1_reg_ind, 0);
+OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, ~15);
+
+if (compare_type != vector_compare_match2)
+  {
+  if (compare_type == vector_compare_match1i)
+    fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
+
+  /* VFEE */
+  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
+  instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
+  instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
+  sljit_emit_op_custom(compiler, instruction, 6);
+  }
+else
+  {
+  for (i = 0; i < 3; i++)
+    fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
+
+  /* VFENE */
+  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
+  instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
+  instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
+  sljit_emit_op_custom(compiler, instruction, 6);
+  }
+
+/* VLGVB */
+instruction[0] = (sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind);
+instruction[1] = 7;
+instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+/* TMP3 holds the value extracted by VLGVB; a result below the boundary in
+TMP2 ends the search. */
+OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
+quit = CMP(SLJIT_LESS, TMP1, 0, TMP2, 0);
+
+OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 16);
+
+/* Second part (aligned) */
+start = LABEL();
+
+OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 16);
+
+add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
+
+load_from_mem_vector(compiler, TRUE, data_ind, tmp1_reg_ind, 0);
+
+if (compare_type != vector_compare_match2)
+  {
+  if (compare_type == vector_compare_match1i)
+    fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
+
+  /* VFEE */
+  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
+  instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
+  instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
+  sljit_emit_op_custom(compiler, instruction, 6);
+  }
+else
+  {
+  for (i = 0; i < 3; i++)
+    fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
+
+  /* VFENE */
+  instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
+  instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
+  instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
+  sljit_emit_op_custom(compiler, instruction, 6);
+  }
+
+/* The custom instruction above set the condition code; tell sljit which
+flag the following conditional jump tests. */
+sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW);
+JUMPTO(SLJIT_OVERFLOW, start);
+
+/* VLGVB */
+instruction[0] = (sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind);
+instruction[1] = 7;
+instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
+
+JUMPHERE(quit);
+add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
+
+return not_found;
+}
|
|
|
+
|
|
|
+#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1
|
|
|
+
|
|
|
+/* Emits s390x vector code that moves STR_PTR forward to the next position
+where two characters at fixed offsets are both acceptable.
+
+   common          JIT compiler state; only PCRE2_JIT_COMPLETE mode is
+                   supported (asserted)
+   offs1           offset of the first character; must be greater than offs2
+   char1a, char1b  the two accepted values of the first character
+   offs2           offset of the second character
+   char2a, char2b  the two accepted values of the second character
+
+Two vectors are loaded per step, one at STR_PTR (which is temporarily
+advanced by offs1) and one IN_UCHARS(offs1 - offs2) bytes before it. Each is
+compared with its own character pair and the two results are combined with
+VN. On failure the code jumps to common->failed_match. When
+common->match_end_ptr is set, STR_END is saved in TMP3, possibly lowered for
+the duration of the search, and restored at the end. */
+static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
+  PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
+{
+DEFINE_COMPILER;
+sljit_u16 instruction[3];
+struct sljit_label *start;
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+struct sljit_label *restart;
+#endif
+struct sljit_jump *quit;
+struct sljit_jump *jump[2];
+vector_compare_type compare1_type = vector_compare_match1;
+vector_compare_type compare2_type = vector_compare_match1;
+sljit_u32 bit1 = 0;
+sljit_u32 bit2 = 0;
+/* Negative byte distance from the first to the second character. */
+sljit_s32 diff = IN_UCHARS(offs2 - offs1);
+sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
+sljit_s32 tmp2_reg_ind = sljit_get_register_index(TMP2);
+sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR);
+/* Vector register numbers used by the generated code. */
+sljit_s32 data1_ind = 0;
+sljit_s32 data2_ind = 1;
+sljit_s32 tmp1_ind = 2;
+sljit_s32 tmp2_ind = 3;
+sljit_s32 cmp1a_ind = 4;
+sljit_s32 cmp1b_ind = 5;
+sljit_s32 cmp2a_ind = 6;
+sljit_s32 cmp2b_ind = 7;
+sljit_s32 zero_ind = 8;
+int i;
+
+SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
+SLJIT_ASSERT(-diff <= (sljit_s32)IN_UCHARS(max_fast_forward_char_pair_offset()));
+SLJIT_ASSERT(tmp1_reg_ind != 0 && tmp2_reg_ind != 0);
+
+/* Choose the cheapest comparison for each of the two characters. */
+if (char1a != char1b)
+  {
+  bit1 = char1a ^ char1b;
+  compare1_type = vector_compare_match1i;
+
+  if (!is_powerof2(bit1))
+    {
+    bit1 = 0;
+    compare1_type = vector_compare_match2;
+    }
+  }
+
+if (char2a != char2b)
+  {
+  bit2 = char2a ^ char2b;
+  compare2_type = vector_compare_match1i;
+
+  if (!is_powerof2(bit2))
+    {
+    bit2 = 0;
+    compare2_type = vector_compare_match2;
+    }
+  }
+
+/* Initialize. */
+if (common->match_end_ptr != 0)
+  {
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
+  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
+  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
+
+  /* STR_END = min(STR_END, TMP1); the original value stays in TMP3. */
+  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0);
+  CMOV(SLJIT_LESS, STR_END, TMP1, 0);
+  }
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
+add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15);
+
+#if PCRE2_CODE_UNIT_WIDTH != 32
+
+OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff);
+
+/* VREPI */
+instruction[0] = (sljit_u16)(0xe700 | (cmp1a_ind << 4));
+instruction[1] = (sljit_u16)(char1a | bit1);
+instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+if (char1a != char1b)
+  {
+  /* VREPI */
+  instruction[0] = (sljit_u16)(0xe700 | (cmp1b_ind << 4));
+  instruction[1] = (sljit_u16)(bit1 != 0 ? bit1 : char1b);
+  /* instruction[2] still holds the VREPI encoding stored above. */
+  sljit_emit_op_custom(compiler, instruction, 6);
+  }
+
+/* VREPI */
+instruction[0] = (sljit_u16)(0xe700 | (cmp2a_ind << 4));
+instruction[1] = (sljit_u16)(char2a | bit2);
+/* instruction[2] still holds the VREPI encoding stored above. */
+sljit_emit_op_custom(compiler, instruction, 6);
+
+if (char2a != char2b)
+  {
+  /* VREPI */
+  instruction[0] = (sljit_u16)(0xe700 | (cmp2b_ind << 4));
+  instruction[1] = (sljit_u16)(bit2 != 0 ? bit2 : char2b);
+  /* instruction[2] still holds the VREPI encoding stored above. */
+  sljit_emit_op_custom(compiler, instruction, 6);
+  }
+
+#else /* PCRE2_CODE_UNIT_WIDTH == 32 */
+
+/* Reuse the function level counter instead of declaring a shadowing one. */
+for (i = 0; i < 2; i++)
+  {
+  replicate_imm_vector(compiler, i, cmp1a_ind, char1a | bit1, TMP1);
+
+  if (char1a != char1b)
+    replicate_imm_vector(compiler, i, cmp1b_ind, bit1 != 0 ? bit1 : char1b, TMP1);
+
+  replicate_imm_vector(compiler, i, cmp2a_ind, char2a | bit2, TMP1);
+
+  if (char2a != char2b)
+    replicate_imm_vector(compiler, i, cmp2b_ind, bit2 != 0 ? bit2 : char2b, TMP1);
+  }
+
+/* TMP1 may have been used as a temporary above, so it is set afterwards. */
+OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff);
+
+#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
+
+/* VREPI */
+instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4));
+instruction[1] = 0;
+instruction[2] = (sljit_u16)((0x8 << 8) | 0x45);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+restart = LABEL();
+#endif
+
+/* TMP1 addresses the second character and TMP2 is STR_PTR rounded down to
+16 bytes; the load variant for the second vector depends on whether TMP1
+lies below TMP2. */
+jump[0] = CMP(SLJIT_LESS, TMP1, 0, TMP2, 0);
+load_from_mem_vector(compiler, TRUE, data2_ind, tmp1_reg_ind, 0);
+jump[1] = JUMP(SLJIT_JUMP);
+JUMPHERE(jump[0]);
+load_from_mem_vector(compiler, FALSE, data2_ind, tmp1_reg_ind, 0);
+JUMPHERE(jump[1]);
+
+load_from_mem_vector(compiler, TRUE, data1_ind, str_ptr_reg_ind, 0);
+OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 16);
+
+for (i = 0; i < 3; i++)
+  {
+  fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
+  fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
+  }
+
+/* VN */
+instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
+instruction[1] = (sljit_u16)(data2_ind << 12);
+instruction[2] = (sljit_u16)((0xe << 8) | 0x68);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+/* VFENE */
+instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
+instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
+instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+/* VLGVB */
+instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data1_ind);
+instruction[1] = 7;
+instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+/* TMP1 holds the value extracted by VLGVB; a result below the boundary in
+TMP2 ends the search. */
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+quit = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
+
+OP2(SLJIT_SUB, STR_PTR, 0, TMP2, 0, SLJIT_IMM, 16);
+/* From here on TMP1 is the index register of the second load in the loop. */
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, diff);
+
+/* Main loop. */
+start = LABEL();
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
+add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+
+load_from_mem_vector(compiler, FALSE, data1_ind, str_ptr_reg_ind, 0);
+load_from_mem_vector(compiler, FALSE, data2_ind, str_ptr_reg_ind, tmp1_reg_ind);
+
+for (i = 0; i < 3; i++)
+  {
+  fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
+  fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
+  }
+
+/* VN */
+instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
+instruction[1] = (sljit_u16)(data2_ind << 12);
+instruction[2] = (sljit_u16)((0xe << 8) | 0x68);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+/* VFENE */
+instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
+instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
+instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+/* The custom instruction above set the condition code; tell sljit which
+flag the following conditional jump tests. */
+sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW);
+JUMPTO(SLJIT_OVERFLOW, start);
+
+/* VLGVB */
+instruction[0] = (sljit_u16)(0xe700 | (tmp2_reg_ind << 4) | data1_ind);
+instruction[1] = 7;
+instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+
+JUMPHERE(quit);
+
+add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+if (common->utf)
+  {
+  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
+
+  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));
+
+  quit = jump_if_utf_char_start(compiler, TMP1);
+
+  /* Not at a character start: skip one code unit and search again. */
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+
+  /* TMP1 was overwritten by the load above; rebuild both TMP2 and TMP1 as
+  the code at the restart label expects them. */
+  OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15);
+  OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff);
+  JUMPTO(SLJIT_JUMP, restart);
+
+  JUMPHERE(quit);
+  }
+#endif
+
+/* Undo the initial advance so that STR_PTR is the match start candidate. */
+OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
+
+if (common->match_end_ptr != 0)
+  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
+}
|
|
|
+
|
|
|
+#endif /* SLJIT_CONFIG_S390X */
|
|
|
+
|
|
|
+#endif /* !SUPPORT_VALGRIND */
|