
opus: Update to upstream version 1.1.5

(cherry picked from commit 47e0e530a9756817cc1d62c48ec44089f3c2932f)
Rémi Verschelde, 8 years ago
commit 75a6e0f65a
40 changed files with 425 additions and 250 deletions
  1. thirdparty/README.md (+1 -1)
  2. thirdparty/opus/celt/arch.h (+12 -1)
  3. thirdparty/opus/celt/arm/arm_celt_map.c (+23 -1)
  4. thirdparty/opus/celt/arm/armcpu.c (+30 -19)
  5. thirdparty/opus/celt/arm/armcpu.h (+6 -0)
  6. thirdparty/opus/celt/arm/celt_neon_intr.c (+60 -1)
  7. thirdparty/opus/celt/arm/pitch_arm.h (+63 -5)
  8. thirdparty/opus/celt/bands.c (+2 -2)
  9. thirdparty/opus/celt/celt.h (+1 -1)
  10. thirdparty/opus/celt/celt_decoder.c (+7 -3)
  11. thirdparty/opus/celt/celt_encoder.c (+8 -5)
  12. thirdparty/opus/celt/celt_lpc.c (+1 -2)
  13. thirdparty/opus/celt/cwrs.c (+1 -1)
  14. thirdparty/opus/celt/fixed_generic.h (+16 -0)
  15. thirdparty/opus/celt/kiss_fft.c (+1 -1)
  16. thirdparty/opus/celt/mathops.c (+1 -1)
  17. thirdparty/opus/celt/pitch.c (+40 -27)
  18. thirdparty/opus/celt/pitch.h (+0 -20)
  19. thirdparty/opus/celt/rate.c (+1 -1)
  20. thirdparty/opus/celt/vq.c (+1 -1)
  21. thirdparty/opus/celt/x86/pitch_sse.h (+17 -17)
  22. thirdparty/opus/celt/x86/x86_celt_map.c (+1 -1)
  23. thirdparty/opus/celt/x86/x86cpu.c (+1 -1)
  24. thirdparty/opus/opus_multistream_encoder.c (+1 -1)
  25. thirdparty/opus/silk/CNG.c (+32 -29)
  26. thirdparty/opus/silk/NLSF_del_dec_quant.c (+7 -7)
  27. thirdparty/opus/silk/NLSF_encode.c (+3 -2)
  28. thirdparty/opus/silk/NSQ.c (+17 -41)
  29. thirdparty/opus/silk/NSQ_del_dec.c (+16 -24)
  30. thirdparty/opus/silk/PLC.c (+2 -1)
  31. thirdparty/opus/silk/decode_core.c (+1 -1)
  32. thirdparty/opus/silk/fixed/burg_modified_FIX.c (+13 -8)
  33. thirdparty/opus/silk/fixed/x86/burg_modified_FIX_sse.c (+8 -6)
  34. thirdparty/opus/silk/macros.h (+14 -8)
  35. thirdparty/opus/silk/mips/NSQ_del_dec_mipsr1.h (+5 -1)
  36. thirdparty/opus/silk/process_NLSFs.c (+4 -2)
  37. thirdparty/opus/silk/sort.c (+1 -1)
  38. thirdparty/opus/silk/stereo_LR_to_MS.c (+4 -4)
  39. thirdparty/opus/silk/x86/NSQ_sse.c (+1 -1)
  40. thirdparty/opus/silk/x86/main_sse.h (+2 -1)

+ 1 - 1
thirdparty/README.md

@@ -235,7 +235,7 @@ Files extracted from the upstream source:
 ## opus
 
 - Upstream: https://opus-codec.org
-- Version: 1.1.4 (opus) and 0.8 (opusfile)
+- Version: 1.1.5 (opus) and 0.8 (opusfile)
 - License: BSD-3-Clause
 
 Files extracted from upstream source:

+ 12 - 1
thirdparty/opus/celt/arch.h

@@ -78,6 +78,15 @@ static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line)
 #define UADD32(a,b) ((a)+(b))
 #define USUB32(a,b) ((a)-(b))
 
+/* Set this if opus_int64 is a native type of the CPU. */
+/* Assume that all LP64 architectures have fast 64-bit types; also x86_64
+   (which can be ILP32 for x32) and Win64 (which is LLP64). */
+#if defined(__x86_64__) || defined(__LP64__) || defined(_WIN64)
+#define OPUS_FAST_INT64 1
+#else
+#define OPUS_FAST_INT64 0
+#endif
+
 #define PRINT_MIPS(file)
 
 #ifdef FIXED_POINT
@@ -118,7 +127,9 @@ static OPUS_INLINE opus_int16 SAT16(opus_int32 x) {
 
 #include "fixed_generic.h"
 
-#ifdef OPUS_ARM_INLINE_EDSP
+#ifdef OPUS_ARM_PRESUME_AARCH64_NEON_INTR
+#include "arm/fixed_arm64.h"
+#elif OPUS_ARM_INLINE_EDSP
 #include "arm/fixed_armv5e.h"
 #elif defined (OPUS_ARM_INLINE_ASM)
 #include "arm/fixed_armv4.h"

+ 23 - 1
thirdparty/opus/celt/arm/arm_celt_map.c

@@ -36,6 +36,9 @@
 #if defined(OPUS_HAVE_RTCD)
 
 # if defined(FIXED_POINT)
+#  if ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \
+    (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \
+    (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))
 opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
     const opus_val16 *, opus_val32 *, int , int) = {
   celt_pitch_xcorr_c,               /* ARMv4 */
@@ -43,8 +46,10 @@ opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
   MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */
   MAY_HAVE_NEON(celt_pitch_xcorr)   /* NEON */
 };
+
+#  endif
 # else /* !FIXED_POINT */
-#  if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+#  if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)
 void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
     const opus_val16 *, opus_val32 *, int, int) = {
   celt_pitch_xcorr_c,              /* ARMv4 */
@@ -55,6 +60,23 @@ void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
 #  endif
 # endif /* FIXED_POINT */
 
+#if defined(FIXED_POINT) && defined(OPUS_HAVE_RTCD) && \
+ defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)
+
+void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
+         const opus_val16 *x,
+         const opus_val16 *y,
+         opus_val32       sum[4],
+         int              len
+) = {
+  xcorr_kernel_c,                /* ARMv4 */
+  xcorr_kernel_c,                /* EDSP */
+  xcorr_kernel_c,                /* Media */
+  xcorr_kernel_neon_fixed,       /* Neon */
+};
+
+#endif
+
 # if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
 #  if defined(HAVE_ARM_NE10)
 #   if defined(CUSTOM_MODES)

+ 30 - 19
thirdparty/opus/celt/arm/armcpu.c

@@ -37,11 +37,12 @@
 #include "cpu_support.h"
 #include "os_support.h"
 #include "opus_types.h"
+#include "arch.h"
 
-#define OPUS_CPU_ARM_V4    (1)
-#define OPUS_CPU_ARM_EDSP  (1<<1)
-#define OPUS_CPU_ARM_MEDIA (1<<2)
-#define OPUS_CPU_ARM_NEON  (1<<3)
+#define OPUS_CPU_ARM_V4_FLAG    (1<<OPUS_ARCH_ARM_V4)
+#define OPUS_CPU_ARM_EDSP_FLAG  (1<<OPUS_ARCH_ARM_EDSP)
+#define OPUS_CPU_ARM_MEDIA_FLAG (1<<OPUS_ARCH_ARM_MEDIA)
+#define OPUS_CPU_ARM_NEON_FLAG  (1<<OPUS_ARCH_ARM_NEON)
 
 #if defined(_MSC_VER)
 /*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
@@ -55,20 +56,22 @@ static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){
   /* MSVC has no OPUS_INLINE __asm support for ARM, but it does let you __emit
    * instructions via their assembled hex code.
    * All of these instructions should be essentially nops. */
-# if defined(OPUS_ARM_MAY_HAVE_EDSP)
+# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_MEDIA) \
+ || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
   __try{
     /*PLD [r13]*/
     __emit(0xF5DDF000);
-    flags|=OPUS_CPU_ARM_EDSP;
+    flags|=OPUS_CPU_ARM_EDSP_FLAG;
   }
   __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
     /*Ignore exception.*/
   }
-#  if defined(OPUS_ARM_MAY_HAVE_MEDIA)
+#  if defined(OPUS_ARM_MAY_HAVE_MEDIA) \
+ || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
   __try{
     /*SHADD8 r3,r3,r3*/
     __emit(0xE6333F93);
-    flags|=OPUS_CPU_ARM_MEDIA;
+    flags|=OPUS_CPU_ARM_MEDIA_FLAG;
   }
   __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
     /*Ignore exception.*/
@@ -77,7 +80,7 @@ static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){
   __try{
     /*VORR q0,q0,q0*/
     __emit(0xF2200150);
-    flags|=OPUS_CPU_ARM_NEON;
+    flags|=OPUS_CPU_ARM_NEON_FLAG;
   }
   __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
     /*Ignore exception.*/
@@ -107,26 +110,26 @@ opus_uint32 opus_cpu_capabilities(void)
 
     while(fgets(buf, 512, cpuinfo) != NULL)
     {
-# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_MEDIA) \
+ || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
       /* Search for edsp and neon flag */
       if(memcmp(buf, "Features", 8) == 0)
       {
         char *p;
-#  if defined(OPUS_ARM_MAY_HAVE_EDSP)
         p = strstr(buf, " edsp");
         if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
-          flags |= OPUS_CPU_ARM_EDSP;
-#  endif
+          flags |= OPUS_CPU_ARM_EDSP_FLAG;
 
 #  if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
         p = strstr(buf, " neon");
         if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
-          flags |= OPUS_CPU_ARM_NEON;
+          flags |= OPUS_CPU_ARM_NEON_FLAG;
 #  endif
       }
 # endif
 
-# if defined(OPUS_ARM_MAY_HAVE_MEDIA)
+# if defined(OPUS_ARM_MAY_HAVE_MEDIA) \
+ || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
       /* Search for media capabilities (>= ARMv6) */
       if(memcmp(buf, "CPU architecture:", 17) == 0)
       {
@@ -134,7 +137,7 @@ opus_uint32 opus_cpu_capabilities(void)
         version = atoi(buf+17);
 
         if(version >= 6)
-          flags |= OPUS_CPU_ARM_MEDIA;
+          flags |= OPUS_CPU_ARM_MEDIA_FLAG;
       }
 # endif
     }
@@ -156,18 +159,26 @@ int opus_select_arch(void)
   opus_uint32 flags = opus_cpu_capabilities();
   int arch = 0;
 
-  if(!(flags & OPUS_CPU_ARM_EDSP))
+  if(!(flags & OPUS_CPU_ARM_EDSP_FLAG)) {
+    /* Asserts ensure arch values are sequential */
+    celt_assert(arch == OPUS_ARCH_ARM_V4);
     return arch;
+  }
   arch++;
 
-  if(!(flags & OPUS_CPU_ARM_MEDIA))
+  if(!(flags & OPUS_CPU_ARM_MEDIA_FLAG)) {
+    celt_assert(arch == OPUS_ARCH_ARM_EDSP);
     return arch;
+  }
   arch++;
 
-  if(!(flags & OPUS_CPU_ARM_NEON))
+  if(!(flags & OPUS_CPU_ARM_NEON_FLAG)) {
+    celt_assert(arch == OPUS_ARCH_ARM_MEDIA);
     return arch;
+  }
   arch++;
 
+  celt_assert(arch == OPUS_ARCH_ARM_NEON);
   return arch;
 }
 

+ 6 - 0
thirdparty/opus/celt/arm/armcpu.h

@@ -66,6 +66,12 @@
 
 # if defined(OPUS_HAVE_RTCD)
 int opus_select_arch(void);
+
+#define OPUS_ARCH_ARM_V4    (0)
+#define OPUS_ARCH_ARM_EDSP  (1)
+#define OPUS_ARCH_ARM_MEDIA (2)
+#define OPUS_ARCH_ARM_NEON  (3)
+
 # endif
 
 #endif

+ 60 - 1
thirdparty/opus/celt/arm/celt_neon_intr.c

@@ -37,7 +37,66 @@
 #include <arm_neon.h>
 #include "../pitch.h"
 
-#if !defined(FIXED_POINT)
+#if defined(FIXED_POINT)
+void xcorr_kernel_neon_fixed(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
+{
+   int j;
+   int32x4_t a = vld1q_s32(sum);
+   /* Load y[0...3] */
+   /* This requires len>0 to always be valid (which we assert in the C code). */
+   int16x4_t y0 = vld1_s16(y);
+   y += 4;
+
+   for (j = 0; j + 8 <= len; j += 8)
+   {
+      /* Load x[0...7] */
+      int16x8_t xx = vld1q_s16(x);
+      int16x4_t x0 = vget_low_s16(xx);
+      int16x4_t x4 = vget_high_s16(xx);
+      /* Load y[4...11] */
+      int16x8_t yy = vld1q_s16(y);
+      int16x4_t y4 = vget_low_s16(yy);
+      int16x4_t y8 = vget_high_s16(yy);
+      int32x4_t a0 = vmlal_lane_s16(a, y0, x0, 0);
+      int32x4_t a1 = vmlal_lane_s16(a0, y4, x4, 0);
+
+      int16x4_t y1 = vext_s16(y0, y4, 1);
+      int16x4_t y5 = vext_s16(y4, y8, 1);
+      int32x4_t a2 = vmlal_lane_s16(a1, y1, x0, 1);
+      int32x4_t a3 = vmlal_lane_s16(a2, y5, x4, 1);
+
+      int16x4_t y2 = vext_s16(y0, y4, 2);
+      int16x4_t y6 = vext_s16(y4, y8, 2);
+      int32x4_t a4 = vmlal_lane_s16(a3, y2, x0, 2);
+      int32x4_t a5 = vmlal_lane_s16(a4, y6, x4, 2);
+
+      int16x4_t y3 = vext_s16(y0, y4, 3);
+      int16x4_t y7 = vext_s16(y4, y8, 3);
+      int32x4_t a6 = vmlal_lane_s16(a5, y3, x0, 3);
+      int32x4_t a7 = vmlal_lane_s16(a6, y7, x4, 3);
+
+      y0 = y8;
+      a = a7;
+      x += 8;
+      y += 8;
+   }
+
+   for (; j < len; j++)
+   {
+      int16x4_t x0 = vld1_dup_s16(x);  /* load next x */
+      int32x4_t a0 = vmlal_s16(a, y0, x0);
+
+      int16x4_t y4 = vld1_dup_s16(y);  /* load next y */
+      y0 = vext_s16(y0, y4, 1);
+      a = a0;
+      x++;
+      y++;
+   }
+
+   vst1q_s32(sum, a);
+}
+
+#else
 /*
  * Function: xcorr_kernel_neon_float
  * ---------------------------------

+ 63 - 5
thirdparty/opus/celt/arm/pitch_arm.h

@@ -46,10 +46,53 @@ opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y,
     opus_val32 *xcorr, int len, int max_pitch);
 #  endif
 
-#  if !defined(OPUS_HAVE_RTCD)
+#  if defined(OPUS_HAVE_RTCD) && \
+    ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \
+     (defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \
+     (defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))
+extern opus_val32
+(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
+      const opus_val16 *, opus_val32 *, int, int);
+#   define OVERRIDE_PITCH_XCORR (1)
+#   define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+  ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
+        xcorr, len, max_pitch))
+
+#  elif defined(OPUS_ARM_PRESUME_EDSP) || \
+    defined(OPUS_ARM_PRESUME_MEDIA) || \
+    defined(OPUS_ARM_PRESUME_NEON)
 #   define OVERRIDE_PITCH_XCORR (1)
 #   define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
   ((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch))
+
+#  endif
+
+#  if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+void xcorr_kernel_neon_fixed(
+                    const opus_val16 *x,
+                    const opus_val16 *y,
+                    opus_val32       sum[4],
+                    int              len);
+#  endif
+
+#  if defined(OPUS_HAVE_RTCD) && \
+    (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
+
+extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
+                    const opus_val16 *x,
+                    const opus_val16 *y,
+                    opus_val32       sum[4],
+                    int              len);
+
+#   define OVERRIDE_XCORR_KERNEL (1)
+#   define xcorr_kernel(x, y, sum, len, arch) \
+     ((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len))
+
+#  elif defined(OPUS_ARM_PRESUME_NEON_INTR)
+#   define OVERRIDE_XCORR_KERNEL (1)
+#   define xcorr_kernel(x, y, sum, len, arch) \
+      ((void)arch, xcorr_kernel_neon_fixed(x, y, sum, len))
+
 #  endif
 
 #else /* Start !FIXED_POINT */
@@ -57,12 +100,27 @@ opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y,
 #if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
 void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y,
                                  opus_val32 *xcorr, int len, int max_pitch);
-#if !defined(OPUS_HAVE_RTCD) || defined(OPUS_ARM_PRESUME_NEON_INTR)
-#define OVERRIDE_PITCH_XCORR (1)
+#endif
+
+#  if defined(OPUS_HAVE_RTCD) && \
+    (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
+extern void
+(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
+      const opus_val16 *, opus_val32 *, int, int);
+
+#  define OVERRIDE_PITCH_XCORR (1)
+#  define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+  ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
+        xcorr, len, max_pitch))
+
+#  elif defined(OPUS_ARM_PRESUME_NEON_INTR)
+
+#   define OVERRIDE_PITCH_XCORR (1)
 #   define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
    ((void)(arch),celt_pitch_xcorr_float_neon(_x, _y, xcorr, len, max_pitch))
-#endif
-#endif
+
+#  endif
 
 #endif /* end !FIXED_POINT */
+
 #endif

+ 2 - 2
thirdparty/opus/celt/bands.c

@@ -414,7 +414,7 @@ static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT
    /* Compensating for the mid normalization */
    xp = MULT16_32_Q15(mid, xp);
    /* mid and side are in Q15, not Q14 like X and Y */
-   mid2 = SHR32(mid, 1);
+   mid2 = SHR16(mid, 1);
    El = MULT16_16(mid2, mid2) + side - 2*xp;
    Er = MULT16_16(mid2, mid2) + side + 2*xp;
    if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28))
@@ -714,7 +714,7 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
    if (qn!=1)
    {
       if (encode)
-         itheta = (itheta*qn+8192)>>14;
+         itheta = (itheta*(opus_int32)qn+8192)>>14;
 
       /* Entropy coding of the angle. We use a uniform pdf for the
          time split, a step for stereo, and a triangular one for the rest. */

+ 1 - 1
thirdparty/opus/celt/celt.h

@@ -209,7 +209,7 @@ void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
 #endif
 
 #ifndef OVERRIDE_COMB_FILTER_CONST
-# define comb_filter_const(y, x, T, N, g10, g11, g12, arch)		\
+# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
     ((void)(arch),comb_filter_const_c(y, x, T, N, g10, g11, g12))
 #endif
 

+ 7 - 3
thirdparty/opus/celt/celt_decoder.c

@@ -82,6 +82,7 @@ struct OpusCustomDecoder {
    int error;
    int last_pitch_index;
    int loss_count;
+   int skip_plc;
    int postfilter_period;
    int postfilter_period_old;
    opus_val16 postfilter_gain;
@@ -164,8 +165,6 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_init(CELTDecoder *st, const CELTMod
    st->signalling = 1;
    st->arch = opus_select_arch();
 
-   st->loss_count = 0;
-
    opus_custom_decoder_ctl(st, OPUS_RESET_STATE);
 
    return OPUS_OK;
@@ -447,7 +446,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
 
    loss_count = st->loss_count;
    start = st->start;
-   noise_based = loss_count >= 5 || start != 0;
+   noise_based = loss_count >= 5 || start != 0 || st->skip_plc;
    if (noise_based)
    {
       /* Noise-based PLC/CNG */
@@ -832,6 +831,10 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
       return frame_size/st->downsample;
    }
 
+   /* Check if there are at least two packets received consecutively before
+    * turning on the pitch-based PLC */
+   st->skip_plc = st->loss_count != 0;
+
    if (dec == NULL)
    {
       ec_dec_init(&_dec,(unsigned char*)data,len);
@@ -1198,6 +1201,7 @@ int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...)
                ((char*)&st->DECODER_RESET_START - (char*)st));
          for (i=0;i<2*st->mode->nbEBands;i++)
             oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT);
+         st->skip_plc = 1;
       }
       break;
       case OPUS_GET_PITCH_REQUEST:

+ 8 - 5
thirdparty/opus/celt/celt_encoder.c

@@ -1175,10 +1175,10 @@ static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem,
 
       if (N>COMBFILTER_MAXPERIOD)
       {
-         OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, pre[c]+N, COMBFILTER_MAXPERIOD);
+         OPUS_COPY(prefilter_mem+c*COMBFILTER_MAXPERIOD, pre[c]+N, COMBFILTER_MAXPERIOD);
       } else {
          OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, prefilter_mem+c*COMBFILTER_MAXPERIOD+N, COMBFILTER_MAXPERIOD-N);
-         OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD+COMBFILTER_MAXPERIOD-N, pre[c]+COMBFILTER_MAXPERIOD, N);
+         OPUS_COPY(prefilter_mem+c*COMBFILTER_MAXPERIOD+COMBFILTER_MAXPERIOD-N, pre[c]+COMBFILTER_MAXPERIOD, N);
       }
    } while (++c<CC);
 
@@ -1281,12 +1281,15 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32
 
    if ((!has_surround_mask||lfe) && (constrained_vbr || bitrate<64000))
    {
-      opus_val16 rate_factor;
+      opus_val16 rate_factor = Q15ONE;
+      if (bitrate < 64000)
+      {
 #ifdef FIXED_POINT
-      rate_factor = MAX16(0,(bitrate-32000));
+         rate_factor = MAX16(0,(bitrate-32000));
 #else
-      rate_factor = MAX16(0,(1.f/32768)*(bitrate-32000));
+         rate_factor = MAX16(0,(1.f/32768)*(bitrate-32000));
 #endif
+      }
       if (constrained_vbr)
          rate_factor = MIN16(rate_factor, QCONST16(0.67f, 15));
       target = base_target + (opus_int32)MULT16_32_Q15(rate_factor, target-base_target);

+ 1 - 2
thirdparty/opus/celt/celt_lpc.c

@@ -49,8 +49,7 @@ int          p
    float *lpc = _lpc;
 #endif
 
-   for (i = 0; i < p; i++)
-      lpc[i] = 0;
+   OPUS_CLEAR(lpc, p);
    if (ac[0] != 0)
    {
       for (i = 0; i < p; i++) {

+ 1 - 1
thirdparty/opus/celt/cwrs.c

@@ -74,7 +74,7 @@ int log2_frac(opus_uint32 val, int frac)
 /*Although derived separately, the pulse vector coding scheme is equivalent to
    a Pyramid Vector Quantizer \cite{Fis86}.
   Some additional notes about an early version appear at
-   http://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering
+   https://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering
    and the definitions of some terms have evolved since that was written.
 
   The conversion from a pulse vector to an integer index (encoding) and back

+ 16 - 0
thirdparty/opus/celt/fixed_generic.h

@@ -37,16 +37,32 @@
 #define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b))
 
 /** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
+#if OPUS_FAST_INT64
+#define MULT16_32_Q16(a,b) ((opus_val32)SHR((opus_int64)((opus_val16)(a))*(b),16))
+#else
 #define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16))
+#endif
 
 /** 16x32 multiplication, followed by a 16-bit shift right (round-to-nearest). Results fits in 32 bits */
+#if OPUS_FAST_INT64
+#define MULT16_32_P16(a,b) ((opus_val32)PSHR((opus_int64)((opus_val16)(a))*(b),16))
+#else
 #define MULT16_32_P16(a,b) ADD32(MULT16_16((a),SHR((b),16)), PSHR(MULT16_16SU((a),((b)&0x0000ffff)),16))
+#endif
 
 /** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */
+#if OPUS_FAST_INT64
+#define MULT16_32_Q15(a,b) ((opus_val32)SHR((opus_int64)((opus_val16)(a))*(b),15))
+#else
 #define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
+#endif
 
 /** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */
+#if OPUS_FAST_INT64
+#define MULT32_32_Q31(a,b) ((opus_val32)SHR((opus_int64)(a)*(opus_int64)(b),31))
+#else
 #define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
+#endif
 
 /** Compile-time conversion of float constant to 16-bit value */
 #define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits))))

+ 1 - 1
thirdparty/opus/celt/kiss_fft.c

@@ -191,7 +191,7 @@ static void kf_bfly3(
 
    kiss_fft_cpx * Fout_beg = Fout;
 #ifdef FIXED_POINT
-   epi3.r = -16384;
+   /*epi3.r = -16384;*/ /* Unused */
    epi3.i = -28378;
 #else
    epi3 = st->twiddles[fstride*m];

+ 1 - 1
thirdparty/opus/celt/mathops.c

@@ -164,7 +164,7 @@ opus_val16 celt_cos_norm(opus_val32 x)
       {
          return _celt_cos_pi_2(EXTRACT16(x));
       } else {
-         return NEG32(_celt_cos_pi_2(EXTRACT16(65536-x)));
+         return NEG16(_celt_cos_pi_2(EXTRACT16(65536-x)));
       }
    } else {
       if (x&0x0000ffff)

+ 40 - 27
thirdparty/opus/celt/pitch.c

@@ -412,6 +412,41 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
    RESTORE_STACK;
 }
 
+#ifdef FIXED_POINT
+static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy)
+{
+   opus_val32 x2y2;
+   int sx, sy, shift;
+   opus_val32 g;
+   opus_val16 den;
+   if (xy == 0 || xx == 0 || yy == 0)
+      return 0;
+   sx = celt_ilog2(xx)-14;
+   sy = celt_ilog2(yy)-14;
+   shift = sx + sy;
+   x2y2 = MULT16_16_Q14(VSHR32(xx, sx), VSHR32(yy, sy));
+   if (shift & 1) {
+      if (x2y2 < 32768)
+      {
+         x2y2 <<= 1;
+         shift--;
+      } else {
+         x2y2 >>= 1;
+         shift++;
+      }
+   }
+   den = celt_rsqrt_norm(x2y2);
+   g = MULT16_32_Q15(den, xy);
+   g = VSHR32(g, (shift>>1)-1);
+   return EXTRACT16(MIN32(g, Q15ONE));
+}
+#else
+static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy)
+{
+   return xy/celt_sqrt(1+xx*yy);
+}
+#endif
+
 static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2};
 opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
       int N, int *T0_, int prev_period, opus_val16 prev_gain, int arch)
@@ -450,18 +485,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
    yy = yy_lookup[T0];
    best_xy = xy;
    best_yy = yy;
-#ifdef FIXED_POINT
-      {
-         opus_val32 x2y2;
-         int sh, t;
-         x2y2 = 1+HALF32(MULT32_32_Q31(xx,yy));
-         sh = celt_ilog2(x2y2)>>1;
-         t = VSHR32(x2y2, 2*(sh-7));
-         g = g0 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1);
-      }
-#else
-      g = g0 = xy/celt_sqrt(1+xx*yy);
-#endif
+   g = g0 = compute_pitch_gain(xy, xx, yy);
    /* Look for any pitch at T/k */
    for (k=2;k<=15;k++)
    {
@@ -484,24 +508,13 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
          T1b = celt_udiv(2*second_check[k]*T0+k, 2*k);
       }
       dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2, arch);
-      xy += xy2;
-      yy = yy_lookup[T1] + yy_lookup[T1b];
-#ifdef FIXED_POINT
-      {
-         opus_val32 x2y2;
-         int sh, t;
-         x2y2 = 1+MULT32_32_Q31(xx,yy);
-         sh = celt_ilog2(x2y2)>>1;
-         t = VSHR32(x2y2, 2*(sh-7));
-         g1 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1);
-      }
-#else
-      g1 = xy/celt_sqrt(1+2.f*xx*1.f*yy);
-#endif
+      xy = HALF32(xy + xy2);
+      yy = HALF32(yy_lookup[T1] + yy_lookup[T1b]);
+      g1 = compute_pitch_gain(xy, xx, yy);
       if (abs(T1-prev_period)<=1)
          cont = prev_gain;
       else if (abs(T1-prev_period)<=2 && 5*k*k < T0)
-         cont = HALF32(prev_gain);
+         cont = HALF16(prev_gain);
       else
          cont = 0;
       thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont);

+ 0 - 20
thirdparty/opus/celt/pitch.h

@@ -187,25 +187,6 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
       opus_val32 *xcorr, int len, int max_pitch);
 
 #if !defined(OVERRIDE_PITCH_XCORR)
-/*Is run-time CPU detection enabled on this platform?*/
-# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_ASM) \
-   || (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) \
-   && !defined(OPUS_ARM_PRESUME_NEON_INTR)))
-extern
-#  if defined(FIXED_POINT)
-opus_val32
-#  else
-void
-#  endif
-(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
-      const opus_val16 *, opus_val32 *, int, int);
-
-#  define OVERRIDE_PITCH_XCORR
-#  define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
-  ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
-        xcorr, len, max_pitch))
-# else
-
 #ifdef FIXED_POINT
 opus_val32
 #else
@@ -214,7 +195,6 @@ void
 celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
       opus_val32 *xcorr, int len, int max_pitch, int arch);
 
-# endif
 #endif
 
 #endif

+ 1 - 1
thirdparty/opus/celt/rate.c

@@ -296,7 +296,7 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end,
    done = 0;
    for (j=end;j-->start;)
    {
-      int tmp = bits1[j] + (lo*bits2[j]>>ALLOC_STEPS);
+      int tmp = bits1[j] + ((opus_int32)lo*bits2[j]>>ALLOC_STEPS);
       if (tmp < thresh[j] && !done)
       {
          if (tmp >= alloc_floor)

+ 1 - 1
thirdparty/opus/celt/vq.c

@@ -271,7 +271,7 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
       best_id = 0;
       /* The squared magnitude term gets added anyway, so we might as well
          add it outside the loop */
-      yy = ADD32(yy, 1);
+      yy = ADD16(yy, 1);
       j=0;
       do {
          opus_val16 Rxy, Ryy;

+ 17 - 17
thirdparty/opus/celt/x86/pitch_sse.h

@@ -102,21 +102,21 @@ opus_val32 celt_inner_prod_sse(
 #if defined(OPUS_X86_PRESUME_SSE4_1) && defined(FIXED_POINT)
 #define OVERRIDE_CELT_INNER_PROD
 #define celt_inner_prod(x, y, N, arch) \
-	((void)arch, celt_inner_prod_sse4_1(x, y, N))
+    ((void)arch, celt_inner_prod_sse4_1(x, y, N))
 
 #elif defined(OPUS_X86_PRESUME_SSE2) && defined(FIXED_POINT) && !defined(OPUS_X86_MAY_HAVE_SSE4_1)
 #define OVERRIDE_CELT_INNER_PROD
 #define celt_inner_prod(x, y, N, arch) \
-	((void)arch, celt_inner_prod_sse2(x, y, N))
+    ((void)arch, celt_inner_prod_sse2(x, y, N))
 
 #elif defined(OPUS_X86_PRESUME_SSE) && !defined(FIXED_POINT)
 #define OVERRIDE_CELT_INNER_PROD
 #define celt_inner_prod(x, y, N, arch) \
-	((void)arch, celt_inner_prod_sse(x, y, N))
+    ((void)arch, celt_inner_prod_sse(x, y, N))
 
 
 #elif ((defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)) && defined(FIXED_POINT)) || \
-	(defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT))
+    (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT))
 
 extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
                     const opus_val16 *x,
@@ -138,19 +138,19 @@ extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
 #undef comb_filter_const
 
 void dual_inner_prod_sse(const opus_val16 *x,
-	const opus_val16 *y01,
-	const opus_val16 *y02,
-	int               N,
-	opus_val32       *xy1,
-	opus_val32       *xy2);
+    const opus_val16 *y01,
+    const opus_val16 *y02,
+    int               N,
+    opus_val32       *xy1,
+    opus_val32       *xy2);
 
 void comb_filter_const_sse(opus_val32 *y,
-	opus_val32 *x,
-	int         T,
-	int         N,
-	opus_val16  g10,
-	opus_val16  g11,
-	opus_val16  g12);
+    opus_val32 *x,
+    int         T,
+    int         N,
+    opus_val16  g10,
+    opus_val16  g11,
+    opus_val16  g12);
 
 
 #if defined(OPUS_X86_PRESUME_SSE)
@@ -169,7 +169,7 @@ extern void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
               opus_val32       *xy1,
               opus_val32       *xy2);
 
-#define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch)			\
+#define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \
     ((*DUAL_INNER_PROD_IMPL[(arch) & OPUS_ARCHMASK])(x, y01, y02, N, xy1, xy2))
 
 extern void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])(
@@ -181,7 +181,7 @@ extern void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])(
               opus_val16  g11,
               opus_val16  g12);
 
-#define comb_filter_const(y, x, T, N, g10, g11, g12, arch)				\
+#define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
     ((*COMB_FILTER_CONST_IMPL[(arch) & OPUS_ARCHMASK])(y, x, T, N, g10, g11, g12))
 
 #define NON_STATIC_COMB_FILTER_CONST_C

+ 1 - 1
thirdparty/opus/celt/x86/x86_celt_map.c

@@ -72,7 +72,7 @@ void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
 #endif
 
 #if (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) ||  \
-	(!defined(OPUS_X86_MAY_HAVE_SSE_4_1) && defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2))
+ (!defined(OPUS_X86_MAY_HAVE_SSE_4_1) && defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2))
 
 opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
          const opus_val16 *x,

+ 1 - 1
thirdparty/opus/celt/x86/x86cpu.c

@@ -46,7 +46,7 @@
 #include <intrin.h>
 static _inline void cpuid(unsigned int CPUInfo[4], unsigned int InfoType)
 {
-	__cpuid((int*)CPUInfo, InfoType);
+    __cpuid((int*)CPUInfo, InfoType);
 }
 
 #else

+ 1 - 1
thirdparty/opus/opus_multistream_encoder.c

@@ -277,7 +277,7 @@ void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *b
          sum = celt_inner_prod(in, in, frame_size+overlap, 0);
          /* This should filter out both NaNs and ridiculous signals that could
             cause NaNs further down. */
-         if (!(sum < 1e9f) || celt_isnan(sum))
+         if (!(sum < 1e18f) || celt_isnan(sum))
          {
             OPUS_CLEAR(in, frame_size+overlap);
             preemph_mem[c] = 0;

+ 32 - 29
thirdparty/opus/silk/CNG.c

@@ -34,9 +34,8 @@ POSSIBILITY OF SUCH DAMAGE.
 
 /* Generates excitation for CNG LPC synthesis */
 static OPUS_INLINE void silk_CNG_exc(
-    opus_int32                       exc_Q10[],          /* O    CNG excitation signal Q10                   */
+    opus_int32                       exc_Q14[],          /* O    CNG excitation signal Q10                   */
     opus_int32                       exc_buf_Q14[],      /* I    Random samples buffer Q10                   */
-    opus_int32                       Gain_Q16,           /* I    Gain to apply                               */
     opus_int                         length,             /* I    Length                                      */
     opus_int32                       *rand_seed          /* I/O  Seed to random index generator              */
 )
@@ -55,7 +54,7 @@ static OPUS_INLINE void silk_CNG_exc(
         idx = (opus_int)( silk_RSHIFT( seed, 24 ) & exc_mask );
         silk_assert( idx >= 0 );
         silk_assert( idx <= CNG_BUF_MASK_MAX );
-        exc_Q10[ i ] = (opus_int16)silk_SAT16( silk_SMULWW( exc_buf_Q14[ idx ], Gain_Q16 >> 4 ) );
+        exc_Q14[ i ] = exc_buf_Q14[ idx ];
     }
     *rand_seed = seed;
 }
@@ -85,7 +84,7 @@ void silk_CNG(
 )
 {
     opus_int   i, subfr;
-    opus_int32 sum_Q6, max_Gain_Q16, gain_Q16;
+    opus_int32 LPC_pred_Q10, max_Gain_Q16, gain_Q16, gain_Q10;
     opus_int16 A_Q12[ MAX_LPC_ORDER ];
     silk_CNG_struct *psCNG = &psDec->sCNG;
     SAVE_STACK;
@@ -124,8 +123,8 @@ void silk_CNG(
 
     /* Add CNG when packet is lost or during DTX */
     if( psDec->lossCnt ) {
-        VARDECL( opus_int32, CNG_sig_Q10 );
-        ALLOC( CNG_sig_Q10, length + MAX_LPC_ORDER, opus_int32 );
+        VARDECL( opus_int32, CNG_sig_Q14 );
+        ALLOC( CNG_sig_Q14, length + MAX_LPC_ORDER, opus_int32 );
 
         /* Generate CNG excitation */
         gain_Q16 = silk_SMULWW( psDec->sPLC.randScale_Q14, psDec->sPLC.prevGain_Q16[1] );
@@ -138,42 +137,46 @@ void silk_CNG(
             gain_Q16 = silk_SUB_LSHIFT32(silk_SMULWW( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 );
             gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 8 );
         }
-        silk_CNG_exc( CNG_sig_Q10 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, gain_Q16, length, &psCNG->rand_seed );
+        gain_Q10 = silk_RSHIFT( gain_Q16, 6 );
+        
+        silk_CNG_exc( CNG_sig_Q14 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, length, &psCNG->rand_seed );
 
         /* Convert CNG NLSF to filter representation */
         silk_NLSF2A( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order );
 
         /* Generate CNG signal, by synthesis filtering */
-        silk_memcpy( CNG_sig_Q10, psCNG->CNG_synth_state, MAX_LPC_ORDER * sizeof( opus_int32 ) );
+        silk_memcpy( CNG_sig_Q14, psCNG->CNG_synth_state, MAX_LPC_ORDER * sizeof( opus_int32 ) );
         for( i = 0; i < length; i++ ) {
             silk_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 );
             /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
-            sum_Q6 = silk_RSHIFT( psDec->LPC_order, 1 );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  1 ], A_Q12[ 0 ] );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  2 ], A_Q12[ 1 ] );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  3 ], A_Q12[ 2 ] );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  4 ], A_Q12[ 3 ] );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  5 ], A_Q12[ 4 ] );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  6 ], A_Q12[ 5 ] );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  7 ], A_Q12[ 6 ] );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  8 ], A_Q12[ 7 ] );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i -  9 ], A_Q12[ 8 ] );
-            sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] );
+            LPC_pred_Q10 = silk_RSHIFT( psDec->LPC_order, 1 );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  1 ], A_Q12[ 0 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  2 ], A_Q12[ 1 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  3 ], A_Q12[ 2 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  4 ], A_Q12[ 3 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  5 ], A_Q12[ 4 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  6 ], A_Q12[ 5 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  7 ], A_Q12[ 6 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  8 ], A_Q12[ 7 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i -  9 ], A_Q12[ 8 ] );
+            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] );
             if( psDec->LPC_order == 16 ) {
-                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 11 ], A_Q12[ 10 ] );
-                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 12 ], A_Q12[ 11 ] );
-                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 13 ], A_Q12[ 12 ] );
-                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 14 ], A_Q12[ 13 ] );
-                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 15 ], A_Q12[ 14 ] );
-                sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 16 ], A_Q12[ 15 ] );
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 11 ], A_Q12[ 10 ] );
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 12 ], A_Q12[ 11 ] );
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 13 ], A_Q12[ 12 ] );
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 14 ], A_Q12[ 13 ] );
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 15 ], A_Q12[ 14 ] );
+                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 16 ], A_Q12[ 15 ] );
             }
 
             /* Update states */
-            CNG_sig_Q10[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q10[ MAX_LPC_ORDER + i ], sum_Q6, 4 );
-
-            frame[ i ] = silk_ADD_SAT16( frame[ i ], silk_RSHIFT_ROUND( CNG_sig_Q10[ MAX_LPC_ORDER + i ], 10 ) );
+            CNG_sig_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q14[ MAX_LPC_ORDER + i ], LPC_pred_Q10, 4 );
+            
+            /* Scale with Gain and add to input signal */
+            frame[ i ] = (opus_int16)silk_ADD_SAT16( frame[ i ], silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( CNG_sig_Q14[ MAX_LPC_ORDER + i ], gain_Q10 ), 8 ) ) );
+            
         }
-        silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q10[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
+        silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q14[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
     } else {
         silk_memset( psCNG->CNG_synth_state, 0, psDec->LPC_order *  sizeof( opus_int32 ) );
     }

+ 7 - 7
thirdparty/opus/silk/NLSF_del_dec_quant.c

@@ -46,8 +46,9 @@ opus_int32 silk_NLSF_del_dec_quant(                             /* O    Returns
 )
 {
     opus_int         i, j, nStates, ind_tmp, ind_min_max, ind_max_min, in_Q10, res_Q10;
-    opus_int         pred_Q10, diff_Q10, out0_Q10, out1_Q10, rate0_Q5, rate1_Q5;
-    opus_int32       RD_tmp_Q25, min_Q25, min_max_Q25, max_min_Q25, pred_coef_Q16;
+    opus_int         pred_Q10, diff_Q10, rate0_Q5, rate1_Q5;
+    opus_int16       out0_Q10, out1_Q10;
+    opus_int32       RD_tmp_Q25, min_Q25, min_max_Q25, max_min_Q25;
     opus_int         ind_sort[         NLSF_QUANT_DEL_DEC_STATES ];
     opus_int8        ind[              NLSF_QUANT_DEL_DEC_STATES ][ MAX_LPC_ORDER ];
     opus_int16       prev_out_Q10[ 2 * NLSF_QUANT_DEL_DEC_STATES ];
@@ -74,8 +75,8 @@ opus_int32 silk_NLSF_del_dec_quant(                             /* O    Returns
             out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
             out1_Q10 = silk_ADD16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
         }
-        out0_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_SMULWB( (opus_int32)out0_Q10, quant_step_size_Q16 );
-        out1_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_SMULWB( (opus_int32)out1_Q10, quant_step_size_Q16 );
+        out0_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_RSHIFT( silk_SMULBB( out0_Q10, quant_step_size_Q16 ), 16 );
+        out1_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_RSHIFT( silk_SMULBB( out1_Q10, quant_step_size_Q16 ), 16 );
     }
 
     silk_assert( (NLSF_QUANT_DEL_DEC_STATES & (NLSF_QUANT_DEL_DEC_STATES-1)) == 0 );     /* must be power of two */
@@ -85,12 +86,11 @@ opus_int32 silk_NLSF_del_dec_quant(                             /* O    Returns
     prev_out_Q10[ 0 ] = 0;
     for( i = order - 1; ; i-- ) {
         rates_Q5 = &ec_rates_Q5[ ec_ix[ i ] ];
-        pred_coef_Q16 = silk_LSHIFT( (opus_int32)pred_coef_Q8[ i ], 8 );
         in_Q10 = x_Q10[ i ];
         for( j = 0; j < nStates; j++ ) {
-            pred_Q10 = silk_SMULWB( pred_coef_Q16, prev_out_Q10[ j ] );
+            pred_Q10 = silk_RSHIFT( silk_SMULBB( (opus_int16)pred_coef_Q8[ i ], prev_out_Q10[ j ] ), 8 );
             res_Q10  = silk_SUB16( in_Q10, pred_Q10 );
-            ind_tmp  = silk_SMULWB( (opus_int32)inv_quant_step_size_Q6, res_Q10 );
+            ind_tmp  = silk_RSHIFT( silk_SMULBB( inv_quant_step_size_Q6, res_Q10 ), 16 );
             ind_tmp  = silk_LIMIT( ind_tmp, -NLSF_QUANT_MAX_AMPLITUDE_EXT, NLSF_QUANT_MAX_AMPLITUDE_EXT-1 );
             ind[ j ][ i ] = (opus_int8)ind_tmp;
 

+ 3 - 2
thirdparty/opus/silk/NLSF_encode.c

@@ -46,7 +46,7 @@ opus_int32 silk_NLSF_encode(                                    /* O    Returns
 )
 {
     opus_int         i, s, ind1, bestIndex, prob_Q8, bits_q7;
-    opus_int32       W_tmp_Q9;
+    opus_int32       W_tmp_Q9, ret;
     VARDECL( opus_int32, err_Q26 );
     VARDECL( opus_int32, RD_Q25 );
     VARDECL( opus_int, tempIndices1 );
@@ -131,6 +131,7 @@ opus_int32 silk_NLSF_encode(                                    /* O    Returns
     /* Decode */
     silk_NLSF_decode( pNLSF_Q15, NLSFIndices, psNLSF_CB );
 
+    ret = RD_Q25[ 0 ];
     RESTORE_STACK;
-    return RD_Q25[ 0 ];
+    return ret;
 }

+ 17 - 41
thirdparty/opus/silk/NSQ.c

@@ -31,6 +31,8 @@ POSSIBILITY OF SUCH DAMAGE.
 
 #include "main.h"
 #include "stack_alloc.h"
+#include "NSQ.h"
+
 
 static OPUS_INLINE void silk_nsq_scale_states(
     const silk_encoder_state *psEncC,           /* I    Encoder State                   */
@@ -66,7 +68,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer(
     opus_int            offset_Q10,             /* I                                    */
     opus_int            length,                 /* I    Input length                    */
     opus_int            shapingLPCOrder,        /* I    Noise shaping AR filter order   */
-    opus_int            predictLPCOrder         /* I    Prediction filter order         */
+    opus_int            predictLPCOrder,        /* I    Prediction filter order         */
+    int                 arch                    /* I    Architecture                    */
 );
 );
 #endif
 #endif
 
 
@@ -155,7 +158,7 @@ void silk_NSQ_c
 
 
         silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14,
         silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14,
             AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10,
             AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10,
-            offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder );
+            offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder, psEncC->arch );
 
 
         x_Q3   += psEncC->subfr_length;
         x_Q3   += psEncC->subfr_length;
         pulses += psEncC->subfr_length;
         pulses += psEncC->subfr_length;
@@ -198,15 +201,19 @@ void silk_noise_shape_quantizer(
     opus_int            offset_Q10,             /* I                                    */
     opus_int            offset_Q10,             /* I                                    */
     opus_int            length,                 /* I    Input length                    */
     opus_int            length,                 /* I    Input length                    */
     opus_int            shapingLPCOrder,        /* I    Noise shaping AR filter order   */
     opus_int            shapingLPCOrder,        /* I    Noise shaping AR filter order   */
-    opus_int            predictLPCOrder         /* I    Prediction filter order         */
+    opus_int            predictLPCOrder,        /* I    Prediction filter order         */
+    int                 arch                    /* I    Architecture                    */
 )
 )
 {
 {
-    opus_int     i, j;
+    opus_int     i;
     opus_int32   LTP_pred_Q13, LPC_pred_Q10, n_AR_Q12, n_LTP_Q13;
     opus_int32   LTP_pred_Q13, LPC_pred_Q10, n_AR_Q12, n_LTP_Q13;
     opus_int32   n_LF_Q12, r_Q10, rr_Q10, q1_Q0, q1_Q10, q2_Q10, rd1_Q20, rd2_Q20;
     opus_int32   n_LF_Q12, r_Q10, rr_Q10, q1_Q0, q1_Q10, q2_Q10, rd1_Q20, rd2_Q20;
     opus_int32   exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
     opus_int32   exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
     opus_int32   tmp1, tmp2, sLF_AR_shp_Q14;
     opus_int32   tmp1, tmp2, sLF_AR_shp_Q14;
     opus_int32   *psLPC_Q14, *shp_lag_ptr, *pred_lag_ptr;
     opus_int32   *psLPC_Q14, *shp_lag_ptr, *pred_lag_ptr;
+#ifdef silk_short_prediction_create_arch_coef
+    opus_int32   a_Q12_arch[MAX_LPC_ORDER];
+#endif
 
 
     shp_lag_ptr  = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];
     shp_lag_ptr  = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];
     pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
     pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
@@ -215,32 +222,16 @@ void silk_noise_shape_quantizer(
     /* Set up short term AR state */
     /* Set up short term AR state */
     psLPC_Q14 = &NSQ->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 ];
     psLPC_Q14 = &NSQ->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 ];
 
 
+#ifdef silk_short_prediction_create_arch_coef
+    silk_short_prediction_create_arch_coef(a_Q12_arch, a_Q12, predictLPCOrder);
+#endif
+
     for( i = 0; i < length; i++ ) {
     for( i = 0; i < length; i++ ) {
         /* Generate dither */
         /* Generate dither */
         NSQ->rand_seed = silk_RAND( NSQ->rand_seed );
         NSQ->rand_seed = silk_RAND( NSQ->rand_seed );
 
 
         /* Short-term prediction */
         /* Short-term prediction */
-        silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 );
-        /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
-        LPC_pred_Q10 = silk_RSHIFT( predictLPCOrder, 1 );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[  0 ], a_Q12[ 0 ] );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -1 ], a_Q12[ 1 ] );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -2 ], a_Q12[ 2 ] );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -3 ], a_Q12[ 3 ] );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -4 ], a_Q12[ 4 ] );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -5 ], a_Q12[ 5 ] );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -6 ], a_Q12[ 6 ] );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -7 ], a_Q12[ 7 ] );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -8 ], a_Q12[ 8 ] );
-        LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -9 ], a_Q12[ 9 ] );
-        if( predictLPCOrder == 16 ) {
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -10 ], a_Q12[ 10 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -11 ], a_Q12[ 11 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -12 ], a_Q12[ 12 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -13 ], a_Q12[ 13 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -14 ], a_Q12[ 14 ] );
-            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -15 ], a_Q12[ 15 ] );
-        }
+        LPC_pred_Q10 = silk_noise_shape_quantizer_short_prediction(psLPC_Q14, a_Q12, a_Q12_arch, predictLPCOrder, arch);
 
 
         /* Long-term prediction */
         /* Long-term prediction */
         if( signalType == TYPE_VOICED ) {
         if( signalType == TYPE_VOICED ) {
@@ -259,23 +250,8 @@ void silk_noise_shape_quantizer(
 
 
         /* Noise shape feedback */
         /* Noise shape feedback */
         silk_assert( ( shapingLPCOrder & 1 ) == 0 );   /* check that order is even */
         silk_assert( ( shapingLPCOrder & 1 ) == 0 );   /* check that order is even */
-        tmp2 = psLPC_Q14[ 0 ];
-        tmp1 = NSQ->sAR2_Q14[ 0 ];
-        NSQ->sAR2_Q14[ 0 ] = tmp2;
-        n_AR_Q12 = silk_RSHIFT( shapingLPCOrder, 1 );
-        n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp2, AR_shp_Q13[ 0 ] );
-        for( j = 2; j < shapingLPCOrder; j += 2 ) {
-            tmp2 = NSQ->sAR2_Q14[ j - 1 ];
-            NSQ->sAR2_Q14[ j - 1 ] = tmp1;
-            n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp1, AR_shp_Q13[ j - 1 ] );
-            tmp1 = NSQ->sAR2_Q14[ j + 0 ];
-            NSQ->sAR2_Q14[ j + 0 ] = tmp2;
-            n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp2, AR_shp_Q13[ j ] );
-        }
-        NSQ->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1;
-        n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] );
+        n_AR_Q12 = silk_NSQ_noise_shape_feedback_loop(psLPC_Q14, NSQ->sAR2_Q14, AR_shp_Q13, shapingLPCOrder, arch);
 
 
-        n_AR_Q12 = silk_LSHIFT32( n_AR_Q12, 1 );                                /* Q11 -> Q12 */
         n_AR_Q12 = silk_SMLAWB( n_AR_Q12, NSQ->sLF_AR_shp_Q14, Tilt_Q14 );
         n_AR_Q12 = silk_SMLAWB( n_AR_Q12, NSQ->sLF_AR_shp_Q14, Tilt_Q14 );
 
 
         n_LF_Q12 = silk_SMULWB( NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - 1 ], LF_shp_Q14 );
         n_LF_Q12 = silk_SMULWB( NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - 1 ], LF_shp_Q14 );
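
The theme of this and the following NSQ hunks: the hand-unrolled short-term LPC prediction and the AR noise-shape feedback loop move into helpers (declared via the new "NSQ.h" include) that take an arch argument, so architecture-specific kernels can be selected at run time; where an architecture header defines silk_short_prediction_create_arch_coef, the Q12 coefficients are additionally repacked once per subframe into a_Q12_arch for that kernel. A minimal sketch of what the portable fallback computes, equivalent to the removed unrolled code (the helper name here is illustrative):

    #include "main.h"   /* brings in the silk_RSHIFT / silk_SMLAWB macros */

    /* order/2 is the rounding offset that compensates for silk_SMLAWB always
       truncating toward -inf; the result is the Q10 short-term prediction. */
    static OPUS_INLINE opus_int32 short_prediction_c(
        const opus_int32 *psLPC_Q14,     /* I  newest state sample, older at -1, -2, ... */
        const opus_int16 *a_Q12,         /* I  prediction coefficients, Q12              */
        opus_int          order )        /* I  10 or 16                                  */
    {
        opus_int32 LPC_pred_Q10 = silk_RSHIFT( order, 1 );
        opus_int   i;
        for( i = 0; i < order; i++ ) {
            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -i ], a_Q12[ i ] );
        }
        return LPC_pred_Q10;
    }

On builds without a SIMD kernel the arch argument is simply carried through and ignored, so the generic path is intended to stay bit-exact with the old code.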

+ 16 - 24
thirdparty/opus/silk/NSQ_del_dec.c

@@ -31,6 +31,8 @@ POSSIBILITY OF SUCH DAMAGE.
 
 
 #include "main.h"
 #include "main.h"
 #include "stack_alloc.h"
 #include "stack_alloc.h"
+#include "NSQ.h"
+
 
 
 typedef struct {
 typedef struct {
     opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ];
     opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ];
@@ -106,7 +108,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
     opus_int            warping_Q16,            /* I                                        */
     opus_int            warping_Q16,            /* I                                        */
     opus_int            nStatesDelayedDecision, /* I    Number of states in decision tree   */
     opus_int            nStatesDelayedDecision, /* I    Number of states in decision tree   */
     opus_int            *smpl_buf_idx,          /* I    Index to newest samples in buffers  */
     opus_int            *smpl_buf_idx,          /* I    Index to newest samples in buffers  */
-    opus_int            decisionDelay           /* I                                        */
+    opus_int            decisionDelay,          /* I                                        */
+    int                 arch                    /* I                                        */
 );
 );
 
 
 void silk_NSQ_del_dec_c(
 void silk_NSQ_del_dec_c(
@@ -260,7 +263,7 @@ void silk_NSQ_del_dec_c(
         silk_noise_shape_quantizer_del_dec( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15,
         silk_noise_shape_quantizer_del_dec( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15,
             delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ],
             delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ],
             Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder,
             Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder,
-            psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay );
+            psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay, psEncC->arch );
 
 
         x_Q3   += psEncC->subfr_length;
         x_Q3   += psEncC->subfr_length;
         pulses += psEncC->subfr_length;
         pulses += psEncC->subfr_length;
@@ -333,7 +336,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
     opus_int            warping_Q16,            /* I                                        */
     opus_int            warping_Q16,            /* I                                        */
     opus_int            nStatesDelayedDecision, /* I    Number of states in decision tree   */
     opus_int            nStatesDelayedDecision, /* I    Number of states in decision tree   */
     opus_int            *smpl_buf_idx,          /* I    Index to newest samples in buffers  */
     opus_int            *smpl_buf_idx,          /* I    Index to newest samples in buffers  */
-    opus_int            decisionDelay           /* I                                        */
+    opus_int            decisionDelay,          /* I                                        */
+    int                 arch                    /* I                                        */
 )
 )
 {
 {
     opus_int     i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx;
     opus_int     i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx;
@@ -343,6 +347,10 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
     opus_int32   q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
     opus_int32   q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
     opus_int32   tmp1, tmp2, sLF_AR_shp_Q14;
     opus_int32   tmp1, tmp2, sLF_AR_shp_Q14;
     opus_int32   *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14;
     opus_int32   *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14;
+#ifdef silk_short_prediction_create_arch_coef
+    opus_int32   a_Q12_arch[MAX_LPC_ORDER];
+#endif
+
     VARDECL( NSQ_sample_pair, psSampleState );
     VARDECL( NSQ_sample_pair, psSampleState );
     NSQ_del_dec_struct *psDD;
     NSQ_del_dec_struct *psDD;
     NSQ_sample_struct  *psSS;
     NSQ_sample_struct  *psSS;
@@ -355,6 +363,10 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
     pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
     pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
     Gain_Q10     = silk_RSHIFT( Gain_Q16, 6 );
     Gain_Q10     = silk_RSHIFT( Gain_Q16, 6 );
 
 
+#ifdef silk_short_prediction_create_arch_coef
+    silk_short_prediction_create_arch_coef(a_Q12_arch, a_Q12, predictLPCOrder);
+#endif
+
     for( i = 0; i < length; i++ ) {
     for( i = 0; i < length; i++ ) {
         /* Perform common calculations used in all states */
         /* Perform common calculations used in all states */
 
 
@@ -398,27 +410,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
             /* Pointer used in short term prediction and shaping */
             /* Pointer used in short term prediction and shaping */
             psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ];
             psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ];
             /* Short-term prediction */
             /* Short-term prediction */
-            silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 );
-            /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
-            LPC_pred_Q14 = silk_RSHIFT( predictLPCOrder, 1 );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[  0 ], a_Q12[ 0 ] );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -1 ], a_Q12[ 1 ] );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -2 ], a_Q12[ 2 ] );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -3 ], a_Q12[ 3 ] );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -4 ], a_Q12[ 4 ] );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -5 ], a_Q12[ 5 ] );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -6 ], a_Q12[ 6 ] );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -7 ], a_Q12[ 7 ] );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -8 ], a_Q12[ 8 ] );
-            LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -9 ], a_Q12[ 9 ] );
-            if( predictLPCOrder == 16 ) {
-                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -10 ], a_Q12[ 10 ] );
-                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -11 ], a_Q12[ 11 ] );
-                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -12 ], a_Q12[ 12 ] );
-                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -13 ], a_Q12[ 13 ] );
-                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -14 ], a_Q12[ 14 ] );
-                LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -15 ], a_Q12[ 15 ] );
-            }
+            LPC_pred_Q14 = silk_noise_shape_quantizer_short_prediction(psLPC_Q14, a_Q12, a_Q12_arch, predictLPCOrder, arch);
             LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 );                              /* Q10 -> Q14 */
             LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 );                              /* Q10 -> Q14 */
 
 
             /* Noise shape feedback */
             /* Noise shape feedback */

+ 2 - 1
thirdparty/opus/silk/PLC.c

@@ -365,7 +365,8 @@ static OPUS_INLINE void silk_PLC_conceal(
         }
         }
 
 
         /* Add prediction to LPC excitation */
         /* Add prediction to LPC excitation */
-        sLPC_Q14_ptr[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT32( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], LPC_pred_Q10, 4 );
+        sLPC_Q14_ptr[ MAX_LPC_ORDER + i ] = silk_ADD_SAT32( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ],
+                                            silk_LSHIFT_SAT32( LPC_pred_Q10, 4 ));
 
 
         /* Scale with Gain */
         /* Scale with Gain */
         frame[ i ] = (opus_int16)silk_SAT16( silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], prevGain_Q10[ 1 ] ), 8 ) ) );
         frame[ i ] = (opus_int16)silk_SAT16( silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], prevGain_Q10[ 1 ] ), 8 ) ) );
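
Here (and in the matching decode_core.c hunk just below) the plain add-plus-shift becomes its saturating counterpart, so an extreme LPC prediction clamps at the 32-bit limits instead of wrapping the Q14 excitation state. A small sketch of the semantics the silk_ADD_SAT32 / silk_LSHIFT_SAT32 pair provides, with stand-in helpers:

    #include <stdint.h>

    /* Saturating 32-bit add: clamp to INT32_MIN/INT32_MAX instead of wrapping. */
    static int32_t add_sat32(int32_t a, int32_t b) {
        int64_t s = (int64_t)a + (int64_t)b;
        if (s > INT32_MAX) return INT32_MAX;
        if (s < INT32_MIN) return INT32_MIN;
        return (int32_t)s;
    }

    /* Saturating left shift by 4 (Q10 -> Q14): clamp when the value cannot be
       represented after the shift. */
    static int32_t lshift4_sat32(int32_t a) {
        if (a > INT32_MAX / 16) return INT32_MAX;
        if (a < INT32_MIN / 16) return INT32_MIN;
        return a * 16;   /* same as << 4, but well defined for negative values */
    }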

+ 1 - 1
thirdparty/opus/silk/decode_core.c

@@ -219,7 +219,7 @@ void silk_decode_core(
             }
             }
 
 
             /* Add prediction to LPC excitation */
             /* Add prediction to LPC excitation */
-            sLPC_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT32( pres_Q14[ i ], LPC_pred_Q10, 4 );
+            sLPC_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_SAT32( pres_Q14[ i ], silk_LSHIFT_SAT32( LPC_pred_Q10, 4 ) );
 
 
             /* Scale with gain */
             /* Scale with gain */
             pxq[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14[ MAX_LPC_ORDER + i ], Gain_Q10 ), 8 ) );
             pxq[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14[ MAX_LPC_ORDER + i ], Gain_Q10 ), 8 ) );

+ 13 - 8
thirdparty/opus/silk/fixed/burg_modified_FIX.c

@@ -150,8 +150,11 @@ void silk_burg_modified_c(
                     C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ]            ); /* Q( -rshifts ) */
                     C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ]            ); /* Q( -rshifts ) */
                     C_last_row[ k ]  = silk_MLA( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
                     C_last_row[ k ]  = silk_MLA( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
                     Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 );                                   /* Q17 */
                     Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 );                                   /* Q17 */
-                    tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ],            Atmp1 );                      /* Q17 */
-                    tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 );                      /* Q17 */
+                    /* We sometimes get overflows in the multiplications (even beyond +/- 2^32),
+                       but they cancel each other out and the real result always seems to fit in a
+                       32-bit signed integer. This was determined experimentally, not theoretically (unfortunately). */
+                    tmp1 = silk_MLA_ovflw( tmp1, x_ptr[ n - k - 1 ],            Atmp1 );                      /* Q17 */
+                    tmp2 = silk_MLA_ovflw( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 );                      /* Q17 */
                 }
                 }
                 tmp1 = -tmp1;                                                                           /* Q17 */
                 tmp1 = -tmp1;                                                                           /* Q17 */
                 tmp2 = -tmp2;                                                                           /* Q17 */
                 tmp2 = -tmp2;                                                                           /* Q17 */
@@ -200,12 +203,14 @@ void silk_burg_modified_c(
             /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
             /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
             tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 );            /* Q30 */
             tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 );            /* Q30 */
             rc_Q31 = silk_SQRT_APPROX( tmp2 );                                                  /* Q15 */
             rc_Q31 = silk_SQRT_APPROX( tmp2 );                                                  /* Q15 */
-            /* Newton-Raphson iteration */
-            rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 );                   /* Q15 */
-            rc_Q31 = silk_LSHIFT32( rc_Q31, 16 );                                               /* Q31 */
-            if( num < 0 ) {
-                /* Ensure adjusted reflection coefficients has the original sign */
-                rc_Q31 = -rc_Q31;
+            if( rc_Q31 > 0 ) {
+                /* Newton-Raphson iteration */
+                rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 );                       /* Q15 */
+                rc_Q31 = silk_LSHIFT32( rc_Q31, 16 );                                                   /* Q31 */
+                if( num < 0 ) {
+                    /* Ensure the adjusted reflection coefficient keeps the original sign */
+                    rc_Q31 = -rc_Q31;
+                }
             }
             }
             invGain_Q30 = minInvGain_Q30;
             invGain_Q30 = minInvGain_Q30;
             reached_max_gain = 1;
             reached_max_gain = 1;
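
Two separate fixes in this file. The first switches the accumulation to silk_MLA_ovflw, the wrap-allowed variant of silk_MLA, and documents that the intermediate overflows are expected to cancel. The second guards the Newton-Raphson refinement of the approximate square root: silk_SQRT_APPROX() returns 0 for a zero argument, and the refinement divides by its result, so without the rc_Q31 > 0 check a zero tmp2 would cause a division by zero. A hedged sketch of the guarded refinement, with a toy integer square root standing in for silk_SQRT_APPROX:

    #include <stdint.h>

    /* Toy stand-in for silk_SQRT_APPROX: any coarse integer sqrt works here. */
    static int32_t sqrt_coarse(int32_t x) {
        int32_t r = 0;
        while ((int64_t)(r + 1) * (r + 1) <= x) {
            r++;
        }
        return r;                       /* returns 0 for x <= 0 */
    }

    /* One Newton-Raphson step r' = (r + x/r) / 2, skipped when r == 0 so the
       division cannot fault; x == 0 then simply yields 0. */
    static int32_t sqrt_refined(int32_t x) {
        int32_t r = sqrt_coarse(x);
        if (r > 0) {
            r = (r + x / r) >> 1;
        }
        return r;
    }

The same guard is applied to the SSE4.1 copy of the routine in the next file.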

+ 8 - 6
thirdparty/opus/silk/fixed/x86/burg_modified_FIX_sse.c

@@ -300,12 +300,14 @@ void silk_burg_modified_sse4_1(
             /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
             /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
             tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 );            /* Q30 */
             tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 );            /* Q30 */
             rc_Q31 = silk_SQRT_APPROX( tmp2 );                                                  /* Q15 */
             rc_Q31 = silk_SQRT_APPROX( tmp2 );                                                  /* Q15 */
-            /* Newton-Raphson iteration */
-            rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 );                   /* Q15 */
-            rc_Q31 = silk_LSHIFT32( rc_Q31, 16 );                                               /* Q31 */
-            if( num < 0 ) {
-                /* Ensure adjusted reflection coefficients has the original sign */
-                rc_Q31 = -rc_Q31;
+            if( rc_Q31 > 0 ) {
+                 /* Newton-Raphson iteration */
+                rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 );                   /* Q15 */
+                rc_Q31 = silk_LSHIFT32( rc_Q31, 16 );                                               /* Q31 */
+                if( num < 0 ) {
+                    /* Ensure the adjusted reflection coefficient keeps the original sign */
+                    rc_Q31 = -rc_Q31;
+                }
             }
             }
             invGain_Q30 = minInvGain_Q30;
             invGain_Q30 = minInvGain_Q30;
             reached_max_gain = 1;
             reached_max_gain = 1;

+ 14 - 8
thirdparty/opus/silk/macros.h

@@ -34,6 +34,7 @@ POSSIBILITY OF SUCH DAMAGE.
 
 
 #include "opus_types.h"
 #include "opus_types.h"
 #include "opus_defines.h"
 #include "opus_defines.h"
+#include "arch.h"
 
 
 #if OPUS_GNUC_PREREQ(3, 0)
 #if OPUS_GNUC_PREREQ(3, 0)
 #define opus_likely(x)       (__builtin_expect(!!(x), 1))
 #define opus_likely(x)       (__builtin_expect(!!(x), 1))
@@ -43,31 +44,32 @@ POSSIBILITY OF SUCH DAMAGE.
 #define opus_unlikely(x)     (!!(x))
 #define opus_unlikely(x)     (!!(x))
 #endif
 #endif
 
 
-/* Set this if opus_int64 is a native type of the CPU. */
-#define OPUS_FAST_INT64 (defined(__x86_64__) || defined(__LP64__) || defined(_WIN64))
-
 /* This is an OPUS_INLINE header file for general platform. */
 /* This is an OPUS_INLINE header file for general platform. */
 
 
 /* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */
 /* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */
 #if OPUS_FAST_INT64
 #if OPUS_FAST_INT64
-#define silk_SMULWB(a32, b32)            (((a32) * (opus_int64)((opus_int16)(b32))) >> 16)
+#define silk_SMULWB(a32, b32)            ((opus_int32)(((a32) * (opus_int64)((opus_int16)(b32))) >> 16))
 #else
 #else
 #define silk_SMULWB(a32, b32)            ((((a32) >> 16) * (opus_int32)((opus_int16)(b32))) + ((((a32) & 0x0000FFFF) * (opus_int32)((opus_int16)(b32))) >> 16))
 #define silk_SMULWB(a32, b32)            ((((a32) >> 16) * (opus_int32)((opus_int16)(b32))) + ((((a32) & 0x0000FFFF) * (opus_int32)((opus_int16)(b32))) >> 16))
 #endif
 #endif
 
 
 /* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */
 /* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */
 #if OPUS_FAST_INT64
 #if OPUS_FAST_INT64
-#define silk_SMLAWB(a32, b32, c32)       ((a32) + (((b32) * (opus_int64)((opus_int16)(c32))) >> 16))
+#define silk_SMLAWB(a32, b32, c32)       ((opus_int32)((a32) + (((b32) * (opus_int64)((opus_int16)(c32))) >> 16)))
 #else
 #else
 #define silk_SMLAWB(a32, b32, c32)       ((a32) + ((((b32) >> 16) * (opus_int32)((opus_int16)(c32))) + ((((b32) & 0x0000FFFF) * (opus_int32)((opus_int16)(c32))) >> 16)))
 #define silk_SMLAWB(a32, b32, c32)       ((a32) + ((((b32) >> 16) * (opus_int32)((opus_int16)(c32))) + ((((b32) & 0x0000FFFF) * (opus_int32)((opus_int16)(c32))) >> 16)))
 #endif
 #endif
 
 
 /* (a32 * (b32 >> 16)) >> 16 */
 /* (a32 * (b32 >> 16)) >> 16 */
+#if OPUS_FAST_INT64
+#define silk_SMULWT(a32, b32)            ((opus_int32)(((a32) * (opus_int64)((b32) >> 16)) >> 16))
+#else
 #define silk_SMULWT(a32, b32)            (((a32) >> 16) * ((b32) >> 16) + ((((a32) & 0x0000FFFF) * ((b32) >> 16)) >> 16))
 #define silk_SMULWT(a32, b32)            (((a32) >> 16) * ((b32) >> 16) + ((((a32) & 0x0000FFFF) * ((b32) >> 16)) >> 16))
+#endif
 
 
 /* a32 + (b32 * (c32 >> 16)) >> 16 */
 /* a32 + (b32 * (c32 >> 16)) >> 16 */
 #if OPUS_FAST_INT64
 #if OPUS_FAST_INT64
-#define silk_SMLAWT(a32, b32, c32)       ((a32) + (((b32) * ((opus_int64)(c32) >> 16)) >> 16))
+#define silk_SMLAWT(a32, b32, c32)       ((opus_int32)((a32) + (((b32) * ((opus_int64)(c32) >> 16)) >> 16)))
 #else
 #else
 #define silk_SMLAWT(a32, b32, c32)       ((a32) + (((b32) >> 16) * ((c32) >> 16)) + ((((b32) & 0x0000FFFF) * ((c32) >> 16)) >> 16))
 #define silk_SMLAWT(a32, b32, c32)       ((a32) + (((b32) >> 16) * ((c32) >> 16)) + ((((b32) & 0x0000FFFF) * ((c32) >> 16)) >> 16))
 #endif
 #endif
@@ -89,14 +91,14 @@ POSSIBILITY OF SUCH DAMAGE.
 
 
 /* (a32 * b32) >> 16 */
 /* (a32 * b32) >> 16 */
 #if OPUS_FAST_INT64
 #if OPUS_FAST_INT64
-#define silk_SMULWW(a32, b32)            (((opus_int64)(a32) * (b32)) >> 16)
+#define silk_SMULWW(a32, b32)            ((opus_int32)(((opus_int64)(a32) * (b32)) >> 16))
 #else
 #else
 #define silk_SMULWW(a32, b32)            silk_MLA(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16))
 #define silk_SMULWW(a32, b32)            silk_MLA(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16))
 #endif
 #endif
 
 
 /* a32 + ((b32 * c32) >> 16) */
 /* a32 + ((b32 * c32) >> 16) */
 #if OPUS_FAST_INT64
 #if OPUS_FAST_INT64
-#define silk_SMLAWW(a32, b32, c32)       ((a32) + (((opus_int64)(b32) * (c32)) >> 16))
+#define silk_SMLAWW(a32, b32, c32)       ((opus_int32)((a32) + (((opus_int64)(b32) * (c32)) >> 16)))
 #else
 #else
 #define silk_SMLAWW(a32, b32, c32)       silk_MLA(silk_SMLAWB((a32), (b32), (c32)), (b32), silk_RSHIFT_ROUND((c32), 16))
 #define silk_SMLAWW(a32, b32, c32)       silk_MLA(silk_SMLAWB((a32), (b32), (c32)), (b32), silk_RSHIFT_ROUND((c32), 16))
 #endif
 #endif
@@ -149,5 +151,9 @@ static OPUS_INLINE opus_int32 silk_CLZ32(opus_int32 in32)
 #include "arm/macros_armv5e.h"
 #include "arm/macros_armv5e.h"
 #endif
 #endif
 
 
+#ifdef OPUS_ARM_PRESUME_AARCH64_NEON_INTR
+#include "arm/macros_arm64.h"
+#endif
+
 #endif /* SILK_MACROS_H */
 #endif /* SILK_MACROS_H */
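
Two things happen in macros.h. First, the OPUS_FAST_INT64 test is no longer defined here as a macro whose body contains defined() (expanding defined() through a macro during #if evaluation is undefined behavior); the flag is now expected to come in through the newly added #include "arch.h", and macros.h only consumes it. Second, every fast-path macro that goes through a 64-bit intermediate now casts the result back to opus_int32, so an expression such as silk_SMULWB(a, b) has the same type whether or not the 64-bit path is taken, and silk_SMULWT gains a fast 64-bit variant of its own. A small self-check that the 64-bit fast path and the split 32-bit fallback agree (helper names are illustrative):

    #include <assert.h>
    #include <stdint.h>

    /* Fast path: one widening multiply, result narrowed back to 32 bits. */
    static int32_t smulwb_fast(int32_t a32, int32_t b32) {
        return (int32_t)(((int64_t)a32 * (int16_t)b32) >> 16);
    }

    /* Fallback: split a32 into its high and low 16-bit halves so that only
       32-bit products ever occur. */
    static int32_t smulwb_split(int32_t a32, int32_t b32) {
        return ((a32 >> 16) * (int32_t)(int16_t)b32)
             + (((a32 & 0x0000FFFF) * (int32_t)(int16_t)b32) >> 16);
    }

    int main(void) {
        assert(smulwb_fast( 0x12345678,  1234) == smulwb_split( 0x12345678,  1234));
        assert(smulwb_fast(-987654321, -1234) == smulwb_split(-987654321, -1234));
        return 0;
    }

The trailing hunk wires in arm/macros_arm64.h so that AArch64 builds with OPUS_ARM_PRESUME_AARCH64_NEON_INTR can override these generic macros.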
 
 

+ 5 - 1
thirdparty/opus/silk/mips/NSQ_del_dec_mipsr1.h

@@ -62,7 +62,8 @@ static inline void silk_noise_shape_quantizer_del_dec(
     opus_int            warping_Q16,            /* I                                        */
     opus_int            warping_Q16,            /* I                                        */
     opus_int            nStatesDelayedDecision, /* I    Number of states in decision tree   */
     opus_int            nStatesDelayedDecision, /* I    Number of states in decision tree   */
     opus_int            *smpl_buf_idx,          /* I    Index to newest samples in buffers  */
     opus_int            *smpl_buf_idx,          /* I    Index to newest samples in buffers  */
-    opus_int            decisionDelay           /* I                                        */
+    opus_int            decisionDelay,          /* I                                        */
+    int                 arch                    /* I                                        */
 )
 )
 {
 {
     opus_int     i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx;
     opus_int     i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx;
@@ -82,6 +83,9 @@ static inline void silk_noise_shape_quantizer_del_dec(
 
 
     opus_int32 cur, prev, next;
     opus_int32 cur, prev, next;
 
 
+    /*Unused.*/
+    (void)arch;
+
     //Intialize b_Q14 variables
     //Intialize b_Q14 variables
     b_Q14_0 = b_Q14[ 0 ];
     b_Q14_0 = b_Q14[ 0 ];
     b_Q14_1 = b_Q14[ 1 ];
     b_Q14_1 = b_Q14[ 1 ];
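
The MIPS variant only grows the new arch parameter so its prototype keeps matching the generic one; it has no coefficient repacking to do with it, hence the explicit void cast. That cast is the usual portable way of telling both the compiler and the reader that ignoring a parameter is intentional:

    /* Sketch: the cast evaluates the parameter to no effect and silences
       -Wunused-parameter warnings without changing behaviour. */
    static void kernel_stub( int arch )
    {
        (void)arch;   /* intentionally unused in this implementation */
        /* ... actual work that never consults arch ... */
    }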

+ 4 - 2
thirdparty/opus/silk/process_NLSFs.c

@@ -41,7 +41,7 @@ void silk_process_NLSFs(
 {
 {
     opus_int     i, doInterpolate;
     opus_int     i, doInterpolate;
     opus_int     NLSF_mu_Q20;
     opus_int     NLSF_mu_Q20;
-    opus_int32   i_sqr_Q15;
+    opus_int16   i_sqr_Q15;
     opus_int16   pNLSF0_temp_Q15[ MAX_LPC_ORDER ];
     opus_int16   pNLSF0_temp_Q15[ MAX_LPC_ORDER ];
     opus_int16   pNLSFW_QW[ MAX_LPC_ORDER ];
     opus_int16   pNLSFW_QW[ MAX_LPC_ORDER ];
     opus_int16   pNLSFW0_temp_QW[ MAX_LPC_ORDER ];
     opus_int16   pNLSFW0_temp_QW[ MAX_LPC_ORDER ];
@@ -79,7 +79,8 @@ void silk_process_NLSFs(
         /* Update NLSF weights with contribution from first half */
         /* Update NLSF weights with contribution from first half */
         i_sqr_Q15 = silk_LSHIFT( silk_SMULBB( psEncC->indices.NLSFInterpCoef_Q2, psEncC->indices.NLSFInterpCoef_Q2 ), 11 );
         i_sqr_Q15 = silk_LSHIFT( silk_SMULBB( psEncC->indices.NLSFInterpCoef_Q2, psEncC->indices.NLSFInterpCoef_Q2 ), 11 );
         for( i = 0; i < psEncC->predictLPCOrder; i++ ) {
         for( i = 0; i < psEncC->predictLPCOrder; i++ ) {
-            pNLSFW_QW[ i ] = silk_SMLAWB( silk_RSHIFT( pNLSFW_QW[ i ], 1 ), (opus_int32)pNLSFW0_temp_QW[ i ], i_sqr_Q15 );
+            pNLSFW_QW[ i ] = silk_ADD16( silk_RSHIFT( pNLSFW_QW[ i ], 1 ), silk_RSHIFT(
+                  silk_SMULBB( pNLSFW0_temp_QW[ i ], i_sqr_Q15 ), 16) );
             silk_assert( pNLSFW_QW[ i ] >= 1 );
             silk_assert( pNLSFW_QW[ i ] >= 1 );
         }
         }
     }
     }
@@ -100,6 +101,7 @@ void silk_process_NLSFs(
 
 
     } else {
     } else {
         /* Copy LPC coefficients for first half from second half */
         /* Copy LPC coefficients for first half from second half */
+        silk_assert( psEncC->predictLPCOrder <= MAX_LPC_ORDER );
         silk_memcpy( PredCoef_Q12[ 0 ], PredCoef_Q12[ 1 ], psEncC->predictLPCOrder * sizeof( opus_int16 ) );
         silk_memcpy( PredCoef_Q12[ 0 ], PredCoef_Q12[ 1 ], psEncC->predictLPCOrder * sizeof( opus_int16 ) );
     }
     }
 }
 }
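
Same pattern as in NLSF_del_dec_quant.c: the weight update drops the 32x16 silk_SMLAWB in favour of a 16x16 silk_SMULBB plus an explicit shift and a 16-bit add, and i_sqr_Q15 is narrowed to opus_int16 to match; the extra silk_assert simply documents that the memcpy below it cannot overrun PredCoef_Q12. A sketch of why the two update forms agree, assuming the weight fits in 16 bits (which its opus_int16 declaration above guarantees):

    #include <stdint.h>

    /* Old form: acc + ((w0 * i_sqr) >> 16) via a 32x16 multiply-accumulate. */
    static int16_t weight_old(int16_t acc_half, int32_t w0, int16_t i_sqr_Q15) {
        return (int16_t)(acc_half + (int32_t)(((int64_t)w0 * i_sqr_Q15) >> 16));
    }

    /* New form: the same value via a 16x16 multiply, a shift and a 16-bit add,
       valid because w0 is already a 16-bit-ranged quantity. */
    static int16_t weight_new(int16_t acc_half, int16_t w0, int16_t i_sqr_Q15) {
        return (int16_t)(acc_half + (((int32_t)w0 * i_sqr_Q15) >> 16));
    }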

+ 1 - 1
thirdparty/opus/silk/sort.c

@@ -33,7 +33,7 @@ POSSIBILITY OF SUCH DAMAGE.
 /* Best case:  O(n)   for an already sorted array            */
 /* Best case:  O(n)   for an already sorted array            */
 /* Worst case: O(n^2) for an inversely sorted array          */
 /* Worst case: O(n^2) for an inversely sorted array          */
 /*                                                           */
 /*                                                           */
-/* Shell short:    http://en.wikipedia.org/wiki/Shell_sort   */
+/* Shell short:    https://en.wikipedia.org/wiki/Shell_sort  */
 
 
 #include "SigProc_FIX.h"
 #include "SigProc_FIX.h"
 
 

+ 4 - 4
thirdparty/opus/silk/stereo_LR_to_MS.c

@@ -77,7 +77,7 @@ void silk_stereo_LR_to_MS(
     ALLOC( LP_mid, frame_length, opus_int16 );
     ALLOC( LP_mid, frame_length, opus_int16 );
     ALLOC( HP_mid, frame_length, opus_int16 );
     ALLOC( HP_mid, frame_length, opus_int16 );
     for( n = 0; n < frame_length; n++ ) {
     for( n = 0; n < frame_length; n++ ) {
-        sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 2 );
+        sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 2 );
         LP_mid[ n ] = sum;
         LP_mid[ n ] = sum;
         HP_mid[ n ] = mid[ n + 1 ] - sum;
         HP_mid[ n ] = mid[ n + 1 ] - sum;
     }
     }
@@ -86,7 +86,7 @@ void silk_stereo_LR_to_MS(
     ALLOC( LP_side, frame_length, opus_int16 );
     ALLOC( LP_side, frame_length, opus_int16 );
     ALLOC( HP_side, frame_length, opus_int16 );
     ALLOC( HP_side, frame_length, opus_int16 );
     for( n = 0; n < frame_length; n++ ) {
     for( n = 0; n < frame_length; n++ ) {
-        sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( side[ n ] + side[ n + 2 ], side[ n + 1 ], 1 ), 2 );
+        sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( side[ n ] + (opus_int32)side[ n + 2 ], side[ n + 1 ], 1 ), 2 );
         LP_side[ n ] = sum;
         LP_side[ n ] = sum;
         HP_side[ n ] = side[ n + 1 ] - sum;
         HP_side[ n ] = side[ n + 1 ] - sum;
     }
     }
@@ -207,7 +207,7 @@ void silk_stereo_LR_to_MS(
         pred0_Q13 += delta0_Q13;
         pred0_Q13 += delta0_Q13;
         pred1_Q13 += delta1_Q13;
         pred1_Q13 += delta1_Q13;
         w_Q24   += deltaw_Q24;
         w_Q24   += deltaw_Q24;
-        sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 );    /* Q11 */
+        sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 );    /* Q11 */
         sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 );               /* Q8  */
         sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 );               /* Q8  */
         sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 );       /* Q8  */
         sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 );       /* Q8  */
         x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) );
         x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) );
@@ -217,7 +217,7 @@ void silk_stereo_LR_to_MS(
     pred1_Q13 = -pred_Q13[ 1 ];
     pred1_Q13 = -pred_Q13[ 1 ];
     w_Q24     =  silk_LSHIFT( width_Q14, 10 );
     w_Q24     =  silk_LSHIFT( width_Q14, 10 );
     for( n = STEREO_INTERP_LEN_MS * fs_kHz; n < frame_length; n++ ) {
     for( n = STEREO_INTERP_LEN_MS * fs_kHz; n < frame_length; n++ ) {
-        sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 );    /* Q11 */
+        sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 );    /* Q11 */
         sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 );               /* Q8  */
         sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 );               /* Q8  */
         sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 );       /* Q8  */
         sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 );       /* Q8  */
         x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) );
         x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) );
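
All four hunks make the same change: one operand of mid[ n ] + mid[ n + 2 ] (and the side[] equivalent) is promoted to opus_int32, so the sum of two 16-bit samples is always formed in 32-bit arithmetic, even on targets where int is only 16 bits wide. A tiny illustration of the promotion at work:

    #include <stdint.h>

    /* Two int16_t samples can sum to +/- 65534, which does not fit back in 16
       bits; widening one operand forces the addition itself to be 32-bit. */
    static int32_t mid_sum(const int16_t *mid, int n) {
        return mid[ n ] + (int32_t)mid[ n + 2 ];
    }

On the usual 32-bit-int targets the integer promotions already do this; the cast makes the intent explicit and keeps the code correct elsewhere.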

+ 1 - 1
thirdparty/opus/silk/x86/NSQ_sse.c

@@ -221,7 +221,7 @@ void silk_NSQ_sse4_1(
         {
         {
             silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14,
             silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14,
                 AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10,
                 AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10,
-                offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder );
+                offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder, psEncC->arch );
         }
         }
 
 
         x_Q3   += psEncC->subfr_length;
         x_Q3   += psEncC->subfr_length;

+ 2 - 1
thirdparty/opus/silk/x86/main_sse.h

@@ -207,7 +207,8 @@ void silk_noise_shape_quantizer(
     opus_int            offset_Q10,             /* I                                    */
     opus_int            offset_Q10,             /* I                                    */
     opus_int            length,                 /* I    Input length                    */
     opus_int            length,                 /* I    Input length                    */
     opus_int            shapingLPCOrder,        /* I    Noise shaping AR filter order   */
     opus_int            shapingLPCOrder,        /* I    Noise shaping AR filter order   */
-    opus_int            predictLPCOrder         /* I    Prediction filter order         */
+    opus_int            predictLPCOrder,        /* I    Prediction filter order         */
+    int                 arch                    /* I    Architecture                    */
 );
 );
 
 
 /**************************/
 /**************************/