Browse Source

loongarch: add SDL_FillRect4LSX opt

yuanhecai 1 month ago
parent
commit
14eebdab3c
1 changed files with 62 additions and 0 deletions
  1. 62 0
      src/video/SDL_fillrect.c

+ 62 - 0
src/video/SDL_fillrect.c

@@ -135,6 +135,61 @@ DEFINE_SSE_FILLRECT(4, Uint32)
 /* *INDENT-ON* */ /* clang-format on */
 #endif            /* __SSE__ */
 
+#if defined(__loongarch_sx)
+
+#define LSX_BEGIN __m128i c128 = __lsx_vreplgr2vr_w(color);
+
+#define LSX_WORK \
+    for (i = n / 64; i--;) { \
+        __lsx_vst(c128, p, 0); \
+        __lsx_vst(c128, p, 16); \
+        __lsx_vst(c128, p, 32); \
+        __lsx_vst(c128, p, 48); \
+        p += 64; \
+    }
+
+#define DEFINE_LSX_FILLRECT(bpp, type) \
+static void \
+SDL_FillRect##bpp##LSX(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
+{ \
+    int i, n; \
+    Uint8 *p = NULL; \
+ \
+    LSX_BEGIN; \
+ \
+    while (h--) { \
+        n = w * bpp; \
+        p = pixels; \
+ \
+        if (n > 63) { \
+            int adjust = 16 - ((uintptr_t)p & 15); \
+            if (adjust < 16) { \
+                n -= adjust; \
+                adjust /= bpp; \
+                while (adjust--) { \
+                    *((type *)p) = (type)color; \
+                    p += bpp; \
+                } \
+            } \
+            LSX_WORK; \
+        } \
+        if (n & 63) { \
+            int remainder = (n & 63); \
+            remainder /= bpp; \
+            while (remainder--) { \
+                *((type *)p) = (type)color; \
+                p += bpp; \
+            } \
+        } \
+        pixels += pitch; \
+    } \
+ \
+}
+
+DEFINE_LSX_FILLRECT(4, Uint32)
+
+#endif
+
 static void SDL_FillRect1(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
 {
     int n;
@@ -423,6 +478,13 @@ int SDL_FillRects(SDL_Surface *dst, const SDL_Rect *rects, int count,
                 break;
             }
 #endif
+
+#ifdef __loongarch_sx
+                if (SDL_HasLSX()) {
+                    fill_function = SDL_FillRect4LSX;
+                    break;
+                }
+#endif
             fill_function = SDL_FillRect4;
             break;
         }