Browse Source

Make fft non-recursive so to trigger vectorization

Co-authored-by: alqeeu <[email protected]>
rexim 6 months ago
parent
commit
bbe90095dc
2 changed files with 30 additions and 16 deletions
  1. 27 16
      src/plug.c
  2. 3 0
      src_build/nob_linux.c

+ 27 - 16
src/plug.c

@@ -243,25 +243,36 @@ static void fft_clean(void)
     memset(p->out_smear, 0, sizeof(p->out_smear));
 }
 
-// Ported from https://rosettacode.org/wiki/Fast_Fourier_transform#Python
-static void fft(float in[], size_t stride, Float_Complex out[], size_t n)
+// Ported from https://cp-algorithms.com/algebra/fft.html
+static void fft(float in[], Float_Complex out[], size_t n)
 {
-    assert(n > 0);
-
-    if (n == 1) {
-        out[0] = cfromreal(in[0]);
-        return;
+    for(size_t i = 0; i < n; i++) {
+        out[i] = cfromreal(in[i]);
     }
 
-    fft(in, stride*2, out, n/2);
-    fft(in + stride, stride*2,  out + n/2, n/2);
+    for (size_t i = 1, j = 0; i < n; i++) {
+        int bit = n >> 1;
+        for (; j & bit; bit >>= 1) j ^= bit;
+        j ^= bit;
+        if (i < j) {
+            Float_Complex temp = out[i];
+            out[i] = out[j];
+            out[j] = temp;
+        }
+    }
 
-    for (size_t k = 0; k < n/2; ++k) {
-        float t = (float)k/n;
-        Float_Complex v = mulcc(cexpf(cfromimag(-2*PI*t)), out[k + n/2]);
-        Float_Complex e = out[k];
-        out[k]       = addcc(e, v);
-        out[k + n/2] = subcc(e, v);
+    for (size_t len = 2; len <= n; len <<= 1) {
+        float ang = 2 * PI / len;
+        Float_Complex wlen = cos(ang) + I*sin(ang) ;
+        for (size_t i = 0; i < n; i += len) {
+            Float_Complex w = 1;
+            for (size_t j = 0; j < len / 2; j++) {
+                Float_Complex u = out[i+j], v = out[i+j+len/2] * w;
+                out[i+j] = u + v;
+                out[i+j+len/2] = u - v;
+                w *= wlen;
+            }
+        }
     }
 }
 
@@ -282,7 +293,7 @@ static size_t fft_analyze(float dt)
     }
 
     // FFT
-    fft(p->in_win, 1, p->out_raw, FFT_SIZE);
+    fft(p->in_win, p->out_raw, FFT_SIZE);
 
     // "Squash" into the Logarithmic Scale
     float step = 1.06;

+ 3 - 0
src_build/nob_linux.c

@@ -16,6 +16,7 @@ bool build_musializer(void)
         "-o", "./build/libplug.so",
         "./src/plug.c", "./src/ffmpeg_linux.c",
         nob_temp_sprintf("-L./build/raylib/%s", MUSIALIZER_TARGET_NAME), "-l:libraylib.so",
+        "-O3", "-march=native", "-flto", "-ffast-math",
         "-lm", "-ldl", "-lpthread");
     nob_da_append(&procs, nob_cmd_run_async_and_reset(&cmd));
 
@@ -30,6 +31,7 @@ bool build_musializer(void)
         // NOTE: just in case somebody wants to run musializer from within the ./build/ folder
         nob_temp_sprintf("-Wl,-rpath=./raylib/%s", MUSIALIZER_TARGET_NAME),
         nob_temp_sprintf("-L./build/raylib/%s", MUSIALIZER_TARGET_NAME),
+        "-O3", "-march=native", "-flto", "-ffast-math",
         "-l:libraylib.so", "-lm", "-ldl", "-lpthread");
     nob_da_append(&procs, nob_cmd_run_async_and_reset(&cmd));
 
@@ -42,6 +44,7 @@ bool build_musializer(void)
         "-o", "./build/musializer",
         "./src/plug.c", "./src/ffmpeg_linux.c", "./src/musializer.c",
         nob_temp_sprintf("-L./build/raylib/%s", MUSIALIZER_TARGET_NAME), "-l:libraylib.a",
+        "-O3", "-march=native", "-flto", "-ffast-math",
         "-lm", "-ldl", "-lpthread");
     if (!nob_cmd_run_sync_and_reset(&cmd)) nob_return_defer(false);
 #endif // MUSIALIZER_HOTRELOAD