Browse Source

SCons: Improve cache purging logic

• Implement caching via SCons arguments, rather than environment variables
Thaddeus Crews 10 months ago
parent
commit
0e4a4e3c4d

+ 4 - 6
.github/actions/godot-build/action.yml

@@ -18,12 +18,12 @@ inputs:
     required: false
   scons-cache:
     description: The SCons cache path.
-    default: ${{ github.workspace }}/.scons-cache/
+    default: ${{ github.workspace }}/.scons_cache/
   scons-cache-limit:
     description: The SCons cache size limit.
     # actions/cache has 10 GiB limit, and GitHub runners have a 14 GiB disk.
     # Limit to 7 GiB to avoid having the extracted cache fill the disk.
-    default: 7168
+    default: 7
 
 runs:
   using: composite
@@ -32,10 +32,8 @@ runs:
       shell: sh
       env:
         SCONSFLAGS: ${{ inputs.sconsflags }}
-        SCONS_CACHE: ${{ inputs.scons-cache }}
-        SCONS_CACHE_LIMIT: ${{ inputs.scons-cache-limit }}
      run: |
-        echo "Building with flags:" platform=${{ inputs.platform }} target=${{ inputs.target }} tests=${{ inputs.tests }} ${{ env.SCONSFLAGS }}
+        echo "Building with flags:" platform=${{ inputs.platform }} target=${{ inputs.target }} tests=${{ inputs.tests }} ${{ env.SCONSFLAGS }} "cache_path=${{ inputs.scons-cache }}" cache_limit=${{ inputs.scons-cache-limit }}
 
         if [ "${{ inputs.target }}" != "editor" ]; then
           # Ensure we don't include editor code in export template builds.
@@ -49,5 +47,5 @@ runs:
           export BUILD_NAME="gh"
         fi
 
-        scons platform=${{ inputs.platform }} target=${{ inputs.target }} tests=${{ inputs.tests }} ${{ env.SCONSFLAGS }}
+        scons platform=${{ inputs.platform }} target=${{ inputs.target }} tests=${{ inputs.tests }} ${{ env.SCONSFLAGS }} "cache_path=${{ inputs.scons-cache }}" cache_limit=${{ inputs.scons-cache-limit }}
         ls -l bin/

+ 1 - 2
.github/actions/godot-cache-restore/action.yml

@@ -6,7 +6,7 @@ inputs:
     default: ${{ github.job }}
   scons-cache:
     description: The SCons cache path.
-    default: ${{ github.workspace }}/.scons-cache/
+    default: ${{ github.workspace }}/.scons_cache/
 
 runs:
   using: composite
@@ -29,7 +29,6 @@ runs:
         #   4. A partial match for the same base branch only (not ideal, matches any PR with the same base branch).
 
         restore-keys: |
-          ${{ inputs.cache-name }}-${{ env.GODOT_BASE_BRANCH }}-${{ github.ref }}-${{ github.sha }}
           ${{ inputs.cache-name }}-${{ env.GODOT_BASE_BRANCH }}-${{ github.ref }}
           ${{ inputs.cache-name }}-${{ env.GODOT_BASE_BRANCH }}-refs/heads/${{ env.GODOT_BASE_BRANCH }}
           ${{ inputs.cache-name }}-${{ env.GODOT_BASE_BRANCH }}

+ 1 - 1
.github/actions/godot-cache-save/action.yml

@@ -6,7 +6,7 @@ inputs:
     default: ${{ github.job }}
   scons-cache:
     description: The SCons cache path.
-    default: ${{ github.workspace }}/.scons-cache/
+    default: ${{ github.workspace }}/.scons_cache/
 
 runs:
   using: composite

+ 0 - 3
.github/workflows/godot_cpp_test.yml

@@ -52,9 +52,6 @@ jobs:
       #   continue-on-error: true
 
       - name: Build godot-cpp test extension
-        env: # Keep synced with godot-build.
-          SCONS_CACHE: ${{ github.workspace }}/.scons-cache/
-          SCONS_CACHE_LIMIT: 7168
         run: scons --directory=./godot-cpp/test target=template_debug dev_build=yes verbose=yes
 
       # - name: Save Godot build cache

+ 3 - 3
.gitignore

@@ -77,6 +77,9 @@ venv
 __pycache__/
 *.pyc
 
+# Python modules
+.*_cache/
+
 # Documentation
 doc/_build/
 
@@ -164,9 +167,6 @@ gmon.out
 # Kdevelop
 *.kdev4
 
-# Mypy
-.mypy_cache
-
 # Qt Creator
 *.config
 *.creator

+ 8 - 8
SConstruct

@@ -271,6 +271,8 @@ opts.Add(BoolVariable("scu_build", "Use single compilation unit build", False))
 opts.Add("scu_limit", "Max includes per SCU file when using scu_build (determines RAM use)", "0")
 opts.Add(BoolVariable("engine_update_check", "Enable engine update checks in the Project Manager", True))
 opts.Add(BoolVariable("steamapi", "Enable minimal SteamAPI integration for usage time tracking (editor only)", False))
+opts.Add("cache_path", "Path to a directory where SCons cache files will be stored. No value disables the cache.", "")
+opts.Add("cache_limit", "Max size (in GiB) for the SCons cache. 0 means no limit.", "0")
 
 # Thirdparty libraries
 opts.Add(BoolVariable("builtin_brotli", "Use the built-in Brotli library", True))
@@ -321,6 +323,9 @@ opts.Add("rcflags", "Custom flags for Windows resource compiler")
 # in following code (especially platform and custom_modules).
 opts.Update(env)
 
+# Setup caching logic early to catch everything.
+methods.prepare_cache(env)
+
 # Copy custom environment variables if set.
 if env["import_env_vars"]:
     for env_var in str(env["import_env_vars"]).split(","):
@@ -354,7 +359,9 @@ if env["platform"] == "":
 if env["platform"] in compatibility_platform_aliases:
     alias = env["platform"]
     platform = compatibility_platform_aliases[alias]
-    print_warning(f'Platform "{alias}" has been renamed to "{platform}" in Godot 4. Building for platform "{platform}".')
+    print_warning(
+        f'Platform "{alias}" has been renamed to "{platform}" in Godot 4. Building for platform "{platform}".'
+    )
     env["platform"] = platform
 
 # Alias for convenience.
@@ -1039,11 +1046,6 @@ GLSL_BUILDERS = {
 }
 env.Append(BUILDERS=GLSL_BUILDERS)
 
-scons_cache_path = os.environ.get("SCONS_CACHE")
-if scons_cache_path is not None:
-    CacheDir(scons_cache_path)
-    print("Scons cache enabled... (path: '" + scons_cache_path + "')")
-
 if env["compiledb"]:
     env.Tool("compilation_db")
     env.Alias("compiledb", env.CompilationDatabase())
@@ -1126,5 +1128,3 @@ def purge_flaky_files():
 
 
 atexit.register(purge_flaky_files)
-
-methods.clean_cache(env)

+ 142 - 140
methods.py

@@ -1,5 +1,7 @@
+import atexit
 import contextlib
 import glob
+import math
 import os
 import re
 import subprocess
@@ -8,7 +10,7 @@ from collections import OrderedDict
 from enum import Enum
 from io import StringIO, TextIOWrapper
 from pathlib import Path
-from typing import Generator, List, Optional, Union
+from typing import Generator, List, Optional, Union, cast
 
 # Get the "Godot" folder name ahead of time
 base_folder_path = str(os.path.abspath(Path(__file__).parent)) + "/"
@@ -784,159 +786,159 @@ def using_emcc(env):
 
 
 def show_progress(env):
-    if env["ninja"]:
-        # Has its own progress/tracking tool that clashes with ours
+    # Progress reporting is not available in non-TTY environments since it messes with the output
+    # (for example, when writing to a file). Ninja has its own progress/tracking tool that clashes
+    # with ours.
+    if not env["progress"] or not sys.stdout.isatty() or env["ninja"]:
         return
 
-    import sys
-
-    from SCons.Script import AlwaysBuild, Command, Progress
-
-    screen = sys.stdout
-    # Progress reporting is not available in non-TTY environments since it
-    # messes with the output (for example, when writing to a file)
-    show_progress = env["progress"] and sys.stdout.isatty()
-    node_count = 0
-    node_count_max = 0
-    node_count_interval = 1
-    node_count_fname = str(env.Dir("#")) + "/.scons_node_count"
-
-    import math
-
-    class cache_progress:
-        # The default is 1 GB cache
-        def __init__(self, path=None, limit=pow(1024, 3)):
-            self.path = path
-            self.limit = limit
-            if env["verbose"] and path is not None:
-                screen.write(
-                    "Current cache limit is {} (used: {})\n".format(
-                        self.convert_size(limit), self.convert_size(self.get_size(path))
-                    )
-                )
+    NODE_COUNT_FILENAME = f"{base_folder_path}.scons_node_count"
+
+    class ShowProgress:
+        def __init__(self):
+            self.count = 0
+            self.max = 0
+            try:
+                with open(NODE_COUNT_FILENAME, "r", encoding="utf-8") as f:
+                    self.max = int(f.readline())
+            except OSError:
+                pass
+            if self.max == 0:
+                print("NOTE: Performing initial build, progress percentage unavailable!")
 
         def __call__(self, node, *args, **kw):
-            nonlocal node_count, node_count_max, node_count_interval, node_count_fname, show_progress
-            if show_progress:
-                # Print the progress percentage
-                node_count += node_count_interval
-                if node_count_max > 0 and node_count <= node_count_max:
-                    screen.write("\r[%3d%%] " % (node_count * 100 / node_count_max))
-                    screen.flush()
-                elif node_count_max > 0 and node_count > node_count_max:
-                    screen.write("\r[100%] ")
-                    screen.flush()
-                else:
-                    screen.write("\r[Initial build] ")
-                    screen.flush()
-
-        def convert_size(self, size_bytes):
-            if size_bytes == 0:
-                return "0 bytes"
-            size_name = ("bytes", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
-            i = int(math.floor(math.log(size_bytes, 1024)))
-            p = math.pow(1024, i)
-            s = round(size_bytes / p, 2)
-            return "%s %s" % (int(s) if i == 0 else s, size_name[i])
-
-        def get_size(self, start_path="."):
-            total_size = 0
-            for dirpath, dirnames, filenames in os.walk(start_path):
-                for f in filenames:
-                    fp = os.path.join(dirpath, f)
-                    total_size += os.path.getsize(fp)
-            return total_size
+            self.count += 1
+            if self.max != 0:
+                percent = int(min(self.count * 100 / self.max, 100))
+                sys.stdout.write(f"\r[{percent:3d}%] ")
+                sys.stdout.flush()
+
+    from SCons.Script import Progress
+
+    progressor = ShowProgress()
+    Progress(progressor)
 
     def progress_finish(target, source, env):
-        nonlocal node_count, progressor
         try:
-            with open(node_count_fname, "w", encoding="utf-8", newline="\n") as f:
-                f.write("%d\n" % node_count)
-        except Exception:
+            with open(NODE_COUNT_FILENAME, "w", encoding="utf-8", newline="\n") as f:
+                f.write(f"{progressor.count}\n")
+        except OSError:
             pass
 
-    try:
-        with open(node_count_fname, "r", encoding="utf-8") as f:
-            node_count_max = int(f.readline())
-    except Exception:
-        pass
+    env.AlwaysBuild(
+        env.CommandNoCache(
+            "progress_finish", [], env.Action(progress_finish, "Building node count database .scons_node_count")
+        )
+    )
 
-    cache_directory = os.environ.get("SCONS_CACHE")
-    # Simple cache pruning, attached to SCons' progress callback. Trim the
-    # cache directory to a size not larger than cache_limit.
-    cache_limit = float(os.getenv("SCONS_CACHE_LIMIT", 1024)) * 1024 * 1024
-    progressor = cache_progress(cache_directory, cache_limit)
-    Progress(progressor, interval=node_count_interval)
-
-    progress_finish_command = Command("progress_finish", [], progress_finish)
-    AlwaysBuild(progress_finish_command)
-
-
-def clean_cache(env):
-    import atexit
-    import time
-
-    class cache_clean:
-        def __init__(self, path=None, limit=pow(1024, 3)):
-            self.path = path
-            self.limit = limit
-
-        def clean(self):
-            self.delete(self.file_list())
-
-        def delete(self, files):
-            if len(files) == 0:
-                return
-            if env["verbose"]:
-                # Utter something
-                print("Purging %d %s from cache..." % (len(files), "files" if len(files) > 1 else "file"))
-            [os.remove(f) for f in files]
-
-        def file_list(self):
-            if self.path is None:
-                # Nothing to do
-                return []
-            # Gather a list of (filename, (size, atime)) within the
-            # cache directory
-            file_stat = [(x, os.stat(x)[6:8]) for x in glob.glob(os.path.join(self.path, "*", "*"))]
-            if file_stat == []:
-                # Nothing to do
-                return []
-            # Weight the cache files by size (assumed to be roughly
-            # proportional to the recompilation time) times an exponential
-            # decay since the ctime, and return a list with the entries
-            # (filename, size, weight).
-            current_time = time.time()
-            file_stat = [(x[0], x[1][0], (current_time - x[1][1])) for x in file_stat]
-            # Sort by the most recently accessed files (most sensible to keep) first
-            file_stat.sort(key=lambda x: x[2])
-            # Search for the first entry where the storage limit is
-            # reached
-            sum, mark = 0, None
-            for i, x in enumerate(file_stat):
-                sum += x[1]
-                if sum > self.limit:
-                    mark = i
-                    break
-            if mark is None:
-                return []
-            else:
-                return [x[0] for x in file_stat[mark:]]
 
-    def cache_finally():
-        nonlocal cleaner
+def convert_size(size_bytes: int) -> str:
+    if size_bytes == 0:
+        return "0 bytes"
+    SIZE_NAMES = ["bytes", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"]
+    index = math.floor(math.log(size_bytes, 1024))
+    power = math.pow(1024, index)
+    size = round(size_bytes / power, 2)
+    return f"{size} {SIZE_NAMES[index]}"
+
+
+def get_size(start_path: str = ".") -> int:
+    total_size = 0
+    for dirpath, _, filenames in os.walk(start_path):
+        for file in filenames:
+            path = os.path.join(dirpath, file)
+            total_size += os.path.getsize(path)
+    return total_size
+
+
+def clean_cache(cache_path: str, cache_limit: int, verbose: bool):
+    files = glob.glob(os.path.join(cache_path, "*", "*"))
+    if not files:
+        return
+
+    # Remove all text files, store binary files in list of (filename, size, atime).
+    purge = []
+    texts = []
+    stats = []
+    for file in files:
+        # Failing a utf-8 decode is the easiest way to determine if a file is binary.
         try:
-            cleaner.clean()
-        except Exception:
-            pass
+            with open(file, encoding="utf-8") as out:
+                out.read(1024)
+        except UnicodeDecodeError:
+            stats.append((file, *os.stat(file)[6:8]))
+        except OSError:
+            print_error(f'Failed to access cache file "{file}"; skipping.')
+        else:
+            texts.append(file)
+
+    if texts:
+        count = len(texts)
+        for file in texts:
+            try:
+                os.remove(file)
+            except OSError:
+                print_error(f'Failed to remove cache file "{file}"; skipping.')
+                count -= 1
+        if verbose:
+            print("Purging %d text %s from cache..." % (count, "files" if count > 1 else "file"))
+
+    if cache_limit:
+        # Sort by most recent access (most sensible to keep) first. Search for the first entry where
+        # the cache limit is reached.
+        stats.sort(key=lambda x: x[2], reverse=True)
+        sum = 0
+        for index, stat in enumerate(stats):
+            sum += stat[1]
+            if sum > cache_limit:
+                purge.extend([x[0] for x in stats[index:]])
+                break
+
+    if purge:
+        count = len(purge)
+        for file in purge:
+            try:
+                os.remove(file)
+            except OSError:
+                print_error(f'Failed to remove cache file "{file}"; skipping.')
+                count -= 1
+        if verbose:
+            print("Purging %d %s from cache..." % (count, "files" if count > 1 else "file"))
+
+
+def prepare_cache(env) -> None:
+    if env.GetOption("clean"):
+        return
+
+    if env["cache_path"]:
+        cache_path = cast(str, env["cache_path"])
+    elif os.environ.get("SCONS_CACHE"):
+        print_warning("Environment variable `SCONS_CACHE` is deprecated; use `cache_path` argument instead.")
+        cache_path = cast(str, os.environ.get("SCONS_CACHE"))
 
-    cache_directory = os.environ.get("SCONS_CACHE")
-    # Simple cache pruning, attached to SCons' progress callback. Trim the
-    # cache directory to a size not larger than cache_limit.
-    cache_limit = float(os.getenv("SCONS_CACHE_LIMIT", 1024)) * 1024 * 1024
-    cleaner = cache_clean(cache_directory, cache_limit)
+    if not cache_path:
+        return
+
+    env.CacheDir(cache_path)
+    print(f'SCons cache enabled... (path: "{cache_path}")')
+
+    if env["cache_limit"]:
+        cache_limit = float(env["cache_limit"])
+    elif os.environ.get("SCONS_CACHE_LIMIT"):
+        print_warning("Environment variable `SCONS_CACHE_LIMIT` is deprecated; use `cache_limit` argument instead.")
+        cache_limit = float(os.getenv("SCONS_CACHE_LIMIT", "0")) / 1024  # Old method used MiB, convert to GiB
+
+    # Convert GiB to bytes; treat negative numbers as 0 (unlimited).
+    cache_limit = max(0, int(cache_limit * 1024 * 1024 * 1024))
+    if env["verbose"]:
+        print(
+            "Current cache limit is {} (used: {})".format(
                convert_size(cache_limit) if cache_limit else "∞",
                convert_size(get_size(cache_path)),
            )
        )
 
-    atexit.register(cache_finally)
+    atexit.register(clean_cache, cache_path, cache_limit, env["verbose"])
 
 
 def dump(env):