Browse Source

Merge branch 'shaderpipeline' into vulkan

rdb 10 months ago
parent
commit
157ad9dc69
100 changed files with 2510 additions and 876 deletions
  1. 10 10
      .github/workflows/ci.yml
  2. 2 2
      .github/workflows/mypy.yml
  3. 8 9
      README.md
  4. 6 2
      cmake/macros/Interrogate.cmake
  5. 20 1
      direct/src/dist/FreezeTool.py
  6. 6 3
      direct/src/dist/_android.py
  7. 66 26
      direct/src/dist/commands.py
  8. 2 2
      direct/src/distributed/DistributedObjectAI.py
  9. 1 1
      direct/src/distributed/DistributedObjectUD.py
  10. 12 5
      direct/src/showbase/Loader.py
  11. 27 23
      direct/src/showbase/ShowBase.py
  12. 2 1
      direct/src/showbase/ShowBaseGlobal.py
  13. 1 1
      direct/src/task/Task.py
  14. 1 1
      dtool/Config.cmake
  15. 2 0
      dtool/src/dtoolutil/CMakeLists.txt
  16. 50 0
      dtool/src/dtoolutil/console_preamble.js
  17. 12 13
      dtool/src/dtoolutil/executionEnvironment.cxx
  18. 4 4
      dtool/src/dtoolutil/executionEnvironment.h
  19. 1 0
      dtool/src/dtoolutil/p3dtoolutil_ext_composite.cxx
  20. 80 0
      dtool/src/dtoolutil/pyenv_init.cxx
  21. 14 0
      dtool/src/dtoolutil/pyenv_init.h
  22. 3 3
      dtool/src/prc/notify.cxx
  23. 1 1
      makepanda/makepackage.py
  24. 33 10
      makepanda/makepanda.py
  25. 17 14
      makepanda/makepandacore.py
  26. 6 0
      makepanda/makewheel.py
  27. 3 0
      mypy.ini
  28. 1 1
      panda/CMakeLists.txt
  29. 11 7
      panda/src/android/android_main.cxx
  30. 1 1
      panda/src/android/pview_manifest.xml
  31. 11 12
      panda/src/androiddisplay/androidGraphicsWindow.cxx
  32. 2 1
      panda/src/cocoagldisplay/cocoaGLGraphicsBuffer.mm
  33. 2 2
      panda/src/cocoagldisplay/cocoaGLGraphicsPipe.mm
  34. 2 1
      panda/src/cocoagldisplay/cocoaGLGraphicsWindow.mm
  35. 2 0
      panda/src/collide/CMakeLists.txt
  36. 21 0
      panda/src/collide/collisionNode.I
  37. 22 2
      panda/src/collide/collisionNode.cxx
  38. 19 0
      panda/src/collide/collisionNode.h
  39. 62 0
      panda/src/collide/collisionNode_ext.cxx
  40. 40 0
      panda/src/collide/collisionNode_ext.h
  41. 1 0
      panda/src/collide/p3collide_ext_composite.cxx
  42. 1 1
      panda/src/cull/cullBinBackToFront.cxx
  43. 1 1
      panda/src/cull/cullBinFrontToBack.cxx
  44. 4 4
      panda/src/display/displayRegion.cxx
  45. 1 1
      panda/src/display/displayRegion.h
  46. 50 0
      panda/src/display/graphicsEngine.I
  47. 71 142
      panda/src/display/graphicsEngine.cxx
  48. 32 12
      panda/src/display/graphicsEngine.h
  49. 41 1
      panda/src/display/graphicsStateGuardian.I
  50. 36 0
      panda/src/display/graphicsStateGuardian.cxx
  51. 17 0
      panda/src/display/graphicsStateGuardian.h
  52. 5 5
      panda/src/display/shaderInputBinding_impls.cxx
  53. 5 5
      panda/src/display/shaderInputBinding_impls.h
  54. 124 15
      panda/src/dxgsg9/dxShaderContext9.cxx
  55. 2 0
      panda/src/dxgsg9/dxShaderContext9.h
  56. 8 0
      panda/src/dxgsg9/dxTextureContext9.cxx
  57. 3 0
      panda/src/dxgsg9/dxTextureContext9.h
  58. 2 1
      panda/src/egldisplay/eglGraphicsBuffer.cxx
  59. 3 0
      panda/src/egldisplay/eglGraphicsPipe.cxx
  60. 2 1
      panda/src/egldisplay/eglGraphicsWindow.cxx
  61. 11 0
      panda/src/event/asyncFuture.cxx
  62. 32 1
      panda/src/event/asyncFuture.h
  63. 3 0
      panda/src/event/asyncTaskChain.cxx
  64. 17 18
      panda/src/express/trueClock.cxx
  65. 2 0
      panda/src/framework/pandaFramework.cxx
  66. 9 0
      panda/src/gles2gsg/gles2gsg.h
  67. 4 0
      panda/src/glstuff/glBufferContext_src.h
  68. 181 22
      panda/src/glstuff/glGraphicsBuffer_src.cxx
  69. 4 0
      panda/src/glstuff/glGraphicsBuffer_src.h
  70. 489 223
      panda/src/glstuff/glGraphicsStateGuardian_src.cxx
  71. 72 22
      panda/src/glstuff/glGraphicsStateGuardian_src.h
  72. 159 31
      panda/src/glstuff/glShaderContext_src.cxx
  73. 10 1
      panda/src/glstuff/glShaderContext_src.h
  74. 40 0
      panda/src/glstuff/glTextureContext_src.I
  75. 110 24
      panda/src/glstuff/glTextureContext_src.cxx
  76. 31 2
      panda/src/glstuff/glTextureContext_src.h
  77. 18 0
      panda/src/glstuff/glmisc_src.cxx
  78. 4 0
      panda/src/glstuff/glmisc_src.h
  79. 2 1
      panda/src/glxdisplay/glxGraphicsBuffer.cxx
  80. 3 0
      panda/src/glxdisplay/glxGraphicsPipe.cxx
  81. 2 1
      panda/src/glxdisplay/glxGraphicsWindow.cxx
  82. 4 1
      panda/src/gobj/bufferContext.cxx
  83. 1 1
      panda/src/gobj/bufferContext.h
  84. 7 0
      panda/src/gobj/bufferContextChain.cxx
  85. 4 0
      panda/src/gobj/bufferContextChain.h
  86. 1 0
      panda/src/gobj/bufferResidencyTracker.cxx
  87. 18 3
      panda/src/gobj/preparedGraphicsObjects.cxx
  88. 2 2
      panda/src/gobj/shaderInputBinding.cxx
  89. 1 1
      panda/src/gobj/shaderInputBinding.h
  90. 9 1
      panda/src/gobj/texture.I
  91. 38 2
      panda/src/gobj/texture.cxx
  92. 6 0
      panda/src/gobj/texture.h
  93. 2 0
      panda/src/gsgbase/graphicsStateGuardianBase.h
  94. 133 127
      panda/src/linmath/compose_matrix_src.cxx
  95. 11 7
      panda/src/pgraph/loader.cxx
  96. 2 2
      panda/src/pgraph/modelPool.I
  97. 6 29
      panda/src/pgraph/modelPool.cxx
  98. 6 6
      panda/src/pgraph/modelPool.h
  99. 2 0
      panda/src/pgraph/nodePath.h
  100. 59 0
      panda/src/pgraph/nodePath_ext.cxx

+ 10 - 10
.github/workflows/ci.yml

@@ -508,7 +508,7 @@ jobs:
     - name: Setup emsdk
       uses: mymindstorm/setup-emsdk@v14
       with:
-        version: 3.1.70
+        version: 4.0.2
         actions-cache-folder: 'emsdk-cache'
 
     - name: Restore Python build cache
@@ -516,19 +516,19 @@ jobs:
       uses: actions/cache/restore@v4
       with:
         path: ~/python
-        key: cache-emscripten-python-3.12.7
+        key: cache-emscripten-python-3.12.8
 
     - name: Build Python 3.12
       if: steps.cache-emscripten-python-restore.outputs.cache-hit != 'true'
       run: |
-        wget https://www.python.org/ftp/python/3.12.7/Python-3.12.7.tar.xz
-        tar -xJf Python-3.12.7.tar.xz
-        (cd Python-3.12.7 && EM_CONFIG=$EMSDK/.emscripten python3 Tools/wasm/wasm_build.py emscripten-browser)
-        (cd Python-3.12.7/builddir/emscripten-browser && make install DESTDIR=~/python)
-        cp Python-3.12.7/builddir/emscripten-browser/Modules/_hacl/libHacl_Hash_SHA2.a ~/python/usr/local/lib
-        cp Python-3.12.7/builddir/emscripten-browser/Modules/_decimal/libmpdec/libmpdec.a ~/python/usr/local/lib
-        cp Python-3.12.7/builddir/emscripten-browser/Modules/expat/libexpat.a ~/python/usr/local/lib
-        rm -rf Python-3.12.7
+        wget https://www.python.org/ftp/python/3.12.8/Python-3.12.8.tar.xz
+        tar -xJf Python-3.12.8.tar.xz
+        (cd Python-3.12.8 && EM_CONFIG=$EMSDK/.emscripten python3 Tools/wasm/wasm_build.py emscripten-browser)
+        (cd Python-3.12.8/builddir/emscripten-browser && make install DESTDIR=~/python)
+        cp Python-3.12.8/builddir/emscripten-browser/Modules/_hacl/libHacl_Hash_SHA2.a ~/python/usr/local/lib
+        cp Python-3.12.8/builddir/emscripten-browser/Modules/_decimal/libmpdec/libmpdec.a ~/python/usr/local/lib
+        cp Python-3.12.8/builddir/emscripten-browser/Modules/expat/libexpat.a ~/python/usr/local/lib
+        rm -rf Python-3.12.8
 
     - name: Save Python build cache
       id: cache-emscripten-python-save

+ 2 - 2
.github/workflows/mypy.yml

@@ -7,7 +7,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: ['3.8', '3.11']
+        python-version: ['3.9', '3.13']
       fail-fast: false
     steps:
       - uses: actions/checkout@v4
@@ -18,6 +18,6 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install mypy==1.4.0
+          pip install mypy==1.14.1
       - name: Run mypy on direct
         run: python tests/run_mypy.py

+ 8 - 9
README.md

@@ -181,22 +181,21 @@ Although it's possible to build Panda3D on an Android device using the
 [termux](https://termux.com/) shell, the recommended route is to cross-compile
 .whl files using the SDK and NDK, which can then be used by the `build_apps`
 command to build a Python application into an .apk or .aab bundle.  You will
-need to get the latest thirdparty packages, which can be obtained from the
-artifacts page of the last successful run here:
+need to get the latest thirdparty packages, which can be obtained from here:
 
-https://github.com/rdb/panda3d-thirdparty/actions?query=branch%3Amain+is%3Asuccess+event%3Apush
+https://rdb.name/thirdparty-android.tar.gz
 
-This does not include Python at the moment, which can be extracted from
-[this archive](https://rdb.name/thirdparty-android.tar.gz) instead.
+This includes a copy of Python 3.13 compiled for Android.  You will need to
+use Python 3.13 on the host as well.
 
 These commands show how to compile wheels for the supported Android ABIs:
 
 ```bash
 export ANDROID_SDK_ROOT=/home/rdb/local/android
-python3.8 makepanda/makepanda.py --everything --outputdir built-droid-arm64 --arch arm64 --target android-21 --threads 6 --wheel
-python3.8 makepanda/makepanda.py --everything --outputdir built-droid-armv7a --arch armv7a --target android-19 --threads 6 --wheel
-python3.8 makepanda/makepanda.py --everything --outputdir built-droid-x86_64 --arch x86_64 --target android-21 --threads 6 --wheel
-python3.8 makepanda/makepanda.py --everything --outputdir built-droid-x86 --arch x86 --target android-19 --threads 6 --wheel
+python3.13 makepanda/makepanda.py --everything --outputdir built-droid-arm64 --arch arm64 --target android-21 --threads 6 --wheel
+python3.13 makepanda/makepanda.py --everything --outputdir built-droid-armv7a --arch arm --target android-21 --threads 6 --wheel
+python3.13 makepanda/makepanda.py --everything --outputdir built-droid-x86_64 --arch x86_64 --target android-21 --threads 6 --wheel
+python3.13 makepanda/makepanda.py --everything --outputdir built-droid-x86 --arch x86 --target android-21 --threads 6 --wheel
 ```
 
 It is now possible to use the generated wheels with `build_apps`, as explained

+ 6 - 2
cmake/macros/Interrogate.cmake

@@ -283,7 +283,7 @@ endfunction(interrogate_sources)
 
 #
 # Function: add_python_module(module [lib1 [lib2 ...]] [LINK lib1 ...]
-#    [IMPORT mod1 ...])
+#    [IMPORT mod1 ...] [INIT func1 ...])
 # Uses interrogate to create a Python module. If the LINK keyword is specified,
 # the Python module is linked against the specified libraries instead of those
 # listed before. The IMPORT keyword makes the output module import another
@@ -305,7 +305,7 @@ function(add_python_module module)
 
   set(keyword)
   foreach(arg ${ARGN})
-    if(arg STREQUAL "LINK" OR arg STREQUAL "IMPORT" OR arg STREQUAL "COMPONENT")
+    if(arg STREQUAL "LINK" OR arg STREQUAL "IMPORT" OR arg STREQUAL "INIT" OR arg STREQUAL "COMPONENT")
       set(keyword "${arg}")
 
     elseif(keyword STREQUAL "LINK")
@@ -316,6 +316,10 @@ function(add_python_module module)
       list(APPEND import_flags "-import" "${arg}")
       set(keyword)
 
+    elseif(keyword STREQUAL "INIT")
+      list(APPEND import_flags "-init" "${arg}")
+      set(keyword)
+
     elseif(keyword STREQUAL "COMPONENT")
       set(component "${arg}")
       set(keyword)

+ 20 - 1
direct/src/dist/FreezeTool.py

@@ -88,6 +88,7 @@ defaultHiddenImports = {
         'numpy.core._dtype_ctypes',
         'numpy.core._methods',
     ],
+    'panda3d.core': ['enum'],
     'pandas.compat': ['lzma', 'cmath'],
     'pandas._libs.tslibs.conversion': ['pandas._libs.tslibs.base'],
     'plyer': ['plyer.platforms'],
@@ -925,7 +926,22 @@ class Freezer:
             if sys.version_info < (3, 8):
                 abi_flags += 'm'
 
-            if 'linux' in self.platform:
+            if 'android' in self.platform:
+                arch = self.platform.split('_', 1)[1]
+                if arch in ('arm64', 'aarch64'):
+                    suffixes.append(('.cpython-{0}{1}-aarch64-linux-android.so'.format(abi_version, abi_flags), 'rb', 3))
+                elif arch in ('arm', 'armv7l'):
+                    suffixes.append(('.cpython-{0}{1}-arm-linux-androideabi.so'.format(abi_version, abi_flags), 'rb', 3))
+                elif arch in ('x86_64', 'amd64'):
+                    suffixes.append(('.cpython-{0}{1}-x86_64-linux-android.so'.format(abi_version, abi_flags), 'rb', 3))
+                elif arch in ('i386', 'i686'):
+                    suffixes.append(('.cpython-{0}{1}-i686-linux-android.so'.format(abi_version, abi_flags), 'rb', 3))
+
+                suffixes += [
+                    ('.abi{0}.so'.format(sys.version_info[0]), 'rb', 3),
+                    ('.so', 'rb', 3),
+                ]
+            elif 'linux' in self.platform:
                 suffixes += [
                     ('.cpython-{0}{1}-x86_64-linux-gnu.so'.format(abi_version, abi_flags), 'rb', 3),
                     ('.cpython-{0}{1}-i686-linux-gnu.so'.format(abi_version, abi_flags), 'rb', 3),
@@ -1150,6 +1166,9 @@ class Freezer:
             self.modules['_frozen_importlib'] = self.ModuleDef('importlib._bootstrap', implicit = True)
             self.modules['_frozen_importlib_external'] = self.ModuleDef('importlib._bootstrap_external', implicit = True)
 
+            if self.platform.startswith('android'):
+                self.modules['_android_support'] = self.ModuleDef('_android_support', implicit = True)
+
             for moduleName in startupModules:
                 if moduleName not in self.modules:
                     self.addModule(moduleName, implicit = True)

+ 6 - 3
direct/src/dist/_android.py

@@ -50,7 +50,7 @@ def flag_resource(id, **values):
         bitmask = 0
         flags = attrib.value.split('|')
         for flag in flags:
-            bitmask = values[flag]
+            bitmask |= values[flag]
         attrib.compiled_item.prim.int_hexadecimal_value = bitmask
     return compile
 
@@ -168,10 +168,11 @@ ANDROID_ATTRIBUTES = {
     'allowSingleTap': bool_resource(0x1010259),
     'allowTaskReparenting': bool_resource(0x1010204),
     'alwaysRetainTaskState': bool_resource(0x1010203),
+    'appCategory': enum_resource(0x01010545, "game", "audio", "video", "image", "social", "news", "maps", "productivity", "accessibility"),
     'clearTaskOnLaunch': bool_resource(0x1010015),
+    'configChanges': flag_resource(0x0101001f, mcc=0x0001, mnc=0x0002, locale=0x0004, touchscreen=0x0008, keyboard=0x0010, keyboardHidden=0x0020, navigation=0x0040, orientation=0x0080, screenLayout=0x0100, uiMode=0x0200, screenSize=0x0400, smallestScreenSize=0x0800, layoutDirection=0x2000, colorMode=0x4000, grammaticalGender=0x8000, fontScale=0x40000000, fontWeightAdjustment=0x10000000),
     'debuggable': bool_resource(0x0101000f),
     'documentLaunchMode': enum_resource(0x1010445, "none", "intoExisting", "always", "never"),
-    'configChanges': flag_resource(0x0101001f, mcc=0x0001, mnc=0x0002, locale=0x0004, touchscreen=0x0008, keyboard=0x0010, keyboardHidden=0x0020, navigation=0x0040, orientation=0x0080, screenLayout=0x0100, uiMode=0x0200, screenSize=0x0400, smallestScreenSize=0x0800, layoutDirection=0x2000, fontScale=0x40000000),
     'enabled': bool_resource(0x101000e),
     'excludeFromRecents': bool_resource(0x1010017),
     'exported': bool_resource(0x1010010),
@@ -179,6 +180,7 @@ ANDROID_ATTRIBUTES = {
     'finishOnTaskLaunch': bool_resource(0x1010014),
     'fullBackupContent': bool_resource(0x10104eb),
     'glEsVersion': int_resource(0x1010281),
+    'hardwareAccelerated': bool_resource(0x10102d3),
     'hasCode': bool_resource(0x101000c),
     'host': str_resource(0x1010028),
     'icon': ref_resource(0x1010002),
@@ -194,8 +196,9 @@ ANDROID_ATTRIBUTES = {
     'name': str_resource(0x1010003),
     'noHistory': bool_resource(0x101022d),
     'pathPattern': str_resource(0x101002c),
-    'resizeableActivity': bool_resource(0x10104f6),
+    'preferMinimalPostProcessing': bool_resource(0x101060c),
     'required': bool_resource(0x101028e),
+    'resizeableActivity': bool_resource(0x10104f6),
     'scheme': str_resource(0x1010027),
     'screenOrientation': enum_resource(0x101001e, 'landscape', 'portrait', 'user', 'behind', 'sensor', 'nosensor', 'sensorLandscape', 'sensorPortrait', 'reverseLandscape', 'reversePortrait', 'fullSensor', 'userLandscape', 'userPortrait', 'fullUser', 'locked'),
     'stateNotNeeded': bool_resource(0x1010016),

+ 66 - 26
direct/src/dist/commands.py

@@ -188,11 +188,26 @@ FrozenImporter.get_data = get_data
 """
 
 SITE_PY_ANDROID = """
+# Define this first, before we import anything that might import an extension
+# module.
 import sys, os
+from importlib import _bootstrap, _bootstrap_external
+
+class AndroidExtensionFinder:
+    @classmethod
+    def find_spec(cls, fullname, path=None, target=None):
+        soname = 'libpy.' + fullname + '.so'
+        path = os.path.join(sys.platlibdir, soname)
+
+        if os.path.exists(path):
+            loader = _bootstrap_external.ExtensionFileLoader(fullname, path)
+            return _bootstrap.ModuleSpec(fullname, loader, origin=path)
+
+
+sys.meta_path.append(AndroidExtensionFinder)
+
+
 from _frozen_importlib import _imp, FrozenImporter
-from importlib import _bootstrap_external
-from importlib.abc import Loader, MetaPathFinder
-from importlib.machinery import ModuleSpec
 from io import RawIOBase, TextIOWrapper
 
 from android_log import write as android_log_write
@@ -242,8 +257,9 @@ class AndroidLogStream:
     def writable(self):
         return True
 
-sys.stdout = AndroidLogStream(2, 'Python')
-sys.stderr = AndroidLogStream(3, 'Python')
+if sys.version_info < (3, 13):
+    sys.stdout = AndroidLogStream(4, 'python.stdout')
+    sys.stderr = AndroidLogStream(5, 'python.stderr')
 
 
 # Alter FrozenImporter to give a __file__ property to frozen modules.
@@ -262,20 +278,6 @@ def get_data(path):
 
 FrozenImporter.find_spec = find_spec
 FrozenImporter.get_data = get_data
-
-
-class AndroidExtensionFinder(MetaPathFinder):
-    @classmethod
-    def find_spec(cls, fullname, path=None, target=None):
-        soname = 'libpy.' + fullname + '.so'
-        path = os.path.join(os.path.dirname(sys.executable), soname)
-
-        if os.path.exists(path):
-            loader = _bootstrap_external.ExtensionFileLoader(fullname, path)
-            return ModuleSpec(fullname, loader, origin=path)
-
-
-sys.meta_path.append(AndroidExtensionFinder)
 """
 
 
@@ -294,6 +296,7 @@ class build_apps(setuptools.Command):
         self.application_id = None
         self.android_abis = None
         self.android_debuggable = False
+        self.android_app_category = None
         self.android_version_code = 1
         self.android_min_sdk_version = 21
         self.android_max_sdk_version = None
@@ -516,6 +519,18 @@ class build_apps(setuptools.Command):
         tmp.update(self.package_data_dirs)
         self.package_data_dirs = tmp
 
+        if 'android' in self.platforms:
+            assert self.application_id, \
+                'Must have a valid application_id when targeting Android!'
+
+            parts = self.application_id.split('.')
+            assert len(parts) >= 2, \
+                'application_id must contain at least one \'.\' separator!'
+
+            for part in parts:
+                assert part.isidentifier(), \
+                    'Each part of application_id must be a valid identifier!'
+
         # Default to all supported ABIs (for the given Android version).
         if self.android_max_sdk_version and self.android_max_sdk_version < 21:
             assert self.android_max_sdk_version >= 19, \
@@ -782,10 +797,29 @@ class build_apps(setuptools.Command):
         version = self.distribution.get_version()
         classifiers = self.distribution.get_classifiers()
 
-        is_game = False
-        for classifier in classifiers:
-            if classifier == 'Topic :: Games/Entertainment' or classifier.startswith('Topic :: Games/Entertainment ::'):
-                is_game = True
+        # If we have no app category, determine it based on the classifiers.
+        category = self.android_app_category
+        if not category:
+            for classifier in classifiers:
+                classifier = tuple(classifier.split(' :: '))  # e.g. ('Topic', 'Multimedia', 'Audio')
+                if len(classifier) < 2 or classifier[0] != 'Topic':
+                    continue
+
+                if classifier[:2] == ('Topic', 'Games/Entertainment'):
+                    category = 'game'
+                    break  # a game classifier wins over any other topic
+                elif classifier[:3] == ('Topic', 'Multimedia', 'Audio'):
+                    category = 'audio'
+                elif classifier[:4] == ('Topic', 'Multimedia', 'Graphics', 'Editors'):
+                    category = 'image'
+                elif classifier[:3] == ('Topic', 'Communications', 'Usenet News'):  # 3-part prefix needs [:3]
+                    category = 'news'
+                elif classifier[:2] == ('Topic', 'Office/Business'):
+                    category = 'productivity'
+                elif classifier[:3] == ('Topic', 'Communications', 'Chat'):
+                    category = 'social'
+                elif classifier[:3] == ('Topic', 'Multimedia', 'Video'):
+                    category = 'video'
 
         manifest = ET.Element('manifest')
         manifest.set('xmlns:android', 'http://schemas.android.com/apk/res/android')
@@ -816,9 +850,13 @@ class build_apps(setuptools.Command):
 
         application = ET.SubElement(manifest, 'application')
         application.set('android:label', name)
-        application.set('android:isGame', ('false', 'true')[is_game])
+        if category == 'game':
+            application.set('android:isGame', 'true')
+        if category:
+            application.set('android:appCategory', category)
         application.set('android:debuggable', ('false', 'true')[self.android_debuggable])
         application.set('android:extractNativeLibs', 'true')
+        application.set('android:hardwareAccelerated', 'true')
 
         app_icon = self.icon_objects.get('*', self.icon_objects.get(self.macos_main_app))
         if app_icon:
@@ -828,9 +866,11 @@ class build_apps(setuptools.Command):
             activity = ET.SubElement(application, 'activity')
             activity.set('android:name', 'org.panda3d.android.PythonActivity')
             activity.set('android:label', appname)
-            activity.set('android:theme', '@android:style/Theme.NoTitleBar')
-            activity.set('android:configChanges', 'orientation|keyboardHidden')
+            activity.set('android:theme', '@android:style/Theme.NoTitleBar.Fullscreen')
+            activity.set('android:alwaysRetainTaskState', 'true')
+            activity.set('android:configChanges', 'layoutDirection|locale|grammaticalGender|fontScale|fontWeightAdjustment|orientation|uiMode|screenLayout|screenSize|smallestScreenSize|keyboard|keyboardHidden|navigation')
             activity.set('android:launchMode', 'singleInstance')
+            activity.set('android:preferMinimalPostProcessing', 'true')
 
             act_icon = self.icon_objects.get(appname)
             if act_icon and act_icon is not app_icon:

+ 2 - 2
direct/src/distributed/DistributedObjectAI.py

@@ -299,7 +299,7 @@ class DistributedObjectAI(DistributedObjectBase):
         # setLocation destroys self._zoneData if we move away to
         # a different zone
         if self._zoneData is None:
-            from otp.ai.AIZoneData import AIZoneData  # type: ignore[import]
+            from otp.ai.AIZoneData import AIZoneData  # type: ignore[import-not-found]
             self._zoneData = AIZoneData(self.air, self.parentId, self.zoneId)
         return self._zoneData
 
@@ -489,7 +489,7 @@ class DistributedObjectAI(DistributedObjectBase):
         # simultaneously on different lists of avatars, although they
         # should have different names.
 
-        from otp.ai import Barrier  # type: ignore[import]
+        from otp.ai import Barrier  # type: ignore[import-not-found]
         context = self.__nextBarrierContext
         # We assume the context number is passed as a uint16.
         self.__nextBarrierContext = (self.__nextBarrierContext + 1) & 0xffff

+ 1 - 1
direct/src/distributed/DistributedObjectUD.py

@@ -424,7 +424,7 @@ class DistributedObjectUD(DistributedObjectBase):
         # simultaneously on different lists of avatars, although they
         # should have different names.
 
-        from otp.ai import Barrier  # type: ignore[import]
+        from otp.ai import Barrier  # type: ignore[import-not-found]
         context = self.__nextBarrierContext
         # We assume the context number is passed as a uint16.
         self.__nextBarrierContext = (self.__nextBarrierContext + 1) & 0xffff

+ 12 - 5
direct/src/showbase/Loader.py

@@ -126,7 +126,7 @@ class Loader(DirectObject):
                 yield await req
 
     # special methods
-    def __init__(self, base):
+    def __init__(self, base=None):
         self.base = base
         self.loader = PandaLoader.getGlobalPtr()
 
@@ -134,15 +134,15 @@ class Loader(DirectObject):
 
         self.hook = "async_loader_%s" % (Loader.loaderIndex)
         Loader.loaderIndex += 1
-        self.accept(self.hook, self.__gotAsyncObject)
-
-        self._loadPythonFileTypes()
 
     def destroy(self):
         self.ignore(self.hook)
         self.loader.stopThreads()
         del self.base
-        del self.loader
+
+    def _init_base(self, base):
+        self.base = base
+        self.accept(self.hook, self.__gotAsyncObject)
 
     @classmethod
     def _loadPythonFileTypes(cls):
@@ -229,6 +229,10 @@ class Loader(DirectObject):
         """
 
         assert Loader.notify.debug("Loading model: %s" % (modelPath,))
+
+        if not self._loadedPythonFileTypes:
+            self._loadPythonFileTypes()
+
         if loaderOptions is None:
             loaderOptions = LoaderOptions()
         else:
@@ -416,6 +420,9 @@ class Loader(DirectObject):
         a callback is used, the model is saved asynchronously, and the
         true/false status is passed to the callback function. """
 
+        if not self._loadedPythonFileTypes:
+            self._loadPythonFileTypes()
+
         if loaderOptions is None:
             loaderOptions = LoaderOptions()
         else:

+ 27 - 23
direct/src/showbase/ShowBase.py

@@ -177,6 +177,8 @@ class ShowBase(DirectObject.DirectObject):
     aspect2d: NodePath
     pixel2d: NodePath
 
+    cluster: Any | None
+
     def __init__(self, fStartDirect: bool = True, windowType: str | None = None) -> None:
         """Opens a window, sets up a 3-D and several 2-D scene graphs, and
         everything else needed to render the scene graph to the window.
@@ -436,9 +438,9 @@ class ShowBase(DirectObject.DirectObject):
         self.useTrackball()
 
         #: `.Loader.Loader` object.
-        self.loader = Loader.Loader(self)
+        self.loader = ShowBaseGlobal.loader
+        self.loader._init_base(self)
         self.graphicsEngine.setDefaultLoader(self.loader.loader)
-        ShowBaseGlobal.loader = self.loader
 
         #: The global event manager, as imported from `.EventManagerGlobal`.
         self.eventMgr = eventMgr
@@ -679,7 +681,7 @@ class ShowBase(DirectObject.DirectObject):
         complete.
         """
 
-        if Thread.getCurrentThread() != Thread.getMainThread():
+        if sys.platform != "android" and Thread.getCurrentThread() != Thread.getMainThread():
             task = taskMgr.add(self.destroy, extraArgs=[])
             task.wait()
             return
@@ -1208,7 +1210,7 @@ class ShowBase(DirectObject.DirectObject):
             self.taskMgr.remove('clientSleep')
             self.taskMgr.add(self.__sleepCycleTask, 'clientSleep', sort = 55)
 
-    def __sleepCycleTask(self, task):
+    def __sleepCycleTask(self, task: object) -> int:
         Thread.sleep(self.clientSleep)
         #time.sleep(self.clientSleep)
         return Task.cont
@@ -1444,7 +1446,7 @@ class ShowBase(DirectObject.DirectObject):
         self.__configAspectRatio = aspectRatio
         self.adjustWindowAspectRatio(self.getAspectRatio())
 
-    def getAspectRatio(self, win = None):
+    def getAspectRatio(self, win: GraphicsOutput | None = None) -> float:
         # Returns the actual aspect ratio of the indicated (or main
         # window), or the default aspect ratio if there is not yet a
         # main window.
@@ -1453,7 +1455,7 @@ class ShowBase(DirectObject.DirectObject):
         if self.__configAspectRatio:
             return self.__configAspectRatio
 
-        aspectRatio = 1
+        aspectRatio: float = 1
 
         if win is None:
             win = self.win
@@ -1476,7 +1478,7 @@ class ShowBase(DirectObject.DirectObject):
 
         return aspectRatio
 
-    def getSize(self, win = None):
+    def getSize(self, win: GraphicsOutput | None = None) -> tuple[int, int]:
         """
         Returns the actual size of the indicated (or main window), or the
         default size if there is not yet a main window.
@@ -2176,7 +2178,7 @@ class ShowBase(DirectObject.DirectObject):
                 music.setLoop(looping)
                 music.play()
 
-    def __resetPrevTransform(self, state):
+    def __resetPrevTransform(self, state: object) -> int:
         # Clear out the previous velocity deltas now, after we have
         # rendered (the previous frame).  We do this after the render,
         # so that we have a chance to draw a representation of spheres
@@ -2187,7 +2189,7 @@ class ShowBase(DirectObject.DirectObject):
         PandaNode.resetAllPrevTransform()
         return Task.cont
 
-    def __dataLoop(self, state):
+    def __dataLoop(self, state: object) -> int:
         # Check if there were newly connected devices.
         self.devices.update()
 
@@ -2197,7 +2199,7 @@ class ShowBase(DirectObject.DirectObject):
         self.dgTrav.traverse(self.dataRootNode)
         return Task.cont
 
-    def __ivalLoop(self, state):
+    def __ivalLoop(self, state: object) -> int:
         # Execute all intervals in the global ivalMgr.
         IntervalManager.ivalMgr.step()
         return Task.cont
@@ -2215,7 +2217,7 @@ class ShowBase(DirectObject.DirectObject):
             self.shadowTrav.traverse(self.render)
         return Task.cont
 
-    def __collisionLoop(self, state):
+    def __collisionLoop(self, state: object) -> int:
         # run the collision traversal if we have a
         # CollisionTraverser set.
         if self.cTrav:
@@ -2227,14 +2229,14 @@ class ShowBase(DirectObject.DirectObject):
         messenger.send("collisionLoopFinished")
         return Task.cont
 
-    def __audioLoop(self, state):
+    def __audioLoop(self, state: object) -> int:
         if self.musicManager is not None:
             self.musicManager.update()
         for x in self.sfxManagerList:
             x.update()
         return Task.cont
 
-    def __garbageCollectStates(self, state):
+    def __garbageCollectStates(self, state: object) -> int:
         """ This task is started only when we have
         garbage-collect-states set in the Config.prc file, in which
         case we're responsible for taking out Panda's garbage from
@@ -2245,7 +2247,7 @@ class ShowBase(DirectObject.DirectObject):
         RenderState.garbageCollect()
         return Task.cont
 
-    def __igLoop(self, state):
+    def __igLoop(self, state: object) -> int:
         if __debug__:
             # We render the watch variables for the onScreenDebug as soon
             # as we reasonably can before the renderFrame().
@@ -2285,7 +2287,7 @@ class ShowBase(DirectObject.DirectObject):
         throw_new_frame()
         return Task.cont
 
-    def __igLoopSync(self, state):
+    def __igLoopSync(self, state: object) -> int:
         if __debug__:
             # We render the watch variables for the onScreenDebug as soon
             # as we reasonably can before the renderFrame().
@@ -2294,6 +2296,7 @@ class ShowBase(DirectObject.DirectObject):
         if self.recorder:
             self.recorder.recordFrame()
 
+        assert self.cluster is not None
         self.cluster.collectData()
 
         # Finally, render the frame.
@@ -2323,6 +2326,7 @@ class ShowBase(DirectObject.DirectObject):
             time.sleep(0.1)
 
         self.graphicsEngine.readyFlip()
+        assert self.cluster is not None
         self.cluster.waitForFlipCommand()
         self.graphicsEngine.flipFrame()
 
@@ -2753,7 +2757,7 @@ class ShowBase(DirectObject.DirectObject):
                 self.oobeVis.reparentTo(self.camera)
             self.oobeMode = 1
 
-    def __oobeButton(self, suffix, button):
+    def __oobeButton(self, suffix: str, button: str) -> None:
         if button.startswith('mouse'):
             # Eat mouse buttons.
             return
@@ -3073,7 +3077,7 @@ class ShowBase(DirectObject.DirectObject):
         else:
             return Task.cont
 
-    def windowEvent(self, win):
+    def windowEvent(self, win: GraphicsOutput) -> None:
         if win != self.win:
             # This event isn't about our window.
             return
@@ -3092,9 +3096,9 @@ class ShowBase(DirectObject.DirectObject):
                 self.userExit()
 
             if properties.getForeground() and not self.mainWinForeground:
-                self.mainWinForeground = 1
+                self.mainWinForeground = True
             elif not properties.getForeground() and self.mainWinForeground:
-                self.mainWinForeground = 0
+                self.mainWinForeground = False
                 if __debug__:
                     if self.__autoGarbageLogging:
                         GarbageReport.b_checkForGarbageLeaks()
@@ -3102,12 +3106,12 @@ class ShowBase(DirectObject.DirectObject):
             if properties.getMinimized() and not self.mainWinMinimized:
                 # If the main window is minimized, throw an event to
                 # stop the music.
-                self.mainWinMinimized = 1
+                self.mainWinMinimized = True
                 messenger.send('PandaPaused')
             elif not properties.getMinimized() and self.mainWinMinimized:
                 # If the main window is restored, throw an event to
                 # restart the music.
-                self.mainWinMinimized = 0
+                self.mainWinMinimized = False
                 messenger.send('PandaRestarted')
 
             # If we have not forced the aspect ratio, let's see if it has
@@ -3125,7 +3129,7 @@ class ShowBase(DirectObject.DirectObject):
                     if self.wantRender2dp:
                         self.pixel2dp.setScale(2.0 / xsize, 1.0, 2.0 / ysize)
 
-    def adjustWindowAspectRatio(self, aspectRatio):
+    def adjustWindowAspectRatio(self, aspectRatio: float) -> None:
         """ This function is normally called internally by
         `windowEvent()`, but it may also be called to explicitly adjust
         the aspect ratio of the render/render2d DisplayRegion, by a
@@ -3439,7 +3443,7 @@ class ShowBase(DirectObject.DirectObject):
         This method must be called from the main thread, otherwise an error is
         thrown.
         """
-        if Thread.getCurrentThread() != Thread.getMainThread():
+        if Thread.getCurrentThread() != Thread.getMainThread() and sys.platform != "android":
             self.notify.error("run() must be called from the main thread.")
             return
 

+ 2 - 1
direct/src/showbase/ShowBaseGlobal.py

@@ -62,7 +62,8 @@ aspect2d = render2d.attachNewNode(PGTop("aspect2d"))
 #: A dummy scene graph that is not being rendered by anything.
 hidden = NodePath("hidden")
 
-loader: Loader
+#: The global Loader instance for models, textures, etc.
+loader = Loader()
 
 # Set direct notify categories now that we have config
 directNotify.setDconfigLevels()

+ 1 - 1
direct/src/task/Task.py

@@ -28,7 +28,7 @@ if hasattr(sys, 'getandroidapilevel'):
     signal = None
 else:
     try:
-        import _signal as signal  # type: ignore[import, no-redef]
+        import _signal as signal  # type: ignore[import-not-found, no-redef]
     except ImportError:
         signal = None
 

+ 1 - 1
dtool/Config.cmake

@@ -265,7 +265,7 @@ if(BUILD_INTERROGATE)
     panda3d-interrogate
 
     GIT_REPOSITORY https://github.com/panda3d/interrogate.git
-    GIT_TAG 03418d6d7ddda7fb99abf27230aa42d1d8bd607e
+    GIT_TAG d2844d994fcc465a4e22b10001d3ac5c4012b814
 
     PREFIX ${_interrogate_dir}
     CMAKE_ARGS

+ 2 - 0
dtool/src/dtoolutil/CMakeLists.txt

@@ -69,6 +69,8 @@ set(P3DTOOLUTIL_IGATEEXT
   globPattern_ext.h
   iostream_ext.cxx
   iostream_ext.h
+  pyenv_init.cxx
+  pyenv_init.h
   textEncoder_ext.cxx
   textEncoder_ext.h
 )

+ 50 - 0
dtool/src/dtoolutil/console_preamble.js

@@ -0,0 +1,50 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file console_preamble.js
+ * @author rdb
+ * @date 2025-02-03
+ */
+
+// When running under node.js, mirror the host environment into the module:
+// first into emscripten's ENV table (at preInit), then into the exported C
+// functions _set_binary_name and _set_env_var (right before main() runs).
+if (ENVIRONMENT_IS_NODE) {
+  Module["preInit"] = Module["preInit"] || [];
+  Module["preInit"].push(function() {
+    if (typeof process === "object" && typeof process.env === "object") {
+      // These are made up by emscripten if we don't set them to undefined
+      ENV['USER'] = undefined;
+      ENV['LOGNAME'] = undefined;
+      ENV['PATH'] = undefined;
+      ENV['PWD'] = undefined;
+      ENV['HOME'] = undefined;
+      ENV['LANG'] = undefined;
+      ENV['_'] = undefined;
+      for (var variable in process.env) {
+        ENV[variable] = process.env[variable];
+      }
+    }
+
+    addOnPreMain(function preloadNodeEnv() {
+      // The exported functions copy the strings they are given, so the
+      // temporary stack copies can be released after every single call.
+      // This keeps stack usage bounded by the largest variable instead of
+      // by the size of the entire environment.
+      var sp = stackSave();
+
+      // Either export may be missing if it was not linked into this module.
+      var set_binary_name = wasmExports["_set_binary_name"];
+      if (set_binary_name && typeof __filename === "string") {
+        set_binary_name(stringToUTF8OnStack(__filename));
+        stackRestore(sp);
+      }
+
+      var set_env_var = wasmExports["_set_env_var"];
+      if (set_env_var) {
+        for (var variable in ENV) {
+          var value = ENV[variable];
+          // Entries explicitly set to undefined above are not passed on.
+          if (value !== undefined) {
+            set_env_var(stringToUTF8OnStack(variable), stringToUTF8OnStack(value));
+            stackRestore(sp);
+          }
+        }
+      }
+      stackRestore(sp);
+    });
+  });
+}

+ 12 - 13
dtool/src/dtoolutil/executionEnvironment.cxx

@@ -125,12 +125,18 @@ static const char *const libp3dtool_filenames[] = {
 
 #if defined(__EMSCRIPTEN__) && !defined(CPPPARSER)
 extern "C" void EMSCRIPTEN_KEEPALIVE
-_set_env_var(ExecutionEnvironment *ptr, const char *var, const char *value) {
+_set_env_var(const char *var, const char *value) {
+  ExecutionEnvironment *ptr = ExecutionEnvironment::get_ptr();
   ptr->_variables[std::string(var)] = std::string(value);
 }
+
+/**
+ * Exported entry point that forwards the given path to
+ * ExecutionEnvironment::set_binary_name().  The string is copied before it
+ * is handed over.
+ */
+extern "C" void EMSCRIPTEN_KEEPALIVE
+_set_binary_name(const char *path) {
+  const std::string binary_name(path);
+  ExecutionEnvironment::set_binary_name(binary_name);
+}
 #endif
 
-// Linux with GNU libc does have global argvargc variables, but we can't
+// Linux with GNU libc does have global argv/argc variables, but we can't
 // safely access them at stat init time--at least, not in libc5. (It does seem
 // to work with glibc2, however.)
 
@@ -584,17 +590,10 @@ read_environment_variables() {
     }
   }
 #elif defined(__EMSCRIPTEN__)
-  // We only have environment variables if we're running in node.js.
-#ifndef CPPPARSER
-  EM_ASM({
-    if (typeof process === 'object' && typeof process.env === 'object') {
-      for (var variable in process.env) {
-        __set_env_var($0, stringToUTF8OnStack(variable),
-                          stringToUTF8OnStack(process.env[variable]));
-      }
-    }
-  }, this);
-#endif
+  // The environment variables get loaded in by the .js file before main()
+  // using the _set_env_var exported function, defined above.  Trying to load
+  // env vars at static init time otherwise makes some optimizations more
+  // difficult, notably wasm-ctor-eval/wizer.
 
 #elif defined(HAVE_PROC_SELF_ENVIRON)
   // In some cases, we may have a file called /proc/self/environ that may be read

+ 4 - 4
dtool/src/dtoolutil/executionEnvironment.h

@@ -22,10 +22,10 @@
 #include <map>
 
 #if defined(__EMSCRIPTEN__) && !defined(CPPPARSER)
-class ExecutionEnvironment;
-
 extern "C" void EMSCRIPTEN_KEEPALIVE
-_set_env_var(ExecutionEnvironment *ptr, const char *var, const char *value);
+_set_env_var(const char *var, const char *value);
+extern "C" void EMSCRIPTEN_KEEPALIVE
+_set_binary_name(const char *path);
 #endif
 
 /**
@@ -98,7 +98,7 @@ private:
   static ExecutionEnvironment *_global_ptr;
 
 #ifdef __EMSCRIPTEN__
-  friend void ::_set_env_var(ExecutionEnvironment *ptr, const char *var, const char *value);
+  friend void ::_set_env_var(const char *var, const char *value);
 #endif
 };
 

+ 1 - 0
dtool/src/dtoolutil/p3dtoolutil_ext_composite.cxx

@@ -1,4 +1,5 @@
 #include "filename_ext.cxx"
 #include "globPattern_ext.cxx"
 #include "iostream_ext.cxx"
+#include "pyenv_init.cxx"
 #include "textEncoder_ext.cxx"

+ 80 - 0
dtool/src/dtoolutil/pyenv_init.cxx

@@ -0,0 +1,80 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file pyenv_init.cxx
+ * @author rdb
+ * @date 2025-02-02
+ */
+
+#include "pyenv_init.h"
+#include "py_panda.h"
+#include "executionEnvironment.h"
+
+/**
+ * Called when panda3d.core is initialized, does some initialization specific
+ * to the Python environment.
+ *
+ * On the first call, unless MAIN_DIR is already defined, it is set to the
+ * directory containing the __file__ of the __main__ module, or to the current
+ * working directory if that attribute cannot be obtained.  Any Python error
+ * raised while doing so is cleared.  When built with both HAVE_THREADS and
+ * SIMPLE_THREADS, every call also installs PyThreadState_Swap as the global
+ * thread state swap hook.
+ */
+void
+pyenv_init() {
+  // MAIN_DIR needs to be set very early; this seems like a convenient place
+  // to do that.  Perhaps we'll find a better place for this in the future.
+  static bool initialized_main_dir = false;
+  if (!initialized_main_dir) {
+    /*if (interrogatedb_cat.is_debug()) {
+      // Good opportunity to print this out once, at startup.
+      interrogatedb_cat.debug()
+        << "Python " << version << "\n";
+    }*/
+
+    if (!ExecutionEnvironment::has_environment_variable("MAIN_DIR")) {
+      // Grab the __main__ module and extract its __file__ attribute.  Both
+      // calls hand us a new reference, which we release further below.
+      Filename main_dir;
+      PyObject *main_module = PyImport_ImportModule("__main__");
+      PyObject *file_attr = nullptr;
+      if (main_module != nullptr) {
+        file_attr = PyObject_GetAttrString(main_module, "__file__");
+      } else {
+        std::cerr << "Warning: unable to import __main__\n";
+      }
+      if (file_attr == nullptr) {
+        // Must be running in the interactive interpreter.  Use the CWD.
+        main_dir = ExecutionEnvironment::get_cwd();
+      } else {
+#if PY_MAJOR_VERSION >= 3
+        Py_ssize_t length;
+        wchar_t *buffer = PyUnicode_AsWideCharString(file_attr, &length);
+        if (buffer != nullptr) {
+          main_dir = Filename::from_os_specific_w(std::wstring(buffer, length));
+          main_dir.make_absolute();
+          main_dir = main_dir.get_dirname();
+          PyMem_Free(buffer);
+        }
+#else
+        char *buffer;
+        Py_ssize_t length;
+        if (PyString_AsStringAndSize(file_attr, &buffer, &length) != -1) {
+          main_dir = Filename::from_os_specific(std::string(buffer, length));
+          main_dir.make_absolute();
+          main_dir = main_dir.get_dirname();
+        }
+#endif
+        else {
+          std::cerr << "Invalid string for __main__.__file__\n";
+        }
+      }
+      ExecutionEnvironment::shadow_environment_variable("MAIN_DIR", main_dir.to_os_specific());
+
+      // A missing __file__ or a failed conversion leaves an exception set;
+      // none of those are errors as far as the caller is concerned.
+      PyErr_Clear();
+
+      // Release the references acquired above so that they are not leaked.
+      Py_XDECREF(file_attr);
+      Py_XDECREF(main_module);
+    }
+    initialized_main_dir = true;
+  }
+
+  // Also, while we are at it, initialize the thread swap hook.
+#if defined(HAVE_THREADS) && defined(SIMPLE_THREADS)
+  global_thread_state_swap = PyThreadState_Swap;
+#endif
+}

+ 14 - 0
dtool/src/dtoolutil/pyenv_init.h

@@ -0,0 +1,14 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file pyenv_init.h
+ * @author rdb
+ * @date 2025-02-02
+ */
+
+extern "C" void pyenv_init();

+ 3 - 3
dtool/src/prc/notify.cxx

@@ -467,12 +467,12 @@ config_initialized() {
 
   Notify *ptr = Notify::ptr();
 
-  for (int i = 0; i <= NS_fatal; ++i) {
+  for (int severity = 0; severity <= NS_fatal; ++severity) {
     int priority = ANDROID_LOG_UNKNOWN;
     if (severity != NS_unspecified) {
-      priority = i + 1;
+      priority = severity + 1;
     }
-    ptr->_log_streams[i] = new AndroidLogStream(priority);
+    ptr->_log_streams[severity] = new AndroidLogStream(priority);
   }
 
 #elif defined(__EMSCRIPTEN__)

+ 1 - 1
makepanda/makepackage.py

@@ -863,7 +863,7 @@ def MakeInstallerAndroid(version, **kwargs):
         shutil.copy(source, target)
 
         # Walk through the library dependencies.
-        handle = subprocess.Popen(['readelf', '--dynamic', target], stdout=subprocess.PIPE)
+        handle = subprocess.Popen(['llvm-readelf', '--dynamic', target], stdout=subprocess.PIPE)
         for line in handle.communicate()[0].splitlines():
             # The line will look something like:
             # 0x0000000000000001 (NEEDED)             Shared library: [libpanda.so]

+ 33 - 10
makepanda/makepanda.py

@@ -498,6 +498,8 @@ elif not CrossCompiling():
 else:
     if target_arch == 'amd64':
         target_arch = 'x86_64'
+    if target_arch == 'arm' and target == 'android':
+        target_arch = 'armv7a'
     PLATFORM = '{0}-{1}'.format(target, target_arch)
 
 
@@ -1436,10 +1438,10 @@ def CompileCxx(obj,src,opts):
                 cmd += ' -gcc-toolchain ' + SDK["ANDROID_GCC_TOOLCHAIN"].replace('\\', '/')
             cmd += ' -ffunction-sections -funwind-tables'
             cmd += ' -target ' + SDK["ANDROID_TRIPLE"]
-            if arch == 'armv7a':
+            if arch in ('armv7a', 'arm'):
                 cmd += ' -march=armv7-a -mfloat-abi=softfp -mfpu=vfpv3-d16'
-            elif arch == 'arm':
-                cmd += ' -march=armv5te -mtune=xscale -msoft-float'
+            #elif arch == 'arm':
+            #    cmd += ' -march=armv5te -mtune=xscale -msoft-float'
             elif arch == 'mips':
                 cmd += ' -mips32'
             elif arch == 'mips64':
@@ -1714,6 +1716,9 @@ def CompileImod(wobj, wsrc, opts):
     importmod = GetValueOption(opts, "IMPORT:")
     if importmod:
         cmd += ' -import ' + importmod
+    initfunc = GetValueOption(opts, "INIT:")
+    if initfunc:
+        cmd += ' -init ' + initfunc
     for x in wsrc: cmd += ' ' + BracketNameWithQuotes(x)
     oscmd(cmd)
     CompileCxx(wobj,woutc,opts)
@@ -1962,17 +1967,21 @@ def CompileLink(dll, obj, opts):
                 cmd += ' -gcc-toolchain ' + SDK["ANDROID_GCC_TOOLCHAIN"].replace('\\', '/')
             cmd += " -Wl,-z,noexecstack -Wl,-z,relro -Wl,-z,now"
             cmd += ' -target ' + SDK["ANDROID_TRIPLE"]
-            if arch == 'armv7a':
+            if arch in ('armv7a', 'arm'):
                 cmd += " -march=armv7-a -Wl,--fix-cortex-a8"
             elif arch == 'mips':
                 cmd += ' -mips32'
             cmd += ' -lc -lm'
 
         elif GetTarget() == 'emscripten':
-            cmd += " -s WARN_ON_UNDEFINED_SYMBOLS=1"
+            cmd += " -s WARN_ON_UNDEFINED_SYMBOLS=1 -mbulk-memory"
+
             if GetOrigExt(dll) == ".exe":
                 cmd += " -s EXIT_RUNTIME=1"
 
+                if dll.endswith(".js") and "SUBSYSTEM:WINDOWS" not in opts:
+                    cmd += " --pre-js dtool/src/dtoolutil/console_preamble.js"
+
         else:
             cmd += " -pthread"
             if "SYSROOT" in SDK:
@@ -2137,13 +2146,23 @@ def CompileJava(target, src, opts):
     if GetHost() == 'android':
         cmd = "ecj "
     else:
-        cmd = "javac -bootclasspath " + BracketNameWithQuotes(SDK["ANDROID_JAR"]) + " "
+        cmd = "javac "
+        home = os.environ.get('JAVA_HOME')
+        if home:
+            javac_path = os.path.join(home, 'bin', 'javac')
+            if GetHost() == 'windows':
+                javac_path += '.exe'
+            if os.path.isfile(javac_path):
+                cmd = BracketNameWithQuotes(javac_path) + " "
+
+        cmd += "-Xlint:deprecation "
 
     optlevel = GetOptimizeOption(opts)
     if optlevel >= 4:
         cmd += "-debug:none "
 
-    cmd += "-cp " + GetOutputDir() + "/classes "
+    classpath = BracketNameWithQuotes(SDK["ANDROID_JAR"] + ":" + GetOutputDir() + "/classes")
+    cmd += "-cp " + classpath + " "
     cmd += "-d " + GetOutputDir() + "/classes "
     cmd += BracketNameWithQuotes(src)
     oscmd(cmd)
@@ -4092,7 +4111,9 @@ TargetAdd('libp3pgui.in', opts=['IMOD:panda3d.core', 'ILIB:libp3pgui', 'SRCDIR:p
 # DIRECTORY: panda/src/pnmimagetypes/
 #
 
-OPTS=['DIR:panda/src/pnmimagetypes', 'DIR:panda/src/pnmimage', 'BUILDING:PANDA', 'PNG', 'ZLIB', 'JPEG', 'TIFF', 'OPENEXR', 'EXCEPTIONS']
+OPTS=['DIR:panda/src/pnmimagetypes', 'DIR:panda/src/pnmimage', 'BUILDING:PANDA', 'PNG', 'ZLIB', 'JPEG', 'TIFF', 'OPENEXR']
+if not PkgSkip('OPENEXR') and GetTarget() != 'emscripten':
+    OPTS.append('EXCEPTIONS')
 TargetAdd('p3pnmimagetypes_composite1.obj', opts=OPTS, input='p3pnmimagetypes_composite1.cxx')
 TargetAdd('p3pnmimagetypes_composite2.obj', opts=OPTS, input='p3pnmimagetypes_composite2.cxx')
 
@@ -4240,7 +4261,7 @@ if GetTarget() != "emscripten":
 if PkgSkip("FREETYPE")==0:
     PyTargetAdd('core_module.obj', input='libp3pnmtext.in')
 
-PyTargetAdd('core_module.obj', opts=['IMOD:panda3d.core', 'ILIB:core'])
+PyTargetAdd('core_module.obj', opts=['IMOD:panda3d.core', 'ILIB:core', 'INIT:pyenv_init'])
 
 PyTargetAdd('core.pyd', input='libp3dtoolbase_igate.obj')
 PyTargetAdd('core.pyd', input='p3dtoolbase_typeHandle_ext.obj')
@@ -5020,11 +5041,13 @@ if GetTarget() == 'android':
     TargetAdd('org/panda3d/android/NativeIStream.class', opts=OPTS, input='NativeIStream.java')
     TargetAdd('org/panda3d/android/NativeOStream.class', opts=OPTS, input='NativeOStream.java')
     TargetAdd('org/panda3d/android/PandaActivity.class', opts=OPTS, input='PandaActivity.java')
+    TargetAdd('org/panda3d/android/PandaActivity$1.class', opts=OPTS+['DEPENDENCYONLY'], input='PandaActivity.java')
     TargetAdd('org/panda3d/android/PythonActivity.class', opts=OPTS, input='PythonActivity.java')
 
     TargetAdd('classes.dex', input='org/panda3d/android/NativeIStream.class')
     TargetAdd('classes.dex', input='org/panda3d/android/NativeOStream.class')
     TargetAdd('classes.dex', input='org/panda3d/android/PandaActivity.class')
+    TargetAdd('classes.dex', input='org/panda3d/android/PandaActivity$1.class')
     TargetAdd('classes.dex', input='org/panda3d/android/PythonActivity.class')
 
     TargetAdd('p3android_composite1.obj', opts=OPTS, input='p3android_composite1.cxx')
@@ -6069,7 +6092,7 @@ if GetLinkAllStatic():
     if not PkgSkip('BULLET'):
         DefSymbol('RUN_TESTS_FLAGS', 'HAVE_BULLET')
 
-    OPTS=['DIR:tests', 'PYTHON', 'RUN_TESTS_FLAGS']
+    OPTS=['DIR:tests', 'PYTHON', 'RUN_TESTS_FLAGS', 'SUBSYSTEM:CONSOLE']
     PyTargetAdd('run_tests-main.obj', opts=OPTS, input='main.c')
     PyTargetAdd('run_tests.exe', input='run_tests-main.obj')
     PyTargetAdd('run_tests.exe', input='core.pyd')

+ 17 - 14
makepanda/makepandacore.py

@@ -357,11 +357,11 @@ def SetTarget(target, arch=None):
 
     elif target == 'android' or target.startswith('android-'):
         if arch is None:
-            # If compiling on Android, default to same architecture.  Otherwise, arm.
+            # If compiling on Android, default to same architecture.
             if host == 'android':
                 arch = host_arch
             else:
-                arch = 'armv7a'
+                exit('Specify an Android architecture using --arch')
 
         if arch == 'aarch64':
             arch = 'arm64'
@@ -371,12 +371,9 @@ def SetTarget(target, arch=None):
         target, _, api = target.partition('-')
         if api:
             ANDROID_API = int(api)
-        elif arch in ('mips64', 'arm64', 'x86_64'):
-            # 64-bit platforms were introduced in Android 21.
-            ANDROID_API = 21
         else:
             # Default to the lowest API level still supported by Google.
-            ANDROID_API = 19
+            ANDROID_API = 21
 
         # Determine the prefix for our gcc tools, eg. arm-linux-androideabi-gcc
         global ANDROID_ABI, ANDROID_TRIPLE
@@ -592,8 +589,8 @@ def GetInterrogateDir():
             return INTERROGATE_DIR
 
         dir = os.path.join(GetOutputDir(), "tmp", "interrogate")
-        if not os.path.isdir(os.path.join(dir, "panda3d_interrogate-0.4.0.dist-info")):
-            oscmd("\"%s\" -m pip install --force-reinstall --upgrade -t \"%s\" panda3d-interrogate==0.4.0" % (sys.executable, dir))
+        if not os.path.isdir(os.path.join(dir, "panda3d_interrogate-0.5.0.dist-info")):
+            oscmd("\"%s\" -m pip install --force-reinstall --upgrade -t \"%s\" panda3d-interrogate==0.5.0" % (sys.executable, dir))
 
         INTERROGATE_DIR = dir
 
@@ -669,11 +666,12 @@ def oscmd(cmd, ignoreError = False, cwd=None):
         print(GetColor("blue") + cmd.split(" ", 1)[0] + " " + GetColor("magenta") + cmd.split(" ", 1)[1] + GetColor())
     sys.stdout.flush()
 
+    if cmd[0] == '"':
+        exe = cmd[1 : cmd.index('"', 1)]
+    else:
+        exe = cmd.split()[0]
+
     if sys.platform == "win32":
-        if cmd[0] == '"':
-            exe = cmd[1 : cmd.index('"', 1)]
-        else:
-            exe = cmd.split()[0]
         exe_path = LocateBinary(exe)
         if exe_path is None:
             exit("Cannot find "+exe+" on search path")
@@ -709,7 +707,7 @@ def oscmd(cmd, ignoreError = False, cwd=None):
             exit("")
 
     if res != 0 and not ignoreError:
-        if "interrogate" in cmd.split(" ", 1)[0] and GetVerbose():
+        if "interrogate" in exe and "interrogate_module" not in exe and GetVerbose():
             print(ColorText("red", "Interrogate failed, retrieving debug output..."))
             sys.stdout.flush()
             verbose_cmd = cmd.split(" ", 1)[0] + " -vv " + cmd.split(" ", 1)[1]
@@ -1422,6 +1420,9 @@ def GetThirdpartyDir():
     elif (target == 'android'):
         THIRDPARTYDIR = base + "/android-libs-%s/" % (target_arch)
 
+        if target_arch == 'armv7a' and not os.path.isdir(THIRDPARTYDIR):
+            THIRDPARTYDIR = base + "/android-libs-arm/"
+
     elif (target == 'emscripten'):
         THIRDPARTYDIR = base + "/emscripten-libs/"
 
@@ -1858,7 +1859,7 @@ def SmartPkgEnable(pkg, pkgconfig = None, libs = None, incs = None, defs = None,
             DefSymbol(target_pkg, d, v)
         return
 
-    elif not custom_loc and GetHost() == "darwin" and framework is not None:
+    elif not custom_loc and GetHost() == "darwin" and GetTarget() == "darwin" and framework is not None:
         prefix = SDK["MACOSX"]
         if (os.path.isdir(prefix + "/Library/Frameworks/%s.framework" % framework) or
             os.path.isdir(prefix + "/System/Library/Frameworks/%s.framework" % framework) or
@@ -2639,6 +2640,8 @@ def SdkLocateAndroid():
     # We need to redistribute the C++ standard library.
     stdlibc = os.path.join(ndk_root, 'sources', 'cxx-stl', 'llvm-libc++')
     stl_lib = os.path.join(stdlibc, 'libs', abi, 'libc++_shared.so')
+    if not os.path.isfile(stl_lib):
+        stl_lib = os.path.join(prebuilt_dir, 'sysroot', 'usr', 'lib', ANDROID_TRIPLE.rstrip('0123456789'), 'libc++_shared.so')
     CopyFile(os.path.join(GetOutputDir(), 'lib', 'libc++_shared.so'), stl_lib)
 
     # The Android support library polyfills C++ features not available in the

+ 6 - 0
makepanda/makewheel.py

@@ -88,7 +88,9 @@ MANYLINUX_LIBS = [
     # These are not mentioned in manylinux1 spec but should nonetheless always
     # be excluded.
     "linux-vdso.so.1", "linux-gate.so.1", "ld-linux.so.2", "libdrm.so.2",
+    "ld-linux-x86-64.so.2", "ld-linux-aarch64.so.1",
     "libEGL.so.1", "libOpenGL.so.0", "libGLX.so.0", "libGLdispatch.so.0",
+    "libGLESv2.so.2",
 ]
 
 # Binaries to never scan for dependencies on non-Windows systems.
@@ -677,6 +679,7 @@ def makewheel(version, output_dir, platform=None):
         or platform.startswith('win_') \
         or platform.startswith('cygwin_')
     is_macosx = platform.startswith('macosx_')
+    is_android = platform.startswith('android_')
 
     # Global filepaths
     panda3d_dir = join(output_dir, "panda3d")
@@ -747,6 +750,9 @@ def makewheel(version, output_dir, platform=None):
     elif is_macosx:
         pylib_name = 'libpython{0}.{1}{2}.dylib'.format(sys.version_info[0], sys.version_info[1], suffix)
         pylib_path = os.path.join(get_config_var('LIBDIR'), pylib_name)
+    elif is_android and CrossCompiling():
+        pylib_name = 'libpython{0}.{1}{2}.so'.format(sys.version_info[0], sys.version_info[1], suffix)
+        pylib_path = os.path.join(GetThirdpartyDir(), 'python', 'lib', pylib_name)
     else:
         pylib_name = get_config_var('LDLIBRARY')
         pylib_arch = get_config_var('MULTIARCH')

+ 3 - 0
mypy.ini

@@ -11,3 +11,6 @@ ignore_missing_imports = True
 
 [mypy-Pmw.*]
 ignore_missing_imports = True
+
+[mypy-imp]
+ignore_missing_imports = True

+ 1 - 1
panda/CMakeLists.txt

@@ -108,7 +108,7 @@ if(HAVE_FREETYPE)
 endif()
 
 if(INTERROGATE_PYTHON_INTERFACE)
-  add_python_module(panda3d.core ${CORE_MODULE_COMPONENTS} LINK panda)
+  add_python_module(panda3d.core ${CORE_MODULE_COMPONENTS} LINK panda INIT pyenv_init)
 
   # Generate our __init__.py
   if(WIN32)

+ 11 - 7
panda/src/android/android_main.cxx

@@ -75,6 +75,7 @@ void android_main(struct android_app* app) {
     << "New native activity started on " << *current_thread << "\n";
 
   // Were we given an optional location to write the stdout/stderr streams?
+  bool owns_stdout = false;
   methodID = env->GetMethodID(activity_class, "getIntentOutputUri", "()Ljava/lang/String;");
   jstring joutput_uri = (jstring) env->CallObjectMethod(activity->clazz, methodID);
   if (joutput_uri != nullptr) {
@@ -92,6 +93,7 @@ void android_main(struct android_app* app) {
 
           dup2(fd, 1);
           dup2(fd, 2);
+          owns_stdout = true;
         } else {
           android_cat.error()
             << "Failed to open output path " << path << "\n";
@@ -109,6 +111,7 @@ void android_main(struct android_app* app) {
             << spec.get_server_and_port() << "\n";
           dup2(fd, 1);
           dup2(fd, 2);
+          owns_stdout = true;
         } else {
           android_cat.error()
             << "Failed to open output socket "
@@ -267,11 +270,10 @@ void android_main(struct android_app* app) {
 
     // We still need to keep an event loop going until Android gives us leave
     // to end the process.
-    int looper_id;
-    int events;
-    struct android_poll_source *source;
-    while ((looper_id = ALooper_pollAll(-1, nullptr, &events, (void**)&source)) >= 0) {
-      // Process this event, but intercept application command events.
+    while (!app->destroyRequested) {
+      int looper_id;
+      struct android_poll_source *source;
+      auto result = ALooper_pollOnce(-1, &looper_id, nullptr, (void **)&source);
       if (looper_id == LOOPER_ID_MAIN) {
         int8_t cmd = android_app_read_cmd(app);
         android_app_pre_exec_cmd(app, cmd);
@@ -300,8 +302,10 @@ void android_main(struct android_app* app) {
     env->ReleaseStringUTFChars(filename, filename_str);
   }
 
-  close(1);
-  close(2);
+  if (owns_stdout) {
+    close(1);
+    close(2);
+  }
 
   // Detach the thread before exiting.
   activity->vm->DetachCurrentThread();

+ 1 - 1
panda/src/android/pview_manifest.xml

@@ -8,7 +8,7 @@
     <uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE" />
     <uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" />
     <uses-permission android:name="android.permission.INTERNET" />
-    <uses-sdk android:minSdkVersion="21" />
+    <uses-sdk android:minSdkVersion="21" android:targetSdkVersion="30" />
     <uses-feature android:glEsVersion="0x00020000" android:required="true" />
 
     <application android:label="Panda Viewer" android:hasCode="true" android:debuggable="true">

+ 11 - 12
panda/src/androiddisplay/androidGraphicsWindow.cxx

@@ -186,16 +186,14 @@ process_events() {
   GraphicsWindow::process_events();
 
   // Read all pending events.
-  int looper_id;
-  int events;
-  struct android_poll_source* source;
-
-  // Loop until all events are read.
-  while ((looper_id = ALooper_pollAll(0, nullptr, &events, (void**)&source)) >= 0) {
-    // Process this event.
-    if (source != nullptr) {
-      source->process(_app, source);
-    }
+  struct android_poll_source *source;
+
+  auto result = ALooper_pollOnce(0, nullptr, nullptr, (void **)&source);
+  nassertv(result != ALOOPER_POLL_ERROR);
+
+  // Process this event.
+  if (source != nullptr) {
+    source->process(_app, source);
   }
 }
 
@@ -442,14 +440,15 @@ ns_handle_command(int32_t command) {
     case APP_CMD_WINDOW_RESIZED:
       properties.set_size(ANativeWindow_getWidth(_app->window),
                           ANativeWindow_getHeight(_app->window));
+      system_changed_properties(properties);
       break;
     case APP_CMD_WINDOW_REDRAW_NEEDED:
       break;
     case APP_CMD_CONTENT_RECT_CHANGED:
-      properties.set_origin(_app->contentRect.left, _app->contentRect.top);
+      /*properties.set_origin(_app->contentRect.left, _app->contentRect.top);
       properties.set_size(_app->contentRect.right - _app->contentRect.left,
                           _app->contentRect.bottom - _app->contentRect.top);
-      system_changed_properties(properties);
+      system_changed_properties(properties);*/
       break;
     case APP_CMD_GAINED_FOCUS:
       properties.set_foreground(true);

+ 2 - 1
panda/src/cocoagldisplay/cocoaGLGraphicsBuffer.mm

@@ -98,7 +98,8 @@ open_buffer() {
     // If the old gsg has the wrong pixel format, create a new one that shares
     // with the old gsg.
     DCAST_INTO_R(cocoagsg, _gsg, false);
-    if (!cocoagsg->get_fb_properties().subsumes(_fb_properties)) {
+    if (cocoagsg->get_engine() != _engine ||
+        !cocoagsg->get_fb_properties().subsumes(_fb_properties)) {
       cocoagsg = new CocoaGLGraphicsStateGuardian(_engine, _pipe, cocoagsg);
       cocoagsg->choose_pixel_format(_fb_properties, cocoa_pipe->get_display_id(), false);
       _gsg = cocoagsg;

+ 2 - 2
panda/src/cocoagldisplay/cocoaGLGraphicsPipe.mm

@@ -125,12 +125,12 @@ make_output(const std::string &name,
         precertify = true;
       }
     }
-    if (host != nullptr) {
+    if (host != nullptr && host->get_engine() == engine) {
       return new GLGraphicsBuffer(engine, this, name, fb_prop, win_prop,
                                   flags, gsg, host);
     } else {
       return new CocoaGLGraphicsBuffer(engine, this, name, fb_prop, win_prop,
-                                       flags, gsg, host);
+                                       flags, gsg, nullptr);
     }
   }
 

+ 2 - 1
panda/src/cocoagldisplay/cocoaGLGraphicsWindow.mm

@@ -200,7 +200,8 @@ open_window() {
     // If the old gsg has the wrong pixel format, create a new one that shares
     // with the old gsg.
     DCAST_INTO_R(cocoagsg, _gsg, false);
-    if (!cocoagsg->get_fb_properties().subsumes(_fb_properties)) {
+    if (cocoagsg->get_engine() != _engine ||
+        !cocoagsg->get_fb_properties().subsumes(_fb_properties)) {
       cocoagsg = new CocoaGLGraphicsStateGuardian(_engine, _pipe, cocoagsg);
       cocoagsg->choose_pixel_format(_fb_properties, _display, false);
       _gsg = cocoagsg;

+ 2 - 0
panda/src/collide/CMakeLists.txt

@@ -74,6 +74,8 @@ set(P3COLLIDE_IGATEEXT
   collisionHandlerPhysical_ext.h
   collisionHandlerQueue_ext.cxx
   collisionHandlerQueue_ext.h
+  collisionNode_ext.cxx
+  collisionNode_ext.h
   collisionPolygon_ext.cxx
   collisionPolygon_ext.h
   collisionTraverser_ext.cxx

+ 21 - 0
panda/src/collide/collisionNode.I

@@ -166,3 +166,24 @@ INLINE CollideMask CollisionNode::
 get_default_collide_mask() {
   return default_collision_node_collide_mask;
 }
+
+/**
+ * Returns the custom pointer set via set_owner(), or nullptr if no owner is
+ * currently set.  The pointer is returned as-is; no callback is invoked.
+ */
+INLINE void *CollisionNode::
+get_owner() const {
+  return _owner;
+}
+
+/**
+ * Sets a custom pointer, together with an optional callback that will be
+ * called when the node is deleted.
+ *
+ * Any owner that was set before is cleared first via clear_owner(), so the
+ * callback registered for the previous owner (if any) is invoked with the
+ * previous owner pointer before the new values are stored.  The callback is
+ * therefore also called when the owner is replaced or cleared, not only when
+ * the node is deleted.  It receives the owner pointer as its only argument.
+ *
+ * The owner or the callback will not be copied along with the CollisionNode.
+ */
+INLINE void CollisionNode::
+set_owner(void *owner, OwnerCallback *callback) {
+  clear_owner();
+  _owner = owner;
+  _owner_callback = callback;
+}

+ 22 - 2
panda/src/collide/collisionNode.cxx

@@ -40,7 +40,9 @@ CollisionNode::
 CollisionNode(const std::string &name) :
   PandaNode(name),
   _from_collide_mask(get_default_collide_mask()),
-  _collider_sort(0)
+  _collider_sort(0),
+  _owner(nullptr),
+  _owner_callback(nullptr)
 {
   set_cull_callback();
   set_renderable();
@@ -60,7 +62,9 @@ CollisionNode(const CollisionNode &copy) :
   PandaNode(copy),
   _from_collide_mask(copy._from_collide_mask),
   _collider_sort(copy._collider_sort),
-  _solids(copy._solids)
+  _solids(copy._solids),
+  _owner(nullptr),
+  _owner_callback(nullptr)
 {
 }
 
@@ -69,6 +73,10 @@ CollisionNode(const CollisionNode &copy) :
  */
 CollisionNode::
 ~CollisionNode() {
+  if (_owner_callback != nullptr) {
+    _owner_callback(_owner);
+    _owner_callback = nullptr;
+  }
 }
 
 /**
@@ -251,6 +259,18 @@ set_from_collide_mask(CollideMask mask) {
   _from_collide_mask = mask;
 }
 
+/**
+ * Removes the owner that was previously set using set_owner().
+ *
+ * If a callback was registered together with the owner, it is invoked once
+ * with the current owner pointer before both the owner and the callback are
+ * reset to nullptr.  Calling this when no owner is set does nothing beyond
+ * resetting both fields to nullptr.
+ */
+void CollisionNode::
+clear_owner() {
+  if (_owner_callback != nullptr) {
+    _owner_callback(_owner);
+  }
+  _owner = nullptr;
+  _owner_callback = nullptr;
+}
+
 /**
  * Called when needed to recompute the node's _internal_bound object.  Nodes
  * that contain anything of substance should redefine this to do the right

+ 19 - 0
panda/src/collide/collisionNode.h

@@ -77,6 +77,22 @@ PUBLISHED:
   INLINE static CollideMask get_default_collide_mask();
   MAKE_PROPERTY(default_collide_mask, get_default_collide_mask);
 
+public:
+  typedef void (OwnerCallback)(void *);
+
+  INLINE void *get_owner() const;
+
+#ifndef CPPPARSER
+  INLINE void set_owner(void *owner, OwnerCallback *callback = nullptr);
+  void clear_owner();
+#endif
+
+  EXTENSION(PyObject *get_owner() const);
+  EXTENSION(void set_owner(PyObject *owner));
+
+PUBLISHED:
+  MAKE_PROPERTY(owner, get_owner, set_owner);
+
 protected:
   virtual void compute_internal_bounds(CPT(BoundingVolume) &internal_bounds,
                                        int &internal_vertices,
@@ -94,6 +110,9 @@ private:
   typedef pvector< COWPT(CollisionSolid) > Solids;
   Solids _solids;
 
+  void *_owner = nullptr;
+  OwnerCallback *_owner_callback = nullptr;
+
   friend class CollisionTraverser;
 
 public:

+ 62 - 0
panda/src/collide/collisionNode_ext.cxx

@@ -0,0 +1,62 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file collisionNode_ext.cxx
+ * @author rdb
+ * @date 2024-12-12
+ */
+
+#include "collisionNode_ext.h"
+
+#ifdef HAVE_PYTHON
+
+#include "collisionNode.h"
+
+/**
+ * Returns the object previously set via set_owner().  If the object has been
+ * destroyed, returns None.
+ *
+ * The stored owner pointer is interpreted as a Python weak reference object.
+ * The return value is always a new (strong) reference: the referent if it is
+ * still alive, or None if no owner is set or the referent is gone.  On Python
+ * 3.13 and later, nullptr is returned if PyWeakref_GetRef() reports an error.
+ *
+ * NOTE(review): this assumes the stored pointer was created by the Python
+ * set_owner() extension; an owner set from C++ through the void * overload
+ * would be misinterpreted here -- confirm that the two are never mixed.
+ */
+PyObject *Extension<CollisionNode>::
+get_owner() const {
+  PyObject *owner = (PyObject *)_this->get_owner();
+
+#if PY_VERSION_HEX >= 0x030D0000 // 3.13
+  PyObject *strong_ref;
+  int result = 0;
+  if (owner != nullptr) {
+    result = PyWeakref_GetRef(owner, &strong_ref);
+  }
+  if (result > 0) {
+    return strong_ref;
+  }
+  else if (result == 0) {
+    return Py_NewRef(Py_None);
+  }
+  else {
+    return nullptr;
+  }
+#else
+  return Py_NewRef(owner != nullptr ? PyWeakref_GetObject(owner) : Py_None);
+#endif
+}
+
+/**
+ * Stores a weak reference to the given object on the CollisionNode, for later
+ * use in collision events and handlers.
+ *
+ * Passing None clears the owner.  If the given object does not support weak
+ * references, PyWeakref_NewRef() fails and leaves a Python exception set; in
+ * that case the existing owner is left untouched and the exception is left
+ * for the caller to report.
+ */
+void Extension<CollisionNode>::
+set_owner(PyObject *owner) {
+  if (owner == Py_None) {
+    _this->clear_owner();
+    return;
+  }
+
+  PyObject *ref = PyWeakref_NewRef(owner, nullptr);
+  if (ref == nullptr) {
+    // Creating the weak reference failed and an exception is pending.  Do not
+    // store a null pointer together with the release callback, since the
+    // callback would later call Py_DECREF on it.
+    return;
+  }
+
+  // The node owns the weak reference object; the callback releases it when
+  // the owner is replaced, cleared, or the node is destroyed.
+  _this->set_owner(ref, [](void *obj) { Py_DECREF((PyObject *)obj); });
+}
+
+#endif

+ 40 - 0
panda/src/collide/collisionNode_ext.h

@@ -0,0 +1,40 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file collisionNode_ext.h
+ * @author rdb
+ * @date 2024-12-12
+ */
+
+#ifndef COLLISIONNODE_EXT_H
+#define COLLISIONNODE_EXT_H
+
+#include "pandabase.h"
+
+#ifdef HAVE_PYTHON
+
+#include "extension.h"
+#include "collisionNode.h"
+#include "py_panda.h"
+
+/**
+ * This class defines the extension methods for CollisionNode, which are called
+ * instead of any C++ methods with the same prototype.
+ *
+ * It declares get_owner() and set_owner() variants that exchange the owner as
+ * a PyObject pointer rather than as the raw void pointer used by the C++
+ * interface.
+ *
+ * @since 1.11.0
+ */
+template<>
+class Extension<CollisionNode> : public ExtensionBase<CollisionNode> {
+public:
+  PyObject *get_owner() const;
+  void set_owner(PyObject *owner);
+};
+
+#endif  // HAVE_PYTHON
+
+#endif

+ 1 - 0
panda/src/collide/p3collide_ext_composite.cxx

@@ -1,5 +1,6 @@
 #include "collisionHandlerEvent_ext.cxx"
 #include "collisionHandlerPhysical_ext.cxx"
 #include "collisionHandlerQueue_ext.cxx"
+#include "collisionNode_ext.cxx"
 #include "collisionPolygon_ext.cxx"
 #include "collisionTraverser_ext.cxx"

+ 1 - 1
panda/src/cull/cullBinBackToFront.cxx

@@ -40,7 +40,7 @@ add_object(CullableObject *object, Thread *current_thread) {
   // Determine the center of the bounding volume.
   CPT(BoundingVolume) volume = object->_geom->get_bounds(current_thread);
   if (volume->is_empty()) {
-    delete object;
+    // No point in culling objects with no volume.
     return;
   }
 

+ 1 - 1
panda/src/cull/cullBinFrontToBack.cxx

@@ -40,7 +40,7 @@ add_object(CullableObject *object, Thread *current_thread) {
   // Determine the center of the bounding volume.
   CPT(BoundingVolume) volume = object->_geom->get_bounds();
   if (volume->is_empty()) {
-    delete object;
+    // No point in culling objects with no volume.
     return;
   }
 

+ 4 - 4
panda/src/display/displayRegion.cxx

@@ -474,9 +474,7 @@ get_screenshot(PNMImage &image) {
  * it as a Texture, or NULL on failure.
  */
 PT(Texture) DisplayRegion::
-get_screenshot() {
-  Thread *current_thread = Thread::get_current_thread();
-
+get_screenshot(Thread *current_thread) {
   GraphicsOutput *window = get_window();
   nassertr(window != nullptr, nullptr);
 
@@ -487,7 +485,9 @@ get_screenshot() {
   if (gsg->get_threading_model().get_draw_stage() != current_thread->get_pipeline_stage()) {
     // Ask the engine to do on the draw thread.
     GraphicsEngine *engine = window->get_engine();
-    return engine->do_get_screenshot(this, gsg);
+    return engine->run_on_draw_thread([this] (Thread *current_thread) {
+      return get_screenshot(current_thread);
+    });
   }
 
   // We are on the draw thread.

+ 1 - 1
panda/src/display/displayRegion.h

@@ -161,7 +161,7 @@ PUBLISHED:
   bool save_screenshot(
     const Filename &filename, const std::string &image_comment = "");
   bool get_screenshot(PNMImage &image);
-  PT(Texture) get_screenshot();
+  PT(Texture) get_screenshot(Thread *current_thread = Thread::get_current_thread());
 
   void clear_cull_result();
   virtual PT(PandaNode) make_cull_result_graph();

+ 50 - 0
panda/src/display/graphicsEngine.I

@@ -171,3 +171,53 @@ INLINE void GraphicsEngine::
 dispatch_compute(const LVecBase3i &work_groups, const ShaderAttrib *sattr, GraphicsStateGuardian *gsg) {
   dispatch_compute(work_groups, RenderState::make(sattr), gsg);
 }
+
+#ifndef CPPPARSER
+/**
+ * Waits for the draw thread to become idle, then runs the given function on it.
+ *
+ * The engine lock is held for the duration of the call.  If the engine's
+ * threading model has an empty draw name, the callable is invoked directly on
+ * the calling thread and is passed Thread::get_current_thread().  Otherwise,
+ * it is handed to the window renderer registered under the draw name, which
+ * is treated as a RenderThread.  The value returned by the callable (if any)
+ * is returned to the caller.
+ *
+ * NOTE(review): this consults the engine's own threading model, whereas the
+ * code it replaces looked at the threading model of the GSG that was passed
+ * in -- confirm these always agree.
+ */
+template<class Callable>
+INLINE auto GraphicsEngine::
+run_on_draw_thread(Callable &&callable) -> decltype(callable((Thread *)nullptr)) {
+  ReMutexHolder holder(_lock);
+  std::string draw_name = _threading_model.get_draw_name();
+  if (draw_name.empty()) {
+    return std::move(callable)(Thread::get_current_thread());
+  } else {
+    WindowRenderer *wr = get_window_renderer(draw_name, 0);
+    RenderThread *thread = (RenderThread *)wr;
+    return thread->run_on_thread(std::move(callable));
+  }
+}
+
+/**
+ * Waits for this thread to become idle, then runs the given function on it.
+ *
+ * This overload is selected when the callable returns a value.  The callable
+ * is invoked on the render thread with a pointer to that thread, and its
+ * result is constructed into a buffer on the caller's stack, from which it is
+ * moved into the return value once the render thread has finished.
+ */
+template<class Callable>
+INLINE auto GraphicsEngine::RenderThread::
+run_on_thread(Callable &&callable) ->
+  typename std::enable_if<!std::is_void<decltype(callable(this))>::value, decltype(callable(this))>::type {
+
+  using ReturnType = decltype(callable(this));
+  alignas(ReturnType) unsigned char storage[sizeof(ReturnType)];
+
+  // The trampoline runs on the render thread; it recovers the callable from
+  // _callback_data and placement-constructs its result into _return_data,
+  // which points at the storage buffer above.
+  run_on_thread([] (RenderThread *data) {
+    new (data->_return_data) ReturnType(std::move(*(Callable *)data->_callback_data)(data));
+  }, &callable, storage);
+
+  // Move the result out of the raw storage and explicitly destroy the object
+  // that was constructed there.  Without the destructor call, results that
+  // own resources (such as reference-counted pointers or vectors) would leak
+  // whatever the moved-from object still holds.
+  ReturnType *stored = (ReturnType *)storage;
+  ReturnType result(std::move(*stored));
+  stored->~ReturnType();
+  return result;
+}
+
+/**
+ * Waits for this thread to become idle, then runs the given function on it.
+ *
+ * This overload is selected when the callable returns void.  The callable is
+ * passed to the render thread by address through the callback data pointer
+ * and is invoked there with a pointer to the render thread; no return
+ * storage is provided.
+ */
+template<class Callable>
+INLINE auto GraphicsEngine::RenderThread::
+run_on_thread(Callable &&callable) ->
+  typename std::enable_if<std::is_void<decltype(callable(this))>::value, decltype(callable(this))>::type {
+
+  run_on_thread([] (RenderThread *data) {
+    std::move(*(Callable *)data->_callback_data)(data);
+  }, &callable, nullptr);
+}
+#endif  // CPPPARSER

+ 71 - 142
panda/src/display/graphicsEngine.cxx

@@ -152,8 +152,9 @@ INLINE static bool operator < (const CullKey &a, const CullKey &b) {
  * any Pipeline you choose.
  */
 GraphicsEngine::
-GraphicsEngine(Pipeline *pipeline) :
+GraphicsEngine(ClockObject *clock, Pipeline *pipeline) :
   _pipeline(pipeline),
+  _clock(clock),
   _app("app"),
   _lock("GraphicsEngine::_lock"),
   _loaded_textures_lock("GraphicsEngine::_loaded_textures_lock")
@@ -339,7 +340,7 @@ make_output(GraphicsPipe *pipe,
   nassertr(pipe != nullptr, nullptr);
   if (gsg != nullptr) {
     nassertr(pipe == gsg->get_pipe(), nullptr);
-    nassertr(this == gsg->get_engine(), nullptr);
+    //nassertr(this == gsg->get_engine(), nullptr);
   }
 
   // Are we really asking for a callback window?
@@ -729,11 +730,9 @@ render_frame() {
   // been rendered).
   open_windows();
 
-  ClockObject *global_clock = ClockObject::get_global_clock();
-
   if (display_cat.is_spam()) {
     display_cat.spam()
-      << "render_frame() - frame " << global_clock->get_frame_count() << "\n";
+      << "render_frame() - frame " << _clock->get_frame_count() << "\n";
   }
 
   {
@@ -846,8 +845,8 @@ render_frame() {
     }
 #endif  // THREADED_PIPELINE
 
-    global_clock->tick(current_thread);
-    if (global_clock->check_errors(current_thread)) {
+    _clock->tick(current_thread);
+    if (_clock->check_errors(current_thread)) {
       throw_event("clock_error");
     }
 
@@ -1135,12 +1134,7 @@ flip_frame() {
  */
 bool GraphicsEngine::
 extract_texture_data(Texture *tex, GraphicsStateGuardian *gsg) {
-  ReMutexHolder holder(_lock);
-
-  string draw_name = gsg->get_threading_model().get_draw_name();
-  if (draw_name.empty()) {
-    // A single-threaded environment.  No problem.
-    Thread *current_thread = Thread::get_current_thread();
+  return run_on_draw_thread([=] (Thread *current_thread) {
     if (!gsg->begin_frame(current_thread)) {
       return false;
     }
@@ -1148,41 +1142,33 @@ extract_texture_data(Texture *tex, GraphicsStateGuardian *gsg) {
     bool result = gsg->extract_texture_data(tex);
     gsg->end_frame(current_thread);
     return result;
-  }
-  else {
-    // A multi-threaded environment.  We have to wait until the draw thread
-    // has finished its current task.
-    WindowRenderer *wr = get_window_renderer(draw_name, 0);
-    RenderThread *thread = (RenderThread *)wr;
-    MutexHolder cv_holder(thread->_cv_mutex);
+  });
+}
 
-    while (thread->_thread_state != TS_wait) {
-      thread->_cv_done.wait();
+/**
+ * Asks the indicated GraphicsStateGuardian to retrieve the buffer memory
+ * image of the indicated ShaderBuffer and return it.
+ *
+ * This is mainly useful for debugging.  It is a very slow call because it
+ * introduces a pipeline stall both of Panda's pipeline and the graphics
+ * pipeline.
+ *
+ * The return value is empty if some kind of error occurred.
+ */
+vector_uchar GraphicsEngine::
+extract_shader_buffer_data(ShaderBuffer *buffer, GraphicsStateGuardian *gsg) {
+  return run_on_draw_thread([=] (Thread *current_thread) {
+    vector_uchar data;
+    if (!gsg->begin_frame(current_thread)) {
+      return data;
     }
 
-    // Temporarily set this so that it accesses data from the current thread.
-    int pipeline_stage = Thread::get_current_pipeline_stage();
-    int draw_pipeline_stage = thread->get_pipeline_stage();
-    thread->set_pipeline_stage(pipeline_stage);
-
-    // Now that the draw thread is idle, signal it to do the extraction task.
-    thread->_gsg = gsg;
-    thread->_texture = tex;
-    thread->_thread_state = TS_do_extract;
-    thread->_cv_mutex.release();
-    thread->_cv_start.notify();
-    thread->_cv_mutex.acquire();
-
-    // Wait for it to finish the extraction.
-    while (thread->_thread_state != TS_wait) {
-      thread->_cv_done.wait();
+    if (!gsg->extract_shader_buffer_data(buffer, data)) {
+      data.clear();
     }
-
-    thread->set_pipeline_stage(draw_pipeline_stage);
-    thread->_gsg = nullptr;
-    thread->_texture = nullptr;
-    return thread->_result;
-  }
+    gsg->end_frame(current_thread);
+    return data;
+  });
 }
 
 /**
@@ -1207,12 +1193,7 @@ dispatch_compute(const LVecBase3i &work_groups, const RenderState *state, Graphi
   nassertv(shader != nullptr);
   nassertv(gsg != nullptr);
 
-  ReMutexHolder holder(_lock);
-
-  string draw_name = gsg->get_threading_model().get_draw_name();
-  if (draw_name.empty()) {
-    // A single-threaded environment.  No problem.
-    Thread *current_thread = Thread::get_current_thread();
+  run_on_draw_thread([=] (Thread *current_thread) {
     if (!gsg->begin_frame(current_thread)) {
       display_cat.error()
         << "Failed to begin frame for compute shader dispatch.\n";
@@ -1223,41 +1204,7 @@ dispatch_compute(const LVecBase3i &work_groups, const RenderState *state, Graphi
     gsg->dispatch_compute(work_groups[0], work_groups[1], work_groups[2]);
     gsg->pop_group_marker();
     gsg->end_frame(current_thread);
-
-  } else {
-    // A multi-threaded environment.  We have to wait until the draw thread
-    // has finished its current task.
-    WindowRenderer *wr = get_window_renderer(draw_name, 0);
-    RenderThread *thread = (RenderThread *)wr;
-    MutexHolder cv_holder(thread->_cv_mutex);
-
-    while (thread->_thread_state != TS_wait) {
-      thread->_cv_done.wait();
-    }
-
-    // Temporarily set this so that it accesses data from the current thread.
-    int pipeline_stage = Thread::get_current_pipeline_stage();
-    int draw_pipeline_stage = thread->get_pipeline_stage();
-    thread->set_pipeline_stage(pipeline_stage);
-
-    // Now that the draw thread is idle, signal it to do the compute task.
-    thread->_gsg = gsg;
-    thread->_state = state;
-    thread->_work_groups = work_groups;
-    thread->_thread_state = TS_do_compute;
-    thread->_cv_mutex.release();
-    thread->_cv_start.notify();
-    thread->_cv_mutex.acquire();
-
-    // Wait for it to finish the compute task.
-    while (thread->_thread_state != TS_wait) {
-      thread->_cv_done.wait();
-    }
-
-    thread->set_pipeline_stage(draw_pipeline_stage);
-    thread->_gsg = nullptr;
-    thread->_state = nullptr;
-  }
+  });
 }
 
 /**
@@ -1293,43 +1240,6 @@ texture_uploaded(Texture *tex) {
 // Usually only called by DisplayRegion::do_cull.
 }
 
-/**
- * Called by DisplayRegion::do_get_screenshot
- */
-PT(Texture) GraphicsEngine::
-do_get_screenshot(DisplayRegion *region, GraphicsStateGuardian *gsg) {
-  // A multi-threaded environment.  We have to wait until the draw thread
-  // has finished its current task.
-
-  ReMutexHolder holder(_lock);
-
-  const std::string &draw_name = gsg->get_threading_model().get_draw_name();
-  WindowRenderer *wr = get_window_renderer(draw_name, 0);
-  RenderThread *thread = (RenderThread *)wr;
-  MutexHolder cv_holder(thread->_cv_mutex);
-
-  while (thread->_thread_state != TS_wait) {
-    thread->_cv_done.wait();
-  }
-
-  // Now that the draw thread is idle, signal it to do the extraction task.
-  thread->_region = region;
-  thread->_thread_state = TS_do_screenshot;
-  thread->_cv_mutex.release();
-  thread->_cv_start.notify();
-  thread->_cv_mutex.acquire();
-
-  // Wait for it to finish the extraction.
-  while (thread->_thread_state != TS_wait) {
-    thread->_cv_done.wait();
-  }
-
-  PT(Texture) tex = std::move(thread->_texture);
-  thread->_region = nullptr;
-  thread->_texture = nullptr;
-  return tex;
-}
-
 /**
  * Fires off a cull traversal using the indicated camera.
  */
@@ -2814,26 +2724,9 @@ thread_main() {
       do_pending(_engine, current_thread);
       break;
 
-    case TS_do_compute:
-      nassertd(_gsg != nullptr && _state != nullptr) break;
-      {
-        const ShaderAttrib *sattr;
-        _state->get_attrib(sattr);
-        _gsg->push_group_marker(std::string("Compute ") + sattr->get_shader()->get_filename(Shader::ST_compute).get_basename());
-        _gsg->set_state_and_transform(_state, TransformState::make_identity());
-        _gsg->dispatch_compute(_work_groups[0], _work_groups[1], _work_groups[2]);
-        _gsg->pop_group_marker();
-      }
-      break;
-
-    case TS_do_extract:
-      nassertd(_gsg != nullptr && _texture != nullptr) break;
-      _result = _gsg->extract_texture_data(_texture);
-      break;
-
-    case TS_do_screenshot:
-      nassertd(_region != nullptr) break;
-      _texture = _region->get_screenshot();
+    case TS_callback:
+      nassertd(_callback != nullptr) break;
+      _callback(this);
       break;
 
     case TS_terminate:
@@ -2858,3 +2751,39 @@ thread_main() {
     }
   }
 }
+
+/**
+ * Waits for this thread to become idle, then runs the given function on it.
+ *
+ * The callback, together with the opaque callback_data and return_data
+ * pointers, is stored on this thread object while the thread is in the
+ * TS_callback state, so that the callback can read them back from the
+ * RenderThread pointer it receives.  The caller holds _cv_mutex and waits on
+ * _cv_done until the thread state is TS_wait both before handing over the
+ * callback and again afterwards, so this call does not return until the
+ * thread has gone back to TS_wait.  The stored pointers are reset to nullptr
+ * before returning.
+ */
+void GraphicsEngine::RenderThread::
+run_on_thread(Callback *callback, void *callback_data, void *return_data) {
+  MutexHolder cv_holder(_cv_mutex);
+
+  while (_thread_state != TS_wait) {
+    _cv_done.wait();
+  }
+
+  // Temporarily set this so that it accesses data from the current thread.
+  int pipeline_stage = Thread::get_current_pipeline_stage();
+  int thread_pipeline_stage = get_pipeline_stage();
+  set_pipeline_stage(pipeline_stage);
+
+  // Now that the draw thread is idle, signal it to run the callback.
+  _callback = callback;
+  _callback_data = callback_data;
+  _return_data = return_data;
+  _thread_state = TS_callback;
+  _cv_mutex.release();
+  _cv_start.notify();
+  _cv_mutex.acquire();
+
+  // Wait for it to finish the job.
+  while (_thread_state != TS_wait) {
+    _cv_done.wait();
+  }
+
+  set_pipeline_stage(thread_pipeline_stage);
+  _callback = nullptr;
+  _callback_data = nullptr;
+  _return_data = nullptr;
+}

+ 32 - 12
panda/src/display/graphicsEngine.h

@@ -33,6 +33,9 @@
 #include "loader.h"
 #include "referenceCount.h"
 #include "renderState.h"
+#include "clockObject.h"
+
+#include <type_traits>
 
 class Pipeline;
 class DisplayRegion;
@@ -53,7 +56,8 @@ class Texture;
  */
 class EXPCL_PANDA_DISPLAY GraphicsEngine : public ReferenceCount {
 PUBLISHED:
-  explicit GraphicsEngine(Pipeline *pipeline = nullptr);
+  explicit GraphicsEngine(ClockObject *clock = ClockObject::get_global_clock(),
+                          Pipeline *pipeline = nullptr);
   BLOCKING ~GraphicsEngine();
 
   void set_threading_model(const GraphicsThreadingModel &threading_model);
@@ -111,6 +115,7 @@ PUBLISHED:
   BLOCKING void flip_frame();
 
   bool extract_texture_data(Texture *tex, GraphicsStateGuardian *gsg);
+  vector_uchar extract_shader_buffer_data(ShaderBuffer *buffer, GraphicsStateGuardian *gsg);
   void dispatch_compute(const LVecBase3i &work_groups,
                         const RenderState *state,
                         GraphicsStateGuardian *gsg);
@@ -127,15 +132,17 @@ public:
     TS_do_flip,
     TS_do_release,
     TS_do_windows,
-    TS_do_compute,
-    TS_do_extract,
-    TS_do_screenshot,
+    TS_callback,
     TS_terminate,
     TS_done
   };
 
   void texture_uploaded(Texture *tex);
-  PT(Texture) do_get_screenshot(DisplayRegion *region, GraphicsStateGuardian *gsg);
+
+#ifndef CPPPARSER
+  template<class Callable>
+  INLINE auto run_on_draw_thread(Callable &&callable) -> decltype(callable((Thread *)nullptr));
+#endif
 
 public:
   static void do_cull(CullHandler *cull_handler, SceneSetup *scene_setup,
@@ -302,24 +309,37 @@ private:
     RenderThread(const std::string &name, GraphicsEngine *engine);
     virtual void thread_main();
 
+    typedef void Callback(RenderThread *thread);
+    void run_on_thread(Callback *callback,
+                       void *callback_data = nullptr,
+                       void *return_data = nullptr);
+
+#ifndef CPPPARSER
+    template<class Callable>
+    INLINE auto run_on_thread(Callable &&callable) ->
+      typename std::enable_if<!std::is_void<decltype(callable(this))>::value, decltype(callable(this))>::type;
+
+    template<class Callable>
+    INLINE auto run_on_thread(Callable &&callable) ->
+      typename std::enable_if<std::is_void<decltype(callable(this))>::value, decltype(callable(this))>::type;
+#endif
+
     GraphicsEngine *_engine;
     Mutex _cv_mutex;
     ConditionVar _cv_start;
     ConditionVar _cv_done;
     ThreadState _thread_state;
 
-    // These are stored for extract_texture_data and dispatch_compute.
-    GraphicsStateGuardian *_gsg;
-    PT(Texture) _texture;
-    const RenderState *_state;
-    DisplayRegion *_region;
-    LVecBase3i _work_groups;
-    bool _result;
+    // Used for TS_callback.
+    Callback *_callback;
+    void *_callback_data;
+    void *_return_data;
   };
 
   WindowRenderer *get_window_renderer(const std::string &name, int pipeline_stage);
 
   Pipeline *_pipeline;
+  ClockObject *const _clock;
   Windows _windows;
   bool _windows_sorted;
 

+ 41 - 1
panda/src/display/graphicsStateGuardian.I

@@ -596,7 +596,8 @@ get_supports_tessellation_shaders() const {
  */
 INLINE bool GraphicsStateGuardian::
 get_supports_compute_shaders() const {
-  return (_supported_shader_caps & ShaderModule::C_compute_shader) != 0;
+  return (_supported_shader_caps & ShaderModule::C_compute_shader) != 0
+      && _max_compute_work_group_invocations > 0;
 }
 
 /**
@@ -711,6 +712,45 @@ get_supports_dual_source_blending() const {
   return _supports_dual_source_blending;
 }
 
+/**
+ * Returns the maximum number of work groups that can be submitted in a single
+ * compute dispatch.
+ *
+ * If compute shaders are supported, this will be at least 65535x65535x65535.
+ * Otherwise, it will be zero.
+ */
+INLINE LVecBase3i GraphicsStateGuardian::
+get_max_compute_work_group_count() const {
+  return _max_compute_work_group_count;
+}
+
+/**
+ * Returns the maximum number of invocations in each work group split out
+ * separately to every x, y, z dimension.  This limit applies in addition to
+ * the overall number of invocations, which is specified by
+ * get_max_compute_work_group_invocations().
+ *
+ * If compute shaders are supported, this will be at least 128x128x64.
+ * Otherwise, it will be zero.
+ */
+INLINE LVecBase3i GraphicsStateGuardian::
+get_max_compute_work_group_size() const {
+  return _max_compute_work_group_size;
+}
+
+/**
+ * Returns the maximum number of invocations in each work group as a product
+ * of the x, y, z dimensions.  This limit applies in addition to the
+ * per-dimension limits specified by get_max_compute_work_group_size().
+ *
+ * If compute shaders are supported, this will be at least 128.  Otherwise, it
+ * will be zero.
+ */
+INLINE int GraphicsStateGuardian::
+get_max_compute_work_group_invocations() const {
+  return _max_compute_work_group_invocations;
+}
+
 /**
  * Deprecated.  Use get_max_color_targets() instead, which returns the exact
  * same value.

+ 36 - 0
panda/src/display/graphicsStateGuardian.cxx

@@ -100,6 +100,7 @@ PStatCollector GraphicsStateGuardian::_draw_primitive_pcollector("Draw:Primitive
 PStatCollector GraphicsStateGuardian::_draw_set_state_pcollector("Draw:Set State");
 PStatCollector GraphicsStateGuardian::_flush_pcollector("Draw:Flush");
 PStatCollector GraphicsStateGuardian::_compute_dispatch_pcollector("Draw:Compute dispatch");
+PStatCollector GraphicsStateGuardian::_compute_work_groups_pcollector("Compute work groups");
 
 PStatCollector GraphicsStateGuardian::_wait_occlusion_pcollector("Wait:Occlusion");
 PStatCollector GraphicsStateGuardian::_wait_timer_pcollector("Wait:Timer Queries");
@@ -246,6 +247,10 @@ GraphicsStateGuardian(CoordinateSystem internal_coordinate_system,
   _supports_framebuffer_multisample = false;
   _supports_framebuffer_blit = false;
 
+  _max_compute_work_group_count = LVecBase3i(0, 0, 0);
+  _max_compute_work_group_size = LVecBase3i(0, 0, 0);
+  _max_compute_work_group_invocations = 0;
+
   _supports_stencil = false;
   _supports_stencil_wrap = false;
   _supports_two_sided_stencil = false;
@@ -570,6 +575,23 @@ update_texture(TextureContext *, bool) {
   return true;
 }
 
+/**
+ * Ensures that the current Texture data is refreshed onto the GSG.  This
+ * means updating the texture properties and/or re-uploading the texture
+ * image, if necessary.  This should only be called within the draw thread.
+ *
+ * If force is true, this function will not return until the texture has been
+ * fully uploaded.  If force is false, the function may choose to upload a
+ * simple version of the texture instead, if the texture is not fully resident
+ * (and if get_incomplete_render() is true).
+ *
+ * This base implementation forwards to the two-argument update_texture() and
+ * then calls complete() on the given token with the result, before returning
+ * that same result.
+ */
+bool GraphicsStateGuardian::
+update_texture(TextureContext *tc, bool force, CompletionToken token) {
+  bool result = update_texture(tc, force);
+  token.complete(result);
+  return result;
+}
 /**
  * Frees the resources previously allocated via a call to prepare_texture(),
  * including deleting the TextureContext itself, if it is non-NULL.
@@ -750,6 +772,18 @@ release_shader_buffers(const pvector<BufferContext *> &contexts) {
   }
 }
 
+/**
+ * This method should only be called by the GraphicsEngine.  Do not call it
+ * directly; call GraphicsEngine::extract_shader_buffer_data() instead.
+ *
+ * This method will be called in the draw thread to download the buffer's
+ * current contents synchronously.
+ *
+ * The base implementation does not touch the data vector and always returns
+ * false.
+ */
+bool GraphicsStateGuardian::
+extract_shader_buffer_data(ShaderBuffer *buffer, vector_uchar &data) {
+  return false;
+}
 /**
  * Begins a new occlusion query.  After this call, you may call
  * begin_draw_primitives() and draw_triangles()/draw_whatever() repeatedly.
@@ -1375,6 +1409,7 @@ end_frame(Thread *current_thread) {
   _vertices_tri_pcollector.flush_level();
   _vertices_patch_pcollector.flush_level();
   _vertices_other_pcollector.flush_level();
+  _compute_work_groups_pcollector.flush_level();
 
   _state_pcollector.flush_level();
   _texture_state_pcollector.flush_level();
@@ -2035,6 +2070,7 @@ init_frame_pstats() {
     _vertices_tri_pcollector.clear_level();
     _vertices_patch_pcollector.clear_level();
     _vertices_other_pcollector.clear_level();
+    _compute_work_groups_pcollector.clear_level();
 
     _state_pcollector.clear_level();
     _transform_state_pcollector.clear_level();

+ 17 - 0
panda/src/display/graphicsStateGuardian.h

@@ -114,6 +114,7 @@ PUBLISHED:
   GraphicsEngine *get_engine() const;
   INLINE const GraphicsThreadingModel &get_threading_model() const;
   MAKE_PROPERTY(pipe, get_pipe);
+  MAKE_PROPERTY(engine, get_engine);
 
   virtual bool make_current() const;
 
@@ -177,6 +178,12 @@ PUBLISHED:
   INLINE int get_maximum_simultaneous_render_targets() const;
   INLINE bool get_supports_dual_source_blending() const;
 
+public:
+  INLINE LVecBase3i get_max_compute_work_group_count() const;
+  INLINE LVecBase3i get_max_compute_work_group_size() const;
+  INLINE int get_max_compute_work_group_invocations() const;
+
+PUBLISHED:
   MAKE_PROPERTY(max_vertices_per_array, get_max_vertices_per_array);
   MAKE_PROPERTY(max_vertices_per_primitive, get_max_vertices_per_primitive);
   MAKE_PROPERTY(max_texture_stages, get_max_texture_stages);
@@ -223,6 +230,9 @@ PUBLISHED:
   MAKE_PROPERTY(timer_queries_active, get_timer_queries_active);
   MAKE_PROPERTY(max_color_targets, get_max_color_targets);
   MAKE_PROPERTY(supports_dual_source_blending, get_supports_dual_source_blending);
+  MAKE_PROPERTY(max_compute_work_group_count, get_max_compute_work_group_count);
+  MAKE_PROPERTY(max_compute_work_group_size, get_max_compute_work_group_size);
+  MAKE_PROPERTY(max_compute_work_group_invocations, get_max_compute_work_group_invocations);
 
   INLINE ShaderModel get_shader_model() const;
   INLINE void set_shader_model(ShaderModel shader_model);
@@ -294,6 +304,7 @@ PUBLISHED:
 public:
   virtual TextureContext *prepare_texture(Texture *tex);
   virtual bool update_texture(TextureContext *tc, bool force);
+  virtual bool update_texture(TextureContext *tc, bool force, CompletionToken token);
   virtual void release_texture(TextureContext *tc);
   virtual void release_textures(const pvector<TextureContext *> &contexts);
   virtual bool extract_texture_data(Texture *tex);
@@ -318,6 +329,7 @@ public:
   virtual BufferContext *prepare_shader_buffer(ShaderBuffer *data);
   virtual void release_shader_buffer(BufferContext *ibc);
   virtual void release_shader_buffers(const pvector<BufferContext *> &contexts);
+  virtual bool extract_shader_buffer_data(ShaderBuffer *buffer, vector_uchar &data);
 
   virtual void begin_occlusion_query();
   virtual PT(OcclusionQueryContext) end_occlusion_query();
@@ -617,6 +629,10 @@ protected:
   bool _supports_framebuffer_multisample;
   bool _supports_framebuffer_blit;
 
+  LVecBase3i _max_compute_work_group_count;
+  LVecBase3i _max_compute_work_group_size;
+  int _max_compute_work_group_invocations;
+
   bool _supports_stencil;
   bool _supports_stencil_wrap;
   bool _supports_two_sided_stencil;
@@ -684,6 +700,7 @@ public:
   static PStatCollector _draw_set_state_pcollector;
   static PStatCollector _flush_pcollector;
   static PStatCollector _compute_dispatch_pcollector;
+  static PStatCollector _compute_work_groups_pcollector;
   static PStatCollector _wait_occlusion_pcollector;
   static PStatCollector _wait_timer_pcollector;
   static PStatCollector _timer_queries_pcollector;

+ 5 - 5
panda/src/display/shaderInputBinding_impls.cxx

@@ -1678,7 +1678,7 @@ fetch_from_input(const ShaderAttrib *target_shader, void *into) const {
  * nth resource, which is of the given type.
  */
 ShaderInputBinding::ResourceId ShaderLightStructBinding::
-get_resource_id(int index, const ShaderType *type) const {
+get_resource_id(int index) const {
   if (_input != nullptr) {
     nassertr(index == 0, 0);
   }
@@ -1734,7 +1734,7 @@ get_state_dep() const {
  * nth resource, which is of the given type.
  */
 ShaderInputBinding::ResourceId ShaderTextureStagesBinding::
-get_resource_id(int index, const ShaderType *type) const {
+get_resource_id(int index) const {
   return index;
 }
 
@@ -1787,7 +1787,7 @@ get_state_dep() const {
  * nth resource.
  */
 ShaderInputBinding::ResourceId ShaderTextureBinding::
-get_resource_id(int index, const ShaderType *type) const {
+get_resource_id(int index) const {
   return (ResourceId)_input.p();
 }
 
@@ -1847,7 +1847,7 @@ get_state_dep() const {
  * nth resource.
  */
 ShaderInputBinding::ResourceId ShaderBufferBinding::
-get_resource_id(int index, const ShaderType *type) const {
+get_resource_id(int index) const {
   return (ResourceId)_input.p();
 }
 
@@ -2212,7 +2212,7 @@ fetch_data(const State &state, void *into, bool packed) const {
  * nth resource.
  */
 ShaderInputBinding::ResourceId ShaderAggregateBinding::
-get_resource_id(int index, const ShaderType *type) const {
+get_resource_id(int index) const {
   nassertr(index >= 0 && (size_t)index < _resources.size(), 0);
   return (ResourceId)_resources[index].p();
 }

+ 5 - 5
panda/src/display/shaderInputBinding_impls.h

@@ -167,7 +167,7 @@ public:
 
   virtual void fetch_data(const State &state, void *into, bool packed) const override;
 
-  virtual ResourceId get_resource_id(int index, const ShaderType *type) const;
+  virtual ResourceId get_resource_id(int index) const;
   virtual PT(Texture) fetch_texture(const State &state,
                                     ResourceId index,
                                     SamplerState &sampler, int &view) const;
@@ -213,7 +213,7 @@ public:
 
   virtual int get_state_dep() const override;
 
-  virtual ResourceId get_resource_id(int index, const ShaderType *type) const;
+  virtual ResourceId get_resource_id(int index) const;
   virtual PT(Texture) fetch_texture(const State &state,
                                     ResourceId resource_id,
                                     SamplerState &sampler, int &view) const;
@@ -235,7 +235,7 @@ public:
 
   virtual int get_state_dep() const override;
 
-  virtual ResourceId get_resource_id(int index, const ShaderType *type) const;
+  virtual ResourceId get_resource_id(int index) const;
   virtual PT(Texture) fetch_texture(const State &state,
                                     ResourceId resource_id,
                                     SamplerState &sampler, int &view) const;
@@ -259,7 +259,7 @@ public:
 
   virtual int get_state_dep() const override;
 
-  virtual ResourceId get_resource_id(int index, const ShaderType *type) const;
+  virtual ResourceId get_resource_id(int index) const;
   virtual PT(ShaderBuffer) fetch_shader_buffer(const State &state,
                                                ResourceId resource_id) const;
 
@@ -340,7 +340,7 @@ public:
 
   virtual void fetch_data(const State &state, void *into, bool packed) const override;
 
-  virtual ResourceId get_resource_id(int index, const ShaderType *type) const;
+  virtual ResourceId get_resource_id(int index) const;
   virtual PT(Texture) fetch_texture(const State &state,
                                     ResourceId index,
                                     SamplerState &sampler, int &view) const;

+ 124 - 15
panda/src/dxgsg9/dxShaderContext9.cxx

@@ -16,6 +16,7 @@
 #include "dxVertexBufferContext9.h"
 #include "shaderModuleSpirV.h"
 #include "spirVTransformer.h"
+#include "spirVEmulateTextureQueriesPass.h"
 #include "spirVHoistStructResourcesPass.h"
 #include "spirVRemoveUnusedVariablesPass.h"
 
@@ -118,26 +119,68 @@ compile_module(const ShaderModule *module, DWORD *&data) {
   // need to modify the SPIR-V to hoist those out.
   // We tell it not to remove the empty structs, since that changes the member
   // numbering, which we need to match between the original and the HLSL.
-  if (hoist_necessary) {
+  static const uint64_t emulate_caps = (Shader::C_texture_query_size | Shader::C_image_query_size | Shader::C_texture_query_levels);
+  if (hoist_necessary || (spv->_emulatable_caps & emulate_caps) != 0) {
     SpirVTransformer transformer(stream);
-    SpirVHoistStructResourcesPass hoist_pass(false);
-    transformer.run(hoist_pass);
-    transformer.run(SpirVRemoveUnusedVariablesPass());
-    stream = transformer.get_result();
 
-    for (const auto &item : hoist_pass._hoisted_vars) {
-      const auto &access_chain = item.first;
+    if ((spv->_emulatable_caps & emulate_caps) != 0) {
+      SpirVEmulateTextureQueriesPass pass(emulate_caps);
+      transformer.run(pass);
+
+      for (const auto &item : pass._size_var_ids) {
+        const auto &access_chain = item.first;
+        const auto &param_name = param_names[access_chain._var_id];
 
-      std::ostringstream str;
-      str << param_names[access_chain._var_id];
+        // Determine the resource index within the parameter.
+        const Shader::Parameter &param = _shader->_parameters[atoi(param_name.c_str() + 1)];
+        const ShaderType *type = param._type;
+        int resource_index = 0;
 
-      for (size_t i = 0; i < access_chain.size(); ++i) {
-        str << '_' << access_chain[i];
+        for (size_t i = 0; i < access_chain.size(); ++i) {
+          uint32_t index = access_chain[i];
+          if (const ShaderType::Array *array_type = type->as_array()) {
+            type = array_type->get_element_type();
+            resource_index += index * type->get_num_resources();
+          }
+          else if (const ShaderType::Struct *struct_type = type->as_struct()) {
+            for (size_t mi = 0; mi < index; ++mi) {
+              resource_index += struct_type->get_member(mi).type->get_num_resources();
+            }
+            type = struct_type->get_member(index).type;
+          }
+          else {
+            nassert_raise("invalid access chain");
+          }
+        }
+
+
+        char buf[256];
+        size_t size = sprintf(buf, "%ss_r%d", param_name.c_str(), resource_index);
+        param_names[item.second] = std::string(buf, size);
       }
+    }
+
+    if (hoist_necessary) {
+      SpirVHoistStructResourcesPass hoist_pass(false);
+      transformer.run(hoist_pass);
+
+      for (const auto &item : hoist_pass._hoisted_vars) {
+        // Note that this access chain contains only struct members.
+        const auto &access_chain = item.first;
 
-      param_names[item.second] = str.str();
+        std::ostringstream str;
+        str << param_names[access_chain._var_id];
+
+        for (size_t i = 0; i < access_chain.size(); ++i) {
+          str << '_' << access_chain[i];
+        }
+
+        param_names[item.second] = str.str();
+      }
+      transformer.run(SpirVRemoveUnusedVariablesPass());
     }
 
+    stream = transformer.get_result();
 #ifndef NDEBUG
     if (!stream.validate()) {
       return false;
@@ -295,6 +338,13 @@ query_constants(const ShaderModule *module, DWORD *data) {
     }
   }
 
+  struct SizeInput {  // a texture-size constant (named like p0s_r2) seen while scanning
+    const Shader::Parameter &param;
+    int resource_index;  // parsed from the constant name's suffix with atoi()
+    UINT reg;  // the constant's RegisterIndex
+  };
+  pvector<SizeInput> size_inputs;  // resolved against _textures after the constant loop
+
   Shader::Stage stage = module->get_stage();
 
   for (DWORD ci = 0; ci < table->Constants; ++ci) {
@@ -342,6 +392,13 @@ query_constants(const ShaderModule *module, DWORD *data) {
     }
 #endif
 
+    if (suffix[0] == 's') {
+      // Texture size input, named like p0s_r2
+      int resource_index = atoi(suffix + 3);
+      size_inputs.push_back({param, resource_index, constant.RegisterIndex});
+      continue;
+    }
+
     int reg_set = constant.RegisterSet;
     int reg_idx = constant.RegisterIndex;
     int reg_end = reg_idx + constant.RegisterCount;
@@ -383,6 +440,38 @@ query_constants(const ShaderModule *module, DWORD *data) {
     }
   }
 
+  for (const SizeInput &input : size_inputs) {
+    uint64_t resource_id = input.param._binding->get_resource_id(input.resource_index);
+
+    bool found_treg = false;
+    for (TextureRegister &treg : _textures) {
+      if (treg.binding == input.param._binding && treg.resource_id == resource_id) {
+        if (stage == Shader::Stage::VERTEX) {
+          if (treg.size_vreg >= 0) {
+            continue;
+          }
+          treg.size_vreg = input.reg;
+        }
+        if (stage == Shader::Stage::FRAGMENT) {
+          if (treg.size_freg >= 0) {
+            continue;
+          }
+          treg.size_freg = input.reg;
+        }
+        found_treg = true;
+        break;
+      }
+    }
+
+    if (!found_treg) {
+      // We have a size input for a texture that got optimized out.
+      // Generate a dummy texture register for this.
+      int vreg = (stage == Shader::Stage::VERTEX) ? input.reg : -1;
+      int freg = (stage == Shader::Stage::FRAGMENT) ? input.reg : -1;
+      _textures.push_back({(UINT)-1, input.param._binding, resource_id, vreg, freg});
+    }
+  }
+
   return true;
 }
 
@@ -516,7 +605,7 @@ r_query_resources(Shader::Stage stage, const Shader::Parameter &param,
       TextureRegister reg;
       reg.unit = reg_idx;
       reg.binding = param._binding;
-      reg.resource_id = param._binding->get_resource_id(resource_index, resource_type);
+      reg.resource_id = param._binding->get_resource_id(resource_index);
       _textures.push_back(std::move(reg));
       ++reg_idx;
     }
@@ -698,6 +787,9 @@ update_tables(GSG *gsg, const GeomVertexDataPipelineReader *data_reader) {
 void DXShaderContext9::
 disable_shader_texture_bindings(GSG *gsg) {
   for (const TextureRegister &reg : _textures) {
+    if (reg.unit == (UINT)-1) {
+      continue;
+    }
     HRESULT hr = gsg->_d3d_device->SetTexture(reg.unit, nullptr);
     if (FAILED(hr)) {
       dxgsg9_cat.error()
@@ -736,12 +828,29 @@ update_shader_texture_bindings(DXShaderContext9 *prev, GSG *gsg) {
       continue;
     }
 
-    TextureContext *tc = tex->prepare_now(gsg->_prepared_objects, gsg);
+    DXTextureContext9 *tc = (DXTextureContext9 *)tex->prepare_now(gsg->_prepared_objects, gsg);
     if (tc == nullptr) {
       continue;
     }
 
-    gsg->apply_texture(reg.unit, tc, view, sampler);
+    if (reg.unit != (UINT)-1) {
+      gsg->apply_texture(reg.unit, tc, view, sampler);
+    }
+    else if (!gsg->update_texture(tc, false)) {
+      continue;
+    }
+
+    if (reg.size_vreg >= 0 || reg.size_freg >= 0) {
+      DWORD levels = tc->get_d3d_texture(view)->GetLevelCount();
+      const float data[4] = {(float)tc->_width, (float)tc->_height, (float)tc->_depth, (float)levels};
+
+      if (reg.size_vreg >= 0) {
+        gsg->_d3d_device->SetVertexShaderConstantF(reg.size_vreg, data, 1);
+      }
+      if (reg.size_freg >= 0) {
+        gsg->_d3d_device->SetPixelShaderConstantF(reg.size_freg, data, 1);
+      }
+    }
   }
 }
 

+ 2 - 0
panda/src/dxgsg9/dxShaderContext9.h

@@ -88,6 +88,8 @@ private:
     UINT unit;
     PT(ShaderInputBinding) binding;
     uint64_t resource_id;
+    int size_vreg = -1;
+    int size_freg = -1;
   };
   pvector<TextureRegister> _textures;
 

+ 8 - 0
panda/src/dxgsg9/dxTextureContext9.cxx

@@ -1017,6 +1017,10 @@ create_texture(DXScreenData &scrn) {
       << "\n";
   }
 
+  _width = target_width;
+  _height = target_height;
+  _depth = target_depth;
+
   for (int view = 0; view < num_views; ++view) {
     IDirect3DBaseTexture9 *d3d_texture = nullptr;
     IDirect3DTexture9 *d3d_2d_texture;
@@ -1139,6 +1143,10 @@ create_simple_texture(DXScreenData &scrn) {
   DWORD usage = 0;
   D3DPOOL pool = D3DPOOL_MANAGED;
 
+  _width = target_width;
+  _height = target_height;
+  _depth = 1;
+
   int data_size = target_width * target_height * 4;
 
   IDirect3DTexture9 *d3d_2d_texture = nullptr;

+ 3 - 0
panda/src/dxgsg9/dxTextureContext9.h

@@ -53,6 +53,9 @@ private:
   unsigned int get_bits_per_pixel(Texture::Format format, int *alphbits);
   PN_stdfloat d3d_format_to_bytes_per_pixel (D3DFORMAT format);
 
+public:
+  UINT _width, _height, _depth;
+
 private:
   D3DFORMAT _d3d_format;    // the 'D3DFORMAT' the Panda TextureBuffer fmt corresponds to
   small_vector<IDirect3DBaseTexture9 *> _d3d_textures;

+ 2 - 1
panda/src/egldisplay/eglGraphicsBuffer.cxx

@@ -195,7 +195,8 @@ open_buffer() {
     // If the old gsg has the wrong pixel format, create a new one that shares
     // with the old gsg.
     DCAST_INTO_R(eglgsg, _gsg, false);
-    if (!eglgsg->get_fb_properties().subsumes(_fb_properties)) {
+    if (eglgsg->get_engine() != _engine ||
+        !eglgsg->get_fb_properties().subsumes(_fb_properties)) {
       eglgsg = new eglGraphicsStateGuardian(_engine, _pipe, eglgsg);
       eglgsg->choose_pixel_format(_fb_properties, egl_pipe, false, true, false);
       _gsg = eglgsg;

+ 3 - 0
panda/src/egldisplay/eglGraphicsPipe.cxx

@@ -288,6 +288,9 @@ make_output(const std::string &name,
         ((flags&BF_require_window)!=0)) {
       return nullptr;
     }
+    if (host->get_engine() != engine) {
+      return nullptr;
+    }
     // Early failure - if we are sure that this buffer WONT meet specs, we can
     // bail out early.
     if ((flags & BF_fb_props_optional) == 0) {

+ 2 - 1
panda/src/egldisplay/eglGraphicsWindow.cxx

@@ -228,7 +228,8 @@ open_window() {
     // If the old gsg has the wrong pixel format, create a new one that shares
     // with the old gsg.
     DCAST_INTO_R(eglgsg, _gsg, false);
-    if (!eglgsg->get_fb_properties().subsumes(_fb_properties)) {
+    if (eglgsg->get_engine() != _engine ||
+        !eglgsg->get_fb_properties().subsumes(_fb_properties)) {
       eglgsg = new eglGraphicsStateGuardian(_engine, _pipe, eglgsg);
       eglgsg->choose_pixel_format(_fb_properties, egl_pipe, true, false, false);
       _gsg = eglgsg;

+ 11 - 0
panda/src/event/asyncFuture.cxx

@@ -389,6 +389,17 @@ wake_task(AsyncTask *task) {
   }
 }
 
+/**
+ * Invoked by a CompletionToken wrapping a future: stores the success flag as
+ * the future's result and drops the reference the token held on it.
+ */
+void AsyncFuture::
+token_callback(Completable::Data *data, bool success) {
+  AsyncFuture *self = static_cast<AsyncFuture *>(data);
+  self->set_result(EventParameter(success));
+  unref_delete(self);
+}
+
 /**
  * @see AsyncFuture::gather
  */

+ 32 - 1
panda/src/event/asyncFuture.h

@@ -20,6 +20,7 @@
 #include "eventParameter.h"
 #include "patomic.h"
 #include "small_vector.h"
+#include "completionToken.h"
 
 class AsyncTaskManager;
 class AsyncTask;
@@ -58,7 +59,7 @@ class AsyncTask;
  *
  * @since 1.10.0
  */
-class EXPCL_PANDA_EVENT AsyncFuture : public TypedReferenceCount {
+class EXPCL_PANDA_EVENT AsyncFuture : public TypedReferenceCount, protected Completable::Data {
 PUBLISHED:
   INLINE AsyncFuture();
   virtual ~AsyncFuture();
@@ -109,6 +110,8 @@ public:
 private:
   void wake_task(AsyncTask *task);
 
+  static void token_callback(Completable::Data *, bool success);
+
 protected:
   enum FutureState : patomic_unsigned_lock_free::value_type {
     // Pending states
@@ -136,6 +139,7 @@ protected:
 
   friend class AsyncGatheringFuture;
   friend class AsyncTaskChain;
+  friend class CompletionToken;
   friend class PythonTask;
 
 public:
@@ -199,6 +203,33 @@ private:
   static TypeHandle _type_handle;
 };
 
+#ifndef CPPPARSER
+// Allow passing a future into a method accepting a CompletionToken.
+template<>
+INLINE CompletionToken::
+CompletionToken(AsyncFuture *future) {
+  if (future != nullptr) {
+    future->ref();  // matched by unref_delete() in AsyncFuture::token_callback
+    _callback._data = future;
+    if (_callback._data->_function == nullptr) {  // leave an already-installed function alone
+      _callback._data->_function = &AsyncFuture::token_callback;
+    }
+  }
+}
+
+template<>
+INLINE CompletionToken::
+CompletionToken(PT(AsyncFuture) future) {
+  if (future != nullptr) {
+    _callback._data = future;
+    if (_callback._data->_function == nullptr) {  // leave an already-installed function alone
+      _callback._data->_function = &AsyncFuture::token_callback;
+    }
+    future.cheat() = nullptr;  // NOTE(review): appears to hand the PT's reference to the token instead of releasing it - confirm
+  }
+}
+#endif
+
 #include "asyncFuture.I"
 
 #endif // !ASYNCFUTURE_H

+ 3 - 0
panda/src/event/asyncTaskChain.cxx

@@ -1438,6 +1438,9 @@ AsyncTaskChainThread(const string &name, AsyncTaskChain *chain) :
 void AsyncTaskChain::AsyncTaskChainThread::
 thread_main() {
 #ifdef HAVE_THREADS
+  // Let PStats know this thread exists.
+  PStatClient::thread_tick();
+
   MutexHolder holder(_chain->_manager->_lock);
   while (_chain->_state != S_shutdown && _chain->_state != S_interrupted) {
     thread_consider_yield();

+ 17 - 18
panda/src/express/trueClock.cxx

@@ -521,6 +521,7 @@ TrueClock() {
 #include <stdio.h>  // for perror
 
 static long _init_sec;
+static time_t _init_sec_monotonic = 0;
 
 /**
  *
@@ -553,25 +554,13 @@ get_long_time() {
  */
 double TrueClock::
 get_short_raw_time() {
-  struct timeval tv;
-
-  int result;
-
-#ifdef GETTIMEOFDAY_ONE_PARAM
-  result = gettimeofday(&tv);
-#else
-  result = gettimeofday(&tv, nullptr);
-#endif
-
-  if (result < 0) {
-    // Error in gettimeofday().
-    return 0.0;
+#if defined(CLOCK_MONOTONIC) && !defined(__APPLE__)
+  struct timespec spec;
+  if (clock_gettime(CLOCK_MONOTONIC, &spec) == 0) {  // subtract the init-time seconds so the double keeps as much precision as possible
+    return (double)(spec.tv_sec - _init_sec_monotonic) + (double)spec.tv_nsec / 1000000000.0;
   }
-
-  // We subtract out the time at which the clock was initialized, because we
-  // don't care about the number of seconds all the way back to 1970, and we
-  // want to leave the double with as much precision as it can get.
-  return (double)(tv.tv_sec - _init_sec) + (double)tv.tv_usec / 1000000.0;
+#endif
+  return get_long_time();  // CLOCK_MONOTONIC unavailable, or clock_gettime() failed
 }
 
 /**
@@ -603,6 +592,16 @@ TrueClock() {
   } else {
     _init_sec = tv.tv_sec;
   }
+
+#if defined(CLOCK_MONOTONIC) && !defined(__APPLE__)
+  struct timespec spec;
+  if (clock_gettime(CLOCK_MONOTONIC, &spec) == 0) {
+    _init_sec_monotonic = spec.tv_sec;
+  } else {
+    perror("clock_gettime(CLOCK_MONOTONIC)");
+    _init_sec_monotonic = 0;
+  }
+#endif
 }
 
 #endif

+ 2 - 0
panda/src/framework/pandaFramework.cxx

@@ -204,6 +204,8 @@ close_framework() {
 
   _event_handler.remove_all_hooks();
 
+  _task_mgr.cleanup();
+
   _is_open = false;
   _made_default_pipe = false;
   _default_pipe.clear();

+ 9 - 0
panda/src/gles2gsg/gles2gsg.h

@@ -147,6 +147,11 @@ typedef char GLchar;
 #define GL_FRAMEBUFFER_BARRIER_BIT 0x400
 #define GL_TRANSFORM_FEEDBACK_BARRIER_BIT 0x800
 #define GL_ATOMIC_COUNTER_BARRIER_BIT 0x1000
+#define GL_SHADER_STORAGE_BARRIER_BIT 0x2000
+#define GL_MAP_INVALIDATE_RANGE_BIT 0x0004
+#define GL_MAP_INVALIDATE_BUFFER_BIT 0x0008
+#define GL_MAP_FLUSH_EXPLICIT_BIT 0x0010
+#define GL_MAP_UNSYNCHRONIZED_BIT 0x0020
 #define GL_HALF_FLOAT 0x140B
 #define GL_COLOR 0x1800
 #define GL_DEPTH 0x1801
@@ -181,6 +186,7 @@ typedef char GLchar;
 #define GL_WRITE_ONLY 0x88B9
 #define GL_READ_WRITE 0x88BA
 #define GL_PIXEL_PACK_BUFFER 0x88EB
+#define GL_PIXEL_UNPACK_BUFFER 0x88EC
 #define GL_MAX_ARRAY_TEXTURE_LAYERS 0x88FF
 #define GL_ACTIVE_UNIFORM_BLOCK_MAX_NAME_LENGTH 0x8A35
 #define GL_ACTIVE_UNIFORM_BLOCKS 0x8A36
@@ -276,6 +282,7 @@ typedef char GLchar;
 #define GL_SHADER_STORAGE_BUFFER_BINDING 0x90D3
 #define GL_SHADER_STORAGE_BUFFER_START 0x90D4
 #define GL_SHADER_STORAGE_BUFFER_SIZE 0x90D5
+#define GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS 0x90EB
 #define GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117
 #define GL_UNSIGNALED 0x9118
 #define GL_SIGNALED 0x9119
@@ -283,6 +290,8 @@ typedef char GLchar;
 #define GL_TIMEOUT_EXPIRED 0x911B
 #define GL_CONDITION_SATISFIED 0x911C
 #define GL_COMPUTE_SHADER 0x91B9
+#define GL_MAX_COMPUTE_WORK_GROUP_COUNT 0x91BE
+#define GL_MAX_COMPUTE_WORK_GROUP_SIZE 0x91BF
 #define GL_FRAMEBUFFER_DEFAULT_WIDTH 0x9310
 #define GL_FRAMEBUFFER_DEFAULT_HEIGHT 0x9311
 #define GL_FRAMEBUFFER_DEFAULT_SAMPLES 0x9313

+ 4 - 0
panda/src/glstuff/glBufferContext_src.h

@@ -32,6 +32,10 @@ public:
   // This is the GL "name" of the data object.
   GLuint _index;
 
+  // This is set to glgsg->_shader_storage_barrier_counter if a write was
+  // performed, in which case a barrier is issued before the next use.
+  int _shader_storage_barrier_counter = -1;
+
 public:
   static TypeHandle get_class_type() {
     return _type_handle;

+ 181 - 22
panda/src/glstuff/glGraphicsBuffer_src.cxx

@@ -281,7 +281,7 @@ begin_frame(FrameMode mode, Thread *current_thread) {
       CLP(GraphicsStateGuardian) *glgsg = (CLP(GraphicsStateGuardian) *)_gsg.p();
 
       for (CLP(TextureContext) *gtc : _texture_contexts) {
-        if (gtc->needs_barrier(GL_FRAMEBUFFER_BARRIER_BIT)) {
+        if (gtc->needs_barrier(GL_FRAMEBUFFER_BARRIER_BIT, true)) {
           glgsg->issue_memory_barrier(GL_FRAMEBUFFER_BARRIER_BIT);
           // If we've done it for one, we've done it for all.
           break;
@@ -419,6 +419,7 @@ rebuild_bitplanes() {
   Texture *attach[RTP_COUNT];
   memset(attach, 0, sizeof(Texture *) * RTP_COUNT);
   _texture_contexts.clear();
+  _textures.clear();
 
   // Sort the textures list into appropriate slots.
   {
@@ -458,15 +459,17 @@ rebuild_bitplanes() {
       }
 
       // If we can't bind this type of texture, punt it.
-      if ((tex->get_texture_type() != Texture::TT_2d_texture) &&
-          (tex->get_texture_type() != Texture::TT_3d_texture) &&
-          (tex->get_texture_type() != Texture::TT_2d_texture_array) &&
-          (tex->get_texture_type() != Texture::TT_cube_map)) {
+      Texture::TextureType texture_type = tex->get_texture_type();
+      if (texture_type != Texture::TT_2d_texture &&
+          texture_type != Texture::TT_3d_texture &&
+          texture_type != Texture::TT_2d_texture_array &&
+          texture_type != Texture::TT_cube_map &&
+          texture_type != Texture::TT_cube_map_array) {
         ((CData *)cdata.p())->_textures[i]._rtm_mode = RTM_copy_texture;
         continue;
       }
 
-      if (_rb_size_z > 1 && tex->get_texture_type() == Texture::TT_2d_texture) {
+      if (_rb_size_z > 1 && texture_type == Texture::TT_2d_texture) {
         // We can't bind a 2D texture to a layered FBO.  If the user happened
         // to request RTM_bind_layered for a 2D texture, that's just silly,
         // and we can't render to anything but the first layer anyway.
@@ -520,6 +523,18 @@ rebuild_bitplanes() {
       // but it's a waste.  Let's not do it unless the user requested stencil.
       _use_depth_stencil = false;
 
+#ifdef __APPLE__
+    } else if (_fb_properties.get_depth_bits() > 0 && _requested_multisamples) {
+      // Apple's OpenGL driver doesn't like blitting depth-stencil targets.
+      // See GitHub issue #1719
+      _use_depth_stencil = false;
+      if (_fb_properties.get_depth_bits() < 24) {
+        // Make sure we do get at least as many depth bits as we would have
+        // gotten if we did get a depth-stencil buffer.
+        _fb_properties.set_depth_bits(24);
+      }
+#endif
+
     } else if (_fb_properties.get_depth_bits() > 0) {
       // Let's use a depth stencil buffer by default, if a depth buffer was
       // requested.
@@ -625,10 +640,23 @@ rebuild_bitplanes() {
 
     if (_have_any_color || have_any_depth) {
       // Clear if the fbo was just created, regardless of the clear settings per
-      // frame.
+      // frame.  However, we don't do this for textures, which may have useful
+      // contents that need to be preserved.
       if (_initial_clear) {
-        glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
-        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
+        GLbitfield mask = 0;
+        if (_rb[RTP_color]) {
+          glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
+          mask |= GL_COLOR_BUFFER_BIT;
+        }
+        if (_rb[RTP_depth]) {
+          mask |= GL_DEPTH_BUFFER_BIT;
+        }
+        if (_rb[RTP_depth_stencil]) {
+          mask |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
+        }
+        if (mask != 0) {
+          glClear(mask);
+        }
       }
 #ifndef OPENGLES_1
     } else if (glgsg->_supports_empty_framebuffer) {
@@ -793,6 +821,12 @@ bind_slot(int layer, bool rb_resize, Texture **attach, RenderTexturePlane slot,
       _fb_properties.setup_color_texture(tex);
     }
 
+    if (slot == RTP_color && !tex->has_clear_color()) {
+      tex->set_clear_color(LColor(0, 0, 0, 1));
+    }
+
+    _textures.push_back(tex);
+
     TextureContext *tc = tex->prepare_now(glgsg->get_prepared_objects(), glgsg);
     nassertv(tc != nullptr);
     CLP(TextureContext) *gtc = DCAST(CLP(TextureContext), tc);
@@ -1132,7 +1166,9 @@ bind_slot(int layer, bool rb_resize, Texture **attach, RenderTexturePlane slot,
 
     if (slot == RTP_depth_stencil) {
       if (GLCAT.is_debug()) {
-        GLCAT.debug() << "Creating depth stencil renderbuffer.\n";
+        GLCAT.debug()
+          << "Creating depth stencil renderbuffer with format 0x" << std::hex
+          << gl_format << std::dec << ".\n";
       }
       // Allocate renderbuffer storage for depth stencil.
       GLint depth_size = 0, stencil_size = 0;
@@ -1160,7 +1196,9 @@ bind_slot(int layer, bool rb_resize, Texture **attach, RenderTexturePlane slot,
 
     } else if (slot == RTP_depth) {
       if (GLCAT.is_debug()) {
-        GLCAT.debug() << "Creating depth renderbuffer.\n";
+        GLCAT.debug()
+          << "Creating depth renderbuffer with format 0x" << std::hex
+          << gl_format << std::dec << ".\n";
       }
       // Allocate renderbuffer storage for regular depth.
       GLint depth_size = 0;
@@ -1176,6 +1214,11 @@ bind_slot(int layer, bool rb_resize, Texture **attach, RenderTexturePlane slot,
         } else {
           gl_format = GL_DEPTH_COMPONENT32F_NV;
         }
+        if (GLCAT.is_debug()) {
+          GLCAT.debug()
+            << "GL_DEPTH_COMPONENT32 not supported, switching to format 0x"
+            << std::hex << gl_format << std::dec << " instead.\n";
+        }
         glgsg->_glRenderbufferStorage(GL_RENDERBUFFER_EXT, gl_format, _rb_size_x, _rb_size_y);
         glgsg->_glGetRenderbufferParameteriv(GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_DEPTH_SIZE_EXT, &depth_size);
 
@@ -1200,7 +1243,9 @@ bind_slot(int layer, bool rb_resize, Texture **attach, RenderTexturePlane slot,
 
     } else {
       if (GLCAT.is_debug()) {
-        GLCAT.debug() << "Creating color renderbuffer.\n";
+        GLCAT.debug()
+          << "Creating color renderbuffer with format 0x" << std::hex
+          << gl_format << std::dec << ".\n";
       }
       glgsg->_glRenderbufferStorage(GL_RENDERBUFFER_EXT, gl_format, _rb_size_x, _rb_size_y);
 
@@ -1248,12 +1293,33 @@ bind_slot_multisample(bool rb_resize, Texture **attach, RenderTexturePlane slot,
 #ifndef OPENGLES_2
     if (_use_depth_stencil) {
       glgsg->_glBindRenderbuffer(GL_RENDERBUFFER_EXT, _rbm[slot]);
+      GLuint format;
+#ifdef OPENGLES_1
+      format = GL_DEPTH24_STENCIL8_OES;
+#else
+      if (_fb_properties.get_depth_bits() > 24 ||
+          _fb_properties.get_float_depth()) {
+        if (!glgsg->_use_remapped_depth_range) {
+          format = GL_DEPTH32F_STENCIL8;
+        } else {
+          format = GL_DEPTH32F_STENCIL8_NV;
+        }
+      } else {
+        format = GL_DEPTH24_STENCIL8;
+      }
+#endif
+      if (GLCAT.is_debug()) {
+        GLCAT.debug()
+          << "Creating depth stencil renderbuffer with format 0x" << std::hex
+          << format << std::dec << " and " << _requested_multisamples
+          << " multisamples.\n";
+      }
       if (_requested_coverage_samples) {
         glgsg->_glRenderbufferStorageMultisampleCoverage(GL_RENDERBUFFER_EXT, _requested_coverage_samples,
-                                                         _requested_multisamples, GL_DEPTH_STENCIL_EXT,
+                                                         _requested_multisamples, format,
                                                          _rb_size_x, _rb_size_y);
       } else {
-        glgsg->_glRenderbufferStorageMultisample(GL_RENDERBUFFER_EXT, _requested_multisamples, GL_DEPTH_STENCIL_EXT,
+        glgsg->_glRenderbufferStorageMultisample(GL_RENDERBUFFER_EXT, _requested_multisamples, format,
                                                  _rb_size_x, _rb_size_y);
       }
 #ifndef OPENGLES
@@ -1292,6 +1358,22 @@ bind_slot_multisample(bool rb_resize, Texture **attach, RenderTexturePlane slot,
           default:
             break;
         }
+#ifndef OPENGLES
+      } else if (_fb_properties.get_depth_bits() > 24) {
+        format = GL_DEPTH_COMPONENT32;
+      } else if (_fb_properties.get_depth_bits() > 16) {
+        format = GL_DEPTH_COMPONENT24;
+      } else if (_fb_properties.get_depth_bits() > 1) {
+        format = GL_DEPTH_COMPONENT16;
+      } else {
+        format = GL_DEPTH_COMPONENT;
+#endif
+      }
+      if (GLCAT.is_debug()) {
+        GLCAT.debug()
+          << "Creating depth renderbuffer with format 0x" << std::hex
+          << format << std::dec << " and " << _requested_multisamples
+          << " multisamples.\n";
       }
       if (_requested_coverage_samples) {
         glgsg->_glRenderbufferStorageMultisampleCoverage(GL_RENDERBUFFER_EXT, _requested_coverage_samples,
@@ -1331,21 +1413,97 @@ bind_slot_multisample(bool rb_resize, Texture **attach, RenderTexturePlane slot,
       case RTP_aux_rgba_1:
       case RTP_aux_rgba_2:
       case RTP_aux_rgba_3:
+        gl_format = GL_RGBA;
+        break;
       default:
-        if (_fb_properties.get_srgb_color()) {
-          gl_format = GL_SRGB8_ALPHA8;
-        } else if (_fb_properties.get_float_color()) {
-          if (_fb_properties.get_color_bits() > 16 * 3) {
-            gl_format = GL_RGBA32F_ARB;
+        if (_fb_properties.get_alpha_bits() == 0) {
+          if (_fb_properties.get_srgb_color()) {
+            gl_format = GL_SRGB8;
+          } else if (_fb_properties.get_color_bits() > 16 * 3 ||
+                     _fb_properties.get_red_bits() > 16 ||
+                     _fb_properties.get_green_bits() > 16 ||
+                     _fb_properties.get_blue_bits() > 16) {
+            // 32-bit, which is always floating-point.
+            if (_fb_properties.get_blue_bits() > 0 ||
+                _fb_properties.get_color_bits() == 1 ||
+                _fb_properties.get_color_bits() > 32 * 2) {
+              gl_format = GL_RGB32F;
+            } else if (_fb_properties.get_green_bits() > 0 ||
+                       _fb_properties.get_color_bits() > 32) {
+              gl_format = GL_RG32F;
+            } else {
+              gl_format = GL_R32F;
+            }
+          } else if (_fb_properties.get_float_color()) {
+            // 16-bit floating-point.
+            if (_fb_properties.get_blue_bits() > 10 ||
+                _fb_properties.get_color_bits() == 1 ||
+                _fb_properties.get_color_bits() > 32) {
+              gl_format = GL_RGB16F;
+            } else if (_fb_properties.get_blue_bits() > 0) {
+              if (_fb_properties.get_red_bits() > 11 ||
+                  _fb_properties.get_green_bits() > 11) {
+                gl_format = GL_RGB16F;
+              } else {
+                gl_format = GL_R11F_G11F_B10F;
+              }
+            } else if (_fb_properties.get_green_bits() > 0 ||
+                       _fb_properties.get_color_bits() > 16) {
+              gl_format = GL_RG16F;
+            } else {
+              gl_format = GL_R16F;
+            }
+          } else if (_fb_properties.get_color_bits() > 10 * 3 ||
+                     _fb_properties.get_red_bits() > 10 ||
+                     _fb_properties.get_green_bits() > 10 ||
+                     _fb_properties.get_blue_bits() > 10) {
+            // 16-bit normalized.
+            if (_fb_properties.get_blue_bits() > 0 ||
+                _fb_properties.get_color_bits() == 1 ||
+                _fb_properties.get_color_bits() > 16 * 2) {
+              gl_format = GL_RGBA16;
+            } else if (_fb_properties.get_green_bits() > 0 ||
+                       _fb_properties.get_color_bits() > 16) {
+              gl_format = GL_RG16;
+            } else {
+              gl_format = GL_R16;
+            }
+          } else if (_fb_properties.get_color_bits() > 8 * 3 ||
+                     _fb_properties.get_red_bits() > 8 ||
+                     _fb_properties.get_green_bits() > 8 ||
+                     _fb_properties.get_blue_bits() > 8) {
+            gl_format = GL_RGB10_A2;
           } else {
-            gl_format = GL_RGBA16F_ARB;
+            gl_format = GL_RGB;
           }
         } else {
-          gl_format = GL_RGBA;
+          if (_fb_properties.get_srgb_color()) {
+            gl_format = GL_SRGB8_ALPHA8;
+          } else if (_fb_properties.get_float_color()) {
+            if (_fb_properties.get_color_bits() > 16 * 3) {
+              gl_format = GL_RGBA32F_ARB;
+            } else {
+              gl_format = GL_RGBA16F_ARB;
+            }
+          } else {
+            if (_fb_properties.get_color_bits() > 16 * 3) {
+              gl_format = GL_RGBA32F_ARB;
+            } else if (_fb_properties.get_color_bits() > 8 * 3) {
+              gl_format = GL_RGBA16;
+            } else {
+              gl_format = GL_RGBA;
+            }
+          }
         }
         break;
     }
 #endif
+    if (GLCAT.is_debug()) {
+      GLCAT.debug()
+        << "Creating color renderbuffer with format 0x" << std::hex
+        << gl_format << std::dec << " and " << _requested_multisamples
+        << " multisamples.\n";
+    }
     glgsg->_glBindRenderbuffer(GL_RENDERBUFFER_EXT, _rbm[slot]);
     if (_requested_coverage_samples) {
       glgsg->_glRenderbufferStorageMultisampleCoverage(GL_RENDERBUFFER_EXT, _requested_coverage_samples,
@@ -1405,6 +1563,7 @@ attach_tex(GLenum attachpoint, CLP(TextureContext) *gtc, int view, int layer) {
                                    target, index, 0, layer);
     break;
   case GL_TEXTURE_2D_ARRAY:
+  case GL_TEXTURE_CUBE_MAP_ARRAY:
     glgsg->_glFramebufferTextureLayer(GL_FRAMEBUFFER_EXT, attachpoint,
                                       index, 0, layer);
     break;
@@ -1967,7 +2126,7 @@ resolve_multisamples() {
     // Issue memory barriers as necessary to make sure that the texture memory
     // is synchronized before we blit to it.
     for (CLP(TextureContext) *gtc : _texture_contexts) {
-      if (gtc->needs_barrier(GL_FRAMEBUFFER_BARRIER_BIT)) {
+      if (gtc->needs_barrier(GL_FRAMEBUFFER_BARRIER_BIT, true)) {
         glgsg->issue_memory_barrier(GL_FRAMEBUFFER_BARRIER_BIT);
         // If we've done it for one, we've done it for all.
         break;

+ 4 - 0
panda/src/glstuff/glGraphicsBuffer_src.h

@@ -128,6 +128,10 @@ protected:
   typedef pvector<CLP(TextureContext)*> TextureContexts;
   TextureContexts _texture_contexts;
 
+  // List of textures we need to keep a reference to.
+  typedef pvector<PT(Texture)> Textures;
+  Textures _textures;
+
   // The cube map face we are currently drawing to or have just finished
   // drawing to, or -1 if we are not drawing to a cube map.
   int _bound_tex_page;

File diff suppressed because it is too large
+ 489 - 223
panda/src/glstuff/glGraphicsStateGuardian_src.cxx


+ 72 - 22
panda/src/glstuff/glGraphicsStateGuardian_src.h

@@ -39,6 +39,8 @@
 #include "geomVertexArrayData.h"
 #include "lightMutex.h"
 #include "pStatGPUTimer.h"
+#include "completionToken.h"
+#include "asyncTaskChain.h"
 
 class PlaneNode;
 class Light;
@@ -166,6 +168,7 @@ typedef void (APIENTRYP PFNGLGETPROGRAMINFOLOGPROC) (GLuint program, GLsizei buf
 typedef void (APIENTRYP PFNGLGETSHADERIVPROC) (GLuint shader, GLenum pname, GLint *params);
 typedef void (APIENTRYP PFNGLGETSHADERINFOLOGPROC) (GLuint shader, GLsizei bufSize, GLsizei *length, GLchar *infoLog);
 typedef GLint (APIENTRYP PFNGLGETUNIFORMLOCATIONPROC) (GLuint program, const GLchar *name);
+typedef void (APIENTRYP PFNGLGETINTEGERI_VPROC) (GLenum target, GLuint index, GLint *data);
 typedef void (APIENTRYP PFNGLLINKPROGRAMPROC) (GLuint program);
 typedef void (APIENTRYP PFNGLSHADERSOURCEPROC_P) (GLuint shader, GLsizei count, const GLchar* const *string, const GLint *length);
 typedef void (APIENTRYP PFNGLSPECIALIZESHADERARBPROC) (GLuint shader, const GLchar *, GLuint, const GLuint *, const GLuint *);
@@ -250,6 +253,7 @@ typedef void (APIENTRYP PFNGLGETPROGRAMBINARYPROC) (GLuint program, GLsizei bufS
 typedef void (APIENTRYP PFNGLPROGRAMBINARYPROC) (GLuint program, GLenum binaryFormat, const void *binary, GLsizei length);
 typedef void (APIENTRYP PFNGLGETINTERNALFORMATIVPROC) (GLenum target, GLenum internalformat, GLenum pname, GLsizei bufSize, GLint *params);
 typedef void (APIENTRYP PFNGLBUFFERSTORAGEPROC) (GLenum target, GLsizeiptr size, const void *data, GLbitfield flags);
+typedef void (APIENTRYP PFNGLCOPYBUFFERSUBDATAPROC) (GLenum readTarget, GLenum writeTarget, GLintptr readOffset, GLintptr writeOffset, GLsizeiptr size);
 typedef void (APIENTRYP PFNGLBINDIMAGETEXTUREPROC) (GLuint unit, GLuint texture, GLint level, GLboolean layered, GLint layer, GLenum access, GLenum format);
 typedef void (APIENTRYP PFNGLCLEARTEXIMAGEPROC) (GLuint texture, GLint level, GLenum format, GLenum type, const void *data);
 typedef void (APIENTRYP PFNGLCLEARTEXSUBIMAGEPROC) (GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void *data);
@@ -365,7 +369,8 @@ public:
 #endif
 
   virtual TextureContext *prepare_texture(Texture *tex);
-  virtual bool update_texture(TextureContext *tc, bool force);
+  virtual bool update_texture(TextureContext *tc, bool force,
+                              CompletionToken token = CompletionToken());
   virtual void release_texture(TextureContext *tc);
   virtual void release_textures(const pvector<TextureContext *> &contexts);
   virtual bool extract_texture_data(Texture *tex);
@@ -411,9 +416,10 @@ public:
 
 #ifndef OPENGLES_1
   virtual BufferContext *prepare_shader_buffer(ShaderBuffer *data);
-  void apply_shader_buffer(GLuint base, ShaderBuffer *buffer);
+  CLP(BufferContext) *apply_shader_buffer(GLuint base, ShaderBuffer *buffer);
   virtual void release_shader_buffer(BufferContext *bc);
   virtual void release_shader_buffers(const pvector<BufferContext *> &contexts);
+  virtual bool extract_shader_buffer_data(ShaderBuffer *buffer, vector_uchar &data);
 #endif
 
 #ifndef OPENGLES
@@ -438,7 +444,6 @@ public:
   virtual bool framebuffer_copy_to_ram
     (Texture *tex, int view, int z, const DisplayRegion *dr, const RenderBuffer &rb,
      ScreenshotRequest *request);
-  void finish_async_framebuffer_ram_copies(bool force = false);
 
 #ifdef SUPPORT_FIXED_FUNCTION
   void apply_fog(Fog *fog);
@@ -649,12 +654,21 @@ protected:
   bool apply_texture(CLP(TextureContext) *gtc, int view);
   bool apply_sampler(GLuint unit, const SamplerState &sampler,
                      CLP(TextureContext) *gtc, int view);
-  bool upload_texture(CLP(TextureContext) *gtc, bool force, bool uses_mipmaps);
-  bool upload_texture_image(CLP(TextureContext) *gtc, int view,
-                            bool needs_reload, int mipmap_bias, int num_levels,
+  bool upload_texture(CLP(TextureContext) *gtc, bool force, bool uses_mipmaps,
+                      CompletionToken token = CompletionToken());
+  bool upload_texture_view(CLP(TextureContext) *gtc, int view,
+                           bool needs_reload, int mipmap_bias, int num_levels,
+                           GLint internal_format, GLint external_format,
+                           GLenum component_type, bool compressed,
+                           int async_buffers, CompletionToken token);
+  bool upload_texture_level(bool full_reload, bool compressed,
+                            GLenum target, int level,
+                            int width, int height, int depth,
                             GLint internal_format, GLint external_format,
                             GLenum component_type,
-                            Texture::CompressionMode image_compression);
+                            const unsigned char *image_ptr,
+                            size_t page_size, SparseArray pages,
+                            GLenum usage_hint);
   void generate_mipmaps(CLP(TextureContext) *gtc);
   bool upload_simple_texture(CLP(TextureContext) *gtc);
 
@@ -670,6 +684,20 @@ protected:
   void do_point_size();
 #endif
 
+#ifndef OPENGLES_1
+  void *map_read_buffer(GLenum target, GLuint buffer, size_t size);
+  void *map_write_discard_buffer(GLenum target, GLuint buffer, size_t size,
+                                 bool create_storage);
+#endif
+
+#ifndef OPENGLES_1
+  void insert_fence(CompletionToken &&callback);
+  void process_fences(bool force);
+#endif
+
+  void call_later(Completable &&job);
+  void process_pending_jobs(bool wait);
+
   enum AutoAntialiasMode {
     AA_poly,
     AA_line,
@@ -802,6 +830,10 @@ protected:
 #endif
 
 public:
+#ifndef OPENGLES_1
+  PFNGLGETINTEGERI_VPROC _glGetIntegeri_v;
+#endif
+
 #ifndef OPENGLES_1
   bool _use_depth_zero_to_one;
   bool _use_remapped_depth_range;
@@ -909,6 +941,10 @@ public:
   PFNGLGETBUFFERSUBDATAPROC _glGetBufferSubData;
 #endif
 
+#ifndef OPENGLES_1
+  PFNGLCOPYBUFFERSUBDATAPROC _glCopyBufferSubData;
+#endif
+
 #ifdef OPENGLES
   PFNGLMAPBUFFERRANGEEXTPROC _glMapBufferRange;
   PFNGLUNMAPBUFFEROESPROC _glUnmapBuffer;
@@ -916,6 +952,10 @@ public:
   PFNGLMAPBUFFERRANGEPROC _glMapBufferRange;
 #endif
 
+#ifndef OPENGLES_1
+  bool _supports_pixel_buffers;
+#endif
+
 #ifndef OPENGLES_1
   bool _supports_uniform_buffers;
   bool _supports_shader_buffers;
@@ -983,6 +1023,7 @@ public:
   PFNGLTEXTUREPARAMETERIPROC _glTextureParameteri;
   PFNGLGENERATETEXTUREMIPMAPPROC _glGenerateTextureMipmap;
   PFNGLBINDTEXTUREUNITPROC _glBindTextureUnit;
+  PFNGLMAPNAMEDBUFFERRANGEPROC _glMapNamedBufferRange;
 #endif
 
 #ifndef OPENGLES_1
@@ -1188,12 +1229,15 @@ public:
 #endif
 
 #ifndef OPENGLES_1
-  // Stores textures for which memory bariers should be issued.
-  typedef pset<TextureContext*> TextureSet;
-  TextureSet _textures_needing_fetch_barrier;
-  TextureSet _textures_needing_image_access_barrier;
-  TextureSet _textures_needing_update_barrier;
-  TextureSet _textures_needing_framebuffer_barrier;
+  // Each counter increments every time the corresponding barrier is issued.
+  // GLTextureContext et al. store copies of these counters; when a write is
+  // performed on a texture, it sets its copy to match the value on the GSG
+  // to indicate that it is out of sync and the barrier needs to be issued.
+  int _texture_fetch_barrier_counter = 0;
+  int _shader_image_access_barrier_counter = 0;
+  int _texture_update_barrier_counter = 0;
+  int _framebuffer_barrier_counter = 0;
+  int _shader_storage_barrier_counter = 0;
 #endif
 
   // RenderState::SlotMask _inv_state_mask;
@@ -1243,16 +1287,21 @@ public:
   FrameTiming *_current_frame_timing = nullptr;
 #endif
 
-  struct AsyncRamCopy {
-    PT(ScreenshotRequest) _request;
-    GLuint _pbo;
-    GLsync _fence;
-    GLuint _external_format;
-    int _view;
-    void *_mapped_pointer;
-    size_t _size;
+  struct Fence {
+    GLsync _object;
+    CompletionToken _token;
   };
-  pdeque<AsyncRamCopy> _async_ram_copies;
+  pdeque<Fence> _fences;
+
+#ifdef HAVE_THREADS
+  AsyncTaskChain *_async_chain;
+#endif
+
+  // Minimal job system, pending a real job system.
+  typedef pvector<Completable> JobQueue;
+  Mutex _job_queue_mutex;
+  ConditionVar _job_queue_cvar;
+  JobQueue _job_queue;
 
   BufferResidencyTracker _renderbuffer_residency;
 
@@ -1296,6 +1345,7 @@ private:
   friend class CLP(IndexBufferContext);
   friend class CLP(BufferContext);
   friend class CLP(ShaderContext);
+  friend class CLP(TextureContext);
   friend class CLP(GraphicsBuffer);
   friend class CLP(OcclusionQueryContext);
 };

+ 159 - 31
panda/src/glstuff/glShaderContext_src.cxx

@@ -33,6 +33,7 @@
 #include "sparseArray.h"
 #include "spirVTransformer.h"
 #include "spirVInjectAlphaTestPass.h"
+#include "spirVEmulateTextureQueriesPass.h"
 
 #define SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS
 #include <spirv_cross/spirv_glsl.hpp>
@@ -198,6 +199,10 @@ CLP(ShaderContext)(CLP(GraphicsStateGuardian) *glgsg, Shader *s) : ShaderContext
   if (!valid) {
     _shader->_error_flag = true;
   }
+
+#ifdef DO_PSTATS
+  _compute_dispatch_pcollector = PStatCollector(_glgsg->_compute_dispatch_pcollector, s->get_debug_name());
+#endif
 }
 
 /**
@@ -393,9 +398,9 @@ compile_for(RenderAttrib::PandaCompareFunc alpha_test_mode) {
     }
 
     UniformCalls &calls = block._calls[alpha_test_mode];
-    r_collect_uniforms(program, param, calls, param._type, name.c_str(), sym_buffer,
-                       actual_location, active_locations, resource_index,
-                       binding);
+    r_collect_uniforms(alpha_test_mode, param, calls, param._type, name.c_str(),
+                       sym_buffer, actual_location, active_locations,
+                       resource_index, binding);
 
     if (block_index < 0 && (!calls._matrices.empty() || !calls._vectors.empty())) {
       block._dep = param._binding->get_state_dep();
@@ -493,12 +498,14 @@ r_count_locations_bindings(const ShaderType *type,
  * Also finds all resources and adds them to the respective arrays.
  */
 void CLP(ShaderContext)::
-r_collect_uniforms(GLuint program,
+r_collect_uniforms(RenderAttrib::PandaCompareFunc alpha_test_mode,
                    const Shader::Parameter &param, UniformCalls &calls,
                    const ShaderType *type, const char *name, const char *sym,
                    int &cur_location, const SparseArray &active_locations,
                    int &resource_index, int &cur_binding, size_t offset) {
 
+  GLuint program = _linked_programs[alpha_test_mode];
+
   ShaderType::ScalarType scalar_type;
   uint32_t num_elements;
   uint32_t num_rows;
@@ -620,7 +627,7 @@ r_collect_uniforms(GLuint program,
     for (uint32_t i = 0; i < array_type->get_num_elements(); ++i) {
       sprintf(name_buffer, "%s[%u]", name, i);
       sprintf(sym_buffer, "%s[%u]", sym, i);
-      r_collect_uniforms(program, param, calls, element_type, name_buffer, sym_buffer,
+      r_collect_uniforms(alpha_test_mode, param, calls, element_type, name_buffer, sym_buffer,
                          cur_location, active_locations, resource_index, cur_binding,
                          offset);
       offset += stride;
@@ -638,7 +645,7 @@ r_collect_uniforms(GLuint program,
 
       // We have named struct members m0, m1, etc. in declaration order.
       sprintf(sym_buffer, "%s.m%u", sym, i);
-      r_collect_uniforms(program, param, calls, member.type, qualname.c_str(), sym_buffer,
+      r_collect_uniforms(alpha_test_mode, param, calls, member.type, qualname.c_str(), sym_buffer,
                          cur_location, active_locations, resource_index, cur_binding,
                          offset + member.offset);
     }
@@ -670,7 +677,7 @@ r_collect_uniforms(GLuint program,
 
       StorageBlock block;
       block._binding = param._binding;
-      block._resource_id = param._binding->get_resource_id(resource_index++, type);
+      block._resource_id = param._binding->get_resource_id(resource_index++);
       block._binding_index = binding;
       _storage_blocks.push_back(std::move(block));
       _storage_block_bindings |= (1 << binding);
@@ -681,15 +688,34 @@ r_collect_uniforms(GLuint program,
   int location = cur_location;
   if (location < 0) {
     location = _glgsg->_glGetUniformLocation(program, _is_legacy ? name : sym);
-    if (location < 0) {
-      return;
-    }
   } else {
     ++cur_location;
     if (!active_locations.get_bit(location)) {
-      return;
+      location = -1;
     }
   }
+  int size_location = -1;
+  if (_emulated_caps & (Shader::C_image_query_size | Shader::C_texture_query_size | Shader::C_texture_query_levels)) {
+    // Do we have a separate size input?
+    size_t sym_len = strlen(sym);
+    char *size_name_buffer = (char *)alloca(sym_len + 3);
+    char *p = size_name_buffer;
+    for (size_t i = 0; i < sym_len; ++i) {
+      if (sym[i] == '[' || sym[i] == '.') {
+        *p++ = '_';
+      }
+      else if (sym[i] != 'm' && sym[i] != ']') {
+        *p++ = sym[i];
+      }
+    }
+    *p++ = '_';
+    *p++ = 's';
+    *p = '\0';
+    size_location = _glgsg->_glGetUniformLocation(program, size_name_buffer);
+  }
+  if (location < 0 && size_location < 0) {
+    return;
+  }
 
   if (GLCAT.is_debug()) {
     GLCAT.debug()
@@ -700,19 +726,30 @@ r_collect_uniforms(GLuint program,
   if (const ShaderType::SampledImage *sampler = type->as_sampled_image()) {
     TextureUnit unit;
     unit._binding = param._binding;
-    unit._resource_id = param._binding->get_resource_id(resource_index++, type);
+    unit._resource_id = param._binding->get_resource_id(resource_index++);
     unit._target = _glgsg->get_texture_target(sampler->get_texture_type());
 
+    for (int i = 0; i < RenderAttrib::M_always; ++i) {
+      unit._size_loc[i] = -1;
+    }
+
+    if (size_location >= 0) {
+      unit._size_loc[alpha_test_mode] = size_location;
+    }
+
     // Check if we already have a unit with these properties.  If so, we alias
     // the binding.  This will also prevent duplicating texture units when the
     // shader is compiled multiple times, for different alpha test modes.
     GLint binding = -1;
     for (size_t i = 0; i < _texture_units.size(); ++i) {
-      const TextureUnit &other_unit = _texture_units[i];
+      TextureUnit &other_unit = _texture_units[i];
       if (other_unit._binding == unit._binding &&
           other_unit._resource_id == unit._resource_id &&
           other_unit._target == unit._target) {
         binding = (GLint)i;
+        if (unit._size_loc[alpha_test_mode] >= 0) {
+          other_unit._size_loc[alpha_test_mode] = unit._size_loc[alpha_test_mode];
+        }
         break;
       }
     }
@@ -721,7 +758,9 @@ r_collect_uniforms(GLuint program,
       binding = (GLint)_texture_units.size();
       _texture_units.push_back(std::move(unit));
     }
-    _glgsg->_glUniform1i(location, binding);
+    if (location >= 0) {
+      _glgsg->_glUniform1i(location, binding);
+    }
   }
   else if (const ShaderType::Image *image = type->as_image()) {
     // In OpenGL ES, we can't specify a binding index after the fact.
@@ -729,6 +768,12 @@ r_collect_uniforms(GLuint program,
     // the driver (or the user) providing a unique one.
     GLint binding = -1;
 #ifdef OPENGLES
+    if (location < 0) {
+      // There's an edge case here if we use imageSize without any other
+      // accesses to the image, and the image itself is optimized out.
+      // I don't think it's very realistic, so I haven't bothered with it.
+      return;
+    }
     glGetUniformiv(program, location, &binding);
     if (binding < 0) {
       return;
@@ -741,18 +786,29 @@ r_collect_uniforms(GLuint program,
     ImageUnit unit;
 #endif
     unit._binding = param._binding;
-    unit._resource_id = param._binding->get_resource_id(resource_index++, type);
+    unit._resource_id = param._binding->get_resource_id(resource_index++);
     unit._access = image->get_access();
     unit._written = false;
 
+    for (int i = 0; i < RenderAttrib::M_always; ++i) {
+      unit._size_loc[i] = -1;
+    }
+
+    if (size_location >= 0) {
+      unit._size_loc[alpha_test_mode] = size_location;
+    }
+
 #ifndef OPENGLES
     // See note above in the SampledImage case.
     for (size_t i = 0; i < _image_units.size(); ++i) {
-      const ImageUnit &other_unit = _image_units[i];
+      ImageUnit &other_unit = _image_units[i];
       if (other_unit._binding == unit._binding &&
           other_unit._resource_id == unit._resource_id &&
           other_unit._access == unit._access) {
         binding = (GLint)i;
+        if (unit._size_loc[alpha_test_mode] >= 0) {
+          other_unit._size_loc[alpha_test_mode] = unit._size_loc[alpha_test_mode];
+        }
         break;
       }
     }
@@ -760,7 +816,9 @@ r_collect_uniforms(GLuint program,
       binding = (GLint)_image_units.size();
       _image_units.push_back(std::move(unit));
     }
-    _glgsg->_glUniform1i(location, binding);
+    if (location >= 0) {
+      _glgsg->_glUniform1i(location, binding);
+    }
 #endif
   }
   else if (type->as_resource()) {
@@ -2119,6 +2177,10 @@ update_shader_vertex_arrays(ShaderContext *prev, bool force) {
     issue_parameters(Shader::D_vertex_data);
   }
 
+  // This ought to be moved elsewhere, but it's convenient to do this here for
+  // now since it's called before every Geom is drawn.
+  issue_memory_barriers();
+
   _glgsg->report_my_gl_errors();
 
   return true;
@@ -2202,9 +2264,7 @@ update_shader_texture_bindings(ShaderContext *prev) {
   //  return;
   //}
 
-#ifndef OPENGLES
   GLbitfield barriers = 0;
-#endif
 
   ShaderInputBinding::State state;
   state.gsg = _glgsg;
@@ -2239,12 +2299,6 @@ update_shader_texture_bindings(ShaderContext *prev) {
 
           int view = _glgsg->get_current_tex_view_offset();
           gl_tex = gtc->get_view_index(view);
-
-#ifndef OPENGLES
-          if (gtc->needs_barrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT)) {
-            barriers |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
-          }
-#endif
         }
       }
 
@@ -2301,8 +2355,19 @@ update_shader_texture_bindings(ShaderContext *prev) {
           break;
         }
 
+        if (gtc->needs_barrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT, unit._written)) {
+          barriers |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
+        }
+
         _glgsg->_glBindImageTexture(i, gl_tex, bind_level, layered,
                                     bind_layer, gl_access, gtc->_internal_format);
+
+        // Update the size variable, if we have one.
+        GLint size_loc = unit._size_loc[_alpha_test_mode];
+        if (size_loc != -1) {
+          _glgsg->_glUniform4f(size_loc, (GLfloat)gtc->_width, (GLfloat)gtc->_height,
+                                         (GLfloat)gtc->_depth, (GLfloat)gtc->_num_levels);
+        }
       }
     }
   }
@@ -2359,7 +2424,7 @@ update_shader_texture_bindings(ShaderContext *prev) {
 #ifndef OPENGLES
     // If it was recently written to, we will have to issue a memory barrier
     // soon.
-    if (gtc->needs_barrier(GL_TEXTURE_FETCH_BARRIER_BIT)) {
+    if (gtc->needs_barrier(GL_TEXTURE_FETCH_BARRIER_BIT, false)) {
       barriers |= GL_TEXTURE_FETCH_BARRIER_BIT;
     }
 #endif
@@ -2396,6 +2461,13 @@ update_shader_texture_bindings(ShaderContext *prev) {
       _glgsg->apply_texture(gtc, view);
       _glgsg->apply_sampler(i, sampler, gtc, view);
     }
+
+    // Update the size variable, if we have one.
+    GLint size_loc = unit._size_loc[_alpha_test_mode];
+    if (size_loc != -1) {
+      _glgsg->_glUniform4f(size_loc, (GLfloat)gtc->_width, (GLfloat)gtc->_height,
+                                     (GLfloat)gtc->_depth, (GLfloat)gtc->_num_levels);
+    }
   }
 
 #ifndef OPENGLES
@@ -2403,12 +2475,12 @@ update_shader_texture_bindings(ShaderContext *prev) {
     _glgsg->_glBindTextures(0, num_textures, textures);
     _glgsg->_glBindSamplers(0, num_textures, samplers);
   }
+#endif
 
   if (barriers != 0) {
     // Issue a memory barrier prior to this shader's execution.
     _glgsg->issue_memory_barrier(barriers);
   }
-#endif
 
   _glgsg->report_my_gl_errors();
 }
@@ -2423,10 +2495,38 @@ update_shader_buffer_bindings(ShaderContext *prev) {
   state.gsg = _glgsg;
   state.matrix_cache = &_matrix_cache[0];
 
-  for (const StorageBlock &block : _storage_blocks) {
+  for (StorageBlock &block : _storage_blocks) {
     PT(ShaderBuffer) buffer = block._binding->fetch_shader_buffer(state, block._resource_id);
-    _glgsg->apply_shader_buffer(block._binding_index, buffer);
+    block._gbc = _glgsg->apply_shader_buffer(block._binding_index, buffer);
+  }
+}
+
+/**
+ * Issues memory barriers for shader buffers; should be called before a draw.
+ */
+void CLP(ShaderContext)::
+issue_memory_barriers() {
+#ifndef OPENGLES
+  bool barrier_needed = false;
+  for (StorageBlock &block : _storage_blocks) {
+    if (block._gbc != nullptr &&
+        block._gbc->_shader_storage_barrier_counter == _glgsg->_shader_storage_barrier_counter) {
+      barrier_needed = true;
+      break;
+    }
+  }
+
+  if (barrier_needed) {
+    _glgsg->issue_memory_barrier(GL_SHADER_STORAGE_BARRIER_BIT);
+  }
+
+  // For now, assume every bound SSBO gets written: mark each as out of sync.
+  for (StorageBlock &block : _storage_blocks) {
+    if (block._gbc != nullptr) {
+      block._gbc->_shader_storage_barrier_counter = _glgsg->_shader_storage_barrier_counter;
+    }
   }
+#endif
 }
 
 /**
@@ -2724,6 +2824,20 @@ create_shader(GLuint program, const ShaderModule *module, size_t mi,
 
       ShaderModuleSpirV::InstructionStream stream = spv->_instructions;
 
+      // Do we need to emulate certain caps, like texture queries?
+      pmap<SpirVTransformPass::AccessChain, uint32_t> size_var_ids;
+      uint64_t supported_caps = _glgsg->get_supported_shader_capabilities();
+      uint64_t emulate_caps = spv->_emulatable_caps & ~supported_caps;
+      if (emulate_caps != 0u) {
+        _emulated_caps |= emulate_caps;
+
+        SpirVTransformer transformer(spv->_instructions);
+        SpirVEmulateTextureQueriesPass pass(emulate_caps);
+        transformer.run(pass);
+        size_var_ids = std::move(pass._size_var_ids);
+        stream = transformer.get_result();
+      }
+
       if (stage != ShaderModule::Stage::FRAGMENT) {
         alpha_test_mode = RenderAttrib::M_none;
       }
@@ -2782,11 +2896,11 @@ create_shader(GLuint program, const ShaderModule *module, size_t mi,
       // Assign names based on locations.  This is important to make sure that
       // uniforms shared between shader stages have the same name, or the
       // compiler may start to complain about overlapping locations.
+      char buf[1024];
       for (spirv_cross::VariableID id : compiler.get_active_interface_variables()) {
         uint32_t loc = compiler.get_decoration(id, spv::DecorationLocation);
         spv::StorageClass sc = compiler.get_storage_class(id);
 
-        char buf[1024];
         if (sc == spv::StorageClassUniformConstant) {
           auto it = id_to_location.find(id);
           if (it != id_to_location.end()) {
@@ -2834,6 +2948,21 @@ create_shader(GLuint program, const ShaderModule *module, size_t mi,
         }
       }
 
+      // Assign names to emulated texture/image size variables.
+      for (auto &item : size_var_ids) {
+        const SpirVTransformPass::AccessChain &chain = item.first;
+        auto it = id_to_location.find(chain._var_id);
+        if (it != id_to_location.end()) {
+          int location = it->second;
+          size_t size = sprintf(buf, "p%u", location);
+          for (size_t i = 0; i < chain.size(); ++i) {
+            size += sprintf(buf + size, "_%d", chain[i]);
+          }
+          strcpy(buf + size, "_s");
+          compiler.set_name(item.second, buf);
+        }
+      }
+
       // For all uniform constant structs, we need to ensure we have procedural
       // names like _m0, _m1, _m2, etc.  Furthermore, we need to assign each
       // struct a name that is guaranteed to be the same between stages, since
@@ -2844,7 +2973,6 @@ create_shader(GLuint program, const ShaderModule *module, size_t mi,
         item.second->output_signature(str);
         compiler.set_name(item.first, str.str());
 
-        char buf[32];
         for (size_t i = 0; i < item.second->get_num_members(); ++i) {
           sprintf(buf, "m%d", (int)i);
           compiler.set_member_name(item.first, i, buf);

+ 10 - 1
panda/src/glstuff/glShaderContext_src.h

@@ -65,7 +65,7 @@ private:
                                          GLint &num_ssbo_bindings,
                                          GLint &num_image_bindings);
 
-  void r_collect_uniforms(GLuint program,
+  void r_collect_uniforms(RenderAttrib::PandaCompareFunc alpha_test_mode,
                           const Shader::Parameter &param, UniformCalls &calls,
                           const ShaderType *type, const char *name,
                           const char *sym, int &location,
@@ -92,6 +92,7 @@ private:
   void disable_shader_texture_bindings();
   void update_shader_texture_bindings(ShaderContext *prev);
   void update_shader_buffer_bindings(ShaderContext *prev);
+  void issue_memory_barriers();
 
   bool uses_standard_vertex_arrays(void) {
     return _uses_standard_vertex_arrays;
@@ -150,6 +151,7 @@ private:
     PT(ShaderInputBinding) _binding;
     ShaderInputBinding::ResourceId _resource_id;
     GLenum _target;
+    GLint _size_loc[RenderAttrib::M_always];
   };
   typedef pvector<TextureUnit> TextureUnits;
   TextureUnits _texture_units;
@@ -160,6 +162,7 @@ private:
     CLP(TextureContext) *_gtc = nullptr;
     ShaderType::Access _access;
     bool _written = false;
+    GLint _size_loc[RenderAttrib::M_always];
   };
   typedef pvector<ImageUnit> ImageUnits;
   ImageUnits _image_units;
@@ -170,6 +173,7 @@ private:
 
   struct StorageBlock {
     PT(ShaderInputBinding) _binding;
+    CLP(BufferContext) *_gbc = nullptr;
     ShaderInputBinding::ResourceId _resource_id;
     GLint _binding_index;
   };
@@ -178,6 +182,7 @@ private:
   uint32_t _storage_block_bindings = 0;
 
   CLP(GraphicsStateGuardian) *_glgsg;
+  uint64_t _emulated_caps = 0u;
 
   bool _remap_locations = false;
   LocationMap _locations;
@@ -185,6 +190,10 @@ private:
 
   bool _uses_standard_vertex_arrays;
 
+#ifdef DO_PSTATS
+  PStatCollector _compute_dispatch_pcollector;
+#endif
+
   void report_shader_errors(GLuint handle, Shader::Stage stage, bool fatal);
   void report_program_errors(GLuint program, bool fatal);
   GLuint create_shader(GLuint program, const ShaderModule *module, size_t mi,

+ 40 - 0
panda/src/glstuff/glTextureContext_src.I

@@ -59,3 +59,43 @@ get_view_buffer(int view) const {
     return 0;
   }
 }
+
+/**
+ * Returns true if at least one async upload is still pending.
+ */
+INLINE bool CLP(TextureContext)::
+is_upload_pending() const {
+  // We can't simply compare _uploads_started to _uploads_finished, since
+  // cancel_pending_uploads() also sets them equal to each other.
+  return _uploads_pending > 0;
+}
+
+/**
+ * Waits for all pending uploads to finish; returns at once if none are pending.
+ */
+INLINE void CLP(TextureContext)::
+wait_pending_uploads() const {
+  if (is_upload_pending()) {
+    do_wait_pending_uploads();
+  }
+}
+
+/**
+ * Cancels all asynchronous uploads.  They are not guaranteed to be cancelled
+ * by the time this returns; consider following this up with a call to
+ * wait_pending_uploads().
+ */
+INLINE void CLP(TextureContext)::
+cancel_pending_uploads() {
+  _uploads_finished = _uploads_started;
+}
+
+/**
+ * If no PBO is unused and we are at the given PBO limit, waits for one.
+ */
+INLINE void CLP(TextureContext)::
+wait_for_unused_pbo(int limit) const {
+  if (_unused_pbos.empty() && _num_pbos >= limit) {
+    do_wait_for_unused_pbo(limit);
+  }
+}

+ 110 - 24
panda/src/glstuff/glTextureContext_src.cxx

@@ -13,6 +13,8 @@
 
 #include "pnotify.h"
 
+static PStatCollector _wait_async_texture_uploads_pcollector("Wait:Async Texture Uploads");
+
 TypeHandle CLP(TextureContext)::_type_handle;
 
 /**
@@ -48,6 +50,8 @@ evict_lru() {
  */
 void CLP(TextureContext)::
 reset_data(GLenum target, int num_views) {
+  cancel_pending_uploads();
+
   // Free the texture resources.
   set_num_views(0);
 
@@ -63,12 +67,13 @@ reset_data(GLenum target, int num_views) {
 
 #ifndef OPENGLES_1
   // Mark the texture as coherent.
-  if (gl_enable_memory_barriers) {
-    _glgsg->_textures_needing_fetch_barrier.erase(this);
-    _glgsg->_textures_needing_image_access_barrier.erase(this);
-    _glgsg->_textures_needing_update_barrier.erase(this);
-    _glgsg->_textures_needing_framebuffer_barrier.erase(this);
-  }
+  _texture_fetch_barrier_counter = _glgsg->_texture_fetch_barrier_counter - 1;
+  _shader_image_read_barrier_counter = _glgsg->_shader_image_access_barrier_counter - 1;
+  _shader_image_write_barrier_counter = _glgsg->_shader_image_access_barrier_counter - 1;
+  _texture_read_barrier_counter = _glgsg->_texture_update_barrier_counter - 1;
+  _texture_write_barrier_counter = _glgsg->_shader_image_access_barrier_counter - 1;
+  _framebuffer_read_barrier_counter = _glgsg->_framebuffer_barrier_counter - 1;
+  _framebuffer_write_barrier_counter = _glgsg->_framebuffer_barrier_counter - 1;
 #endif
 }
 
@@ -168,26 +173,50 @@ set_num_views(int num_views) {
 
 #ifndef OPENGLES_1
 /**
- *
+ * Returns true if the texture needs a barrier before a read or write of the
+ * given kind.  If writing is false, only prior writes are synced; otherwise
+ * both prior reads and prior writes are synced.
  */
 bool CLP(TextureContext)::
-needs_barrier(GLbitfield barrier) {
+needs_barrier(GLbitfield barrier, bool writing) {
   if (!gl_enable_memory_barriers) {
     return false;
   }
 
-  return (((barrier & GL_TEXTURE_FETCH_BARRIER_BIT) &&
-           _glgsg->_textures_needing_fetch_barrier.count(this)))
-      || (((barrier & GL_SHADER_IMAGE_ACCESS_BARRIER_BIT) &&
-           _glgsg->_textures_needing_image_access_barrier.count(this)))
-      || (((barrier & GL_TEXTURE_UPDATE_BARRIER_BIT) &&
-           _glgsg->_textures_needing_update_barrier.count(this)))
-      || (((barrier & GL_FRAMEBUFFER_BARRIER_BIT) &&
-           _glgsg->_textures_needing_framebuffer_barrier.count(this)));
+  if (barrier & GL_TEXTURE_FETCH_BARRIER_BIT) {
+    // This is always a read, so only sync RAW.
+    if (_glgsg->_texture_fetch_barrier_counter == _texture_fetch_barrier_counter) {
+      return true;
+    }
+  }
+
+  if (barrier & GL_SHADER_IMAGE_ACCESS_BARRIER_BIT) {
+    // Sync WAR, WAW and RAW, but not RAR.
+    if ((writing && _glgsg->_shader_image_access_barrier_counter == _shader_image_read_barrier_counter) ||
+        (_glgsg->_shader_image_access_barrier_counter == _shader_image_write_barrier_counter)) {
+      return true;
+    }
+  }
+
+  if (barrier & GL_TEXTURE_UPDATE_BARRIER_BIT) {
+    if ((writing && _glgsg->_texture_update_barrier_counter == _texture_read_barrier_counter) ||
+        (_glgsg->_texture_update_barrier_counter == _texture_write_barrier_counter)) {
+      return true;
+    }
+  }
+
+  if (barrier & GL_FRAMEBUFFER_BARRIER_BIT) {
+    if ((writing && _glgsg->_framebuffer_barrier_counter == _framebuffer_read_barrier_counter) ||
+        (_glgsg->_framebuffer_barrier_counter == _framebuffer_write_barrier_counter)) {
+      return true;
+    }
+  }
+
+  return false;
 }
 
 /**
- * Mark a texture as needing a memory barrier, since a non-coherent read or
+ * Mark a texture as needing a memory barrier, since an unsynchronized read or
  * write just happened to it.  If 'wrote' is true, it was written to.
  */
 void CLP(TextureContext)::
@@ -199,16 +228,73 @@ mark_incoherent(bool wrote) {
   // If we only read from it, the next read operation won't need another
   // barrier, since it'll be reading the same data.
   if (wrote) {
-    _glgsg->_textures_needing_fetch_barrier.insert(this);
+    // Each counter must snapshot the same GSG counter that needs_barrier()
+    // later compares it against.
+    _texture_fetch_barrier_counter = _glgsg->_texture_fetch_barrier_counter;
+    _shader_image_write_barrier_counter = _glgsg->_shader_image_access_barrier_counter;
+    _texture_write_barrier_counter = _glgsg->_texture_update_barrier_counter;
+    _framebuffer_write_barrier_counter = _glgsg->_framebuffer_barrier_counter;
   }
 
   // We could still write to it before we read from it, so we have to always
-  // insert these barriers.  This could be slightly optimized so that we don't
-  // issue a barrier between consecutive image reads, but that may not be
-  // worth the trouble.
-  _glgsg->_textures_needing_image_access_barrier.insert(this);
-  _glgsg->_textures_needing_update_barrier.insert(this);
-  _glgsg->_textures_needing_framebuffer_barrier.insert(this);
+  // insert these barriers.
+  _shader_image_read_barrier_counter = _glgsg->_shader_image_access_barrier_counter;
+  _texture_read_barrier_counter = _glgsg->_texture_update_barrier_counter;
+  _framebuffer_read_barrier_counter = _glgsg->_framebuffer_barrier_counter;
 }
 
 #endif  // !OPENGLES_1
+
+/**
+ * Returns a PBO with the given size to the pool of unused PBOs.
+ *
+ * If this context currently has more PBOs than the texture's configured
+ * number of async transfer buffers, or the given size is smaller than
+ * _pbo_size, the PBO is deleted (and _num_pbos is decremented) instead of
+ * being added to the pool.
+ */
+void CLP(TextureContext)::
+return_pbo(GLuint pbo, size_t size) {
+  // Also triggers when the number of buffers is -1 (which effectively means
+  // to always delete the buffers after use).
+  if (_num_pbos > get_texture()->get_num_async_transfer_buffers() ||
+      size < _pbo_size) {
+    // We have too many PBOs, or this PBO is no longer of the proper
+    // size, so delete it rather than returning it to the pool.
+    _num_pbos--;
+    _glgsg->_glDeleteBuffers(1, &pbo);
+  } else {
+    _unused_pbos.push_front(pbo);
+  }
+}
+
+/**
+ * Deletes all unused PBOs.
+ *
+ * Every buffer in the unused pool is deleted through the GSG, the pool's
+ * size is subtracted from _num_pbos, and the pool is emptied.  Does nothing
+ * if the pool is already empty.
+ */
+void CLP(TextureContext)::
+delete_unused_pbos() {
+  if (!_unused_pbos.empty()) {
+    for (GLuint pbo : _unused_pbos) {
+      _glgsg->_glDeleteBuffers(1, &pbo);
+    }
+    _num_pbos -= (int)_unused_pbos.size();
+    _unused_pbos.clear();
+  }
+}
+
+/**
+ * Waits for all uploads to be finished.
+ *
+ * Asks the GSG to process its pending jobs at least once, and keeps doing so
+ * for as long as is_upload_pending() returns true.  The whole wait is timed
+ * with _wait_async_texture_uploads_pcollector.
+ */
+void CLP(TextureContext)::
+do_wait_pending_uploads() const {
+  PStatTimer timer(_wait_async_texture_uploads_pcollector);
+  do {
+    _glgsg->process_pending_jobs(true);
+  }
+  while (is_upload_pending());
+}
+
+/**
+ * Waits until a PBO can be obtained.
+ *
+ * Asks the GSG to process its pending jobs at least once, and keeps doing so
+ * while the pool of unused PBOs is empty and _num_pbos is still at or above
+ * the given limit.  The whole wait is timed with
+ * _wait_async_texture_uploads_pcollector.
+ */
+void CLP(TextureContext)::
+do_wait_for_unused_pbo(int limit) const {
+  PStatTimer timer(_wait_async_texture_uploads_pcollector);
+  do {
+    _glgsg->process_pending_jobs(true);
+  }
+  while (_unused_pbos.empty() && _num_pbos >= limit);
+}

+ 31 - 2
panda/src/glstuff/glTextureContext_src.h

@@ -41,12 +41,24 @@ public:
   INLINE GLuint get_view_buffer(int view) const;
 
 #ifdef OPENGLES_1
-  static constexpr bool needs_barrier(GLbitfield barrier) { return false; };
+  static constexpr bool needs_barrier(GLbitfield barrier, bool writing) { return false; };
 #else
-  bool needs_barrier(GLbitfield barrier);
+  bool needs_barrier(GLbitfield barrier, bool writing);
   void mark_incoherent(bool wrote);
 #endif
 
+  INLINE bool is_upload_pending() const;
+  INLINE void wait_pending_uploads() const;
+  INLINE void cancel_pending_uploads();
+
+  void return_pbo(GLuint pbo, size_t size);
+  void delete_unused_pbos();
+  INLINE void wait_for_unused_pbo(int limit) const;
+
+private:
+  void do_wait_pending_uploads() const;
+  void do_wait_for_unused_pbo(int limit) const;
+
 private:
   // This is the GL "name" of the texture object.
   GLuint _index;
@@ -76,8 +88,25 @@ public:
   GLenum _target;
   SamplerState _active_sampler;
 
+  // These counters are used to prevent out-of-order updates.
+  int _uploads_started = 0;
+  int _uploads_finished = 0;
+  int _uploads_pending = 0;
+  pdeque<GLuint> _unused_pbos;
+  int _num_pbos = 0;
+  size_t _pbo_size = 0;
+
   CLP(GraphicsStateGuardian) *_glgsg;
 
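+  // These are set to the equivalent counter in glgsg by mark_incoherent():
+  // the fetch and write counters only when the texture was written to, the
+  // read counters on every call.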
+  int _texture_fetch_barrier_counter = -1;
+  int _shader_image_read_barrier_counter = -1;
+  int _shader_image_write_barrier_counter = -1;
+  int _texture_read_barrier_counter = -1;
+  int _texture_write_barrier_counter = -1;
+  int _framebuffer_read_barrier_counter = -1;
+  int _framebuffer_write_barrier_counter = -1;
+
 public:
   static TypeHandle get_class_type() {
     return _type_handle;

+ 18 - 0
panda/src/glstuff/glmisc_src.cxx

@@ -22,6 +22,11 @@ ConfigVariableBool gl_forward_compatible
    PRC_DESC("Setting this to true will request a forward-compatible OpenGL "
             "context, which will not support the fixed-function pipeline."));
 
+ConfigVariableBool gl_support_dsa
+  ("gl-support-dsa", true,
+   PRC_DESC("Configure this false if you suspect your GL's implementation of "
+            "Direct State Access is broken."));
+
 ConfigVariableBool gl_support_fbo
   ("gl-support-fbo", true,
    PRC_DESC("Configure this false if your GL's implementation of "
@@ -335,6 +340,19 @@ ConfigVariableBool gl_depth_zero_to_one
             "range from 0 to 1, matching other graphics APIs.  This setting "
             "requires OpenGL 4.5, or NVIDIA GeForce 8+ hardware."));
 
+ConfigVariableInt gl_texture_transfer_num_threads
+ ("gl-texture-transfer-num-threads", 2,
+  PRC_DESC("The number of threads that will be started to upload and download "
+           "texture data asynchronously, either via the setup_async_transfer "
+           "interface on the Texture class or via the async screenshot "
+           "interface."));
+
+ConfigVariableEnum<ThreadPriority> gl_texture_transfer_thread_priority
+ ("gl-texture-transfer-thread-priority", TP_normal,
+  PRC_DESC("The default thread priority to assign to the threads created for "
+           "asynchronous texture transfers.  The default is 'normal'; you may "
+           "also specify 'low', 'high', or 'urgent'."));
+
 extern ConfigVariableBool gl_parallel_arrays;
 
 void CLP(init_classes)() {

+ 4 - 0
panda/src/glstuff/glmisc_src.h

@@ -17,6 +17,7 @@
 #include "configVariableEnum.h"
 #include "geomEnums.h"
 #include "coordinateSystem.h"
+#include "threadPriority.h"
 
 // Define some macros to transparently map to the double or float versions of
 // the OpenGL function names.
@@ -34,6 +35,7 @@
 
 extern EXPCL_GL ConfigVariableInt gl_version;
 extern EXPCL_GL ConfigVariableBool gl_forward_compatible;
+extern ConfigVariableBool gl_support_dsa;
 extern EXPCL_GL ConfigVariableBool gl_support_fbo;
 extern ConfigVariableBool gl_support_spirv;
 extern ConfigVariableInt gl_force_glsl_version;
@@ -77,6 +79,8 @@ extern ConfigVariableBool gl_support_shadow_filter;
 extern ConfigVariableBool gl_support_vertex_array_bgra;
 extern ConfigVariableBool gl_force_image_bindings_writeonly;
 extern ConfigVariableEnum<CoordinateSystem> gl_coordinate_system;
+extern ConfigVariableInt gl_texture_transfer_num_threads;
+extern ConfigVariableEnum<ThreadPriority> gl_texture_transfer_thread_priority;
 
 extern EXPCL_GL void CLP(init_classes)();
 

+ 2 - 1
panda/src/glxdisplay/glxGraphicsBuffer.cxx

@@ -171,7 +171,8 @@ open_buffer() {
     // with the old gsg.
     DCAST_INTO_R(glxgsg, _gsg, false);
 
-    if (!glxgsg->_context_has_pbuffer ||
+    if (glxgsg->get_engine() != _engine ||
+        !glxgsg->_context_has_pbuffer ||
         !glxgsg->get_fb_properties().subsumes(_fb_properties)) {
       // We need a new pixel format, and hence a new GSG.
       glxgsg = new glxGraphicsStateGuardian(_engine, _pipe, glxgsg);

+ 3 - 0
panda/src/glxdisplay/glxGraphicsPipe.cxx

@@ -130,6 +130,9 @@ make_output(const string &name,
         (flags & (BF_require_parasite | BF_require_window)) != 0) {
       return nullptr;
     }
+    if (host->get_engine() != engine) {
+      return nullptr;
+    }
     // Early failure - if we are sure that this buffer WONT meet specs, we can
     // bail out early.
     if ((flags & BF_fb_props_optional) == 0) {

+ 2 - 1
panda/src/glxdisplay/glxGraphicsWindow.cxx

@@ -189,7 +189,8 @@ open_window() {
     // If the old gsg has the wrong pixel format, create a new one that shares
     // with the old gsg.
     DCAST_INTO_R(glxgsg, _gsg, false);
-    if (!glxgsg->get_fb_properties().subsumes(_fb_properties)) {
+    if (glxgsg->get_engine() != _engine ||
+        !glxgsg->get_fb_properties().subsumes(_fb_properties)) {
       glxgsg = new glxGraphicsStateGuardian(_engine, _pipe, glxgsg);
       glxgsg->choose_pixel_format(_fb_properties, glx_pipe->get_display(), glx_pipe->get_screen(), false, false);
       _gsg = glxgsg;

+ 4 - 1
panda/src/gobj/bufferContext.cxx

@@ -12,6 +12,7 @@
  */
 
 #include "bufferContext.h"
+#include "lightMutexHolder.h"
 
 TypeHandle BufferContext::_type_handle;
 
@@ -43,7 +44,8 @@ BufferContext::
 void BufferContext::
 set_owning_chain(BufferContextChain *chain) {
   if (chain != _owning_chain) {
-    if (_owning_chain != nullptr){
+    if (_owning_chain != nullptr) {
+      LightMutexHolder holder(_owning_chain->_lock);
       --(_owning_chain->_count);
       _owning_chain->adjust_bytes(-(int)_data_size_bytes);
       remove_from_list();
@@ -52,6 +54,7 @@ set_owning_chain(BufferContextChain *chain) {
     _owning_chain = chain;
 
     if (_owning_chain != nullptr) {
+      LightMutexHolder holder(_owning_chain->_lock);
       ++(_owning_chain->_count);
       _owning_chain->adjust_bytes((int)_data_size_bytes);
       insert_before(_owning_chain);

+ 1 - 1
panda/src/gobj/bufferContext.h

@@ -73,7 +73,7 @@ protected:
   TypedWritableReferenceCount *_object;
 
 private:
-  BufferResidencyTracker *_residency;
+  BufferResidencyTracker *const _residency;
   int _residency_state;
 
   size_t _data_size_bytes;

+ 7 - 0
panda/src/gobj/bufferContextChain.cxx

@@ -14,11 +14,15 @@
 #include "bufferContextChain.h"
 #include "bufferContext.h"
 #include "indent.h"
+#include "lightMutexHolder.h"
 
 /**
  * Returns the first BufferContext object stored in the tracker.  You can walk
  * through the entire list of objects stored on the tracker by calling
  * get_next() on each returned object, until the return value is NULL.
+ *
+ * This does not grab the lock; make sure you are holding the lock while
+ * iterating over the chain.
  */
 BufferContext *BufferContextChain::
 get_first() {
@@ -32,9 +36,11 @@ get_first() {
 
 /**
  * Moves all of the BufferContexts from the other tracker onto this one.
+ * The other chain must be locked.
  */
 void BufferContextChain::
 take_from(BufferContextChain &other) {
+  LightMutexHolder holder(_lock);
   _total_size += other._total_size;
   _count += other._count;
   other._total_size = 0;
@@ -55,6 +61,7 @@ take_from(BufferContextChain &other) {
  */
 void BufferContextChain::
 write(std::ostream &out, int indent_level) const {
+  LightMutexHolder holder(_lock);
   indent(out, indent_level)
     << _count << " objects, consuming " << _total_size << " bytes:\n";
 

+ 4 - 0
panda/src/gobj/bufferContextChain.h

@@ -16,6 +16,7 @@
 
 #include "pandabase.h"
 #include "linkedListNode.h"
+#include "lightMutex.h"
 
 class BufferContext;
 
@@ -47,6 +48,9 @@ private:
   size_t _total_size;
   int _count;
 
+public:
+  LightMutex _lock;
+
   friend class BufferContext;
 };
 

+ 1 - 0
panda/src/gobj/bufferResidencyTracker.cxx

@@ -117,6 +117,7 @@ write(std::ostream &out, int indent_level) const {
  */
 void BufferResidencyTracker::
 move_inactive(BufferContextChain &inactive, BufferContextChain &active) {
+  LightMutexHolder active_holder(active._lock);
   BufferContext *node = active.get_first();
   while (node != nullptr) {
     nassertv((node->_residency_state & S_active) != 0);

+ 18 - 3
panda/src/gobj/preparedGraphicsObjects.cxx

@@ -1476,9 +1476,24 @@ begin_frame(GraphicsStateGuardianBase *gsg, Thread *current_thread) {
     Texture *tex = qti->first;
     TextureContext *tc = tex->prepare_now(this, gsg);
     if (tc != nullptr) {
-      gsg->update_texture(tc, true);
-      if (qti->second != nullptr) {
-        qti->second->set_result(tc);
+      if (tex->get_num_async_transfer_buffers() == 0) {
+        gsg->update_texture(tc, true);
+        if (qti->second != nullptr) {
+          qti->second->set_result(tc);
+        }
+      } else {
+        // Async update
+        CompletionToken token;
+        if (qti->second != nullptr) {
+          token = [tc, fut = std::move(qti->second)] (bool success) {
+            if (success) {
+              fut->set_result(tc);
+            } else {
+              fut->notify_removed();
+            }
+          };
+        }
+        gsg->update_texture(tc, false, std::move(token));
       }
     }
   }

+ 2 - 2
panda/src/gobj/shaderInputBinding.cxx

@@ -63,10 +63,10 @@ fetch_data(const State &state, void *into, bool packed) const {
 
 /**
  * Returns an opaque resource identifier that can later be used to fetch the
- * nth resource, which is of the given type.
+ * nth resource, numbered using a depth-first traversal of the parameter type.
  */
 ShaderInputBinding::ResourceId ShaderInputBinding::
-get_resource_id(int index, const ShaderType *type) const {
+get_resource_id(int index) const {
   return 0;
 }
 

+ 1 - 1
panda/src/gobj/shaderInputBinding.h

@@ -57,7 +57,7 @@ public:
                           bool packed = false) const;
 
   typedef uintptr_t ResourceId;
-  virtual ResourceId get_resource_id(int index, const ShaderType *type) const;
+  virtual ResourceId get_resource_id(int index) const;
   virtual PT(Texture) fetch_texture(const State &state,
                                     ResourceId resource_id,
                                     SamplerState &sampler, int &view) const;

+ 9 - 1
panda/src/gobj/texture.I

@@ -278,7 +278,7 @@ set_clear_color(const LColor &color) {
 INLINE void Texture::
 clear_clear_color() {
   CDWriter cdata(_cycler, true);
-  cdata->_has_clear_color = true;
+  cdata->_has_clear_color = false;
 }
 
 /**
@@ -2139,6 +2139,14 @@ rescale_texture() {
   return do_rescale_texture(cdata);
 }
 
+/**
+ * Returns the number previously passed to setup_async_transfer().
+ */
+INLINE int Texture::
+get_num_async_transfer_buffers() const {
+  return _num_async_transfer_buffers.load(std::memory_order_relaxed);
+}
+
 /**
  * Works like adjust_size, but also considers the texture class.  Movie
  * textures, for instance, always pad outwards, regardless of textures-

+ 38 - 2
panda/src/gobj/texture.cxx

@@ -1570,6 +1570,27 @@ get_view_modified_pages(UpdateSeq since, int view, int n) const {
   return result;
 }
 
+/**
+ * Sets the number of buffers for asynchronous upload of texture data.  If this
+ * number is higher than 0, future texture uploads will occur in the background,
+ * up to the provided amount at a time.  The asynchronous upload will be
+ * triggered by calls to prepare() or when the texture comes into view and
+ * allow-incomplete-render is true.
+ *
+ * Each buffer is only large enough to contain a single view, so you may wish
+ * to create twice as many buffers if you want to update twice as many views.
+ *
+ * You can also pass the special value -1, which means to create as many
+ * buffers as is necessary for all asynchronous uploads to take place, and they
+ * will be deleted afterwards automatically.
+ *
+ * This setting will take effect immediately.
+ */
+void Texture::
+setup_async_transfer(int num_buffers) {
+  _num_async_transfer_buffers.store(num_buffers);
+}
+
 /**
  * Indicates that the texture should be enqueued to be prepared in the
  * indicated prepared_objects at the beginning of the next frame.  This will
@@ -5704,7 +5725,14 @@ do_modify_ram_image(CData *cdata) {
   } else {
     do_clear_ram_mipmap_images(cdata);
   }
-  return cdata->_ram_images[0]._image;
+  PTA_uchar data = cdata->_ram_images[0]._image;
+  if (data.get_node_ref_count() > 0) {
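+    // Copy on write, if an upload thread is reading this now.
+    // NOTE(review): only the local handle `data` is swapped to the copy; the
+    // lines shown here never reassign cdata->_ram_images[0]._image, so the
+    // texture appears to keep the old array -- confirm this is intended.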
+    PTA_uchar new_data = PTA_uchar::empty_array(0);
+    new_data.v() = data.v();
+    data.swap(new_data);
+  }
+  return data;
 }
 
 /**
@@ -5779,7 +5807,15 @@ do_modify_ram_mipmap_image(CData *cdata, int n) {
       cdata->_ram_images[n]._image.empty()) {
     do_make_ram_mipmap_image(cdata, n);
   }
-  return cdata->_ram_images[n]._image;
+
+  PTA_uchar data = cdata->_ram_images[n]._image;
+  if (data.get_node_ref_count() > 0) {
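+    // Copy on write, if an upload thread is reading this now.
+    // NOTE(review): only the local handle `data` is swapped to the copy; the
+    // lines shown here never reassign cdata->_ram_images[n]._image, so the
+    // texture appears to keep the old array -- confirm this is intended.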
+    PTA_uchar new_data = PTA_uchar::empty_array(0);
+    new_data.v() = data.v();
+    data.swap(new_data);
+  }
+  return data;
 }
 
 /**

+ 6 - 0
panda/src/gobj/texture.h

@@ -47,6 +47,7 @@
 #include "pfmFile.h"
 #include "asyncTask.h"
 #include "extension.h"
+#include "patomic.h"
 
 class TextureContext;
 class FactoryParams;
@@ -536,6 +537,8 @@ PUBLISHED:
   MAKE_PROPERTY(auto_texture_scale, get_auto_texture_scale,
                                     set_auto_texture_scale);
 
+  void setup_async_transfer(int num_buffers);
+
   PT(AsyncFuture) prepare(PreparedGraphicsObjects *prepared_objects);
   bool is_prepared(PreparedGraphicsObjects *prepared_objects) const;
   bool was_image_modified(PreparedGraphicsObjects *prepared_objects) const;
@@ -628,6 +631,7 @@ PUBLISHED:
 
 public:
   void texture_uploaded();
+  INLINE int get_num_async_transfer_buffers() const;
 
   virtual bool has_cull_callback() const;
   virtual bool cull_callback(CullTraverser *trav, const CullTraverserData &data) const;
@@ -1072,6 +1076,8 @@ protected:
   typedef pmap<PreparedGraphicsObjects *, TextureContext *> Contexts;
   Contexts _contexts;
 
+  patomic_signed_lock_free _num_async_transfer_buffers { 0 };
+
   // It is common, when using normal maps, specular maps, gloss maps, and
   // such, to use a file naming convention where the filenames of the special
   // maps are derived by concatenating a suffix to the name of the diffuse

+ 2 - 0
panda/src/gsgbase/graphicsStateGuardianBase.h

@@ -22,6 +22,7 @@
 #include "lightMutex.h"
 #include "patomic.h"
 #include "small_vector.h"
+#include "completionToken.h"
 
 // A handful of forward references.
 
@@ -150,6 +151,7 @@ public:
 
   virtual TextureContext *prepare_texture(Texture *tex)=0;
   virtual bool update_texture(TextureContext *tc, bool force)=0;
+  virtual bool update_texture(TextureContext *tc, bool force, CompletionToken token)=0;
   virtual void release_texture(TextureContext *tc)=0;
   virtual void release_textures(const pvector<TextureContext *> &contexts)=0;
   virtual bool extract_texture_data(Texture *tex)=0;

+ 133 - 127
panda/src/linmath/compose_matrix_src.cxx

@@ -323,79 +323,82 @@ unwind_yup_rotation(FLOATNAME(LMatrix3) &mat, FLOATNAME(LVecBase3) &hpr) {
   mat.get_row(z,2);
 
   // Project Z into the XZ plane.
+  FLOATTYPE heading = 0;
   FLOATNAME(LVector2) xz(z[0], z[2]);
-  xz = normalize(xz);
-
-  // Compute the rotation about the +Y (up) axis.  This is yaw, or "heading".
-  FLOATTYPE heading = catan2(xz[0], xz[1]);
-
-  // Unwind the heading, and continue.
-  FLOATNAME(LMatrix3) rot_y;
-  rot_y._m(0, 0) = xz[1];
-  rot_y._m(0, 1) = 0;
-  rot_y._m(0, 2) = xz[0];
-
-  rot_y._m(1, 0) = 0;
-  rot_y._m(1, 1) = 1;
-  rot_y._m(1, 2) = 0;
-
-  rot_y._m(2, 0) = -xz[0];
-  rot_y._m(2, 1) = 0;
-  rot_y._m(2, 2) = xz[1];
-
-  x = x * rot_y;
-  y = y * rot_y;
-  z = z * rot_y;
+  if (xz.normalize()) {
+    // Compute the rotation about the +Y (up) axis.  This is yaw, or "heading".
+    heading = catan2(xz[0], xz[1]);
+
+    // Unwind the heading, and continue.
+    FLOATNAME(LMatrix3) rot_y;
+    rot_y._m(0, 0) = xz[1];
+    rot_y._m(0, 1) = 0;
+    rot_y._m(0, 2) = xz[0];
+
+    rot_y._m(1, 0) = 0;
+    rot_y._m(1, 1) = 1;
+    rot_y._m(1, 2) = 0;
+
+    rot_y._m(2, 0) = -xz[0];
+    rot_y._m(2, 1) = 0;
+    rot_y._m(2, 2) = xz[1];
+
+    x = x * rot_y;
+    y = y * rot_y;
+    z = z * rot_y;
+  }
 
   // Project the rotated Z into the YZ plane.
+  FLOATTYPE pitch = 0;
   FLOATNAME(LVector2) yz(z[1], z[2]);
-  yz = normalize(yz);
-
-  // Compute the rotation about the +X (right) axis.  This is pitch.
-  FLOATTYPE pitch = -catan2(yz[0], yz[1]);
-
-  // Unwind the pitch.
-  FLOATNAME(LMatrix3) rot_x;
-  rot_x._m(0, 0) = 1;
-  rot_x._m(0, 1) = 0;
-  rot_x._m(0, 2) = 0;
-
-  rot_x._m(1, 0) = 0;
-  rot_x._m(1, 1) = yz[1];
-  rot_x._m(1, 2) = yz[0];
-
-  rot_x._m(2, 0) = 0;
-  rot_x._m(2, 1) = -yz[0];
-  rot_x._m(2, 2) = yz[1];
-
-  x = x * rot_x;
-  y = y * rot_x;
-  z = z * rot_x;
+  if (yz.normalize()) {
+    // Compute the rotation about the +X (right) axis.  This is pitch.
+    pitch = -catan2(yz[0], yz[1]);
+
+    // Unwind the pitch.
+    FLOATNAME(LMatrix3) rot_x;
+    rot_x._m(0, 0) = 1;
+    rot_x._m(0, 1) = 0;
+    rot_x._m(0, 2) = 0;
+
+    rot_x._m(1, 0) = 0;
+    rot_x._m(1, 1) = yz[1];
+    rot_x._m(1, 2) = yz[0];
+
+    rot_x._m(2, 0) = 0;
+    rot_x._m(2, 1) = -yz[0];
+    rot_x._m(2, 2) = yz[1];
+
+    x = x * rot_x;
+    y = y * rot_x;
+    z = z * rot_x;
+  }
 
   // Project the rotated X onto the XY plane.
+  FLOATTYPE roll = 0;
   FLOATNAME(LVector2) xy(x[0], x[1]);
-  xy = normalize(xy);
-
-  // Compute the rotation about the +Z (back) axis.  This is roll.
-  FLOATTYPE roll = -catan2(xy[1], xy[0]);
-
-  // Unwind the roll from the axes, and continue.
-  FLOATNAME(LMatrix3) rot_z;
-  rot_z._m(0, 0) = xy[0];
-  rot_z._m(0, 1) = -xy[1];
-  rot_z._m(0, 2) = 0;
-
-  rot_z._m(1, 0) = xy[1];
-  rot_z._m(1, 1) = xy[0];
-  rot_z._m(1, 2) = 0;
-
-  rot_z._m(2, 0) = 0;
-  rot_z._m(2, 1) = 0;
-  rot_z._m(2, 2) = 1;
-
-  x = x * rot_z;
-  y = y * rot_z;
-  z = z * rot_z;
+  if (xy.normalize()) {
+    // Compute the rotation about the +Z (back) axis.  This is roll.
+    roll = -catan2(xy[1], xy[0]);
+
+    // Unwind the roll from the axes, and continue.
+    FLOATNAME(LMatrix3) rot_z;
+    rot_z._m(0, 0) = xy[0];
+    rot_z._m(0, 1) = -xy[1];
+    rot_z._m(0, 2) = 0;
+
+    rot_z._m(1, 0) = xy[1];
+    rot_z._m(1, 1) = xy[0];
+    rot_z._m(1, 2) = 0;
+
+    rot_z._m(2, 0) = 0;
+    rot_z._m(2, 1) = 0;
+    rot_z._m(2, 2) = 1;
+
+    x = x * rot_z;
+    y = y * rot_z;
+    z = z * rot_z;
+  }
 
   // Reset the matrix to reflect the unwinding.
   mat.set_row(0, x);
@@ -425,79 +428,82 @@ unwind_zup_rotation(FLOATNAME(LMatrix3) &mat, FLOATNAME(LVecBase3) &hpr) {
   mat.get_row(z,2);
 
   // Project Y into the XY plane.
+  FLOATTYPE heading = 0;
   FLOATNAME(LVector2) xy(y[0], y[1]);
-  xy = normalize(xy);
-
-  // Compute the rotation about the +Z (up) axis.  This is yaw, or "heading".
-  FLOATTYPE heading = -catan2(xy[0], xy[1]);
-
-  // Unwind the heading, and continue.
-  FLOATNAME(LMatrix3) rot_z;
-  rot_z._m(0, 0) = xy[1];
-  rot_z._m(0, 1) = xy[0];
-  rot_z._m(0, 2) = 0;
-
-  rot_z._m(1, 0) = -xy[0];
-  rot_z._m(1, 1) = xy[1];
-  rot_z._m(1, 2) = 0;
-
-  rot_z._m(2, 0) = 0;
-  rot_z._m(2, 1) = 0;
-  rot_z._m(2, 2) = 1;
-
-  x = x * rot_z;
-  y = y * rot_z;
-  z = z * rot_z;
+  if (xy.normalize()) {
+    // Compute the rotation about the +Z (up) axis.  This is yaw, or "heading".
+    heading = -catan2(xy[0], xy[1]);
+
+    // Unwind the heading, and continue.
+    FLOATNAME(LMatrix3) rot_z;
+    rot_z._m(0, 0) = xy[1];
+    rot_z._m(0, 1) = xy[0];
+    rot_z._m(0, 2) = 0;
+
+    rot_z._m(1, 0) = -xy[0];
+    rot_z._m(1, 1) = xy[1];
+    rot_z._m(1, 2) = 0;
+
+    rot_z._m(2, 0) = 0;
+    rot_z._m(2, 1) = 0;
+    rot_z._m(2, 2) = 1;
+
+    x = x * rot_z;
+    y = y * rot_z;
+    z = z * rot_z;
+  }
 
   // Project the rotated Y into the YZ plane.
+  FLOATTYPE pitch = 0;
   FLOATNAME(LVector2) yz(y[1], y[2]);
-  yz = normalize(yz);
-
-  // Compute the rotation about the +X (right) axis.  This is pitch.
-  FLOATTYPE pitch = catan2(yz[1], yz[0]);
-
-  // Unwind the pitch.
-  FLOATNAME(LMatrix3) rot_x;
-  rot_x._m(0, 0) = 1;
-  rot_x._m(0, 1) = 0;
-  rot_x._m(0, 2) = 0;
-
-  rot_x._m(1, 0) = 0;
-  rot_x._m(1, 1) = yz[0];
-  rot_x._m(1, 2) = -yz[1];
-
-  rot_x._m(2, 0) = 0;
-  rot_x._m(2, 1) = yz[1];
-  rot_x._m(2, 2) = yz[0];
-
-  x = x * rot_x;
-  y = y * rot_x;
-  z = z * rot_x;
+  if (yz.normalize()) {
+    // Compute the rotation about the +X (right) axis.  This is pitch.
+    pitch = catan2(yz[1], yz[0]);
+
+    // Unwind the pitch.
+    FLOATNAME(LMatrix3) rot_x;
+    rot_x._m(0, 0) = 1;
+    rot_x._m(0, 1) = 0;
+    rot_x._m(0, 2) = 0;
+
+    rot_x._m(1, 0) = 0;
+    rot_x._m(1, 1) = yz[0];
+    rot_x._m(1, 2) = -yz[1];
+
+    rot_x._m(2, 0) = 0;
+    rot_x._m(2, 1) = yz[1];
+    rot_x._m(2, 2) = yz[0];
+
+    x = x * rot_x;
+    y = y * rot_x;
+    z = z * rot_x;
+  }
 
   // Project X into the XZ plane.
+  FLOATTYPE roll = 0;
   FLOATNAME(LVector2) xz(x[0], x[2]);
-  xz = normalize(xz);
-
+  if (xz.normalize()) {
   // Compute the rotation about the -Y (back) axis.  This is roll.
-  FLOATTYPE roll = -catan2(xz[1], xz[0]);
+    roll = -catan2(xz[1], xz[0]);
 
-  // Unwind the roll from the axes, and continue.
-  FLOATNAME(LMatrix3) rot_y;
-  rot_y._m(0, 0) = xz[0];
-  rot_y._m(0, 1) = 0;
-  rot_y._m(0, 2) = -xz[1];
+    // Unwind the roll from the axes, and continue.
+    FLOATNAME(LMatrix3) rot_y;
+    rot_y._m(0, 0) = xz[0];
+    rot_y._m(0, 1) = 0;
+    rot_y._m(0, 2) = -xz[1];
 
-  rot_y._m(1, 0) = 0;
-  rot_y._m(1, 1) = 1;
-  rot_y._m(1, 2) = 0;
+    rot_y._m(1, 0) = 0;
+    rot_y._m(1, 1) = 1;
+    rot_y._m(1, 2) = 0;
 
-  rot_y._m(2, 0) = xz[1];
-  rot_y._m(2, 1) = 0;
-  rot_y._m(2, 2) = xz[0];
+    rot_y._m(2, 0) = xz[1];
+    rot_y._m(2, 1) = 0;
+    rot_y._m(2, 2) = xz[0];
 
-  x = x * rot_y;
-  y = y * rot_y;
-  z = z * rot_y;
+    x = x * rot_y;
+    y = y * rot_y;
+    z = z * rot_y;
+  }
 
   // Reset the matrix to reflect the unwinding.
   mat.set_row(0, x);

+ 11 - 7
panda/src/pgraph/loader.cxx

@@ -392,13 +392,17 @@ try_load_file(const Filename &pathname, const LoaderOptions &options,
     sgr.premunge(result, RenderState::make_empty());
   }
 
-  if (allow_ram_cache && result->is_of_type(ModelRoot::get_class_type())) {
-    // Store the loaded model in the RAM cache, and make sure we return a
-    // copy so that this node can be modified independently from the RAM
-    // cached version.
-    ModelPool::add_model(pathname, DCAST(ModelRoot, result.p()));
-    if ((options.get_flags() & LoaderOptions::LF_allow_instance) == 0) {
-      result = NodePath(result).copy_to(NodePath()).node();
+  if (result->is_of_type(ModelRoot::get_class_type())) {
+    ((ModelRoot *)result.p())->set_fullpath(pathname);
+
+    if (allow_ram_cache) {
+      // Store the loaded model in the RAM cache, and make sure we return a
+      // copy so that this node can be modified independently from the RAM
+      // cached version.
+      ModelPool::add_model(pathname, DCAST(ModelRoot, result.p()));
+      if ((options.get_flags() & LoaderOptions::LF_allow_instance) == 0) {
+        result = NodePath(result).copy_to(NodePath()).node();
+      }
     }
   }
 

+ 2 - 2
panda/src/pgraph/modelPool.I

@@ -42,7 +42,7 @@ verify_model(const Filename &filename) {
  * date (and hasn't been modified in the meantime), and if not, will still
  * return NULL.
  */
-INLINE ModelRoot *ModelPool::
+INLINE PT(ModelRoot) ModelPool::
 get_model(const Filename &filename, bool verify) {
   return get_ptr()->ns_get_model(filename, verify);
 }
@@ -54,7 +54,7 @@ get_model(const Filename &filename, bool verify) {
  * is true and the file has recently changed).  If the model file cannot be
  * found, or cannot be loaded for some reason, returns NULL.
  */
-INLINE ModelRoot *ModelPool::
+INLINE PT(ModelRoot) ModelPool::
 load_model(const Filename &filename, const LoaderOptions &options) {
   return get_ptr()->ns_load_model(filename, options);
 }

+ 6 - 29
panda/src/pgraph/modelPool.cxx

@@ -48,7 +48,7 @@ ns_has_model(const Filename &filename) {
 /**
  * The nonstatic implementation of get_model().
  */
-ModelRoot *ModelPool::
+PT(ModelRoot) ModelPool::
 ns_get_model(const Filename &filename, bool verify) {
 
   PT(ModelRoot) cached_model;
@@ -116,54 +116,31 @@ ns_get_model(const Filename &filename, bool verify) {
 /**
  * The nonstatic implementation of load_model().
  */
-ModelRoot *ModelPool::
+PT(ModelRoot) ModelPool::
 ns_load_model(const Filename &filename, const LoaderOptions &options) {
-
-  // First check if it has already been loaded and is still current.
+  // First check if it's been cached under the given filename (for backward
+  // compatibility reasons)
   PT(ModelRoot) cached_model = ns_get_model(filename, true);
   if (cached_model != nullptr) {
     return cached_model;
   }
 
-  // Look on disk for the current file.
   LoaderOptions new_options(options);
-  new_options.set_flags((new_options.get_flags() | LoaderOptions::LF_no_ram_cache) &
-                        ~LoaderOptions::LF_search);
+  new_options.set_flags(new_options.get_flags() & ~LoaderOptions::LF_no_ram_cache);
 
   Loader *model_loader = Loader::get_global_ptr();
   PT(PandaNode) panda_node = model_loader->load_sync(filename, new_options);
   PT(ModelRoot) node;
 
-  if (panda_node.is_null()) {
-    // This model was not found.
-
-  } else {
+  if (!panda_node.is_null()) {
     if (panda_node->is_of_type(ModelRoot::get_class_type())) {
       node = DCAST(ModelRoot, panda_node);
-
     } else {
       // We have to construct a ModelRoot node to put it under.
       node = new ModelRoot(filename);
       node->add_child(panda_node);
     }
-    node->set_fullpath(filename);
-  }
-
-  {
-    LightMutexHolder holder(_lock);
-
-    // Look again, in case someone has just loaded the model in another
-    // thread.
-    Models::const_iterator ti;
-    ti = _models.find(filename);
-    if (ti != _models.end() && (*ti).second != cached_model) {
-      // This model was previously loaded.
-      return (*ti).second;
-    }
-
-    _models[filename] = node;
   }
-
   return node;
 }
 

+ 6 - 6
panda/src/pgraph/modelPool.h

@@ -43,9 +43,9 @@ class EXPCL_PANDA_PGRAPH ModelPool {
 PUBLISHED:
   INLINE static bool has_model(const Filename &filename);
   INLINE static bool verify_model(const Filename &filename);
-  INLINE static ModelRoot *get_model(const Filename &filename, bool verify);
-  BLOCKING INLINE static ModelRoot *load_model(const Filename &filename,
-                                               const LoaderOptions &options = LoaderOptions());
+  INLINE static PT(ModelRoot) get_model(const Filename &filename, bool verify);
+  BLOCKING INLINE static PT(ModelRoot) load_model(const Filename &filename,
+                                                  const LoaderOptions &options = LoaderOptions());
 
   INLINE static void add_model(const Filename &filename, ModelRoot *model);
   INLINE static void release_model(const Filename &filename);
@@ -65,9 +65,9 @@ private:
   INLINE ModelPool();
 
   bool ns_has_model(const Filename &filename);
-  ModelRoot *ns_get_model(const Filename &filename, bool verify);
-  ModelRoot *ns_load_model(const Filename &filename,
-                           const LoaderOptions &options);
+  PT(ModelRoot) ns_get_model(const Filename &filename, bool verify);
+  PT(ModelRoot) ns_load_model(const Filename &filename,
+                              const LoaderOptions &options);
   void ns_add_model(const Filename &filename, ModelRoot *model);
   void ns_release_model(const Filename &filename);
 

+ 2 - 0
panda/src/pgraph/nodePath.h

@@ -884,6 +884,8 @@ PUBLISHED:
   INLINE void set_collide_mask(CollideMask new_mask, CollideMask bits_to_change = CollideMask::all_on(),
                                TypeHandle node_type = TypeHandle::none());
 
+  EXTENSION(void set_collide_owner(PyObject *owner));
+
   // Comparison methods
   INLINE bool operator == (const NodePath &other) const;
   INLINE bool operator != (const NodePath &other) const;

+ 59 - 0
panda/src/pgraph/nodePath_ext.cxx

@@ -15,6 +15,7 @@
 #include "typedWritable_ext.h"
 #include "shaderInput_ext.h"
 #include "shaderAttrib.h"
+#include "collisionNode.h"
 
 #ifdef HAVE_PYTHON
 
@@ -327,4 +328,62 @@ get_tight_bounds(const NodePath &other) const {
   }
 }
 
+/**
+ * Recursively assigns a weak reference to the given owner object to all
+ * collision nodes at this level and below.
+ *
+ * You may pass in None to clear all owners below this level.
+ *
+ * Note that there is no corresponding get_collide_owner(), since there may be
+ * multiple nodes below this level with different owners.
+ */
+void Extension<NodePath>::
+set_collide_owner(PyObject *owner) {
+  if (owner != Py_None) {
+    PyObject *ref = PyWeakref_NewRef(owner, nullptr);
+    if (ref != nullptr) {
+      r_set_collide_owner(_this->node(), ref);
+      Py_DECREF(ref);
+    }
+  } else {
+    r_clear_collide_owner(_this->node());
+  }
+}
+
+/**
+ * Recursive implementation of set_collide_owner.  weakref must be a weak ref
+ * object.
+ *
+ * Every collision node at or below the given node is handed its own new
+ * reference to weakref (via Py_NewRef), together with a callback that
+ * releases that reference with Py_DECREF.  Presumably CollisionNode invokes
+ * the callback when the owner is cleared or replaced; confirm against
+ * collisionNode.h.
+ */
+void Extension<NodePath>::
+r_set_collide_owner(PandaNode *node, PyObject *weakref) {
+  if (node->is_collision_node()) {
+    CollisionNode *cnode = (CollisionNode *)node;
+    cnode->set_owner(Py_NewRef(weakref),
+                     [](void *obj) { Py_DECREF((PyObject *)obj); });
+  }
+
+  PandaNode::Children cr = node->get_children();
+  int num_children = cr.get_num_children();
+  for (int i = 0; i < num_children; i++) {
+    r_set_collide_owner(cr.get_child(i), weakref);
+  }
+}
+
+/**
+ * Recursive implementation of set_collide_owner(None).
+ */
+void Extension<NodePath>::
+r_clear_collide_owner(PandaNode *node) {
+  if (node->is_collision_node()) {
+    CollisionNode *cnode = (CollisionNode *)node;
+    cnode->clear_owner();
+  }
+
+  PandaNode::Children cr = node->get_children();
+  int num_children = cr.get_num_children();
+  for (int i = 0; i < num_children; i++) {
+    r_clear_collide_owner(cr.get_child(i));
+  }
+}
+
 #endif  // HAVE_PYTHON

Some files were not shown because too many files changed in this diff