Browse Source

Merge branch 'master' into webgl-port

rdb 5 years ago
parent
commit
daf3773607
96 changed files with 2569 additions and 376 deletions
  1. 78 26
      .github/workflows/ci.yml
  2. 2 3
      BACKERS.md
  3. 2 2
      README.md
  4. 3 3
      cmake/macros/PackageConfig.cmake
  5. 3 3
      direct/src/directtools/DirectSelection.py
  6. 9 9
      direct/src/directtools/DirectSession.py
  7. 8 3
      direct/src/dist/FreezeTool.py
  8. 11 3
      direct/src/dist/commands.py
  9. 5 1
      direct/src/dist/pefile.py
  10. 30 29
      direct/src/showbase/Loader.py
  11. 6 0
      direct/src/showbase/ShowBase.py
  12. 8 3
      direct/src/showbase/VFSImporter.py
  13. 2 0
      dtool/src/dtoolbase/dtool_platform.h
  14. 1 1
      makepanda/installer.nsi
  15. 2 2
      makepanda/makepackage.py
  16. 13 14
      makepanda/makepanda.py
  17. 57 12
      makepanda/makepandacore.py
  18. 2 1
      makepanda/makewheel.py
  19. 2 0
      panda/src/audiotraits/openalAudioSound.cxx
  20. 8 0
      panda/src/bullet/bulletSoftBodyNode.cxx
  21. 6 1
      panda/src/collide/CMakeLists.txt
  22. 5 1
      panda/src/collide/collisionPolygon.h
  23. 86 0
      panda/src/collide/collisionPolygon_ext.cxx
  24. 43 0
      panda/src/collide/collisionPolygon_ext.h
  25. 1 1
      panda/src/collide/collisionTraverser.h
  26. 1 0
      panda/src/collide/p3collide_ext_composite.cxx
  27. 2 18
      panda/src/cull/cullBinBackToFront.cxx
  28. 2 18
      panda/src/cull/cullBinFixed.cxx
  29. 2 18
      panda/src/cull/cullBinFrontToBack.cxx
  30. 2 18
      panda/src/cull/cullBinStateSorted.cxx
  31. 2 18
      panda/src/cull/cullBinUnsorted.cxx
  32. 6 1
      panda/src/display/graphicsStateGuardian.cxx
  33. 1 1
      panda/src/display/graphicsStateGuardian.h
  34. 3 2
      panda/src/display/standardMunger.cxx
  35. 6 3
      panda/src/dxgsg9/dxGraphicsStateGuardian9.cxx
  36. 1 1
      panda/src/dxgsg9/dxGraphicsStateGuardian9.h
  37. 3 3
      panda/src/event/pythonTask.cxx
  38. 2 0
      panda/src/express/config_express.cxx
  39. 1 1
      panda/src/glstuff/glCgShaderContext_src.cxx
  40. 97 49
      panda/src/glstuff/glGraphicsStateGuardian_src.cxx
  41. 4 1
      panda/src/glstuff/glGraphicsStateGuardian_src.h
  42. 31 0
      panda/src/glstuff/glShaderContext_src.cxx
  43. 7 4
      panda/src/gobj/geom.cxx
  44. 2 2
      panda/src/gobj/geom.h
  45. 21 0
      panda/src/gobj/geomVertexArrayFormat.cxx
  46. 2 0
      panda/src/gobj/geomVertexArrayFormat.h
  47. 23 0
      panda/src/gobj/geomVertexData.I
  48. 47 1
      panda/src/gobj/geomVertexData.cxx
  49. 5 1
      panda/src/gobj/geomVertexData.h
  50. 31 0
      panda/src/gobj/geomVertexFormat.cxx
  51. 3 1
      panda/src/gobj/geomVertexFormat.h
  52. 11 0
      panda/src/gobj/internalName.I
  53. 1 0
      panda/src/gobj/internalName.cxx
  54. 2 0
      panda/src/gobj/internalName.h
  55. 1 1
      panda/src/gobj/matrixLens.I
  56. 1 1
      panda/src/gsgbase/graphicsStateGuardianBase.h
  57. 4 0
      panda/src/pgraph/CMakeLists.txt
  58. 19 1
      panda/src/pgraph/billboardEffect.cxx
  59. 50 14
      panda/src/pgraph/compassEffect.cxx
  60. 6 0
      panda/src/pgraph/config_pgraph.cxx
  61. 18 2
      panda/src/pgraph/cullTraverser.cxx
  62. 3 1
      panda/src/pgraph/cullTraverserData.I
  63. 9 0
      panda/src/pgraph/cullTraverserData.cxx
  64. 2 0
      panda/src/pgraph/cullTraverserData.h
  65. 18 3
      panda/src/pgraph/cullableObject.I
  66. 34 0
      panda/src/pgraph/cullableObject.cxx
  67. 5 0
      panda/src/pgraph/cullableObject.h
  68. 1 2
      panda/src/pgraph/geomDrawCallbackData.cxx
  69. 11 0
      panda/src/pgraph/geomNode.cxx
  70. 280 0
      panda/src/pgraph/instanceList.I
  71. 213 0
      panda/src/pgraph/instanceList.cxx
  72. 159 0
      panda/src/pgraph/instanceList.h
  73. 39 0
      panda/src/pgraph/instancedNode.I
  74. 492 0
      panda/src/pgraph/instancedNode.cxx
  75. 136 0
      panda/src/pgraph/instancedNode.h
  76. 1 0
      panda/src/pgraph/lightAttrib.I
  77. 60 0
      panda/src/pgraph/lightAttrib.cxx
  78. 2 0
      panda/src/pgraph/lightAttrib.h
  79. 3 3
      panda/src/pgraph/loader.cxx
  80. 77 2
      panda/src/pgraph/nodePath.cxx
  81. 4 0
      panda/src/pgraph/nodePath.h
  82. 2 0
      panda/src/pgraph/p3pgraph_composite2.cxx
  83. 57 35
      panda/src/pgraph/pandaNode.cxx
  84. 6 0
      panda/src/pgraph/pandaNode.h
  85. 1 1
      panda/src/pgraph/shaderAttrib.I
  86. 2 0
      panda/src/pgraph/shaderAttrib.cxx
  87. 1 0
      panda/src/pgraph/shaderAttrib.h
  88. 72 19
      panda/src/pgraphnodes/lodNode.cxx
  89. 3 1
      panda/src/pgraphnodes/shaderGenerator.cxx
  90. 3 3
      panda/src/pipeline/pythonThread.cxx
  91. 5 0
      panda/src/pstatclient/pStatCollector.I
  92. 6 5
      panda/src/putil/pythonCallbackObject.cxx
  93. 2 2
      panda/src/tinydisplay/tinyGraphicsStateGuardian.cxx
  94. 1 1
      panda/src/tinydisplay/tinyGraphicsStateGuardian.h
  95. 1 0
      setup.cfg
  96. 45 0
      tests/collide/test_collision_polygon.py

+ 78 - 26
.github/workflows/ci.yml

@@ -92,12 +92,18 @@ jobs:
     - name: Install dependencies (macOS)
       if: runner.os == 'macOS'
       run: |
-        curl -O https://www.panda3d.org/download/panda3d-1.10.5/panda3d-1.10.5-tools-mac.tar.gz
-        tar -xf panda3d-1.10.5-tools-mac.tar.gz
+        curl -O https://www.panda3d.org/download/panda3d-1.10.8/panda3d-1.10.8-tools-mac.tar.gz
+        tar -xf panda3d-1.10.8-tools-mac.tar.gz
+        mv panda3d-1.10.8/thirdparty thirdparty
+        rmdir panda3d-1.10.8
 
-        brew install ccache
+        mkdir -p build/Frameworks
+        cp -R thirdparty/darwin-libs-a/nvidiacg/Cg.framework build/Frameworks/Cg.framework
+
+        mkdir -p "build/${{ matrix.config }}/Frameworks"
+        cp -R thirdparty/darwin-libs-a/nvidiacg/Cg.framework "build/${{ matrix.config }}/Frameworks/Cg.framework"
 
-        echo "##[set-env name=thirdpartyOption;]-D THIRDPARTY_DIRECTORY=../panda3d-1.10.5/thirdparty" -DHAVE_CG=OFF
+        brew install ccache
 
     - name: Install dependencies (Ubuntu)
       if: startsWith(matrix.os, 'ubuntu')
@@ -120,20 +126,19 @@ jobs:
       if: runner.os == 'Windows'
       uses: actions/cache@v1
       with:
-        path: thirdparty-tools
-        key: ci-cmake-${{ runner.OS }}-thirdparty-v1.10.5-r1
+        path: thirdparty
+        key: ci-cmake-${{ runner.OS }}-thirdparty-v1.10.7-r1
     - name: Install dependencies (Windows)
       if: runner.os == 'Windows'
       shell: powershell
       run: |
-        if (!(Test-Path thirdparty-tools/panda3d-1.10.5)) {
+        if (!(Test-Path thirdparty/win-libs-vc14-x64)) {
           $wc = New-Object System.Net.WebClient
-          $wc.DownloadFile("https://www.panda3d.org/download/panda3d-1.10.5/panda3d-1.10.5-tools-win64.zip", "thirdparty-tools.zip")
+          $wc.DownloadFile("https://www.panda3d.org/download/panda3d-1.10.7/panda3d-1.10.7-tools-win64.zip", "thirdparty-tools.zip")
           Expand-Archive -Path thirdparty-tools.zip
+          Move-Item -Path thirdparty-tools/panda3d-1.10.7/thirdparty -Destination .
         }
 
-        echo "##[set-env name=thirdpartyOption;]-D THIRDPARTY_DIRECTORY=../thirdparty-tools/panda3d-1.10.5/thirdparty"
-
     - name: ccache (non-Windows)
       if: runner.os != 'Windows'
       uses: actions/cache@v1
@@ -146,15 +151,15 @@ jobs:
       env:
         CMAKE_GENERATOR: "${{ matrix.generator }}"
       run: >
-        mkdir build
+        mkdir -p build
 
         cd build
 
         if ${{ matrix.compiler == 'Clang' }}; then
           if [[ "$CMAKE_GENERATOR" == *Studio*2019* ]]; then
-            export CMAKE_GENERATOR_TOOLSET=ClangCL thirdpartyOption="$thirdpartyOption -DHAVE_HARFBUZZ=NO"
+            export CMAKE_GENERATOR_TOOLSET=ClangCL
           elif [[ "$CMAKE_GENERATOR" == *Studio* ]]; then
-            export CMAKE_GENERATOR_TOOLSET=LLVM thirdpartyOption="$thirdpartyOption -DHAVE_HARFBUZZ=NO"
+            export CMAKE_GENERATOR_TOOLSET=LLVM
           else
             export CC=clang CXX=clang++
           fi
@@ -162,7 +167,8 @@ jobs:
 
         if ${{ runner.os != 'Windows' }}; then
           compilerLauncher=$(echo -DCMAKE_C{,XX}_COMPILER_LAUNCHER=ccache)
-          echo "##[set-env name=CCACHE_DIR;]$(dirname $PWD)/ccache"
+          export CCACHE_DIR="$(dirname $PWD)/ccache"
+          echo "CCACHE_DIR=$(dirname $PWD)/ccache" >> $GITHUB_ENV
         fi
 
         cmake
@@ -172,7 +178,6 @@ jobs:
         -D BUILD_METALIBS=${{ matrix.metalibs }}
         -D HAVE_PYTHON=${{ matrix.python }}
         -D HAVE_EIGEN=${{ matrix.eigen }}
-        ${thirdpartyOption:-}
         ..
 
     - name: Build (no Python)
@@ -193,7 +198,7 @@ jobs:
       shell: bash
       run: >
         cmake -DWANT_PYTHON_VERSION=3.6
-        -DPython_FIND_REGISTRY=NEVER -DPython_ROOT=$pythonLocation .
+        -DPython_FIND_REGISTRY=NEVER -DPython_ROOT="$pythonLocation" .
     - name: Build (Python 3.6)
       if: contains(matrix.python, 'YES')
       # BEGIN A
@@ -225,7 +230,7 @@ jobs:
       shell: bash
       run: >
         cmake -DWANT_PYTHON_VERSION=3.7
-        -DPython_FIND_REGISTRY=NEVER -DPython_ROOT=$pythonLocation .
+        -DPython_FIND_REGISTRY=NEVER -DPython_ROOT="$pythonLocation" .
     - name: Build (Python 3.7)
       if: contains(matrix.python, 'YES')
       # BEGIN A
@@ -257,7 +262,7 @@ jobs:
       shell: bash
       run: >
         cmake -DWANT_PYTHON_VERSION=3.8
-        -DPython_FIND_REGISTRY=NEVER -DPython_ROOT=$pythonLocation .
+        -DPython_FIND_REGISTRY=NEVER -DPython_ROOT="$pythonLocation" .
     - name: Build (Python 3.8)
       if: contains(matrix.python, 'YES')
       # BEGIN A
@@ -278,6 +283,38 @@ jobs:
         $PYTHON_EXECUTABLE -m pytest ../tests --cov=.
       # END B
 
+    - name: Setup Python (Python 3.9)
+      if: contains(matrix.python, 'YES')
+      uses: actions/setup-python@v1
+      with:
+        python-version: 3.9
+    - name: Configure (Python 3.9)
+      if: contains(matrix.python, 'YES')
+      working-directory: build
+      shell: bash
+      run: >
+        cmake -DWANT_PYTHON_VERSION=3.9
+        -DPython_FIND_REGISTRY=NEVER -DPython_ROOT="$pythonLocation" .
+    - name: Build (Python 3.9)
+      if: contains(matrix.python, 'YES')
+      # BEGIN A
+      working-directory: build
+      run: cmake --build . --config ${{ matrix.config }} --parallel 4
+      # END A
+    - name: Test (Python 3.9)
+      # BEGIN B
+      if: contains(matrix.python, 'YES')
+      working-directory: build
+      shell: bash
+      env:
+        PYTHONPATH: ${{ matrix.config }}
+      run: |
+        PYTHON_EXECUTABLE=$(grep 'Python_EXECUTABLE:' CMakeCache.txt | sed 's/.*=//')
+        $PYTHON_EXECUTABLE -m pip install pytest pytest-cov
+        export COVERAGE_FILE=.coverage.$RANDOM LLVM_PROFILE_FILE=$PWD/pid-%p.profraw
+        $PYTHON_EXECUTABLE -m pytest ../tests --cov=.
+      # END B
+
     - name: Upload coverage reports
       if: always() && matrix.config == 'Coverage'
       working-directory: build
@@ -313,24 +350,38 @@ jobs:
       shell: powershell
       run: |
         $wc = New-Object System.Net.WebClient
-        $wc.DownloadFile("https://www.panda3d.org/download/panda3d-1.10.5/panda3d-1.10.5-tools-win64.zip", "thirdparty-tools.zip")
+        $wc.DownloadFile("https://www.panda3d.org/download/panda3d-1.10.7/panda3d-1.10.7-tools-win64.zip", "thirdparty-tools.zip")
         Expand-Archive -Path thirdparty-tools.zip
-        Move-Item -Path thirdparty-tools/panda3d-1.10.5/thirdparty -Destination .
+        Move-Item -Path thirdparty-tools/panda3d-1.10.7/thirdparty -Destination .
     - name: Get thirdparty packages (macOS)
       if: runner.os == 'macOS'
       run: |
-        curl -O https://www.panda3d.org/download/panda3d-1.10.5/panda3d-1.10.5-tools-mac.tar.gz
-        tar -xf panda3d-1.10.5-tools-mac.tar.gz
-        mv panda3d-1.10.5/thirdparty thirdparty
-        rmdir panda3d-1.10.5
+        curl -O https://www.panda3d.org/download/panda3d-1.10.8/panda3d-1.10.8-tools-mac.tar.gz
+        tar -xf panda3d-1.10.8-tools-mac.tar.gz
+        mv panda3d-1.10.8/thirdparty thirdparty
+        rmdir panda3d-1.10.8
         (cd thirdparty/darwin-libs-a && rm -rf rocket)
+    - name: Set up Python 3.9
+      uses: actions/setup-python@v1
+      with:
+        python-version: 3.9
+    - name: Build Python 3.9
+      shell: bash
+      run: |
+        python makepanda/makepanda.py --git-commit=${{github.sha}} --outputdir=built --everything --no-eigen --python-incdir="$pythonLocation/include" --python-libdir="$pythonLocation/lib" --verbose --threads=4
+    - name: Test Python 3.9
+      shell: bash
+      run: |
+        python -m pip install pytest
+        PYTHONPATH=built LD_LIBRARY_PATH=built/lib DYLD_LIBRARY_PATH=built/lib python -m pytest
     - name: Set up Python 3.8
       uses: actions/setup-python@v1
       with:
         python-version: 3.8
     - name: Build Python 3.8
+      shell: bash
       run: |
-        python makepanda/makepanda.py --git-commit=${{github.sha}} --outputdir=built --everything --no-eigen --python-incdir=$pythonLocation/include --python-libdir=$pythonLocation/lib --verbose --threads=4
+        python makepanda/makepanda.py --git-commit=${{github.sha}} --outputdir=built --everything --no-eigen --python-incdir="$pythonLocation/include" --python-libdir="$pythonLocation/lib" --verbose --threads=4
     - name: Test Python 3.8
       shell: bash
       run: |
@@ -341,8 +392,9 @@ jobs:
       with:
         python-version: 3.7
     - name: Build Python 3.7
+      shell: bash
       run: |
-        python makepanda/makepanda.py --git-commit=${{github.sha}} --outputdir=built --everything --no-eigen --python-incdir=$pythonLocation/include --python-libdir=$pythonLocation/lib --verbose --threads=4
+        python makepanda/makepanda.py --git-commit=${{github.sha}} --outputdir=built --everything --no-eigen --python-incdir="$pythonLocation/include" --python-libdir="$pythonLocation/lib" --verbose --threads=4
     - name: Test Python 3.7
       shell: bash
       run: |

+ 2 - 3
BACKERS.md

@@ -15,7 +15,6 @@ This is a list of all the people who are contributing financially to Panda3D.  I
 * [Mitchell Stokes](https://opencollective.com/mitchell-stokes)
 * [Daniel Stokes](https://opencollective.com/daniel-stokes)
 * [David Rose](https://opencollective.com/david-rose)
-* [Carnetsoft](https://cs-driving-simulator.com/)
 
 ## Benefactors
 
@@ -23,14 +22,14 @@ This is a list of all the people who are contributing financially to Panda3D.  I
 
 * Sam Edwards
 * Max Voss
-* Will Nielsen
 
 ## Enthusiasts
 
-![Benefactors](https://opencollective.com/panda3d/tiers/enthusiast.svg?avatarHeight=48&width=600)
+![Enthusiasts](https://opencollective.com/panda3d/tiers/enthusiast.svg?avatarHeight=48&width=600)
 
 * Eric Thomson
 * Kyle Roach
+* Brian Lach
 
 ## Backers
 

+ 2 - 2
README.md

@@ -1,4 +1,4 @@
-[![Build Status](https://travis-ci.org/panda3d/panda3d.svg?branch=master)](https://travis-ci.org/panda3d/panda3d)
+[![Build Status](https://github.com/panda3d/panda3d/workflows/Continuous%20Integration/badge.svg?branch=master)](https://github.com/panda3d/panda3d/actions?query=branch%3Amaster+workflow%3A%22Continuous+Integration%22)
 [![OpenCollective](https://opencollective.com/panda3d/backers/badge.svg)](https://opencollective.com/panda3d)
 [![OpenCollective](https://opencollective.com/panda3d/sponsors/badge.svg)](https://opencollective.com/panda3d)
 
@@ -136,7 +136,7 @@ macOS
 -----
 
 On macOS, you will need to download a set of precompiled thirdparty packages in order to
-compile Panda3D, which can be acquired from [here](https://www.panda3d.org/download/panda3d-1.10.7/panda3d-1.10.7-tools-mac.tar.gz).
+compile Panda3D, which can be acquired from [here](https://www.panda3d.org/download/panda3d-1.10.8/panda3d-1.10.8-tools-mac.tar.gz).
 
 After placing the thirdparty directory inside the panda3d source directory,
 you may build Panda3D using a command like the following:

+ 3 - 3
cmake/macros/PackageConfig.cmake

@@ -145,6 +145,8 @@ function(package_option name)
     endif()
   endif()
 
+  option("HAVE_${name}" "${cache_string}" "${default}")
+
   # If it was set by the user but not found, display an error.
   string(TOUPPER "${found_as}" FOUND_AS)
   if(HAVE_${name} AND NOT ${found_as}_FOUND AND NOT ${FOUND_AS}_FOUND)
@@ -180,9 +182,7 @@ function(package_option name)
       "$<TARGET_PROPERTY:PKG::${name},INTERFACE_INCLUDE_DIRECTORIES>")
   endif()
 
-  # Create the option, and if it actually is enabled, populate the INTERFACE
-  # library created above
-  option("HAVE_${name}" "${cache_string}" "${default}")
+  # If the option actually is enabled, populate the INTERFACE library created above
   if(HAVE_${name})
     set(use_variables ON)
 

+ 3 - 3
direct/src/directtools/DirectSelection.py

@@ -529,7 +529,7 @@ class SelectionQueue(CollisionHandlerQueue):
             # Well, no way to tell.  Assume we're not backfacing.
             return 0
 
-        if direct:
+        if base.direct:
             cam = base.direct.cam
         else:
             cam = base.cam
@@ -602,7 +602,7 @@ class SelectionRay(SelectionQueue):
         if xy:
             mx = xy[0]
             my = xy[1]
-        elif direct:
+        elif base.direct:
             mx = base.direct.dr.mouseX
             my = base.direct.dr.mouseY
         else:
@@ -613,7 +613,7 @@ class SelectionRay(SelectionQueue):
             mx = base.mouseWatcherNode.getMouseX()
             my = base.mouseWatcherNode.getMouseY()
 
-        if direct:
+        if base.direct:
             self.collider.setFromLens(base.direct.camNode, mx, my)
         else:
             self.collider.setFromLens(base.camNode, mx, my)

+ 9 - 9
direct/src/directtools/DirectSession.py

@@ -68,24 +68,24 @@ class DirectSession(DirectObject):
         self.activeParent = None
 
         self.selectedNPReadout = OnscreenText.OnscreenText(
-            pos = (-1.0, -0.9), bg=Vec4(1, 1, 1, 1),
-            scale = 0.05, align = TextNode.ALeft,
+            pos = (0.1, 0.1), bg=Vec4(0, 0, 0, .2), style=3,
+            fg = (1, 1, 1, 1), scale = 0.05, align = TextNode.ALeft,
             mayChange = 1, font = self.font)
         # Make sure readout is never lit or drawn in wireframe
         useDirectRenderStyle(self.selectedNPReadout)
         self.selectedNPReadout.reparentTo(hidden)
 
         self.activeParentReadout = OnscreenText.OnscreenText(
-            pos = (-1.0, -0.975), bg=Vec4(1, 1, 1, 1),
-            scale = 0.05, align = TextNode.ALeft,
+            pos = (0.1, 0.16), bg=Vec4(0, 0, 0, .2), style = 3,
+            fg = (1, 1, 1, 1), scale = 0.05, align = TextNode.ALeft,
             mayChange = 1, font = self.font)
         # Make sure readout is never lit or drawn in wireframe
         useDirectRenderStyle(self.activeParentReadout)
         self.activeParentReadout.reparentTo(hidden)
 
         self.directMessageReadout = OnscreenText.OnscreenText(
-            pos = (-1.0, 0.9), bg=Vec4(1, 1, 1, 1),
-            scale = 0.05, align = TextNode.ALeft,
+            pos = (0.1, -0.1), bg=Vec4(0, 0, 0, .2), style = 3,
+            fg = (1, 1, 1, 1), scale = 0.05, align = TextNode.ALeft,
             mayChange = 1, font = self.font)
         # Make sure readout is never lit or drawn in wireframe
         useDirectRenderStyle(self.directMessageReadout)
@@ -698,7 +698,7 @@ class DirectSession(DirectObject):
                 self.ancestry = dnp.getAncestors()
                 self.ancestryIndex = 0
             # Update the selectedNPReadout
-            self.selectedNPReadout.reparentTo(aspect2d)
+            self.selectedNPReadout.reparentTo(base.a2dBottomLeft)
             self.selectedNPReadout.setText(
                 'Selected:' + dnp.getName())
             # Show the manipulation widget
@@ -782,7 +782,7 @@ class DirectSession(DirectObject):
         # Record new parent
         self.activeParent = nodePath
         # Update the activeParentReadout
-        self.activeParentReadout.reparentTo(aspect2d)
+        self.activeParentReadout.reparentTo(base.a2dBottomLeft)
         self.activeParentReadout.setText(
             'Active Reparent Target:' + nodePath.getName())
         # Alert everyone else
@@ -1031,7 +1031,7 @@ class DirectSession(DirectObject):
     def message(self, text):
         taskMgr.remove('hideDirectMessage')
         taskMgr.remove('hideDirectMessageLater')
-        self.directMessageReadout.reparentTo(aspect2d)
+        self.directMessageReadout.reparentTo(base.a2dTopLeft)
         self.directMessageReadout.setText(text)
         self.hideDirectMessageLater()
 

+ 8 - 3
direct/src/dist/FreezeTool.py

@@ -61,7 +61,7 @@ except ImportError:
     def pytest_imports():
         return []
 
-hiddenImports = {
+defaultHiddenImports = {
     'pytest': pytest_imports(),
     'pkg_resources': [
         'pkg_resources.*.*',
@@ -751,7 +751,7 @@ class Freezer:
             return 'ModuleDef(%s)' % (', '.join(args))
 
     def __init__(self, previous = None, debugLevel = 0,
-                 platform = None, path=None):
+                 platform = None, path=None, hiddenImports=None):
         # Normally, we are freezing for our own platform.  Change this
         # if untrue.
         self.platform = platform or PandaSystem.getPlatform()
@@ -825,6 +825,11 @@ class Freezer:
                 if path:
                     modulefinder.AddPackagePath(moduleName, path[0])
 
+        # Modules with non-obvious dependencies
+        self.hiddenImports = defaultHiddenImports.copy()
+        if hiddenImports is not None:
+            self.hiddenImports.update(hiddenImports)
+
         # Suffix/extension for Python C extension modules
         if self.platform == PandaSystem.getPlatform():
             self.moduleSuffixes = imp.get_suffixes()
@@ -1166,7 +1171,7 @@ class Freezer:
 
         # Check if any new modules we found have "hidden" imports
         for origName in list(self.mf.modules.keys()):
-            hidden = hiddenImports.get(origName, [])
+            hidden = self.hiddenImports.get(origName, [])
             for modname in hidden:
                 if modname.endswith('.*'):
                     mdefs = self._gatherSubmodules(modname, implicit = True)

+ 11 - 3
direct/src/dist/commands.py

@@ -238,6 +238,7 @@ class build_apps(setuptools.Command):
             self.exclude_dependencies += ['bcrypt.dll']
 
         self.package_data_dirs = {}
+        self.hidden_imports = {}
 
         # We keep track of the zip files we've opened.
         self._zip_files = {}
@@ -271,6 +272,10 @@ class build_apps(setuptools.Command):
         self.platforms = _parse_list(self.platforms)
         self.plugins = _parse_list(self.plugins)
         self.extra_prc_files = _parse_list(self.extra_prc_files)
+        self.hidden_imports = {
+            key: _parse_list(value)
+            for key, value in _parse_dict(self.hidden_imports).items()
+        }
 
         if self.default_prc_dir is None:
             self.default_prc_dir = '<auto>etc' if not self.embed_prc_data else ''
@@ -639,7 +644,11 @@ class build_apps(setuptools.Command):
             return search_path
 
         def create_runtime(appname, mainscript, use_console):
-            freezer = FreezeTool.Freezer(platform=platform, path=path)
+            freezer = FreezeTool.Freezer(
+                platform=platform,
+                path=path,
+                hiddenImports=self.hidden_imports
+            )
             freezer.addModule('__main__', filename=mainscript)
             freezer.addModule('site', filename='site.py', text=SITE_PY)
             for incmod in self.include_modules.get(appname, []) + self.include_modules.get('*', []):
@@ -1311,7 +1320,6 @@ class bdist_apps(setuptools.Command):
             for i in apps
         ]
 
-        fullname = self.distribution.get_fullname()
         shortname = self.distribution.get_name()
 
         # Create the .nsi installer script
@@ -1321,7 +1329,7 @@ class bdist_apps(setuptools.Command):
 
         # Some global info
         nsi.write('Name "%s"\n' % shortname)
-        nsi.write('OutFile "%s"\n' % (fullname+'.exe'))
+        nsi.write('OutFile "%s"\n' % os.path.join(self.dist_dir, basename+'.exe'))
         if is_64bit:
             nsi.write('InstallDir "$PROGRAMFILES64\\%s"\n' % shortname)
         else:

+ 5 - 1
direct/src/dist/pefile.py

@@ -242,7 +242,11 @@ class VersionInfoResource(object):
         length, value_length = unpack('<HH', data[0:4])
         offset = 40 + value_length + (value_length & 1)
         dwords = array('I')
-        dwords.fromstring(bytes(data[40:offset]))
+        if sys.version_info >= (3, 2):
+            dwords.frombytes(bytes(data[40:offset]))
+        else:
+            dwords.fromstring(bytes(data[40:offset]))
+
         if len(dwords) > 0:
             self.signature = dwords[0]
         if len(dwords) > 1:

+ 30 - 29
direct/src/showbase/Loader.py

@@ -192,21 +192,21 @@ class Loader(DirectObject):
         pathname), the return value will be a NodePath to the model
         loaded if the load was successful, or None otherwise.  If the
         input modelPath is a list of pathnames, the return value will
-        be a list of NodePaths and/or Nones.
+        be a list of `.NodePath` objects and/or Nones.
 
         loaderOptions may optionally be passed in to control details
         about the way the model is searched and loaded.  See the
-        LoaderOptions class for more.
+        `.LoaderOptions` class for more.
 
-        The default is to look in the ModelPool (RAM) cache first, and
-        return a copy from that if the model can be found there.  If
-        the bam cache is enabled (via the model-cache-dir config
+        The default is to look in the `.ModelPool` (RAM) cache first,
+        and return a copy from that if the model can be found there.
+        If the bam cache is enabled (via the `model-cache-dir` config
         variable), then that will be consulted next, and if both
         caches fail, the file will be loaded from disk.  If noCache is
         True, then neither cache will be consulted or updated.
 
         If allowInstance is True, a shared instance may be returned
-        from the ModelPool.  This is dangerous, since it is easy to
+        from the `.ModelPool`.  This is dangerous, since it is easy to
         accidentally modify the shared instance, and invalidate future
         load attempts of the same model.  Normally, you should leave
         allowInstance set to False, which will always return a unique
@@ -214,10 +214,10 @@ class Loader(DirectObject):
 
         If okMissing is True, None is returned if the model is not
         found or cannot be read, and no error message is printed.
-        Otherwise, an IOError is raised if the model is not found or
+        Otherwise, an `IOError` is raised if the model is not found or
         cannot be read (similar to attempting to open a nonexistent
-        file).  (If modelPath is a list of filenames, then IOError is
-        raised if *any* of the models could not be loaded.)
+        file).  (If modelPath is a list of filenames, then `IOError`
+        is raised if *any* of the models could not be loaded.)
 
         If callback is not None, then the model load will be performed
         asynchronously.  In this case, loadModel() will initiate a
@@ -235,7 +235,7 @@ class Loader(DirectObject):
 
         True asynchronous model loading requires Panda to have been
         compiled with threading support enabled (you can test
-        Thread.isThreadingSupported()).  In the absence of threading
+        `.Thread.isThreadingSupported()`).  In the absence of threading
         support, the asynchronous interface still exists and still
         behaves exactly as described, except that loadModel() might
         not return immediately.
@@ -420,7 +420,7 @@ class Loader(DirectObject):
     def saveModel(self, modelPath, node, loaderOptions = None,
                   callback = None, extraArgs = [], priority = None,
                   blocking = None):
-        """ Saves the model (a NodePath or PandaNode) to the indicated
+        """ Saves the model (a `NodePath` or `PandaNode`) to the indicated
         filename path.  Returns true on success, false on failure.  If
         a callback is used, the model is saved asynchronously, and the
         true/false status is passed to the callback function. """
@@ -508,8 +508,8 @@ class Loader(DirectObject):
         """
         modelPath is a string.
 
-        This loads a special model as a TextFont object, for rendering
-        text with a TextNode.  A font file must be either a special
+        This loads a special model as a `TextFont` object, for rendering
+        text with a `TextNode`.  A font file must be either a special
         egg file (or bam file) generated with egg-mkfont, which is
         considered a static font, or a standard font file (like a TTF
         file) that is supported by FreeType, which is considered a
@@ -573,7 +573,7 @@ class Loader(DirectObject):
 
         If color is not None, it should be a VBase4 specifying the
         foreground color of the font.  Specifying this option breaks
-        TextNode.setColor(), so you almost never want to use this
+        `TextNode.setColor()`, so you almost never want to use this
         option; the default (white) is the most appropriate for a
         font, as it allows text to have any arbitrary color assigned
         at generation time.  However, if you want to use a colored
@@ -695,7 +695,8 @@ class Loader(DirectObject):
         texturePath is a string.
 
         Attempt to load a texture from the given file path using
-        TexturePool class.
+        `TexturePool` class.  Returns a `Texture` object, or raises
+        `IOError` if the file could not be loaded.
 
         okMissing should be True to indicate the method should return
         None if the texture file is not found.  If it is False, the
@@ -713,17 +714,17 @@ class Loader(DirectObject):
         the texture and the number of expected mipmap images.
 
         If minfilter or magfilter is not None, they should be a symbol
-        like SamplerState.FTLinear or SamplerState.FTNearest.  (minfilter
-        may be further one of the Mipmap filter type symbols.)  These
-        specify the filter mode that will automatically be applied to
-        the texture when it is loaded.  Note that this setting may
+        like `SamplerState.FTLinear` or `SamplerState.FTNearest`.
+        (minfilter may be further one of the Mipmap filter type symbols.)
+        These specify the filter mode that will automatically be applied
+        to the texture when it is loaded.  Note that this setting may
         override the texture's existing settings, even if it has
-        already been loaded.  See egg-texture-cards for a more robust
+        already been loaded.  See `egg-texture-cards` for a more robust
         way to apply per-texture filter types and settings.
 
         If anisotropicDegree is not None, it specifies the anisotropic degree
         to apply to the texture when it is loaded.  Like minfilter and
-        magfilter, egg-texture-cards may be a more robust way to apply
+        magfilter, `egg-texture-cards` may be a more robust way to apply
         this setting.
 
         If multiview is true, it indicates to load a multiview or
@@ -769,7 +770,7 @@ class Loader(DirectObject):
         """
         texturePattern is a string that contains a sequence of one or
         more hash characters ('#'), which will be filled in with the
-        z-height number.  Returns a 3-D Texture object, suitable for
+        z-height number.  Returns a 3-D `Texture` object, suitable for
         rendering volumetric textures.
 
         okMissing should be True to indicate the method should return
@@ -826,7 +827,7 @@ class Loader(DirectObject):
         """
         texturePattern is a string that contains a sequence of one or
         more hash characters ('#'), which will be filled in with the
-        z-height number.  Returns a 2-D Texture array object, suitable
+        z-height number.  Returns a 2-D `Texture` array object, suitable
         for rendering array of textures.
 
         okMissing should be True to indicate the method should return
@@ -884,7 +885,7 @@ class Loader(DirectObject):
         texturePattern is a string that contains a sequence of one or
         more hash characters ('#'), which will be filled in with the
         face index number (0 through 5).  Returns a six-face cube map
-        Texture object.
+        `Texture` object.
 
         okMissing should be True to indicate the method should return
         None if the texture file is not found.  If it is False, the
@@ -951,8 +952,8 @@ class Loader(DirectObject):
         """Loads one or more sound files, specifically designated as a
         "sound effect" file (that is, uses the sfxManager to load the
         sound).  There is no distinction between sound effect files
-        and music files other than the particular AudioManager used to
-        load the sound file, but this distinction allows the sound
+        and music files other than the particular `AudioManager` used
+        to load the sound file, but this distinction allows the sound
         effects and/or the music files to be adjusted as a group,
         independently of the other group."""
 
@@ -965,8 +966,8 @@ class Loader(DirectObject):
         """Loads one or more sound files, specifically designated as a
         "music" file (that is, uses the musicManager to load the
         sound).  There is no distinction between sound effect files
-        and music files other than the particular AudioManager used to
-        load the sound file, but this distinction allows the sound
+        and music files other than the particular `AudioManager` used
+        to load the sound file, but this distinction allows the sound
         effects and/or the music files to be adjusted as a group,
         independently of the other group."""
         if(self.base.musicManager):
@@ -1052,7 +1053,7 @@ class Loader(DirectObject):
                            callback = None, extraArgs = []):
         """ Performs a model.flattenStrong() operation in a sub-thread
         (if threading is compiled into Panda).  The model may be a
-        single NodePath, or it may be a list of NodePaths.
+        single `.NodePath`, or it may be a list of NodePaths.
 
         Each model is duplicated and flattened in the sub-thread.
 

+ 6 - 0
direct/src/showbase/ShowBase.py

@@ -152,6 +152,7 @@ class ShowBase(DirectObject.DirectObject):
         self.wantStats = self.config.GetBool('want-pstats', 0)
         self.wantTk = False
         self.wantWx = False
+        self.wantDirect = False
 
         #: Fill this in with a function to invoke when the user "exits"
         #: the program by closing the main window.
@@ -3264,7 +3265,12 @@ class ShowBase(DirectObject.DirectObject):
     def startDirect(self, fWantDirect = 1, fWantTk = 1, fWantWx = 0):
         self.startTk(fWantTk)
         self.startWx(fWantWx)
+
+        if self.wantDirect == fWantDirect:
+            return
+
         self.wantDirect = fWantDirect
+
         if self.wantDirect:
             # Use importlib to prevent this import from being picked up
             # by modulefinder when packaging an application.

+ 8 - 3
direct/src/showbase/VFSImporter.py

@@ -65,7 +65,7 @@ class VFSImporter:
         vfile = vfs.getFile(filename, True)
         if vfile:
             return VFSLoader(dir_path, vfile, filename,
-                             desc=('.py', 'U', imp.PY_SOURCE))
+                             desc=('.py', 'r', imp.PY_SOURCE))
 
         # If there's no .py file, but there's a .pyc file, load that
         # anyway.
@@ -93,7 +93,7 @@ class VFSImporter:
         vfile = vfs.getFile(filename, True)
         if vfile:
             return VFSLoader(dir_path, vfile, filename, packagePath=path,
-                             desc=('.py', 'U', imp.PY_SOURCE))
+                             desc=('.py', 'r', imp.PY_SOURCE))
         for ext in compiledExtensions:
             filename = Filename(path, '__init__.' + ext)
             vfile = vfs.getFile(filename, True)
@@ -181,7 +181,12 @@ class VFSLoader:
         filename = Filename(self.filename)
         filename.setExtension('py')
         filename.setText()
-        return open(self.filename, self.desc[1]).read()
+
+        # Use the tokenize module to detect the encoding.
+        import tokenize
+        fh = open(self.filename, 'rb')
+        encoding, lines = tokenize.detect_encoding(fh.readline)
+        return (b''.join(lines) + fh.read()).decode(encoding)
 
     def _import_extension_module(self, fullname):
         """ Loads the binary shared object as a Python module, and

+ 2 - 0
dtool/src/dtoolbase/dtool_platform.h

@@ -39,6 +39,8 @@
 #define DTOOL_PLATFORM "osx_i386"
 #elif defined(__x86_64)
 #define DTOOL_PLATFORM "osx_amd64"
+#elif defined(__aarch64__)
+#define DTOOL_PLATFORM "osx_aarch64"
 #endif
 
 #elif defined(__FreeBSD__)

+ 1 - 1
makepanda/installer.nsi

@@ -229,7 +229,7 @@ SectionGroup "Panda3D Libraries"
         SetDetailsPrint listonly
 
         SetOutPath $INSTDIR\models
-        File /r /x CVS "${BUILT}\models\*"
+        File /nonfatal /r /x CVS "${BUILT}\models\*"
 
         SetDetailsPrint both
         DetailPrint "Installing optional components..."

+ 2 - 2
makepanda/makepackage.py

@@ -117,8 +117,8 @@ deps: {DEPENDS}
 MACOS_SCRIPT_PREFIX = \
 """#!/bin/bash
 IFS=.
-read -a version_info <<< "`sw_vers -productVersion`'"
-if (( ${version_info[1]} < 15 )); then
+read -a version_info <<< "`sw_vers -productVersion`"
+if (( ${version_info[0]} == 10 && ${version_info[1]} < 15 )); then
 """
 
 MACOS_SCRIPT_POSTFIX = \

+ 13 - 14
makepanda/makepanda.py

@@ -87,7 +87,7 @@ PkgListSet(["PYTHON", "DIRECT",                        # Python support
   "ARTOOLKIT", "OPENCV", "DIRECTCAM", "VISION",        # Augmented Reality
   "GTK2",                                              # GTK2 is used for PStats on Unix
   "MFC", "WX", "FLTK",                                 # Used for web plug-in only
-  "COCOA",                                             # Mac OS X toolkits
+  "COCOA",                                             # macOS toolkits
   "X11",                                               # Unix platform support
   "PANDATOOL", "PVIEW", "DEPLOYTOOLS",                 # Toolchain
   "SKEL",                                              # Example SKEL project
@@ -132,7 +132,7 @@ def usage(problem):
     print("  --distributor X   (short string identifying the distributor of the build)")
     print("  --outputdir X     (use the specified directory instead of 'built')")
     print("  --threads N       (use the multithreaded build system. see manual)")
-    print("  --osxtarget N     (the OS X version number to build for (OS X only))")
+    print("  --osxtarget N     (the macOS version number to build for (macOS only))")
     print("  --override \"O=V\"  (override dtool_config/prc option value)")
     print("  --static          (builds libraries for static linking)")
     print("  --target X        (experimental cross-compilation (android only))")
@@ -272,7 +272,7 @@ def parseopts(args):
         try:
             maj, min = OSXTARGET.strip().split('.')
             OSXTARGET = int(maj), int(min)
-            assert OSXTARGET[0] == 10
+            assert OSXTARGET[0] >= 10
         except:
             usage("Invalid setting for OSXTARGET")
 
@@ -777,7 +777,7 @@ if (COMPILER=="GCC"):
     SmartPkgEnable("FFMPEG",    ffmpeg_libs, ffmpeg_libs, ("libavformat/avformat.h", "libavcodec/avcodec.h", "libavutil/avutil.h"))
     SmartPkgEnable("SWSCALE",   "libswscale", "libswscale", ("libswscale/swscale.h"), target_pkg = "FFMPEG", thirdparty_dir = "ffmpeg")
     SmartPkgEnable("SWRESAMPLE","libswresample", "libswresample", ("libswresample/swresample.h"), target_pkg = "FFMPEG", thirdparty_dir = "ffmpeg")
-    SmartPkgEnable("FFTW",      "",          ("fftw3"), ("fftw.h"))
+    SmartPkgEnable("FFTW",      "fftw3",     ("fftw3"), ("fftw.h"))
     SmartPkgEnable("FMODEX",    "",          ("fmodex"), ("fmodex", "fmodex/fmod.h"))
     SmartPkgEnable("NVIDIACG",  "",          ("Cg"), "Cg/cg.h", framework = "Cg")
     SmartPkgEnable("ODE",       "",          ("ode"), "ode/ode.h", tool = "ode-config")
@@ -961,7 +961,7 @@ if (COMPILER=="GCC"):
         if (PkgSkip(pkg)==0 and (pkg in SDK)):
             if (GetHost() == "darwin"):
                 # Sheesh, Autodesk really can't make up their mind
-                # regarding the location of the Maya devkit on OS X.
+                # regarding the location of the Maya devkit on macOS.
                 if (os.path.isdir(SDK[pkg] + "/Maya.app/Contents/lib")):
                     LibDirectory(pkg, SDK[pkg] + "/Maya.app/Contents/lib")
                 if (os.path.isdir(SDK[pkg] + "/Maya.app/Contents/MacOS")):
@@ -1148,10 +1148,10 @@ def CompileCxx(obj,src,opts):
 
             if (optlevel==1): cmd += " /MDd /Zi /RTCs /GS"
             if (optlevel==2): cmd += " /MDd /Zi"
-            if (optlevel==3): cmd += " /MD /Zi /GS- /O2 /Ob2 /Oi /Ot /fp:fast"
+            if (optlevel==3): cmd += " /MD /Zi /GS- /O2 /fp:fast"
             if (optlevel==4):
-                cmd += " /MD /Zi /GS- /Ox /Ob2 /Oi /Ot /fp:fast /DFORCE_INLINING /DNDEBUG /GL"
-                cmd += " /Oy /Zp16"      # jean-claude add /Zp16 insures correct static alignment for SSEx
+                cmd += " /MD /Zi /GS- /O2 /fp:fast /DFORCE_INLINING /DNDEBUG /GL"
+                cmd += " /Zp16"      # jean-claude add /Zp16 insures correct static alignment for SSEx
 
             cmd += " /Fd" + os.path.splitext(obj)[0] + ".pdb"
 
@@ -1519,7 +1519,7 @@ def CompileIgate(woutd,wsrc,opts):
         target_arch = GetTargetArch()
         if target_arch in ("x86_64", "amd64"):
             cmd += ' -D_LP64'
-        elif target_arch == 'aarch64':
+        elif target_arch in ('aarch64', 'arm64'):
             cmd += ' -D_LP64 -D__LP64__ -D__aarch64__'
         else:
             cmd += ' -D__i386__'
@@ -1811,7 +1811,7 @@ def CompileLink(dll, obj, opts):
         if (GetOrigExt(dll) == ".exe" and GetTarget() == 'windows' and "NOICON" not in opts):
             cmd += " " + GetOutputDir() + "/tmp/pandaIcon.res"
 
-        # Mac OS X specific flags.
+        # macOS specific flags.
         if GetTarget() == 'darwin':
             cmd += " -headerpad_max_install_names"
             if OSXTARGET is not None:
@@ -2073,7 +2073,7 @@ def FreezePy(target, inputs, opts):
 ##########################################################################################
 
 def CompileBundle(target, inputs, opts):
-    assert GetTarget() == "darwin", 'bundles can only be made for Mac OS X'
+    assert GetTarget() == "darwin", 'bundles can only be made for macOS'
     plist = None
     resources = []
     objects = []
@@ -2422,9 +2422,6 @@ def WriteConfigSettings():
         dtool_config["HAVE_VIDEO4LINUX"] = 'UNDEF'
         dtool_config["PHAVE_LINUX_INPUT_H"] = 'UNDEF'
         dtool_config["IS_OSX"] = '1'
-        # 10.4 had a broken ucontext implementation
-        if int(platform.mac_ver()[0][3]) <= 4:
-            dtool_config["PHAVE_UCONTEXT_H"] = 'UNDEF'
 
     if PkgSkip("X11"):
         dtool_config["HAVE_GLX"] = 'UNDEF'
@@ -3913,6 +3910,7 @@ OPTS=['DIR:panda/src/collide']
 IGATEFILES=GetDirectoryContents('panda/src/collide', ["*.h", "*_composite*.cxx"])
 TargetAdd('libp3collide.in', opts=OPTS, input=IGATEFILES)
 TargetAdd('libp3collide.in', opts=['IMOD:panda3d.core', 'ILIB:libp3collide', 'SRCDIR:panda/src/collide'])
+PyTargetAdd('p3collide_ext_composite.obj', opts=OPTS, input='p3collide_ext_composite.cxx')
 
 #
 # DIRECTORY: panda/src/parametrics/
@@ -4163,6 +4161,7 @@ PyTargetAdd('core.pyd', input='p3event_pythonTask.obj')
 PyTargetAdd('core.pyd', input='p3gobj_ext_composite.obj')
 PyTargetAdd('core.pyd', input='p3pgraph_ext_composite.obj')
 PyTargetAdd('core.pyd', input='p3display_ext_composite.obj')
+PyTargetAdd('core.pyd', input='p3collide_ext_composite.obj')
 
 PyTargetAdd('core.pyd', input='core_module.obj')
 if not GetLinkAllStatic() and GetTarget() != 'emscripten':

+ 57 - 12
makepanda/makepandacore.py

@@ -44,7 +44,7 @@ DEFAULT_RANLIB = "ranlib"
 # Is the current Python a 32-bit or 64-bit build?  There doesn't
 # appear to be a universal test for this.
 if sys.platform == 'darwin':
-    # On OSX, platform.architecture reports '64bit' even if it is
+    # On macOS, platform.architecture reports '64bit' even if it is
     # currently running in 32-bit mode.  But sys.maxsize is a reliable
     # indicator.
     host_64 = (sys.maxsize > 0x100000000)
@@ -373,11 +373,13 @@ def SetTarget(target, arch=None):
 
         if arch == 'amd64':
             arch = 'x86_64'
+        if arch == 'aarch64':
+            arch = 'arm64'
 
         if arch is not None:
-            choices = ('i386', 'x86_64', 'ppc', 'ppc64')
+            choices = ('i386', 'x86_64', 'ppc', 'ppc64', 'arm64')
             if arch not in choices:
-                exit('Mac OS X architecture must be one of %s' % (', '.join(choices)))
+                exit('macOS architecture must be one of %s' % (', '.join(choices)))
 
     elif target == 'android' or target.startswith('android-'):
         if arch is None:
@@ -1311,7 +1313,7 @@ def GetThirdpartyDir():
             THIRDPARTYDIR = base + "/win-libs-vc" + vc + "/"
 
     elif (target == 'darwin'):
-        # OSX thirdparty binaries are universal, where possible.
+        # macOS thirdparty binaries are universal, where possible.
         THIRDPARTYDIR = base + "/darwin-libs-a/"
 
     elif (target == 'linux'):
@@ -2055,16 +2057,25 @@ def SdkLocatePython(prefer_thirdparty_python=False):
     abiflags = getattr(sys, 'abiflags', '')
 
     if GetTarget() == 'windows':
-        sdkdir = GetThirdpartyBase() + "/win-python"
-        sdkdir += "%d.%d" % sys.version_info[:2]
+        if PkgHasCustomLocation("PYTHON"):
+            # Check our custom location instead (--python-libdir, --python-incdir)
+            sdkdir = FindOptDirectory("PYTHON")
 
-        if GetOptimize() <= 2:
-            sdkdir += "-dbg"
-        if GetTargetArch() == 'x64':
-            sdkdir += "-x64"
+            if sdkdir is None:
+                exit("Could not find a Python installation using these command line options.")
+        else:
+            sdkdir = GetThirdpartyBase() + "/win-python"
+            sdkdir += "%d.%d" % sys.version_info[:2]
+
+            if GetOptimize() <= 2:
+                sdkdir += "-dbg"
+            if GetTargetArch() == 'x64':
+                sdkdir += "-x64"
 
+        sdkdir = sdkdir.replace('\\', '/')
         SDK["PYTHON"] = sdkdir
-        SDK["PYTHONEXEC"] = SDK["PYTHON"].replace('\\', '/') + "/python"
+        SDK["PYTHONEXEC"] = SDK["PYTHON"] + "/python"
+
         if (GetOptimize() <= 2):
             SDK["PYTHONEXEC"] += "_d.exe"
         else:
@@ -2764,6 +2775,40 @@ def LibDirectory(opt, dir):
 def FrameworkDirectory(opt, dir):
     FRAMEWORKDIRECTORIES.append((opt, dir))
 
+def FindIncDirectory(opt):
+    # Find the include directory associated with this module
+    for mod, dir in INCDIRECTORIES:
+        if mod == opt:
+            return os.path.abspath(dir)
+
+def FindLibDirectory(opt):
+    # Find the library directory associated with this module
+    for mod, dir in LIBDIRECTORIES:
+        if mod == opt:
+            return os.path.abspath(dir)
+
+def FindOptDirectory(opt):
+    # Find the common directory associated with this module
+    # using the include and library directories as a guide
+    include_dir = FindIncDirectory(opt)
+    lib_dir = FindLibDirectory(opt)
+
+    if include_dir and lib_dir:
+        # The module's common directory is the common prefix of
+        # its include and library directory
+        common_dir = os.path.commonprefix([include_dir, lib_dir])
+
+        if common_dir:
+            return os.path.abspath(common_dir)
+    elif include_dir:
+        # The module's common directory is the parent of the include
+        # directory
+        return os.path.abspath(os.path.join(include_dir, os.pardir))
+    elif lib_dir:
+        # The module's common directory is the parent of the library
+        # directory
+        return os.path.abspath(os.path.join(lib_dir, os.pardir))
+
 def LibName(opt, name):
     # Check to see if the lib file actually exists for the thirdparty library given
     # Are we a thirdparty library?
@@ -2986,7 +3031,7 @@ def SetupBuildEnvironment(compiler):
             dyldpath.insert(0, os.path.join(builtdir, 'lib'))
             os.environ["DYLD_LIBRARY_PATH"] = os.pathsep.join(dyldpath)
 
-            # OS X 10.11 removed DYLD_LIBRARY_PATH, but we still need to pass
+            # macOS 10.11 removed DYLD_LIBRARY_PATH, but we still need to pass
             # on our lib directory to ppackage, so add it to PATH instead.
             os.environ["PATH"] = os.path.join(builtdir, 'lib') + ':' + os.environ.get("PATH", "")
 

+ 2 - 1
makepanda/makewheel.py

@@ -381,7 +381,8 @@ class WheelFile(object):
             # On macOS, if no fat wheel was requested, extract the right architecture.
             if sys.platform == "darwin" and is_fat_file(source_path) \
                 and not self.platform.endswith("_intel") \
-                and "_fat" not in self.platform:
+                and "_fat" not in self.platform \
+                and "_universal" not in self.platform:
 
                 if self.platform.endswith("_x86_64"):
                     arch = 'x86_64'

+ 2 - 0
panda/src/audiotraits/openalAudioSound.cxx

@@ -832,6 +832,8 @@ set_active(bool active) {
           // ...we're pausing a looping sound.
           _paused=true;
         }
+        // Store off the current time so we can resume from where we paused.
+        _start_time = get_time();
         stop();
       }
     }

+ 8 - 0
panda/src/bullet/bulletSoftBodyNode.cxx

@@ -187,14 +187,22 @@ transform_changed() {
 
     // Offset between current approx center and current initial transform
     btVector3 pos = LVecBase3_to_btVector3(this->do_get_aabb().get_approx_center());
+#if BT_BULLET_VERSION >= 290
+    btVector3 origin = _soft->getWorldTransform().getOrigin();
+#else
     btVector3 origin = _soft->m_initialWorldTransform.getOrigin();
+#endif
     btVector3 offset = pos - origin;
 
     // Subtract offset to get new transform for the body
     trans.setOrigin(trans.getOrigin() - offset);
 
     // Now apply the new transform
+#if BT_BULLET_VERSION >= 290
+    _soft->transform(_soft->getWorldTransform().inverse());
+#else
     _soft->transform(_soft->m_initialWorldTransform.inverse());
+#endif
     _soft->transform(trans);
 
     if (ts->has_scale()) {

+ 6 - 1
panda/src/collide/CMakeLists.txt

@@ -65,11 +65,16 @@ set(P3COLLIDE_SOURCES
   config_collide.cxx
 )
 
+set(P3COLLIDE_IGATEEXT
+  collisionPolygon_ext.cxx
+  collisionPolygon_ext.h
+)
+
 composite_sources(p3collide P3COLLIDE_SOURCES)
 add_component_library(p3collide SYMBOL BUILDING_PANDA_COLLIDE
   ${P3COLLIDE_HEADERS} ${P3COLLIDE_SOURCES})
 target_link_libraries(p3collide p3tform)
-target_interrogate(p3collide ALL)
+target_interrogate(p3collide ALL EXTENSIONS ${P3COLLIDE_IGATEEXT})
 
 if(NOT BUILD_METALIBS)
   install(TARGETS p3collide

+ 5 - 1
panda/src/collide/collisionPolygon.h

@@ -59,6 +59,9 @@ PUBLISHED:
   bool is_valid() const;
   bool is_concave() const;
 
+  EXTENSION(static bool verify_points(PyObject *points));
+  EXTENSION(void setup_points(PyObject *points));
+
 PUBLISHED:
   MAKE_SEQ_PROPERTY(points, get_num_points, get_point);
   MAKE_PROPERTY(valid, is_valid);
@@ -71,6 +74,8 @@ public:
                                 const CullTraverserData &data,
                                 bool bounds_only) const;
 
+  void setup_points(const LPoint3 *begin, const LPoint3 *end);
+
   virtual PStatCollector &get_volume_pcollector();
   virtual PStatCollector &get_test_pcollector();
 
@@ -128,7 +133,6 @@ private:
   PN_stdfloat dist_to_polygon(const LPoint2 &p, LPoint2 &edge_p, const Points &points) const;
   void project(const LVector3 &axis, PN_stdfloat &center, PN_stdfloat &extent) const;
 
-  void setup_points(const LPoint3 *begin, const LPoint3 *end);
   INLINE LPoint2 to_2d(const LVecBase3 &point3d) const;
   INLINE void calc_to_3d_mat(LMatrix4 &to_3d_mat) const;
   INLINE void rederive_to_3d_mat(LMatrix4 &to_3d_mat) const;

+ 86 - 0
panda/src/collide/collisionPolygon_ext.cxx

@@ -0,0 +1,86 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file collisionPolygon_ext.cxx
+ * @author Derzsi Daniel
+ * @date 2020-10-13
+ */
+
+#include "collisionPolygon_ext.h"
+
+#ifdef HAVE_PYTHON
+
+#include "collisionPolygon.h"
+
+#ifdef STDFLOAT_DOUBLE
+extern struct Dtool_PyTypedObject Dtool_LPoint3d;
+#else
+extern struct Dtool_PyTypedObject Dtool_LPoint3f;
+#endif
+
+/**
+ * Verifies that the indicated Python list of points will define a
+ * CollisionPolygon.
+ *
+ * The points are first converted with convert_points(); the resulting range
+ * is then handed to CollisionPolygon::verify_points().
+ */
+bool Extension<CollisionPolygon>::
+verify_points(PyObject *points) {
+  const pvector<LPoint3> vec = convert_points(points);
+  // Use data() rather than &vec[0]: indexing an empty vector is undefined,
+  // and the vector is empty whenever no point could be converted.
+  const LPoint3 *verts_begin = vec.data();
+  const LPoint3 *verts_end = verts_begin + vec.size();
+
+  return CollisionPolygon::verify_points(verts_begin, verts_end);
+}
+
+/**
+ * Initializes this CollisionPolygon with the given Python list of
+ * points.
+ *
+ * The points are first converted with convert_points(); the resulting range
+ * is then handed to CollisionPolygon::setup_points().
+ */
+void Extension<CollisionPolygon>::
+setup_points(PyObject *points) {
+  const pvector<LPoint3> vec = convert_points(points);
+  // Use data() rather than &vec[0]: indexing an empty vector is undefined,
+  // and the vector is empty whenever no point could be converted.
+  const LPoint3 *verts_begin = vec.data();
+  const LPoint3 *verts_end = verts_begin + vec.size();
+
+  _this->setup_points(verts_begin, verts_end);
+}
+
+/**
+ * Converts a Python sequence to a list of LPoint3 objects.
+ *
+ * Returns an empty vector if the argument is not a sequence; in that case
+ * PySequence_Fast has failed with the message "function expects a
+ * sequence".  Items that are not LPoint3 objects are skipped, and a warning
+ * is written to collide_cat for each of them.
+ */
+pvector<LPoint3> Extension<CollisionPolygon>::
+convert_points(PyObject *points) {
+  pvector<LPoint3> vec;
+  PyObject *seq = PySequence_Fast(points, "function expects a sequence");
+
+  if (seq == nullptr) {
+    return vec;
+  }
+
+  PyObject **items = PySequence_Fast_ITEMS(seq);
+  Py_ssize_t len = PySequence_Fast_GET_SIZE(seq);
+
+  vec.reserve(len);
+
+  for (Py_ssize_t i = 0; i < len; ++i) {
+    void *ptr = nullptr;
+
+    // NOTE(review): DtoolInstance_UPCAST appears to read the instance header
+    // without validating the object first, so only upcast objects that pass
+    // DtoolInstance_Check -- confirm against py_panda.h.
+    if (DtoolInstance_Check(items[i])) {
+#ifdef STDFLOAT_DOUBLE
+      ptr = DtoolInstance_UPCAST(items[i], Dtool_LPoint3d);
+#else
+      ptr = DtoolInstance_UPCAST(items[i], Dtool_LPoint3f);
+#endif
+    }
+
+    if (ptr != nullptr) {
+      vec.push_back(*(const LPoint3 *)ptr);
+    } else {
+      collide_cat.warning() << "Argument must be of LPoint3 type.\n";
+    }
+  }
+
+  Py_DECREF(seq);
+  return vec;
+}
+
+#endif

+ 43 - 0
panda/src/collide/collisionPolygon_ext.h

@@ -0,0 +1,43 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file collisionPolygon_ext.h
+ * @author Derzsi Daniel
+ * @date 2020-10-13
+ */
+
+#ifndef COLLISIONPOLYGON_EXT_H
+#define COLLISIONPOLYGON_EXT_H
+
+#include "pandabase.h"
+
+#ifdef HAVE_PYTHON
+
+#include "extension.h"
+#include "collisionPolygon.h"
+#include "py_panda.h"
+
+/**
+ * This class defines the extension methods for CollisionPolygon, which are called
+ * instead of any C++ methods with the same prototype.
+ *
+ * verify_points() and setup_points() each accept a Python object holding the
+ * points.  convert_points() is the private helper both of them use to turn
+ * that object into a pvector<LPoint3>.
+ *
+ * @since 1.11.0
+ */
+template<>
+class Extension<CollisionPolygon> : public ExtensionBase<CollisionPolygon> {
+public:
+  static bool verify_points(PyObject *points);
+  void setup_points(PyObject *points);
+
+private:
+  static pvector<LPoint3> convert_points(PyObject *points);
+};
+
+#endif  // HAVE_PYTHON
+
+#endif

+ 1 - 1
panda/src/collide/collisionTraverser.h

@@ -62,7 +62,7 @@ PUBLISHED:
   void clear_colliders();
   MAKE_SEQ_PROPERTY(colliders, get_num_colliders, get_collider);
 
-  void traverse(const NodePath &root);
+  BLOCKING void traverse(const NodePath &root);
 
 #if defined(DO_COLLISION_RECORDING) || !defined(CPPPARSER)
   void set_recorder(CollisionRecorder *recorder);

+ 1 - 0
panda/src/collide/p3collide_ext_composite.cxx

@@ -0,0 +1 @@
+#include "collisionPolygon_ext.cxx"

+ 2 - 18
panda/src/cull/cullBinBackToFront.cxx

@@ -85,24 +85,8 @@ void CullBinBackToFront::
 draw(bool force, Thread *current_thread) {
   PStatTimer timer(_draw_this_pcollector, current_thread);
 
-  Objects::const_iterator oi;
-  for (oi = _objects.begin(); oi != _objects.end(); ++oi) {
-    CullableObject *object = (*oi)._object;
-
-    if (object->_draw_callback == nullptr) {
-      nassertd(object->_geom != nullptr) continue;
-
-      _gsg->set_state_and_transform(object->_state, object->_internal_transform);
-
-      GeomPipelineReader geom_reader(object->_geom, current_thread);
-      GeomVertexDataPipelineReader data_reader(object->_munged_data, current_thread);
-      data_reader.check_array_readers();
-      geom_reader.draw(_gsg, &data_reader, force);
-    } else {
-      // It has a callback associated.
-      object->draw_callback(_gsg, force, current_thread);
-      // Now the callback has taken care of drawing.
-    }
+  for (const ObjectData &data : _objects) {
+    data._object->draw(_gsg, force, current_thread);
   }
 }
 

+ 2 - 18
panda/src/cull/cullBinFixed.cxx

@@ -71,24 +71,8 @@ void CullBinFixed::
 draw(bool force, Thread *current_thread) {
   PStatTimer timer(_draw_this_pcollector, current_thread);
 
-  Objects::const_iterator oi;
-  for (oi = _objects.begin(); oi != _objects.end(); ++oi) {
-    CullableObject *object = (*oi)._object;
-
-    if (object->_draw_callback == nullptr) {
-      nassertd(object->_geom != nullptr) continue;
-
-      _gsg->set_state_and_transform(object->_state, object->_internal_transform);
-
-      GeomPipelineReader geom_reader(object->_geom, current_thread);
-      GeomVertexDataPipelineReader data_reader(object->_munged_data, current_thread);
-      data_reader.check_array_readers();
-      geom_reader.draw(_gsg, &data_reader, force);
-    } else {
-      // It has a callback associated.
-      object->draw_callback(_gsg, force, current_thread);
-      // Now the callback has taken care of drawing.
-    }
+  for (const ObjectData &data : _objects) {
+    data._object->draw(_gsg, force, current_thread);
   }
 }
 

+ 2 - 18
panda/src/cull/cullBinFrontToBack.cxx

@@ -85,24 +85,8 @@ void CullBinFrontToBack::
 draw(bool force, Thread *current_thread) {
   PStatTimer timer(_draw_this_pcollector, current_thread);
 
-  Objects::const_iterator oi;
-  for (oi = _objects.begin(); oi != _objects.end(); ++oi) {
-    CullableObject *object = (*oi)._object;
-
-    if (object->_draw_callback == nullptr) {
-      nassertd(object->_geom != nullptr) continue;
-
-      _gsg->set_state_and_transform(object->_state, object->_internal_transform);
-
-      GeomPipelineReader geom_reader(object->_geom, current_thread);
-      GeomVertexDataPipelineReader data_reader(object->_munged_data, current_thread);
-      data_reader.check_array_readers();
-      geom_reader.draw(_gsg, &data_reader, force);
-    } else {
-      // It has a callback associated.
-      object->draw_callback(_gsg, force, current_thread);
-      // Now the callback has taken care of drawing.
-    }
+  for (const ObjectData &data : _objects) {
+    data._object->draw(_gsg, force, current_thread);
   }
 }
 

+ 2 - 18
panda/src/cull/cullBinStateSorted.cxx

@@ -70,24 +70,8 @@ void CullBinStateSorted::
 draw(bool force, Thread *current_thread) {
   PStatTimer timer(_draw_this_pcollector, current_thread);
 
-  Objects::const_iterator oi;
-  for (oi = _objects.begin(); oi != _objects.end(); ++oi) {
-    CullableObject *object = (*oi)._object;
-
-    if (object->_draw_callback == nullptr) {
-      nassertd(object->_geom != nullptr) continue;
-
-      _gsg->set_state_and_transform(object->_state, object->_internal_transform);
-
-      GeomPipelineReader geom_reader(object->_geom, current_thread);
-      GeomVertexDataPipelineReader data_reader(object->_munged_data, current_thread);
-      data_reader.check_array_readers();
-      geom_reader.draw(_gsg, &data_reader, force);
-    } else {
-      // It has a callback associated.
-      object->draw_callback(_gsg, force, current_thread);
-      // Now the callback has taken care of drawing.
-    }
+  for (const ObjectData &data : _objects) {
+    data._object->draw(_gsg, force, current_thread);
   }
 }
 

+ 2 - 18
panda/src/cull/cullBinUnsorted.cxx

@@ -55,24 +55,8 @@ void CullBinUnsorted::
 draw(bool force, Thread *current_thread) {
   PStatTimer timer(_draw_this_pcollector, current_thread);
 
-  Objects::iterator oi;
-  for (oi = _objects.begin(); oi != _objects.end(); ++oi) {
-    CullableObject *object = (*oi);
-
-    if (object->_draw_callback == nullptr) {
-      nassertd(object->_geom != nullptr) continue;
-
-      _gsg->set_state_and_transform(object->_state, object->_internal_transform);
-
-      GeomPipelineReader geom_reader(object->_geom, current_thread);
-      GeomVertexDataPipelineReader data_reader(object->_munged_data, current_thread);
-      data_reader.check_array_readers();
-      geom_reader.draw(_gsg, &data_reader, force);
-    } else {
-      // It has a callback associated.
-      object->draw_callback(_gsg, force, current_thread);
-      // Now the callback has taken care of drawing.
-    }
+  for (CullableObject *object : _objects) {
+    object->draw(_gsg, force, current_thread);
   }
 }
 

+ 6 - 1
panda/src/display/graphicsStateGuardian.cxx

@@ -2498,9 +2498,13 @@ finish_decal() {
 bool GraphicsStateGuardian::
 begin_draw_primitives(const GeomPipelineReader *geom_reader,
                       const GeomVertexDataPipelineReader *data_reader,
-                      bool force) {
+                      size_t num_instances, bool force) {
   _data_reader = data_reader;
 
+  if (num_instances == 0) {
+    return false;
+  }
+
   // Always draw if we have a shader, since the shader might use a different
   // mechanism for fetching vertex data.
   return _data_reader->has_vertex() || (_target_shader && _target_shader->has_shader());
@@ -2616,6 +2620,7 @@ reset() {
   _state_rs = RenderState::make_empty();
   _target_rs = nullptr;
   _state_mask.clear();
+  _inv_state_mask = RenderState::SlotMask::all_on();
   _internal_transform = _cs_transform;
   _scene_null = new SceneSetup;
   _scene_setup = _scene_null;

+ 1 - 1
panda/src/display/graphicsStateGuardian.h

@@ -375,7 +375,7 @@ public:
 
   virtual bool begin_draw_primitives(const GeomPipelineReader *geom_reader,
                                      const GeomVertexDataPipelineReader *data_reader,
-                                     bool force);
+                                     size_t num_instances, bool force);
   virtual bool draw_triangles(const GeomPrimitivePipelineReader *reader,
                               bool force);
   virtual bool draw_triangles_adj(const GeomPrimitivePipelineReader *reader,

+ 3 - 2
panda/src/display/standardMunger.cxx

@@ -123,8 +123,9 @@ munge_data_impl(const GeomVertexData *data) {
   }
 
   GeomVertexAnimationSpec animation = new_data->get_format()->get_animation();
-  if (_shader_skinning || (_auto_shader && hardware_animated_vertices &&
-      !basic_shaders_only && animation.get_animation_type() == AT_panda)) {
+  if ((_shader_skinning && animation.get_animation_type() != AT_none) ||
+      (_auto_shader && hardware_animated_vertices &&
+       !basic_shaders_only && animation.get_animation_type() == AT_panda)) {
     animation.set_hardware(4, true);
 
   } else if (hardware_animated_vertices &&

+ 6 - 3
panda/src/dxgsg9/dxGraphicsStateGuardian9.cxx

@@ -1180,8 +1180,8 @@ end_frame(Thread *current_thread) {
 bool DXGraphicsStateGuardian9::
 begin_draw_primitives(const GeomPipelineReader *geom_reader,
                       const GeomVertexDataPipelineReader *data_reader,
-                      bool force) {
-  if (!GraphicsStateGuardian::begin_draw_primitives(geom_reader, data_reader, force)) {
+                      size_t num_instances, bool force) {
+  if (!GraphicsStateGuardian::begin_draw_primitives(geom_reader, data_reader, num_instances, force)) {
     return false;
   }
   nassertr(_data_reader != nullptr, false);
@@ -3223,11 +3223,14 @@ set_state_and_transform(const RenderState *target,
     _state_mask.set_bit(color_blend_slot);
   }
 
-  if (_target_shader != _state_shader) {
+  int shader_slot = ShaderAttrib::get_class_slot();
+  if (_target_shader != _state_shader ||
+      !_state_mask.get_bit(shader_slot)) {
     // PStatTimer timer(_draw_set_state_shader_pcollector);
     do_issue_shader();
     _state_shader = _target_shader;
     _state_mask.clear_bit(TextureAttrib::get_class_slot());
+    _state_mask.set_bit(shader_slot);
   }
 
   int texture_slot = TextureAttrib::get_class_slot();

+ 1 - 1
panda/src/dxgsg9/dxGraphicsStateGuardian9.h

@@ -107,7 +107,7 @@ public:
 
   virtual bool begin_draw_primitives(const GeomPipelineReader *geom_reader,
                                      const GeomVertexDataPipelineReader *data_reader,
-                                     bool force);
+                                     size_t num_instances, bool force);
   virtual bool draw_triangles(const GeomPrimitivePipelineReader *reader,
                               bool force);
   virtual bool draw_tristrips(const GeomPrimitivePipelineReader *reader,

+ 3 - 3
panda/src/event/pythonTask.cxx

@@ -71,12 +71,12 @@ PythonTask(PyObject *func_or_coro, const std::string &name) :
 
   __dict__ = PyDict_New();
 
-#ifndef SIMPLE_THREADS
+#if !defined(SIMPLE_THREADS) && defined(WITH_THREAD) && PY_VERSION_HEX < 0x03090000
   // Ensure that the Python threading system is initialized and ready to go.
-#ifdef WITH_THREAD  // This symbol defined within Python.h
+  // WITH_THREAD symbol defined within Python.h
+  // PyEval_InitThreads is now a deprecated no-op in Python 3.9+
   PyEval_InitThreads();
 #endif
-#endif
 }
 
 /**

+ 2 - 0
panda/src/express/config_express.cxx

@@ -26,6 +26,7 @@
 #include "virtualFileMountMultifile.h"
 #include "virtualFileMountRamdisk.h"
 #include "virtualFileMountSystem.h"
+#include "virtualFileMountZip.h"
 #include "virtualFileSimple.h"
 #include "fileReference.h"
 #include "temporaryFile.h"
@@ -116,6 +117,7 @@ init_libexpress() {
   VirtualFileMountMultifile::init_type();
   VirtualFileMountRamdisk::init_type();
   VirtualFileMountSystem::init_type();
+  VirtualFileMountZip::init_type();
   VirtualFileSimple::init_type();
   FileReference::init_type();
   TemporaryFile::init_type();

+ 1 - 1
panda/src/glstuff/glCgShaderContext_src.cxx

@@ -166,7 +166,7 @@ CLP(CgShaderContext)(CLP(GraphicsStateGuardian) *glgsg, Shader *s) : ShaderConte
             if (cgGetParameterSemantic(p)) {
               GLCAT.error(false) << " : " << cgGetParameterSemantic(p);
             }
-            GLCAT.error(false) << " should be declared as float4, not float3!\n";
+            GLCAT.error(false) << " should be declared as float3, not float4!\n";
           }
           break;
         case 3:  // gl_Color

+ 97 - 49
panda/src/glstuff/glGraphicsStateGuardian_src.cxx

@@ -609,10 +609,10 @@ reset() {
   GraphicsStateGuardian::reset();
 
   // Build _inv_state_mask as a mask of 1's where we don't care, and 0's where
-  // we do care, about the state.  _inv_state_mask =
-  // RenderState::SlotMask::all_on();
+  // we do care, about the state.
+#ifndef OPENGLES_1
   _inv_state_mask.clear_bit(ShaderAttrib::get_class_slot());
-  _inv_state_mask.clear_bit(AlphaTestAttrib::get_class_slot());
+#endif
   _inv_state_mask.clear_bit(AntialiasAttrib::get_class_slot());
   _inv_state_mask.clear_bit(ClipPlaneAttrib::get_class_slot());
   _inv_state_mask.clear_bit(ColorAttrib::get_class_slot());
@@ -622,21 +622,30 @@ reset() {
   _inv_state_mask.clear_bit(DepthTestAttrib::get_class_slot());
   _inv_state_mask.clear_bit(DepthWriteAttrib::get_class_slot());
   _inv_state_mask.clear_bit(RenderModeAttrib::get_class_slot());
-  _inv_state_mask.clear_bit(RescaleNormalAttrib::get_class_slot());
-  _inv_state_mask.clear_bit(ShadeModelAttrib::get_class_slot());
   _inv_state_mask.clear_bit(TransparencyAttrib::get_class_slot());
   _inv_state_mask.clear_bit(ColorWriteAttrib::get_class_slot());
   _inv_state_mask.clear_bit(ColorBlendAttrib::get_class_slot());
+#if !defined(OPENGLES) || defined(OPENGLES_1)
   _inv_state_mask.clear_bit(LogicOpAttrib::get_class_slot());
+#endif
   _inv_state_mask.clear_bit(TextureAttrib::get_class_slot());
-  _inv_state_mask.clear_bit(TexGenAttrib::get_class_slot());
   _inv_state_mask.clear_bit(TexMatrixAttrib::get_class_slot());
-  _inv_state_mask.clear_bit(MaterialAttrib::get_class_slot());
-  _inv_state_mask.clear_bit(LightAttrib::get_class_slot());
   _inv_state_mask.clear_bit(StencilAttrib::get_class_slot());
-  _inv_state_mask.clear_bit(FogAttrib::get_class_slot());
   _inv_state_mask.clear_bit(ScissorAttrib::get_class_slot());
 
+  // We only care about this state if we are using the fixed-function pipeline.
+#ifdef SUPPORT_FIXED_FUNCTION
+  if (has_fixed_function_pipeline()) {
+    _inv_state_mask.clear_bit(AlphaTestAttrib::get_class_slot());
+    _inv_state_mask.clear_bit(RescaleNormalAttrib::get_class_slot());
+    _inv_state_mask.clear_bit(ShadeModelAttrib::get_class_slot());
+    _inv_state_mask.clear_bit(TexGenAttrib::get_class_slot());
+    _inv_state_mask.clear_bit(MaterialAttrib::get_class_slot());
+    _inv_state_mask.clear_bit(LightAttrib::get_class_slot());
+    _inv_state_mask.clear_bit(FogAttrib::get_class_slot());
+  }
+#endif
+
   // Output the vendor and version strings.
   query_gl_version();
 
@@ -1923,6 +1932,8 @@ reset() {
          get_extension_func("glUniform3uiv");
       _glUniform4uiv = (PFNGLUNIFORM4UIVPROC)
          get_extension_func("glUniform4uiv");
+      _glVertexAttribI4ui = (PFNGLVERTEXATTRIBI4UIPROC)
+         get_extension_func("glVertexAttribI4ui");
 
     } else if (has_extension("GL_EXT_gpu_shader4")) {
       _glBindFragDataLocation = (PFNGLBINDFRAGDATALOCATIONPROC)
@@ -1937,10 +1948,13 @@ reset() {
          get_extension_func("glUniform3uivEXT");
       _glUniform4uiv = (PFNGLUNIFORM4UIVPROC)
          get_extension_func("glUniform4uivEXT");
+      _glVertexAttribI4ui = (PFNGLVERTEXATTRIBI4UIPROC)
+         get_extension_func("glVertexAttribI4uiEXT");
 
     } else {
       _glBindFragDataLocation = nullptr;
       _glVertexAttribIPointer = nullptr;
+      _glVertexAttribI4ui = nullptr;
     }
     if (is_at_least_gl_version(4, 1) ||
         has_extension("GL_ARB_vertex_attrib_64bit")) {
@@ -1969,8 +1983,11 @@ reset() {
        get_extension_func("glVertexAttribPointerARB");
 
     _glBindFragDataLocation = nullptr;
+    _glVertexAttribI4ui = nullptr;
     _glVertexAttribIPointer = nullptr;
     _glVertexAttribLPointer = nullptr;
+  } else {
+    _glVertexAttribI4ui = nullptr;
   }
 #endif
 
@@ -2017,8 +2034,11 @@ reset() {
   if (is_at_least_gles_version(3, 0)) {
     _glVertexAttribIPointer = (PFNGLVERTEXATTRIBIPOINTERPROC)
       get_extension_func("glVertexAttribIPointer");
+    _glVertexAttribI4ui = (PFNGLVERTEXATTRIBI4UIPROC)
+      get_extension_func("glVertexAttribI4ui");
   } else {
     _glVertexAttribIPointer = nullptr;
+    _glVertexAttribI4ui = nullptr;
   }
 
   if (has_extension("GL_EXT_blend_func_extended")) {
@@ -4290,7 +4310,7 @@ end_frame(Thread *current_thread) {
 bool CLP(GraphicsStateGuardian)::
 begin_draw_primitives(const GeomPipelineReader *geom_reader,
                       const GeomVertexDataPipelineReader *data_reader,
-                      bool force) {
+                      size_t num_instances, bool force) {
 #ifndef NDEBUG
   if (GLCAT.is_spam()) {
     GLCAT.spam() << "begin_draw_primitives: " << *(data_reader->get_object()) << "\n";
@@ -4307,11 +4327,13 @@ begin_draw_primitives(const GeomPipelineReader *geom_reader,
   }
 #endif
 
-  if (!GraphicsStateGuardian::begin_draw_primitives(geom_reader, data_reader, force)) {
+  if (!GraphicsStateGuardian::begin_draw_primitives(geom_reader, data_reader, num_instances, force)) {
     return false;
   }
   nassertr(_data_reader != nullptr, false);
 
+  _instance_count = _supports_geometry_instancing ? num_instances : 1;
+
   _geom_display_list = 0;
 
   if (_auto_antialias_mode) {
@@ -4859,7 +4881,7 @@ draw_triangles(const GeomPrimitivePipelineReader *reader, bool force) {
       }
 
 #ifndef OPENGLES_1
-      if (_supports_geometry_instancing && _instance_count > 0) {
+      if (_instance_count != 1) {
         _glDrawElementsInstanced(GL_TRIANGLES, num_vertices,
                                  get_numeric_type(reader->get_index_type()),
                                  client_pointer, _instance_count);
@@ -4875,7 +4897,7 @@ draw_triangles(const GeomPrimitivePipelineReader *reader, bool force) {
       }
     } else {
 #ifndef OPENGLES_1
-      if (_supports_geometry_instancing && _instance_count > 0) {
+      if (_instance_count != 1) {
         _glDrawArraysInstanced(GL_TRIANGLES,
                                reader->get_first_vertex(),
                                num_vertices, _instance_count);
@@ -4925,7 +4947,7 @@ draw_triangles_adj(const GeomPrimitivePipelineReader *reader, bool force) {
         return false;
       }
 
-      if (_supports_geometry_instancing && _instance_count > 0) {
+      if (_instance_count != 1) {
         _glDrawElementsInstanced(GL_TRIANGLES_ADJACENCY, num_vertices,
                                  get_numeric_type(reader->get_index_type()),
                                  client_pointer, _instance_count);
@@ -4938,7 +4960,7 @@ draw_triangles_adj(const GeomPrimitivePipelineReader *reader, bool force) {
                              client_pointer);
       }
     } else {
-      if (_supports_geometry_instancing && _instance_count > 0) {
+      if (_instance_count != 1) {
         _glDrawArraysInstanced(GL_TRIANGLES_ADJACENCY,
                                reader->get_first_vertex(),
                                num_vertices, _instance_count);
@@ -4990,7 +5012,7 @@ draw_tristrips(const GeomPrimitivePipelineReader *reader, bool force) {
           return false;
         }
 #ifndef OPENGLES_1
-        if (_supports_geometry_instancing && _instance_count > 0) {
+        if (_instance_count != 1) {
           _glDrawElementsInstanced(GL_TRIANGLE_STRIP, num_vertices,
                                    get_numeric_type(reader->get_index_type()),
                                    client_pointer, _instance_count);
@@ -5006,7 +5028,7 @@ draw_tristrips(const GeomPrimitivePipelineReader *reader, bool force) {
         }
       } else {
 #ifndef OPENGLES_1
-        if (_supports_geometry_instancing && _instance_count > 0) {
+        if (_instance_count != 1) {
           _glDrawArraysInstanced(GL_TRIANGLE_STRIP,
                                  reader->get_first_vertex(),
                                  num_vertices, _instance_count);
@@ -5040,7 +5062,7 @@ draw_tristrips(const GeomPrimitivePipelineReader *reader, bool force) {
         for (size_t i = 0; i < ends.size(); i++) {
           _vertices_tristrip_pcollector.add_level(ends[i] - start);
 #ifndef OPENGLES_1
-          if (_supports_geometry_instancing && _instance_count > 0) {
+          if (_instance_count != 1) {
             _glDrawElementsInstanced(GL_TRIANGLE_STRIP, ends[i] - start,
                                      get_numeric_type(reader->get_index_type()),
                                      client_pointer + start * index_stride,
@@ -5062,7 +5084,7 @@ draw_tristrips(const GeomPrimitivePipelineReader *reader, bool force) {
         for (size_t i = 0; i < ends.size(); i++) {
           _vertices_tristrip_pcollector.add_level(ends[i] - start);
 #ifndef OPENGLES_1
-          if (_supports_geometry_instancing && _instance_count > 0) {
+          if (_instance_count != 1) {
             _glDrawArraysInstanced(GL_TRIANGLE_STRIP, first_vertex + start,
                                    ends[i] - start, _instance_count);
           } else
@@ -5120,7 +5142,7 @@ draw_tristrips_adj(const GeomPrimitivePipelineReader *reader, bool force) {
         if (!setup_primitive(client_pointer, reader, force)) {
           return false;
         }
-        if (_supports_geometry_instancing && _instance_count > 0) {
+        if (_instance_count != 1) {
           _glDrawElementsInstanced(GL_TRIANGLE_STRIP_ADJACENCY, num_vertices,
                                    get_numeric_type(reader->get_index_type()),
                                    client_pointer, _instance_count);
@@ -5133,7 +5155,7 @@ draw_tristrips_adj(const GeomPrimitivePipelineReader *reader, bool force) {
                                client_pointer);
         }
       } else {
-        if (_supports_geometry_instancing && _instance_count > 0) {
+        if (_instance_count != 1) {
           _glDrawArraysInstanced(GL_TRIANGLE_STRIP_ADJACENCY,
                                  reader->get_first_vertex(),
                                  num_vertices, _instance_count);
@@ -5166,7 +5188,7 @@ draw_tristrips_adj(const GeomPrimitivePipelineReader *reader, bool force) {
         unsigned int start = 0;
         for (size_t i = 0; i < ends.size(); i++) {
           _vertices_tristrip_pcollector.add_level(ends[i] - start);
-          if (_supports_geometry_instancing && _instance_count > 0) {
+          if (_instance_count != 1) {
             _glDrawElementsInstanced(GL_TRIANGLE_STRIP_ADJACENCY, ends[i] - start,
                                      get_numeric_type(reader->get_index_type()),
                                      client_pointer + start * index_stride,
@@ -5185,7 +5207,7 @@ draw_tristrips_adj(const GeomPrimitivePipelineReader *reader, bool force) {
         int first_vertex = reader->get_first_vertex();
         for (size_t i = 0; i < ends.size(); i++) {
           _vertices_tristrip_pcollector.add_level(ends[i] - start);
-          if (_supports_geometry_instancing && _instance_count > 0) {
+          if (_instance_count != 1) {
             _glDrawArraysInstanced(GL_TRIANGLE_STRIP_ADJACENCY, first_vertex + start,
                                    ends[i] - start, _instance_count);
           } else {
@@ -5243,7 +5265,7 @@ draw_trifans(const GeomPrimitivePipelineReader *reader, bool force) {
       for (size_t i = 0; i < ends.size(); i++) {
         _vertices_trifan_pcollector.add_level(ends[i] - start);
 #ifndef OPENGLES_1
-        if (_supports_geometry_instancing && _instance_count > 0) {
+        if (_instance_count != 1) {
           _glDrawElementsInstanced(GL_TRIANGLE_FAN, ends[i] - start,
                                    get_numeric_type(reader->get_index_type()),
                                    client_pointer + start * index_stride,
@@ -5264,7 +5286,7 @@ draw_trifans(const GeomPrimitivePipelineReader *reader, bool force) {
       for (size_t i = 0; i < ends.size(); i++) {
         _vertices_trifan_pcollector.add_level(ends[i] - start);
 #ifndef OPENGLES_1
-        if (_supports_geometry_instancing && _instance_count > 0) {
+        if (_instance_count != 1) {
           _glDrawArraysInstanced(GL_TRIANGLE_FAN, first_vertex + start,
                                  ends[i] - start, _instance_count);
         } else
@@ -5322,7 +5344,7 @@ draw_patches(const GeomPrimitivePipelineReader *reader, bool force) {
       }
 
 #ifndef OPENGLES_1
-      if (_supports_geometry_instancing && _instance_count > 0) {
+      if (_instance_count != 1) {
         _glDrawElementsInstanced(GL_PATCHES, num_vertices,
                                  get_numeric_type(reader->get_index_type()),
                                  client_pointer, _instance_count);
@@ -5338,7 +5360,7 @@ draw_patches(const GeomPrimitivePipelineReader *reader, bool force) {
       }
     } else {
 #ifndef OPENGLES_1
-      if (_supports_geometry_instancing && _instance_count > 0) {
+      if (_instance_count != 1) {
         _glDrawArraysInstanced(GL_PATCHES,
                                reader->get_first_vertex(),
                                num_vertices, _instance_count);
@@ -5388,7 +5410,7 @@ draw_lines(const GeomPrimitivePipelineReader *reader, bool force) {
         return false;
       }
 #ifndef OPENGLES_1
-      if (_supports_geometry_instancing && _instance_count > 0) {
+      if (_instance_count != 1) {
         _glDrawElementsInstanced(GL_LINES, num_vertices,
                                  get_numeric_type(reader->get_index_type()),
                                  client_pointer, _instance_count);
@@ -5404,7 +5426,7 @@ draw_lines(const GeomPrimitivePipelineReader *reader, bool force) {
       }
     } else {
 #ifndef OPENGLES_1
-      if (_supports_geometry_instancing && _instance_count > 0) {
+      if (_instance_count != 1) {
         _glDrawArraysInstanced(GL_LINES,
                                reader->get_first_vertex(),
                                num_vertices, _instance_count);
@@ -5452,7 +5474,7 @@ draw_lines_adj(const GeomPrimitivePipelineReader *reader, bool force) {
       if (!setup_primitive(client_pointer, reader, force)) {
         return false;
       }
-      if (_supports_geometry_instancing && _instance_count > 0) {
+      if (_instance_count != 1) {
         _glDrawElementsInstanced(GL_LINES_ADJACENCY, num_vertices,
                                  get_numeric_type(reader->get_index_type()),
                                  client_pointer, _instance_count);
@@ -5465,7 +5487,7 @@ draw_lines_adj(const GeomPrimitivePipelineReader *reader, bool force) {
                              client_pointer);
       }
     } else {
-      if (_supports_geometry_instancing && _instance_count > 0) {
+      if (_instance_count != 1) {
         _glDrawArraysInstanced(GL_LINES_ADJACENCY,
                                reader->get_first_vertex(),
                                num_vertices, _instance_count);
@@ -5524,7 +5546,7 @@ draw_linestrips(const GeomPrimitivePipelineReader *reader, bool force) {
         return false;
       }
 #ifndef OPENGLES_1
-      if (_supports_geometry_instancing && _instance_count > 0) {
+      if (_instance_count != 1) {
         _glDrawElementsInstanced(GL_LINE_STRIP, num_vertices,
                                  get_numeric_type(reader->get_index_type()),
                                  client_pointer, _instance_count);
@@ -5564,7 +5586,7 @@ draw_linestrips(const GeomPrimitivePipelineReader *reader, bool force) {
         for (size_t i = 0; i < ends.size(); i++) {
           _vertices_other_pcollector.add_level(ends[i] - start);
 #ifndef OPENGLES_1
-          if (_supports_geometry_instancing && _instance_count > 0) {
+          if (_instance_count != 1) {
             _glDrawElementsInstanced(GL_LINE_STRIP, ends[i] - start,
                                      get_numeric_type(reader->get_index_type()),
                                      client_pointer + start * index_stride,
@@ -5586,7 +5608,7 @@ draw_linestrips(const GeomPrimitivePipelineReader *reader, bool force) {
         for (size_t i = 0; i < ends.size(); i++) {
           _vertices_other_pcollector.add_level(ends[i] - start);
 #ifndef OPENGLES_1
-          if (_supports_geometry_instancing && _instance_count > 0) {
+          if (_instance_count != 1) {
             _glDrawArraysInstanced(GL_LINE_STRIP, first_vertex + start,
                                    ends[i] - start, _instance_count);
           } else
@@ -5644,7 +5666,7 @@ draw_linestrips_adj(const GeomPrimitivePipelineReader *reader, bool force) {
       if (!setup_primitive(client_pointer, reader, force)) {
         return false;
       }
-      if (_supports_geometry_instancing && _instance_count > 0) {
+      if (_instance_count != 1) {
         _glDrawElementsInstanced(GL_LINE_STRIP_ADJACENCY, num_vertices,
                                  get_numeric_type(reader->get_index_type()),
                                  client_pointer, _instance_count);
@@ -5679,7 +5701,7 @@ draw_linestrips_adj(const GeomPrimitivePipelineReader *reader, bool force) {
         unsigned int start = 0;
         for (size_t i = 0; i < ends.size(); i++) {
           _vertices_other_pcollector.add_level(ends[i] - start);
-          if (_supports_geometry_instancing && _instance_count > 0) {
+          if (_instance_count != 1) {
             _glDrawElementsInstanced(GL_LINE_STRIP_ADJACENCY, ends[i] - start,
                                      get_numeric_type(reader->get_index_type()),
                                      client_pointer + start * index_stride,
@@ -5698,7 +5720,7 @@ draw_linestrips_adj(const GeomPrimitivePipelineReader *reader, bool force) {
         int first_vertex = reader->get_first_vertex();
         for (size_t i = 0; i < ends.size(); i++) {
           _vertices_other_pcollector.add_level(ends[i] - start);
-          if (_supports_geometry_instancing && _instance_count > 0) {
+          if (_instance_count != 1) {
             _glDrawArraysInstanced(GL_LINE_STRIP_ADJACENCY, first_vertex + start,
                                    ends[i] - start, _instance_count);
           } else {
@@ -5745,7 +5767,7 @@ draw_points(const GeomPrimitivePipelineReader *reader, bool force) {
         return false;
       }
 #ifndef OPENGLES_1
-      if (_supports_geometry_instancing && _instance_count > 0) {
+      if (_instance_count != 1) {
         _glDrawElementsInstanced(GL_POINTS, num_vertices,
                                  get_numeric_type(reader->get_index_type()),
                                  client_pointer, _instance_count);
@@ -5761,7 +5783,7 @@ draw_points(const GeomPrimitivePipelineReader *reader, bool force) {
       }
     } else {
 #ifndef OPENGLES_1
-      if (_supports_geometry_instancing && _instance_count > 0) {
+      if (_instance_count != 1) {
         _glDrawArraysInstanced(GL_POINTS,
                                reader->get_first_vertex(),
                                num_vertices, _instance_count);
@@ -9766,15 +9788,22 @@ get_external_image_format(Texture *tex) const {
 
   case Texture::F_rgba:
   case Texture::F_rgbm:
-  case Texture::F_rgba4:
-  case Texture::F_rgba5:
   case Texture::F_rgba8:
   case Texture::F_rgba12:
+    return _supports_bgr ? GL_BGRA : GL_RGBA;
+
+  case Texture::F_rgba4:
+  case Texture::F_rgba5:
   case Texture::F_rgba16:
   case Texture::F_rgba32:
   case Texture::F_srgb_alpha:
   case Texture::F_rgb10_a2:
+#ifdef OPENGLES
+    // OpenGL ES doesn't have sized BGRA formats.
+    return GL_RGBA;
+#else
     return _supports_bgr ? GL_BGRA : GL_RGBA;
+#endif
 
   case Texture::F_luminance:
 #ifdef OPENGLES
@@ -10300,9 +10329,9 @@ get_internal_image_format(Texture *tex, bool force_sized) const {
 
 #ifdef OPENGLES
   case Texture::F_rgba8:
-    return GL_RGBA8_OES;
+    return _supports_bgr ? GL_BGRA : GL_RGBA8_OES;
   case Texture::F_rgba12:
-    return force_sized ? GL_RGBA8 : GL_RGBA;
+    return _supports_bgr ? GL_BGRA : (force_sized ? GL_RGBA8 : GL_RGBA);
 #else
   case Texture::F_rgba8:
     if (Texture::is_unsigned(tex->get_component_type())) {
@@ -11419,31 +11448,43 @@ set_state_and_transform(const RenderState *target,
   _state_pcollector.add_level(1);
   PStatGPUTimer timer1(this, _draw_set_state_pcollector);
 
-  if (transform != _internal_transform) {
+  bool transform_changed = transform != _internal_transform;
+  if (transform_changed) {
     // PStatGPUTimer timer(this, _draw_set_state_transform_pcollector);
     _transform_state_pcollector.add_level(1);
     _internal_transform = transform;
     do_issue_transform();
   }
 
-  //XXX the _inv_state_mask system does not appear to be used at the moment.
-  //if (target == _state_rs && (_state_mask | _inv_state_mask).is_all_on()) {
-  //  return;
-  //}
+  if (target == _state_rs && (_state_mask | _inv_state_mask).is_all_on()) {
+#ifndef OPENGLES_1
+    if (transform_changed) {
+      // The state has not changed, but the transform has. Set the new
+      // transform on the shader, if we have one.
+      if (_current_shader_context != nullptr) {
+        _current_shader_context->set_state_and_transform(_state_rs, transform,
+          _scene_setup->get_camera_transform(), _projection_mat);
+      }
+    }
+#endif
+    return;
+  }
   _target_rs = target;
 
 #ifndef OPENGLES_1
   determine_target_shader();
-  _instance_count = _target_shader->get_instance_count();
+  _sattr_instance_count = _target_shader->get_instance_count();
 
   if (_target_shader != _state_shader) {
     do_issue_shader();
     _state_shader = _target_shader;
     _state_mask.clear_bit(TextureAttrib::get_class_slot());
+    _state_mask.set_bit(ShaderAttrib::get_class_slot());
   }
   else if (!has_fixed_function_pipeline() && _current_shader == nullptr) { // In the case of OpenGL ES 2.x, we need to glUseShader before we draw anything.
     do_issue_shader();
     _state_mask.clear_bit(TextureAttrib::get_class_slot());
+    _state_mask.set_bit(ShaderAttrib::get_class_slot());
   }
 
   // Update all of the state that is bound to the shader program.
@@ -13780,7 +13821,11 @@ upload_simple_texture(CLP(TextureContext) *gtc) {
   Texture *tex = gtc->get_texture();
   nassertr(tex != nullptr, false);
 
+#ifdef OPENGLES
+  GLenum internal_format = GL_BGRA;
+#else
   GLenum internal_format = GL_RGBA;
+#endif
   GLenum external_format = GL_BGRA;
 
   const unsigned char *image_ptr = tex->get_simple_ram_image();
@@ -13794,6 +13839,9 @@ upload_simple_texture(CLP(TextureContext) *gtc) {
     // If the GL doesn't claim to support BGR, we may have to reverse the
     // component ordering of the image.
     external_format = GL_RGBA;
+#ifdef OPENGLES
+    internal_format = GL_RGBA;
+#endif
     image_ptr = fix_component_ordering(bgr_image, image_ptr, image_size,
                                        external_format, tex);
   }

+ 4 - 1
panda/src/glstuff/glGraphicsStateGuardian_src.h

@@ -186,6 +186,7 @@ typedef void (APIENTRYP PFNGLUNIFORMMATRIX4FVPROC) (GLint location, GLsizei coun
 typedef void (APIENTRYP PFNGLVALIDATEPROGRAMPROC) (GLuint program);
 typedef void (APIENTRYP PFNGLVERTEXATTRIB4FVPROC) (GLuint index, const GLfloat *v);
 typedef void (APIENTRYP PFNGLVERTEXATTRIB4DVPROC) (GLuint index, const GLdouble *v);
+typedef void (APIENTRYP PFNGLVERTEXATTRIBI4UIPROC) (GLuint index, GLuint x, GLuint y, GLuint z, GLuint w);
 typedef void (APIENTRYP PFNGLVERTEXATTRIBPOINTERPROC) (GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride, const GLvoid *pointer);
 typedef void (APIENTRYP PFNGLVERTEXATTRIBIPOINTERPROC) (GLuint index, GLint size, GLenum type, GLsizei stride, const GLvoid *pointer);
 typedef void (APIENTRYP PFNGLVERTEXATTRIBLPOINTERPROC) (GLuint index, GLint size, GLenum type, GLsizei stride, const GLvoid *pointer);
@@ -294,7 +295,7 @@ public:
 
   virtual bool begin_draw_primitives(const GeomPipelineReader *geom_reader,
                                      const GeomVertexDataPipelineReader *data_reader,
-                                     bool force);
+                                     size_t num_instances, bool force);
   virtual bool draw_triangles(const GeomPrimitivePipelineReader *reader,
                               bool force);
 #ifndef OPENGLES
@@ -1018,6 +1019,7 @@ public:
   PFNGLVALIDATEPROGRAMPROC _glValidateProgram;
   PFNGLVERTEXATTRIB4FVPROC _glVertexAttrib4fv;
   PFNGLVERTEXATTRIB4DVPROC _glVertexAttrib4dv;
+  PFNGLVERTEXATTRIBI4UIPROC _glVertexAttribI4ui;
   PFNGLVERTEXATTRIBPOINTERPROC _glVertexAttribPointer;
   PFNGLVERTEXATTRIBIPOINTERPROC _glVertexAttribIPointer;
   PFNGLVERTEXATTRIBLPOINTERPROC _glVertexAttribLPointer;
@@ -1080,6 +1082,7 @@ public:
   bool _supports_texture_max_level;
 
 #ifndef OPENGLES_1
+  GLsizei _sattr_instance_count;
   GLsizei _instance_count;
 #endif
 

+ 31 - 0
panda/src/glstuff/glShaderContext_src.cxx

@@ -487,6 +487,13 @@ reflect_attribute(int i, char *name_buffer, GLsizei name_buflen) {
       bind._name = InternalName::get_texcoord();
       bind._append_uv = atoi(noprefix.substr(13).c_str());
 
+    } else if (noprefix == "InstanceMatrix") {
+      bind._name = InternalName::get_instance_matrix();
+
+      if (param_type != GL_FLOAT_MAT4x3) {
+        GLCAT.error() << "p3d_InstanceMatrix should be mat4x3!\n";
+      }
+
     } else {
       GLCAT.error() << "Unrecognized vertex attrib '" << name_buffer << "'!\n";
       return;
@@ -498,15 +505,23 @@ reflect_attribute(int i, char *name_buffer, GLsizei name_buflen) {
 
   // Get the number of bind points for arrays and matrices.
   switch (param_type) {
+  case GL_FLOAT_MAT3x2:
   case GL_FLOAT_MAT3:
+  case GL_FLOAT_MAT3x4:
 #ifndef OPENGLES
+  case GL_DOUBLE_MAT3x2:
   case GL_DOUBLE_MAT3:
+  case GL_DOUBLE_MAT3x4:
 #endif
     bind._elements = 3 * param_size;
     break;
 
+  case GL_FLOAT_MAT4x2:
+  case GL_FLOAT_MAT4x3:
   case GL_FLOAT_MAT4:
 #ifndef OPENGLES
+  case GL_DOUBLE_MAT4x2:
+  case GL_DOUBLE_MAT4x3:
   case GL_DOUBLE_MAT4:
 #endif
     bind._elements = 4 * param_size;
@@ -2461,6 +2476,22 @@ update_shader_vertex_arrays(ShaderContext *prev, bool force) {
           _glgsg->_glVertexAttrib4fv(p, _glgsg->_scene_graph_color.get_data());
 #endif
         }
+        else if (name == InternalName::get_transform_index() &&
+                 _glgsg->_glVertexAttribI4ui != nullptr) {
+          _glgsg->_glVertexAttribI4ui(p, 0, 1, 2, 3);
+        }
+        else if (name == InternalName::get_instance_matrix()) {
+          const LMatrix4 &ident_mat = LMatrix4::ident_mat();
+
+          for (int i = 0; i < bind._elements; ++i) {
+#ifdef STDFLOAT_DOUBLE
+            _glgsg->_glVertexAttrib4dv(p, ident_mat.get_data() + i * 4);
+#else
+            _glgsg->_glVertexAttrib4fv(p, ident_mat.get_data() + i * 4);
+#endif
+            ++p;
+          }
+        }
       }
     }
 

+ 7 - 4
panda/src/gobj/geom.cxx

@@ -1298,18 +1298,20 @@ prepare_now(PreparedGraphicsObjects *prepared_objects,
  * Actually draws the Geom with the indicated GSG, using the indicated vertex
  * data (which might have been pre-munged to support the GSG's needs).
  *
+ * num_instances specifies the number of times to render the geometry.
+ *
  * Returns true if all of the primitives were drawn normally, false if there
  * was a problem (for instance, some of the data was nonresident).  If force
  * is passed true, it will wait for the data to become resident if necessary.
  */
 bool Geom::
 draw(GraphicsStateGuardianBase *gsg, const GeomVertexData *vertex_data,
-     bool force, Thread *current_thread) const {
+     size_t num_instances, bool force, Thread *current_thread) const {
   GeomPipelineReader geom_reader(this, current_thread);
   GeomVertexDataPipelineReader data_reader(vertex_data, current_thread);
   data_reader.check_array_readers();
 
-  return geom_reader.draw(gsg, &data_reader, force);
+  return geom_reader.draw(gsg, &data_reader, num_instances, force);
 }
 
 /**
@@ -1847,11 +1849,12 @@ check_valid(const GeomVertexDataPipelineReader *data_reader) const {
  */
 bool GeomPipelineReader::
 draw(GraphicsStateGuardianBase *gsg,
-     const GeomVertexDataPipelineReader *data_reader, bool force) const {
+     const GeomVertexDataPipelineReader *data_reader,
+     size_t num_instances, bool force) const {
   bool all_ok;
   {
     PStatTimer timer(Geom::_draw_primitive_setup_pcollector);
-    all_ok = gsg->begin_draw_primitives(this, data_reader, force);
+    all_ok = gsg->begin_draw_primitives(this, data_reader, num_instances, force);
   }
   if (all_ok) {
     Geom::Primitives::const_iterator pi;

+ 2 - 2
panda/src/gobj/geom.h

@@ -158,7 +158,7 @@ PUBLISHED:
 
 public:
   bool draw(GraphicsStateGuardianBase *gsg,
-            const GeomVertexData *vertex_data,
+            const GeomVertexData *vertex_data, size_t num_instances,
             bool force, Thread *current_thread) const;
 
   INLINE void calc_tight_bounds(LPoint3 &min_point, LPoint3 &max_point,
@@ -433,7 +433,7 @@ public:
 
   bool draw(GraphicsStateGuardianBase *gsg,
             const GeomVertexDataPipelineReader *data_reader,
-            bool force) const;
+            size_t num_instances, bool force) const;
 
 private:
   const Geom *_object;

+ 21 - 0
panda/src/gobj/geomVertexArrayFormat.cxx

@@ -655,6 +655,27 @@ compare_to(const GeomVertexArrayFormat &other) const {
   return 0;
 }
 
+/**
+ * Returns a suitable format for sending an array of instances to the graphics
+ * backend.  The format consists of a single "instance_matrix" column (4
+ * components of NT_stdfloat with C_matrix contents) and has a divisor of 1.
+ *
+ * The format is created and registered the first time this is called; the
+ * same registered pointer is returned by every subsequent call.
+ */
+const GeomVertexArrayFormat *GeomVertexArrayFormat::
+get_instance_array_format() {
+  // Shared by all callers; holds the registered format once it is created.
+  static CPT(GeomVertexArrayFormat) inst_array_format;
+
+  if (inst_array_format == nullptr) {
+    GeomVertexArrayFormat *new_array_format = new GeomVertexArrayFormat("instance_matrix", 4, NT_stdfloat, C_matrix);
+    new_array_format->set_divisor(1);
+    inst_array_format = GeomVertexArrayFormat::register_format(new_array_format);
+  }
+
+  nassertr(inst_array_format != nullptr, nullptr);
+  return inst_array_format.p();
+}
+
 /**
  * Resorts the _columns vector so that the columns are listed in the same
  * order they appear in the record.

+ 2 - 0
panda/src/gobj/geomVertexArrayFormat.h

@@ -123,6 +123,8 @@ PUBLISHED:
 public:
   int compare_to(const GeomVertexArrayFormat &other) const;
 
+  static const GeomVertexArrayFormat *get_instance_array_format();
+
 private:
   class Registry;
   INLINE static Registry *get_registry();

+ 23 - 0
panda/src/gobj/geomVertexData.I

@@ -208,6 +208,29 @@ set_array(size_t i, const GeomVertexArrayData *array) {
   writer.set_array(i, array);
 }
 
+/**
+ * Removes the array with the given index from the GeomVertexData, which also
+ * modifies the format.  The index must be less than the number of arrays.
+ */
+INLINE void GeomVertexData::
+remove_array(size_t i) {
+  GeomVertexDataPipelineWriter writer(this, true, Thread::get_current_thread());
+  writer.remove_array(i);
+}
+
+/**
+ * Inserts the indicated vertex data array into the list of arrays, which also
+ * modifies the format.  You should be careful that the new array has the same
+ * number of rows as the vertex data.
+ *
+ * If i is greater than the current number of arrays, the array is appended at
+ * the end of the list.
+ *
+ * Don't call this in a downstream thread unless you don't mind it blowing
+ * away other changes you might have recently made in an upstream thread.
+ */
+INLINE void GeomVertexData::
+insert_array(size_t i, const GeomVertexArrayData *array) {
+  GeomVertexDataPipelineWriter writer(this, true, Thread::get_current_thread());
+  writer.insert_array(i, array);
+}
+
 /**
  * Returns a const pointer to the TransformTable assigned to this data.
  * Vertices within the table will index into this table to indicate their

+ 47 - 1
panda/src/gobj/geomVertexData.cxx

@@ -659,7 +659,7 @@ copy_from(const GeomVertexData *source, bool keep_data_objects,
             for (size_t i = 0; i < blend.get_num_transforms(); i++) {
               int index = add_transform(transform_table, blend.get_transform(i),
                                         already_added);
-              nassertv(index <= 4);
+              nassertv(index < 4);
               weights[index] = blend.get_weight(i);
             }
             if (weight.has_column()) {
@@ -2582,6 +2582,52 @@ set_array(size_t i, const GeomVertexArrayData *array) {
   }
 }
 
+/**
+ * Removes the array at index i, which must be less than the number of
+ * arrays.  A copy of the current format with that array removed is registered
+ * and becomes the new format of the vertex data.  The cache stage and the
+ * animated vertices are cleared, and the modified counter is updated.
+ */
+void GeomVertexDataPipelineWriter::
+remove_array(size_t i) {
+  nassertv(i < _cdata->_arrays.size());
+
+  // Build and register the new format before touching the array list.
+  GeomVertexFormat *new_format = new GeomVertexFormat(*_cdata->_format);
+  new_format->remove_array(i);
+  _cdata->_format = GeomVertexFormat::register_format(new_format);
+  _cdata->_arrays.erase(_cdata->_arrays.begin() + i);
+
+  _object->clear_cache_stage();
+  _cdata->_modified = Geom::get_next_modified();
+  _cdata->_animated_vertices.clear();
+
+  // Keep the list of array writers in step with the list of arrays.
+  if (_got_array_writers) {
+    _array_writers.erase(_array_writers.begin() + i);
+  }
+}
+
+/**
+ * Inserts the indicated array before index i.  If i is greater than the
+ * number of arrays, the array is appended at the end instead.  A copy of the
+ * current format with the array's format inserted at the same position is
+ * registered and becomes the new format of the vertex data.  The cache stage
+ * and the animated vertices are cleared, and the modified counter is updated.
+ */
+void GeomVertexDataPipelineWriter::
+insert_array(size_t i, const GeomVertexArrayData *array) {
+  const GeomVertexArrayFormat *array_format = array->get_array_format();
+
+  // Clamp an out-of-range index so that it means "append".
+  if (i > _cdata->_arrays.size()) {
+    i = _cdata->_arrays.size();
+  }
+
+  GeomVertexFormat *new_format = new GeomVertexFormat(*_cdata->_format);
+  new_format->insert_array(i, array_format);
+  _cdata->_format = GeomVertexFormat::register_format(new_format);
+  // The const is cast away in order to store the array in the list.
+  _cdata->_arrays.insert(_cdata->_arrays.begin() + i, (GeomVertexArrayData *)array);
+
+  _object->clear_cache_stage();
+  _cdata->_modified = Geom::get_next_modified();
+  _cdata->_animated_vertices.clear();
+
+  // Keep the list of array writers in step with the list of arrays by
+  // creating a handle for the newly inserted array.
+  if (_got_array_writers) {
+    _array_writers.insert(_array_writers.begin() + i, new GeomVertexArrayDataHandle(_cdata->_arrays[i].get_write_pointer(), _current_thread));
+  }
+}
+
 /**
  * Copies a single row of the data from the other array into the indicated row
  * of this array.  In this case, the source format must exactly match the

+ 5 - 1
panda/src/gobj/geomVertexData.h

@@ -112,7 +112,9 @@ PUBLISHED:
   INLINE PT(GeomVertexArrayData) modify_array(size_t i);
   INLINE PT(GeomVertexArrayDataHandle) modify_array_handle(size_t i);
   INLINE void set_array(size_t i, const GeomVertexArrayData *array);
-  MAKE_SEQ_PROPERTY(arrays, get_num_arrays, get_array, set_array);
+  INLINE void remove_array(size_t i);
+  INLINE void insert_array(size_t i, const GeomVertexArrayData *array);
+  MAKE_SEQ_PROPERTY(arrays, get_num_arrays, get_array, set_array, remove_array, insert_array);
 
   INLINE const TransformTable *get_transform_table() const;
   void set_transform_table(const TransformTable *table);
@@ -520,6 +522,8 @@ public:
 
   PT(GeomVertexArrayData) modify_array(size_t i);
   void set_array(size_t i, const GeomVertexArrayData *array);
+  void remove_array(size_t i);
+  void insert_array(size_t i, const GeomVertexArrayData *array);
 
   int get_num_rows() const;
   bool set_num_rows(int n);

+ 31 - 0
panda/src/gobj/geomVertexFormat.cxx

@@ -134,6 +134,32 @@ get_post_animated_format() const {
   return _post_animated_format;
 }
 
+/**
+ * Returns a suitable vertex format for sending instanced geometry to the
+ * graphics backend.  This is the same format as the source format, with the
+ * instance array (see GeomVertexArrayFormat::get_instance_array_format())
+ * added.
+ *
+ * This may only be called after the format has been registered.  The return
+ * value will have been already registered.  The result is computed on the
+ * first call and stored on this format for subsequent calls.
+ */
+CPT(GeomVertexFormat) GeomVertexFormat::
+get_post_instanced_format() const {
+  nassertr(is_registered(), nullptr);
+
+  if (_post_instanced_format == nullptr) {
+    PT(GeomVertexFormat) new_format = new GeomVertexFormat(*this);
+    new_format->add_array(GeomVertexArrayFormat::register_format(GeomVertexArrayFormat::get_instance_array_format()));
+
+    CPT(GeomVertexFormat) registered =
+      GeomVertexFormat::register_format(new_format);
+    ((GeomVertexFormat *)this)->_post_instanced_format = registered;
+
+    // _post_instanced_format is a plain pointer, so we must hold our own
+    // reference to keep the format alive after the local CPT goes out of
+    // scope.  This reference is released by the unref_delete() in
+    // do_unregister().
+    _post_instanced_format->ref();
+  }
+
+  _post_instanced_format->test_ref_count_integrity();
+
+  return _post_instanced_format;
+}
+
 /**
  * Returns a new GeomVertexFormat that includes all of the columns defined in
  * either this GeomVertexFormat or the other one.  If any column is defined in
@@ -818,6 +844,11 @@ do_unregister() {
     unref_delete(_post_animated_format);
   }
   _post_animated_format = nullptr;
+
+  if (_post_instanced_format != nullptr) {
+    unref_delete(_post_instanced_format);
+    _post_instanced_format = nullptr;
+  }
 }
 
 /**

+ 3 - 1
panda/src/gobj/geomVertexFormat.h

@@ -72,6 +72,7 @@ PUBLISHED:
   MAKE_PROPERTY(animation, get_animation, set_animation);
 
   CPT(GeomVertexFormat) get_post_animated_format() const;
+  CPT(GeomVertexFormat) get_post_instanced_format() const;
   CPT(GeomVertexFormat) get_union_format(const GeomVertexFormat *other) const;
 
   INLINE size_t get_num_arrays() const;
@@ -222,7 +223,8 @@ private:
   typedef pvector<MorphRecord> Morphs;
   Morphs _morphs;
 
-  const GeomVertexFormat *_post_animated_format;
+  const GeomVertexFormat *_post_animated_format = nullptr;
+  const GeomVertexFormat *_post_instanced_format = nullptr;
 
   // This is the global registry of all currently-in-use formats.
   typedef pset<GeomVertexFormat *, IndirectCompareTo<GeomVertexFormat> > Formats;

+ 11 - 0
panda/src/gobj/internalName.I

@@ -366,6 +366,17 @@ get_view() {
   return _view;
 }
 
+/**
+ * Returns the standard InternalName "instance_matrix".  The name is created
+ * the first time this is called and the same pointer is returned afterwards.
+ */
+INLINE PT(InternalName) InternalName::
+get_instance_matrix() {
+  if (_instance_matrix == nullptr) {
+    _instance_matrix = InternalName::make("instance_matrix");
+  }
+  return _instance_matrix;
+}
+
 /**
  *
  */

+ 1 - 0
panda/src/gobj/internalName.cxx

@@ -40,6 +40,7 @@ PT(InternalName) InternalName::_world;
 PT(InternalName) InternalName::_camera;
 PT(InternalName) InternalName::_model;
 PT(InternalName) InternalName::_view;
+PT(InternalName) InternalName::_instance_matrix;
 
 TypeHandle InternalName::_type_handle;
 TypeHandle InternalName::_texcoord_type_handle;

+ 2 - 0
panda/src/gobj/internalName.h

@@ -92,6 +92,7 @@ PUBLISHED:
   INLINE static PT(InternalName) get_camera();
   INLINE static PT(InternalName) get_model();
   INLINE static PT(InternalName) get_view();
+  INLINE static PT(InternalName) get_instance_matrix();
 
 #ifdef HAVE_PYTHON
   // These versions are exposed to Python, which have additional logic to map
@@ -141,6 +142,7 @@ private:
   static PT(InternalName) _camera;
   static PT(InternalName) _model;
   static PT(InternalName) _view;
+  static PT(InternalName) _instance_matrix;
 
 public:
   // Datagram stuff

+ 1 - 1
panda/src/gobj/matrixLens.I

@@ -54,7 +54,7 @@ operator = (const MatrixLens &copy) {
  * Explicitly specifies the projection matrix.  This matrix should convert X
  * and Y to the range [-film_size/2, film_size/2], where (-fs/2,-fs/2) is the
  * lower left corner of the screen and (fs/2, fs/2) is the upper right.  Z
- * should go to the range [-1, 1], where -1 is the far plane and 1 is the near
+ * should go to the range [-1, 1], where -1 is the near plane and 1 is the far
  * plane.  Note that this is a left-handed Y-up coordinate system.
  *
  * The default film_size for a MatrixLens is 2, so the default range is [-1,

+ 1 - 1
panda/src/gsgbase/graphicsStateGuardianBase.h

@@ -203,7 +203,7 @@ public:
 
   virtual bool begin_draw_primitives(const GeomPipelineReader *geom_reader,
                                      const GeomVertexDataPipelineReader *data_reader,
-                                     bool force)=0;
+                                     size_t num_instances, bool force)=0;
   virtual bool draw_triangles(const GeomPrimitivePipelineReader *reader, bool force)=0;
   virtual bool draw_triangles_adj(const GeomPrimitivePipelineReader *reader, bool force)=0;
   virtual bool draw_tristrips(const GeomPrimitivePipelineReader *reader, bool force)=0;

+ 4 - 0
panda/src/pgraph/CMakeLists.txt

@@ -39,6 +39,8 @@ set(P3PGRAPH_HEADERS
   geomDrawCallbackData.I geomDrawCallbackData.h
   geomNode.I geomNode.h
   geomTransformer.I geomTransformer.h
+  instanceList.I instanceList.h
+  instancedNode.I instancedNode.h
   internalNameCollection.I internalNameCollection.h
   lensNode.I lensNode.h
   light.I light.h
@@ -139,6 +141,8 @@ set(P3PGRAPH_SOURCES
   geomDrawCallbackData.cxx
   geomNode.cxx
   geomTransformer.cxx
+  instanceList.cxx
+  instancedNode.cxx
   internalNameCollection.cxx
   lensNode.cxx
   light.cxx

+ 19 - 1
panda/src/pgraph/billboardEffect.cxx

@@ -140,7 +140,25 @@ cull_callback(CullTraverser *trav, CullTraverserData &data,
     camera_transform = trav->get_camera_transform()->invert_compose(_look_at.get_net_transform());
   }
 
-  compute_billboard(node_transform, modelview_transform, camera_transform);
+  if (data._instances == nullptr) {
+    compute_billboard(node_transform, modelview_transform, camera_transform);
+  }
+  else {
+    // Compute the billboard effect for every instance individually.
+    InstanceList *instances = new InstanceList(*data._instances);
+    data._instances = instances;
+
+    for (InstanceList::Instance &instance : *instances) {
+      CPT(TransformState) inst_node_transform = node_transform;
+      CPT(TransformState) inst_modelview_transform = modelview_transform->compose(instance.get_transform());
+      compute_billboard(inst_node_transform, inst_modelview_transform, camera_transform);
+
+      instance.set_transform(instance.get_transform()->compose(inst_node_transform));
+    }
+
+    // We've already applied this onto the instances.
+    node_transform = TransformState::make_identity();
+  }
 }
 
 /**

+ 50 - 14
panda/src/pgraph/compassEffect.cxx

@@ -120,20 +120,56 @@ cull_callback(CullTraverser *trav, CullTraverserData &data,
     return;
   }
 
-  CPT(TransformState) true_net_transform = data.get_net_transform(trav);
-  CPT(TransformState) want_net_transform = true_net_transform;
-  adjust_transform(want_net_transform, node_transform, data.node());
-
-  // Now compute the transform that will convert true_net_transform to
-  // want_transform.  This is inv(true_net_transform) * want_transform.
-  CPT(TransformState) compass_transform =
-    true_net_transform->invert_compose(want_net_transform);
-
-  // And modify our local node's apparent transform so that
-  // true_net_transform->compose(new_node_transform) produces the same result
-  // we would have gotten had we actually computed
-  // want_transform->compose(orig_node_transform).
-  node_transform = compass_transform->compose(node_transform);
+  if (data._instances == nullptr) {
+    CPT(TransformState) true_net_transform = data.get_net_transform(trav);
+    CPT(TransformState) want_net_transform = true_net_transform;
+    adjust_transform(want_net_transform, node_transform, data.node());
+
+    // Now compute the transform that will convert true_net_transform to
+    // want_transform.  This is inv(true_net_transform) * want_transform.
+    CPT(TransformState) compass_transform =
+      true_net_transform->invert_compose(want_net_transform);
+
+    // And modify our local node's apparent transform so that
+    // true_net_transform->compose(new_node_transform) produces the same result
+    // we would have gotten had we actually computed
+    // want_transform->compose(orig_node_transform).
+    node_transform = compass_transform->compose(node_transform);
+  }
+  else {
+    // Compute the compass effect for every instance individually.
+    InstanceList *instances = new InstanceList(*data._instances);
+    data._instances = instances;
+
+    CPT(TransformState) parent_net_transform = data.get_net_transform(trav);
+    CPT(TransformState) invert_net_transform = parent_net_transform->get_inverse();
+
+    // We make use of the fact that we know adjust_transform() does not modify
+    // its node_transform parameter.
+    CPT(TransformState) node_transform_copy = node_transform;
+    if (node_transform_copy->is_identity()) {
+      // Slightly optimized case.
+      for (InstanceList::Instance &instance : *instances) {
+        CPT(TransformState) true_net_transform = parent_net_transform->compose(instance.get_transform());
+        CPT(TransformState) want_net_transform = true_net_transform;
+        adjust_transform(want_net_transform, node_transform_copy, data.node());
+
+        instance.set_transform(invert_net_transform->compose(want_net_transform));
+      }
+    }
+    else {
+      // We apply the node_transform to the instances.
+      node_transform = TransformState::make_identity();
+
+      for (InstanceList::Instance &instance : *instances) {
+        CPT(TransformState) true_net_transform = parent_net_transform->compose(instance.get_transform());
+        CPT(TransformState) want_net_transform = true_net_transform;
+        adjust_transform(want_net_transform, node_transform_copy, data.node());
+
+        instance.set_transform(invert_net_transform->compose(want_net_transform)->compose(node_transform_copy));
+      }
+    }
+  }
 }
 
 /**

+ 6 - 0
panda/src/pgraph/config_pgraph.cxx

@@ -42,6 +42,8 @@
 #include "geomDrawCallbackData.h"
 #include "geomNode.h"
 #include "geomTransformer.h"
+#include "instanceList.h"
+#include "instancedNode.h"
 #include "lensNode.h"
 #include "light.h"
 #include "lightAttrib.h"
@@ -416,6 +418,8 @@ init_libpgraph() {
   GeomDrawCallbackData::init_type();
   GeomNode::init_type();
   GeomTransformer::init_type();
+  InstanceList::init_type();
+  InstancedNode::init_type();
   LensNode::init_type();
   Light::init_type();
   LightAttrib::init_type();
@@ -484,6 +488,8 @@ init_libpgraph() {
   Fog::register_with_read_factory();
   FogAttrib::register_with_read_factory();
   GeomNode::register_with_read_factory();
+  InstanceList::register_with_read_factory();
+  InstancedNode::register_with_read_factory();
   LensNode::register_with_read_factory();
   LightAttrib::register_with_read_factory();
   LightRampAttrib::register_with_read_factory();

+ 18 - 2
panda/src/pgraph/cullTraverser.cxx

@@ -271,10 +271,10 @@ show_bounds(CullTraverserData &data, bool tight) {
       CullableObject *outer_viz =
         new CullableObject(std::move(bounds_viz), get_bounds_outer_viz_state(),
                            internal_transform);
+      outer_viz->_instances = data._instances;
       _cull_handler->record_object(outer_viz, this);
     }
-
-  } else {
+  } else if (data._instances == nullptr) {
     draw_bounding_volume(node->get_bounds(), internal_transform);
 
     if (node->is_geom_node()) {
@@ -287,6 +287,22 @@ show_bounds(CullTraverserData &data, bool tight) {
                              internal_transform);
       }
     }
+  } else {
+    // Draw bounds for every instance.
+    for (const InstanceList::Instance &instance : *data._instances) {
+      CPT(TransformState) transform = internal_transform->compose(instance.get_transform());
+      draw_bounding_volume(node->get_bounds(), transform);
+
+      if (node->is_geom_node()) {
+        // Also show the bounding volumes of included Geoms.
+        transform = transform->compose(node->get_transform());
+        GeomNode *gnode = (GeomNode *)node;
+        int num_geoms = gnode->get_num_geoms();
+        for (int i = 0; i < num_geoms; ++i) {
+          draw_bounding_volume(gnode->get_geom(i)->get_bounds(), transform);
+        }
+      }
+    }
   }
 }
 

+ 3 - 1
panda/src/pgraph/cullTraverserData.I

@@ -50,6 +50,7 @@ CullTraverserData(const CullTraverserData &parent, PandaNode *child) :
   _state(parent._state),
   _view_frustum(parent._view_frustum),
   _cull_planes(parent._cull_planes),
+  _instances(parent._instances),
   _draw_mask(parent._draw_mask),
   _portal_depth(parent._portal_depth)
 {
@@ -110,7 +111,8 @@ get_modelview_transform(const CullTraverser *trav) const {
  */
 INLINE CPT(TransformState) CullTraverserData::
 get_internal_transform(const CullTraverser *trav) const {
-  return trav->get_scene()->get_cs_world_transform()->compose(_net_transform);
+  const TransformState *cs_world_transform = trav->get_scene()->get_cs_world_transform();
+  return cs_world_transform->compose(_net_transform);
 }
 
 /**

+ 9 - 0
panda/src/pgraph/cullTraverserData.cxx

@@ -77,6 +77,15 @@ apply_transform_and_state(CullTraverser *trav) {
 void CullTraverserData::
 apply_transform(const TransformState *node_transform) {
   if (!node_transform->is_identity()) {
+    if (_instances != nullptr) {
+      InstanceList *instances = new InstanceList(*_instances);
+      for (InstanceList::Instance &instance : *instances) {
+        instance.set_transform(instance.get_transform()->compose(node_transform));
+      }
+      _instances = std::move(instances);
+      return;
+    }
+
     _net_transform = _net_transform->compose(node_transform);
 
     if ((_view_frustum != nullptr) ||

+ 2 - 0
panda/src/pgraph/cullTraverserData.h

@@ -23,6 +23,7 @@
 #include "pointerTo.h"
 #include "drawMask.h"
 #include "pvector.h"
+#include "instanceList.h"
 
 class PandaNode;
 class CullTraverser;
@@ -81,6 +82,7 @@ public:
   CPT(RenderState) _state;
   PT(GeometricBoundingVolume) _view_frustum;
   CPT(CullPlanes) _cull_planes;
+  CPT(InstanceList) _instances;
   DrawMask _draw_mask;
   int _portal_depth;
 

+ 18 - 3
panda/src/pgraph/cullableObject.I

@@ -70,7 +70,7 @@ operator = (const CullableObject &copy) {
  */
 INLINE void CullableObject::
 draw(GraphicsStateGuardianBase *gsg, bool force, Thread *current_thread) {
-  if (_draw_callback != nullptr) {
+  if (UNLIKELY(_draw_callback != nullptr)) {
     // It has a callback associated.
     gsg->clear_before_callback();
     gsg->set_state_and_transform(_state, _internal_transform);
@@ -81,11 +81,26 @@ draw(GraphicsStateGuardianBase *gsg, bool force, Thread *current_thread) {
       gsg->clear_state_and_transform();
     }
     // Now the callback has taken care of drawing.
-  } else {
+  }
+  else if (LIKELY(_instances == nullptr)) {
     nassertv(_geom != nullptr);
     gsg->set_state_and_transform(_state, _internal_transform);
     draw_inline(gsg, force, current_thread);
   }
+  else {
+    // It has an instance list left over (not munged into vertex data), which
+    // means the shader doesn't implement instancing.  Just render the object
+    // more than once.
+    nassertv(_geom != nullptr);
+    GeomPipelineReader geom_reader(_geom, current_thread);
+    GeomVertexDataPipelineReader data_reader(_munged_data, current_thread);
+    data_reader.check_array_readers();
+
+    for (const InstanceList::Instance &instance : *_instances) {
+      gsg->set_state_and_transform(_state, _internal_transform->compose(instance.get_transform()));
+      geom_reader.draw(gsg, &data_reader, _num_instances, force);
+    }
+  }
 }
 
 /**
@@ -130,7 +145,7 @@ flush_level() {
  */
 INLINE void CullableObject::
 draw_inline(GraphicsStateGuardianBase *gsg, bool force, Thread *current_thread) {
-  _geom->draw(gsg, _munged_data, force, current_thread);
+  _geom->draw(gsg, _munged_data, _num_instances, force, current_thread);
 }
 
 /**

+ 34 - 0
panda/src/pgraph/cullableObject.cxx

@@ -39,6 +39,7 @@ PStatCollector CullableObject::_munge_geom_pcollector("*:Munge:Geom");
 PStatCollector CullableObject::_munge_sprites_pcollector("*:Munge:Sprites");
 PStatCollector CullableObject::_munge_sprites_verts_pcollector("*:Munge:Sprites:Verts");
 PStatCollector CullableObject::_munge_sprites_prims_pcollector("*:Munge:Sprites:Prims");
+PStatCollector CullableObject::_munge_instances_pcollector("*:Munge:Instances");
 PStatCollector CullableObject::_sw_sprites_pcollector("SW Sprites");
 
 TypeHandle CullableObject::_type_handle;
@@ -173,6 +174,23 @@ munge_geom(GraphicsStateGuardianBase *gsg, GeomMunger *munger,
       std::swap(_munged_data, animated_vertices);
     }
 
+    if (sattr != nullptr) {
+      if (_instances != nullptr &&
+          sattr->get_flag(ShaderAttrib::F_hardware_instancing)) {
+        // We are under an InstancedNode, and the shader implements hardware
+        // instancing.  Munge the instance list into the vertex data.
+        munge_instances(current_thread);
+        _num_instances = _instances->size();
+        _instances = nullptr;
+      } else {
+        // No, use the instance count from the ShaderAttrib.
+        int count = sattr->get_instance_count();
+        _num_instances = (count > 0) ? (size_t)count : 1;
+      }
+    } else {
+      _num_instances = 1;
+    }
+
 #ifndef NDEBUG
     if (show_vertex_animation) {
       GeomVertexDataPipelineReader data_reader(_munged_data, current_thread);
@@ -204,6 +222,22 @@ output(std::ostream &out) const {
   }
 }
 
+/**
+ * Replaces _munged_data with a copy that has an additional array appended,
+ * containing the data of the instance list in the instance array format.
+ * Requires _munged_data and _instances to be set.
+ */
+void CullableObject::
+munge_instances(Thread *current_thread) {
+  PStatTimer timer(_munge_instances_pcollector, current_thread);
+
+  PT(GeomVertexData) instanced_data = new GeomVertexData(*_munged_data);
+  const GeomVertexArrayFormat *array_format = GeomVertexArrayFormat::get_instance_array_format();
+
+  CPT(GeomVertexArrayData) new_array = _instances->get_array_data(array_format);
+  // (size_t)-1 is larger than any array count; insert_array() treats an
+  // out-of-range index as a request to append at the end.
+  instanced_data->insert_array((size_t)-1, new_array);
+  _munged_data = instanced_data;
+}
+
 /**
  * Converts a table of points to quads for rendering on systems that don't
  * support fancy points.

+ 5 - 0
panda/src/pgraph/cullableObject.h

@@ -30,6 +30,7 @@
 #include "lightMutex.h"
 #include "callbackObject.h"
 #include "geomDrawCallbackData.h"
+#include "instanceList.h"
 
 class CullTraverser;
 class GeomMunger;
@@ -73,8 +74,11 @@ public:
   CPT(RenderState) _state;
   CPT(TransformState) _internal_transform;
   PT(CallbackObject) _draw_callback;
+  CPT(InstanceList) _instances;
+  int _num_instances = 1;
 
 private:
+  void munge_instances(Thread *current_thread);
   bool munge_points_to_quads(const CullTraverser *traverser, bool force);
 
   static CPT(RenderState) get_flash_cpu_state();
@@ -113,6 +117,7 @@ private:
   static PStatCollector _munge_sprites_pcollector;
   static PStatCollector _munge_sprites_verts_pcollector;
   static PStatCollector _munge_sprites_prims_pcollector;
+  static PStatCollector _munge_instances_pcollector;
   static PStatCollector _sw_sprites_pcollector;
 
 public:

+ 1 - 2
panda/src/pgraph/geomDrawCallbackData.cxx

@@ -45,7 +45,6 @@ upcall() {
       _gsg->clear_state_and_transform();
     }
 
-    _obj->_geom->draw(_gsg, _obj->_munged_data, _force,
-                      Thread::get_current_thread());
+    _obj->draw_inline(_gsg, _force, Thread::get_current_thread());
   }
 }

+ 11 - 0
panda/src/pgraph/geomNode.cxx

@@ -39,6 +39,7 @@
 #include "boundingSphere.h"
 #include "config_mathutil.h"
 #include "preparedGraphicsObjects.h"
+#include "instanceList.h"
 
 
 bool allow_flatten_color = ConfigVariableBool
@@ -527,6 +528,16 @@ add_for_draw(CullTraverser *trav, CullTraverserData &data) {
       continue;
     }
 
+    if (data._instances != nullptr) {
+      // Draw each individual instance.  We don't bother culling each
+      // individual Geom for each instance; that is probably way too slow.
+      CullableObject *object =
+        new CullableObject(std::move(geom), std::move(state), internal_transform);
+      object->_instances = data._instances;
+      trav->get_cull_handler()->record_object(object, trav);
+      continue;
+    }
+
     // Cull the Geom bounding volume against the view frustum andor the cull
     // planes.  Don't bother unless we've got more than one Geom, since
     // otherwise the bounding volume of the GeomNode is (probably) the same as

+ 280 - 0
panda/src/pgraph/instanceList.I

@@ -0,0 +1,280 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file instanceList.I
+ * @author rdb
+ * @date 2019-03-10
+ */
+
+/**
+ * Initializes an instance with the identity transform.
+ */
+INLINE InstanceList::Instance::
+Instance() : _transform(TransformState::make_identity()) {
+}
+
+/**
+ * Initializes an instance with the given transformation.
+ */
+INLINE InstanceList::Instance::
+Instance(CPT(TransformState) transform) : _transform(std::move(transform)) {
+}
+
+/**
+ * Returns the position component of this instance's transform.
+ */
+INLINE LPoint3 InstanceList::Instance::
+get_pos() const {
+  return get_transform()->get_pos();
+}
+
+/**
+ * Changes the position of this instance.  The stored transform is replaced
+ * with the one returned by TransformState::set_pos().
+ */
+INLINE void InstanceList::Instance::
+set_pos(const LPoint3 &pos) {
+  set_transform(get_transform()->set_pos(pos));
+}
+
+/**
+ * Changes the position of this instance, given as separate x, y and z
+ * components.  Equivalent to calling set_pos() with an LPoint3.
+ */
+INLINE void InstanceList::Instance::
+set_pos(PN_stdfloat x, PN_stdfloat y, PN_stdfloat z) {
+  set_pos(LPoint3(x, y, z));
+}
+
+/**
+ * Returns the heading, pitch and roll component of this instance's
+ * transform.
+ */
+INLINE LVecBase3 InstanceList::Instance::
+get_hpr() const {
+  return get_transform()->get_hpr();
+}
+
+/**
+ * Changes the rotation of this instance.  The stored transform is replaced
+ * with the one returned by TransformState::set_hpr().
+ */
+INLINE void InstanceList::Instance::
+set_hpr(const LVecBase3 &hpr) {
+  set_transform(get_transform()->set_hpr(hpr));
+}
+
+/**
+ * Changes the rotation of this instance, given as separate heading, pitch
+ * and roll components.  Equivalent to calling set_hpr() with an LVecBase3.
+ */
+INLINE void InstanceList::Instance::
+set_hpr(PN_stdfloat h, PN_stdfloat p, PN_stdfloat r) {
+  set_hpr(LVecBase3(h, p, r));
+}
+
+/**
+ * Returns the rotation component of this instance's transform, as a
+ * quaternion.
+ */
+INLINE LQuaternion InstanceList::Instance::
+get_quat() const {
+  return get_transform()->get_quat();
+}
+
+/**
+ * Changes the rotation of this instance.  The stored transform is replaced
+ * with the one returned by TransformState::set_quat().
+ */
+INLINE void InstanceList::Instance::
+set_quat(const LQuaternion &quat) {
+  set_transform(get_transform()->set_quat(quat));
+}
+
+/**
+ * Returns the scale component of this instance's transform.
+ */
+INLINE LVecBase3 InstanceList::Instance::
+get_scale() const {
+  return get_transform()->get_scale();
+}
+
+/**
+ * Changes the scale of this instance.  The stored transform is replaced with
+ * the one returned by TransformState::set_scale().
+ */
+INLINE void InstanceList::Instance::
+set_scale(const LVecBase3 &scale) {
+  set_transform(get_transform()->set_scale(scale));
+}
+
+/**
+ * Changes the scale of this instance, given as separate per-axis components.
+ * Equivalent to calling set_scale() with an LVecBase3.
+ */
+INLINE void InstanceList::Instance::
+set_scale(PN_stdfloat sx, PN_stdfloat sy, PN_stdfloat sz) {
+  set_scale(LVecBase3(sx, sy, sz));
+}
+
+/**
+ * Returns the matrix of this instance's transform.
+ */
+INLINE const LMatrix4 &InstanceList::Instance::
+get_mat() const {
+  return get_transform()->get_mat();
+}
+
+/**
+ * Replaces this instance's transform with one created from the indicated
+ * matrix.
+ */
+INLINE void InstanceList::Instance::
+set_mat(const LMatrix4 &mat) {
+  set_transform(TransformState::make_mat(mat));
+}
+
+/**
+ * Returns the TransformState that holds this instance's transform.
+ */
+INLINE const TransformState *InstanceList::Instance::
+get_transform() const {
+  return _transform.p();
+}
+
+/**
+ * Replaces this instance's transform with the indicated TransformState.
+ */
+INLINE void InstanceList::Instance::
+set_transform(CPT(TransformState) transform) {
+  _transform = std::move(transform);
+}
+
+/**
+ * Adds the indicated instance to the end of the list and clears the cached
+ * array.
+ */
+INLINE void InstanceList::
+append(InstanceList::Instance instance) {
+  _instances.push_back(std::move(instance));
+  _cached_array.clear();
+}
+
+/**
+ * Adds a new instance with the indicated transformation to the end of the
+ * list and clears the cached array.
+ */
+INLINE void InstanceList::
+append(const TransformState *transform) {
+  _instances.push_back(Instance(transform));
+  _cached_array.clear();
+}
+
+/**
+ * Adds a new instance to the list whose transformation is given as a
+ * position, a heading/pitch/roll rotation and a scale.
+ */
+INLINE void InstanceList::
+append(const LPoint3 &pos, const LVecBase3 &hpr, const LVecBase3 &scale) {
+
+  append(TransformState::make_pos_hpr_scale(pos, hpr, scale));
+}
+
+/**
+ * Adds a new instance to the list whose transformation is given as a
+ * position, a quaternion rotation and a scale.
+ */
+INLINE void InstanceList::
+append(const LPoint3 &pos, const LQuaternion &quat, const LVecBase3 &scale) {
+
+  append(TransformState::make_pos_quat_scale(pos, quat, scale));
+}
+
+/**
+ * Returns the total number of instances in the list.
+ */
+INLINE size_t InstanceList::
+size() const {
+  return _instances.size();
+}
+
+/**
+ * Returns the nth instance in the list.
+ */
+INLINE const InstanceList::Instance &InstanceList::
+operator [] (size_t n) const {
+  return _instances[n];
+}
+
+/**
+ * Returns a modifiable reference to the nth instance in the list.  The cached
+ * array is cleared before returning, presumably because the caller may
+ * change the instance through the returned reference.
+ */
+INLINE InstanceList::Instance &InstanceList::
+operator [] (size_t n) {
+  _cached_array.clear();
+  return _instances[n];
+}
+
+/**
+ * Empties the instance list.
+ */
+INLINE void InstanceList::
+clear() {
+  _instances.clear();
+  _cached_array.clear();
+}
+
+/**
+ * Reserves space for the given number of instances.
+ */
+INLINE void InstanceList::
+reserve(size_t n) {
+  _instances.reserve(n);
+}
+
+/**
+ * Returns true if the InstanceList is empty.
+ */
+INLINE bool InstanceList::
+empty() const {
+  return _instances.empty();
+}
+
+/**
+ * Returns an iterator to the beginning of the list.
+ *
+ * Since the caller can modify instances through the returned iterator, the
+ * cached array is cleared first, as the non-const operator [] does.
+ */
+INLINE InstanceList::iterator InstanceList::
+begin() {
+  _cached_array.clear();
+  return _instances.begin();
+}
+
+/**
+ * Returns a const_iterator to the beginning of the list.
+ */
+INLINE InstanceList::const_iterator InstanceList::
+begin() const {
+  return _instances.begin();
+}
+
+/**
+ * Returns a const_iterator to the beginning of the list.
+ */
+INLINE InstanceList::const_iterator InstanceList::
+cbegin() const {
+  return _instances.cbegin();
+}
+
+/**
+ * Returns an iterator to the end of the list.
+ *
+ * Since the caller can reach and modify instances by walking backwards from
+ * the returned iterator, the cached array is cleared first, as the non-const
+ * operator [] does.
+ */
+INLINE InstanceList::iterator InstanceList::
+end() {
+  _cached_array.clear();
+  return _instances.end();
+}
+
+/**
+ * Returns a const_iterator to the end of the list.
+ */
+INLINE InstanceList::const_iterator InstanceList::
+end() const {
+  return _instances.end();
+}
+
+/**
+ * Returns a const_iterator to the end of the list.
+ */
+INLINE InstanceList::const_iterator InstanceList::
+cend() const {
+  return _instances.cend();
+}

+ 213 - 0
panda/src/pgraph/instanceList.cxx

@@ -0,0 +1,213 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file instanceList.cxx
+ * @author rdb
+ * @date 2019-03-10
+ */
+
+#include "instanceList.h"
+#include "indent.h"
+#include "bamReader.h"
+#include "bamWriter.h"
+#include "bitArray.h"
+#include "geomVertexWriter.h"
+
+TypeHandle InstanceList::_type_handle;
+
+/**
+ * Required to implement CopyOnWriteObject.  Returns a newly allocated
+ * InstanceList made with the copy constructor.
+ */
+PT(CopyOnWriteObject) InstanceList::
+make_cow_copy() {
+  return new InstanceList(*this);
+}
+
+/**
+ * Constructs an empty InstanceList.
+ */
+InstanceList::
+InstanceList() {
+}
+
+/**
+ * Constructs an InstanceList holding a copy of the other list's instances.
+ * The cached vertex array is not copied; the new list starts out without
+ * one.
+ */
+InstanceList::
+InstanceList(const InstanceList &copy) :
+  _instances(copy._instances)
+{
+}
+
+/**
+ * Destructor.  There is nothing to release explicitly here.
+ */
+InstanceList::
+~InstanceList() {
+}
+
+/**
+ * Transforms all of the instances in the list by the indicated matrix.
+ *
+ * NOTE(review): the body is currently empty, so calling this leaves every
+ * instance untouched; the transformation has not been implemented.
+ */
+void InstanceList::
+xform(const LMatrix4 &mat) {
+
+}
+
+/**
+ * Returns an immutable list containing only the instances whose bit is off
+ * in the indicated mask.  If no bits are on, this list itself is returned;
+ * if at least as many bits are on as there are instances, a shared empty
+ * list is returned.
+ */
+CPT(InstanceList) InstanceList::
+without(const BitArray &mask) const {
+  const size_t total = size();
+  const size_t culled = (size_t)mask.get_num_on_bits();
+
+  // Nothing is masked out, so this list can be handed back as it is.
+  if (culled == 0) {
+    return this;
+  }
+
+  // Everything is masked out; every caller shares one empty list.
+  if (culled >= total) {
+    static CPT(InstanceList) empty_list;
+    if (empty_list == nullptr) {
+      empty_list = new InstanceList;
+    }
+
+    // More on bits than instances means the mask did not match this list.
+    nassertr(culled <= total, empty_list);
+    return empty_list;
+  }
+
+  InstanceList *result = new InstanceList;
+  result->_instances.reserve(total - culled);
+
+  // Everything below the lowest off bit is masked out, so start there.
+  size_t index = (size_t)mask.get_lowest_off_bit();
+  while (index < total) {
+    if (!mask.get_bit(index)) {
+      result->_instances.push_back(_instances[index]);
+    }
+    ++index;
+  }
+
+  return result;
+}
+
+/**
+ * Returns a GeomVertexArrayData in the given format, with one row per
+ * instance holding that instance's matrix in the instance-matrix column.
+ *
+ * The result is cached, and returned again for as long as the same format is
+ * requested and the cache has not been cleared.  The format may not be null.
+ */
+CPT(GeomVertexArrayData) InstanceList::
+get_array_data(const GeomVertexArrayFormat *format) const {
+  // Reuse the array built last time if it was made for the same format.
+  CPT(GeomVertexArrayData) cached = _cached_array;
+  if (cached != nullptr && cached->get_array_format() == format) {
+    return cached;
+  }
+
+  nassertr(format != nullptr, nullptr);
+
+  const size_t row_count = size();
+  PT(GeomVertexArrayData) fresh = new GeomVertexArrayData(format, GeomEnums::UH_stream);
+  fresh->unclean_set_num_rows(row_count);
+
+  {
+    // The writer is scoped so that it is gone before the array is published.
+    GeomVertexWriter writer(fresh, Thread::get_current_thread());
+    writer.set_column(InternalName::get_instance_matrix());
+    for (size_t row = 0; row != row_count; ++row) {
+      const Instance &instance = _instances[row];
+      writer.set_matrix4(instance.get_mat());
+    }
+  }
+
+  _cached_array = fresh;
+  return fresh;
+}
+
+/**
+ * Writes a brief, single-line description of the list to the indicated
+ * stream, consisting of the class name and the number of instances.
+ */
+void InstanceList::
+output(std::ostream &out) const {
+  out << "InstanceList[" << size() << "]";
+}
+
+/**
+ * Writes a multi-line description of the list to the indicated stream: a
+ * header line with the number of instances, followed by the transform of
+ * each instance on a line of its own, indented two more spaces.
+ */
+void InstanceList::
+write(std::ostream &out, int indent_level) const {
+  indent(out, indent_level) << "InstanceList[" << size() << "]:\n";
+
+  const int child_indent = indent_level + 2;
+  for (const_iterator it = begin(); it != end(); ++it) {
+    indent(out, child_indent) << *(it->get_transform()) << "\n";
+  }
+}
+
+/**
+ * Tells the BamReader how to create objects of type InstanceList, by
+ * registering make_from_bam() as the factory function for this class type.
+ */
+void InstanceList::
+register_with_read_factory() {
+  BamReader::get_factory()->register_factory(get_class_type(), make_from_bam);
+}
+
+/**
+ * Writes the contents of this object to the datagram for shipping out to a
+ * Bam file.
+ *
+ * The layout has to mirror what fillin() reads back: the number of instances
+ * as a uint16, followed by one TransformState pointer per instance.
+ */
+void InstanceList::
+write_datagram(BamWriter *manager, Datagram &dg) {
+  // The count is stored as a uint16, so a longer list cannot be represented.
+  nassertv(_instances.size() <= 0xffff);
+
+  CopyOnWriteObject::write_datagram(manager, dg);
+
+  // fillin() calls get_uint16() to learn how many pointers follow, so the
+  // count has to be written ahead of them.  Without it, the reader would
+  // take the first bytes of the pointer data for the instance count.
+  dg.add_uint16((uint16_t)_instances.size());
+
+  for (const Instance &instance : _instances) {
+    manager->write_pointer(dg, instance.get_transform());
+  }
+}
+
+/**
+ * Receives an array of pointers, one for each time manager->read_pointer()
+ * was called in fillin(). Returns the number of pointers processed.
+ *
+ * fillin() has already resized the list to the number of instances and
+ * requested one pointer per instance, so each slot is simply replaced here
+ * by an Instance wrapping the corresponding TransformState.
+ */
+int InstanceList::
+complete_pointers(TypedWritable **p_list, BamReader *manager) {
+  // The base class consumes its own pointers first; ours follow after.
+  int pi = CopyOnWriteObject::complete_pointers(p_list, manager);
+
+  for (Instance &instance : *this) {
+    instance = Instance(DCAST(TransformState, p_list[pi++]));
+  }
+
+  return pi;
+}
+
+/**
+ * This function is called by the BamReader's factory when a new object of
+ * type InstanceList is encountered in the Bam file.  It should create
+ * the InstanceList and extract its information from the file.
+ *
+ * Returns the newly allocated InstanceList, after fillin() has read it.
+ */
+TypedWritable *InstanceList::
+make_from_bam(const FactoryParams &params) {
+  InstanceList *object = new InstanceList;
+  DatagramIterator scan;
+  BamReader *manager;
+
+  // Extracts the datagram iterator and the reader from the factory params.
+  parse_params(params, scan, manager);
+  object->fillin(scan, manager);
+
+  return object;
+}
+
+/**
+ * This internal function is called by make_from_bam to read in all of the
+ * relevant data from the BamFile for the new InstanceList.
+ *
+ * The expected layout is the number of instances as a uint16, followed by
+ * one pointer per instance; the writer must store the count ahead of the
+ * pointers for the data to round-trip.
+ */
+void InstanceList::
+fillin(DatagramIterator &scan, BamReader *manager) {
+  CopyOnWriteObject::fillin(scan, manager);
+
+  // Make room for the instances now; the slots are filled with the actual
+  // transforms later on, in complete_pointers().
+  size_t num_instances = scan.get_uint16();
+  _instances.clear();
+  _instances.resize(num_instances);
+
+  // Request one pointer per instance; each one is handed to
+  // complete_pointers() in the same order.
+  for (size_t i = 0; i < num_instances; ++i) {
+    manager->read_pointer(scan);
+  }
+
+  // Whatever was cached was built from the previous contents.
+  _cached_array.clear();
+}

+ 159 - 0
panda/src/pgraph/instanceList.h

@@ -0,0 +1,159 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file instanceList.h
+ * @author rdb
+ * @date 2019-03-10
+ */
+
+#ifndef INSTANCELIST_H
+#define INSTANCELIST_H
+
+#include "pandabase.h"
+#include "copyOnWriteObject.h"
+#include "transformState.h"
+#include "pvector.h"
+#include "geomVertexArrayData.h"
+
+class BitArray;
+class FactoryParams;
+
+/**
+ * This structure stores a list of per-instance data, used by InstancedNode.
+ *
+ * Each entry is an Instance, which wraps a TransformState.  The list can also
+ * produce a vertex array holding the matrix of every instance; see
+ * get_array_data().
+ *
+ * @since 1.11.0
+ */
+class EXPCL_PANDA_PGRAPH InstanceList : public CopyOnWriteObject {
+protected:
+  virtual PT(CopyOnWriteObject) make_cow_copy() override;
+
+PUBLISHED:
+  InstanceList();
+  InstanceList(const InstanceList &copy);
+  virtual ~InstanceList();
+  ALLOC_DELETED_CHAIN(InstanceList);
+
+  /**
+   * An individual instance in an InstanceList.
+   *
+   * The only state it holds is a pointer to a TransformState; the accessors
+   * below expose that transform by component, as a matrix, or as a whole.
+   *
+   * @since 1.11.0
+   */
+  class EXPCL_PANDA_PGRAPH Instance {
+  public:
+    INLINE explicit Instance();
+    INLINE explicit Instance(CPT(TransformState) transform);
+
+  PUBLISHED:
+    INLINE LPoint3 get_pos() const;
+    INLINE void set_pos(const LPoint3 &);
+    INLINE void set_pos(PN_stdfloat x, PN_stdfloat y, PN_stdfloat z);
+
+    INLINE LVecBase3 get_hpr() const;
+    INLINE void set_hpr(const LVecBase3 &);
+    INLINE void set_hpr(PN_stdfloat h, PN_stdfloat p, PN_stdfloat r);
+
+    INLINE LQuaternion get_quat() const;
+    INLINE void set_quat(const LQuaternion &);
+
+    INLINE LVecBase3 get_scale() const;
+    INLINE void set_scale(const LVecBase3 &);
+    INLINE void set_scale(PN_stdfloat sx, PN_stdfloat sy, PN_stdfloat sz);
+
+    INLINE const LMatrix4 &get_mat() const;
+    INLINE void set_mat(const LMatrix4 &mat);
+
+    INLINE const TransformState *get_transform() const;
+    INLINE void set_transform(CPT(TransformState));
+    MAKE_PROPERTY(transform, get_transform);
+
+  private:
+    // The transform of this instance.
+    CPT(TransformState) _transform;
+  };
+
+  // Adds an instance to the list, given as an Instance, as a transform, or
+  // as a position with an optional rotation (hpr or quaternion) and scale.
+  void append(Instance instance);
+  void append(const TransformState *transform);
+  void append(const LPoint3 &pos,
+              const LVecBase3 &hpr = LVecBase3(0),
+              const LVecBase3 &scale = LVecBase3(1));
+  void append(const LPoint3 &pos,
+              const LQuaternion &quat,
+              const LVecBase3 &scale = LVecBase3(1));
+
+  INLINE size_t size() const;
+  INLINE const Instance &operator [] (size_t n) const;
+  INLINE Instance &operator [] (size_t n);
+  INLINE void clear();
+  INLINE void reserve(size_t);
+
+  void xform(const LMatrix4 &mat);
+
+public:
+  // Container-style access to the underlying vector of instances.  These are
+  // declared under public: rather than PUBLISHED:.
+  typedef pvector<Instance> Instances;
+  typedef Instances::iterator iterator;
+  typedef Instances::const_iterator const_iterator;
+
+  INLINE bool empty() const;
+
+  INLINE iterator begin();
+  INLINE const_iterator begin() const;
+  INLINE const_iterator cbegin() const;
+
+  INLINE iterator end();
+  INLINE const_iterator end() const;
+  INLINE const_iterator cend() const;
+
+  // Returns a list without the instances whose bit is on in the mask.
+  CPT(InstanceList) without(const BitArray &mask) const;
+
+  // Returns a vertex array holding the matrix of each instance.
+  CPT(GeomVertexArrayData) get_array_data(const GeomVertexArrayFormat *format) const;
+
+  virtual void output(std::ostream &out) const;
+  virtual void write(std::ostream &out, int indent_level) const;
+
+private:
+  Instances _instances;
+
+  // The vertex array most recently built by get_array_data().  It is mutable
+  // so that the const get_array_data() can store it, and it is cleared by
+  // clear(), by the non-const operator [] and by fillin().
+  mutable CPT(GeomVertexArrayData) _cached_array;
+
+public:
+  static void register_with_read_factory();
+  virtual void write_datagram(BamWriter *manager, Datagram &dg) override;
+  virtual int complete_pointers(TypedWritable **plist, BamReader *manager) override;
+
+protected:
+  static TypedWritable *make_from_bam(const FactoryParams &params);
+  void fillin(DatagramIterator &scan, BamReader *manager) override;
+
+public:
+  static TypeHandle get_class_type() {
+    return _type_handle;
+  }
+  static void init_type() {
+    CopyOnWriteObject::init_type();
+    register_type(_type_handle, "InstanceList",
+                  CopyOnWriteObject::get_class_type());
+  }
+  virtual TypeHandle get_type() const override {
+    return get_class_type();
+  }
+  virtual TypeHandle force_init_type() override {
+    init_type();
+    return get_class_type();
+  }
+
+private:
+  static TypeHandle _type_handle;
+};
+
+// Streams the brief, single-line description produced by
+// InstanceList::output().
+inline std::ostream &operator <<(std::ostream &out, const InstanceList &list) {
+  list.output(out);
+  return out;
+}
+
+#include "instanceList.I"
+
+#endif

+ 39 - 0
panda/src/pgraph/instancedNode.I

@@ -0,0 +1,39 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file instancedNode.I
+ * @author rdb
+ * @date 2019-03-10
+ */
+
+/**
+ * Returns the number of instances, as seen by the current thread.  Returns 0
+ * (after raising an assertion) if no instance list is set.
+ */
+INLINE size_t InstancedNode::
+get_num_instances() const {
+  Thread *current_thread = Thread::get_current_thread();
+  CDReader cdata(_cycler, current_thread);
+  nassertr_always(cdata->_instances != nullptr, 0);
+  return cdata->_instances.get_read_pointer(current_thread)->size();
+}
+
+/**
+ * Returns the list of instances, for read-only access, as seen by the
+ * indicated thread.  Use modify_instances() to obtain a list that may be
+ * changed.
+ */
+INLINE CPT(InstanceList) InstancedNode::
+get_instances(Thread *current_thread) const {
+  CDReader cdata(_cycler, current_thread);
+  return cdata->_instances.get_read_pointer(current_thread);
+}
+
+/**
+ * Initializes the cycle data with a newly allocated, empty InstanceList, so
+ * that a freshly constructed node always has a list to refer to.
+ */
+INLINE InstancedNode::CData::
+CData() : _instances(new InstanceList) {
+}

+ 492 - 0
panda/src/pgraph/instancedNode.cxx

@@ -0,0 +1,492 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file instancedNode.cxx
+ * @author rdb
+ * @date 2019-03-10
+ */
+
+#include "instancedNode.h"
+#include "boundingBox.h"
+#include "boundingSphere.h"
+#include "cullTraverserData.h"
+#include "cullPlanes.h"
+
+TypeHandle InstancedNode::_type_handle;
+
+/**
+ * Constructs an InstancedNode with the indicated name.
+ */
+InstancedNode::
+InstancedNode(const std::string &name) :
+  PandaNode(name)
+{
+  // Required in order for cull_callback() to be called on this node.
+  set_cull_callback();
+}
+
+/**
+ * Copy constructor, used by make_copy().  Copies the PandaNode properties as
+ * well as the cycler that holds the instance list.
+ */
+InstancedNode::
+InstancedNode(const InstancedNode &copy) :
+  PandaNode(copy),
+  _cycler(copy._cycler)
+{
+  // Required in order for cull_callback() to be called on this node.
+  set_cull_callback();
+}
+
+/**
+ * Destructor.  There is nothing to release explicitly here.
+ */
+InstancedNode::
+~InstancedNode() {
+}
+
+/**
+ * Returns a newly-allocated PandaNode that is a shallow copy of this one.  It
+ * will be a different pointer, but its internal data may or may not be shared
+ * with that of the original PandaNode.  No children will be copied.
+ *
+ * The copy is made with the copy constructor, which also copies the cycler
+ * that holds the instance list.
+ */
+PandaNode *InstancedNode::
+make_copy() const {
+  return new InstancedNode(*this);
+}
+
+/**
+ * Returns the list of instances, in a form that may be modified.
+ *
+ * Since the caller is expected to change the list, the bounds of this node
+ * are marked stale and the node is marked as modified for Bam purposes.
+ *
+ * Don't call this in a downstream thread unless you don't mind it blowing
+ * away other changes you might have recently made in an upstream thread.
+ */
+PT(InstanceList) InstancedNode::
+modify_instances() {
+  Thread *current_thread = Thread::get_current_thread();
+  CDWriter cdata(_cycler, true, current_thread);
+  // Obtains the writable pointer from the copy-on-write pointer.
+  PT(InstanceList) instances = cdata->_instances.get_write_pointer();
+  mark_bounds_stale(current_thread->get_pipeline_stage(), current_thread);
+  mark_bam_modified();
+  return instances;
+}
+
+/**
+ * Entirely replaces the list of instances with the given list.  The bounds
+ * of this node are marked stale and the node is marked as modified for Bam
+ * purposes.
+ *
+ * Don't call this in a downstream thread unless you don't mind it blowing
+ * away other changes you might have recently made in an upstream thread.
+ */
+void InstancedNode::
+set_instances(PT(InstanceList) instances) {
+  Thread *current_thread = Thread::get_current_thread();
+  CDWriter cdata(_cycler, true);
+  // The parameter was taken by value, so it can be moved into place.
+  cdata->_instances = std::move(instances);
+  mark_bounds_stale(current_thread->get_pipeline_stage(), current_thread);
+  mark_bam_modified();
+}
+
+/**
+ * Returns true if it is generally safe to flatten out this particular kind of
+ * PandaNode by duplicating instances (by calling dupe_for_flatten()), false
+ * otherwise (for instance, a Camera cannot be safely flattened, because the
+ * Camera pointer itself is meaningful).
+ *
+ * An InstancedNode always returns false.
+ */
+bool InstancedNode::
+safe_to_flatten() const {
+  return false;
+}
+
+/**
+ * Returns true if it is generally safe to combine this particular kind of
+ * PandaNode with other kinds of PandaNodes of compatible type, adding
+ * children or whatever.  For instance, an LODNode should not be combined with
+ * any other PandaNode, because its set of children is meaningful.
+ *
+ * An InstancedNode always returns true here; whether two particular nodes
+ * can actually be combined is decided by combine_with().
+ */
+bool InstancedNode::
+safe_to_combine() const {
+  // This can happen iff the instance list is identical; see combine_with().
+  return true;
+}
+
+/**
+ * Transforms the contents of this node by the indicated matrix, if it means
+ * anything to do so.  For most kinds of nodes, this does nothing.
+ *
+ * For InstancedNode the body is empty, so this does nothing either; in
+ * particular, the instance list is left untouched.
+ */
+void InstancedNode::
+xform(const LMatrix4 &mat) {
+}
+
+/**
+ * Collapses this node with the other node, if possible, and returns a pointer
+ * to the combined node, or NULL if the two nodes cannot safely be combined.
+ *
+ * Two nodes are only combined if both are exactly of type InstancedNode and
+ * refer to the identical instance list, in which case this node is returned.
+ *
+ * This function is called from GraphReducer::flatten(), and need not deal
+ * with children; its job is just to decide whether to collapse the two nodes
+ * and what the collapsed node should look like.
+ */
+PandaNode *InstancedNode::
+combine_with(PandaNode *other) {
+  // Derived types are never combined; both must be plain InstancedNodes.
+  if (!is_exact_type(get_class_type()) ||
+      !other->is_exact_type(get_class_type())) {
+    return nullptr;
+  }
+
+  InstancedNode *other_node = DCAST(InstancedNode, other);
+
+  Thread *thread = Thread::get_current_thread();
+  CDReader my_cdata(_cycler, thread);
+  CDReader their_cdata(other_node->_cycler, thread);
+  CPT(InstanceList) my_list = my_cdata->_instances.get_read_pointer(thread);
+  CPT(InstanceList) their_list = their_cdata->_instances.get_read_pointer(thread);
+
+  // Only combine them if the instance lists for both are identical.
+  if (my_list != their_list) {
+    return nullptr;
+  }
+  return this;
+}
+
+/**
+ * This is used to support NodePath::calc_tight_bounds().  It is not intended
+ * to be called directly, and it has nothing to do with the normal Panda
+ * bounding-volume computation.
+ *
+ * If the node contains any geometry, this updates min_point and max_point to
+ * enclose its bounding box.  found_any is to be set true if the node has any
+ * geometry at all, or left alone if it has none.  This method may be called
+ * over several nodes, so it may enter with min_point, max_point, and
+ * found_any already set.
+ *
+ * For an InstancedNode, every child is visited once per instance, with the
+ * transform of that instance composed onto the net transform of this node.
+ * The return value is the net transform of this node, without any instance
+ * transform applied.
+ */
+CPT(TransformState) InstancedNode::
+calc_tight_bounds(LPoint3 &min_point, LPoint3 &max_point, bool &found_any,
+                  const TransformState *transform, Thread *current_thread) const {
+
+  CPT(InstanceList) instances = get_instances(current_thread);
+  CPT(TransformState) next_transform = transform->compose(get_transform(current_thread));
+
+  // The set of children is the same for every instance, so it is looked up
+  // once here rather than once per instance.
+  Children cr = get_children(current_thread);
+  size_t num_children = cr.get_num_children();
+
+  for (size_t ii = 0; ii < instances->size(); ++ii) {
+    CPT(TransformState) instance_transform = next_transform->compose((*instances)[ii].get_transform());
+
+    for (size_t ci = 0; ci < num_children; ++ci) {
+      cr.get_child(ci)->calc_tight_bounds(min_point, max_point,
+                                          found_any, instance_transform,
+                                          current_thread);
+    }
+  }
+
+  return next_transform;
+}
+
+/**
+ * This function will be called during the cull traversal to perform any
+ * additional operations that should be performed at cull time.  This may
+ * include additional manipulation of render state or additional
+ * visible/invisible decisions, or any other arbitrary operation.
+ *
+ * Note that this function will *not* be called unless set_cull_callback() is
+ * called in the constructor of the derived class.  It is necessary to call
+ * set_cull_callback() to indicated that we require cull_callback() to be
+ * called.
+ *
+ * By the time this function is called, the node has already passed the
+ * bounding-volume test for the viewing frustum, and the node's transform and
+ * state have already been applied to the indicated CullTraverserData object.
+ *
+ * For an InstancedNode, this works out which instances are in view, stores
+ * that list on the CullTraverserData, and turns off further culling below
+ * this node.
+ *
+ * The return value is true if this node should be visible, or false if it
+ * should be culled.
+ */
+bool InstancedNode::
+cull_callback(CullTraverser *trav, CullTraverserData &data) {
+  Thread *current_thread = trav->get_current_thread();
+
+  CPT(InstanceList) instances = get_instances(current_thread);
+
+  if (data._instances != nullptr) {
+    // We are already under an instanced node.  Create a new combined list.
+    // It holds every pairing of a parent instance with one of our own, so
+    // its size is the product of the sizes of both lists.
+    InstanceList *new_list = new InstanceList();
+    new_list->reserve(data._instances->size() * instances->size());
+    for (const InstanceList::Instance &parent_instance : *data._instances) {
+      for (const InstanceList::Instance &this_instance : *instances) {
+        new_list->append(parent_instance.get_transform()->compose(this_instance.get_transform()));
+      }
+    }
+    instances = new_list;
+  }
+
+  if (data._view_frustum != nullptr || !data._cull_planes->is_empty()) {
+    // Culling is on, so we need to figure out which instances are visible.
+    Children children = data.node_reader()->get_children();
+    data.node_reader()->release();
+
+    // Keep track of which instances should be culled away.  Every instance
+    // starts out culled, and is cleared again once it is found to be in view.
+    BitArray culled_instances;
+    culled_instances.set_range(0, instances->size());
+
+    for (size_t ii = 0; ii < instances->size(); ++ii) {
+      CullTraverserData instance_data(data);
+      instance_data.apply_transform((*instances)[ii].get_transform());
+
+      // An instance is kept as soon as any one of the children is in view.
+      for (size_t ci = 0; ci < children.size(); ++ci) {
+        CullTraverserData child_data(instance_data, children.get_child(ci));
+        if (child_data.is_in_view(trav->get_camera_mask())) {
+          // Yep, the instance is in view.
+          culled_instances.clear_bit(ii);
+          break;
+        }
+      }
+    }
+
+    instances = instances->without(culled_instances);
+  } else {
+    data.node_reader()->release();
+  }
+
+  if (instances->empty()) {
+    // There are no instances, or they are all culled away.
+    return false;
+  }
+
+  data._instances = std::move(instances);
+
+  // Disable culling from this point on, for now.  It's probably not worth it
+  // to keep lists of transformed bounding volumes for each instance.
+  data._view_frustum = nullptr;
+  data._cull_planes = CullPlanes::make_empty();
+
+  return true;
+
+  // NOTE(review): the commented-out code below can never be reached; it
+  // looks like an earlier approach that traversed the children once per
+  // instance.  Consider removing it.
+  /*
+  for (const InstanceList::Instance &instance : *instances) {
+    CullTraverserData instance_data(data);
+    instance_data.apply_transform(instance.get_transform());
+    trav->traverse_below(instance_data);
+  }
+  return false;
+  */
+}
+
+/**
+ * Writes a brief description of the node to the indicated stream: the usual
+ * PandaNode output, followed by the number of instances.
+ */
+void InstancedNode::
+output(std::ostream &out) const {
+  PandaNode::output(out);
+  out << " (" << get_num_instances() << " instances)";
+}
+
+/**
+ * Returns a BoundingVolume that represents the external contents of the node.
+ * This should encompass the internal bounds, but also the bounding volumes of
+ * all this node's children, which are passed in.
+ *
+ * The result is an estimate rather than a tight fit: the children are first
+ * enclosed in a sphere around the origin, and that sphere is then placed at
+ * the position of every instance, scaled by the largest scale component of
+ * that instance.  The result is a box or a sphere depending on btype, and is
+ * empty if there are no children or instances, or infinite if any child
+ * volume is infinite or not a finite volume.
+ */
+void InstancedNode::
+compute_external_bounds(CPT(BoundingVolume) &external_bounds,
+                        BoundingVolume::BoundsType btype,
+                        const BoundingVolume **volumes, size_t num_volumes,
+                        int pipeline_stage, Thread *current_thread) const {
+
+  CPT(InstanceList) instances = get_instances(current_thread);
+
+  PT(GeometricBoundingVolume) gbv;
+  if (btype == BoundingVolume::BT_sphere) {
+    gbv = new BoundingSphere;
+  } else {
+    gbv = new BoundingBox;
+  }
+
+  if (num_volumes == 0 || instances->empty()) {
+    external_bounds = gbv;
+    return;
+  }
+
+  // Compute a sphere at the origin, encompassing the children.  This may not
+  // be the most optimal shape, but it allows us to easily estimate a bounding
+  // volume without having to take each instance transform into account.
+  PN_stdfloat max_radius = 0;
+  LVector3 max_abs_box(0);
+
+  for (size_t i = 0; i < num_volumes; ++i) {
+    const BoundingVolume *child_volume = volumes[i];
+    if (child_volume->is_empty()) {
+      continue;
+    }
+    if (child_volume->is_infinite()) {
+      gbv->set_infinite();
+      break;
+    }
+    if (const BoundingSphere *child_sphere = child_volume->as_bounding_sphere()) {
+      // Keep the largest reach seen so far.  Merely assigning it would let a
+      // small sphere that comes later shrink the radius again, leaving the
+      // larger children sticking out of the computed bounds.
+      max_radius = std::max(max_radius,
+                            child_sphere->get_center().length() + child_sphere->get_radius());
+    }
+    else if (const FiniteBoundingVolume *child_finite = child_volume->as_finite_bounding_volume()) {
+      // Track, per axis, how far from the origin any of the boxes reaches.
+      LPoint3 min1 = child_finite->get_min();
+      LPoint3 max1 = child_finite->get_max();
+      max_abs_box.set(
+        std::max(max_abs_box[0], std::max(std::fabs(min1[0]), std::fabs(max1[0]))),
+        std::max(max_abs_box[1], std::max(std::fabs(min1[1]), std::fabs(max1[1]))),
+        std::max(max_abs_box[2], std::max(std::fabs(min1[2]), std::fabs(max1[2]))));
+    }
+    else {
+      gbv->set_infinite();
+      break;
+    }
+  }
+
+  // The furthest corner of the accumulated box also has to fit in the sphere.
+  max_radius = std::max(max_radius, max_abs_box.length());
+  if (max_radius == 0 || gbv->is_infinite()) {
+    external_bounds = gbv;
+    return;
+  }
+
+  // Now that we have a sphere encompassing the children, we will make a box
+  // surrounding all the instances, extended by the computed radius.
+  LPoint3 min_point = (*instances)[0].get_pos();
+  LPoint3 max_point(min_point);
+
+  for (const InstanceList::Instance &instance : *instances) {
+    // To make the math easier and not have to take rotations into account, we
+    // take the highest scale component and multiply it by the radius of the
+    // bounding sphere on the origin we just calculated.
+    LVecBase3 scale = instance.get_scale();
+    PN_stdfloat max_scale = std::max(std::fabs(scale[0]), std::max(std::fabs(scale[1]), std::fabs(scale[2])));
+    PN_stdfloat inst_radius = max_scale * max_radius;
+    LVector3 extends_by(inst_radius);
+    LPoint3 pos = instance.get_pos();
+    min_point = min_point.fmin(pos - extends_by);
+    max_point = max_point.fmax(pos + extends_by);
+  }
+
+  if (min_point == max_point) {
+    // The box has no extent at all, so leave the volume empty.
+    external_bounds = gbv;
+    return;
+  }
+
+  // If we really need to make a sphere, we use the center of the bounding box
+  // as our sphere center, and iterate again to find the furthest instance.
+  if (btype == BoundingVolume::BT_sphere) {
+    LPoint3 center = (min_point + max_point) * 0.5;
+
+    PN_stdfloat max_distance = 0;
+    for (const InstanceList::Instance &instance : *instances) {
+      LVecBase3 scale = instance.get_scale();
+      PN_stdfloat max_scale = std::max(std::fabs(scale[0]), std::max(std::fabs(scale[1]), std::fabs(scale[2])));
+      PN_stdfloat inst_radius = max_scale * max_radius;
+      PN_stdfloat distance = (instance.get_pos() - center).length() + inst_radius;
+      max_distance = std::max(max_distance, distance);
+    }
+
+    if (max_distance == 0) {
+      external_bounds = gbv;
+      return;
+    }
+    // gbv was created as a BoundingSphere above, since btype is BT_sphere.
+    ((BoundingSphere *)gbv.p())->set_center(center);
+    ((BoundingSphere *)gbv.p())->set_radius(max_distance);
+  } else {
+    // For any other btype, gbv was created as a BoundingBox above.
+    ((BoundingBox *)gbv.p())->set_min_max(min_point, max_point);
+  }
+
+  // If we have a transform, apply it to the bounding volume we just
+  // computed.
+  CPT(TransformState) transform = get_transform(current_thread);
+  if (!transform->is_identity()) {
+    gbv->xform(transform->get_mat());
+  }
+
+  external_bounds = gbv;
+}
+
+/**
+ * Tells the BamReader how to create objects of type InstancedNode, by
+ * registering make_from_bam() as the factory function for this class type.
+ */
+void InstancedNode::
+register_with_read_factory() {
+  BamReader::get_factory()->register_factory(get_class_type(), make_from_bam);
+}
+
+/**
+ * Writes the contents of this object to the datagram for shipping out to a
+ * Bam file.  The PandaNode data is written first, followed by the cycle data
+ * that holds the instance list.
+ */
+void InstancedNode::
+write_datagram(BamWriter *manager, Datagram &dg) {
+  PandaNode::write_datagram(manager, dg);
+  manager->write_cdata(dg, _cycler);
+}
+
+/**
+ * This function is called by the BamReader's factory when a new object of
+ * type InstancedNode is encountered in the Bam file.  It should create the
+ * InstancedNode and extract its information from the file.
+ *
+ * The node is created with an empty name before fillin() reads its data.
+ */
+TypedWritable *InstancedNode::
+make_from_bam(const FactoryParams &params) {
+  InstancedNode *node = new InstancedNode("");
+  DatagramIterator scan;
+  BamReader *manager;
+
+  // Extracts the datagram iterator and the reader from the factory params.
+  parse_params(params, scan, manager);
+  node->fillin(scan, manager);
+
+  return node;
+}
+
+/**
+ * This internal function is called by make_from_bam to read in all of the
+ * relevant data from the BamFile for the new InstancedNode.  The data is read
+ * in the order write_datagram() stores it: the PandaNode data first, then
+ * the cycle data.
+ */
+void InstancedNode::
+fillin(DatagramIterator &scan, BamReader *manager) {
+  PandaNode::fillin(scan, manager);
+  manager->read_cdata(scan, _cycler);
+}
+
+/**
+ * Copy constructor.  Copies the copy-on-write pointer to the instance list
+ * from the other cycle data.
+ */
+InstancedNode::CData::
+CData(const InstancedNode::CData &copy) :
+  _instances(copy._instances)
+{
+}
+
+/**
+ * Returns a newly allocated copy of this cycle data, made with the copy
+ * constructor.
+ */
+CycleData *InstancedNode::CData::
+make_copy() const {
+  return new CData(*this);
+}
+
+/**
+ * Writes the contents of this object to the datagram for shipping out to a
+ * Bam file.  The only thing stored is a pointer to the instance list.
+ */
+void InstancedNode::CData::
+write_datagram(BamWriter *manager, Datagram &dg) const {
+  CPT(InstanceList) instances = _instances.get_read_pointer();
+  manager->write_pointer(dg, instances.p());
+}
+
+/**
+ * Receives an array of pointers, one for each time manager->read_pointer()
+ * was called in fillin(). Returns the number of pointers processed.
+ *
+ * The single pointer requested by fillin() is the instance list.
+ */
+int InstancedNode::CData::
+complete_pointers(TypedWritable **p_list, BamReader *manager) {
+  // The base class consumes its own pointers first; ours follows after.
+  int pi = CycleData::complete_pointers(p_list, manager);
+
+  _instances = DCAST(InstanceList, p_list[pi++]);
+  return pi;
+}
+
+/**
+ * This internal function is called by make_from_bam to read in all of the
+ * relevant data from the BamFile for the new InstancedNode.  It requests the
+ * one pointer, to the instance list, that complete_pointers() stores later.
+ */
+void InstancedNode::CData::
+fillin(DatagramIterator &scan, BamReader *manager) {
+  manager->read_pointer(scan);
+}

+ 136 - 0
panda/src/pgraph/instancedNode.h

@@ -0,0 +1,136 @@
+/**
+ * PANDA 3D SOFTWARE
+ * Copyright (c) Carnegie Mellon University.  All rights reserved.
+ *
+ * All use of this software is subject to the terms of the revised BSD
+ * license.  You should have received a copy of this license along
+ * with this source code in a file named "LICENSE."
+ *
+ * @file instancedNode.h
+ * @author rdb
+ * @date 2019-03-09
+ */
+
+#ifndef INSTANCEDNODE_H
+#define INSTANCEDNODE_H
+
+#include "pandabase.h"
+#include "pandaNode.h"
+#include "copyOnWritePointer.h"
+#include "instanceList.h"
+
+/**
+ * This is a special node that instances its contents using a list of
+ * transforms that get applied on top of the node's own transform.  This is a
+ * bit more limited than the regular instance_to mechanism, but it is a better
+ * choice for hardware instancing.
+ *
+ * For best performance, it is highly recommended to flatten the nodes under
+ * this (by calling flatten_strong()), since culling will not be performed for
+ * individual sub-nodes under each instance.
+ *
+ * @since 1.11.0
+ */
+class EXPCL_PANDA_PGRAPH InstancedNode : public PandaNode {
+PUBLISHED:
+  explicit InstancedNode(const std::string &name);
+
+protected:
+  InstancedNode(const InstancedNode &copy);
+
+public:
+  virtual ~InstancedNode();
+  virtual PandaNode *make_copy() const override;
+
+  // get_instances() gives read-only access to the instance list, whereas
+  // modify_instances() returns a list that may be changed, and
+  // set_instances() replaces the list as a whole.
+  INLINE size_t get_num_instances() const;
+  INLINE CPT(InstanceList) get_instances(Thread *current_thread = Thread::get_current_thread()) const;
+  PT(InstanceList) modify_instances();
+  void set_instances(PT(InstanceList) instances);
+
+PUBLISHED:
+  // Note that the property getter is modify_instances(), not get_instances().
+  MAKE_PROPERTY(instances, modify_instances, set_instances);
+
+public:
+  virtual bool safe_to_flatten() const override;
+  virtual bool safe_to_combine() const override;
+  virtual void xform(const LMatrix4 &mat) override;
+  virtual PandaNode *combine_with(PandaNode *other) override;
+
+  virtual CPT(TransformState)
+    calc_tight_bounds(LPoint3 &min_point, LPoint3 &max_point,
+                      bool &found_any,
+                      const TransformState *transform,
+                      Thread *current_thread) const override;
+
+  virtual bool cull_callback(CullTraverser *trav, CullTraverserData &data) override;
+
+  virtual void output(std::ostream &out) const override;
+
+protected:
+  virtual void compute_external_bounds(CPT(BoundingVolume) &external_bounds,
+                                       BoundingVolume::BoundsType btype,
+                                       const BoundingVolume **volumes,
+                                       size_t num_volumes,
+                                       int pipeline_stage,
+                                       Thread *current_thread) const override;
+
+private:
+  // This is the data that must be cycled between pipeline stages.
+  class EXPCL_PANDA_PGRAPH CData final : public CycleData {
+  public:
+    INLINE CData();
+    CData(const CData &copy);
+    virtual CycleData *make_copy() const override;
+    virtual void write_datagram(BamWriter *manager, Datagram &dg) const override;
+    virtual int complete_pointers(TypedWritable **plist, BamReader *manager) override;
+    virtual void fillin(DatagramIterator &scan, BamReader *manager) override;
+    virtual TypeHandle get_parent_type() const override {
+      return InstancedNode::get_class_type();
+    }
+
+  private:
+    // The list of instances.  The default constructor points this at a new,
+    // empty list.
+    COWPT(InstanceList) _instances;
+
+    friend class InstancedNode;
+  };
+
+  PipelineCycler<CData> _cycler;
+  typedef CycleDataReader<CData> CDReader;
+  typedef CycleDataWriter<CData> CDWriter;
+  typedef CycleDataStageReader<CData> CDStageReader;
+  typedef CycleDataLockedStageReader<CData> CDLockedStageReader;
+  typedef CycleDataStageWriter<CData> CDStageWriter;
+
+public:
+  static void register_with_read_factory();
+  virtual void write_datagram(BamWriter *manager, Datagram &dg) override;
+
+protected:
+  static TypedWritable *make_from_bam(const FactoryParams &params);
+  void fillin(DatagramIterator &scan, BamReader *manager) override;
+
+public:
+  static TypeHandle get_class_type() {
+    return _type_handle;
+  }
+  static void init_type() {
+    PandaNode::init_type();
+    register_type(_type_handle, "InstancedNode",
+                  PandaNode::get_class_type());
+    CData::init_type();
+  }
+  virtual TypeHandle get_type() const override {
+    return get_class_type();
+  }
+  virtual TypeHandle force_init_type() override {
+    init_type();
+    return get_class_type();
+  }
+
+private:
+  static TypeHandle _type_handle;
+};
+
+#include "instancedNode.I"
+
+#endif

+ 1 - 0
panda/src/pgraph/lightAttrib.I

@@ -42,6 +42,7 @@ get_num_non_ambient_lights() const {
  */
 INLINE NodePath LightAttrib::
 get_on_light(size_t n) const {
+  check_sorted();
   nassertr(n < _sorted_on_lights.size(), NodePath::fail());
   return _sorted_on_lights[n];
 }

+ 60 - 0
panda/src/pgraph/lightAttrib.cxx

@@ -443,6 +443,36 @@ remove_on_light(const NodePath &light) const {
   return return_new(attrib);
 }
 
+/**
+ * Returns a new LightAttrib, just like this one, but with the indicated light
+ * replaced with the given other light in the list of lights turned on by this
+ * attrib.
+ *
+ * If source and dest compare equal, this attrib is returned as-is.  Both
+ * NodePaths must be non-empty and must refer to Light nodes; otherwise the
+ * assertions bail out and return this attrib.  If source is not in the on
+ * list, the result is equivalent to this attrib.
+ */
+CPT(RenderAttrib) LightAttrib::
+replace_on_light(const NodePath &source, const NodePath &dest) const {
+  if (source == dest) {
+    return this;
+  }
+
+  nassertr(!source.is_empty(), this);
+  Light *slobj = source.node()->as_light();
+  nassertr(slobj != nullptr, this);
+
+  nassertr(!dest.is_empty(), this);
+  Light *dlobj = dest.node()->as_light();
+  nassertr(dlobj != nullptr, this);
+
+  LightAttrib *attrib = new LightAttrib(*this);
+
+  auto it = attrib->_on_lights.find(source);
+  if (it != attrib->_on_lights.end()) {
+    // Don't overwrite the element in place: the list is kept in sorted order
+    // for lookups, and dest generally sorts differently than source.  Remove
+    // the old entry and insert the new one at its proper position instead.
+    attrib->_on_lights.erase(it);
+
+    // Only take a new reference if dest was not already in the list, so that
+    // the number of references held always matches the list contents.
+    if (attrib->_on_lights.insert(dest).second) {
+      dlobj->attrib_ref();
+    }
+
+    // Release the old light last, in case both paths lead to the same node.
+    slobj->attrib_unref();
+  }
+  return return_new(attrib);
+}
+
+
 /**
  * Returns a new LightAttrib, just like this one, but with the indicated light
  * added to the list of lights turned off by this attrib.
@@ -475,6 +505,36 @@ remove_off_light(const NodePath &light) const {
   return return_new(attrib);
 }
 
+/**
+ * Returns a new LightAttrib, just like this one, but with the indicated light
+ * replaced with the given other light in the list of lights turned off by
+ * this attrib.
+ *
+ * If source and dest compare equal, this attrib is returned as-is.  Both
+ * NodePaths must be non-empty and must refer to Light nodes; otherwise the
+ * assertions bail out and return this attrib.  If source is not in the off
+ * list, the result is equivalent to this attrib.
+ */
+CPT(RenderAttrib) LightAttrib::
+replace_off_light(const NodePath &source, const NodePath &dest) const {
+  if (source == dest) {
+    return this;
+  }
+
+  nassertr(!source.is_empty(), this);
+  Light *slobj = source.node()->as_light();
+  nassertr(slobj != nullptr, this);
+
+  nassertr(!dest.is_empty(), this);
+  Light *dlobj = dest.node()->as_light();
+  nassertr(dlobj != nullptr, this);
+
+  LightAttrib *attrib = new LightAttrib(*this);
+
+  auto it = attrib->_off_lights.find(source);
+  if (it != attrib->_off_lights.end()) {
+    // Don't overwrite the element in place: the list is kept in sorted order
+    // for lookups, and dest generally sorts differently than source.  Remove
+    // the old entry and insert the new one at its proper position instead.
+    attrib->_off_lights.erase(it);
+
+    // Only take a new reference if dest was not already in the list, so that
+    // the number of references held always matches the list contents.
+    if (attrib->_off_lights.insert(dest).second) {
+      dlobj->attrib_ref();
+    }
+
+    // Release the old light last, in case both paths lead to the same node.
+    slobj->attrib_unref();
+  }
+  return return_new(attrib);
+}
+
+
 /**
  * Returns the most important light (that is, the light with the highest
  * priority) in the LightAttrib, excluding any ambient lights.  Returns an

+ 2 - 0
panda/src/pgraph/lightAttrib.h

@@ -85,8 +85,10 @@ PUBLISHED:
 
   CPT(RenderAttrib) add_on_light(const NodePath &light) const;
   CPT(RenderAttrib) remove_on_light(const NodePath &light) const;
+  CPT(RenderAttrib) replace_on_light(const NodePath &source, const NodePath &dest) const;
   CPT(RenderAttrib) add_off_light(const NodePath &light) const;
   CPT(RenderAttrib) remove_off_light(const NodePath &light) const;
+  CPT(RenderAttrib) replace_off_light(const NodePath &source, const NodePath &dest) const;
 
   NodePath get_most_important_light() const;
   LColor get_ambient_contribution() const;

+ 3 - 3
panda/src/pgraph/loader.cxx

@@ -292,7 +292,7 @@ try_load_file(const Filename &pathname, const LoaderOptions &options,
             << "Model " << pathname << " found in ModelPool.\n";
         }
         // But return a deep copy of the shared model.
-        node = node->copy_subgraph();
+        node = NodePath(node).copy_to(NodePath()).node();
       }
       return node;
     }
@@ -329,7 +329,7 @@ try_load_file(const Filename &pathname, const LoaderOptions &options,
             // from the RAM cached version.
             ModelPool::add_model(pathname, model_root);
             if ((options.get_flags() & LoaderOptions::LF_allow_instance) == 0) {
-              return model_root->copy_subgraph();
+              return NodePath(model_root).copy_to(NodePath()).node();
             }
           }
         }
@@ -398,7 +398,7 @@ try_load_file(const Filename &pathname, const LoaderOptions &options,
     // cached version.
     ModelPool::add_model(pathname, DCAST(ModelRoot, result.p()));
     if ((options.get_flags() & LoaderOptions::LF_allow_instance) == 0) {
-      result = result->copy_subgraph();
+      result = NodePath(result).copy_to(NodePath()).node();
     }
   }
 

+ 77 - 2
panda/src/pgraph/nodePath.cxx

@@ -539,12 +539,45 @@ copy_to(const NodePath &other, int sort, Thread *current_thread) const {
   nassertr(other._error_type == ET_ok, fail());
 
   PandaNode *source_node = node();
-  PT(PandaNode) copy_node = source_node->copy_subgraph(current_thread);
+  PandaNode::InstanceMap inst_map;
+  PT(PandaNode) copy_node = source_node->r_copy_subgraph(inst_map, current_thread);
   nassertr(copy_node != nullptr, fail());
 
   copy_node->reset_prev_transform(current_thread);
 
-  return other.attach_new_node(copy_node, sort, current_thread);
+  NodePath result = other.attach_new_node(copy_node, sort, current_thread);
+
+  // Temporary hack fix: if this root NodePath had lights applied that are
+  // located inside this subgraph, we need to fix them.
+  const RenderState *state = source_node->get_state();
+  const LightAttrib *lattr;
+  if (state->get_attrib(lattr)) {
+    CPT(LightAttrib) new_lattr = lattr;
+
+    for (size_t i = 0; i < lattr->get_num_off_lights(); ++i) {
+      NodePath light = lattr->get_off_light(i);
+      NodePath light2 = light;
+
+      if (light2.replace_copied_nodes(*this, result, inst_map, current_thread)) {
+        new_lattr = DCAST(LightAttrib, new_lattr->replace_off_light(light, light2));
+      }
+    }
+
+    for (size_t i = 0; i < lattr->get_num_on_lights(); ++i) {
+      NodePath light = lattr->get_on_light(i);
+      NodePath light2 = light;
+
+      if (light2.replace_copied_nodes(*this, result, inst_map, current_thread)) {
+        new_lattr = DCAST(LightAttrib, new_lattr->replace_on_light(light, light2));
+      }
+    }
+
+    if (new_lattr != lattr) {
+      result.set_state(state->set_attrib(std::move(new_lattr)));
+    }
+  }
+
+  return result;
 }
 
 /**
@@ -5803,6 +5836,48 @@ decode_from_bam_stream(vector_uchar data, BamReader *reader) {
   return result;
 }
 
+/**
+ * If the given root node is an ancestor of this NodePath, replaces all
+ * components below it using the given instance map.
+ *
+ * This is a helper method used by copy_to().
+ *
+ * Walks up from the head of this NodePath, collecting nodes, until the head
+ * component of source is reached.  If it is never reached, returns false and
+ * leaves this NodePath untouched.  Otherwise, the same chain is rebuilt below
+ * dest, substituting each collected node with the node that inst_map maps it
+ * to; the result becomes the new head of this NodePath and true is returned.
+ *
+ * The assertions also bail out with false if dest is empty or if one of the
+ * collected nodes has no entry in inst_map.
+ */
+bool NodePath::
+replace_copied_nodes(const NodePath &source, const NodePath &dest,
+                     const PandaNode::InstanceMap &inst_map,
+                     Thread *current_thread) {
+  nassertr(!dest.is_empty(), false);
+
+  int pipeline_stage = current_thread->get_pipeline_stage();
+
+  pvector<PandaNode *> nodes;
+
+  NodePathComponent *comp = _head;
+  while (comp != nullptr && comp != source._head) {
+    nodes.push_back(comp->get_node());
+
+    comp = comp->get_next(pipeline_stage, current_thread);
+  }
+
+  if (comp == nullptr) {
+    // The given source NodePath isn't an ancestor of this NodePath.
+    return false;
+  }
+
+  // Start at dest and rebuild the path in reverse order of collection.
+  PT(NodePathComponent) new_comp = dest._head;
+  pvector<PandaNode *>::reverse_iterator it;
+  for (it = nodes.rbegin(); it != nodes.rend(); ++it) {
+    PandaNode::InstanceMap::const_iterator iit = inst_map.find(*it);
+    nassertr_always(iit != inst_map.end(), false);
+    new_comp = PandaNode::get_component(new_comp, iit->second, pipeline_stage, current_thread);
+  }
+
+  nassertr(new_comp != nullptr, false);
+  _head = std::move(new_comp);
+  return true;
+}
+
+
 /**
  * Walks up from both NodePaths to find the first node that both have in
  * common, if any.  Fills a_count and b_count with the number of nodes below

+ 4 - 0
panda/src/pgraph/nodePath.h

@@ -952,6 +952,10 @@ PUBLISHED:
   static NodePath decode_from_bam_stream(vector_uchar data, BamReader *reader = nullptr);
 
 private:
+  bool replace_copied_nodes(const NodePath &source, const NodePath &dest,
+                            const PandaNode::InstanceMap &inst_map,
+                            Thread *current_thread);
+
   static NodePathComponent *
   find_common_ancestor(const NodePath &a, const NodePath &b,
                        int &a_count, int &b_count,

+ 2 - 0
panda/src/pgraph/p3pgraph_composite2.cxx

@@ -20,3 +20,5 @@
 #include "geomDrawCallbackData.cxx"
 #include "geomNode.cxx"
 #include "geomTransformer.cxx"
+#include "instanceList.cxx"
+#include "instancedNode.cxx"

+ 57 - 35
panda/src/pgraph/pandaNode.cxx

@@ -2315,6 +2315,59 @@ compute_internal_bounds(CPT(BoundingVolume) &internal_bounds,
   internal_vertices = 0;
 }
 
+/**
+ * Returns a BoundingVolume that represents the external contents of the node.
+ * This should encompass the internal bounds, but also the bounding volumes of
+ * all of this node's children, which are passed in.
+ *
+ * The result is stored in external_bounds.  A BoundingBox is produced if
+ * btype is BT_box, or if btype is not BT_sphere, this node has an identity
+ * transform and every one of the given volumes is a BoundingBox; otherwise a
+ * BoundingSphere is produced.  The volume is fitted around the num_volumes
+ * entries of the volumes array and then transformed by this node's transform.
+ * The pipeline_stage parameter is not used by this base implementation.
+ */
+void PandaNode::
+compute_external_bounds(CPT(BoundingVolume) &external_bounds,
+                        BoundingVolume::BoundsType btype,
+                        const BoundingVolume **volumes, size_t num_volumes,
+                        int pipeline_stage, Thread *current_thread) const {
+
+  CPT(TransformState) transform = get_transform(current_thread);
+  PT(GeometricBoundingVolume) gbv;
+
+  if (btype == BoundingVolume::BT_box) {
+    gbv = new BoundingBox;
+  }
+  else if (btype == BoundingVolume::BT_sphere || !transform->is_identity()) {
+    gbv = new BoundingSphere;
+  }
+  else {
+    // If all of the child volumes are a BoundingBox, and we have no
+    // transform, then our volume is also a BoundingBox.
+    bool all_box = true;
+
+    for (size_t i = 0; i < num_volumes; ++i) {
+      if (volumes[i]->as_bounding_box() == nullptr) {
+        // One non-box volume settles it; no need to look at the rest.
+        all_box = false;
+        break;
+      }
+    }
+
+    if (all_box) {
+      gbv = new BoundingBox;
+    } else {
+      gbv = new BoundingSphere;
+    }
+  }
+
+  // With no input volumes, the freshly constructed volume is returned as-is.
+  if (num_volumes > 0) {
+    const BoundingVolume **child_begin = &volumes[0];
+    const BoundingVolume **child_end = child_begin + num_volumes;
+    ((BoundingVolume *)gbv)->around(child_begin, child_end);
+
+    // If we have a transform, apply it to the bounding volume we just
+    // computed.
+    if (!transform->is_identity()) {
+      gbv->xform(transform->get_mat());
+    }
+  }
+
+  external_bounds = gbv;
+}
+
+
 /**
  * Called after a scene graph update that either adds or remove parents from
  * this node, this just provides a hook for derived PandaNode objects that
@@ -3263,7 +3316,6 @@ update_cached(bool update_bounds, int pipeline_stage, PandaNode::CDLockedStageRe
 #endif
     int child_volumes_i = 0;
 
-    bool all_box = true;
     CPT(BoundingVolume) internal_bounds = nullptr;
 
     if (update_bounds) {
@@ -3276,9 +3328,6 @@ update_cached(bool update_bounds, int pipeline_stage, PandaNode::CDLockedStageRe
 #endif
         nassertr(child_volumes_i < num_children + 1, CDStageWriter(_cycler, pipeline_stage, cdata));
         child_volumes[child_volumes_i++] = internal_bounds;
-        if (internal_bounds->as_bounding_box() == nullptr) {
-          all_box = false;
-        }
       }
     }
 
@@ -3374,9 +3423,6 @@ update_cached(bool update_bounds, int pipeline_stage, PandaNode::CDLockedStageRe
 #endif
             nassertr(child_volumes_i < num_children + 1, CDStageWriter(_cycler, pipeline_stage, cdata));
             child_volumes[child_volumes_i++] = child_cdataw->_external_bounds;
-            if (child_cdataw->_external_bounds->as_bounding_box() == nullptr) {
-              all_box = false;
-            }
           }
           num_vertices += child_cdataw->_nested_vertices;
         }
@@ -3429,9 +3475,6 @@ update_cached(bool update_bounds, int pipeline_stage, PandaNode::CDLockedStageRe
 #endif
             nassertr(child_volumes_i < num_children + 1, CDStageWriter(_cycler, pipeline_stage, cdata));
             child_volumes[child_volumes_i++] = child_cdata->_external_bounds;
-            if (child_cdata->_external_bounds->as_bounding_box() == nullptr) {
-              all_box = false;
-            }
           }
           num_vertices += child_cdata->_nested_vertices;
         }
@@ -3485,38 +3528,17 @@ update_cached(bool update_bounds, int pipeline_stage, PandaNode::CDLockedStageRe
         if (update_bounds) {
           cdataw->_nested_vertices = num_vertices;
 
-          CPT(TransformState) transform = get_transform(current_thread);
-          PT(GeometricBoundingVolume) gbv;
-
           BoundingVolume::BoundsType btype = cdataw->_bounds_type;
           if (btype == BoundingVolume::BT_default) {
             btype = bounds_type;
           }
 
-          if (btype == BoundingVolume::BT_box ||
-              (btype != BoundingVolume::BT_sphere && all_box && transform->is_identity())) {
-            // If all of the child volumes are a BoundingBox, and we have no
-            // transform, then our volume is also a BoundingBox.
+          compute_external_bounds(cdataw->_external_bounds, btype,
+                                  child_volumes, child_volumes_i,
+                                  pipeline_stage, current_thread);
 
-            gbv = new BoundingBox;
-          } else {
-            // Otherwise, it's a sphere.
-            gbv = new BoundingSphere;
-          }
-
-          if (child_volumes_i > 0) {
-            const BoundingVolume **child_begin = &child_volumes[0];
-            const BoundingVolume **child_end = child_begin + child_volumes_i;
-            ((BoundingVolume *)gbv)->around(child_begin, child_end);
-
-            // If we have a transform, apply it to the bounding volume we just
-            // computed.
-            if (!transform->is_identity()) {
-              gbv->xform(transform->get_mat());
-            }
-          }
+          nassertr(cdataw->_external_bounds != nullptr, cdataw);
 
-          cdataw->_external_bounds = gbv;
           cdataw->_last_bounds_update = next_update;
         }
 

+ 6 - 0
panda/src/pgraph/pandaNode.h

@@ -351,6 +351,12 @@ protected:
                                        int &internal_vertices,
                                        int pipeline_stage,
                                        Thread *current_thread) const;
+  virtual void compute_external_bounds(CPT(BoundingVolume) &external_bounds,
+                                       BoundingVolume::BoundsType btype,
+                                       const BoundingVolume **volumes,
+                                       size_t num_volumes,
+                                       int pipeline_stage,
+                                       Thread *current_thread) const;
   virtual void parents_changed();
   virtual void children_changed();
   virtual void transform_changed();

+ 1 - 1
panda/src/pgraph/shaderAttrib.I

@@ -82,7 +82,7 @@ get_shader_priority() const {
 
 /**
  * Returns the number of geometry instances.  A value of 0 means not to use
- * instancing at all.
+ * instancing at all.  This value is ignored if F_hardware_instancing is set.
  */
 INLINE int ShaderAttrib::
 get_instance_count() const {

+ 2 - 0
panda/src/pgraph/shaderAttrib.cxx

@@ -249,6 +249,8 @@ set_shader_inputs(const pvector<ShaderInput> &inputs) const {
  * Sets the geometry instance count.  Do not confuse this with instanceTo,
  * which is used for animation instancing, and has nothing to do with this.  A
  * value of 0 means not to use instancing at all.
+ *
+ * This value should not be set if F_hardware_instancing is also set.
  */
 CPT(RenderAttrib) ShaderAttrib::
 set_instance_count(int instance_count) const {

+ 1 - 0
panda/src/pgraph/shaderAttrib.h

@@ -51,6 +51,7 @@ PUBLISHED:
     F_subsume_alpha_test  = 1 << 1,  // Shader promises to subsume the alpha test using TEXKILL
     F_hardware_skinning   = 1 << 2,  // Shader needs pre-animated vertices
     F_shader_point_size   = 1 << 3,  // Shader provides point size, not RenderModeAttrib
+    F_hardware_instancing = 1 << 4,  // Shader needs instance list
   };
 
   INLINE bool               has_shader() const;

+ 72 - 19
panda/src/pgraphnodes/lodNode.cxx

@@ -143,26 +143,62 @@ cull_callback(CullTraverser *trav, CullTraverserData &data) {
   CDReader cdata(_cycler);
 
   CPT(TransformState) rel_transform = get_rel_transform(trav, data);
-  LPoint3 center = cdata->_center * rel_transform->get_mat();
-  PN_stdfloat dist2 = center.dot(center);
+  PN_stdfloat lod_scale = cdata->_lod_scale *
+    trav->get_scene()->get_camera_node()->get_lod_scale();
 
   int num_children = std::min(get_num_children(), (int)cdata->_switch_vector.size());
-  for (int index = 0; index < num_children; ++index) {
-    const Switch &sw = cdata->_switch_vector[index];
-    bool in_range;
-    if (cdata->_got_force_switch) {
-      in_range = (cdata->_force_switch == index);
-    } else {
-      in_range = sw.in_range_2(dist2 * cdata->_lod_scale
-                   * trav->get_scene()->get_camera_node()->get_lod_scale());
+
+  if (data._instances == nullptr || cdata->_got_force_switch) {
+    LPoint3 center = cdata->_center * rel_transform->get_mat();
+    PN_stdfloat dist2 = center.dot(center);
+
+    for (int index = 0; index < num_children; ++index) {
+      const Switch &sw = cdata->_switch_vector[index];
+      bool in_range;
+      if (cdata->_got_force_switch) {
+        in_range = (cdata->_force_switch == index);
+      } else {
+        in_range = sw.in_range_2(dist2 * lod_scale);
+      }
+
+      if (in_range) {
+        // This switch level is in range.  Draw its children.
+        PandaNode *child = get_child(index);
+        if (child != nullptr) {
+          CullTraverserData next_data(data, child);
+          trav->traverse(next_data);
+        }
+      }
+    }
+  }
+  else {
+    // Figure out which instances in which switch levels should be visible.
+    size_t num_instances = data._instances->size();
+    std::unique_ptr<BitArray[]> in_range(new BitArray[num_children]);
+
+    for (size_t ii = 0; ii < num_instances; ++ii) {
+      LPoint3 inst_center = cdata->_center *
+        rel_transform->compose((*data._instances)[ii].get_transform())->get_mat();
+      PN_stdfloat dist2 = inst_center.dot(inst_center);
+
+      for (int index = 0; index < num_children; ++index) {
+        const Switch &sw = cdata->_switch_vector[index];
+        if (!sw.in_range_2(dist2 * lod_scale)) {
+          in_range[index].set_bit(ii);
+        }
+      }
     }
 
-    if (in_range) {
-      // This switch level is in range.  Draw its children.
-      PandaNode *child = get_child(index);
-      if (child != nullptr) {
-        CullTraverserData next_data(data, child);
-        trav->traverse(next_data);
+    for (int index = 0; index < num_children; ++index) {
+      CPT(InstanceList) instances = data._instances->without(in_range[index]);
+      if (!instances->empty()) {
+        // At least one instance is visible in this switch level.
+        PandaNode *child = get_child(index);
+        if (child != nullptr) {
+          CullTraverserData next_data(data, child);
+          next_data._instances = instances;
+          trav->traverse(next_data);
+        }
       }
     }
   }
@@ -321,13 +357,30 @@ compute_child(CullTraverser *trav, CullTraverserData &data) {
     return cdata->_force_switch;
   }
 
+  PN_stdfloat lod_scale = cdata->_lod_scale *
+    trav->get_scene()->get_camera_node()->get_lod_scale();
+
   CPT(TransformState) rel_transform = get_rel_transform(trav, data);
-  LPoint3 center = cdata->_center * rel_transform->get_mat();
+  LPoint3 center;
+
+  if (data._instances == nullptr) {
+    center = cdata->_center * rel_transform->get_mat();
+  }
+  else {
+    // Can't really do much with instancing in FadeLODNode; let's instead
+    // just calculate the centroid of the visible instances.
+    center = LPoint3(0);
+    for (const InstanceList::Instance &instance : *data._instances) {
+      center += cdata->_center *
+        rel_transform->compose(instance.get_transform())->get_mat();
+    }
+    center *= 1.0 / data._instances->size();
+  }
+
   PN_stdfloat dist2 = center.dot(center);
 
   for (int index = 0; index < (int)cdata->_switch_vector.size(); ++index) {
-    if (cdata->_switch_vector[index].in_range_2(dist2 * cdata->_lod_scale
-         * trav->get_scene()->get_camera_node()->get_lod_scale())) {
+    if (cdata->_switch_vector[index].in_range_2(dist2 * lod_scale)) {
       if (pgraph_cat.is_debug()) {
         pgraph_cat.debug()
           << data.get_node_path() << " at distance " << sqrt(dist2)

+ 3 - 1
panda/src/pgraphnodes/shaderGenerator.cxx

@@ -1094,7 +1094,9 @@ synthesize_shader(const RenderState *rs, const GeomVertexAnimationSpec &anim) {
     text << "\t uniform float4 clipplane_" << i << ",\n";
   }
 
-  text << "\t uniform float4 attr_ambient,\n";
+  if (key._lighting) {
+    text << "\t uniform float4 attr_ambient,\n";
+  }
   text << "\t uniform float4 attr_colorscale\n";
   text << ") {\n";
 

+ 3 - 3
panda/src/pipeline/pythonThread.cxx

@@ -38,12 +38,12 @@ PythonThread(PyObject *function, PyObject *args,
 
   set_args(args);
 
-#ifndef SIMPLE_THREADS
+#if !defined(SIMPLE_THREADS) && defined(WITH_THREAD) && PY_VERSION_HEX < 0x03090000
   // Ensure that the Python threading system is initialized and ready to go.
-#ifdef WITH_THREAD  // This symbol defined within Python.h
+  // WITH_THREAD symbol defined within Python.h
+  // PyEval_InitThreads is now a deprecated no-op in Python 3.9+
   PyEval_InitThreads();
 #endif
-#endif
 }
 
 /**

+ 5 - 0
panda/src/pstatclient/pStatCollector.I

@@ -142,6 +142,7 @@ output(std::ostream &out) const {
  */
 INLINE bool PStatCollector::
 is_active() {
+  nassertr(_client != nullptr, false);
 #ifndef HAVE_THREADS
   return _client->is_active(_index, 0);
 #else  // HAVE_THREADS
@@ -155,6 +156,7 @@ is_active() {
  */
 INLINE bool PStatCollector::
 is_started() {
+  nassertr(_client != nullptr, false);
 #ifndef HAVE_THREADS
   return _client->is_started(_index, 0);
 #else  // HAVE_THREADS
@@ -168,6 +170,7 @@ is_started() {
  */
 INLINE void PStatCollector::
 start() {
+  nassertv(_client != nullptr);
 #ifndef HAVE_THREADS
   _client->start(_index, 0);
 #else  // HAVE_THREADS
@@ -181,6 +184,7 @@ start() {
  */
 INLINE void PStatCollector::
 stop() {
+  nassertv(_client != nullptr);
 #ifndef HAVE_THREADS
   _client->stop(_index, 0);
 #else  // HAVE_THREADS
@@ -262,6 +266,7 @@ sub_level_now(double decrement) {
 INLINE void PStatCollector::
 flush_level() {
   if (_level != 0.0f) {
+    nassertv(_client != nullptr);
     _client->add_level(_index, 0, _level);
     _level = 0.0f;
   }

+ 6 - 5
panda/src/putil/pythonCallbackObject.cxx

@@ -41,15 +41,16 @@ PythonCallbackObject(PyObject *function) {
 
   set_function(function);
 
-#ifndef SIMPLE_THREADS
+#if !defined(SIMPLE_THREADS) && defined(WITH_THREAD)
   // Ensure that the Python threading system is initialized and ready to go.
-#ifdef WITH_THREAD  // This symbol defined within Python.h
-
+  // WITH_THREAD symbol defined within Python.h
   Py_Initialize();
 
+#if PY_VERSION_HEX < 0x03090000
+  // PyEval_InitThreads is now a deprecated no-op in Python 3.9+
   PyEval_InitThreads();
-#endif
-#endif
+#endif // PY_VERSION_HEX
+#endif // WITH_THREAD
 }
 
 /**

+ 2 - 2
panda/src/tinydisplay/tinyGraphicsStateGuardian.cxx

@@ -472,14 +472,14 @@ end_frame(Thread *current_thread) {
 bool TinyGraphicsStateGuardian::
 begin_draw_primitives(const GeomPipelineReader *geom_reader,
                       const GeomVertexDataPipelineReader *data_reader,
-                      bool force) {
+                      size_t num_instances, bool force) {
 #ifndef NDEBUG
   if (tinydisplay_cat.is_spam()) {
     tinydisplay_cat.spam() << "begin_draw_primitives: " << *(data_reader->get_object()) << "\n";
   }
 #endif  // NDEBUG
 
-  if (!GraphicsStateGuardian::begin_draw_primitives(geom_reader, data_reader, force)) {
+  if (!GraphicsStateGuardian::begin_draw_primitives(geom_reader, data_reader, num_instances, force)) {
     return false;
   }
   nassertr(_data_reader != nullptr, false);

+ 1 - 1
panda/src/tinydisplay/tinyGraphicsStateGuardian.h

@@ -64,7 +64,7 @@ public:
 
   virtual bool begin_draw_primitives(const GeomPipelineReader *geom_reader,
                                      const GeomVertexDataPipelineReader *data_reader,
-                                     bool force);
+                                     size_t num_instances, bool force);
   virtual bool draw_triangles(const GeomPrimitivePipelineReader *reader,
                               bool force);
   virtual bool draw_tristrips(const GeomPrimitivePipelineReader *reader,

+ 1 - 0
setup.cfg

@@ -17,6 +17,7 @@ classifiers =
     Programming Language :: Python :: 3.6
     Programming Language :: Python :: 3.7
     Programming Language :: Python :: 3.8
+    Programming Language :: Python :: 3.9
     Programming Language :: Python :: Implementation :: CPython
     Topic :: Games/Entertainment
     Topic :: Multimedia

+ 45 - 0
tests/collide/test_collision_polygon.py

@@ -0,0 +1,45 @@
+from panda3d import core
+
+
+def test_collision_polygon_verify_not_enough_points():
+    # Fewer than 3 points cannot form a polygon: check 0, 1 and 2 points
+    assert not core.CollisionPolygon.verify_points([])
+    assert not core.CollisionPolygon.verify_points([core.LPoint3(1, 0, 0)])
+    assert not core.CollisionPolygon.verify_points([core.LPoint3(1, 0, 0), core.LPoint3(0, 0, 1)])
+
+
+def test_collision_polygon_verify_repeating_points():
+    # A point list containing a repeated point (adjacent or not) must be rejected
+    assert not core.CollisionPolygon.verify_points([core.LPoint3(1, 0, 0), core.LPoint3(1, 0, 0), core.LPoint3(0, 0, 1)])
+    assert not core.CollisionPolygon.verify_points([core.LPoint3(3, 6, 1), core.LPoint3(1, 3, 5), core.LPoint3(9, 1, 2), core.LPoint3(1, 3, 5)])
+
+
+def test_collision_polygon_verify_colinear_points():
+    # Points that all lie on a single line cannot form a polygon
+    assert not core.CollisionPolygon.verify_points([core.LPoint3(1, 2, 3), core.LPoint3(2, 3, 4), core.LPoint3(3, 4, 5)])
+    assert not core.CollisionPolygon.verify_points([core.LPoint3(2, 1, 1), core.LPoint3(3, 2, 1), core.LPoint3(4, 3, 1)])
+
+
+def test_collision_polygon_verify_points():
+    # Valid inputs: distinct, non-colinear point lists with 3, 4 and 5 points
+    assert core.CollisionPolygon.verify_points([core.LPoint3(1, 0, 0), core.LPoint3(0, 1, 0), core.LPoint3(0, 0, 1)])
+    assert core.CollisionPolygon.verify_points([core.LPoint3(10, 2, 8), core.LPoint3(7, 1, 3), core.LPoint3(5, 9, 6)])
+    assert core.CollisionPolygon.verify_points([core.LPoint3(3, -8, -7), core.LPoint3(9, 10, 8), core.LPoint3(7, 0, 10), core.LPoint3(-6, -2, 3)])
+    assert core.CollisionPolygon.verify_points([core.LPoint3(-1, -3, -5), core.LPoint3(10, 3, -10), core.LPoint3(-10, 10, -4), core.LPoint3(0, 1, -4), core.LPoint3(-9, -2, 0)])
+
+
+def test_collision_polygon_setup_points():
+    # Build a degenerate polygon from three identical points; it must be invalid
+    polygon = core.CollisionPolygon(core.LVecBase3(0, 0, 0), core.LVecBase3(0, 0, 0), core.LVecBase3(0, 0, 0))
+    assert not polygon.is_valid()
+
+    # setup_points() must make it valid and keep every point of each list
+    for points in [
+        [core.LPoint3(-1, -3, -5), core.LPoint3(10, 3, -10), core.LPoint3(-10, 10, -4), core.LPoint3(0, 1, -4), core.LPoint3(-9, -2, 0)],
+        [core.LPoint3(3, -8, -7), core.LPoint3(9, 10, 8), core.LPoint3(7, 0, 10), core.LPoint3(-6, -2, 3)],
+        [core.LPoint3(1, 0, 0), core.LPoint3(0, 1, 0), core.LPoint3(0, 0, 1)],
+        [core.LPoint3(10, 2, 8), core.LPoint3(7, 1, 3), core.LPoint3(5, 9, 6)]
+    ]:
+        polygon.setup_points(points)
+        assert polygon.is_valid()
+        assert polygon.get_num_points() == len(points)