Forráskód Böngészése

Work towards ABI stability wrt allocation. Let MemoryHook take advantage of dlmalloc's internal bookkeeping.

rdb 8 éve
szülő
commit
8c914a2855

+ 2 - 2
dtool/src/dtoolbase/deletedChain.h

@@ -77,7 +77,7 @@ public:
 // Place this macro within a class definition to define appropriate operator
 // new and delete methods that take advantage of DeletedChain.
 #define ALLOC_DELETED_CHAIN(Type)                            \
-  inline void *operator new(size_t size) {                   \
+  inline void *operator new(size_t size) RETURNS_ALIGNED(MEMORY_HOOK_ALIGNMENT) { \
     return (void *)StaticDeletedChain< Type >::allocate(size, get_type_handle(Type)); \
   }                                                          \
   inline void *operator new(size_t size, void *ptr) {        \
@@ -96,7 +96,7 @@ public:
 // Use this variant of the above macro in cases in which the compiler fails to
 // unify the static template pointers properly, to prevent leaks.
 #define ALLOC_DELETED_CHAIN_DECL(Type)                       \
-  inline void *operator new(size_t size) {                   \
+  inline void *operator new(size_t size) RETURNS_ALIGNED(MEMORY_HOOK_ALIGNMENT) { \
     return (void *)_deleted_chain.allocate(size, get_type_handle(Type)); \
   }                                                          \
   inline void *operator new(size_t size, void *ptr) {        \

+ 39 - 0
dtool/src/dtoolbase/dtoolbase.h

@@ -76,6 +76,10 @@
 #define __has_builtin(x) 0
 #endif
 
+#ifndef __has_attribute
+#define __has_attribute(x) 0
+#endif
+
 // Use NODEFAULT to optimize a switch() stmt to tell MSVC to automatically go
 // to the final untested case after it has failed all the other cases (i.e.
 // 'assume at least one of the cases is always true')
@@ -96,6 +100,12 @@
 #define ASSUME_ALIGNED(x, y) (x)
 #endif
 
+#if __has_attribute(assume_aligned) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9)
+#define RETURNS_ALIGNED(x) __attribute__((assume_aligned(x)))
+#else
+#define RETURNS_ALIGNED(x)
+#endif
+
 /*
   include win32 defns for everything up to WinServer2003, and assume
   I'm smart enough to use GetProcAddress for backward compat on
@@ -400,6 +410,35 @@ typedef struct _object PyObject;
 
 #endif
 
+#ifdef LINMATH_ALIGN
+/* We require 16-byte alignment of certain structures, to support SSE2.  We
+   don't strictly have to align everything, but it's just easier to do so. */
+#if defined(HAVE_EIGEN) && defined(__AVX__) && defined(STDFLOAT_DOUBLE)
+/* Eigen uses AVX instructions, but let's only enable this when compiling with
+   double precision, so that we can keep our ABI a bit more stable. */
+#define MEMORY_HOOK_ALIGNMENT 32
+#else
+#define MEMORY_HOOK_ALIGNMENT 16
+#endif
+/* Otherwise, align to two words.  This seems to be pretty standard to the
+   point where some code may rely on this being the case. */
+#elif defined(IS_OSX) || NATIVE_WORDSIZE >= 64
+#define MEMORY_HOOK_ALIGNMENT 16
+#else
+#define MEMORY_HOOK_ALIGNMENT 8
+#endif
+
+#ifdef HAVE_EIGEN
+/* Make sure that Eigen doesn't assume alignment guarantees we don't offer. */
+#define EIGEN_MAX_ALIGN_BYTES MEMORY_HOOK_ALIGNMENT
+#ifndef EIGEN_MPL2_ONLY
+#define EIGEN_MPL2_ONLY 1
+#endif
+#if !defined(_DEBUG) && !defined(EIGEN_NO_DEBUG)
+#define EIGEN_NO_DEBUG 1
+#endif
+#endif
+
 /* Determine our memory-allocation requirements. */
 #if defined(USE_MEMORY_PTMALLOC2) || defined(USE_MEMORY_DLMALLOC) || defined(DO_MEMORY_USAGE) || defined(MEMORY_HOOK_DO_ALIGN)
 /* In this case we have some custom memory management requirements. */

+ 2 - 2
dtool/src/dtoolbase/memoryBase.h

@@ -26,7 +26,7 @@
 #ifndef USE_MEMORY_NOWRAPPERS
 
 #define ALLOC_MEMORY_BASE                                    \
-  inline void *operator new(size_t size) {                   \
+  inline void *operator new(size_t size) RETURNS_ALIGNED(MEMORY_HOOK_ALIGNMENT) { \
     return PANDA_MALLOC_SINGLE(size);                        \
   }                                                          \
   inline void *operator new(size_t size, void *ptr) {        \
@@ -38,7 +38,7 @@
   }                                                          \
   inline void operator delete(void *, void *) {              \
   }                                                          \
-  inline void *operator new[](size_t size) {                 \
+  inline void *operator new[](size_t size) RETURNS_ALIGNED(MEMORY_HOOK_ALIGNMENT) { \
     return PANDA_MALLOC_ARRAY(size);                         \
   }                                                          \
   inline void *operator new[](size_t size, void *ptr) {      \

+ 12 - 67
dtool/src/dtoolbase/memoryHook.I

@@ -63,14 +63,24 @@ round_up_to_page_size(size_t size) const {
 
 /**
  * Given a pointer that was returned by a MemoryHook allocation, returns the
- * number of bytes that were allocated for it.  Returns 0 if not compiling
- * with DO_MEMORY_USAGE.
+ * number of bytes that were allocated for it.  This may be slightly larger
+ * than the number of bytes requested.
+ * The behavior of this function is undefined if the given pointer was not
+ * returned by the MemoryHook allocator or was already freed.
+ * May return 0 if not compiling with DO_MEMORY_USAGE.
+ *
+ * This is only defined publicly so TypeHandle can get at it; it really
+ * shouldn't be used outside of dtoolbase.
  */
 INLINE size_t MemoryHook::
 get_ptr_size(void *ptr) {
 #if defined(MEMORY_HOOK_DO_ALIGN)
   uintptr_t *root = (uintptr_t *)ptr;
   return (size_t)root[-2];
+#elif defined(USE_MEMORY_DLMALLOC) || defined(USE_MEMORY_PTMALLOC2)
+  // If we are using dlmalloc, we know how it stores the size.
+  size_t *root = (size_t *)ptr;
+  return (root[-1] & ~0x7) - sizeof(size_t);
 #elif defined(DO_MEMORY_USAGE)
   size_t *root = (size_t *)((char *)ptr - MEMORY_HOOK_ALIGNMENT);
   return *root;
@@ -78,68 +88,3 @@ get_ptr_size(void *ptr) {
   return 0;
 #endif  // DO_MEMORY_USAGE
 }
-
-/**
- * Increments the amount of requested size as necessary to accommodate the
- * extra data we might piggyback on each allocated block.
- */
-INLINE size_t MemoryHook::
-inflate_size(size_t size) {
-#if defined(MEMORY_HOOK_DO_ALIGN)
-  // If we're aligning, we need to request the header size, plus extra bytes
-  // to give us wiggle room to adjust the pointer.
-  return size + sizeof(uintptr_t) * 2 + MEMORY_HOOK_ALIGNMENT - 1;
-#elif defined(DO_MEMORY_USAGE)
-  // If we're not aligning, but we're tracking memory allocations, we just
-  // need the header size extra (this gives us a place to store the size of
-  // the allocated block).  However, we do need to make sure that any
-  // alignment guarantee is kept.
-  return size + MEMORY_HOOK_ALIGNMENT;
-#else
-  // If we're not doing any of that, we can just allocate the precise
-  // requested amount.
-  return size;
-#endif  // DO_MEMORY_USAGE
-}
-
-/**
- * Converts an allocated pointer to a pointer returnable to the application.
- * Stuffs size in the first n bytes of the allocated space.
- */
-INLINE void *MemoryHook::
-alloc_to_ptr(void *alloc, size_t size) {
-#if defined(MEMORY_HOOK_DO_ALIGN)
-  // Add room for two uintptr_t values.
-  uintptr_t *root = (uintptr_t *)((char *)alloc + sizeof(uintptr_t) * 2);
-  // Align this to the requested boundary.
-  root = (uintptr_t *)(((uintptr_t)root + MEMORY_HOOK_ALIGNMENT - 1) & ~(MEMORY_HOOK_ALIGNMENT - 1));
-  root[-2] = size;
-  root[-1] = (uintptr_t)alloc;  // Save the pointer we originally allocated.
-  return (void *)root;
-#elif defined(DO_MEMORY_USAGE)
-  size_t *root = (size_t *)alloc;
-  root[0] = size;
-  return (void *)((char *)root + MEMORY_HOOK_ALIGNMENT);
-#else
-  return alloc;
-#endif  // DO_MEMORY_USAGE
-}
-
-/**
- * Converts an application pointer back to the original allocated pointer.
- * Extracts size from the first n bytes of the allocated space.
- */
-INLINE void *MemoryHook::
-ptr_to_alloc(void *ptr, size_t &size) {
-#if defined(MEMORY_HOOK_DO_ALIGN)
-  uintptr_t *root = (uintptr_t *)ptr;
-  size = root[-2];
-  return (void *)root[-1]; // Get the pointer we originally allocated.
-#elif defined(DO_MEMORY_USAGE)
-  size_t *root = (size_t *)((char *)ptr - MEMORY_HOOK_ALIGNMENT);
-  size = root[0];
-  return (void *)root;
-#else
-  return ptr;
-#endif  // DO_MEMORY_USAGE
-}

+ 105 - 5
dtool/src/dtoolbase/memoryHook.cxx

@@ -14,6 +14,7 @@
 #include "memoryHook.h"
 #include "deletedBufferChain.h"
 #include <stdlib.h>
+#include "typeRegistry.h"
 
 #ifdef WIN32
 
@@ -104,6 +105,83 @@ static_assert((MEMORY_HOOK_ALIGNMENT & (MEMORY_HOOK_ALIGNMENT - 1)) == 0,
 
 #endif  // USE_MEMORY_*
 
+/**
+ * Increments the amount of requested size as necessary to accommodate the
+ * extra data we might piggyback on each allocated block.
+ */
+INLINE static size_t
+inflate_size(size_t size) {
+#if defined(MEMORY_HOOK_DO_ALIGN)
+  // If we're aligning, we need to request the header size, plus extra bytes
+  // to give us wiggle room to adjust the pointer.
+  return size + sizeof(uintptr_t) * 2 + MEMORY_HOOK_ALIGNMENT - 1;
+#elif defined(USE_MEMORY_DLMALLOC) || defined(USE_MEMORY_PTMALLOC2)
+  // If we can access the allocator's bookkeeping to figure out how many
+  // bytes were allocated, we don't need to add our own information.
+  return size;
+#elif defined(DO_MEMORY_USAGE)
+  // If we're not aligning, but we're tracking memory allocations, we just
+  // need the header size extra (this gives us a place to store the size of
+  // the allocated block).  However, we do need to make sure that any
+  // alignment guarantee is kept.
+  return size + MEMORY_HOOK_ALIGNMENT;
+#else
+  // If we're not doing any of that, we can just allocate the precise
+  // requested amount.
+  return size;
+#endif  // DO_MEMORY_USAGE
+}
+
+/**
+ * Converts an allocated pointer to a pointer returnable to the application.
+ * Stuffs size in the first n bytes of the allocated space.
+ */
+INLINE static void *
+alloc_to_ptr(void *alloc, size_t size) {
+#if defined(MEMORY_HOOK_DO_ALIGN)
+  // Add room for two uintptr_t values.
+  uintptr_t *root = (uintptr_t *)((char *)alloc + sizeof(uintptr_t) * 2);
+  // Align this to the requested boundary.
+  root = (uintptr_t *)(((uintptr_t)root + MEMORY_HOOK_ALIGNMENT - 1) & ~(MEMORY_HOOK_ALIGNMENT - 1));
+  root[-2] = size;
+  root[-1] = (uintptr_t)alloc;  // Save the pointer we originally allocated.
+  return (void *)root;
+#elif defined(USE_MEMORY_DLMALLOC) || defined(USE_MEMORY_PTMALLOC2)
+  return alloc;
+#elif defined(DO_MEMORY_USAGE)
+  size_t *root = (size_t *)alloc;
+  root[0] = size;
+  return (void *)((char *)root + MEMORY_HOOK_ALIGNMENT);
+#else
+  return alloc;
+#endif  // DO_MEMORY_USAGE
+}
+
+/**
+ * Converts an application pointer back to the original allocated pointer.
+ * Extracts size from the first n bytes of the allocated space, but only if
+ * DO_MEMORY_USAGE is defined.
+ */
+INLINE static void *
+ptr_to_alloc(void *ptr, size_t &size) {
+#if defined(MEMORY_HOOK_DO_ALIGN)
+  uintptr_t *root = (uintptr_t *)ptr;
+  size = root[-2];
+  return (void *)root[-1]; // Get the pointer we originally allocated.
+#elif defined(USE_MEMORY_DLMALLOC) || defined(USE_MEMORY_PTMALLOC2)
+#ifdef DO_MEMORY_USAGE
+  size = MemoryHook::get_ptr_size(ptr);
+#endif
+  return ptr;
+#elif defined(DO_MEMORY_USAGE)
+  size_t *root = (size_t *)((char *)ptr - MEMORY_HOOK_ALIGNMENT);
+  size = root[0];
+  return (void *)root;
+#else
+  return ptr;
+#endif  // DO_MEMORY_USAGE
+}
+
 /**
  *
  */
@@ -195,6 +273,11 @@ heap_alloc_single(size_t size) {
 #ifdef DO_MEMORY_USAGE
   // In the DO_MEMORY_USAGE case, we want to track the total size of allocated
   // bytes on the heap.
+#if defined(USE_MEMORY_DLMALLOC) || defined(USE_MEMORY_PTMALLOC2)
+  // dlmalloc may slightly overallocate, however.
+  size = get_ptr_size(alloc);
+  inflated_size = size;
+#endif
   AtomicAdjust::add(_total_heap_single_size, (AtomicAdjust::Integer)size);
   if ((size_t)AtomicAdjust::get(_total_heap_single_size) +
       (size_t)AtomicAdjust::get(_total_heap_array_size) >
@@ -204,8 +287,10 @@ heap_alloc_single(size_t size) {
 #endif  // DO_MEMORY_USAGE
 
   void *ptr = alloc_to_ptr(alloc, size);
+#ifdef _DEBUG
   assert(((uintptr_t)ptr % MEMORY_HOOK_ALIGNMENT) == 0);
   assert(ptr >= alloc && (char *)ptr + size <= (char *)alloc + inflated_size);
+#endif
   return ptr;
 }
 
@@ -265,6 +350,11 @@ heap_alloc_array(size_t size) {
 #ifdef DO_MEMORY_USAGE
   // In the DO_MEMORY_USAGE case, we want to track the total size of allocated
   // bytes on the heap.
+#if defined(USE_MEMORY_DLMALLOC) || defined(USE_MEMORY_PTMALLOC2)
+  // dlmalloc may slightly overallocate, however.
+  size = get_ptr_size(alloc);
+  inflated_size = size;
+#endif
   AtomicAdjust::add(_total_heap_array_size, (AtomicAdjust::Integer)size);
   if ((size_t)AtomicAdjust::get(_total_heap_single_size) +
       (size_t)AtomicAdjust::get(_total_heap_array_size) >
@@ -274,8 +364,10 @@ heap_alloc_array(size_t size) {
 #endif  // DO_MEMORY_USAGE
 
   void *ptr = alloc_to_ptr(alloc, size);
+#ifdef _DEBUG
   assert(((uintptr_t)ptr % MEMORY_HOOK_ALIGNMENT) == 0);
   assert(ptr >= alloc && (char *)ptr + size <= (char *)alloc + inflated_size);
+#endif
   return ptr;
 }
 
@@ -287,11 +379,6 @@ heap_realloc_array(void *ptr, size_t size) {
   size_t orig_size;
   void *alloc = ptr_to_alloc(ptr, orig_size);
 
-#ifdef DO_MEMORY_USAGE
-  assert((AtomicAdjust::Integer)orig_size <= _total_heap_array_size);
-  AtomicAdjust::add(_total_heap_array_size, (AtomicAdjust::Integer)size-(AtomicAdjust::Integer)orig_size);
-#endif  // DO_MEMORY_USAGE
-
   size_t inflated_size = inflate_size(size);
 
   void *alloc1 = alloc;
@@ -318,6 +405,16 @@ heap_realloc_array(void *ptr, size_t size) {
 #endif
   }
 
+#ifdef DO_MEMORY_USAGE
+#if defined(USE_MEMORY_DLMALLOC) || defined(USE_MEMORY_PTMALLOC2)
+  // dlmalloc may slightly overallocate, however.
+  size = get_ptr_size(alloc1);
+  inflated_size = size;
+#endif
+  assert((AtomicAdjust::Integer)orig_size <= _total_heap_array_size);
+  AtomicAdjust::add(_total_heap_array_size, (AtomicAdjust::Integer)size-(AtomicAdjust::Integer)orig_size);
+#endif  // DO_MEMORY_USAGE
+
   // Align this to the requested boundary.
 #ifdef MEMORY_HOOK_DO_ALIGN
   // This copies the code from alloc_to_ptr, since we can't write the size and
@@ -337,8 +434,11 @@ heap_realloc_array(void *ptr, size_t size) {
 #else
   void *ptr1 = alloc_to_ptr(alloc1, size);
 #endif
+
+#ifdef _DEBUG
   assert(ptr1 >= alloc1 && (char *)ptr1 + size <= (char *)alloc1 + inflated_size);
   assert(((uintptr_t)ptr1 % MEMORY_HOOK_ALIGNMENT) == 0);
+#endif
   return ptr1;
 }
 

+ 0 - 21
dtool/src/dtoolbase/memoryHook.h

@@ -20,22 +20,6 @@
 #include "mutexImpl.h"
 #include <map>
 
-#ifdef LINMATH_ALIGN
-// We require 16-byte alignment of certain structures, to support SSE2.  We
-// don't strictly have to align *everything*, but it's just easier to do so.
-#ifdef __AVX__
-#define MEMORY_HOOK_ALIGNMENT 32
-#else
-#define MEMORY_HOOK_ALIGNMENT 16
-#endif
-// Otherwise, align to two words.  This seems to be pretty standard to the
-// point where some code may rely on this being the case.
-#elif defined(IS_OSX) || NATIVE_WORDSIZE >= 64
-#define MEMORY_HOOK_ALIGNMENT 16
-#else
-#define MEMORY_HOOK_ALIGNMENT 8
-#endif
-
 class DeletedBufferChain;
 
 /**
@@ -83,11 +67,6 @@ public:
 
   INLINE static size_t get_ptr_size(void *ptr);
 
-private:
-  INLINE static size_t inflate_size(size_t size);
-  INLINE static void *alloc_to_ptr(void *alloc, size_t size);
-  INLINE static void *ptr_to_alloc(void *ptr, size_t &size);
-
 #ifdef DO_MEMORY_USAGE
 protected:
   TVOLATILE AtomicAdjust::Integer _total_heap_single_size;

+ 5 - 25
dtool/src/dtoolbase/pallocator.T

@@ -19,7 +19,7 @@ pallocator_single(TypeHandle type_handle) NOEXCEPT :
 }
 
 template<class Type>
-INLINE TYPENAME pallocator_single<Type>::pointer pallocator_single<Type>::
+INLINE Type *pallocator_single<Type>::
 allocate(TYPENAME pallocator_single<Type>::size_type n, TYPENAME allocator<void>::const_pointer) {
   TAU_PROFILE("pallocator_single:allocate()", " ", TAU_USER);
   // This doesn't support allocating arrays.
@@ -43,34 +43,14 @@ pallocator_array(TypeHandle type_handle) NOEXCEPT :
 }
 
 template<class Type>
-INLINE TYPENAME pallocator_array<Type>::pointer pallocator_array<Type>::
+INLINE Type *pallocator_array<Type>::
 allocate(TYPENAME pallocator_array<Type>::size_type n, TYPENAME allocator<void>::const_pointer) {
-  TAU_PROFILE("pallocator_array:allocate()", " ", TAU_USER);
-#ifdef DO_MEMORY_USAGE
-  size_t alloc_size = n * sizeof(Type);
-  void *ptr = (TYPENAME pallocator_array<Type>::pointer)PANDA_MALLOC_ARRAY(alloc_size);
-#ifdef _DEBUG
-  assert(alloc_size == MemoryHook::get_ptr_size(ptr));
-#endif
-  _type_handle.inc_memory_usage(TypeHandle::MC_array, alloc_size);
-  return (TYPENAME pallocator_array<Type>::pointer)ASSUME_ALIGNED(ptr, MEMORY_HOOK_ALIGNMENT);
-#else
-  return (TYPENAME pallocator_array<Type>::pointer)PANDA_MALLOC_ARRAY(n * sizeof(Type));
-#endif  // DO_MEMORY_USAGE
+  return (TYPENAME pallocator_array<Type>::pointer)
+    ASSUME_ALIGNED(_type_handle.allocate_array(n * sizeof(Type)), MEMORY_HOOK_ALIGNMENT);
 }
 
 template<class Type>
 INLINE void pallocator_array<Type>::
 deallocate(TYPENAME pallocator_array<Type>::pointer p, TYPENAME pallocator_array<Type>::size_type) {
-  TAU_PROFILE("pallocator_array:deallocate()", " ", TAU_USER);
-#ifdef DO_MEMORY_USAGE
-  // Now we need to recover the total number of bytes.  Fortunately, in the
-  // case of DO_MEMORY_USAGE, MemoryHook already keeps track of this.
-  void *ptr = (void *)p;
-  size_t alloc_size = MemoryHook::get_ptr_size(ptr);
-  _type_handle.dec_memory_usage(TypeHandle::MC_array, alloc_size);
-  PANDA_FREE_ARRAY(ptr);
-#else
-  PANDA_FREE_ARRAY(p);
-#endif  // DO_MEMORY_USAGE
+  _type_handle.deallocate_array((void *)p);
 }

+ 4 - 2
dtool/src/dtoolbase/pallocator.h

@@ -59,7 +59,8 @@ public:
   INLINE pallocator_single(const pallocator_single<U> &copy) NOEXCEPT :
     _type_handle(copy._type_handle) { }
 
-  INLINE pointer allocate(size_type n, allocator<void>::const_pointer hint = 0);
+  INLINE Type *allocate(size_type n, allocator<void>::const_pointer hint = 0)
+    RETURNS_ALIGNED(MEMORY_HOOK_ALIGNMENT);
   INLINE void deallocate(pointer p, size_type n);
 
   template<class U> struct rebind {
@@ -87,7 +88,8 @@ public:
   INLINE pallocator_array(const pallocator_array<U> &copy) NOEXCEPT :
     _type_handle(copy._type_handle) { }
 
-  INLINE pointer allocate(size_type n, allocator<void>::const_pointer hint = 0);
+  INLINE Type *allocate(size_type n, allocator<void>::const_pointer hint = 0)
+    RETURNS_ALIGNED(MEMORY_HOOK_ALIGNMENT);
   INLINE void deallocate(pointer p, size_type n);
 
   template<class U> struct rebind {

+ 79 - 5
dtool/src/dtoolbase/typeHandle.cxx

@@ -18,7 +18,6 @@
 // This is initialized to zero by static initialization.
 TypeHandle TypeHandle::_none;
 
-#ifdef DO_MEMORY_USAGE
 /**
  * Returns the total allocated memory used by objects of this type, for the
  * indicated memory class.  This is only updated if track-memory-usage is set
@@ -26,6 +25,7 @@ TypeHandle TypeHandle::_none;
  */
 size_t TypeHandle::
 get_memory_usage(MemoryClass memory_class) const {
+#ifdef DO_MEMORY_USAGE
   assert((int)memory_class >= 0 && (int)memory_class < (int)MC_limit);
   if ((*this) == TypeHandle::none()) {
     return 0;
@@ -34,16 +34,17 @@ get_memory_usage(MemoryClass memory_class) const {
     assert(rnode != (TypeRegistryNode *)NULL);
     return (size_t)AtomicAdjust::get(rnode->_memory_usage[memory_class]);
   }
-}
 #endif  // DO_MEMORY_USAGE
+  return 0;
+}
 
-#ifdef DO_MEMORY_USAGE
 /**
  * Adds the indicated amount to the record for the total allocated memory for
  * objects of this type.
  */
 void TypeHandle::
 inc_memory_usage(MemoryClass memory_class, size_t size) {
+#ifdef DO_MEMORY_USAGE
   assert((int)memory_class >= 0 && (int)memory_class < (int)MC_limit);
   if ((*this) != TypeHandle::none()) {
     TypeRegistryNode *rnode = TypeRegistry::ptr()->look_up(*this, NULL);
@@ -56,16 +57,16 @@ inc_memory_usage(MemoryClass memory_class, size_t size) {
       abort();
     }
   }
-}
 #endif  // DO_MEMORY_USAGE
+}
 
-#ifdef DO_MEMORY_USAGE
 /**
  * Subtracts the indicated amount from the record for the total allocated
  * memory for objects of this type.
  */
 void TypeHandle::
 dec_memory_usage(MemoryClass memory_class, size_t size) {
+#ifdef DO_MEMORY_USAGE
   assert((int)memory_class >= 0 && (int)memory_class < (int)MC_limit);
   if ((*this) != TypeHandle::none()) {
     TypeRegistryNode *rnode = TypeRegistry::ptr()->look_up(*this, NULL);
@@ -75,8 +76,81 @@ dec_memory_usage(MemoryClass memory_class, size_t size) {
     // rnode->_memory_usage[memory_class] << "\n";
     assert(rnode->_memory_usage[memory_class] >= 0);
   }
+#endif  // DO_MEMORY_USAGE
+}
+
+/**
+ * Allocates memory, adding it to the total amount of memory allocated for
+ * this type.
+ */
+void *TypeHandle::
+allocate_array(size_t size) {
+  TAU_PROFILE("TypeHandle:allocate_array()", " ", TAU_USER);
+
+  void *ptr = PANDA_MALLOC_ARRAY(size);
+#ifdef DO_MEMORY_USAGE
+  if ((*this) != TypeHandle::none()) {
+    size_t alloc_size = MemoryHook::get_ptr_size(ptr);
+#ifdef _DEBUG
+    assert(size <= alloc_size);
+#endif
+    TypeRegistryNode *rnode = TypeRegistry::ptr()->look_up(*this, NULL);
+    assert(rnode != (TypeRegistryNode *)NULL);
+    AtomicAdjust::add(rnode->_memory_usage[MC_array], (AtomicAdjust::Integer)alloc_size);
+    if (rnode->_memory_usage[MC_array] < 0) {
+      cerr << "Memory usage overflow for type " << *this << ".\n";
+      abort();
+    }
+  }
+#endif  // DO_MEMORY_USAGE
+  return ptr;
+}
+
+/**
+ * Reallocates memory, adjusting the total amount of memory allocated for this
+ * type.
+ */
+void *TypeHandle::
+reallocate_array(void *old_ptr, size_t size) {
+  TAU_PROFILE("TypeHandle:reallocate_array()", " ", TAU_USER);
+
+#ifdef DO_MEMORY_USAGE
+  size_t old_size = MemoryHook::get_ptr_size(old_ptr);
+  void *new_ptr = PANDA_REALLOC_ARRAY(old_ptr, size);
+
+  if ((*this) != TypeHandle::none()) {
+    size_t new_size = MemoryHook::get_ptr_size(new_ptr);
+
+    TypeRegistryNode *rnode = TypeRegistry::ptr()->look_up(*this, NULL);
+    assert(rnode != (TypeRegistryNode *)NULL);
+    AtomicAdjust::add(rnode->_memory_usage[MC_array], (AtomicAdjust::Integer)new_size - (AtomicAdjust::Integer)old_size);
+    assert(rnode->_memory_usage[MC_array] >= 0);
+  }
+#else
+  void *new_ptr = PANDA_REALLOC_ARRAY(old_ptr, size);
+#endif
+  return new_ptr;
 }
+
+/**
+ * Deallocates memory, subtracting it from the total amount of memory
+ * allocated for this type.
+ */
+void TypeHandle::
+deallocate_array(void *ptr) {
+  TAU_PROFILE("TypeHandle:deallocate_array()", " ", TAU_USER);
+
+#ifdef DO_MEMORY_USAGE
+  size_t alloc_size = MemoryHook::get_ptr_size(ptr);
+  if ((*this) != TypeHandle::none()) {
+    TypeRegistryNode *rnode = TypeRegistry::ptr()->look_up(*this, NULL);
+    assert(rnode != (TypeRegistryNode *)NULL);
+    AtomicAdjust::add(rnode->_memory_usage[MC_array], -(AtomicAdjust::Integer)alloc_size);
+    assert(rnode->_memory_usage[MC_array] >= 0);
+  }
 #endif  // DO_MEMORY_USAGE
+  PANDA_FREE_ARRAY(ptr);
+}
 
 /**
  * Return the Index of the BEst fit Classs from a set

+ 47 - 25
dtool/src/dtoolbase/typeHandle.h

@@ -18,25 +18,49 @@
 
 #include <set>
 
-// The following illustrates the convention for declaring a type that uses
-// TypeHandle.  In this example, ThisThingie inherits from TypedObject, which
-// automatically supplies some type-differentiation functions at the cost of
-// one virtual function, get_type(); however, this inheritance is optional,
-// and may be omitted to avoid the virtual function pointer overhead.  (If you
-// do use TypedObject, be sure to consider whether your destructor should also
-// be virtual.)
-
-/*
- * class ThatThingie : public SimpleTypedObject { public: static TypeHandle
- * get_class_type() { return _type_handle; } static void init_type() {
- * register_type(_type_handle, "ThatThingie"); } private: static TypeHandle
- * _type_handle; }; class ThisThingie : public ThatThingie, publid TypedObject
- * { public: static TypeHandle get_class_type() { return _type_handle; }
- * static void init_type() { ThatThingie::init_type();
- * TypedObject::init_type(); register_type(_type_handle, "ThisThingie",
- * ThatThingie::get_class_type(), TypedObject::get_class_type()); } virtual
- * TypeHandle get_type() const { return get_class_type(); } private: static
- * TypeHandle _type_handle; };
+/**
+ * The following illustrates the convention for declaring a type that uses
+ * TypeHandle.  In this example, ThisThingie inherits from TypedObject, which
+ * automatically supplies some type-differentiation functions at the cost of
+ * one virtual function, get_type(); however, this inheritance is optional,
+ * and may be omitted to avoid the virtual function pointer overhead.  (If you
+ * do use TypedObject, be sure to consider whether your destructor should also
+ * be virtual.)
+ *
+ * @code
+ * class ThatThingie : public SimpleTypedObject {
+ * public:
+ *   static TypeHandle get_class_type() {
+ *     return _type_handle;
+ *   }
+ *   static void init_type() {
+ *     register_type(_type_handle, "ThatThingie");
+ *   }
+ *
+ * private:
+ *   static TypeHandle _type_handle;
+ * };
+ *
+ * class ThisThingie : public ThatThingie, public TypedObject {
+ * public:
+ *   static TypeHandle get_class_type() {
+ *     return _type_handle;
+ *   }
+ *   static void init_type() {
+ *     ThatThingie::init_type();
+ *     TypedObject::init_type();
+ *     register_type(_type_handle, "ThisThingie",
+ *                  ThatThingie::get_class_type(),
+ *                  TypedObject::get_class_type());
+ *   }
+ *   virtual TypeHandle get_type() const {
+ *     return get_class_type();
+ *   }
+ *
+ * private:
+ *   static TypeHandle _type_handle;
+ * };
+ * @endcode
  */
 
 class TypedObject;
@@ -97,15 +121,9 @@ PUBLISHED:
 
   int get_best_parent_from_Set(const std::set< int > &legal_vals) const;
 
-#ifdef DO_MEMORY_USAGE
   size_t get_memory_usage(MemoryClass memory_class) const;
   void inc_memory_usage(MemoryClass memory_class, size_t size);
   void dec_memory_usage(MemoryClass memory_class, size_t size);
-#else
-  static CONSTEXPR size_t get_memory_usage(MemoryClass) { return 0; }
-  INLINE void inc_memory_usage(MemoryClass, size_t) { }
-  INLINE void dec_memory_usage(MemoryClass, size_t) { }
-#endif  // DO_MEMORY_USAGE
 
   INLINE int get_index() const;
   INLINE void output(ostream &out) const;
@@ -118,6 +136,10 @@ PUBLISHED:
   MAKE_SEQ_PROPERTY(child_classes, get_num_child_classes, get_child_class);
 
 public:
+  void *allocate_array(size_t size) RETURNS_ALIGNED(MEMORY_HOOK_ALIGNMENT);
+  void *reallocate_array(void *ptr, size_t size) RETURNS_ALIGNED(MEMORY_HOOK_ALIGNMENT);
+  void deallocate_array(void *ptr);
+
   INLINE static TypeHandle from_index(int index);
 
 private:

+ 13 - 0
dtool/src/dtoolutil/pandaSystem.cxx

@@ -45,6 +45,11 @@ PandaSystem() :
 #else
   set_system_tag("eigen", "vectorize", "0");
 #endif
+#ifdef __AVX__
+  set_system_tag("eigen", "avx", "1");
+#else
+  set_system_tag("eigen", "avx", "0");
+#endif
 #endif  // HAVE_EIGEN
 
 #ifdef USE_MEMORY_DLMALLOC
@@ -189,6 +194,14 @@ is_official_version() {
 #endif
 }
 
+/**
+ * Returns the memory alignment that Panda's allocators are using.
+ */
+int PandaSystem::
+get_memory_alignment() {
+  return MEMORY_HOOK_ALIGNMENT;
+}
+
 /**
  * Returns the string defined by the distributor of this version of Panda, or
  * "homebuilt" if this version was built directly from the sources by the end-

+ 2 - 0
dtool/src/dtoolutil/pandaSystem.h

@@ -39,6 +39,8 @@ PUBLISHED:
   static int get_sequence_version();
   static bool is_official_version();
 
+  static int get_memory_alignment();
+
   static string get_distributor();
   static string get_compiler();
   static string get_build_date();

+ 0 - 2
makepanda/makepanda.py

@@ -973,9 +973,7 @@ if GetTarget() == 'android':
     DefSymbol("ALWAYS", "ANDROID")
 
 if not PkgSkip("EIGEN"):
-    DefSymbol("ALWAYS", "EIGEN_MPL2_ONLY")
     if GetOptimize() >= 3:
-        DefSymbol("ALWAYS", "EIGEN_NO_DEBUG")
         if COMPILER == "MSVC":
             # Squeeze out a bit more performance on MSVC builds...
             # Only do this if EIGEN_NO_DEBUG is also set, otherwise it

+ 2 - 2
panda/src/gobj/geomVertexArrayData.h

@@ -261,8 +261,8 @@ public:
 
   INLINE Thread *get_current_thread() const;
 
-  INLINE const unsigned char *get_read_pointer(bool force) const;
-  unsigned char *get_write_pointer();
+  INLINE const unsigned char *get_read_pointer(bool force) const RETURNS_ALIGNED(MEMORY_HOOK_ALIGNMENT);
+  unsigned char *get_write_pointer() RETURNS_ALIGNED(MEMORY_HOOK_ALIGNMENT);
 
 PUBLISHED:
   INLINE const GeomVertexArrayData *get_object() const;

+ 11 - 20
panda/src/gobj/vertexDataBuffer.cxx

@@ -27,15 +27,13 @@ operator = (const VertexDataBuffer &copy) {
 
   if (_resident_data != (unsigned char *)NULL) {
     nassertv(_reserved_size != 0);
-    get_class_type().dec_memory_usage(TypeHandle::MC_array, (int)_reserved_size);
-    PANDA_FREE_ARRAY(_resident_data);
+    get_class_type().deallocate_array(_resident_data);
     _resident_data = NULL;
   }
   if (copy._resident_data != (unsigned char *)NULL && copy._size != 0) {
     // We only allocate _size bytes, not the full _reserved_size allocated by
     // the original copy.
-    get_class_type().inc_memory_usage(TypeHandle::MC_array, (int)copy._size);
-    _resident_data = (unsigned char *)PANDA_MALLOC_ARRAY(copy._size);
+    _resident_data = (unsigned char *)get_class_type().allocate_array(copy._size);
     memcpy(_resident_data, copy._resident_data, copy._size);
   }
   _size = copy._size;
@@ -55,17 +53,16 @@ swap(VertexDataBuffer &other) {
   unsigned char *resident_data = _resident_data;
   size_t size = _size;
   size_t reserved_size = _reserved_size;
-  PT(VertexDataBlock) block = _block;
+
+  _block.swap(other._block);
 
   _resident_data = other._resident_data;
   _size = other._size;
   _reserved_size = other._reserved_size;
-  _block = other._block;
 
   other._resident_data = resident_data;
   other._size = size;
   other._reserved_size = reserved_size;
-  other._block = block;
   nassertv(_reserved_size >= _size);
 }
 
@@ -94,13 +91,12 @@ do_clean_realloc(size_t reserved_size) {
       do_page_in();
     }
 
-    get_class_type().inc_memory_usage(TypeHandle::MC_array, (int)reserved_size - (int)_reserved_size);
     if (_reserved_size == 0) {
       nassertv(_resident_data == (unsigned char *)NULL);
-      _resident_data = (unsigned char *)PANDA_MALLOC_ARRAY(reserved_size);
+      _resident_data = (unsigned char *)get_class_type().allocate_array(reserved_size);
     } else {
       nassertv(_resident_data != (unsigned char *)NULL);
-      _resident_data = (unsigned char *)PANDA_REALLOC_ARRAY(_resident_data, reserved_size);
+      _resident_data = (unsigned char *)get_class_type().reallocate_array(_resident_data, reserved_size);
     }
     nassertv(_resident_data != (unsigned char *)NULL);
     _reserved_size = reserved_size;
@@ -129,16 +125,14 @@ do_unclean_realloc(size_t reserved_size) {
     if (_resident_data != (unsigned char *)NULL) {
       nassertv(_reserved_size != 0);
 
-      get_class_type().dec_memory_usage(TypeHandle::MC_array, (int)_reserved_size);
-      PANDA_FREE_ARRAY(_resident_data);
+      get_class_type().deallocate_array(_resident_data);
       _resident_data = NULL;
       _reserved_size = 0;
     }
 
     if (reserved_size != 0) {
-      get_class_type().inc_memory_usage(TypeHandle::MC_array, (int)reserved_size);
       nassertv(_resident_data == (unsigned char *)NULL);
-      _resident_data = (unsigned char *)PANDA_MALLOC_ARRAY(reserved_size);
+      _resident_data = (unsigned char *)get_class_type().allocate_array(reserved_size);
     }
 
     _reserved_size = reserved_size;
@@ -166,8 +160,7 @@ do_page_out(VertexDataBook &book) {
   if (_size == 0) {
     // It's an empty buffer.  Just deallocate it; don't bother to create a
     // block.
-    get_class_type().dec_memory_usage(TypeHandle::MC_array, (int)_reserved_size);
-    PANDA_FREE_ARRAY(_resident_data);
+    get_class_type().deallocate_array(_resident_data);
     _resident_data = NULL;
     _reserved_size = 0;
 
@@ -180,8 +173,7 @@ do_page_out(VertexDataBook &book) {
     nassertv(pointer != (unsigned char *)NULL);
     memcpy(pointer, _resident_data, _size);
 
-    get_class_type().dec_memory_usage(TypeHandle::MC_array, (int)_reserved_size);
-    PANDA_FREE_ARRAY(_resident_data);
+    get_class_type().deallocate_array(_resident_data);
     _resident_data = NULL;
 
     _reserved_size = _size;
@@ -205,8 +197,7 @@ do_page_in() {
   nassertv(_block != (VertexDataBlock *)NULL);
   nassertv(_reserved_size == _size);
 
-  get_class_type().inc_memory_usage(TypeHandle::MC_array, (int)_size);
-  _resident_data = (unsigned char *)PANDA_MALLOC_ARRAY(_size);
+  _resident_data = (unsigned char *)get_class_type().allocate_array(_size);
   nassertv(_resident_data != (unsigned char *)NULL);
 
   memcpy(_resident_data, _block->get_pointer(true), _size);

+ 2 - 2
panda/src/gobj/vertexDataBuffer.h

@@ -57,8 +57,8 @@ public:
   void operator = (const VertexDataBuffer &copy);
   INLINE ~VertexDataBuffer();
 
-  INLINE const unsigned char *get_read_pointer(bool force) const;
-  INLINE unsigned char *get_write_pointer();
+  INLINE const unsigned char *get_read_pointer(bool force) const RETURNS_ALIGNED(MEMORY_HOOK_ALIGNMENT);
+  INLINE unsigned char *get_write_pointer() RETURNS_ALIGNED(MEMORY_HOOK_ALIGNMENT);
 
   INLINE size_t get_size() const;
   INLINE size_t get_reserved_size() const;

+ 1 - 1
panda/src/linmath/lsimpleMatrix.h

@@ -58,7 +58,7 @@ private:
 #endif  // HAVE_EIGEN
 
 // This is as good a place as any to define this alignment macro.
-#if defined(LINMATH_ALIGN) && defined(HAVE_EIGEN) && defined(__AVX__)
+#if defined(LINMATH_ALIGN) && defined(HAVE_EIGEN) && defined(__AVX__) && defined(STDFLOAT_DOUBLE)
 #define ALIGN_LINMATH ALIGN_32BYTE
 #elif defined(LINMATH_ALIGN)
 #define ALIGN_LINMATH ALIGN_16BYTE

+ 2 - 4
panda/src/tinydisplay/tinyGraphicsStateGuardian.cxx

@@ -2612,12 +2612,10 @@ setup_gltex(GLTexture *gltex, int x_size, int y_size, int num_levels) {
 
   if (gltex->total_bytecount != total_bytecount) {
     if (gltex->allocated_buffer != NULL) {
-      PANDA_FREE_ARRAY(gltex->allocated_buffer);
-      TinyTextureContext::get_class_type().dec_memory_usage(TypeHandle::MC_array, gltex->total_bytecount);
+      TinyTextureContext::get_class_type().deallocate_array(gltex->allocated_buffer);
     }
-    gltex->allocated_buffer = PANDA_MALLOC_ARRAY(total_bytecount);
+    gltex->allocated_buffer = TinyTextureContext::get_class_type().allocate_array(total_bytecount);
     gltex->total_bytecount = total_bytecount;
-    TinyTextureContext::get_class_type().inc_memory_usage(TypeHandle::MC_array, total_bytecount);
   }
 
   char *next_buffer = (char *)gltex->allocated_buffer;

+ 2 - 4
panda/src/tinydisplay/tinyTextureContext.cxx

@@ -24,8 +24,7 @@ TinyTextureContext::
   GLTexture *gltex = &_gltex;
   if (gltex->allocated_buffer != NULL) {
     nassertv(gltex->num_levels != 0);
-    TinyTextureContext::get_class_type().dec_memory_usage(TypeHandle::MC_array, gltex->total_bytecount);
-    PANDA_FREE_ARRAY(gltex->allocated_buffer);
+    get_class_type().deallocate_array(gltex->allocated_buffer);
     gltex->allocated_buffer = NULL;
     gltex->total_bytecount = 0;
     gltex->num_levels = 0;
@@ -51,8 +50,7 @@ evict_lru() {
   GLTexture *gltex = &_gltex;
   if (gltex->allocated_buffer != NULL) {
     nassertv(gltex->num_levels != 0);
-    TinyTextureContext::get_class_type().dec_memory_usage(TypeHandle::MC_array, gltex->total_bytecount);
-    PANDA_FREE_ARRAY(gltex->allocated_buffer);
+    get_class_type().deallocate_array(gltex->allocated_buffer);
     gltex->allocated_buffer = NULL;
     gltex->total_bytecount = 0;
     gltex->num_levels = 0;