瀏覽代碼

[Core] Improve `CowData` and `Memory` metadata alignment.

bruvzg 1 年之前
父節點
當前提交
7bcb419149
共有 3 個文件被更改,包括 99 次插入和 52 次删除
  1. 12 12
      core/os/memory.cpp
  2. 25 10
      core/os/memory.h
  3. 62 30
      core/templates/cowdata.h

+ 12 - 12
core/os/memory.cpp

@@ -72,23 +72,23 @@ void *Memory::alloc_static(size_t p_bytes, bool p_pad_align) {
 	bool prepad = p_pad_align;
 #endif
 
-	void *mem = malloc(p_bytes + (prepad ? PAD_ALIGN : 0));
+	void *mem = malloc(p_bytes + (prepad ? DATA_OFFSET : 0));
 
 	ERR_FAIL_NULL_V(mem, nullptr);
 
 	alloc_count.increment();
 
 	if (prepad) {
-		uint64_t *s = (uint64_t *)mem;
-		*s = p_bytes;
-
 		uint8_t *s8 = (uint8_t *)mem;
 
+		uint64_t *s = (uint64_t *)(s8 + SIZE_OFFSET);
+		*s = p_bytes;
+
 #ifdef DEBUG_ENABLED
 		uint64_t new_mem_usage = mem_usage.add(p_bytes);
 		max_usage.exchange_if_greater(new_mem_usage);
 #endif
-		return s8 + PAD_ALIGN;
+		return s8 + DATA_OFFSET;
 	} else {
 		return mem;
 	}
@@ -108,8 +108,8 @@ void *Memory::realloc_static(void *p_memory, size_t p_bytes, bool p_pad_align) {
 #endif
 
 	if (prepad) {
-		mem -= PAD_ALIGN;
-		uint64_t *s = (uint64_t *)mem;
+		mem -= DATA_OFFSET;
+		uint64_t *s = (uint64_t *)(mem + SIZE_OFFSET);
 
 #ifdef DEBUG_ENABLED
 		if (p_bytes > *s) {
@@ -126,14 +126,14 @@ void *Memory::realloc_static(void *p_memory, size_t p_bytes, bool p_pad_align) {
 		} else {
 			*s = p_bytes;
 
-			mem = (uint8_t *)realloc(mem, p_bytes + PAD_ALIGN);
+			mem = (uint8_t *)realloc(mem, p_bytes + DATA_OFFSET);
 			ERR_FAIL_NULL_V(mem, nullptr);
 
-			s = (uint64_t *)mem;
+			s = (uint64_t *)(mem + SIZE_OFFSET);
 
 			*s = p_bytes;
 
-			return mem + PAD_ALIGN;
+			return mem + DATA_OFFSET;
 		}
 	} else {
 		mem = (uint8_t *)realloc(mem, p_bytes);
@@ -158,10 +158,10 @@ void Memory::free_static(void *p_ptr, bool p_pad_align) {
 	alloc_count.decrement();
 
 	if (prepad) {
-		mem -= PAD_ALIGN;
+		mem -= DATA_OFFSET;
 
 #ifdef DEBUG_ENABLED
-		uint64_t *s = (uint64_t *)mem;
+		uint64_t *s = (uint64_t *)(mem + SIZE_OFFSET);
 		mem_usage.sub(*s);
 #endif
 

+ 25 - 10
core/os/memory.h

@@ -38,10 +38,6 @@
 #include <new>
 #include <type_traits>
 
-#ifndef PAD_ALIGN
-#define PAD_ALIGN 16 //must always be greater than this at much
-#endif
-
 class Memory {
 #ifdef DEBUG_ENABLED
 	static SafeNumeric<uint64_t> mem_usage;
@@ -51,6 +47,17 @@ class Memory {
 	static SafeNumeric<uint64_t> alloc_count;
 
 public:
+	// Alignment:  ↓ max_align_t        ↓ uint64_t          ↓ max_align_t
+	//             ┌─────────────────┬──┬────────────────┬──┬───────────...
+	//             │ uint64_t        │░░│ uint64_t       │░░│ T[]
+	//             │ alloc size      │░░│ element count  │░░│ data
+	//             └─────────────────┴──┴────────────────┴──┴───────────...
+	// Offset:     ↑ SIZE_OFFSET        ↑ ELEMENT_OFFSET    ↑ DATA_OFFSET
+
+	static constexpr size_t SIZE_OFFSET = 0; // The alloc size field sits at the very start of the padded block.
+	static constexpr size_t ELEMENT_OFFSET = ((SIZE_OFFSET + sizeof(uint64_t)) % alignof(uint64_t) == 0) ? (SIZE_OFFSET + sizeof(uint64_t)) : ((SIZE_OFFSET + sizeof(uint64_t)) + alignof(uint64_t) - ((SIZE_OFFSET + sizeof(uint64_t)) % alignof(uint64_t))); // End of the alloc size field, rounded up to the next multiple of alignof(uint64_t).
+	static constexpr size_t DATA_OFFSET = ((ELEMENT_OFFSET + sizeof(uint64_t)) % alignof(max_align_t) == 0) ? (ELEMENT_OFFSET + sizeof(uint64_t)) : ((ELEMENT_OFFSET + sizeof(uint64_t)) + alignof(max_align_t) - ((ELEMENT_OFFSET + sizeof(uint64_t)) % alignof(max_align_t))); // End of the element count field, rounded up to the next multiple of alignof(max_align_t).
+
 	static void *alloc_static(size_t p_bytes, bool p_pad_align = false);
 	static void *realloc_static(void *p_memory, size_t p_bytes, bool p_pad_align = false);
 	static void free_static(void *p_ptr, bool p_pad_align = false);
@@ -133,6 +140,10 @@ void memdelete_allocator(T *p_class) {
 
 #define memnew_arr(m_class, m_count) memnew_arr_template<m_class>(m_count)
 
+_FORCE_INLINE_ uint64_t *_get_element_count_ptr(uint8_t *p_ptr) { // Maps a data pointer to the element count slot of the header in front of it.
+	return (uint64_t *)(p_ptr - Memory::DATA_OFFSET + Memory::ELEMENT_OFFSET); // Step back DATA_OFFSET bytes to the header base, then forward to ELEMENT_OFFSET.
+}
+
 template <typename T>
 T *memnew_arr_template(size_t p_elements) {
 	if (p_elements == 0) {
@@ -142,10 +153,12 @@ T *memnew_arr_template(size_t p_elements) {
 	same strategy used by std::vector, and the Vector class, so it should be safe.*/
 
 	size_t len = sizeof(T) * p_elements;
-	uint64_t *mem = (uint64_t *)Memory::alloc_static(len, true);
+	uint8_t *mem = (uint8_t *)Memory::alloc_static(len, true);
 	T *failptr = nullptr; //get rid of a warning
 	ERR_FAIL_NULL_V(mem, failptr);
-	*(mem - 1) = p_elements;
+
+	uint64_t *_elem_count_ptr = _get_element_count_ptr(mem);
+	*(_elem_count_ptr) = p_elements;
 
 	if constexpr (!std::is_trivially_constructible_v<T>) {
 		T *elems = (T *)mem;
@@ -166,16 +179,18 @@ T *memnew_arr_template(size_t p_elements) {
 
 template <typename T>
 size_t memarr_len(const T *p_class) { // Reads back the element count kept in the header in front of `p_class`; presumably `p_class` came from memnew_arr_template - confirm at call sites.
-	uint64_t *ptr = (uint64_t *)p_class;
-	return *(ptr - 1);
+	uint8_t *ptr = (uint8_t *)p_class; // Byte pointer, because the header offsets are expressed in bytes.
+	uint64_t *_elem_count_ptr = _get_element_count_ptr(ptr); // Slot at Memory::ELEMENT_OFFSET inside the header that ends at `p_class`.
+	return *(_elem_count_ptr);
 }
 
 template <typename T>
 void memdelete_arr(T *p_class) {
-	uint64_t *ptr = (uint64_t *)p_class;
+	uint8_t *ptr = (uint8_t *)p_class;
 
 	if constexpr (!std::is_trivially_destructible_v<T>) {
-		uint64_t elem_count = *(ptr - 1);
+		uint64_t *_elem_count_ptr = _get_element_count_ptr(ptr);
+		uint64_t elem_count = *(_elem_count_ptr);
 
 		for (uint64_t i = 0; i < elem_count; i++) {
 			p_class[i].~T();

+ 62 - 30
core/templates/cowdata.h

@@ -46,7 +46,7 @@ class CharString;
 template <class T, class V>
 class VMap;
 
-SAFE_NUMERIC_TYPE_PUN_GUARANTEES(uint64_t)
+static_assert(std::is_trivially_destructible_v<std::atomic<uint64_t>>);
 
 // Silence a false positive warning (see GH-52119).
 #if defined(__GNUC__) && !defined(__clang__)
@@ -89,18 +89,39 @@ private:
 		return ++x;
 	}
 
-	static constexpr USize ALLOC_PAD = sizeof(USize) * 2; // For size and atomic refcount.
+	// Alignment:  ↓ max_align_t           ↓ USize          ↓ max_align_t
+	//             ┌────────────────────┬──┬─────────────┬──┬───────────...
+	//             │ SafeNumeric<USize> │░░│ USize       │░░│ T[]
+	//             │ ref. count         │░░│ data size   │░░│ data
+	//             └────────────────────┴──┴─────────────┴──┴───────────...
+	// Offset:     ↑ REF_COUNT_OFFSET      ↑ SIZE_OFFSET    ↑ DATA_OFFSET
+
+	static constexpr size_t REF_COUNT_OFFSET = 0; // The ref. count sits at the very start of the allocation.
+	static constexpr size_t SIZE_OFFSET = ((REF_COUNT_OFFSET + sizeof(SafeNumeric<USize>)) % alignof(USize) == 0) ? (REF_COUNT_OFFSET + sizeof(SafeNumeric<USize>)) : ((REF_COUNT_OFFSET + sizeof(SafeNumeric<USize>)) + alignof(USize) - ((REF_COUNT_OFFSET + sizeof(SafeNumeric<USize>)) % alignof(USize))); // End of the ref. count, rounded up to the next multiple of alignof(USize).
+	static constexpr size_t DATA_OFFSET = ((SIZE_OFFSET + sizeof(USize)) % alignof(max_align_t) == 0) ? (SIZE_OFFSET + sizeof(USize)) : ((SIZE_OFFSET + sizeof(USize)) + alignof(max_align_t) - ((SIZE_OFFSET + sizeof(USize)) % alignof(max_align_t))); // End of the size field, rounded up to the next multiple of alignof(max_align_t).
 
 	mutable T *_ptr = nullptr;
 
 	// internal helpers
 
+	static _FORCE_INLINE_ SafeNumeric<USize> *_get_refcount_ptr(uint8_t *p_ptr) { // `p_ptr` is the base of the allocation (callers pass the raw alloc/realloc result), not the data pointer.
+		return (SafeNumeric<USize> *)(p_ptr + REF_COUNT_OFFSET); // Address of the ref. count slot inside the header.
+	}
+
+	static _FORCE_INLINE_ USize *_get_size_ptr(uint8_t *p_ptr) { // `p_ptr` is the base of the allocation (callers pass the raw alloc result), not the data pointer.
+		return (USize *)(p_ptr + SIZE_OFFSET); // Address of the size slot inside the header.
+	}
+
+	static _FORCE_INLINE_ T *_get_data_ptr(uint8_t *p_ptr) { // `p_ptr` is the base of the allocation; the result is what callers store in `_ptr`.
+		return (T *)(p_ptr + DATA_OFFSET); // First byte after the header and its padding.
+	}
+
 	_FORCE_INLINE_ SafeNumeric<USize> *_get_refcount() const { // Ref. count slot for the buffer `_ptr` points into, or nullptr when `_ptr` is unset.
 		if (!_ptr) {
 			return nullptr;
 		}
 
-		return reinterpret_cast<SafeNumeric<USize> *>(_ptr) - 2;
+		return (SafeNumeric<USize> *)((uint8_t *)_ptr - DATA_OFFSET + REF_COUNT_OFFSET); // Step back from the data to the header base, then forward to the ref. count.
 	}
 
 	_FORCE_INLINE_ USize *_get_size() const {
@@ -108,7 +129,7 @@ private:
 			return nullptr;
 		}
 
-		return reinterpret_cast<USize *>(_ptr) - 1;
+		return (USize *)((uint8_t *)_ptr - DATA_OFFSET + SIZE_OFFSET);
 	}
 
 	_FORCE_INLINE_ USize _get_alloc_size(USize p_elements) const {
@@ -244,7 +265,7 @@ void CowData<T>::_unref(void *p_data) {
 	}
 
 	// free mem
-	Memory::free_static(((uint8_t *)p_data) - ALLOC_PAD, false);
+	Memory::free_static(((uint8_t *)p_data) - DATA_OFFSET, false);
 }
 
 template <class T>
@@ -260,26 +281,27 @@ typename CowData<T>::USize CowData<T>::_copy_on_write() {
 		/* in use by more than me */
 		USize current_size = *_get_size();
 
-		USize *mem_new = (USize *)Memory::alloc_static(_get_alloc_size(current_size) + ALLOC_PAD, false);
-		mem_new += 2;
+		uint8_t *mem_new = (uint8_t *)Memory::alloc_static(_get_alloc_size(current_size) + DATA_OFFSET, false);
+		ERR_FAIL_NULL_V(mem_new, 0);
 
-		new (mem_new - 2) SafeNumeric<USize>(1); //refcount
-		*(mem_new - 1) = current_size; //size
+		SafeNumeric<USize> *_refc_ptr = _get_refcount_ptr(mem_new);
+		USize *_size_ptr = _get_size_ptr(mem_new);
+		T *_data_ptr = _get_data_ptr(mem_new);
 
-		T *_data = (T *)(mem_new);
+		new (_refc_ptr) SafeNumeric<USize>(1); //refcount
+		*(_size_ptr) = current_size; //size
 
 		// initialize new elements
 		if constexpr (std::is_trivially_copyable_v<T>) {
-			memcpy(mem_new, _ptr, current_size * sizeof(T));
-
+			memcpy((uint8_t *)_data_ptr, _ptr, current_size * sizeof(T));
 		} else {
 			for (USize i = 0; i < current_size; i++) {
-				memnew_placement(&_data[i], T(_ptr[i]));
+				memnew_placement(&_data_ptr[i], T(_ptr[i]));
 			}
 		}
 
 		_unref(_ptr);
-		_ptr = _data;
+		_ptr = _data_ptr;
 
 		rc = 1;
 	}
@@ -315,21 +337,28 @@ Error CowData<T>::resize(Size p_size) {
 		if (alloc_size != current_alloc_size) {
 			if (current_size == 0) {
 				// alloc from scratch
-				USize *ptr = (USize *)Memory::alloc_static(alloc_size + ALLOC_PAD, false);
-				ptr += 2;
-				ERR_FAIL_NULL_V(ptr, ERR_OUT_OF_MEMORY);
-				*(ptr - 1) = 0; //size, currently none
-				new (ptr - 2) SafeNumeric<USize>(1); //refcount
+				uint8_t *mem_new = (uint8_t *)Memory::alloc_static(alloc_size + DATA_OFFSET, false);
+				ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY);
+
+				SafeNumeric<USize> *_refc_ptr = _get_refcount_ptr(mem_new);
+				USize *_size_ptr = _get_size_ptr(mem_new);
+				T *_data_ptr = _get_data_ptr(mem_new);
 
-				_ptr = (T *)ptr;
+				new (_refc_ptr) SafeNumeric<USize>(1); //refcount
+				*(_size_ptr) = 0; //size, currently none
+
+				_ptr = _data_ptr;
 
 			} else {
-				USize *_ptrnew = (USize *)Memory::realloc_static(((uint8_t *)_ptr) - ALLOC_PAD, alloc_size + ALLOC_PAD, false);
-				ERR_FAIL_NULL_V(_ptrnew, ERR_OUT_OF_MEMORY);
-				_ptrnew += 2;
-				new (_ptrnew - 2) SafeNumeric<USize>(rc); //refcount
+				uint8_t *mem_new = (uint8_t *)Memory::realloc_static(((uint8_t *)_ptr) - DATA_OFFSET, alloc_size + DATA_OFFSET, false);
+				ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY);
+
+				SafeNumeric<USize> *_refc_ptr = _get_refcount_ptr(mem_new);
+				T *_data_ptr = _get_data_ptr(mem_new);
 
-				_ptr = (T *)(_ptrnew);
+				new (_refc_ptr) SafeNumeric<USize>(rc); //refcount
+
+				_ptr = _data_ptr;
 			}
 		}
 
@@ -355,12 +384,15 @@ Error CowData<T>::resize(Size p_size) {
 		}
 
 		if (alloc_size != current_alloc_size) {
-			USize *_ptrnew = (USize *)Memory::realloc_static(((uint8_t *)_ptr) - ALLOC_PAD, alloc_size + ALLOC_PAD, false);
-			ERR_FAIL_NULL_V(_ptrnew, ERR_OUT_OF_MEMORY);
-			_ptrnew += 2;
-			new (_ptrnew - 2) SafeNumeric<USize>(rc); //refcount
+			uint8_t *mem_new = (uint8_t *)Memory::realloc_static(((uint8_t *)_ptr) - DATA_OFFSET, alloc_size + DATA_OFFSET, false);
+			ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY);
+
+			SafeNumeric<USize> *_refc_ptr = _get_refcount_ptr(mem_new);
+			T *_data_ptr = _get_data_ptr(mem_new);
+
+			new (_refc_ptr) SafeNumeric<USize>(rc); //refcount
 
-			_ptr = (T *)(_ptrnew);
+			_ptr = _data_ptr;
 		}
 
 		*_get_size() = p_size;