소스 검색

Add `Memory::alloc_static_zeroed` to allocate memory that's filled with zeroes.
This is generally faster than calling `malloc` and then clearing the memory with `memset` or a loop that sets each element to 0.

Lukas Tenbrink 4 달 전
부모
커밋
3207066e19
8개의 변경된 파일, 32개의 추가 및 47개의 삭제
  1. 1 2
      core/io/zip_io.cpp
  2. 10 1
      core/os/memory.cpp
  3. 3 0
      core/os/memory.h
  4. 2 6
      core/templates/a_hash_map.h
  5. 6 14
      core/templates/hash_map.h
  6. 4 10
      core/templates/hash_set.h
  7. 4 10
      core/templates/oa_hash_map.h
  8. 2 4
      drivers/gles3/shader_gles3.cpp

+ 1 - 2
core/io/zip_io.cpp

@@ -161,8 +161,7 @@ int zipio_testerror(voidpf opaque, voidpf stream) {
 }
 }
 
 
 voidpf zipio_alloc(voidpf opaque, uInt items, uInt size) {
 voidpf zipio_alloc(voidpf opaque, uInt items, uInt size) {
-	voidpf ptr = memalloc((size_t)items * size);
-	memset(ptr, 0, items * size);
+	voidpf ptr = memalloc_zeroed((size_t)items * size);
 	return ptr;
 	return ptr;
 }
 }
 
 

+ 10 - 1
core/os/memory.cpp

@@ -93,6 +93,7 @@ void Memory::free_aligned_static(void *p_memory) {
 	free(p);
 	free(p);
 }
 }
 
 
+template <bool p_ensure_zero>
 void *Memory::alloc_static(size_t p_bytes, bool p_pad_align) {
 void *Memory::alloc_static(size_t p_bytes, bool p_pad_align) {
 #ifdef DEBUG_ENABLED
 #ifdef DEBUG_ENABLED
 	bool prepad = true;
 	bool prepad = true;
@@ -100,7 +101,12 @@ void *Memory::alloc_static(size_t p_bytes, bool p_pad_align) {
 	bool prepad = p_pad_align;
 	bool prepad = p_pad_align;
 #endif
 #endif
 
 
-	void *mem = malloc(p_bytes + (prepad ? DATA_OFFSET : 0));
+	void *mem;
+	if constexpr (p_ensure_zero) {
+		mem = calloc(1, p_bytes + (prepad ? DATA_OFFSET : 0));
+	} else {
+		mem = malloc(p_bytes + (prepad ? DATA_OFFSET : 0));
+	}
 
 
 	ERR_FAIL_NULL_V(mem, nullptr);
 	ERR_FAIL_NULL_V(mem, nullptr);
 
 
@@ -120,6 +126,9 @@ void *Memory::alloc_static(size_t p_bytes, bool p_pad_align) {
 	}
 	}
 }
 }
 
 
+template void *Memory::alloc_static<true>(size_t p_bytes, bool p_pad_align);
+template void *Memory::alloc_static<false>(size_t p_bytes, bool p_pad_align);
+
 void *Memory::realloc_static(void *p_memory, size_t p_bytes, bool p_pad_align) {
 void *Memory::realloc_static(void *p_memory, size_t p_bytes, bool p_pad_align) {
 	if (p_memory == nullptr) {
 	if (p_memory == nullptr) {
 		return alloc_static(p_bytes, p_pad_align);
 		return alloc_static(p_bytes, p_pad_align);

+ 3 - 0
core/os/memory.h

@@ -54,7 +54,9 @@ public:
 	static constexpr size_t ELEMENT_OFFSET = ((SIZE_OFFSET + sizeof(uint64_t)) % alignof(uint64_t) == 0) ? (SIZE_OFFSET + sizeof(uint64_t)) : ((SIZE_OFFSET + sizeof(uint64_t)) + alignof(uint64_t) - ((SIZE_OFFSET + sizeof(uint64_t)) % alignof(uint64_t)));
 	static constexpr size_t ELEMENT_OFFSET = ((SIZE_OFFSET + sizeof(uint64_t)) % alignof(uint64_t) == 0) ? (SIZE_OFFSET + sizeof(uint64_t)) : ((SIZE_OFFSET + sizeof(uint64_t)) + alignof(uint64_t) - ((SIZE_OFFSET + sizeof(uint64_t)) % alignof(uint64_t)));
 	static constexpr size_t DATA_OFFSET = ((ELEMENT_OFFSET + sizeof(uint64_t)) % alignof(max_align_t) == 0) ? (ELEMENT_OFFSET + sizeof(uint64_t)) : ((ELEMENT_OFFSET + sizeof(uint64_t)) + alignof(max_align_t) - ((ELEMENT_OFFSET + sizeof(uint64_t)) % alignof(max_align_t)));
 	static constexpr size_t DATA_OFFSET = ((ELEMENT_OFFSET + sizeof(uint64_t)) % alignof(max_align_t) == 0) ? (ELEMENT_OFFSET + sizeof(uint64_t)) : ((ELEMENT_OFFSET + sizeof(uint64_t)) + alignof(max_align_t) - ((ELEMENT_OFFSET + sizeof(uint64_t)) % alignof(max_align_t)));
 
 
+	template <bool p_ensure_zero = false>
 	static void *alloc_static(size_t p_bytes, bool p_pad_align = false);
 	static void *alloc_static(size_t p_bytes, bool p_pad_align = false);
+	_FORCE_INLINE_ static void *alloc_static_zeroed(size_t p_bytes, bool p_pad_align = false) { return alloc_static<true>(p_bytes, p_pad_align); }
 	static void *realloc_static(void *p_memory, size_t p_bytes, bool p_pad_align = false);
 	static void *realloc_static(void *p_memory, size_t p_bytes, bool p_pad_align = false);
 	static void free_static(void *p_ptr, bool p_pad_align = false);
 	static void free_static(void *p_ptr, bool p_pad_align = false);
 
 
@@ -107,6 +109,7 @@ void operator delete(void *p_mem, void *p_pointer, size_t check, const char *p_d
 #endif
 #endif
 
 
 #define memalloc(m_size) Memory::alloc_static(m_size)
 #define memalloc(m_size) Memory::alloc_static(m_size)
+#define memalloc_zeroed(m_size) Memory::alloc_static_zeroed(m_size)
 #define memrealloc(m_mem, m_size) Memory::realloc_static(m_mem, m_size)
 #define memrealloc(m_mem, m_size) Memory::realloc_static(m_mem, m_size)
 #define memfree(m_mem) Memory::free_static(m_mem)
 #define memfree(m_mem) Memory::free_static(m_mem)
 
 

+ 2 - 6
core/templates/a_hash_map.h

@@ -214,11 +214,9 @@ private:
 
 
 		HashMapData *old_map_data = map_data;
 		HashMapData *old_map_data = map_data;
 
 
-		map_data = reinterpret_cast<HashMapData *>(Memory::alloc_static(sizeof(HashMapData) * real_capacity));
+		map_data = reinterpret_cast<HashMapData *>(Memory::alloc_static_zeroed(sizeof(HashMapData) * real_capacity));
 		elements = reinterpret_cast<MapKeyValue *>(Memory::realloc_static(elements, sizeof(MapKeyValue) * (_get_resize_count(capacity) + 1)));
 		elements = reinterpret_cast<MapKeyValue *>(Memory::realloc_static(elements, sizeof(MapKeyValue) * (_get_resize_count(capacity) + 1)));
 
 
-		memset(map_data, EMPTY_HASH, real_capacity * sizeof(HashMapData));
-
 		if (num_elements != 0) {
 		if (num_elements != 0) {
 			for (uint32_t i = 0; i < real_old_capacity; i++) {
 			for (uint32_t i = 0; i < real_old_capacity; i++) {
 				HashMapData data = old_map_data[i];
 				HashMapData data = old_map_data[i];
@@ -236,10 +234,8 @@ private:
 			// Allocate on demand to save memory.
 			// Allocate on demand to save memory.
 
 
 			uint32_t real_capacity = capacity + 1;
 			uint32_t real_capacity = capacity + 1;
-			map_data = reinterpret_cast<HashMapData *>(Memory::alloc_static(sizeof(HashMapData) * real_capacity));
+			map_data = reinterpret_cast<HashMapData *>(Memory::alloc_static_zeroed(sizeof(HashMapData) * real_capacity));
 			elements = reinterpret_cast<MapKeyValue *>(Memory::alloc_static(sizeof(MapKeyValue) * (_get_resize_count(capacity) + 1)));
 			elements = reinterpret_cast<MapKeyValue *>(Memory::alloc_static(sizeof(MapKeyValue) * (_get_resize_count(capacity) + 1)));
-
-			memset(map_data, EMPTY_HASH, real_capacity * sizeof(HashMapData));
 		}
 		}
 
 
 		if (unlikely(num_elements > _get_resize_count(capacity))) {
 		if (unlikely(num_elements > _get_resize_count(capacity))) {

+ 6 - 14
core/templates/hash_map.h

@@ -170,13 +170,9 @@ private:
 		uint32_t *old_hashes = hashes;
 		uint32_t *old_hashes = hashes;
 
 
 		num_elements = 0;
 		num_elements = 0;
-		hashes = reinterpret_cast<uint32_t *>(Memory::alloc_static(sizeof(uint32_t) * capacity));
-		elements = reinterpret_cast<HashMapElement<TKey, TValue> **>(Memory::alloc_static(sizeof(HashMapElement<TKey, TValue> *) * capacity));
-
-		for (uint32_t i = 0; i < capacity; i++) {
-			hashes[i] = 0;
-			elements[i] = nullptr;
-		}
+		static_assert(EMPTY_HASH == 0, "Assuming EMPTY_HASH = 0 for alloc_static_zeroed call");
+		hashes = reinterpret_cast<uint32_t *>(Memory::alloc_static_zeroed(sizeof(uint32_t) * capacity));
+		elements = reinterpret_cast<HashMapElement<TKey, TValue> **>(Memory::alloc_static_zeroed(sizeof(HashMapElement<TKey, TValue> *) * capacity));
 
 
 		if (old_capacity == 0) {
 		if (old_capacity == 0) {
 			// Nothing to do.
 			// Nothing to do.
@@ -200,13 +196,9 @@ private:
 		if (unlikely(elements == nullptr)) {
 		if (unlikely(elements == nullptr)) {
 			// Allocate on demand to save memory.
 			// Allocate on demand to save memory.
 
 
-			hashes = reinterpret_cast<uint32_t *>(Memory::alloc_static(sizeof(uint32_t) * capacity));
-			elements = reinterpret_cast<HashMapElement<TKey, TValue> **>(Memory::alloc_static(sizeof(HashMapElement<TKey, TValue> *) * capacity));
-
-			for (uint32_t i = 0; i < capacity; i++) {
-				hashes[i] = EMPTY_HASH;
-				elements[i] = nullptr;
-			}
+			static_assert(EMPTY_HASH == 0, "Assuming EMPTY_HASH = 0 for alloc_static_zeroed call");
+			hashes = reinterpret_cast<uint32_t *>(Memory::alloc_static_zeroed(sizeof(uint32_t) * capacity));
+			elements = reinterpret_cast<HashMapElement<TKey, TValue> **>(Memory::alloc_static_zeroed(sizeof(HashMapElement<TKey, TValue> *) * capacity));
 		}
 		}
 
 
 		if (num_elements + 1 > MAX_OCCUPANCY * capacity) {
 		if (num_elements + 1 > MAX_OCCUPANCY * capacity) {

+ 4 - 10
core/templates/hash_set.h

@@ -144,15 +144,12 @@ private:
 		uint32_t *old_hashes = hashes;
 		uint32_t *old_hashes = hashes;
 		uint32_t *old_key_to_hash = key_to_hash;
 		uint32_t *old_key_to_hash = key_to_hash;
 
 
-		hashes = reinterpret_cast<uint32_t *>(Memory::alloc_static(sizeof(uint32_t) * capacity));
+		static_assert(EMPTY_HASH == 0, "Assuming EMPTY_HASH = 0 for alloc_static_zeroed call");
+		hashes = reinterpret_cast<uint32_t *>(Memory::alloc_static_zeroed(sizeof(uint32_t) * capacity));
 		keys = reinterpret_cast<TKey *>(Memory::realloc_static(keys, sizeof(TKey) * capacity));
 		keys = reinterpret_cast<TKey *>(Memory::realloc_static(keys, sizeof(TKey) * capacity));
 		key_to_hash = reinterpret_cast<uint32_t *>(Memory::alloc_static(sizeof(uint32_t) * capacity));
 		key_to_hash = reinterpret_cast<uint32_t *>(Memory::alloc_static(sizeof(uint32_t) * capacity));
 		hash_to_key = reinterpret_cast<uint32_t *>(Memory::realloc_static(hash_to_key, sizeof(uint32_t) * capacity));
 		hash_to_key = reinterpret_cast<uint32_t *>(Memory::realloc_static(hash_to_key, sizeof(uint32_t) * capacity));
 
 
-		for (uint32_t i = 0; i < capacity; i++) {
-			hashes[i] = EMPTY_HASH;
-		}
-
 		for (uint32_t i = 0; i < num_elements; i++) {
 		for (uint32_t i = 0; i < num_elements; i++) {
 			uint32_t h = old_hashes[old_key_to_hash[i]];
 			uint32_t h = old_hashes[old_key_to_hash[i]];
 			_insert_with_hash(h, i);
 			_insert_with_hash(h, i);
@@ -167,14 +164,11 @@ private:
 		if (unlikely(keys == nullptr)) {
 		if (unlikely(keys == nullptr)) {
 			// Allocate on demand to save memory.
 			// Allocate on demand to save memory.
 
 
-			hashes = reinterpret_cast<uint32_t *>(Memory::alloc_static(sizeof(uint32_t) * capacity));
+			static_assert(EMPTY_HASH == 0, "Assuming EMPTY_HASH = 0 for alloc_static_zeroed call");
+			hashes = reinterpret_cast<uint32_t *>(Memory::alloc_static_zeroed(sizeof(uint32_t) * capacity));
 			keys = reinterpret_cast<TKey *>(Memory::alloc_static(sizeof(TKey) * capacity));
 			keys = reinterpret_cast<TKey *>(Memory::alloc_static(sizeof(TKey) * capacity));
 			key_to_hash = reinterpret_cast<uint32_t *>(Memory::alloc_static(sizeof(uint32_t) * capacity));
 			key_to_hash = reinterpret_cast<uint32_t *>(Memory::alloc_static(sizeof(uint32_t) * capacity));
 			hash_to_key = reinterpret_cast<uint32_t *>(Memory::alloc_static(sizeof(uint32_t) * capacity));
 			hash_to_key = reinterpret_cast<uint32_t *>(Memory::alloc_static(sizeof(uint32_t) * capacity));
-
-			for (uint32_t i = 0; i < capacity; i++) {
-				hashes[i] = EMPTY_HASH;
-			}
 		}
 		}
 
 
 		uint32_t pos = 0;
 		uint32_t pos = 0;

+ 4 - 10
core/templates/oa_hash_map.h

@@ -153,11 +153,8 @@ private:
 		num_elements = 0;
 		num_elements = 0;
 		keys = static_cast<TKey *>(Memory::alloc_static(sizeof(TKey) * capacity));
 		keys = static_cast<TKey *>(Memory::alloc_static(sizeof(TKey) * capacity));
 		values = static_cast<TValue *>(Memory::alloc_static(sizeof(TValue) * capacity));
 		values = static_cast<TValue *>(Memory::alloc_static(sizeof(TValue) * capacity));
-		hashes = static_cast<uint32_t *>(Memory::alloc_static(sizeof(uint32_t) * capacity));
-
-		for (uint32_t i = 0; i < capacity; i++) {
-			hashes[i] = 0;
-		}
+		static_assert(EMPTY_HASH == 0, "Assuming EMPTY_HASH = 0 for alloc_static_zeroed call");
+		hashes = static_cast<uint32_t *>(Memory::alloc_static_zeroed(sizeof(uint32_t) * capacity));
 
 
 		if (old_capacity == 0) {
 		if (old_capacity == 0) {
 			// Nothing to do.
 			// Nothing to do.
@@ -384,11 +381,8 @@ public:
 
 
 		keys = static_cast<TKey *>(Memory::alloc_static(sizeof(TKey) * capacity));
 		keys = static_cast<TKey *>(Memory::alloc_static(sizeof(TKey) * capacity));
 		values = static_cast<TValue *>(Memory::alloc_static(sizeof(TValue) * capacity));
 		values = static_cast<TValue *>(Memory::alloc_static(sizeof(TValue) * capacity));
-		hashes = static_cast<uint32_t *>(Memory::alloc_static(sizeof(uint32_t) * capacity));
-
-		for (uint32_t i = 0; i < capacity; i++) {
-			hashes[i] = EMPTY_HASH;
-		}
+		static_assert(EMPTY_HASH == 0, "Assuming EMPTY_HASH = 0 for alloc_static_zeroed call");
+		hashes = static_cast<uint32_t *>(Memory::alloc_static_zeroed(sizeof(uint32_t) * capacity));
 	}
 	}
 
 
 	~OAHashMap() {
 	~OAHashMap() {

+ 2 - 4
drivers/gles3/shader_gles3.cpp

@@ -340,8 +340,7 @@ void ShaderGLES3::_compile_specialization(Version::Specialization &spec, uint32_
 					iloglen = 4096; // buggy driver (Adreno 220+)
 					iloglen = 4096; // buggy driver (Adreno 220+)
 				}
 				}
 
 
-				char *ilogmem = (char *)Memory::alloc_static(iloglen + 1);
-				memset(ilogmem, 0, iloglen + 1);
+				char *ilogmem = (char *)Memory::alloc_static_zeroed(iloglen + 1);
 				glGetShaderInfoLog(spec.vert_id, iloglen, &iloglen, ilogmem);
 				glGetShaderInfoLog(spec.vert_id, iloglen, &iloglen, ilogmem);
 
 
 				String err_string = name + ": Vertex shader compilation failed:\n";
 				String err_string = name + ": Vertex shader compilation failed:\n";
@@ -388,8 +387,7 @@ void ShaderGLES3::_compile_specialization(Version::Specialization &spec, uint32_
 					iloglen = 4096; // buggy driver (Adreno 220+)
 					iloglen = 4096; // buggy driver (Adreno 220+)
 				}
 				}
 
 
-				char *ilogmem = (char *)Memory::alloc_static(iloglen + 1);
-				memset(ilogmem, 0, iloglen + 1);
+				char *ilogmem = (char *)Memory::alloc_static_zeroed(iloglen + 1);
 				glGetShaderInfoLog(spec.frag_id, iloglen, &iloglen, ilogmem);
 				glGetShaderInfoLog(spec.frag_id, iloglen, &iloglen, ilogmem);
 
 
 				String err_string = name + ": Fragment shader compilation failed:\n";
 				String err_string = name + ": Fragment shader compilation failed:\n";