Browse Source

Refactor `init_tokenizer_with_data` to use file memory mapping (currently Windows only)

gingerBill 4 years ago
parent
commit
3713f11461
2 changed files with 145 additions and 30 deletions
  1. 121 0
      src/common.cpp
  2. 24 30
      src/tokenizer.cpp

+ 121 - 0
src/common.cpp

@@ -850,6 +850,127 @@ ReadDirectoryError read_directory(String path, Array<FileInfo> *fi) {
 
 
 
+struct MemoryMappedFile { // Describes the file contents produced by memory_map_file_32.
+	void *handle; // Windows: the file-mapping object handle; nullptr on other platforms and for empty files
+	
+	void *data; // start of the file contents (mapped view on Windows, heap-allocated copy elsewhere)
+	i32   size; // length of `data` in bytes; 32-bit, so files larger than I32_MAX are rejected
+};
+enum MemoryMappedFileError { // Result codes of memory_map_file_32. memory_mapped_file_error_map_to_tokenizer (src/tokenizer.cpp) is indexed by these values, so keep both in the same order.
+	MemoryMappedFile_None, // success
+	
+	MemoryMappedFile_Empty, // the file has a size of zero bytes
+	MemoryMappedFile_FileTooLarge, // the file size exceeds I32_MAX
+	MemoryMappedFile_Invalid, // any failure not covered by a more specific code
+	MemoryMappedFile_NotExists, // the file, path, or drive was not found
+	MemoryMappedFile_Permission, // access to the file was denied
+	
+	MemoryMappedFile_COUNT, // number of codes above; used as the length of lookup tables
+};
+
+// Makes the contents of the file at `fullpath` available in memory and describes them in
+// `*memory_mapped_file`. Files larger than I32_MAX bytes are rejected (hence the `_32` suffix).
+//
+// Returns MemoryMappedFile_None on success; `data` then points at `size` bytes of file contents.
+//   - Windows: the file is mapped read-only and `handle` is the file-mapping object. This
+//     function never unmaps the view or closes that handle on success.
+//   - Other platforms: the contents are copied into a heap_allocator() buffer and `handle` is nullptr.
+// Returns MemoryMappedFile_Empty for a zero-length file and another error code on failure.
+// Apart from the Windows empty-file case, which clears it, `*memory_mapped_file` is only
+// written on success.
+MemoryMappedFileError memory_map_file_32(char const *fullpath, MemoryMappedFile *memory_mapped_file) {
+	MemoryMappedFileError err = MemoryMappedFile_None;
+	
+#if defined(GB_SYSTEM_WINDOWS)
+	isize w_len = 0;
+	wchar_t *w_str = gb__alloc_utf8_to_ucs2(temporary_allocator(), fullpath, &w_len);
+	if (w_str == nullptr) {
+		return MemoryMappedFile_Invalid;
+	}
+	// All locals are declared before the first `goto` so that no jump crosses an initialization.
+	i64 file_size = 0;
+	LARGE_INTEGER li_file_size = {};
+	HANDLE handle = nullptr;
+	HANDLE file_mapping = nullptr;
+	void *file_data = nullptr;
+	
+	handle = CreateFileW(w_str, GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL);
+	if (handle == INVALID_HANDLE_VALUE) {
+		// Normalise to nullptr so the error path knows there is nothing to close.
+		handle = nullptr;
+		goto window_handle_file_error;
+	}
+	
+	if (!GetFileSizeEx(handle, &li_file_size)) {
+		goto window_handle_file_error;
+	}
+	file_size = cast(i64)li_file_size.QuadPart;
+	if (file_size > I32_MAX) {
+		CloseHandle(handle);
+		return MemoryMappedFile_FileTooLarge;
+	}
+	
+	if (file_size == 0) {
+		// A zero-length file cannot be mapped, so report it explicitly with cleared outputs.
+		CloseHandle(handle);
+		err = MemoryMappedFile_Empty;
+		memory_mapped_file->handle = nullptr;
+		memory_mapped_file->data   = nullptr;
+		memory_mapped_file->size   = 0;
+		return err;
+	}
+	
+	file_mapping = CreateFileMappingW(handle, nullptr, PAGE_READONLY, 0, 0, nullptr);
+	if (file_mapping == nullptr) {
+		goto window_handle_file_error;
+	}
+	// The mapping object keeps its own reference to the file, so the file handle is no longer needed.
+	CloseHandle(handle);
+	handle = nullptr;
+	
+	file_data = MapViewOfFileEx(file_mapping, FILE_MAP_READ, 0, 0, 0/*file_size*/, nullptr/*base address*/);
+	if (file_data == nullptr) {
+		// Without this check a failed mapping would be reported as success with null data.
+		goto window_handle_file_error;
+	}
+	memory_mapped_file->handle = cast(void *)file_mapping;
+	memory_mapped_file->data = file_data;
+	memory_mapped_file->size = cast(i32)file_size;
+	return err;
+	
+window_handle_file_error:;
+	{
+		// Read the error code first: the CloseHandle calls below may overwrite it.
+		DWORD handle_err = GetLastError();
+		if (file_mapping != nullptr) {
+			CloseHandle(file_mapping);
+		}
+		if (handle != nullptr) {
+			CloseHandle(handle);
+		}
+		err = MemoryMappedFile_Invalid;
+		switch (handle_err) {
+		case ERROR_FILE_NOT_FOUND: 
+		case ERROR_PATH_NOT_FOUND: 
+		case ERROR_INVALID_DRIVE:
+			err = MemoryMappedFile_NotExists; 
+			break;
+		case ERROR_ACCESS_DENIED: 
+		case ERROR_INVALID_ACCESS:
+			err = MemoryMappedFile_Permission;
+			break;
+		}
+		return err;
+	}
+	
+#else
+	// TODO(bill): Memory map rather than copy contents
+	gbFileContents fc = gb_file_read_contents(heap_allocator(), true, fullpath);
+
+	if (fc.size > I32_MAX) {
+		err = MemoryMappedFile_FileTooLarge;
+		gb_file_free_contents(&fc);
+	} else if (fc.data != nullptr) {
+		memory_mapped_file->handle = nullptr;
+		memory_mapped_file->data = fc.data;
+		memory_mapped_file->size = cast(i32)fc.size;
+	} else {
+		// No data was produced: reopen the file to find out why.
+		gbFile f = {};
+		gbFileError file_err = gb_file_open(&f, fullpath);
+		defer (gb_file_close(&f));
+
+		switch (file_err) {
+		case gbFileError_Invalid:    err = MemoryMappedFile_Invalid;    break;
+		case gbFileError_NotExists:  err = MemoryMappedFile_NotExists;  break;
+		case gbFileError_Permission: err = MemoryMappedFile_Permission; break;
+		}
+
+		if (err == MemoryMappedFile_None) {
+			if (gb_file_size(&f) == 0) {
+				err = MemoryMappedFile_Empty;
+			} else {
+				// The file has content but none was read; never report success without data.
+				err = MemoryMappedFile_Invalid;
+			}
+		}
+	}
+	return err;
+#endif
+}
+
+
+
+
+
+
 #define USE_DAMERAU_LEVENSHTEIN 1
 
 isize levenstein_distance_case_insensitive(String const &a, String const &b) {

+ 24 - 30
src/tokenizer.cpp

@@ -802,39 +802,33 @@ void init_tokenizer_with_data(Tokenizer *t, String const &fullpath, void *data,
 	}
 }
 
-TokenizerInitError init_tokenizer_from_fullpath(Tokenizer *t, String const &fullpath) {
-	TokenizerInitError err = TokenizerInit_None;
-
-	char *c_str = alloc_cstring(temporary_allocator(), fullpath);
-
-	// TODO(bill): Memory map rather than copy contents
-	gbFileContents fc = gb_file_read_contents(heap_allocator(), true, c_str);
+TokenizerInitError memory_mapped_file_error_map_to_tokenizer[MemoryMappedFile_COUNT] = { // Lookup table: a MemoryMappedFileError used as the index yields the matching TokenizerInitError; entries must stay in enum order.
+	TokenizerInit_None,         /*MemoryMappedFile_None*/
+	TokenizerInit_Empty,        /*MemoryMappedFile_Empty*/
+	TokenizerInit_FileTooLarge, /*MemoryMappedFile_FileTooLarge*/
+	TokenizerInit_Invalid,      /*MemoryMappedFile_Invalid*/
+	TokenizerInit_NotExists,    /*MemoryMappedFile_NotExists*/
+	TokenizerInit_Permission,   /*MemoryMappedFile_Permission*/
+};
 
-	if (fc.size > I32_MAX) {
-		t->fullpath = fullpath;
-		t->line_count = 1;
-		err = TokenizerInit_FileTooLarge;
-		gb_file_free_contents(&fc);
-	} else if (fc.data != nullptr) {
-		init_tokenizer_with_data(t, fullpath, fc.data, fc.size);
-	} else {
+TokenizerInitError init_tokenizer_from_fullpath(Tokenizer *t, String const &fullpath) { // Initializes `t` from the file at `fullpath` via memory_map_file_32 and returns the translated error code.
+	MemoryMappedFile memory_mapped_file = {}; // NOTE(review): the handle stored here is neither retained nor released by this function — confirm the mapping is meant to live for the whole run.
+	MemoryMappedFileError mmf_err = memory_map_file_32(
+		alloc_cstring(temporary_allocator(), fullpath), 
+		&memory_mapped_file
+	);
+	
+	TokenizerInitError err = memory_mapped_file_error_map_to_tokenizer[mmf_err]; // mmf_err indexes the lookup table directly
+	switch (mmf_err) {
+	case MemoryMappedFile_None:
+		init_tokenizer_with_data(t, fullpath, memory_mapped_file.data, cast(isize)memory_mapped_file.size); // only on success is file data handed to the tokenizer
+		break;
+	case MemoryMappedFile_FileTooLarge: // these two cases still record the path and a line count of 1 on `t`
+	case MemoryMappedFile_Empty:
 		t->fullpath = fullpath;
 		t->line_count = 1;
-		gbFile f = {};
-		gbFileError file_err = gb_file_open(&f, c_str);
-		defer (gb_file_close(&f));
-
-		switch (file_err) {
-		case gbFileError_Invalid:    err = TokenizerInit_Invalid;    break;
-		case gbFileError_NotExists:  err = TokenizerInit_NotExists;  break;
-		case gbFileError_Permission: err = TokenizerInit_Permission; break;
-		}
-
-		if (err == TokenizerInit_None && gb_file_size(&f) == 0) {
-			err = TokenizerInit_Empty;
-		}
-	}
-
+		break;
+	}	// NOTE(review): the remaining error codes leave `t` untouched, whereas the removed code set fullpath/line_count for every failure — confirm this is intended.
 	return err;
 }