/**************************************************************************/
/*  gdscript_tokenizer_buffer.cpp                                         */
/**************************************************************************/
/*                         This file is part of:                          */
/*                             GODOT ENGINE                               */
/*                        https://godotengine.org                         */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
/*                                                                        */
/* Permission is hereby granted, free of charge, to any person obtaining  */
/* a copy of this software and associated documentation files (the        */
/* "Software"), to deal in the Software without restriction, including    */
/* without limitation the rights to use, copy, modify, merge, publish,    */
/* distribute, sublicense, and/or sell copies of the Software, and to     */
/* permit persons to whom the Software is furnished to do so, subject to  */
/* the following conditions:                                              */
/*                                                                        */
/* The above copyright notice and this permission notice shall be         */
/* included in all copies or substantial portions of the Software.        */
/*                                                                        */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
/**************************************************************************/

#include "gdscript_tokenizer_buffer.h"

#include "core/io/compression.h"
#include "core/io/marshalls.h"
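// Buffer layout (descriptive note inferred from set_code_buffer() and
// parse_code_string() below):
//
//   Header (12 bytes):
//     0-3   magic "GDSC"
//     4-7   TOKENIZER_VERSION (uint32)
//     8-11  decompressed payload size (uint32); 0 means stored uncompressed
//   Payload (optionally Zstandard-compressed):
//     4x uint32: identifier count, constant count, token line count, token count
//     identifiers: uint32 length + UTF-32 code units, every byte XOR'd with 0xb6
//     constants:   Variants serialized via encode_variant() (objects disallowed)
//     token lines, then token columns: (token index, value) uint32 pairs,
//                  "token line count" entries each
//     tokens:      5 or 8 bytes each (see _token_to_binary()).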
int GDScriptTokenizerBuffer::_token_to_binary(const Token &p_token, Vector<uint8_t> &r_buffer, int p_start, HashMap<StringName, uint32_t> &r_identifiers_map, HashMap<Variant, uint32_t> &r_constants_map) {
	int pos = p_start;

	int token_type = p_token.type & TOKEN_MASK;

	switch (p_token.type) {
		case GDScriptTokenizer::Token::ANNOTATION:
		case GDScriptTokenizer::Token::IDENTIFIER: {
			// Add identifier to map.
			int identifier_pos;
			StringName id = p_token.get_identifier();
			if (r_identifiers_map.has(id)) {
				identifier_pos = r_identifiers_map[id];
			} else {
				identifier_pos = r_identifiers_map.size();
				r_identifiers_map[id] = identifier_pos;
			}
			token_type |= identifier_pos << TOKEN_BITS;
		} break;
		case GDScriptTokenizer::Token::ERROR:
		case GDScriptTokenizer::Token::LITERAL: {
			// Add literal to map.
			int constant_pos;
			if (r_constants_map.has(p_token.literal)) {
				constant_pos = r_constants_map[p_token.literal];
			} else {
				constant_pos = r_constants_map.size();
				r_constants_map[p_token.literal] = constant_pos;
			}
			token_type |= constant_pos << TOKEN_BITS;
		} break;
		default:
			break;
	}

	// Encode token.
	int token_len;
	if (token_type & TOKEN_MASK) {
		token_len = 8;
		r_buffer.resize(pos + token_len);
		encode_uint32(token_type | TOKEN_BYTE_MASK, &r_buffer.write[pos]);
		pos += 4;
	} else {
		token_len = 5;
		r_buffer.resize(pos + token_len);
		r_buffer.write[pos] = token_type;
		pos++;
	}
	encode_uint32(p_token.start_line, &r_buffer.write[pos]);
	return token_len;
}
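// A token is stored either as a single type byte followed by its start line
// (5 bytes total), or, when the first byte has TOKEN_BYTE_MASK set, as a full
// uint32 whose bits above TOKEN_BITS index into the identifier or constant
// table, again followed by the start line (8 bytes total). _binary_to_token()
// reverses that encoding; out-of-range indices produce an ERROR token rather
// than reading past the tables on a corrupted buffer.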
GDScriptTokenizer::Token GDScriptTokenizerBuffer::_binary_to_token(const uint8_t *p_buffer) {
	Token token;
	const uint8_t *b = p_buffer;

	uint32_t token_type = decode_uint32(b);
	token.type = (Token::Type)(token_type & TOKEN_MASK);
	if (token_type & TOKEN_BYTE_MASK) {
		b += 4;
	} else {
		b++;
	}
	token.start_line = decode_uint32(b);
	token.end_line = token.start_line;

	token.literal = token.get_name();
	if (token.type == Token::CONST_NAN) {
		token.literal = String("NAN"); // Special case since name and notation are different.
	}

	switch (token.type) {
		case GDScriptTokenizer::Token::ANNOTATION:
		case GDScriptTokenizer::Token::IDENTIFIER: {
			// Get name from map.
			int identifier_pos = token_type >> TOKEN_BITS;
			if (unlikely(identifier_pos >= identifiers.size())) {
				Token error;
				error.type = Token::ERROR;
				error.literal = "Identifier index out of bounds.";
				return error;
			}
			token.literal = identifiers[identifier_pos];
		} break;
		case GDScriptTokenizer::Token::ERROR:
		case GDScriptTokenizer::Token::LITERAL: {
			// Get literal from map.
			int constant_pos = token_type >> TOKEN_BITS;
			if (unlikely(constant_pos >= constants.size())) {
				Token error;
				error.type = Token::ERROR;
				error.literal = "Constant index out of bounds.";
				return error;
			}
			token.literal = constants[constant_pos];
		} break;
		default:
			break;
	}

	return token;
}

Error GDScriptTokenizerBuffer::set_code_buffer(const Vector<uint8_t> &p_buffer) {
	const uint8_t *buf = p_buffer.ptr();
	ERR_FAIL_COND_V(p_buffer.size() < 12 || p_buffer[0] != 'G' || p_buffer[1] != 'D' || p_buffer[2] != 'S' || p_buffer[3] != 'C', ERR_INVALID_DATA);

	int version = decode_uint32(&buf[4]);
	ERR_FAIL_COND_V_MSG(version != TOKENIZER_VERSION, ERR_INVALID_DATA, "Binary GDScript is not compatible with this engine version.");

	int decompressed_size = decode_uint32(&buf[8]);

	Vector<uint8_t> contents;
	if (decompressed_size == 0) {
		contents = p_buffer.slice(12);
	} else {
		contents.resize(decompressed_size);
		const int64_t result = Compression::decompress(contents.ptrw(), contents.size(), &buf[12], p_buffer.size() - 12, Compression::MODE_ZSTD);
		ERR_FAIL_COND_V_MSG(result != decompressed_size, ERR_INVALID_DATA, "Error decompressing GDScript tokenizer buffer.");
	}

	int total_len = contents.size();
	buf = contents.ptr();
	uint32_t identifier_count = decode_uint32(&buf[0]);
	uint32_t constant_count = decode_uint32(&buf[4]);
	uint32_t token_line_count = decode_uint32(&buf[8]);
	uint32_t token_count = decode_uint32(&buf[12]);

	const uint8_t *b = &buf[16];
	total_len -= 16;

	identifiers.resize(identifier_count);
	for (uint32_t i = 0; i < identifier_count; i++) {
		uint32_t len = decode_uint32(b);
		total_len -= 4;
		ERR_FAIL_COND_V((len * 4u) > (uint32_t)total_len, ERR_INVALID_DATA);
		b += 4;
		Vector<uint32_t> cs;
		cs.resize(len);
		for (uint32_t j = 0; j < len; j++) {
			uint8_t tmp[4];
			for (uint32_t k = 0; k < 4; k++) {
				// Undo the XOR obfuscation applied when the identifier was saved.
				tmp[k] = b[j * 4 + k] ^ 0xb6;
			}
			cs.write[j] = decode_uint32(tmp);
		}

		String s = String::utf32(Span(reinterpret_cast<const char32_t *>(cs.ptr()), len));
		b += len * 4;
		total_len -= len * 4;
		identifiers.write[i] = s;
	}

	constants.resize(constant_count);
	for (uint32_t i = 0; i < constant_count; i++) {
		Variant v;
		int len;
		Error err = decode_variant(v, b, total_len, &len, false);
		if (err) {
			return err;
		}
		b += len;
		total_len -= len;
		constants.write[i] = v;
	}

	for (uint32_t i = 0; i < token_line_count; i++) {
		ERR_FAIL_COND_V(total_len < 8, ERR_INVALID_DATA);
		uint32_t token_index = decode_uint32(b);
		b += 4;
		uint32_t line = decode_uint32(b);
		b += 4;
		total_len -= 8;
		token_lines[token_index] = line;
	}
	for (uint32_t i = 0; i < token_line_count; i++) {
		ERR_FAIL_COND_V(total_len < 8, ERR_INVALID_DATA);
		uint32_t token_index = decode_uint32(b);
		b += 4;
		uint32_t column = decode_uint32(b);
		b += 4;
		total_len -= 8;
		token_columns[token_index] = column;
	}

	tokens.resize(token_count);
	for (uint32_t i = 0; i < token_count; i++) {
		int token_len = 5;
		if ((*b) & TOKEN_BYTE_MASK) {
			token_len = 8;
		}
		ERR_FAIL_COND_V(total_len < token_len, ERR_INVALID_DATA);
		Token token = _binary_to_token(b);
		b += token_len;
		ERR_FAIL_INDEX_V(token.type, Token::TK_MAX, ERR_INVALID_DATA);
		tokens.write[i] = token;
		total_len -= token_len;
	}

	ERR_FAIL_COND_V(total_len > 0, ERR_INVALID_DATA);

	return OK;
}
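// Illustrative round trip (a sketch, not engine code; it assumes
// parse_code_string() is static and that Token and CompressMode are visible
// as declared in gdscript_tokenizer_buffer.h):
//
//   Vector<uint8_t> bin = GDScriptTokenizerBuffer::parse_code_string(
//           source_code, GDScriptTokenizerBuffer::COMPRESS_ZSTD);
//   GDScriptTokenizerBuffer buffer;
//   if (buffer.set_code_buffer(bin) == OK) {
//       for (Token t = buffer.scan(); t.type != Token::TK_EOF; t = buffer.scan()) {
//           // Feed tokens to the parser.
//       }
//   }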
Vector<uint8_t> GDScriptTokenizerBuffer::parse_code_string(const String &p_code, CompressMode p_compress_mode) {
	HashMap<StringName, uint32_t> identifier_map;
	HashMap<Variant, uint32_t> constant_map;
	Vector<uint8_t> token_buffer;
	HashMap<uint32_t, uint32_t> token_lines;
	HashMap<uint32_t, uint32_t> token_columns;

	GDScriptTokenizerText tokenizer;
	tokenizer.set_source_code(p_code);
	tokenizer.set_multiline_mode(true); // Ignore whitespace tokens.

	Token current = tokenizer.scan();
	int token_pos = 0;
	int last_token_line = 0;
	int token_counter = 0;

	while (current.type != Token::TK_EOF) {
		int token_len = _token_to_binary(current, token_buffer, token_pos, identifier_map, constant_map);
		token_pos += token_len;
		if (token_counter > 0 && current.start_line > last_token_line) {
			token_lines[token_counter] = current.start_line;
			token_columns[token_counter] = current.start_column;
		}
		last_token_line = current.end_line;

		current = tokenizer.scan();
		token_counter++;
	}

	// Reverse maps.
	Vector<StringName> rev_identifier_map;
	rev_identifier_map.resize(identifier_map.size());
	for (const KeyValue<StringName, uint32_t> &E : identifier_map) {
		rev_identifier_map.write[E.value] = E.key;
	}

	Vector<Variant> rev_constant_map;
	rev_constant_map.resize(constant_map.size());
	for (const KeyValue<Variant, uint32_t> &E : constant_map) {
		rev_constant_map.write[E.value] = E.key;
	}

	HashMap<uint32_t, uint32_t> rev_token_lines;
	for (const KeyValue<uint32_t, uint32_t> &E : token_lines) {
		rev_token_lines[E.value] = E.key;
	}

	// Remove continuation lines from map.
	for (int line : tokenizer.get_continuation_lines()) {
		if (rev_token_lines.has(line)) {
			token_lines.erase(rev_token_lines[line]);
			token_columns.erase(rev_token_lines[line]);
		}
	}

	Vector<uint8_t> contents;
	contents.resize(16);
	encode_uint32(identifier_map.size(), &contents.write[0]);
	encode_uint32(constant_map.size(), &contents.write[4]);
	encode_uint32(token_lines.size(), &contents.write[8]);
	encode_uint32(token_counter, &contents.write[12]);

	int buf_pos = 16;

	// Save identifiers.
	for (const StringName &id : rev_identifier_map) {
		String s = id.operator String();
		int len = s.length();

		contents.resize(buf_pos + (len + 1) * 4);

		encode_uint32(len, &contents.write[buf_pos]);
		buf_pos += 4;

		for (int i = 0; i < len; i++) {
			uint8_t tmp[4];
			encode_uint32(s[i], tmp);

			for (int b = 0; b < 4; b++) {
				contents.write[buf_pos + b] = tmp[b] ^ 0xb6;
			}

			buf_pos += 4;
		}
	}

	// Save constants.
	for (const Variant &v : rev_constant_map) {
		int len;
		// Objects cannot be constant, never encode objects.
		Error err = encode_variant(v, nullptr, len, false);
		ERR_FAIL_COND_V_MSG(err != OK, Vector<uint8_t>(), "Error when trying to encode Variant.");
		contents.resize(buf_pos + len);
		encode_variant(v, &contents.write[buf_pos], len, false);
		buf_pos += len;
	}

	// Save lines and columns.
	contents.resize(buf_pos + token_lines.size() * 16);
	for (const KeyValue<uint32_t, uint32_t> &e : token_lines) {
		encode_uint32(e.key, &contents.write[buf_pos]);
		buf_pos += 4;
		encode_uint32(e.value, &contents.write[buf_pos]);
		buf_pos += 4;
	}
	for (const KeyValue<uint32_t, uint32_t> &e : token_columns) {
		encode_uint32(e.key, &contents.write[buf_pos]);
		buf_pos += 4;
		encode_uint32(e.value, &contents.write[buf_pos]);
		buf_pos += 4;
	}

	// Store tokens.
	contents.append_array(token_buffer);

	Vector<uint8_t> buf;

	// Save header.
	buf.resize(12);
	buf.write[0] = 'G';
	buf.write[1] = 'D';
	buf.write[2] = 'S';
	buf.write[3] = 'C';
	encode_uint32(TOKENIZER_VERSION, &buf.write[4]);

	switch (p_compress_mode) {
		case COMPRESS_NONE:
			encode_uint32(0u, &buf.write[8]);
			buf.append_array(contents);
			break;

		case COMPRESS_ZSTD: {
			encode_uint32(contents.size(), &buf.write[8]);
			Vector<uint8_t> compressed;
			const int64_t max_size = Compression::get_max_compressed_buffer_size(contents.size(), Compression::MODE_ZSTD);
			compressed.resize(max_size);

			const int64_t compressed_size = Compression::compress(compressed.ptrw(), contents.ptr(), contents.size(), Compression::MODE_ZSTD);
			ERR_FAIL_COND_V_MSG(compressed_size < 0, Vector<uint8_t>(), "Error compressing GDScript tokenizer buffer.");
			compressed.resize(compressed_size);

			buf.append_array(compressed);
		} break;
	}

	return buf;
}
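// The cursor-related overrides below are no-ops: completion cursors are a
// feature of scanning live source text, and a pre-tokenized buffer has no
// cursor to track.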
int GDScriptTokenizerBuffer::get_cursor_line() const {
	return 0;
}

int GDScriptTokenizerBuffer::get_cursor_column() const {
	return 0;
}

void GDScriptTokenizerBuffer::set_cursor_position(int p_line, int p_column) {
}

void GDScriptTokenizerBuffer::set_multiline_mode(bool p_state) {
	multiline_mode = p_state;
}

bool GDScriptTokenizerBuffer::is_past_cursor() const {
	return false;
}

void GDScriptTokenizerBuffer::push_expression_indented_block() {
	indent_stack_stack.push_back(indent_stack);
}

void GDScriptTokenizerBuffer::pop_expression_indented_block() {
	ERR_FAIL_COND(indent_stack_stack.is_empty());
	indent_stack = indent_stack_stack.back()->get();
	indent_stack_stack.pop_back();
}

// Replays the stored token stream, synthesizing the NEWLINE, INDENT, and
// DEDENT tokens that are not stored explicitly from the recorded line and
// column information.
GDScriptTokenizer::Token GDScriptTokenizerBuffer::scan() {
	// Add final newline.
	if (current >= tokens.size() && !last_token_was_newline) {
		Token newline;
		newline.type = Token::NEWLINE;
		newline.start_line = current_line;
		newline.end_line = current_line;
		last_token_was_newline = true;
		return newline;
	}

	// Resolve pending indentation change.
	if (pending_indents > 0) {
		pending_indents--;
		Token indent;
		indent.type = Token::INDENT;
		indent.start_line = current_line;
		indent.end_line = current_line;
		return indent;
	} else if (pending_indents < 0) {
		pending_indents++;
		Token dedent;
		dedent.type = Token::DEDENT;
		dedent.start_line = current_line;
		dedent.end_line = current_line;
		return dedent;
	}

	if (current >= tokens.size()) {
		if (!indent_stack.is_empty()) {
			pending_indents -= indent_stack.size();
			indent_stack.clear();
			return scan();
		}
		Token eof;
		eof.type = Token::TK_EOF;
		return eof;
	}

	if (!last_token_was_newline && token_lines.has(current)) {
		current_line = token_lines[current];
		uint32_t current_column = token_columns[current];

		// Check if there's a need to indent/dedent.
		if (!multiline_mode) {
			uint32_t previous_indent = 0;
			if (!indent_stack.is_empty()) {
				previous_indent = indent_stack.back()->get();
			}
			if (current_column - 1 > previous_indent) {
				pending_indents++;
				indent_stack.push_back(current_column - 1);
			} else {
				while (current_column - 1 < previous_indent) {
					pending_indents--;
					indent_stack.pop_back();
					if (indent_stack.is_empty()) {
						break;
					}
					previous_indent = indent_stack.back()->get();
				}
			}

			Token newline;
			newline.type = Token::NEWLINE;
			newline.start_line = current_line;
			newline.end_line = current_line;
			last_token_was_newline = true;

			return newline;
		}
	}

	last_token_was_newline = false;

	Token token = tokens[current++];
	return token;
}