/*
 * This source file is part of RmlUi, the HTML/CSS Interface Middleware
 *
 * For the latest information, see http://github.com/mikke89/RmlUi
 *
 * Copyright (c) 2008-2010 CodePoint Ltd, Shift Technology Ltd
 * Copyright (c) 2019 The RmlUi Team, and contributors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 */

#include "../../Include/RmlUi/Core/BaseXMLParser.h"
#include "../../Include/RmlUi/Core/Profiling.h"
#include "../../Include/RmlUi/Core/Stream.h"
#include <stdlib.h>
#include <string.h>

namespace Rml {
namespace Core {

// Most file layers cache 4k.
const int DEFAULT_BUFFER_SIZE = 4096;

// Puts the parser into a well-defined idle state. No read buffer exists until
// Parse() allocates one, so every buffer-related member starts out empty.
BaseXMLParser::BaseXMLParser()
{
	xml_source = nullptr;
	read = nullptr;
	buffer = nullptr;
	buffer_used = 0;
	buffer_size = 0;
	// The line counters are only assigned by Parse() / ReadBody(); give them a
	// defined value so the line number getters are safe to call beforehand.
	line_number = 0;
	line_number_open_tag = 0;
	open_tag_depth = 0;
	treat_content_as_cdata = false;
}

// Nothing to release here: the read buffer is allocated and freed inside Parse().
BaseXMLParser::~BaseXMLParser()
{
}

// Registers a tag as containing general character data.
void BaseXMLParser::RegisterCDATATag(const String& tag) { if (!tag.empty()) cdata_tags.insert(StringUtilities::ToLower(tag)); } // Parses the given stream as an XML file, and calls the handlers when // interesting phenomenon are encountered. void BaseXMLParser::Parse(Stream* stream) { xml_source = stream; buffer_size = DEFAULT_BUFFER_SIZE; treat_content_as_cdata = false; buffer = (unsigned char*) malloc(buffer_size); read = buffer; line_number = 1; FillBuffer(); // Read (er ... skip) the header, if one exists. ReadHeader(); // Read the XML body. ReadBody(); free(buffer); } // Get the current file line number int BaseXMLParser::GetLineNumber() const { return line_number; } int BaseXMLParser::GetLineNumberOpenTag() const { return line_number_open_tag; } // Called when the parser finds the beginning of an element tag. void BaseXMLParser::HandleElementStart(const String& RMLUI_UNUSED_PARAMETER(name), const XMLAttributes& RMLUI_UNUSED_PARAMETER(attributes)) { RMLUI_UNUSED(name); RMLUI_UNUSED(attributes); } // Called when the parser finds the end of an element tag. void BaseXMLParser::HandleElementEnd(const String& RMLUI_UNUSED_PARAMETER(name)) { RMLUI_UNUSED(name); } // Called when the parser encounters data. void BaseXMLParser::HandleData(const String& RMLUI_UNUSED_PARAMETER(data)) { RMLUI_UNUSED(data); } void BaseXMLParser::TreatElementContentAsCDATA() { treat_content_as_cdata = true; } void BaseXMLParser::ReadHeader() { if (PeekString("", temp); } } void BaseXMLParser::ReadBody() { RMLUI_ZoneScoped; open_tag_depth = 0; line_number_open_tag = 0; for(;;) { // Find the next open tag. if (!FindString("<", data, true)) break; // Check what kind of tag this is. if (PeekString("!--")) { // Comment. String temp; if (!FindString("-->", temp)) break; } else if (PeekString("![CDATA[")) { // CDATA tag; read everything (including markup) until the ending // CDATA tag. 
if (!ReadCDATA()) break; } else if (PeekString("/")) { if (!ReadCloseTag()) break; // Bail if we've hit the end of the XML data. if (open_tag_depth == 0) { xml_source->Seek((long)((read - buffer) - buffer_used), SEEK_CUR); break; } } else { if (ReadOpenTag()) line_number_open_tag = line_number; else break; } } // Check for error conditions if (open_tag_depth > 0) { Log::Message(Log::LT_WARNING, "XML parse error on line %d of %s.", GetLineNumber(), xml_source->GetSourceURL().GetURL().c_str()); } } bool BaseXMLParser::ReadOpenTag() { // Increase the open depth open_tag_depth++; treat_content_as_cdata = false; // Opening tag; send data immediately and open the tag. if (!data.empty()) { HandleData(data); data.clear(); } String tag_name; if (!FindWord(tag_name, "/>")) return false; bool section_opened = false; if (PeekString(">")) { // Simple open tag. HandleElementStart(tag_name, XMLAttributes()); section_opened = true; } else if (PeekString("/") && PeekString(">")) { // Empty open tag. HandleElementStart(tag_name, XMLAttributes()); HandleElementEnd(tag_name); // Tag immediately closed, reduce count open_tag_depth--; } else { // It appears we have some attributes. Let's parse them. XMLAttributes attributes; if (!ReadAttributes(attributes)) return false; if (PeekString(">")) { HandleElementStart(tag_name, attributes); section_opened = true; } else if (PeekString("/") && PeekString(">")) { HandleElementStart(tag_name, attributes); HandleElementEnd(tag_name); // Tag immediately closed, reduce count open_tag_depth--; } else { return false; } } // Check if this tag needs to be processed as CDATA. 
if (section_opened) { String lcase_tag_name = StringUtilities::ToLower(tag_name); bool is_cdata_tag = (cdata_tags.find(lcase_tag_name) != cdata_tags.end()); if (treat_content_as_cdata || is_cdata_tag) { if (ReadCDATA(lcase_tag_name.c_str(), !is_cdata_tag)) { open_tag_depth--; if (!data.empty()) { HandleData(data); data.clear(); } HandleElementEnd(tag_name); return true; } return false; } } return true; } bool BaseXMLParser::ReadCloseTag() { // Closing tag; send data immediately and close the tag. if (!data.empty()) { HandleData(data); data.clear(); } String tag_name; if (!FindString(">", tag_name)) return false; HandleElementEnd(StringUtilities::StripWhitespace(tag_name)); // Tag closed, reduce count open_tag_depth--; return true; } bool BaseXMLParser::ReadAttributes(XMLAttributes& attributes) { for (;;) { String attribute; String value; // Get the attribute name if (!FindWord(attribute, "=/>")) { return false; } // Check if theres an assigned value if (PeekString("=")) { if (PeekString("\"")) { if (!FindString("\"", value)) return false; } else if (PeekString("'")) { if (!FindString("'", value)) return false; } else if (!FindWord(value, "/>")) { return false; } } attributes[attribute] = value; // Check for the end of the tag. if (PeekString("/", false) || PeekString(">", false)) return true; } } bool BaseXMLParser::ReadCDATA(const char* tag_terminator, bool only_terminate_at_same_xml_depth) { String cdata; if (tag_terminator == nullptr) { FindString("]]>", cdata); data += cdata; return true; } else { int tag_depth = 1; // TODO: This doesn't properly handle comments and double brackets, // should probably find a way to use the normal parsing flow instead. for (;;) { // Search for the next tag opening. 
if (!FindString("<", cdata)) return false; String node_raw; if (!FindString(">", node_raw)) return false; String node_stripped = StringUtilities::StripWhitespace(node_raw); bool close_begin = false; bool close_end = false; if (!node_stripped.empty()) { if (node_stripped.front() == '/') close_begin = true; else if (node_stripped.back() == '/') close_end = true; } if (!close_begin && !close_end) tag_depth += 1; else if (close_begin && !close_end) tag_depth -= 1; if (close_begin && !close_end && (!only_terminate_at_same_xml_depth || tag_depth == 0)) { String tag_name = StringUtilities::StripWhitespace(node_stripped.substr(1)); if (StringUtilities::ToLower(tag_name) == tag_terminator) { data += cdata; return true; } } if (only_terminate_at_same_xml_depth && tag_depth <= 0) { return false; } cdata += '<' + node_raw + '>'; } } } // Reads from the stream until a complete word is found. bool BaseXMLParser::FindWord(String& word, const char* terminators) { for (;;) { if (read >= buffer + buffer_used) { if (!FillBuffer()) return false; } // Ignore white space if (StringUtilities::IsWhitespace(*read)) { if (word.empty()) { read++; continue; } else return true; } // Check for termination condition if (terminators && strchr(terminators, *read)) { return !word.empty(); } word += *read; read++; } } // Reads from the stream until the given character set is found. 
bool BaseXMLParser::FindString(const char* string, String& data, bool escape_brackets) { int index = 0; bool in_brackets = false; char previous = 0; while (string[index]) { if (read >= buffer + buffer_used) { if (!FillBuffer()) return false; } const char c = char(*read); // Count line numbers if (c == '\n') { line_number++; } if(escape_brackets) { if (c == '{' && previous == '{') in_brackets = true; else if (c == '}' && previous == '}') in_brackets = false; } if (c == string[index] && !in_brackets) { index += 1; } else { if (index > 0) { data += String(string, index); index = 0; } data += c; } previous = c; read++; } return true; } // Returns true if the next sequence of characters in the stream matches the // given string. bool BaseXMLParser::PeekString(const char* string, bool consume) { unsigned char* peek_read = read; int i = 0; while (string[i]) { // If we're about to read past the end of the buffer, read into the // overflow buffer. if ((peek_read - buffer) + i >= buffer_used) { int peek_offset = (int)(peek_read - read); FillBuffer(); peek_read = read + peek_offset; if (peek_read - buffer + i >= buffer_used) { // Weird, seems our buffer is too small, realloc it bigger. buffer_size *= 2; int read_offset = (int)(read - buffer); unsigned char* new_buffer = (unsigned char*) realloc(buffer, buffer_size); RMLUI_ASSERTMSG(new_buffer != nullptr, "Unable to allocate larger buffer for Peek() call"); if(new_buffer == nullptr) { return false; } buffer = new_buffer; // Restore the read pointers. read = buffer + read_offset; peek_read = read + peek_offset; // Attempt to fill our new buffer size. if (!FillBuffer()) return false; } } // Seek past all the whitespace if we haven't hit the initial character yet. if (i == 0 && StringUtilities::IsWhitespace(*peek_read)) { peek_read++; } else { if (char(*peek_read) != string[i]) return false; i++; peek_read++; } } // Set the read pointer to the end of the peek. 
if (consume) { read = peek_read; } return true; } // Fill the buffer as much as possible, without removing any content that is still pending bool BaseXMLParser::FillBuffer() { int bytes_free = buffer_size; int bytes_remaining = Math::Max((int)(buffer_used - (read - buffer)), 0); // If theres any data still in the buffer, shift it down, and fill it again if (bytes_remaining > 0) { memmove(buffer, read, bytes_remaining); bytes_free = buffer_size - bytes_remaining; } read = buffer; size_t bytes_read = xml_source->Read(&buffer[bytes_remaining], bytes_free); buffer_used = (int)(bytes_read + bytes_remaining); return bytes_read > 0; } } }