BaseXMLParser.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563
  1. /*
  2. * This source file is part of RmlUi, the HTML/CSS Interface Middleware
  3. *
  4. * For the latest information, see http://github.com/mikke89/RmlUi
  5. *
  6. * Copyright (c) 2008-2010 CodePoint Ltd, Shift Technology Ltd
  7. * Copyright (c) 2019 The RmlUi Team, and contributors
  8. *
  9. * Permission is hereby granted, free of charge, to any person obtaining a copy
  10. * of this software and associated documentation files (the "Software"), to deal
  11. * in the Software without restriction, including without limitation the rights
  12. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  13. * copies of the Software, and to permit persons to whom the Software is
  14. * furnished to do so, subject to the following conditions:
  15. *
  16. * The above copyright notice and this permission notice shall be included in
  17. * all copies or substantial portions of the Software.
  18. *
  19. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  20. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  21. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  22. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  23. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  24. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  25. * THE SOFTWARE.
  26. *
  27. */
  28. #include "../../Include/RmlUi/Core/BaseXMLParser.h"
  29. #include "../../Include/RmlUi/Core/Profiling.h"
  30. #include "../../Include/RmlUi/Core/Stream.h"
  31. #include "XMLParseTools.h"
  32. #include <string.h>
  33. namespace Rml {
  34. BaseXMLParser::BaseXMLParser()
  35. {}
  36. BaseXMLParser::~BaseXMLParser()
  37. {}
  38. // Registers a tag as containing general character data.
  39. void BaseXMLParser::RegisterCDATATag(const String& tag)
  40. {
  41. if (!tag.empty())
  42. cdata_tags.insert(StringUtilities::ToLower(tag));
  43. }
  44. void BaseXMLParser::RegisterInnerXMLAttribute(const String& attribute_name)
  45. {
  46. attributes_for_inner_xml_data.insert(attribute_name);
  47. }
  48. // Parses the given stream as an XML file, and calls the handlers when
  49. // interesting phenomenon are encountered.
  50. void BaseXMLParser::Parse(Stream* stream)
  51. {
  52. source_url = &stream->GetSourceURL();
  53. xml_source.clear();
  54. // We read in the whole XML file here.
  55. // TODO: It doesn't look like the Stream interface is used for anything useful. We
  56. // might as well just use a span or StringView, and get completely rid of it.
  57. // @performance Otherwise, use the temporary allocator.
  58. const size_t source_size = stream->Length();
  59. stream->Read(xml_source, source_size);
  60. xml_index = 0;
  61. line_number = 1;
  62. line_number_open_tag = 1;
  63. inner_xml_data = false;
  64. inner_xml_data_terminate_depth = 0;
  65. inner_xml_data_index_begin = 0;
  66. // Read (er ... skip) the header, if one exists.
  67. ReadHeader();
  68. // Read the XML body.
  69. ReadBody();
  70. xml_source.clear();
  71. source_url = nullptr;
  72. }
  73. // Get the current file line number
  74. int BaseXMLParser::GetLineNumber() const
  75. {
  76. return line_number;
  77. }
  78. int BaseXMLParser::GetLineNumberOpenTag() const
  79. {
  80. return line_number_open_tag;
  81. }
  82. // Called when the parser finds the beginning of an element tag.
  83. void BaseXMLParser::HandleElementStart(const String& RMLUI_UNUSED_PARAMETER(name), const XMLAttributes& RMLUI_UNUSED_PARAMETER(attributes))
  84. {
  85. RMLUI_UNUSED(name);
  86. RMLUI_UNUSED(attributes);
  87. }
  88. // Called when the parser finds the end of an element tag.
  89. void BaseXMLParser::HandleElementEnd(const String& RMLUI_UNUSED_PARAMETER(name))
  90. {
  91. RMLUI_UNUSED(name);
  92. }
  93. // Called when the parser encounters data.
  94. void BaseXMLParser::HandleData(const String& RMLUI_UNUSED_PARAMETER(data), XMLDataType RMLUI_UNUSED_PARAMETER(type))
  95. {
  96. RMLUI_UNUSED(data);
  97. RMLUI_UNUSED(type);
  98. }
  99. /// Returns the source URL of this parse. Only valid during parsing.
  100. const URL* BaseXMLParser::GetSourceURLPtr() const
  101. {
  102. return source_url;
  103. }
  104. void BaseXMLParser::Next() {
  105. xml_index += 1;
  106. }
  107. bool BaseXMLParser::AtEnd() const {
  108. return xml_index >= xml_source.size();
  109. }
  110. char BaseXMLParser::Look() const {
  111. RMLUI_ASSERT(!AtEnd());
  112. return xml_source[xml_index];
  113. }
  114. void BaseXMLParser::HandleElementStartInternal(const String& name, const XMLAttributes& attributes)
  115. {
  116. if (!inner_xml_data)
  117. HandleElementStart(name, attributes);
  118. }
  119. void BaseXMLParser::HandleElementEndInternal(const String& name)
  120. {
  121. if (!inner_xml_data)
  122. HandleElementEnd(name);
  123. }
  124. void BaseXMLParser::HandleDataInternal(const String& data, XMLDataType type)
  125. {
  126. if (!inner_xml_data)
  127. HandleData(data, type);
  128. }
  129. void BaseXMLParser::ReadHeader()
  130. {
  131. if (PeekString("<?"))
  132. {
  133. String temp;
  134. FindString(">", temp);
  135. }
  136. }
  137. void BaseXMLParser::ReadBody()
  138. {
  139. RMLUI_ZoneScoped;
  140. open_tag_depth = 0;
  141. line_number_open_tag = 0;
  142. for(;;)
  143. {
  144. // Find the next open tag.
  145. if (!FindString("<", data, true))
  146. break;
  147. const size_t xml_index_tag = xml_index - 1;
  148. // Check what kind of tag this is.
  149. if (PeekString("!--"))
  150. {
  151. // Comment.
  152. String temp;
  153. if (!FindString("-->", temp))
  154. break;
  155. }
  156. else if (PeekString("![CDATA["))
  157. {
  158. // CDATA tag; read everything (including markup) until the ending
  159. // CDATA tag.
  160. if (!ReadCDATA())
  161. break;
  162. }
  163. else if (PeekString("/"))
  164. {
  165. if (!ReadCloseTag(xml_index_tag))
  166. break;
  167. // Bail if we've hit the end of the XML data.
  168. if (open_tag_depth == 0)
  169. break;
  170. }
  171. else
  172. {
  173. if (ReadOpenTag())
  174. line_number_open_tag = line_number;
  175. else
  176. break;
  177. }
  178. }
  179. // Check for error conditions
  180. if (open_tag_depth > 0)
  181. {
  182. Log::Message(Log::LT_WARNING, "XML parse error on line %d of %s.", GetLineNumber(), source_url->GetURL().c_str());
  183. }
  184. }
  185. bool BaseXMLParser::ReadOpenTag()
  186. {
  187. // Increase the open depth
  188. open_tag_depth++;
  189. // Opening tag; send data immediately and open the tag.
  190. if (!data.empty())
  191. {
  192. HandleDataInternal(data, XMLDataType::Text);
  193. data.clear();
  194. }
  195. String tag_name;
  196. if (!FindWord(tag_name, "/>"))
  197. return false;
  198. bool section_opened = false;
  199. if (PeekString(">"))
  200. {
  201. // Simple open tag.
  202. HandleElementStartInternal(tag_name, XMLAttributes());
  203. section_opened = true;
  204. }
  205. else if (PeekString("/") &&
  206. PeekString(">"))
  207. {
  208. // Empty open tag.
  209. HandleElementStartInternal(tag_name, XMLAttributes());
  210. HandleElementEndInternal(tag_name);
  211. // Tag immediately closed, reduce count
  212. open_tag_depth--;
  213. }
  214. else
  215. {
  216. // It appears we have some attributes. Let's parse them.
  217. bool parse_inner_xml_as_data = false;
  218. XMLAttributes attributes;
  219. if (!ReadAttributes(attributes, parse_inner_xml_as_data))
  220. return false;
  221. if (PeekString(">"))
  222. {
  223. HandleElementStartInternal(tag_name, attributes);
  224. section_opened = true;
  225. }
  226. else if (PeekString("/") &&
  227. PeekString(">"))
  228. {
  229. HandleElementStartInternal(tag_name, attributes);
  230. HandleElementEndInternal(tag_name);
  231. // Tag immediately closed, reduce count
  232. open_tag_depth--;
  233. }
  234. else
  235. {
  236. return false;
  237. }
  238. if (section_opened && parse_inner_xml_as_data && !inner_xml_data)
  239. {
  240. inner_xml_data = true;
  241. inner_xml_data_terminate_depth = open_tag_depth;
  242. inner_xml_data_index_begin = xml_index;
  243. }
  244. }
  245. // Check if this tag needs to be processed as CDATA.
  246. if (section_opened)
  247. {
  248. const String lcase_tag_name = StringUtilities::ToLower(tag_name);
  249. bool is_cdata_tag = (cdata_tags.find(lcase_tag_name) != cdata_tags.end());
  250. if (is_cdata_tag)
  251. {
  252. if (ReadCDATA(lcase_tag_name.c_str()))
  253. {
  254. open_tag_depth--;
  255. if (!data.empty())
  256. {
  257. HandleDataInternal(data, XMLDataType::CData);
  258. data.clear();
  259. }
  260. HandleElementEndInternal(tag_name);
  261. return true;
  262. }
  263. return false;
  264. }
  265. }
  266. return true;
  267. }
  268. bool BaseXMLParser::ReadCloseTag(const size_t xml_index_tag)
  269. {
  270. if (inner_xml_data && open_tag_depth == inner_xml_data_terminate_depth)
  271. {
  272. // Closing the tag that initiated the inner xml data parsing. Set all its contents as Data to be
  273. // submitted next, and disable the mode to resume normal parsing behavior.
  274. RMLUI_ASSERT(inner_xml_data_index_begin <= xml_index_tag);
  275. inner_xml_data = false;
  276. data = xml_source.substr(inner_xml_data_index_begin, xml_index_tag - inner_xml_data_index_begin);
  277. HandleDataInternal(data, XMLDataType::InnerXML);
  278. data.clear();
  279. }
  280. // Closing tag; send data immediately and close the tag.
  281. if (!data.empty())
  282. {
  283. HandleDataInternal(data, XMLDataType::Text);
  284. data.clear();
  285. }
  286. String tag_name;
  287. if (!FindString(">", tag_name))
  288. return false;
  289. HandleElementEndInternal(StringUtilities::StripWhitespace(tag_name));
  290. // Tag closed, reduce count
  291. open_tag_depth--;
  292. return true;
  293. }
  294. bool BaseXMLParser::ReadAttributes(XMLAttributes& attributes, bool& parse_raw_xml_content)
  295. {
  296. for (;;)
  297. {
  298. String attribute;
  299. String value;
  300. // Get the attribute name
  301. if (!FindWord(attribute, "=/>"))
  302. {
  303. return false;
  304. }
  305. // Check if theres an assigned value
  306. if (PeekString("="))
  307. {
  308. if (PeekString("\""))
  309. {
  310. if (!FindString("\"", value))
  311. return false;
  312. }
  313. else if (PeekString("'"))
  314. {
  315. if (!FindString("'", value))
  316. return false;
  317. }
  318. else if (!FindWord(value, "/>"))
  319. {
  320. return false;
  321. }
  322. }
  323. if (attributes_for_inner_xml_data.count(attribute) == 1)
  324. parse_raw_xml_content = true;
  325. attributes[attribute] = value;
  326. // Check for the end of the tag.
  327. if (PeekString("/", false) || PeekString(">", false))
  328. return true;
  329. }
  330. }
  331. bool BaseXMLParser::ReadCDATA(const char* tag_terminator)
  332. {
  333. String cdata;
  334. if (tag_terminator == nullptr)
  335. {
  336. FindString("]]>", cdata);
  337. data += cdata;
  338. return true;
  339. }
  340. else
  341. {
  342. for (;;)
  343. {
  344. // Search for the next tag opening.
  345. if (!FindString("<", cdata))
  346. return false;
  347. if (PeekString("/", false))
  348. {
  349. String tag;
  350. if (FindString(">", tag))
  351. {
  352. size_t slash_pos = tag.find('/');
  353. String tag_name = StringUtilities::StripWhitespace(slash_pos == String::npos ? tag : tag.substr(slash_pos + 1));
  354. if (StringUtilities::ToLower(tag_name) == tag_terminator)
  355. {
  356. data += cdata;
  357. return true;
  358. }
  359. else
  360. {
  361. cdata += '<' + tag + '>';
  362. }
  363. }
  364. else
  365. cdata += "<";
  366. }
  367. else
  368. cdata += "<";
  369. }
  370. }
  371. }
  372. // Reads from the stream until a complete word is found.
  373. bool BaseXMLParser::FindWord(String& word, const char* terminators)
  374. {
  375. while (!AtEnd())
  376. {
  377. char c = Look();
  378. // Ignore white space
  379. if (StringUtilities::IsWhitespace(c))
  380. {
  381. if (word.empty())
  382. {
  383. Next();
  384. continue;
  385. }
  386. else
  387. return true;
  388. }
  389. // Check for termination condition
  390. if (terminators && strchr(terminators, c))
  391. {
  392. return !word.empty();
  393. }
  394. word += c;
  395. Next();
  396. }
  397. return false;
  398. }
  399. // Reads from the stream until the given character set is found.
  400. bool BaseXMLParser::FindString(const char* string, String& data, bool escape_brackets)
  401. {
  402. int index = 0;
  403. bool in_brackets = false;
  404. char previous = 0;
  405. while (string[index])
  406. {
  407. if (AtEnd())
  408. return false;
  409. const char c = Look();
  410. // Count line numbers
  411. if (c == '\n')
  412. {
  413. line_number++;
  414. }
  415. if(escape_brackets)
  416. {
  417. const char* error_str = XMLParseTools::ParseDataBrackets(in_brackets, c, previous);
  418. if (error_str)
  419. {
  420. Log::Message(Log::LT_WARNING, "XML parse error. %s", error_str);
  421. return false;
  422. }
  423. }
  424. if (c == string[index] && !in_brackets)
  425. {
  426. index += 1;
  427. }
  428. else
  429. {
  430. if (index > 0)
  431. {
  432. data += String(string, index);
  433. index = 0;
  434. }
  435. data += c;
  436. }
  437. previous = c;
  438. Next();
  439. }
  440. return true;
  441. }
  442. // Returns true if the next sequence of characters in the stream matches the
  443. // given string.
  444. bool BaseXMLParser::PeekString(const char* string, bool consume)
  445. {
  446. const size_t start_index = xml_index;
  447. bool success = true;
  448. int i = 0;
  449. while (string[i])
  450. {
  451. if (AtEnd())
  452. {
  453. success = false;
  454. break;
  455. }
  456. const char c = Look();
  457. // Seek past all the whitespace if we haven't hit the initial character yet.
  458. if (i == 0 && StringUtilities::IsWhitespace(c))
  459. {
  460. Next();
  461. }
  462. else
  463. {
  464. if (c != string[i])
  465. {
  466. success = false;
  467. break;
  468. }
  469. i++;
  470. Next();
  471. }
  472. }
  473. // Set the index to the start index unless we are consuming.
  474. if (!consume || !success)
  475. xml_index = start_index;
  476. return success;
  477. }
  478. } // namespace Rml