BaseXMLParser.cpp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503
  1. /*
  2. * This source file is part of libRocket, the HTML/CSS Interface Middleware
  3. *
  4. * For the latest information, see http://www.librocket.com
  5. *
  6. * Copyright (c) 2008-2010 CodePoint Ltd, Shift Technology Ltd
  7. *
  8. * Permission is hereby granted, free of charge, to any person obtaining a copy
  9. * of this software and associated documentation files (the "Software"), to deal
  10. * in the Software without restriction, including without limitation the rights
  11. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  12. * copies of the Software, and to permit persons to whom the Software is
  13. * furnished to do so, subject to the following conditions:
  14. *
  15. * The above copyright notice and this permission notice shall be included in
  16. * all copies or substantial portions of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  21. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  23. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  24. * THE SOFTWARE.
  25. *
  26. */
  27. #include "precompiled.h"
  28. #include <Rocket/Core/BaseXMLParser.h>
  29. namespace Rocket {
  30. namespace Core {
  // Initial size, in bytes, of the parser's read buffer. 4k is chosen because most file
  // layers cache in 4k chunks.
  const int DEFAULT_BUFFER_SIZE = 4 * 1024;
  33. BaseXMLParser::BaseXMLParser()
  34. {
  35. read = NULL;
  36. buffer = NULL;
  37. buffer_used = 0;
  38. buffer_size = 0;
  39. open_tag_depth = 0;
  40. }
  41. BaseXMLParser::~BaseXMLParser()
  42. {
  43. }
  44. // Registers a tag as containing general character data.
  45. void BaseXMLParser::RegisterCDATATag(const String& tag)
  46. {
  47. if (!tag.Empty())
  48. cdata_tags.insert(tag.ToLower());
  49. }
  50. // Parses the given stream as an XML file, and calls the handlers when
  51. // interesting phenomenon are encountered.
  52. void BaseXMLParser::Parse(Stream* stream)
  53. {
  54. xml_source = stream;
  55. buffer_size = DEFAULT_BUFFER_SIZE;
  56. buffer = (unsigned char*) malloc(buffer_size);
  57. read = buffer;
  58. line_number = 1;
  59. FillBuffer();
  60. // Read (er ... skip) the header, if one exists.
  61. ReadHeader();
  62. // Read the XML body.
  63. ReadBody();
  64. free(buffer);
  65. }
  66. // Get the current file line number
  67. int BaseXMLParser::GetLineNumber()
  68. {
  69. return line_number;
  70. }
  71. // Called when the parser finds the beginning of an element tag.
  72. void BaseXMLParser::HandleElementStart(const String& ROCKET_UNUSED(name), const XMLAttributes& ROCKET_UNUSED(attributes))
  73. {
  74. }
  75. // Called when the parser finds the end of an element tag.
  76. void BaseXMLParser::HandleElementEnd(const String& ROCKET_UNUSED(name))
  77. {
  78. }
  79. // Called when the parser encounters data.
  80. void BaseXMLParser::HandleData(const String& ROCKET_UNUSED(data))
  81. {
  82. }
  83. void BaseXMLParser::ReadHeader()
  84. {
  85. if (PeekString((unsigned char*) "<?"))
  86. {
  87. String temp;
  88. FindString((unsigned char*) ">", temp);
  89. }
  90. }
  91. void BaseXMLParser::ReadBody()
  92. {
  93. open_tag_depth = 0;
  94. for(;;)
  95. {
  96. // Find the next open tag.
  97. if (!FindString((unsigned char*) "<", data))
  98. break;
  99. // Check what kind of tag this is.
  100. if (PeekString((const unsigned char*) "!--"))
  101. {
  102. // Comment.
  103. String temp;
  104. if (!FindString((const unsigned char*) "-->", temp))
  105. break;
  106. }
  107. else if (PeekString((const unsigned char*) "![CDATA["))
  108. {
  109. // CDATA tag; read everything (including markup) until the ending
  110. // CDATA tag.
  111. if (!ReadCDATA())
  112. break;
  113. }
  114. else if (PeekString((const unsigned char*) "/"))
  115. {
  116. if (!ReadCloseTag())
  117. break;
  118. // Bail if we've hit the end of the XML data.
  119. if (open_tag_depth == 0)
  120. {
  121. xml_source->Seek((long)((read - buffer) - buffer_used), SEEK_CUR);
  122. break;
  123. }
  124. }
  125. else
  126. {
  127. if (!ReadOpenTag())
  128. break;
  129. }
  130. }
  131. // Check for error conditions
  132. if (open_tag_depth > 0)
  133. {
  134. Log::Message(Log::LT_WARNING, "XML parse error on line %d of %s.", GetLineNumber(), xml_source->GetSourceURL().GetURL().CString());
  135. }
  136. }
  137. bool BaseXMLParser::ReadOpenTag()
  138. {
  139. // Increase the open depth
  140. open_tag_depth++;
  141. // Opening tag; send data immediately and open the tag.
  142. if (!data.Empty())
  143. {
  144. HandleData(data);
  145. data.Clear();
  146. }
  147. String tag_name;
  148. if (!FindWord(tag_name, "/>"))
  149. return false;
  150. bool section_opened = false;
  151. if (PeekString((const unsigned char*) ">"))
  152. {
  153. // Simple open tag.
  154. HandleElementStart(tag_name, XMLAttributes());
  155. section_opened = true;
  156. }
  157. else if (PeekString((const unsigned char*) "/") &&
  158. PeekString((const unsigned char*) ">"))
  159. {
  160. // Empty open tag.
  161. HandleElementStart(tag_name, XMLAttributes());
  162. HandleElementEnd(tag_name);
  163. // Tag immediately closed, reduce count
  164. open_tag_depth--;
  165. }
  166. else
  167. {
  168. // It appears we have some attributes. Let's parse them.
  169. XMLAttributes attributes;
  170. if (!ReadAttributes(attributes))
  171. return false;
  172. if (PeekString((const unsigned char*) ">"))
  173. {
  174. HandleElementStart(tag_name, attributes);
  175. section_opened = true;
  176. }
  177. else if (PeekString((const unsigned char*) "/") &&
  178. PeekString((const unsigned char*) ">"))
  179. {
  180. HandleElementStart(tag_name, attributes);
  181. HandleElementEnd(tag_name);
  182. // Tag immediately closed, reduce count
  183. open_tag_depth--;
  184. }
  185. else
  186. {
  187. return false;
  188. }
  189. }
  190. // Check if this tag needs to processed as CDATA.
  191. if (section_opened)
  192. {
  193. String lcase_tag_name = tag_name.ToLower();
  194. if (cdata_tags.find(lcase_tag_name) != cdata_tags.end())
  195. {
  196. if (ReadCDATA(lcase_tag_name.CString()))
  197. {
  198. open_tag_depth--;
  199. if (!data.Empty())
  200. {
  201. HandleData(data);
  202. data.Clear();
  203. }
  204. HandleElementEnd(tag_name);
  205. return true;
  206. }
  207. return false;
  208. }
  209. }
  210. return true;
  211. }
  212. bool BaseXMLParser::ReadCloseTag()
  213. {
  214. // Closing tag; send data immediately and close the tag.
  215. if (!data.Empty())
  216. {
  217. HandleData(data);
  218. data.Clear();
  219. }
  220. String tag_name;
  221. if (!FindString((const unsigned char*) ">", tag_name))
  222. return false;
  223. HandleElementEnd(StringUtilities::StripWhitespace(tag_name));
  224. // Tag closed, reduce count
  225. open_tag_depth--;
  226. return true;
  227. }
  228. bool BaseXMLParser::ReadAttributes(XMLAttributes& attributes)
  229. {
  230. for (;;)
  231. {
  232. String attribute;
  233. String value;
  234. // Get the attribute name
  235. if (!FindWord(attribute, "=/>"))
  236. {
  237. return false;
  238. }
  239. // Check if theres an assigned value
  240. if (PeekString((const unsigned char*)"="))
  241. {
  242. if (PeekString((const unsigned char*) "\""))
  243. {
  244. if (!FindString((const unsigned char*) "\"", value))
  245. return false;
  246. }
  247. else if (PeekString((const unsigned char*) "'"))
  248. {
  249. if (!FindString((const unsigned char*) "'", value))
  250. return false;
  251. }
  252. else if (!FindWord(value, "/>"))
  253. {
  254. return false;
  255. }
  256. }
  257. attributes.Set(attribute.CString(), value);
  258. // Check for the end of the tag.
  259. if (PeekString((const unsigned char*) "/", false) ||
  260. PeekString((const unsigned char*) ">", false))
  261. return true;
  262. }
  263. }
  264. bool BaseXMLParser::ReadCDATA(const char* terminator)
  265. {
  266. String cdata;
  267. if (terminator == NULL)
  268. {
  269. FindString((const unsigned char*) "]]>", cdata);
  270. data += cdata;
  271. return true;
  272. }
  273. else
  274. {
  275. for (;;)
  276. {
  277. // Search for the next tag opening.
  278. if (!FindString((const unsigned char*) "<", cdata))
  279. return false;
  280. if (PeekString((const unsigned char*) "/", false))
  281. {
  282. String tag;
  283. if (FindString((const unsigned char*) ">", tag))
  284. {
  285. String tag_name = StringUtilities::StripWhitespace(tag.Substring(tag.Find("/") + 1));
  286. if (tag_name.ToLower() == terminator)
  287. {
  288. data += cdata;
  289. return true;
  290. }
  291. else
  292. {
  293. cdata += "<";
  294. cdata += tag;
  295. cdata += ">";
  296. }
  297. }
  298. else
  299. cdata += "<";
  300. }
  301. else
  302. cdata += "<";
  303. }
  304. }
  305. }
  306. // Reads from the stream until a complete word is found.
  307. bool BaseXMLParser::FindWord(String& word, const char* terminators)
  308. {
  309. for (;;)
  310. {
  311. if (read >= buffer + buffer_used)
  312. {
  313. if (!FillBuffer())
  314. return false;
  315. }
  316. // Ignore white space
  317. if (StringUtilities::IsWhitespace(*read))
  318. {
  319. if (word.Empty())
  320. {
  321. read++;
  322. continue;
  323. }
  324. else
  325. return true;
  326. }
  327. // Check for termination condition
  328. if (terminators && strchr(terminators, *read))
  329. {
  330. return !word.Empty();
  331. }
  332. word += *read;
  333. read++;
  334. }
  335. }
  336. // Reads from the stream until the given character set is found.
  337. bool BaseXMLParser::FindString(const unsigned char* string, String& data)
  338. {
  339. int index = 0;
  340. while (string[index])
  341. {
  342. if (read >= buffer + buffer_used)
  343. {
  344. if (!FillBuffer())
  345. return false;
  346. }
  347. // Count line numbers
  348. if (*read == '\n')
  349. {
  350. line_number++;
  351. }
  352. if (*read == string[index])
  353. {
  354. index += 1;
  355. }
  356. else
  357. {
  358. if (index > 0)
  359. {
  360. data.Append((const char*) string, index);
  361. index = 0;
  362. }
  363. data += *read;
  364. }
  365. read++;
  366. }
  367. return true;
  368. }
  369. // Returns true if the next sequence of characters in the stream matches the
  370. // given string.
  371. bool BaseXMLParser::PeekString(const unsigned char* string, bool consume)
  372. {
  373. unsigned char* peek_read = read;
  374. int i = 0;
  375. while (string[i])
  376. {
  377. // If we're about to read past the end of the buffer, read into the
  378. // overflow buffer.
  379. if ((peek_read - buffer) + i >= buffer_used)
  380. {
  381. int peek_offset = (int)(peek_read - read);
  382. FillBuffer();
  383. peek_read = read + peek_offset;
  384. if (peek_read - buffer + i >= buffer_used)
  385. {
  386. // Wierd, seems our buffer is too small, realloc it bigger.
  387. buffer_size *= 2;
  388. int read_offset = (int)(read - buffer);
  389. buffer = (unsigned char*) realloc(buffer, buffer_size);
  390. // Restore the read pointers.
  391. read = buffer + read_offset;
  392. peek_read = read + peek_offset;
  393. // Attempt to fill our new buffer size.
  394. if (!FillBuffer())
  395. return false;
  396. }
  397. }
  398. // Seek past all the whitespace if we haven't hit the initial character yet.
  399. if (i == 0 && StringUtilities::IsWhitespace(*peek_read))
  400. {
  401. peek_read++;
  402. }
  403. else
  404. {
  405. if (*peek_read != string[i])
  406. return false;
  407. i++;
  408. peek_read++;
  409. }
  410. }
  411. // Set the read pointer to the end of the peek.
  412. if (consume)
  413. {
  414. read = peek_read;
  415. }
  416. return true;
  417. }
  418. // Fill the buffer as much as possible, without removing any content that is still pending
  419. bool BaseXMLParser::FillBuffer()
  420. {
  421. int bytes_free = buffer_size;
  422. int bytes_remaining = Math::Max((int)(buffer_used - (read - buffer)), 0);
  423. // If theres any data still in the buffer, shift it down, and fill it again
  424. if (bytes_remaining > 0)
  425. {
  426. memmove(buffer, read, bytes_remaining);
  427. bytes_free = buffer_size - bytes_remaining;
  428. }
  429. read = buffer;
  430. size_t bytes_read = xml_source->Read(&buffer[bytes_remaining], bytes_free);
  431. buffer_used = (int)(bytes_read + bytes_remaining);
  432. return bytes_read > 0;
  433. }
  434. }
  435. }