fsTinyXml.cpp 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747
  1. //-----------------------------------------------------------------------------
  2. // Copyright (c) 2013 GarageGames, LLC
  3. //
  4. // Permission is hereby granted, free of charge, to any person obtaining a copy
  5. // of this software and associated documentation files (the "Software"), to
  6. // deal in the Software without restriction, including without limitation the
  7. // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  8. // sell copies of the Software, and to permit persons to whom the Software is
  9. // furnished to do so, subject to the following conditions:
  10. //
  11. // The above copyright notice and this permission notice shall be included in
  12. // all copies or substantial portions of the Software.
  13. //
  14. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  19. // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  20. // IN THE SOFTWARE.
  21. //-----------------------------------------------------------------------------
  22. #include "fsTinyXml.h"
  23. #include "console/console.h"
  24. bool fsTiXmlDocument::LoadFile( const char * pFilename, TiXmlEncoding encoding )
  25. {
  26. // Expand the file-path.
  27. char filenameBuffer[1024];
  28. Con::expandToolScriptFilename( filenameBuffer, sizeof(filenameBuffer), pFilename );
  29. FileStream stream;
  30. #ifdef TORQUE_OS_ANDROID
  31. if (strlen(pFilename) > strlen(filenameBuffer)) {
  32. dStrcpy(filenameBuffer, pFilename, 1024);
  33. }
  34. #endif
  35. // File open for read?
  36. if ( !stream.open( filenameBuffer, Torque::FS::File::Read ) )
  37. {
  38. // No, so warn.
  39. Con::warnf("TamlXmlParser::parse() - Could not open filename '%s' for parse.", filenameBuffer );
  40. return false;
  41. }
  42. // Load document from stream.
  43. if ( !LoadFile( stream ) )
  44. {
  45. // Warn!
  46. Con::warnf("TamlXmlParser: Could not load Taml XML file from stream.");
  47. return false;
  48. }
  49. // Close the stream.
  50. stream.close();
  51. return true;
  52. }
  53. bool fsTiXmlDocument::SaveFile( const char * pFilename ) const
  54. {
  55. // Expand the file-name into the file-path buffer.
  56. char filenameBuffer[1024];
  57. Con::expandToolScriptFilename( filenameBuffer, sizeof(filenameBuffer), pFilename );
  58. FileStream stream;
  59. // File opened?
  60. if ( !stream.open( filenameBuffer, Torque::FS::File::Write ) )
  61. {
  62. // No, so warn.
  63. Con::warnf("Taml::writeFile() - Could not open filename '%s' for write.", filenameBuffer );
  64. return false;
  65. }
  66. bool ret = SaveFile(stream);
  67. stream.close();
  68. return ret;
  69. }
  70. bool fsTiXmlDocument::LoadFile( FileStream &stream, TiXmlEncoding encoding )
  71. {
  72. // Delete the existing data:
  73. Clear();
  74. //TODO: Can't clear location, investigate if this gives issues.
  75. //doc.location.Clear();
  76. // Get the file size, so we can pre-allocate the string. HUGE speed impact.
  77. long length = stream.getStreamSize();
  78. // Strange case, but good to handle up front.
  79. if ( length <= 0 )
  80. {
  81. SetError( TiXmlDocument::TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
  82. return false;
  83. }
  84. // Subtle bug here. TinyXml did use fgets. But from the XML spec:
  85. // 2.11 End-of-Line Handling
  86. // <snip>
  87. // <quote>
  88. // ...the XML processor MUST behave as if it normalized all line breaks in external
  89. // parsed entities (including the document entity) on input, before parsing, by translating
  90. // both the two-character sequence #xD #xA and any #xD that is not followed by #xA to
  91. // a single #xA character.
  92. // </quote>
  93. //
  94. // It is not clear fgets does that, and certainly isn't clear it works cross platform.
  95. // Generally, you expect fgets to translate from the convention of the OS to the c/unix
  96. // convention, and not work generally.
  97. /*
  98. while( fgets( buf, sizeof(buf), file ) )
  99. {
  100. data += buf;
  101. }
  102. */
  103. char* buf = new char[ length+1 ];
  104. buf[0] = 0;
  105. if ( !stream.read( length, buf ) ) {
  106. delete [] buf;
  107. SetError( TiXmlDocument::TIXML_ERROR_OPENING_FILE, 0, 0, TIXML_ENCODING_UNKNOWN );
  108. return false;
  109. }
  110. // Process the buffer in place to normalize new lines. (See comment above.)
  111. // Copies from the 'p' to 'q' pointer, where p can advance faster if
  112. // a newline-carriage return is hit.
  113. //
  114. // Wikipedia:
  115. // Systems based on ASCII or a compatible character set use either LF (Line feed, '\n', 0x0A, 10 in decimal) or
  116. // CR (Carriage return, '\r', 0x0D, 13 in decimal) individually, or CR followed by LF (CR+LF, 0x0D 0x0A)...
  117. // * LF: Multics, Unix and Unix-like systems (GNU/Linux, AIX, Xenix, Mac OS X, FreeBSD, etc.), BeOS, Amiga, RISC OS, and others
  118. // * CR+LF: DEC RT-11 and most other early non-Unix, non-IBM OSes, CP/M, MP/M, DOS, OS/2, Microsoft Windows, Symbian OS
  119. // * CR: Commodore 8-bit machines, Apple II family, Mac OS up to version 9 and OS-9
  120. const char* p = buf; // the read head
  121. char* q = buf; // the write head
  122. const char CR = 0x0d;
  123. const char LF = 0x0a;
  124. buf[length] = 0;
  125. while( *p ) {
  126. assert( p < (buf+length) );
  127. assert( q <= (buf+length) );
  128. assert( q <= p );
  129. if ( *p == CR ) {
  130. *q++ = LF;
  131. p++;
  132. if ( *p == LF ) { // check for CR+LF (and skip LF)
  133. p++;
  134. }
  135. }
  136. else {
  137. *q++ = *p++;
  138. }
  139. }
  140. assert( q <= (buf+length) );
  141. *q = 0;
  142. Parse( buf, 0, encoding );
  143. delete [] buf;
  144. return !Error();
  145. }
  146. bool fsTiXmlDocument::SaveFile( FileStream &stream ) const
  147. {
  148. if ( useMicrosoftBOM )
  149. {
  150. const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
  151. const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
  152. const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;
  153. stream.write( TIXML_UTF_LEAD_0 );
  154. stream.write( TIXML_UTF_LEAD_1 );
  155. stream.write( TIXML_UTF_LEAD_2 );
  156. }
  157. Print( stream, 0 );
  158. return true;
  159. }
  160. void fsTiXmlDocument::Print( FileStream& stream, int depth ) const
  161. {
  162. for ( const TiXmlNode* node=FirstChild(); node; node=node->NextSibling() )
  163. {
  164. //AttemptPrintTiNode(const_cast<TiXmlNode*>(node), stream, depth);
  165. dynamic_cast<const fsTiXmlNode*>(node)->Print( stream, depth );
  166. stream.writeText( "\n" );
  167. }
  168. }
  169. void fsTiXmlAttribute::Print( FileStream& stream, int depth, TIXML_STRING* str ) const
  170. {
  171. TIXML_STRING n, v;
  172. TiXmlString val = TiXmlString(Value());
  173. EncodeString( NameTStr(), &n );
  174. EncodeString( val, &v );
  175. for ( int i=0; i< depth; i++ ) {
  176. stream.writeText( " " );
  177. }
  178. if (val.find ('\"') == TIXML_STRING::npos) {
  179. const char* pValue = v.c_str();
  180. char buffer[4096];
  181. const S32 length = dSprintf(buffer, sizeof(buffer), "%s=\"%s\"", n.c_str(), pValue);
  182. stream.write(length, buffer);
  183. if ( str ) {
  184. (*str) += n; (*str) += "=\""; (*str) += v; (*str) += "\"";
  185. }
  186. }
  187. else {
  188. char buffer[4096];
  189. const S32 length = dSprintf(buffer, sizeof(buffer), "%s='%s'", n.c_str(), v.c_str());
  190. stream.write(length, buffer);
  191. if ( str ) {
  192. (*str) += n; (*str) += "='"; (*str) += v; (*str) += "'";
  193. }
  194. }
  195. }
  196. void fsTiXmlDeclaration::Print(FileStream& stream, int depth, TiXmlString* str) const
  197. {
  198. stream.writeStringBuffer( "<?xml " );
  199. if ( str ) (*str) += "<?xml ";
  200. if ( !version.empty() ) {
  201. stream.writeFormattedBuffer( "version=\"%s\" ", version.c_str ());
  202. if ( str ) { (*str) += "version=\""; (*str) += version; (*str) += "\" "; }
  203. }
  204. if ( !encoding.empty() ) {
  205. stream.writeFormattedBuffer( "encoding=\"%s\" ", encoding.c_str ());
  206. if ( str ) { (*str) += "encoding=\""; (*str) += encoding; (*str) += "\" "; }
  207. }
  208. if ( !standalone.empty() ) {
  209. stream.writeFormattedBuffer( "standalone=\"%s\" ", standalone.c_str ());
  210. if ( str ) { (*str) += "standalone=\""; (*str) += standalone; (*str) += "\" "; }
  211. }
  212. stream.writeStringBuffer( "?>" );
  213. if ( str ) (*str) += "?>";
  214. }
  215. void fsTiXmlElement::Print(FileStream& stream, int depth) const
  216. {
  217. int i;
  218. for ( i=0; i<depth; i++ ) {
  219. stream.writeStringBuffer( " " );
  220. }
  221. stream.writeFormattedBuffer( "<%s", value.c_str() );
  222. const TiXmlAttribute* attrib;
  223. for ( attrib = attributeSet.First(); attrib; attrib = attrib->Next() )
  224. {
  225. stream.writeStringBuffer( "\n" );
  226. dynamic_cast<const fsTiXmlAttribute*>(attrib)->Print( stream, depth+1 );
  227. }
  228. // There are 3 different formatting approaches:
  229. // 1) An element without children is printed as a <foo /> node
  230. // 2) An element with only a text child is printed as <foo> text </foo>
  231. // 3) An element with children is printed on multiple lines.
  232. TiXmlNode* node;
  233. if ( !firstChild )
  234. {
  235. stream.writeStringBuffer( " />" );
  236. }
  237. else if ( firstChild == lastChild && firstChild->ToText() )
  238. {
  239. stream.writeStringBuffer( ">" );
  240. dynamic_cast<const fsTiXmlNode*>(firstChild)->Print( stream, depth + 1 );
  241. stream.writeFormattedBuffer( "</%s>", value.c_str() );
  242. }
  243. else
  244. {
  245. stream.writeStringBuffer( ">" );
  246. for ( node = firstChild; node; node=node->NextSibling() )
  247. {
  248. if ( !node->ToText() )
  249. {
  250. stream.writeStringBuffer( "\n" );
  251. }
  252. dynamic_cast<const fsTiXmlNode*>(node)->Print( stream, depth+1 );
  253. }
  254. stream.writeStringBuffer( "\n" );
  255. for( i=0; i<depth; ++i ) {
  256. stream.writeStringBuffer( " " );
  257. }
  258. stream.writeFormattedBuffer( "</%s>", value.c_str() );
  259. }
  260. }
  261. void fsTiXmlComment::Print(FileStream& stream, int depth) const
  262. {
  263. for ( int i=0; i<depth; i++ )
  264. {
  265. stream.writeStringBuffer( " " );
  266. }
  267. stream.writeFormattedBuffer( "<!--%s-->", value.c_str() );
  268. }
  269. void fsTiXmlText::Print(FileStream& stream, int depth) const
  270. {
  271. if ( cdata )
  272. {
  273. int i;
  274. stream.writeStringBuffer( "\n" );
  275. for ( i=0; i<depth; i++ ) {
  276. stream.writeStringBuffer( " " );
  277. }
  278. stream.writeFormattedBuffer( "<![CDATA[%s]]>\n", value.c_str() ); // unformatted output
  279. }
  280. else
  281. {
  282. TIXML_STRING buffer;
  283. EncodeString( value, &buffer );
  284. stream.writeFormattedBuffer( "%s", buffer.c_str() );
  285. }
  286. }
  287. void fsTiXmlUnknown::Print(FileStream& stream, int depth) const
  288. {
  289. for ( int i=0; i<depth; i++ )
  290. stream.writeStringBuffer( " " );
  291. stream.writeFormattedBuffer( "<%s>", value.c_str() );
  292. }
  293. static TiXmlNode* TiNodeIdentify( TiXmlNode* parent, const char* p, TiXmlEncoding encoding )
  294. {
  295. TiXmlNode* returnNode = 0;
  296. p = TiXmlNode::SkipWhiteSpace( p, encoding );
  297. if( !p || !*p || *p != '<' )
  298. {
  299. return 0;
  300. }
  301. p = TiXmlNode::SkipWhiteSpace( p, encoding );
  302. if ( !p || !*p )
  303. {
  304. return 0;
  305. }
  306. // What is this thing?
  307. // - Elements start with a letter or underscore, but xml is reserved.
  308. // - Comments: <!--
  309. // - Decleration: <?xml
  310. // - Everthing else is unknown to tinyxml.
  311. //
  312. const char* xmlHeader = { "<?xml" };
  313. const char* commentHeader = { "<!--" };
  314. const char* dtdHeader = { "<!" };
  315. const char* cdataHeader = { "<![CDATA[" };
  316. if ( TiXmlNode::StringEqual( p, xmlHeader, true, encoding ) )
  317. {
  318. #ifdef DEBUG_PARSER
  319. TIXML_LOG( "XML parsing Declaration\n" );
  320. #endif
  321. returnNode = new fsTiXmlDeclaration();
  322. }
  323. else if ( TiXmlNode::StringEqual( p, commentHeader, false, encoding ) )
  324. {
  325. #ifdef DEBUG_PARSER
  326. TIXML_LOG( "XML parsing Comment\n" );
  327. #endif
  328. returnNode = new fsTiXmlComment();
  329. }
  330. else if ( TiXmlNode::StringEqual( p, cdataHeader, false, encoding ) )
  331. {
  332. #ifdef DEBUG_PARSER
  333. TIXML_LOG( "XML parsing CDATA\n" );
  334. #endif
  335. TiXmlText* text = new fsTiXmlText( "" );
  336. text->SetCDATA( true );
  337. returnNode = text;
  338. }
  339. else if ( TiXmlNode::StringEqual( p, dtdHeader, false, encoding ) )
  340. {
  341. #ifdef DEBUG_PARSER
  342. TIXML_LOG( "XML parsing Unknown(1)\n" );
  343. #endif
  344. returnNode = new fsTiXmlUnknown();
  345. }
  346. else if ( TiXmlNode::IsAlpha( *(p+1), encoding )
  347. || *(p+1) == '_' )
  348. {
  349. #ifdef DEBUG_PARSER
  350. TIXML_LOG( "XML parsing Element\n" );
  351. #endif
  352. returnNode = new fsTiXmlElement( "" );
  353. }
  354. else
  355. {
  356. #ifdef DEBUG_PARSER
  357. TIXML_LOG( "XML parsing Unknown(2)\n" );
  358. #endif
  359. returnNode = new fsTiXmlUnknown();
  360. }
  361. if ( returnNode )
  362. {
  363. // Set the parent, so it can report errors
  364. returnNode->parent = parent;
  365. }
  366. return returnNode;
  367. }
  368. TiXmlNode* fsTiXmlDocument::Identify( const char* p, TiXmlEncoding encoding )
  369. {
  370. return TiNodeIdentify(this, p, encoding);
  371. }
  372. TiXmlNode* fsTiXmlElement::Identify( const char* p, TiXmlEncoding encoding )
  373. {
  374. return TiNodeIdentify(this, p, encoding);
  375. }
  376. const char* fsTiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
  377. {
  378. p = SkipWhiteSpace( p, encoding );
  379. TiXmlDocument* document = GetDocument();
  380. if ( !p || !*p )
  381. {
  382. if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
  383. return 0;
  384. }
  385. if ( data )
  386. {
  387. data->Stamp( p, encoding );
  388. location = data->Cursor();
  389. }
  390. if ( *p != '<' )
  391. {
  392. if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
  393. return 0;
  394. }
  395. p = SkipWhiteSpace( p+1, encoding );
  396. // Read the name.
  397. const char* pErr = p;
  398. p = ReadName( p, &value, encoding );
  399. if ( !p || !*p )
  400. {
  401. if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
  402. return 0;
  403. }
  404. TIXML_STRING endTag ("</");
  405. endTag += value;
  406. // Check for and read attributes. Also look for an empty
  407. // tag or an end tag.
  408. while ( p && *p )
  409. {
  410. pErr = p;
  411. p = SkipWhiteSpace( p, encoding );
  412. if ( !p || !*p )
  413. {
  414. if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
  415. return 0;
  416. }
  417. if ( *p == '/' )
  418. {
  419. ++p;
  420. // Empty tag.
  421. if ( *p != '>' )
  422. {
  423. if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
  424. return 0;
  425. }
  426. return (p+1);
  427. }
  428. else if ( *p == '>' )
  429. {
  430. // Done with attributes (if there were any.)
  431. // Read the value -- which can include other
  432. // elements -- read the end tag, and return.
  433. ++p;
  434. p = ReadValue( p, data, encoding ); // Note this is an Element method, and will set the error if one happens.
  435. if ( !p || !*p ) {
  436. // We were looking for the end tag, but found nothing.
  437. // Fix for [ 1663758 ] Failure to report error on bad XML
  438. if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
  439. return 0;
  440. }
  441. // We should find the end tag now
  442. // note that:
  443. // </foo > and
  444. // </foo>
  445. // are both valid end tags.
  446. if ( StringEqual( p, endTag.c_str(), false, encoding ) )
  447. {
  448. p += endTag.length();
  449. p = SkipWhiteSpace( p, encoding );
  450. if ( p && *p && *p == '>' ) {
  451. ++p;
  452. return p;
  453. }
  454. if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
  455. return 0;
  456. }
  457. else
  458. {
  459. if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
  460. return 0;
  461. }
  462. }
  463. else
  464. {
  465. // Try to read an attribute:
  466. TiXmlAttribute* attrib = new fsTiXmlAttribute();
  467. if ( !attrib )
  468. {
  469. return 0;
  470. }
  471. attrib->SetDocument( document );
  472. pErr = p;
  473. p = attrib->Parse( p, data, encoding );
  474. if ( !p || !*p )
  475. {
  476. if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
  477. delete attrib;
  478. return 0;
  479. }
  480. // Handle the strange case of double attributes:
  481. #ifdef TIXML_USE_STL
  482. TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
  483. #else
  484. TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
  485. #endif
  486. if ( node )
  487. {
  488. if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
  489. delete attrib;
  490. return 0;
  491. }
  492. attributeSet.Add( attrib );
  493. }
  494. }
  495. return p;
  496. }
  497. /*
  498. TiXmlNode* fsTiXmlNode::Identify(char const* p, TiXmlEncoding encoding)
  499. {
  500. TiXmlNode* returnNode = 0;
  501. p = TiXmlBase::SkipWhiteSpace( p, encoding );
  502. if( !p || !*p || *p != '<' )
  503. {
  504. return 0;
  505. }
  506. p = TiXmlBase::SkipWhiteSpace( p, encoding );
  507. if ( !p || !*p )
  508. {
  509. return 0;
  510. }
  511. // What is this thing?
  512. // - Elements start with a letter or underscore, but xml is reserved.
  513. // - Comments: <!--
  514. // - Declaration: <?xml
  515. // - Everything else is unknown to tinyxml.
  516. //
  517. const char* xmlHeader = { "<?xml" };
  518. const char* commentHeader = { "<!--" };
  519. const char* dtdHeader = { "<!" };
  520. const char* cdataHeader = { "<![CDATA[" };
  521. if ( TiXmlBase::StringEqual( p, xmlHeader, true, encoding ) )
  522. {
  523. #ifdef DEBUG_PARSER
  524. TIXML_LOG( "XML parsing Declaration\n" );
  525. #endif
  526. returnNode = new fsTiXmlDeclaration();
  527. }
  528. else if ( TiXmlBase::StringEqual( p, commentHeader, false, encoding ) )
  529. {
  530. #ifdef DEBUG_PARSER
  531. TIXML_LOG( "XML parsing Comment\n" );
  532. #endif
  533. returnNode = new fsTiXmlComment();
  534. }
  535. else if ( TiXmlBase::StringEqual( p, cdataHeader, false, encoding ) )
  536. {
  537. #ifdef DEBUG_PARSER
  538. TIXML_LOG( "XML parsing CDATA\n" );
  539. #endif
  540. fsTiXmlText* text = new fsTiXmlText( "" );
  541. text->SetCDATA( true );
  542. returnNode = text;
  543. }
  544. else if ( TiXmlBase::StringEqual( p, dtdHeader, false, encoding ) )
  545. {
  546. #ifdef DEBUG_PARSER
  547. TIXML_LOG( "XML parsing Unknown(1)\n" );
  548. #endif
  549. returnNode = new fsTiXmlUnknown();
  550. }
  551. else if ( TiXmlBase::IsAlpha( *(p+1), encoding )
  552. || *(p+1) == '_' )
  553. {
  554. #ifdef DEBUG_PARSER
  555. TIXML_LOG( "XML parsing Element\n" );
  556. #endif
  557. returnNode = new fsTiXmlElement( "" );
  558. }
  559. else
  560. {
  561. #ifdef DEBUG_PARSER
  562. TIXML_LOG( "XML parsing Unknown(2)\n" );
  563. #endif
  564. returnNode = new fsTiXmlUnknown();
  565. }
  566. if ( returnNode )
  567. {
  568. // Set the parent, so it can report errors
  569. returnNode->parent = this;
  570. }
  571. return returnNode;
  572. }
  573. const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
  574. const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
  575. const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;
  576. char const* fsTiXmlDocument::Parse(char const* p, TiXmlParsingData* prevData, TiXmlEncoding encoding)
  577. {
  578. ClearError();
  579. // Parse away, at the document level. Since a document
  580. // contains nothing but other tags, most of what happens
  581. // here is skipping white space.
  582. if ( !p || !*p )
  583. {
  584. SetError( TiXmlBase::TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
  585. return 0;
  586. }
  587. // Note that, for a document, this needs to come
  588. // before the white space skip, so that parsing
  589. // starts from the pointer we are given.
  590. location.Clear();
  591. if ( prevData )
  592. {
  593. location.row = prevData->Cursor().row;
  594. location.col = prevData->Cursor().col;
  595. }
  596. else
  597. {
  598. location.row = 0;
  599. location.col = 0;
  600. }
  601. TiXmlParsingData data( p, TabSize(), location.row, location.col );
  602. location = data.Cursor();
  603. if ( encoding == TIXML_ENCODING_UNKNOWN )
  604. {
  605. // Check for the Microsoft UTF-8 lead bytes.
  606. const unsigned char* pU = (const unsigned char*)p;
  607. if ( *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
  608. && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
  609. && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
  610. {
  611. encoding = TIXML_ENCODING_UTF8;
  612. useMicrosoftBOM = true;
  613. }
  614. }
  615. p = TiXmlBase::SkipWhiteSpace( p, encoding );
  616. if ( !p )
  617. {
  618. SetError( TiXmlBase::TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
  619. return 0;
  620. }
  621. while ( p && *p )
  622. {
  623. TiXmlNode* node = fsTiXmlNode::Identify( p, encoding );
  624. if ( node )
  625. {
  626. p = node->Parse( p, &data, encoding );
  627. LinkEndChild( node );
  628. }
  629. else
  630. {
  631. break;
  632. }
  633. // Did we get encoding info?
  634. if ( encoding == TIXML_ENCODING_UNKNOWN
  635. && node->ToDeclaration() )
  636. {
  637. TiXmlDeclaration* dec = node->ToDeclaration();
  638. const char* enc = dec->Encoding();
  639. assert( enc );
  640. if ( *enc == 0 )
  641. encoding = TIXML_ENCODING_UTF8;
  642. else if ( TiXmlBase::StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
  643. encoding = TIXML_ENCODING_UTF8;
  644. else if ( TiXmlBase::StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
  645. encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
  646. else
  647. encoding = TIXML_ENCODING_LEGACY;
  648. }
  649. p = TiXmlBase::SkipWhiteSpace( p, encoding );
  650. }
  651. // Was this empty?
  652. if ( !firstChild ) {
  653. SetError( TiXmlBase::TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
  654. return 0;
  655. }
  656. // All is well.
  657. return p;
  658. }
  659. */