2
0

CXMLReaderImpl.h 18 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801
  1. // Copyright (C) 2002-2005 Nikolaus Gebhardt
  2. // This file is part of the "Irrlicht Engine" and the "irrXML" project.
  3. // For conditions of distribution and use, see copyright notice in irrlicht.h and/or irrXML.h
  4. #ifndef __ICXML_READER_IMPL_H_INCLUDED__
  5. #define __ICXML_READER_IMPL_H_INCLUDED__
  6. #include "irrXML.h"
  7. #include "irrString.h"
  8. #include "irrArray.h"
  9. #include "./../fast_atof.h"
  10. using namespace Assimp;
  #ifdef _DEBUG
  // Debug-only trace output. The message is printed through a fixed "%s"
  // format so that a '%' inside the message is never interpreted as a
  // conversion specifier. The trailing semicolon is kept for existing callers.
  #define IRR_DEBUGPRINT(x) printf("%s", (x));
  #else // _DEBUG
  // Release builds: the macro expands to nothing.
  #define IRR_DEBUGPRINT(x)
  #endif // _DEBUG
  16. namespace irr
  17. {
  18. namespace io
  19. {
  20. //! implementation of the IrrXMLReader
  21. template<class char_type, class superclass>
  22. class CXMLReaderImpl : public IIrrXMLReader<char_type, superclass>
  23. {
  24. public:
  25. //! Constructor
  26. CXMLReaderImpl(IFileReadCallBack* callback, bool deleteCallBack = true)
  27. : TextData(0), P(0), TextSize(0), TextBegin(0), CurrentNodeType(EXN_NONE),
  28. SourceFormat(ETF_ASCII), TargetFormat(ETF_ASCII)
  29. {
  30. if (!callback)
  31. return;
  32. storeTargetFormat();
  33. // read whole xml file
  34. readFile(callback);
  35. // clean up
  36. if (deleteCallBack)
  37. delete callback;
  38. // create list with special characters
  39. createSpecialCharacterList();
  40. // set pointer to text begin
  41. P = TextBegin;
  42. }
  43. //! Destructor
  44. virtual ~CXMLReaderImpl()
  45. {
  46. delete [] TextData;
  47. }
  48. //! Reads forward to the next xml node.
  49. //! \return Returns false, if there was no further node.
  50. virtual bool read()
  51. {
  52. // if not end reached, parse the node
  53. if (P && (unsigned int)(P - TextBegin) < TextSize - 1 && *P != 0)
  54. {
  55. parseCurrentNode();
  56. return true;
  57. }
  58. _IRR_IMPLEMENT_MANAGED_MARSHALLING_BUGFIX;
  59. return false;
  60. }
  61. //! Returns the type of the current XML node.
  62. virtual EXML_NODE getNodeType() const
  63. {
  64. return CurrentNodeType;
  65. }
  66. //! Returns attribute count of the current XML node.
  67. virtual int getAttributeCount() const
  68. {
  69. return Attributes.size();
  70. }
  71. //! Returns name of an attribute.
  72. virtual const char_type* getAttributeName(int idx) const
  73. {
  74. if (idx < 0 || idx >= (int)Attributes.size())
  75. return 0;
  76. return Attributes[idx].Name.c_str();
  77. }
  78. //! Returns the value of an attribute.
  79. virtual const char_type* getAttributeValue(int idx) const
  80. {
  81. if (idx < 0 || idx >= (int)Attributes.size())
  82. return 0;
  83. return Attributes[idx].Value.c_str();
  84. }
  85. //! Returns the value of an attribute.
  86. virtual const char_type* getAttributeValue(const char_type* name) const
  87. {
  88. const SAttribute* attr = getAttributeByName(name);
  89. if (!attr)
  90. return 0;
  91. return attr->Value.c_str();
  92. }
  93. //! Returns the value of an attribute
  94. virtual const char_type* getAttributeValueSafe(const char_type* name) const
  95. {
  96. const SAttribute* attr = getAttributeByName(name);
  97. if (!attr)
  98. return EmptyString.c_str();
  99. return attr->Value.c_str();
  100. }
  101. //! Returns the value of an attribute as integer.
  102. int getAttributeValueAsInt(const char_type* name) const
  103. {
  104. return (int)getAttributeValueAsFloat(name);
  105. }
  106. //! Returns the value of an attribute as integer.
  107. int getAttributeValueAsInt(int idx) const
  108. {
  109. return (int)getAttributeValueAsFloat(idx);
  110. }
  111. //! Returns the value of an attribute as float.
  112. float getAttributeValueAsFloat(const char_type* name) const
  113. {
  114. const SAttribute* attr = getAttributeByName(name);
  115. if (!attr)
  116. return 0;
  117. core::stringc c = attr->Value.c_str();
  118. return fast_atof(c.c_str());
  119. }
  120. //! Returns the value of an attribute as float.
  121. float getAttributeValueAsFloat(int idx) const
  122. {
  123. const char_type* attrvalue = getAttributeValue(idx);
  124. if (!attrvalue)
  125. return 0;
  126. core::stringc c = attrvalue;
  127. return fast_atof(c.c_str());
  128. }
  129. //! Returns the name of the current node.
  130. virtual const char_type* getNodeName() const
  131. {
  132. return NodeName.c_str();
  133. }
  134. //! Returns data of the current node.
  135. virtual const char_type* getNodeData() const
  136. {
  137. return NodeName.c_str();
  138. }
  139. //! Returns if an element is an empty element, like <foo />
  140. virtual bool isEmptyElement() const
  141. {
  142. return IsEmptyElement;
  143. }
  144. //! Returns format of the source xml file.
  145. virtual ETEXT_FORMAT getSourceFormat() const
  146. {
  147. return SourceFormat;
  148. }
  149. //! Returns format of the strings returned by the parser.
  150. virtual ETEXT_FORMAT getParserFormat() const
  151. {
  152. return TargetFormat;
  153. }
  154. private:
  155. // Reads the current xml node
  156. void parseCurrentNode()
  157. {
  158. char_type* start = P;
  159. // more forward until '<' found
  160. while(*P != L'<' && *P)
  161. ++P;
  162. if (!*P)
  163. return;
  164. if (P - start > 0)
  165. {
  166. // we found some text, store it
  167. if (setText(start, P))
  168. return;
  169. }
  170. ++P;
  171. // based on current token, parse and report next element
  172. switch(*P)
  173. {
  174. case L'/':
  175. parseClosingXMLElement();
  176. break;
  177. case L'?':
  178. ignoreDefinition();
  179. break;
  180. case L'!':
  181. if (!parseCDATA())
  182. parseComment();
  183. break;
  184. default:
  185. parseOpeningXMLElement();
  186. break;
  187. }
  188. }
  189. //! sets the state that text was found. Returns true if set should be set
  190. bool setText(char_type* start, char_type* end)
  191. {
  192. // check if text is more than 2 characters, and if not, check if there is
  193. // only white space, so that this text won't be reported
  194. if (end - start < 3)
  195. {
  196. char_type* p = start;
  197. for(; p != end; ++p)
  198. if (!isWhiteSpace(*p))
  199. break;
  200. if (p == end)
  201. return false;
  202. }
  203. // set current text to the parsed text, and replace xml special characters
  204. core::string<char_type> s(start, (int)(end - start));
  205. NodeName = replaceSpecialCharacters(s);
  206. // current XML node type is text
  207. CurrentNodeType = EXN_TEXT;
  208. return true;
  209. }
  210. //! ignores an xml definition like <?xml something />
  211. void ignoreDefinition()
  212. {
  213. CurrentNodeType = EXN_UNKNOWN;
  214. // move until end marked with '>' reached
  215. while(*P != L'>')
  216. ++P;
  217. ++P;
  218. }
  219. //! parses a comment
  220. void parseComment()
  221. {
  222. CurrentNodeType = EXN_COMMENT;
  223. P += 1;
  224. char_type *pCommentBegin = P;
  225. int count = 1;
  226. // move until end of comment reached
  227. while(count)
  228. {
  229. if (*P == L'>')
  230. --count;
  231. else
  232. if (*P == L'<')
  233. ++count;
  234. ++P;
  235. }
  236. P -= 3;
  237. NodeName = core::string<char_type>(pCommentBegin+2, (int)(P - pCommentBegin-2));
  238. P += 3;
  239. }
  240. //! parses an opening xml element and reads attributes
  241. void parseOpeningXMLElement()
  242. {
  243. CurrentNodeType = EXN_ELEMENT;
  244. IsEmptyElement = false;
  245. Attributes.clear();
  246. // find name
  247. const char_type* startName = P;
  248. // find end of element
  249. while(*P != L'>' && !isWhiteSpace(*P))
  250. ++P;
  251. const char_type* endName = P;
  252. // find Attributes
  253. while(*P != L'>')
  254. {
  255. if (isWhiteSpace(*P))
  256. ++P;
  257. else
  258. {
  259. if (*P != L'/')
  260. {
  261. // we've got an attribute
  262. // read the attribute names
  263. const char_type* attributeNameBegin = P;
  264. while(!isWhiteSpace(*P) && *P != L'=')
  265. ++P;
  266. const char_type* attributeNameEnd = P;
  267. ++P;
  268. // read the attribute value
  269. // check for quotes and single quotes, thx to murphy
  270. while( (*P != L'\"') && (*P != L'\'') && *P)
  271. ++P;
  272. if (!*P) // malformatted xml file
  273. return;
  274. const char_type attributeQuoteChar = *P;
  275. ++P;
  276. const char_type* attributeValueBegin = P;
  277. while(*P != attributeQuoteChar && *P)
  278. ++P;
  279. if (!*P) // malformatted xml file
  280. return;
  281. const char_type* attributeValueEnd = P;
  282. ++P;
  283. SAttribute attr;
  284. attr.Name = core::string<char_type>(attributeNameBegin,
  285. (int)(attributeNameEnd - attributeNameBegin));
  286. core::string<char_type> s(attributeValueBegin,
  287. (int)(attributeValueEnd - attributeValueBegin));
  288. attr.Value = replaceSpecialCharacters(s);
  289. Attributes.push_back(attr);
  290. }
  291. else
  292. {
  293. // tag is closed directly
  294. ++P;
  295. IsEmptyElement = true;
  296. break;
  297. }
  298. }
  299. }
  300. // check if this tag is closing directly
  301. if (endName > startName && *(endName-1) == L'/')
  302. {
  303. // directly closing tag
  304. IsEmptyElement = true;
  305. endName--;
  306. }
  307. NodeName = core::string<char_type>(startName, (int)(endName - startName));
  308. ++P;
  309. }
  310. //! parses an closing xml tag
  311. void parseClosingXMLElement()
  312. {
  313. CurrentNodeType = EXN_ELEMENT_END;
  314. IsEmptyElement = false;
  315. Attributes.clear();
  316. ++P;
  317. const char_type* pBeginClose = P;
  318. while(*P != L'>')
  319. ++P;
  320. NodeName = core::string<char_type>(pBeginClose, (int)(P - pBeginClose));
  321. ++P;
  322. }
  323. //! parses a possible CDATA section, returns false if begin was not a CDATA section
  324. bool parseCDATA()
  325. {
  326. if (*(P+1) != L'[')
  327. return false;
  328. CurrentNodeType = EXN_CDATA;
  329. // skip '<![CDATA['
  330. int count=0;
  331. while( *P && count<8 )
  332. {
  333. ++P;
  334. ++count;
  335. }
  336. if (!*P)
  337. return true;
  338. char_type *cDataBegin = P;
  339. char_type *cDataEnd = 0;
  340. // find end of CDATA
  341. while(*P && !cDataEnd)
  342. {
  343. if (*P == L'>' &&
  344. (*(P-1) == L']') &&
  345. (*(P-2) == L']'))
  346. {
  347. cDataEnd = P - 2;
  348. }
  349. ++P;
  350. }
  351. if ( cDataEnd )
  352. NodeName = core::string<char_type>(cDataBegin, (int)(cDataEnd - cDataBegin));
  353. else
  354. NodeName = "";
  355. return true;
  356. }
  357. // name/value pair describing one attribute of the current element
  358. struct SAttribute
  359. {
  360. core::string<char_type> Name; // attribute name as written in the tag
  361. core::string<char_type> Value; // attribute value with xml special characters already replaced
  362. };
  363. // finds a current attribute by name, returns 0 if not found
  364. const SAttribute* getAttributeByName(const char_type* name) const
  365. {
  366. if (!name)
  367. return 0;
  368. core::string<char_type> n = name;
  369. for (int i=0; i<(int)Attributes.size(); ++i)
  370. if (Attributes[i].Name == n)
  371. return &Attributes[i];
  372. return 0;
  373. }
  374. // replaces xml special characters in a string and creates a new one
  375. core::string<char_type> replaceSpecialCharacters(
  376. core::string<char_type>& origstr)
  377. {
  378. int pos = origstr.findFirst(L'&');
  379. int oldPos = 0;
  380. if (pos == -1)
  381. return origstr;
  382. core::string<char_type> newstr;
  383. while(pos != -1 && pos < origstr.size()-2)
  384. {
  385. // check if it is one of the special characters
  386. int specialChar = -1;
  387. for (int i=0; i<(int)SpecialCharacters.size(); ++i)
  388. {
  389. const char_type* p = &origstr.c_str()[pos]+1;
  390. if (equalsn(&SpecialCharacters[i][1], p, SpecialCharacters[i].size()-1))
  391. {
  392. specialChar = i;
  393. break;
  394. }
  395. }
  396. if (specialChar != -1)
  397. {
  398. newstr.append(origstr.subString(oldPos, pos - oldPos));
  399. newstr.append(SpecialCharacters[specialChar][0]);
  400. pos += SpecialCharacters[specialChar].size();
  401. }
  402. else
  403. {
  404. newstr.append(origstr.subString(oldPos, pos - oldPos + 1));
  405. pos += 1;
  406. }
  407. // find next &
  408. oldPos = pos;
  409. pos = origstr.findNext(L'&', pos);
  410. }
  411. if (oldPos < origstr.size()-1)
  412. newstr.append(origstr.subString(oldPos, origstr.size()-oldPos));
  413. return newstr;
  414. }
  415. //! reads the xml file and converts it into the wanted character format.
  416. bool readFile(IFileReadCallBack* callback)
  417. {
  418. int size = callback->getSize();
  419. size += 4; // We need two terminating 0's at the end.
  420. // For ASCII we need 1 0's, for UTF-16 2, for UTF-32 4.
  421. char* data8 = new char[size];
  422. if (!callback->read(data8, size-4))
  423. {
  424. delete [] data8;
  425. return false;
  426. }
  427. // add zeros at end
  428. data8[size-1] = 0;
  429. data8[size-2] = 0;
  430. data8[size-3] = 0;
  431. data8[size-4] = 0;
  432. char16* data16 = reinterpret_cast<char16*>(data8);
  433. char32* data32 = reinterpret_cast<char32*>(data8);
  434. // now we need to convert the data to the desired target format
  435. // based on the byte order mark.
  436. const unsigned char UTF8[] = {0xEF, 0xBB, 0xBF}; // 0xEFBBBF;
  437. const int UTF16_BE = 0xFFFE;
  438. const int UTF16_LE = 0xFEFF;
  439. const int UTF32_BE = 0xFFFE0000;
  440. const int UTF32_LE = 0x0000FEFF;
  441. // check source for all utf versions and convert to target data format
  442. if (size >= 4 && data32[0] == (char32)UTF32_BE)
  443. {
  444. // UTF-32, big endian
  445. SourceFormat = ETF_UTF32_BE;
  446. convertTextData(data32+1, data8, (size/4)); // data32+1 because we need to skip the header
  447. }
  448. else
  449. if (size >= 4 && data32[0] == (char32)UTF32_LE)
  450. {
  451. // UTF-32, little endian
  452. SourceFormat = ETF_UTF32_LE;
  453. convertTextData(data32+1, data8, (size/4)); // data32+1 because we need to skip the header
  454. }
  455. else
  456. if (size >= 2 && data16[0] == UTF16_BE)
  457. {
  458. // UTF-16, big endian
  459. SourceFormat = ETF_UTF16_BE;
  460. convertTextData(data16+1, data8, (size/2)); // data16+1 because we need to skip the header
  461. }
  462. else
  463. if (size >= 2 && data16[0] == UTF16_LE)
  464. {
  465. // UTF-16, little endian
  466. SourceFormat = ETF_UTF16_LE;
  467. convertTextData(data16+1, data8, (size/2)); // data16+1 because we need to skip the header
  468. }
  469. else
  470. if (size >= 3 && data8[0] == UTF8[0] && data8[1] == UTF8[1] && data8[2] == UTF8[2])
  471. {
  472. // UTF-8
  473. SourceFormat = ETF_UTF8;
  474. convertTextData(data8+3, data8, size); // data8+3 because we need to skip the header
  475. }
  476. else
  477. {
  478. // ASCII
  479. SourceFormat = ETF_ASCII;
  480. convertTextData(data8, data8, size);
  481. }
  482. return true;
  483. }
  484. //! converts the text file into the desired format.
  485. //! \param source: begin of the text (without byte order mark)
  486. //! \param pointerToStore: pointer to text data block which can be
  487. //! stored or deleted based on the nesessary conversion.
  488. //! \param sizeWithoutHeader: Text size in characters without header
  489. template<class src_char_type>
  490. void convertTextData(src_char_type* source, char* pointerToStore, int sizeWithoutHeader)
  491. {
  492. // convert little to big endian if necessary
  493. if (sizeof(src_char_type) > 1 &&
  494. isLittleEndian(TargetFormat) != isLittleEndian(SourceFormat))
  495. convertToLittleEndian(source);
  496. // check if conversion is necessary:
  497. if (sizeof(src_char_type) == sizeof(char_type))
  498. {
  499. // no need to convert
  500. TextBegin = (char_type*)source;
  501. TextData = (char_type*)pointerToStore;
  502. TextSize = sizeWithoutHeader;
  503. }
  504. else
  505. {
  506. // convert source into target data format.
  507. // TODO: implement a real conversion. This one just
  508. // copies bytes. This is a problem when there are
  509. // unicode symbols using more than one character.
  510. TextData = new char_type[sizeWithoutHeader];
  511. // MSVC debugger complains here about loss of data ...
  512. // todo ... I temporarily disabled the check in the build config
  513. for (int i=0; i<sizeWithoutHeader; ++i)
  514. TextData[i] = (char_type)source[i];
  515. TextBegin = TextData;
  516. TextSize = sizeWithoutHeader;
  517. // delete original data because no longer needed
  518. delete [] pointerToStore;
  519. }
  520. }
  //! Swaps the byte order of every character in a zero terminated buffer,
  //! in place.
  //! \param t: first character of the buffer; processing stops at the first
  //! zero character.
  template<class src_char_type>
  void convertToLittleEndian(src_char_type* t)
  {
      if (sizeof(src_char_type) != 4)
      {
          // 16 bit: exchange the two bytes
          for (; *t; ++t)
              *t = (*t >> 8) | (*t << 8);

          return;
      }

      // 32 bit: reverse the four bytes
      for (; *t; ++t)
      {
          const src_char_type value = *t;

          *t = ((value & 0xff000000) >> 24) |
               ((value & 0x00ff0000) >> 8) |
               ((value & 0x0000ff00) << 8) |
               ((value & 0x000000ff) << 24);
      }
  }
  547. //! returns if a format is little endian
  548. inline bool isLittleEndian(ETEXT_FORMAT f)
  549. {
  550. return f == ETF_ASCII ||
  551. f == ETF_UTF8 ||
  552. f == ETF_UTF16_LE ||
  553. f == ETF_UTF32_LE;
  554. }
  555. //! returns true if a character is whitespace
  556. inline bool isWhiteSpace(char_type c)
  557. {
  558. return (c==' ' || c=='\t' || c=='\n' || c=='\r');
  559. }
  560. //! generates a list with xml special characters
  561. void createSpecialCharacterList()
  562. {
  563. // list of strings containing special symbols,
  564. // the first character is the special character,
  565. // the following is the symbol string without trailing &.
  566. SpecialCharacters.push_back("&amp;");
  567. SpecialCharacters.push_back("<lt;");
  568. SpecialCharacters.push_back(">gt;");
  569. SpecialCharacters.push_back("\"quot;");
  570. SpecialCharacters.push_back("'apos;");
  571. }
  572. //! compares the first n characters of the strings
  573. bool equalsn(const char_type* str1, const char_type* str2, int len)
  574. {
  575. int i;
  576. for(i=0; str1[i] && str2[i] && i < len; ++i)
  577. if (str1[i] != str2[i])
  578. return false;
  579. // if one (or both) of the strings was smaller then they
  580. // are only equal if they have the same lenght
  581. return (i == len) || (str1[i] == 0 && str2[i] == 0);
  582. }
  583. //! stores the target text format
  584. void storeTargetFormat()
  585. {
  586. // get target format. We could have done this using template specialization,
  587. // but VisualStudio 6 don't like it and we want to support it.
  588. switch(sizeof(char_type))
  589. {
  590. case 1:
  591. TargetFormat = ETF_UTF8;
  592. break;
  593. case 2:
  594. TargetFormat = ETF_UTF16_LE;
  595. break;
  596. case 4:
  597. TargetFormat = ETF_UTF32_LE;
  598. break;
  599. default:
  600. TargetFormat = ETF_ASCII; // should never happen.
  601. }
  602. }
  603. // instance variables:
  604. char_type* TextData; // data block of the text file; freed with delete [] in the destructor
  605. char_type* P; // current position in the text while parsing
  606. char_type* TextBegin; // start of the text to parse (behind any byte order mark)
  607. unsigned int TextSize; // size of text to parse in characters, not bytes
  608. EXML_NODE CurrentNodeType; // type of the most recently parsed node
  609. ETEXT_FORMAT SourceFormat; // format detected for the source xml file
  610. ETEXT_FORMAT TargetFormat; // format of the strings returned by this parser
  611. core::string<char_type> NodeName; // name of the current node; also holds text, comment and CDATA content
  612. core::string<char_type> EmptyString; // empty string returned by getAttributeValueSafe()
  613. bool IsEmptyElement; // is the currently parsed element an empty one, like <foo />?
  614. core::array< core::string<char_type> > SpecialCharacters; // see createSpecialCharacterList()
  615. core::array<SAttribute> Attributes; // attributes of the current element
  616. }; // end CXMLReaderImpl
  617. } // end namespace
  618. } // end namespace
  619. #endif