NativeXmlImport.cpp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386
  1. /*
  2. * Copyright (C)2005-2019 Haxe Foundation
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  19. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  20. * DEALINGS IN THE SOFTWARE.
  21. */
  22. #ifdef EPPC
  23. #include <memory>
  24. #else
  25. #include <memory.h>
  26. #endif
  27. #ifndef HX_WINDOWS
  28. # include <strings.h>
  29. # undef strcmpi
  30. # define strcmpi(a,b) strcasecmp(a,b)
  31. #else
  32. # include <string.h>
  33. #endif
  34. // -------------- parsing --------------------------
  // States of the character-driven scanner implemented by do_parse_xml.
  // The enumerators keep their implicit values (0, 1, 2, ...) in this order.
  enum STATE
  {
    IGNORE_SPACES,  // skip '\n', '\r', '\t' and ' ', then switch to the pending "next" state
    BEGIN,          // start of content: '<' opens markup, anything else starts text
    BEGIN_NODE,     // first character after '<': picks between '!', '?', '/' and a tag name
    TAG_NAME,       // reading the name of an opening tag
    BODY,           // inside an opening tag: expects '/', '>' or an attribute name
    ATTRIB_NAME,    // reading an attribute name
    EQUALS,         // expects '=' after an attribute name
    ATTVAL_BEGIN,   // expects the opening '"' or '\'' of an attribute value
    ATTRIB_VAL,     // reading an attribute value up to the matching quote
    CHILDS,         // opening tag finished with '>': element content is parsed recursively
    CLOSE,          // reading the name of a closing tag
    WAIT_END,       // after '/' in an opening tag: expects '>'
    WAIT_END_RET,   // after a closing tag name: expects '>' and then returns to the caller
    PCDATA,         // reading text up to the next '<'
    HEADER,         // inside "<? ... ?>"
    COMMENT,        // inside "<!-- ... -->"
    DOCTYPE,        // inside "<!DOCTYPE ... >"
    CDATA           // inside "<![CDATA[ ... ]]>"
  };
  55. static void xml_error( const char *xml, const char *inWhere, int *line, String msg ) {
  56. String b = HX_CSTRING("Xml parse error : ") + msg + HX_CSTRING(" at line ") + String(*line) + HX_CSTRING(" : ");
  57. String where(inWhere);
  58. int l = where.length;
  59. int nchars = 30;
  60. if( inWhere != xml )
  61. b += HX_CSTRING("...");
  62. if (where.length==0)
  63. b+= HX_CSTRING("<eof>");
  64. else if (where.length<nchars)
  65. b+= where;
  66. else
  67. b+= where.substr(0,nchars) + HX_CSTRING("...");
  68. hx::Throw(b);
  69. }
  // Error shorthands for use inside do_parse_xml. Both expand to a call to
  // xml_error and rely on the caller's local names `xml`, `p` and `line`.
  // ERRORSTR takes an already-built String; ERROR takes a string literal and
  // wraps it with HX_CSTRING.
  // The definitions deliberately do not end with ';' so that a use such as
  // `if( cond ) ERROR("..."); else ...` parses as a single statement.
  #define ERRORSTR(msg) xml_error(xml,p,line,msg)
  #define ERROR(msg) xml_error(xml,p,line,HX_CSTRING(msg))
  // Tells whether c is accepted inside a node or attribute name:
  // an ASCII letter, an ASCII digit, or one of ':', '.', '_', '-'.
  static bool is_valid_char( int c ) {
    switch( c ) {
    case ':':
    case '.':
    case '_':
    case '-':
      return true;
    default:
      break;
    }
    if( 'a' <= c && c <= 'z' )
      return true;
    if( 'A' <= c && c <= 'Z' )
      return true;
    return '0' <= c && c <= '9';
  }
  /**
   * Recursive worker behind parse_xml: walks the text starting at *lp one
   * character at a time with a state machine (see STATE) and reports what it
   * finds through callb.
   *
   * One invocation handles one nesting level. When an opening tag ends with
   * '>', the function calls itself for that element's content, passing the
   * element name as parentname; that nested call returns when it has consumed
   * the matching closing tag.
   *
   * xml        - start of the input text; only forwarded to xml_error by the
   *              ERROR / ERRORSTR macros
   * lp         - in: position to start scanning from. out: when the function
   *              returns through a closing tag, the position of that tag's
   *              final '>'. Not written when returning at end of input.
   * line       - line counter, incremented each time the scan steps onto '\n'
   * callb      - receiver of the xml / done / pcdata / cdata / comment /
   *              doctype calls
   * parentname - name of the element whose content is being parsed; the top
   *              level call passes a String whose __s is null
   *
   * Malformed input is reported through ERROR / ERRORSTR, i.e. xml_error,
   * which throws.
   */
  static void do_parse_xml( const char *xml, const char **lp, int *line, cpp::NativeXmlState callb, String parentname )
  {
    STATE state = BEGIN;
    // State to enter once IGNORE_SPACES has skipped the whitespace.
    STATE next = BEGIN;
    String aname;
    hx::Anon attribs;
    String nodename;
    // Start of the token currently being collected (text, name, value, ...).
    const char *start = NULL;
    const char *p = *lp;
    char c = *p;
    // nsubs counts the items reported at this level (nodes, pcdata, cdata);
    // nbrackets tracks '[' / ']' nesting inside a DOCTYPE.
    int nsubs = 0, nbrackets = 0;
    // Note: a `continue` inside the inner switches restarts this loop without
    // advancing p, so the current character is re-examined in the new state.
    while( c ) {
      switch( state ) {
      case IGNORE_SPACES:
        switch( c ) {
        case '\n':
        case '\r':
        case '\t':
        case ' ':
          break;
        default:
          state = next;
          continue;
        }
        break;
      case BEGIN:
        switch( c ) {
        case '<':
          state = IGNORE_SPACES;
          next = BEGIN_NODE;
          break;
        default:
          start = p;
          state = PCDATA;
          continue;
        }
        break;
      case PCDATA:
        // Text runs up to (not including) the next '<'.
        if( c == '<' ) {
          callb->pcdata(String(start,p-start).dup());
          nsubs++;
          state = IGNORE_SPACES;
          next = BEGIN_NODE;
        }
        break;
      case CDATA:
        // The section ends at "]]>"; the terminator is not part of the reported text.
        if( c == ']' && p[1] == ']' && p[2] == '>' ) {
          callb->cdata(String(start,p-start).dup());
          nsubs++;
          // Skip "]]"; the loop increment below then steps past '>'.
          p += 2;
          state = BEGIN;
        }
        break;
      case BEGIN_NODE:
        switch( c ) {
        case '!':
          // "<![" must continue with "CDATA[" (letters matched in either case).
          if( p[1] == '[' ) {
            p += 2;
            if( (p[0] != 'C' && p[0] != 'c') ||
              (p[1] != 'D' && p[1] != 'd') ||
              (p[2] != 'A' && p[2] != 'a') ||
              (p[3] != 'T' && p[3] != 't') ||
              (p[4] != 'A' && p[4] != 'a') ||
              (p[5] != '[') )
              ERROR("Expected <![CDATA[");
            // p now sits on the second '['; the section text starts right after it.
            p += 5;
            state = CDATA;
            start = p + 1;
            break;
          }
          // "<!D" must continue with "OCTYPE" (letters matched in either case).
          if( p[1] == 'D' || p[1] == 'd' ) {
            if( (p[2] != 'O' && p[2] != 'o') ||
              (p[3] != 'C' && p[3] != 'c') ||
              (p[4] != 'T' && p[4] != 't') ||
              (p[5] != 'Y' && p[5] != 'y') ||
              (p[6] != 'P' && p[6] != 'p') ||
              (p[7] != 'E' && p[7] != 'e') )
              ERROR("Expected <!DOCTYPE");
            // p now sits on the final 'E'; the reported text starts right after it.
            p += 7;
            state = DOCTYPE;
            start = p + 1;
            break;
          }
          // Anything else after "<!" has to be a comment opener "<!--".
          if( p[1] != '-' || p[2] != '-' )
            ERROR("Expected <!--");
          p += 2;
          state = COMMENT;
          start = p + 1;
          break;
        case '?':
          // The reported header text starts at this '?'.
          state = HEADER;
          start = p;
          break;
        case '/':
          // A closing tag is only legal when this level has an enclosing element.
          if( parentname.length==0 )
            ERROR("Expected node name");
          start = p + 1;
          state = IGNORE_SPACES;
          next = CLOSE;
          break;
        default:
          state = TAG_NAME;
          start = p;
          continue;
        }
        break;
      case TAG_NAME:
        // The name ends at the first character rejected by is_valid_char.
        if( !is_valid_char(c) ) {
          if( p == start )
            ERROR("Expected node name");
          nodename = String(start,p-start).dup();
          attribs = hx::Anon_obj::Create();
          state = IGNORE_SPACES;
          next = BODY;
          continue;
        }
        break;
      case BODY:
        switch( c ) {
        case '/':
          // '/' inside the opening tag: report the node now, then expect '>'.
          state = WAIT_END;
          nsubs++;
          callb->xml(nodename,attribs);
          break;
        case '>':
          // End of the opening tag: report the node, then parse its content.
          state = CHILDS;
          nsubs++;
          callb->xml(nodename,attribs);
          break;
        default:
          state = ATTRIB_NAME;
          start = p;
          continue;
        }
        break;
      case ATTRIB_NAME:
        if( !is_valid_char(c) ) {
          if( start == p )
            ERROR("Expected attribute name");
          aname = String(start,p-start).dup();
          // An attribute already stored under this name is rejected.
          if( attribs->__Field(aname,hx::paccDynamic) != null() )
            ERROR("Duplicate attribute");
          state = IGNORE_SPACES;
          next = EQUALS;
          continue;
        }
        break;
      case EQUALS:
        switch( c ) {
        case '=':
          state = IGNORE_SPACES;
          next = ATTVAL_BEGIN;
          break;
        default:
          ERROR("Expected =");
        }
        break;
      case ATTVAL_BEGIN:
        switch( c ) {
        case '"':
        case '\'':
          // start points at the opening quote so ATTRIB_VAL can match the same quote character.
          state = ATTRIB_VAL;
          start = p;
          break;
        default:
          ERROR("Expected \"");
        }
        break;
      case ATTRIB_VAL:
        // The value ends at the next occurrence of the opening quote character;
        // the stored value excludes both quotes.
        if( c == *start ) {
          attribs->Add( aname, String(start+1,p-start-1).dup() );
          state = IGNORE_SPACES;
          next = BODY;
        }
        break;
      case CHILDS:
        // Parse the element content recursively, starting at the current character.
        // On return *lp is the '>' of the closing tag; the loop increment below steps past it.
        *lp = p;
        do_parse_xml(xml,lp,line,callb,nodename);
        p = *lp;
        start = p;
        state = BEGIN;
        break;
      case WAIT_END:
        switch( c ) {
        case '>':
          callb->done();
          state = BEGIN;
          break;
        default :
          ERROR("Expected >");
        }
        break;
      case WAIT_END_RET:
        switch( c ) {
        case '>':
          // An element with no reported content still gets one empty pcdata call.
          // NOTE(review): callb->done() is not invoked on this path, only for
          // tags closed with "/>" (WAIT_END) - confirm this is intended.
          if( nsubs == 0 )
            callb->pcdata(HX_CSTRING(""));
          *lp = p;
          return;
        default :
          ERROR("Expected >");
        }
        break;
      case CLOSE:
        if( !is_valid_char(c) ) {
          if( start == p )
            ERROR("Expected node name");
          {
            // The closing name is compared with the enclosing element's name using strcmpi.
            String v = String(start,p - start).dup();
            if( strcmpi(parentname.__s,v.__s) != 0 ) {
              ERRORSTR(HX_CSTRING("Expected </") + parentname + HX_CSTRING(">"));
            }
          }
          state = IGNORE_SPACES;
          next = WAIT_END_RET;
          continue;
        }
        break;
      case COMMENT:
        // The comment ends at "-->"; the terminator is not part of the reported text.
        if( c == '-' && p[1] == '-' && p[2] == '>' ) {
          callb->comment(String(start,p-start).dup());
          // Skip "--"; the loop increment below then steps past '>'.
          p += 2;
          state = BEGIN;
        }
        break;
      case DOCTYPE:
        // The declaration ends at the first '>' seen while the bracket counter is zero.
        if( c == '[' )
          nbrackets++;
        else if( c == ']' )
          nbrackets--;
        else if( c == '>' && nbrackets == 0 ) {
          callb->doctype(String(start,p-start).dup());
          state = BEGIN;
        }
        break;
      case HEADER:
        // "<? ... ?>" is reported through comment(); the text spans from the
        // opening '?' to the closing '?' inclusive.
        if( c == '?' && p[1] == '>' ) {
          p++;
          callb->comment(String(start,p-start).dup());
          state = BEGIN;
        }
        break;
      }
      // Advance to the next character; a newline is counted when it is stepped onto.
      c = *++p;
      if( c == '\n' )
        (*line)++;
    }
    // End of input reached. Being between items is treated like empty trailing text.
    if( state == BEGIN ) {
      start = p;
      state = PCDATA;
    }
    // Ending inside text is only acceptable at the top level (null parent name).
    // Trailing text is reported when it is non-empty or when nothing else was reported.
    if( parentname.__s == 0 && state == PCDATA ) {
      if( p != start || nsubs == 0 )
        callb->pcdata(String(start,p-start).dup());
      return;
    }
    ERROR("Unexpected end");
  }
  333. // ----------------------------------------------
  334. /**
  335. <doc>
  336. <h1>Xml</h1>
  337. <p>
  338. The standard event-driven XML parser.
  339. </p>
  340. </doc>
  341. **/
  342. /**
  343. parse_xml : xml:string -> events:object -> void
  344. <doc>
  345. The [parse_xml] parse a string and for each parsed element call the
  346. corresponding object method in [events] :
  347. <ul>
  348. <li>[void xml( name : string, attribs : object)] when an XML node is found</li>
  349. <li>[void done()] when an XML node is closed</li>
  350. <li>[void pcdata(string)] when PCData chars found</li>
  351. <li>[void cdata(string)] when a CData session is found</li>
  352. <li>[void comment(string)] when some comment or special header is found</li>
  353. </ul>
  354. You can then implement the events so they build the appropriate XML data
  355. structure needed by your language.
  356. </doc>
  357. **/
  358. static void parse_xml( String str, cpp::NativeXmlState state )
  359. {
  360. int line = 0;
  361. const char *p = str.__s;
  362. // skip BOM
  363. if( p[0] == (char)0xEF && p[1] == (char)0xBB && p[2] == (char)0xBF )
  364. p += 3;
  365. do_parse_xml(p,&p,&line,state,String());
  366. }