// NativeXmlImport.cpp
  1. #ifdef EPPC
  2. #include <memory>
  3. #else
  4. #include <memory.h>
  5. #endif
  6. #ifndef HX_WINDOWS
  7. # include <strings.h>
  8. # undef strcmpi
  9. # define strcmpi(a,b) strcasecmp(a,b)
  10. #else
  11. # include <string.h>
  12. #endif
  13. // -------------- parsing --------------------------
  // States of the character-driven state machine in do_parse_xml().
  // The order of the enumerators is kept as-is so their values do not change.
  enum STATE {
    IGNORE_SPACES,  // skip '\n', '\r', '\t' and ' ', then resume in the pending "next" state
    BEGIN,          // between items: '<' opens a node, anything else starts a text run
    BEGIN_NODE,     // just after '<': choose between '!', '?', '/' and a tag name
    TAG_NAME,       // reading an element name
    BODY,           // inside a tag, after its name or an attribute: '/', '>' or an attribute name
    ATTRIB_NAME,    // reading an attribute name
    EQUALS,         // expecting '=' after an attribute name
    ATTVAL_BEGIN,   // expecting the opening '"' or '\'' of an attribute value
    ATTRIB_VAL,     // reading an attribute value up to the matching quote
    CHILDS,         // after the '>' of an opening tag: parse the children recursively
    CLOSE,          // reading the name inside a closing tag
    WAIT_END,       // after '/' inside a tag: expecting '>'
    WAIT_END_RET,   // after a closing-tag name: expecting '>', then return to the caller
    PCDATA,         // inside a text run, until the next '<'
    HEADER,         // inside "<? ... ?>"
    COMMENT,        // inside "<!-- ... -->"
    DOCTYPE,        // inside "<!DOCTYPE ... >"
    CDATA           // inside "<![CDATA[ ... ]]>"
  };
  34. static void xml_error( const char *xml, const char *inWhere, int *line, String msg ) {
  35. String b = HX_CSTRING("Xml parse error : ") + msg + HX_CSTRING(" at line ") + String(*line) + HX_CSTRING(" : ");
  36. String where(inWhere);
  37. int l = where.length;
  38. int nchars = 30;
  39. if( inWhere != xml )
  40. b += HX_CSTRING("...");
  41. if (where.length==0)
  42. b+= HX_CSTRING("<eof>");
  43. else if (where.length<nchars)
  44. b+= where;
  45. else
  46. b+= where.substr(0,nchars) + HX_CSTRING("...");
  47. hx::Throw(b);
  48. }
  // Error helpers for do_parse_xml(): they expect `xml`, `p` and `line` to be
  // in scope where they are used and forward them to xml_error().
  // ERRORSTR takes a String, ERROR takes a string literal.
  // The expansions carry no trailing ';' - the call site supplies it - so a
  // use such as `if( cond ) ERROR("..."); else ...` stays well-formed.
  #define ERRORSTR(msg) xml_error(xml,p,line,msg)
  #define ERROR(msg) xml_error(xml,p,line,HX_CSTRING(msg))
  // Tells whether c may appear in a tag or attribute name as accepted by this
  // parser: ASCII letters, ASCII digits, ':', '.', '_' and '-'.
  static bool is_valid_char( int c ) {
    switch( c ) {
    case ':':
    case '.':
    case '_':
    case '-':
      return true;
    default:
      break;
    }
    const bool isDigit = ( c >= '0' && c <= '9' );
    const bool isLower = ( c >= 'a' && c <= 'z' );
    const bool isUpper = ( c >= 'A' && c <= 'Z' );
    return isDigit || isLower || isUpper;
  }
  54. static void do_parse_xml( const char *xml, const char **lp, int *line, cpp::NativeXmlState callb, String parentname )
  55. {
  56. STATE state = BEGIN;
  57. STATE next = BEGIN;
  58. String aname;
  59. hx::Anon attribs;
  60. String nodename;
  61. const char *start = NULL;
  62. const char *p = *lp;
  63. char c = *p;
  64. int nsubs = 0, nbrackets = 0;
  65. while( c ) {
  66. switch( state ) {
  67. case IGNORE_SPACES:
  68. switch( c ) {
  69. case '\n':
  70. case '\r':
  71. case '\t':
  72. case ' ':
  73. break;
  74. default:
  75. state = next;
  76. continue;
  77. }
  78. break;
  79. case BEGIN:
  80. switch( c ) {
  81. case '<':
  82. state = IGNORE_SPACES;
  83. next = BEGIN_NODE;
  84. break;
  85. default:
  86. start = p;
  87. state = PCDATA;
  88. continue;
  89. }
  90. break;
  91. case PCDATA:
  92. if( c == '<' ) {
  93. callb->pcdata(String(start,p-start).dup());
  94. nsubs++;
  95. state = IGNORE_SPACES;
  96. next = BEGIN_NODE;
  97. }
  98. break;
  99. case CDATA:
  100. if( c == ']' && p[1] == ']' && p[2] == '>' ) {
  101. callb->cdata(String(start,p-start).dup());
  102. nsubs++;
  103. p += 2;
  104. state = BEGIN;
  105. }
  106. break;
  107. case BEGIN_NODE:
  108. switch( c ) {
  109. case '!':
  110. if( p[1] == '[' ) {
  111. p += 2;
  112. if( (p[0] != 'C' && p[0] != 'c') ||
  113. (p[1] != 'D' && p[1] != 'd') ||
  114. (p[2] != 'A' && p[2] != 'a') ||
  115. (p[3] != 'T' && p[3] != 't') ||
  116. (p[4] != 'A' && p[4] != 'a') ||
  117. (p[5] != '[') )
  118. ERROR("Expected <![CDATA[");
  119. p += 5;
  120. state = CDATA;
  121. start = p + 1;
  122. break;
  123. }
  124. if( p[1] == 'D' || p[1] == 'd' ) {
  125. if( (p[2] != 'O' && p[2] != 'o') ||
  126. (p[3] != 'C' && p[3] != 'c') ||
  127. (p[4] != 'T' && p[4] != 't') ||
  128. (p[5] != 'Y' && p[5] != 'y') ||
  129. (p[6] != 'P' && p[6] != 'p') ||
  130. (p[7] != 'E' && p[7] != 'e') )
  131. ERROR("Expected <!DOCTYPE");
  132. p += 7;
  133. state = DOCTYPE;
  134. start = p + 1;
  135. break;
  136. }
  137. if( p[1] != '-' || p[2] != '-' )
  138. ERROR("Expected <!--");
  139. p += 2;
  140. state = COMMENT;
  141. start = p + 1;
  142. break;
  143. case '?':
  144. state = HEADER;
  145. start = p;
  146. break;
  147. case '/':
  148. if( parentname.length==0 )
  149. ERROR("Expected node name");
  150. start = p + 1;
  151. state = IGNORE_SPACES;
  152. next = CLOSE;
  153. break;
  154. default:
  155. state = TAG_NAME;
  156. start = p;
  157. continue;
  158. }
  159. break;
  160. case TAG_NAME:
  161. if( !is_valid_char(c) ) {
  162. if( p == start )
  163. ERROR("Expected node name");
  164. nodename = String(start,p-start).dup();
  165. attribs = hx::Anon_obj::Create();
  166. state = IGNORE_SPACES;
  167. next = BODY;
  168. continue;
  169. }
  170. break;
  171. case BODY:
  172. switch( c ) {
  173. case '/':
  174. state = WAIT_END;
  175. nsubs++;
  176. callb->xml(nodename,attribs);
  177. break;
  178. case '>':
  179. state = CHILDS;
  180. nsubs++;
  181. callb->xml(nodename,attribs);
  182. break;
  183. default:
  184. state = ATTRIB_NAME;
  185. start = p;
  186. continue;
  187. }
  188. break;
  189. case ATTRIB_NAME:
  190. if( !is_valid_char(c) ) {
  191. if( start == p )
  192. ERROR("Expected attribute name");
  193. aname = String(start,p-start).dup();
  194. if( attribs->__Field(aname,hx::paccDynamic) != null() )
  195. ERROR("Duplicate attribute");
  196. state = IGNORE_SPACES;
  197. next = EQUALS;
  198. continue;
  199. }
  200. break;
  201. case EQUALS:
  202. switch( c ) {
  203. case '=':
  204. state = IGNORE_SPACES;
  205. next = ATTVAL_BEGIN;
  206. break;
  207. default:
  208. ERROR("Expected =");
  209. }
  210. break;
  211. case ATTVAL_BEGIN:
  212. switch( c ) {
  213. case '"':
  214. case '\'':
  215. state = ATTRIB_VAL;
  216. start = p;
  217. break;
  218. default:
  219. ERROR("Expected \"");
  220. }
  221. break;
  222. case ATTRIB_VAL:
  223. if( c == *start ) {
  224. attribs->Add( aname, String(start+1,p-start-1).dup() );
  225. state = IGNORE_SPACES;
  226. next = BODY;
  227. }
  228. break;
  229. case CHILDS:
  230. *lp = p;
  231. do_parse_xml(xml,lp,line,callb,nodename);
  232. p = *lp;
  233. start = p;
  234. state = BEGIN;
  235. break;
  236. case WAIT_END:
  237. switch( c ) {
  238. case '>':
  239. callb->done();
  240. state = BEGIN;
  241. break;
  242. default :
  243. ERROR("Expected >");
  244. }
  245. break;
  246. case WAIT_END_RET:
  247. switch( c ) {
  248. case '>':
  249. if( nsubs == 0 )
  250. callb->pcdata(HX_CSTRING(""));
  251. *lp = p;
  252. return;
  253. default :
  254. ERROR("Expected >");
  255. }
  256. break;
  257. case CLOSE:
  258. if( !is_valid_char(c) ) {
  259. if( start == p )
  260. ERROR("Expected node name");
  261. {
  262. String v = String(start,p - start).dup();
  263. if( strcmpi(parentname.__s,v.__s) != 0 ) {
  264. ERRORSTR(HX_CSTRING("Expected </") + parentname + HX_CSTRING(">"));
  265. }
  266. }
  267. state = IGNORE_SPACES;
  268. next = WAIT_END_RET;
  269. continue;
  270. }
  271. break;
  272. case COMMENT:
  273. if( c == '-' && p[1] == '-' && p[2] == '>' ) {
  274. callb->comment(String(start,p-start).dup());
  275. p += 2;
  276. state = BEGIN;
  277. }
  278. break;
  279. case DOCTYPE:
  280. if( c == '[' )
  281. nbrackets++;
  282. else if( c == ']' )
  283. nbrackets--;
  284. else if( c == '>' && nbrackets == 0 ) {
  285. callb->doctype(String(start,p-start).dup());
  286. state = BEGIN;
  287. }
  288. break;
  289. case HEADER:
  290. if( c == '?' && p[1] == '>' ) {
  291. p++;
  292. callb->comment(String(start,p-start).dup());
  293. state = BEGIN;
  294. }
  295. break;
  296. }
  297. c = *++p;
  298. if( c == '\n' )
  299. (*line)++;
  300. }
  301. if( state == BEGIN ) {
  302. start = p;
  303. state = PCDATA;
  304. }
  305. if( parentname.__s == 0 && state == PCDATA ) {
  306. if( p != start || nsubs == 0 )
  307. callb->pcdata(String(start,p-start).dup());
  308. return;
  309. }
  310. ERROR("Unexpected end");
  311. }
  312. // ----------------------------------------------
  313. /**
  314. <doc>
  315. <h1>Xml</h1>
  316. <p>
  317. The standard event-driven XML parser.
  318. </p>
  319. </doc>
  320. **/
  321. /**
  322. parse_xml : xml:string -> events:object -> void
  323. <doc>
  324. The [parse_xml] parse a string and for each parsed element call the
  325. corresponding object method in [events] :
  326. <ul>
  327. <li>[void xml( name : string, attribs : object)] when an XML node is found</li>
  328. <li>[void done()] when an XML node is closed</li>
  329. <li>[void pcdata(string)] when PCData chars found</li>
  330. <li>[void cdata(string)] when a CData session is found</li>
  331. <li>[void comment(string)] when some comment or special header is found</li>
  332. </ul>
  333. You can then implement the events so they build the appropriate XML data
  334. structure needed by your language.
  335. </doc>
  336. **/
  337. static void parse_xml( String str, cpp::NativeXmlState state )
  338. {
  339. int line = 0;
  340. const char *p = str.__s;
  341. // skip BOM
  342. if( p[0] == (char)0xEF && p[1] == (char)0xBB && p[2] == (char)0xBF )
  343. p += 3;
  344. do_parse_xml(p,&p,&line,state,String());
  345. }