123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386 |
- /*
- * Copyright (C)2005-2019 Haxe Foundation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
- #ifdef EPPC
- #include <memory>
- #else
- #include <memory.h>
- #endif
- #ifndef HX_WINDOWS
- # include <strings.h>
- # undef strcmpi
- # define strcmpi(a,b) strcasecmp(a,b)
- #else
- # include <string.h>
- #endif
- // -------------- parsing --------------------------
// States of the hand-written scanner in do_parse_xml. The numeric order is
// the declaration order; nothing in this file depends on specific values.
enum STATE {
	IGNORE_SPACES = 0, // skip ' ', '\t', '\r', '\n', then switch to the pending `next` state
	BEGIN,             // start of content: '<' opens markup, anything else starts PCDATA
	BEGIN_NODE,        // just after '<': decide between '!', '?', '/' and a tag name
	TAG_NAME,          // reading an element name
	BODY,              // inside a start tag: '/', '>' or the beginning of an attribute
	ATTRIB_NAME,       // reading an attribute name
	EQUALS,            // expecting '=' after an attribute name
	ATTVAL_BEGIN,      // expecting the opening quote of an attribute value
	ATTRIB_VAL,        // reading an attribute value up to the matching quote
	CHILDS,            // start tag finished with '>': parse the children recursively
	CLOSE,             // reading the name of a closing tag
	WAIT_END,          // expecting '>' after '/' in a self-closing tag
	WAIT_END_RET,      // expecting '>' of a closing tag, then return to the caller
	PCDATA,            // reading character data up to the next '<'
	HEADER,            // inside "<? ... ?>"
	COMMENT,           // inside "<!-- ... -->"
	DOCTYPE,           // inside "<!DOCTYPE ... >"
	CDATA              // inside "<![CDATA[ ... ]]>"
};
- static void xml_error( const char *xml, const char *inWhere, int *line, String msg ) {
- String b = HX_CSTRING("Xml parse error : ") + msg + HX_CSTRING(" at line ") + String(*line) + HX_CSTRING(" : ");
- String where(inWhere);
- int l = where.length;
- int nchars = 30;
- if( inWhere != xml )
- b += HX_CSTRING("...");
- if (where.length==0)
- b+= HX_CSTRING("<eof>");
- else if (where.length<nchars)
- b+= where;
- else
- b+= where.substr(0,nchars) + HX_CSTRING("...");
- hx::Throw(b);
- }
// Error helpers for do_parse_xml. Both expand to a call to xml_error() and
// rely on the locals `xml`, `p` and `line` being in scope at the point of use.
// ERRORSTR takes a ready-made String, ERROR wraps a string literal in HX_CSTRING.
// The expansion deliberately has no trailing ';' - the call site supplies it -
// so the macros behave like a single statement, including inside if/else.
#define ERRORSTR(msg) xml_error(xml,p,line,msg)
#define ERROR(msg) xml_error(xml,p,line,HX_CSTRING(msg))
// Tell whether `c` is accepted inside a tag or attribute name by this parser:
// ASCII letters, ASCII digits, and the punctuation ':', '.', '_' and '-'.
// Any other value (including negative ones) is rejected.
static bool is_valid_char( int c ) {
	switch( c ) {
	case ':':
	case '.':
	case '_':
	case '-':
		return true;
	default:
		break;
	}
	const bool isLower = 'a' <= c && c <= 'z';
	const bool isUpper = 'A' <= c && c <= 'Z';
	const bool isDigit = '0' <= c && c <= '9';
	return isLower || isUpper || isDigit;
}
- static void do_parse_xml( const char *xml, const char **lp, int *line, cpp::NativeXmlState callb, String parentname )
- {
- STATE state = BEGIN;
- STATE next = BEGIN;
- String aname;
- hx::Anon attribs;
- String nodename;
- const char *start = NULL;
- const char *p = *lp;
- char c = *p;
- int nsubs = 0, nbrackets = 0;
- while( c ) {
- switch( state ) {
- case IGNORE_SPACES:
- switch( c ) {
- case '\n':
- case '\r':
- case '\t':
- case ' ':
- break;
- default:
- state = next;
- continue;
- }
- break;
- case BEGIN:
- switch( c ) {
- case '<':
- state = IGNORE_SPACES;
- next = BEGIN_NODE;
- break;
- default:
- start = p;
- state = PCDATA;
- continue;
- }
- break;
- case PCDATA:
- if( c == '<' ) {
- callb->pcdata(String(start,p-start).dup());
- nsubs++;
- state = IGNORE_SPACES;
- next = BEGIN_NODE;
- }
- break;
- case CDATA:
- if( c == ']' && p[1] == ']' && p[2] == '>' ) {
- callb->cdata(String(start,p-start).dup());
- nsubs++;
- p += 2;
- state = BEGIN;
- }
- break;
- case BEGIN_NODE:
- switch( c ) {
- case '!':
- if( p[1] == '[' ) {
- p += 2;
- if( (p[0] != 'C' && p[0] != 'c') ||
- (p[1] != 'D' && p[1] != 'd') ||
- (p[2] != 'A' && p[2] != 'a') ||
- (p[3] != 'T' && p[3] != 't') ||
- (p[4] != 'A' && p[4] != 'a') ||
- (p[5] != '[') )
- ERROR("Expected <![CDATA[");
- p += 5;
- state = CDATA;
- start = p + 1;
- break;
- }
- if( p[1] == 'D' || p[1] == 'd' ) {
- if( (p[2] != 'O' && p[2] != 'o') ||
- (p[3] != 'C' && p[3] != 'c') ||
- (p[4] != 'T' && p[4] != 't') ||
- (p[5] != 'Y' && p[5] != 'y') ||
- (p[6] != 'P' && p[6] != 'p') ||
- (p[7] != 'E' && p[7] != 'e') )
- ERROR("Expected <!DOCTYPE");
- p += 7;
- state = DOCTYPE;
- start = p + 1;
- break;
- }
- if( p[1] != '-' || p[2] != '-' )
- ERROR("Expected <!--");
- p += 2;
- state = COMMENT;
- start = p + 1;
- break;
- case '?':
- state = HEADER;
- start = p;
- break;
- case '/':
- if( parentname.length==0 )
- ERROR("Expected node name");
- start = p + 1;
- state = IGNORE_SPACES;
- next = CLOSE;
- break;
- default:
- state = TAG_NAME;
- start = p;
- continue;
- }
- break;
- case TAG_NAME:
- if( !is_valid_char(c) ) {
- if( p == start )
- ERROR("Expected node name");
- nodename = String(start,p-start).dup();
- attribs = hx::Anon_obj::Create();
- state = IGNORE_SPACES;
- next = BODY;
- continue;
- }
- break;
- case BODY:
- switch( c ) {
- case '/':
- state = WAIT_END;
- nsubs++;
- callb->xml(nodename,attribs);
- break;
- case '>':
- state = CHILDS;
- nsubs++;
- callb->xml(nodename,attribs);
- break;
- default:
- state = ATTRIB_NAME;
- start = p;
- continue;
- }
- break;
- case ATTRIB_NAME:
- if( !is_valid_char(c) ) {
- if( start == p )
- ERROR("Expected attribute name");
- aname = String(start,p-start).dup();
- if( attribs->__Field(aname,hx::paccDynamic) != null() )
- ERROR("Duplicate attribute");
- state = IGNORE_SPACES;
- next = EQUALS;
- continue;
- }
- break;
- case EQUALS:
- switch( c ) {
- case '=':
- state = IGNORE_SPACES;
- next = ATTVAL_BEGIN;
- break;
- default:
- ERROR("Expected =");
- }
- break;
- case ATTVAL_BEGIN:
- switch( c ) {
- case '"':
- case '\'':
- state = ATTRIB_VAL;
- start = p;
- break;
- default:
- ERROR("Expected \"");
- }
- break;
- case ATTRIB_VAL:
- if( c == *start ) {
- attribs->Add( aname, String(start+1,p-start-1).dup() );
- state = IGNORE_SPACES;
- next = BODY;
- }
- break;
- case CHILDS:
- *lp = p;
- do_parse_xml(xml,lp,line,callb,nodename);
- p = *lp;
- start = p;
- state = BEGIN;
- break;
- case WAIT_END:
- switch( c ) {
- case '>':
- callb->done();
- state = BEGIN;
- break;
- default :
- ERROR("Expected >");
- }
- break;
- case WAIT_END_RET:
- switch( c ) {
- case '>':
- if( nsubs == 0 )
- callb->pcdata(HX_CSTRING(""));
- *lp = p;
- return;
- default :
- ERROR("Expected >");
- }
- break;
- case CLOSE:
- if( !is_valid_char(c) ) {
- if( start == p )
- ERROR("Expected node name");
- {
- String v = String(start,p - start).dup();
- if( strcmpi(parentname.__s,v.__s) != 0 ) {
- ERRORSTR(HX_CSTRING("Expected </") + parentname + HX_CSTRING(">"));
- }
- }
- state = IGNORE_SPACES;
- next = WAIT_END_RET;
- continue;
- }
- break;
- case COMMENT:
- if( c == '-' && p[1] == '-' && p[2] == '>' ) {
- callb->comment(String(start,p-start).dup());
- p += 2;
- state = BEGIN;
- }
- break;
- case DOCTYPE:
- if( c == '[' )
- nbrackets++;
- else if( c == ']' )
- nbrackets--;
- else if( c == '>' && nbrackets == 0 ) {
- callb->doctype(String(start,p-start).dup());
- state = BEGIN;
- }
- break;
- case HEADER:
- if( c == '?' && p[1] == '>' ) {
- p++;
- callb->comment(String(start,p-start).dup());
- state = BEGIN;
- }
- break;
- }
- c = *++p;
- if( c == '\n' )
- (*line)++;
- }
- if( state == BEGIN ) {
- start = p;
- state = PCDATA;
- }
- if( parentname.__s == 0 && state == PCDATA ) {
- if( p != start || nsubs == 0 )
- callb->pcdata(String(start,p-start).dup());
- return;
- }
- ERROR("Unexpected end");
- }
- // ----------------------------------------------
- /**
- <doc>
- <h1>Xml</h1>
- <p>
- The standard event-driven XML parser.
- </p>
- </doc>
- **/
- /**
- parse_xml : xml:string -> events:object -> void
- <doc>
- [parse_xml] parses a string and, for each parsed element, calls the
- corresponding method of the [events] object:
- <ul>
- <li>[void xml( name : string, attribs : object)] when an XML node is found</li>
- <li>[void done()] when an XML node is closed</li>
- <li>[void pcdata(string)] when PCDATA characters are found</li>
- <li>[void cdata(string)] when a CDATA section is found</li>
- <li>[void comment(string)] when a comment or special header is found</li>
- <li>[void doctype(string)] when a DOCTYPE declaration is found</li>
- </ul>
- You can then implement the events so they build the appropriate XML data
- structure needed by your language.
- </doc>
- **/
- static void parse_xml( String str, cpp::NativeXmlState state )
- {
- int line = 0;
- const char *p = str.__s;
- // skip BOM
- if( p[0] == (char)0xEF && p[1] == (char)0xBB && p[2] == (char)0xBF )
- p += 3;
- do_parse_xml(p,&p,&line,state,String());
- }
|