Xml.hx 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
  1. import php.Lib;
  2. /*
  3. * Copyright (c) 2005, The haXe Project Contributors
  4. * All rights reserved.
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. *
  8. * - Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * - Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE HAXE PROJECT CONTRIBUTORS "AS IS" AND ANY
  15. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  16. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  17. * DISCLAIMED. IN NO EVENT SHALL THE HAXE PROJECT CONTRIBUTORS BE LIABLE FOR
  18. * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  19. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  20. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  21. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  22. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  23. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
  24. * DAMAGE.
  25. */
  // Type tag for Xml nodes (the type of Xml.nodeType and of the static
  // Xml.Element, Xml.PCData, ... fields). No constructors are declared here;
  // the Xml class assigns string values to those fields in Xml.__init__.
  26. enum XmlType {
  27. }
  /**
   * PHP-target implementation of the Xml node tree.
   *
   * Parsing is delegated to PHP's expat-based XML parser functions
   * (xml_parser_create / xml_parse); the static handlers below build the
   * tree while the parser runs. The tree under construction is tracked in
   * the private static `build`, so `parse` is not reentrant.
   */
  @:core_api class Xml {

      // Node type tags. They are assigned string values in __init__.
      public static var Element(default,null) : XmlType;
      public static var PCData(default,null) : XmlType;
      public static var CData(default,null) : XmlType;
      public static var Comment(default,null) : XmlType;
      public static var DocType(default,null) : XmlType;
      public static var Prolog(default,null) : XmlType;
      public static var Document(default,null) : XmlType;

      /** Kind of this node; one of the static XmlType fields above. */
      public var nodeType(default,null) : XmlType;
      /** Element name; accessing it on a non-Element node throws "bad nodeType". */
      public var nodeName(get_nodeName,set_nodeName) : String;
      /** Node text; accessing it on an Element or Document node throws "bad nodeType". */
      public var nodeValue(get_nodeValue,set_nodeValue) : String;
      /** Parent node, or null when the node is not attached. */
      public var parent(getParent,null) : Xml;

      var _nodeName : String;
      var _nodeValue : String;
      var _attributes : Hash<String>;
      var _children : Array<Xml>;
      var _parent : Xml;

      // Node currently receiving children while `parse` is running; null otherwise.
      private static var build : Xml;

      /**
       * Parser callback for an opening tag: creates the element, copies its
       * attributes, attaches it to the current node and descends into it.
       */
      private static function __start_element_handler(parser : Dynamic, name : String, attribs : ArrayAccess<String>) : Void {
          var node = createElement(name);
          // The PHP snippet refers to the locals `attribs` and `node` by name; do not rename them.
          untyped __php__("foreach($attribs as $k => $v) $node->set($k, $v)");
          build.addChild(node);
          build = node;
      }

      /** Parser callback for a closing tag: moves back up to the parent node. */
      private static function __end_element_handler(parser : Dynamic, name : String) : Void {
          build = build.getParent();
      }

      /**
       * Despite its name this ESCAPES the value: it runs PHP htmlspecialchars
       * (ENT_COMPAT, UTF-8) and then replaces every "'" with "&apos;".
       */
      private static function __decodeattr(value : String) : String
      {
          return untyped __call__("str_replace", "'", '&apos;', __call__("htmlspecialchars", value, __php__('ENT_COMPAT'), 'UTF-8'));
      }

      /**
       * Despite its name this ESCAPES the value: it runs PHP htmlentities
       * (ENT_COMPAT, UTF-8) and then replaces every "'" with "&apos;".
       */
      private static function __decodeent(value : String) : String
      {
          return untyped __call__("str_replace", "'", '&apos;', __call__("htmlentities", value, __php__('ENT_COMPAT'), 'UTF-8'));
      }

      /**
       * Parser callback for character data.
       *
       * The data is stored as PCData (in its re-escaped form) when escaping
       * leaves it unchanged, or when it is a single character that escaping
       * altered; consecutive PCData is appended to the previous PCData
       * sibling. Anything else is stored verbatim as a CData node.
       */
      private static function __character_data_handler(parser : Dynamic, data : String) : Void {
          var d = __decodeent(data);
          // NOTE(review): the single-character case presumably corresponds to an
          // entity the parser already expanded (e.g. "&amp;" -> "&") - confirm.
          if ((untyped __call__("strlen", data) == 1 && d != data) || d == data) {
              var last = build._children[build._children.length - 1];
              if (null != last && last.nodeType == Xml.PCData)
              {
                  last.nodeValue += d;
              } else
                  build.addChild(createPCData(d));
          } else {
              build.addChild(createCData(data));
          }
      }

      /**
       * Parser callback for everything not covered by the other handlers.
       * Comments become Comment nodes; any other data is kept as PCData.
       */
      private static function __default_handler(parser : Dynamic, data : String) : Void {
          // On some PHP setups (this seems to happen when libexpat is used) the CDATA
          // delimiters are reported here, although the character data handler is
          // still called correctly afterwards - so the delimiters are ignored.
          if(data == "<![CDATA[")
              return;
          if(data == "]]>")
              return;
          if ("<!--" == data.substr(0, 4))
              // Strip the leading "<!--" (4 chars) and the trailing "-->" (3 chars).
              build.addChild(createComment(data.substr(4, data.length-7)));
          else
              build.addChild(createPCData(data));
      }

      // Matches the optional "<?...?>" prolog (group 1) and the optional DOCTYPE
      // (group 2) at the start of the input. Whitespace between the two is
      // consumed only when a DOCTYPE actually follows, so a document without a
      // DOCTYPE keeps exactly the text it had after the prolog.
      static var reHeader = ~/\s*(?:<\?(.+?)\?>)?(?:\s*<!DOCTYPE ([^>]+)>)?/mi;

      /**
       * Parses an XML string and returns its Document node.
       *
       * The prolog and DOCTYPE are stripped with `reHeader`, the remainder is
       * wrapped in a synthetic <doc> element and parsed; that element is then
       * turned into the Document node, and Prolog / DocType nodes are
       * re-inserted at the front of its children.
       *
       * @param str XML source text.
       * @return the Document node of the parsed tree.
       * @throws String "Xml parse error (<reason>) line #<n>" when the parser
       *         rejects the input. The parser is released before throwing.
       */
      public static function parse( str : String ) : Xml {
          build = createDocument();
          var xml_parser = untyped __call__("xml_parser_create");
          untyped __call__("xml_set_element_handler", xml_parser, __start_element_handler, __end_element_handler);
          untyped __call__("xml_set_character_data_handler", xml_parser, __character_data_handler);
          untyped __call__("xml_set_default_handler", xml_parser, __default_handler);
          untyped __call__("xml_parser_set_option", xml_parser, __php__("XML_OPTION_CASE_FOLDING"), 0);
          untyped __call__("xml_parser_set_option", xml_parser, __php__("XML_OPTION_SKIP_WHITE"), 0);

          reHeader.match(str);

          str = "<doc>"+reHeader.matchedRight()+"</doc>";

          if(1 != untyped __call__("xml_parse", xml_parser, str, true)) {
              // Collect the diagnostics first: they need the live parser.
              var reason : String = untyped __call__("xml_error_string", __call__("xml_get_error_code", xml_parser));
              var line : Int = untyped __call__("xml_get_current_line_number", xml_parser);
              // Release the parser and drop the partial tree before reporting the error.
              untyped __call__("xml_parser_free", xml_parser);
              build = null;
              throw "Xml parse error (" + reason + ") line #" + line;
          }

          untyped __call__("xml_parser_free", xml_parser);

          // The synthetic <doc> element becomes the Document node.
          var doc = build._children[0];
          // Do not keep the finished tree reachable through the static builder state.
          build = null;
          doc._parent = null;
          doc._nodeName = null;
          doc.nodeType = Document;

          // The DOCTYPE is inserted first so that the prolog ends up in front of it.
          var doctype = reHeader.matched(2);
          if (null != doctype)
              doc.insertChild(createDocType(doctype), 0);

          var prolog = reHeader.matched(1);
          if (null != prolog)
              doc.insertChild(createProlog(prolog), 0);

          return doc;
      }

      /** Nodes are created through the static create* factories only. */
      private function new() : Void {}

      /** Creates an Element node named `name` with no attributes and no children. */
      public static function createElement( name : String ) : Xml {
          var r = new Xml();
          r.nodeType = Xml.Element;
          r._children = new Array();
          r._attributes = new Hash();
          r.set_nodeName( name );
          return r;
      }

      /** Creates a PCData (text) node holding `data`. */
      public static function createPCData( data : String ) : Xml {
          var r = new Xml();
          r.nodeType = Xml.PCData;
          r.set_nodeValue( data );
          return r;
      }

      /** Creates a CData node holding `data`. */
      public static function createCData( data : String ) : Xml {
          var r = new Xml();
          r.nodeType = Xml.CData;
          r.set_nodeValue( data );
          return r;
      }

      /** Creates a Comment node holding `data` (the text between "<!--" and "-->"). */
      public static function createComment( data : String ) : Xml {
          var r = new Xml();
          r.nodeType = Xml.Comment;
          r.set_nodeValue( data );
          return r;
      }

      /** Creates a DocType node holding `data` (the text after "<!DOCTYPE "). */
      public static function createDocType( data : String ) : Xml {
          var r = new Xml();
          r.nodeType = Xml.DocType;
          r.set_nodeValue( data );
          return r;
      }

      /** Creates a Prolog node holding `data` (the text between "<?" and "?>"). */
      public static function createProlog( data : String ) : Xml {
          var r = new Xml();
          r.nodeType = Xml.Prolog;
          r.set_nodeValue( data );
          return r;
      }

      /** Creates an empty Document node. */
      public static function createDocument() : Xml {
          var r = new Xml();
          r.nodeType = Xml.Document;
          r._children = new Array();
          return r;
      }

      /** Getter for `nodeName`; throws "bad nodeType" unless this is an Element. */
      private function get_nodeName() : String {
          if( nodeType != Xml.Element )
              throw "bad nodeType";
          return _nodeName;
      }

      /** Setter for `nodeName`; throws "bad nodeType" unless this is an Element. */
      private function set_nodeName( n : String ) : String {
          if( nodeType != Xml.Element )
              throw "bad nodeType";
          return _nodeName = n;
      }

      /** Getter for `nodeValue`; throws "bad nodeType" on Element and Document nodes. */
      private function get_nodeValue() : String {
          if( nodeType == Xml.Element || nodeType == Xml.Document )
              throw "bad nodeType";
          return _nodeValue;
      }

      /** Setter for `nodeValue`; throws "bad nodeType" on Element and Document nodes. */
      private function set_nodeValue( v : String ) : String {
          if( nodeType == Xml.Element || nodeType == Xml.Document )
              throw "bad nodeType";
          return _nodeValue = v;
      }

      /** Getter for `parent`. */
      private inline function getParent() : Xml {
          return _parent;
      }

      /**
       * Returns the stored value of attribute `att` (the escaped form written
       * by `set`). Throws "bad nodeType" unless this is an Element.
       */
      public function get( att : String ) : String {
          if( nodeType != Xml.Element )
              throw "bad nodeType";
          return _attributes.get( att );
      }

      // TODO: check correct transform function
      /**
       * Sets attribute `att`. The value is stored escaped (see __decodeattr).
       * Throws "bad nodeType" unless this is an Element.
       */
      public function set( att : String, value : String ) : Void {
          if( nodeType != Xml.Element )
              throw "bad nodeType";
          _attributes.set( att, __decodeattr(value) );
      }

      /** Removes attribute `att`. Throws "bad nodeType" unless this is an Element. */
      public function remove( att : String ) : Void{
          if( nodeType != Xml.Element )
              throw "bad nodeType";
          _attributes.remove( att );
      }

      /** Tells whether attribute `att` is set. Throws "bad nodeType" unless this is an Element. */
      public function exists( att : String ) : Bool {
          if( nodeType != Xml.Element )
              throw "bad nodeType";
          return _attributes.exists( att );
      }

      /** Iterates over the attribute names. Throws "bad nodeType" unless this is an Element. */
      public function attributes() : Iterator<String> {
          if( nodeType != Xml.Element )
              throw "bad nodeType";
          return _attributes.keys();
      }

      /** Iterates over all children. Throws "bad nodetype" on nodes that cannot have children. */
      public function iterator() : Iterator<Xml> {
          if( _children == null ) throw "bad nodetype";
          return _children.iterator();
      }

      /** Iterates over the Element children only. Throws "bad nodetype" on nodes that cannot have children. */
      public function elements() : Iterator<Xml> {
          if( _children == null ) throw "bad nodetype";
          return Lambda.filter(_children, function(child) return child.nodeType == Xml.Element).iterator();
      }

      /** Iterates over the Element children named `name`. Throws "bad nodetype" on nodes that cannot have children. */
      public function elementsNamed( name : String ) : Iterator<Xml> {
          if( _children == null ) throw "bad nodetype";
          return Lambda.filter(_children, function(child) return child.nodeType == Xml.Element && child.nodeName == name).iterator();
      }

      /** Returns the first child, or null when there is none. */
      public function firstChild() : Xml {
          if( _children == null ) throw "bad nodetype";
          if( _children.length == 0 ) return null;
          return _children[0];
      }

      /** Returns the first Element child, or null when there is none. */
      public function firstElement() : Xml {
          if( _children == null ) throw "bad nodetype";
          for (child in _children)
              if (child.nodeType == Xml.Element)
                  return child;
          return null;
      }

      /**
       * Appends `x` as the last child, detaching it from its previous parent
       * first. Throws "bad nodetype" on nodes that cannot have children.
       */
      public function addChild( x : Xml ) : Void {
          if( _children == null ) throw "bad nodetype";
          if( x._parent != null ) x._parent._children.remove(x);
          x._parent = this;
          _children.push( x );
      }

      /**
       * Removes child `x`. Returns true when `x` was a child of this node, in
       * which case its parent is cleared.
       */
      public function removeChild( x : Xml ) : Bool {
          if( _children == null ) throw "bad nodetype";
          var b = _children.remove( x );
          if( b )
              x._parent = null;
          return b;
      }

      /**
       * Inserts `x` at index `pos` of the children, detaching it from its
       * previous parent first. Throws "bad nodetype" on nodes that cannot
       * have children.
       */
      public function insertChild( x : Xml, pos : Int ) : Void {
          if( _children == null ) throw "bad nodetype";
          if( x._parent != null ) x._parent._children.remove(x);
          x._parent = this;
          _children.insert( pos, x );
      }

      /**
       * Serializes this node and its descendants. Stored text and attribute
       * values are written as they are, without further escaping.
       */
      public function toString() : String {
          if( nodeType == Xml.PCData )
              return _nodeValue;

          var s = "";

          if( nodeType == Xml.Element ) {
              s += "<";
              s += _nodeName;
              for( k in _attributes.keys() ){
                  s += " ";
                  s += k;
                  s += "=\""; // \"
                  s += _attributes.get(k);
                  s += "\""; // \"
              }
              // An element without children is written in self-closing form.
              if( _children.length == 0 ) {
                  s += "/>";
                  return s;
              }
              s += ">";
          } else if( nodeType == Xml.CData )
              return "<![CDATA["+_nodeValue+"]]>";
          else if( nodeType == Xml.Comment )
              return "<!--"+_nodeValue+"-->";
          else if( nodeType == Xml.DocType )
              return "<!DOCTYPE "+_nodeValue+">";
          else if ( nodeType == Xml.Prolog )
              return "<?"+_nodeValue+"?>";

          // Only Element (with children) and Document nodes reach this point.
          for( x in iterator() )
              s += x.toString();

          if( nodeType == Xml.Element ) {
              s += "</";
              s += _nodeName;
              s += ">";
          }
          return s;
      }

      /** Static initializer: gives every node type tag its string value. */
      static function __init__() : Void untyped {
          Xml.Element = "element";
          Xml.PCData = "pcdata";
          Xml.CData = "cdata";
          Xml.Comment = "comment";
          Xml.DocType = "doctype";
          Xml.Prolog = "prolog";
          Xml.Document = "document";
      }
  }