Unserializer.hx 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537
  1. /*
  2. * Copyright (C)2005-2019 Haxe Foundation
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  19. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  20. * DEALINGS IN THE SOFTWARE.
  21. */
  22. package haxe;
  23. using haxe.Unserializer;
  24. import haxe.ds.List;
  /**
      Structural type for objects able to map a type name to a runtime type.

      `Unserializer` calls `resolveClass` when it needs a `Class` for a
      serialized class name, and `resolveEnum` when it needs an `Enum` for a
      serialized enum name.
  **/
  @:noDoc
  typedef TypeResolver = {
      /** Returns the `Class` registered under `name`. **/
      function resolveClass(name:String):Class<Dynamic>;

      /** Returns the `Enum` registered under `name`. **/
      function resolveEnum(name:String):Enum<Dynamic>;
  }
  /**
      The `Unserializer` class is the complement to the `Serializer` class. It
      parses a serialization `String` and creates objects from the contained
      data.

      This class can be used in two ways:

      - create a `new Unserializer()` instance with a given serialization
        String, then call its `unserialize()` method until all values are
        extracted
      - call `Unserializer.run()` to unserialize a single value from a given
        String

      The specification of the serialization format can be found here:
      <https://haxe.org/manual/serialization/format>
  **/
  class Unserializer {
      /**
          This value can be set to use custom type resolvers.

          A type resolver finds a `Class` or `Enum` instance from a given
          `String`. By default, the Haxe `Type` Api is used.

          A type resolver must provide two methods:

          1. `resolveClass(name:String):Class<Dynamic>` is called to determine a
             `Class` from a class name
          2. `resolveEnum(name:String):Enum<Dynamic>` is called to determine an
             `Enum` from an enum name

          This value is applied when a new `Unserializer` instance is created.
          Changing it afterwards has no effect on previously created instances.
      **/
      public static var DEFAULT_RESOLVER:TypeResolver = new DefaultResolver();

      /**
          Alphabet of the bytes (`s`) encoding: the index of a character in this
          string is the 6-bit value that character stands for.
      **/
      static var BASE64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789%:";

      #if !neko
      /**
          Lookup table from character code to 6-bit value, built by `initCodes`
          the first time a bytes value is decoded. `null` until then.
      **/
      static var CODES = null;

      /**
          Builds the character-code -> 6-bit-value table for `BASE64`.
      **/
      static function initCodes() {
          var codes = #if flash new flash.utils.ByteArray(); #else new Array(); #end
          for (i in 0...BASE64.length)
              codes[StringTools.fastCodeAt(BASE64, i)] = i;
          return codes;
      }
      #end

      /** The serialized input being parsed. **/
      var buf:String;

      /** Index in `buf` of the next character to read. **/
      var pos:Int;

      /** Length of `buf`, computed once in the constructor. **/
      var length:Int;

      /** Values that `r<n>` back-references can point at, in creation order. **/
      var cache:Array<Dynamic>;

      /** Strings that `R<n>` back-references can point at, in creation order. **/
      var scache:Array<String>;

      /** Resolver used to turn class and enum names into runtime types. **/
      var resolver:TypeResolver;

      #if neko
      /** Set to 0 by the constructor; not read anywhere else in this class. **/
      var upos:Int;
      #end

      /**
          Creates a new Unserializer instance, with its internal buffer
          initialized to `buf`.

          This does not parse `buf` immediately. It is parsed only when calls to
          `this.unserialize` are made.

          Each Unserializer instance maintains its own cache.
      **/
      public function new(buf:String) {
          this.buf = buf;
          length = this.buf.fastLength();
          pos = 0;
          #if neko
          upos = 0;
          #end
          scache = new Array();
          cache = new Array();
          // Re-create the default resolver if user code reset it to null.
          var r = DEFAULT_RESOLVER;
          if (r == null) {
              r = new DefaultResolver();
              DEFAULT_RESOLVER = r;
          }
          resolver = r;
      }

      /**
          Sets the type resolver of `this` Unserializer instance to `r`.

          If `r` is `null`, a special resolver is used which returns `null` for
          all input values.

          See `DEFAULT_RESOLVER` for more information on type resolvers.
      **/
      public function setResolver(r) {
          if (r == null)
              resolver = NullResolver.instance;
          else
              resolver = r;
      }

      /**
          Gets the type resolver of `this` Unserializer instance.

          See `DEFAULT_RESOLVER` for more information on type resolvers.
      **/
      public function getResolver() {
          return resolver;
      }

      /**
          Returns the character code of `buf` at index `p`.

          On php an index at or past `length` yields 0; on other targets the
          result for such an index is whatever `StringTools.fastCodeAt` returns.
      **/
      inline function get(p:Int):Int {
          #if php
          return p >= length ? 0 : buf.fastCharCodeAt(p);
          #else
          return StringTools.fastCodeAt(buf, p);
          #end
      }

      /**
          Reads an optionally negative decimal integer starting at `pos` and
          leaves `pos` on the first character that is not part of it.

          A `-` is only accepted as the very first character. Returns 0 when no
          digit is present. No overflow check is performed.
      **/
      function readDigits() {
          var k = 0;
          var s = false;
          var fpos = pos;
          while (true) {
              var c = get(pos);
              if (StringTools.isEof(c))
                  break;
              if (c == "-".code) {
                  // A sign anywhere but the first position ends the number.
                  if (pos != fpos)
                      break;
                  s = true;
                  pos++;
                  continue;
              }
              if (c < "0".code || c > "9".code)
                  break;
              k = k * 10 + (c - "0".code);
              pos++;
          }
          if (s)
              k *= -1;
          return k;
      }

      /**
          Reads a `<len>:` prefix and returns `len`, leaving `pos` on the first
          character of the payload.

          Throws `error` when `len` is negative, when the `:` separator is
          missing, or when fewer than `len` characters remain in `buf`.
          Rejecting negative lengths matters: callers advance `pos` by `len`, so
          a negative value would move the cursor backwards and could make the
          parser loop forever on crafted input.
      **/
      function readLength(error:String):Int {
          var len = readDigits();
          if (len < 0 || get(pos++) != ":".code || length - pos < len)
              throw error;
          return len;
      }

      /**
          Consumes the longest run of float-looking characters starting at `pos`
          and returns the result of `Std.parseFloat` on it.
      **/
      function readFloat() {
          var p1 = pos;
          while (true) {
              var c = get(pos);
              if (StringTools.isEof(c))
                  break;
              // Codes 43..57 are `+ , - . /` and `0`-`9`; `e`/`E` are accepted too.
              if ((c >= 43 && c < 58) || c == "e".code || c == "E".code)
                  pos++;
              else
                  break;
          }
          return Std.parseFloat(buf.fastSubstr(p1, pos - p1));
      }

      /**
          Reads key/value pairs into `o` until the terminating `g`, which is
          consumed.

          Throws "Invalid object" if the input ends first and
          "Invalid object key" if a key is not a `String`.
      **/
      function unserializeObject(o:{}) {
          while (true) {
              if (pos >= length)
                  throw "Invalid object";
              if (get(pos) == "g".code)
                  break;
              var k:Dynamic = unserialize();
              if (!Std.isOfType(k, String))
                  throw "Invalid object key";
              var v = unserialize();
              Reflect.setField(o, k, v);
          }
          pos++;
      }

      /**
          Reads `:<nargs>` followed by `nargs` values and creates the
          constructor `tag` of `edecl` with them.

          Throws "Invalid enum format" if the `:` separator is missing.
      **/
      function unserializeEnum<T>(edecl:Enum<T>, tag:String) {
          if (get(pos++) != ":".code)
              throw "Invalid enum format";
          var nargs = readDigits();
          if (nargs == 0)
              return Type.createEnum(edecl, tag);
          var args = new Array();
          while (nargs-- > 0)
              args.push(unserialize());
          return Type.createEnum(edecl, tag, args);
      }

      /**
          Unserializes the next part of `this` Unserializer instance and returns
          the according value.

          This function may call `this.resolver.resolveClass` to determine a
          Class from a String, and `this.resolver.resolveEnum` to determine an
          Enum from a String.

          If `this` Unserializer instance contains no more or invalid data, an
          exception is thrown. This includes negative string or bytes lengths,
          non-positive null-run counts inside arrays, and a missing `:` after
          the enum name of an index-based enum value.

          This operation may fail on structurally valid data if a type cannot be
          resolved or if a field cannot be set. This can happen when
          unserializing Strings that were serialized on a different Haxe target,
          in which the serialization side has to make sure not to include
          platform-specific data.

          Classes are created from `Type.createEmptyInstance`, which means their
          constructors are not called.
      **/
      public function unserialize():Dynamic {
          switch (get(pos++)) {
              case "n".code:
                  return null;
              case "t".code:
                  return true;
              case "f".code:
                  return false;
              case "z".code:
                  return 0;
              case "i".code:
                  return readDigits();
              case "d".code:
                  return readFloat();
              case "y".code:
                  // URL-encoded string: y<len>:<payload>
                  var len = readLength("Invalid string length");
                  var s = buf.fastSubstr(pos, len);
                  pos += len;
                  s = StringTools.urlDecode(s);
                  scache.push(s);
                  return s;
              case "k".code:
                  return Math.NaN;
              case "m".code:
                  return Math.NEGATIVE_INFINITY;
              case "p".code:
                  return Math.POSITIVE_INFINITY;
              case "a".code:
                  // Array: elements until `h`; `u<n>` stands for n consecutive nulls.
                  var a = new Array<Dynamic>();
                  #if cpp
                  var cachePos = cache.length;
                  #end
                  // Registered before reading elements so they can reference it.
                  cache.push(a);
                  while (true) {
                      var c = get(pos);
                      if (c == "h".code) {
                          pos++;
                          break;
                      }
                      if (c == "u".code) {
                          pos++;
                          var n = readDigits();
                          // A count below 1 would write to a negative index.
                          if (n <= 0)
                              throw "Invalid array";
                          a[a.length + n - 1] = null;
                      } else
                          a.push(unserialize());
                  }
                  #if cpp
                  return cache[cachePos] = cpp.NativeArray.resolveVirtualArray(a);
                  #else
                  return a;
                  #end
              case "o".code:
                  var o = {};
                  cache.push(o);
                  unserializeObject(o);
                  return o;
              case "r".code:
                  var n = readDigits();
                  if (n < 0 || n >= cache.length)
                      throw "Invalid reference";
                  return cache[n];
              case "R".code:
                  var n = readDigits();
                  if (n < 0 || n >= scache.length)
                      throw "Invalid string reference";
                  return scache[n];
              case "x".code:
                  // Serialized exception: the payload is re-thrown.
                  throw unserialize();
              case "c".code:
                  var name = unserialize();
                  var cl = resolver.resolveClass(name);
                  if (cl == null)
                      throw "Class not found " + name;
                  var o = Type.createEmptyInstance(cl);
                  cache.push(o);
                  unserializeObject(o);
                  return o;
              case "w".code:
                  // Enum value identified by constructor name.
                  var name = unserialize();
                  var edecl = resolver.resolveEnum(name);
                  if (edecl == null)
                      throw "Enum not found " + name;
                  var e = unserializeEnum(edecl, unserialize());
                  cache.push(e);
                  return e;
              case "j".code:
                  // Enum value identified by constructor index: j<name>:<index>:<nargs>...
                  var name = unserialize();
                  var edecl = resolver.resolveEnum(name);
                  if (edecl == null)
                      throw "Enum not found " + name;
                  if (get(pos++) != ":".code)
                      throw "Invalid enum format";
                  var index = readDigits();
                  var tag = Type.getEnumConstructs(edecl)[index];
                  if (tag == null)
                      throw "Unknown enum index " + name + "@" + index;
                  var e = unserializeEnum(edecl, tag);
                  cache.push(e);
                  return e;
              case "l".code:
                  var l = new List();
                  cache.push(l);
                  while (get(pos) != "h".code)
                      l.add(unserialize());
                  pos++;
                  return l;
              case "b".code:
                  var h = new haxe.ds.StringMap();
                  cache.push(h);
                  while (get(pos) != "h".code) {
                      var s = unserialize();
                      h.set(s, unserialize());
                  }
                  pos++;
                  return h;
              case "q".code:
                  // IntMap: a sequence of `:<key><value>` entries closed by `h`.
                  var h = new haxe.ds.IntMap();
                  cache.push(h);
                  var c = get(pos++);
                  while (c == ":".code) {
                      var i = readDigits();
                      h.set(i, unserialize());
                      c = get(pos++);
                  }
                  if (c != "h".code)
                      throw "Invalid IntMap format";
                  return h;
              case "M".code:
                  var h = new haxe.ds.ObjectMap();
                  cache.push(h);
                  while (get(pos) != "h".code) {
                      var s = unserialize();
                      h.set(s, unserialize());
                  }
                  pos++;
                  return h;
              case "v".code:
                  var d;
                  // Four digits followed by `-` means the old 19-character date string.
                  if (get(pos) >= '0'.code && get(pos) <= '9'.code
                      && get(pos + 1) >= '0'.code && get(pos + 1) <= '9'.code
                      && get(pos + 2) >= '0'.code && get(pos + 2) <= '9'.code
                      && get(pos + 3) >= '0'.code && get(pos + 3) <= '9'.code
                      && get(pos + 4) == '-'.code) {
                      // Included for backwards compatibility
                      d = Date.fromString(buf.fastSubstr(pos, 19));
                      pos += 19;
                  } else
                      d = Date.fromTime(readFloat());
                  cache.push(d);
                  return d;
              case "s".code:
                  // Bytes: s<len>:<payload encoded with the BASE64 alphabet>
                  var len = readLength("Invalid bytes length");
                  var buf = buf;
                  #if neko
                  var bytes = haxe.io.Bytes.ofData(base_decode(untyped buf.fastSubstr(pos, len).__s, untyped BASE64.__s));
                  #elseif php
                  var phpEncoded = php.Global.strtr(buf.fastSubstr(pos, len), '%:', '+/');
                  var bytes = haxe.io.Bytes.ofData(php.Global.base64_decode(phpEncoded));
                  #else
                  var codes = CODES;
                  if (codes == null) {
                      codes = initCodes();
                      CODES = codes;
                  }
                  var i = pos;
                  // Characters left over after the last complete group of four.
                  var rest = len & 3;
                  var size = (len >> 2) * 3 + ((rest >= 2) ? rest - 1 : 0);
                  var max = i + (len - rest);
                  var bytes = haxe.io.Bytes.alloc(size);
                  var bpos = 0;
                  // Each complete group of four characters yields three bytes.
                  while (i < max) {
                      var c1 = codes[StringTools.fastCodeAt(buf, i++)];
                      var c2 = codes[StringTools.fastCodeAt(buf, i++)];
                      bytes.set(bpos++, (c1 << 2) | (c2 >> 4));
                      var c3 = codes[StringTools.fastCodeAt(buf, i++)];
                      bytes.set(bpos++, (c2 << 4) | (c3 >> 2));
                      var c4 = codes[StringTools.fastCodeAt(buf, i++)];
                      bytes.set(bpos++, (c3 << 6) | c4);
                  }
                  // A trailing group of two or three characters yields one or two bytes.
                  if (rest >= 2) {
                      var c1 = codes[StringTools.fastCodeAt(buf, i++)];
                      var c2 = codes[StringTools.fastCodeAt(buf, i++)];
                      bytes.set(bpos++, (c1 << 2) | (c2 >> 4));
                      if (rest == 3) {
                          var c3 = codes[StringTools.fastCodeAt(buf, i++)];
                          bytes.set(bpos++, (c2 << 4) | (c3 >> 2));
                      }
                  }
                  #end
                  pos += len;
                  cache.push(bytes);
                  return bytes;
              case "C".code:
                  // Custom serialization: the instance reads its own data from `this`.
                  var name = unserialize();
                  var cl = resolver.resolveClass(name);
                  if (cl == null)
                      throw "Class not found " + name;
                  var o:Dynamic = Type.createEmptyInstance(cl);
                  cache.push(o);
                  o.hxUnserialize(this);
                  if (get(pos++) != "g".code)
                      throw "Invalid custom data";
                  return o;
              case "A".code:
                  var name = unserialize();
                  var cl = resolver.resolveClass(name);
                  if (cl == null)
                      throw "Class not found " + name;
                  return cl;
              case "B".code:
                  var name = unserialize();
                  var e = resolver.resolveEnum(name);
                  if (e == null)
                      throw "Enum not found " + name;
                  return e;
              default:
          }
          // Unknown tag: step back so the message reports the offending character.
          pos--;
          throw("Invalid char " + buf.fastCharAt(pos) + " at position " + pos);
      }

      /**
          Unserializes `v` and returns the according value.

          This is a convenience function for creating a new instance of
          Unserializer with `v` as buffer and calling its `unserialize()` method
          once.
      **/
      public static function run(v:String):Dynamic {
          return new Unserializer(v).unserialize();
      }

      #if neko
      /** Native decoder loaded from the neko `std` library, taking 2 arguments. **/
      static var base_decode = neko.Lib.load("std", "base_decode", 2);
      #end

      /** Length of `s`; on php this is `php.Global.strlen`. **/
      static inline function fastLength(s:String):Int {
          #if php
          return php.Global.strlen(s);
          #else
          return s.length;
          #end
      }

      /** Character code of `s` at `pos`; on php this indexes the native string. **/
      static inline function fastCharCodeAt(s:String, pos:Int):Int {
          #if php
          return php.Global.ord((s:php.NativeString)[pos]);
          #else
          return s.charCodeAt(pos);
          #end
      }

      /** Character of `s` at `pos`; on php this indexes the native string. **/
      static inline function fastCharAt(s:String, pos:Int):String {
          #if php
          return (s:php.NativeString)[pos];
          #else
          return s.charAt(pos);
          #end
      }

      /** `length` characters of `s` from `pos`; on php this is `php.Global.substr`. **/
      static inline function fastSubstr(s:String, pos:Int, length:Int):String {
          #if php
          return php.Global.substr(s, pos, length);
          #else
          return s.substr(pos, length);
          #end
      }
  }
  /**
      Resolver that forwards every lookup to the Haxe `Type` API.
  **/
  private class DefaultResolver {
      public function new() {}

      /** Returns `Type.resolveClass(name)`. **/
      public inline function resolveClass(name:String):Class<Dynamic> {
          return Type.resolveClass(name);
      }

      /** Returns `Type.resolveEnum(name)`. **/
      public inline function resolveEnum(name:String):Enum<Dynamic> {
          return Type.resolveEnum(name);
      }
  }
  /**
      Resolver that knows no types: both lookups always return `null`.

      Only reachable through the lazily created `instance`, since the
      constructor is private.
  **/
  private class NullResolver {
      function new() {}

      /** Always returns `null`, whatever `name` is. **/
      public inline function resolveClass(name:String):Class<Dynamic> {
          return null;
      }

      /** Always returns `null`, whatever `name` is. **/
      public inline function resolveEnum(name:String):Enum<Dynamic> {
          return null;
      }

      /** The shared instance, created on first access. **/
      public static var instance(get, null):NullResolver;

      inline static function get_instance():NullResolver {
          if (instance != null)
              return instance;
          instance = new NullResolver();
          return instance;
      }
  }
  488. }