XmlChar.cs 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410
  1. // -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*-
  2. //
  3. // System.Xml.XmlChar.cs
  4. //
  5. // Author:
  6. // Jason Diamond ([email protected])
  7. //
  8. // (C) 2001 Jason Diamond http://injektilo.org/
  9. //
  10. //
  11. // Permission is hereby granted, free of charge, to any person obtaining
  12. // a copy of this software and associated documentation files (the
  13. // "Software"), to deal in the Software without restriction, including
  14. // without limitation the rights to use, copy, modify, merge, publish,
  15. // distribute, sublicense, and/or sell copies of the Software, and to
  16. // permit persons to whom the Software is furnished to do so, subject to
  17. // the following conditions:
  18. //
  19. // The above copyright notice and this permission notice shall be
  20. // included in all copies or substantial portions of the Software.
  21. //
  22. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  23. // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  24. // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  25. // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  26. // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  27. // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  28. // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  29. //
  30. namespace System.Xml
  31. {
  32. internal class XmlChar
  33. {
  // The four characters accepted by IsWhitespace (int): space, line feed,
  // tab and carriage return. The array reference is read-only but its
  // elements are not, so callers should treat the contents as constant.
  public static readonly char [] WhitespaceChars = {' ', '\n', '\t', '\r'};
  /// <summary>
  /// Tests whether a character value is space (0x20), tab (0x9),
  /// carriage return (0xD) or line feed (0xA).
  /// </summary>
  /// <param name="ch">Character value to test.</param>
  /// <returns>true for one of the four values above, otherwise false.</returns>
  public static bool IsWhitespace (int ch)
  {
      switch (ch) {
      case ' ':
      case '\t':
      case '\r':
      case '\n':
          return true;
      default:
          return false;
      }
  }
  /// <summary>
  /// Tests whether every character of a string passes IsWhitespace (int).
  /// </summary>
  /// <param name="str">String to scan; must not be null.</param>
  /// <returns>
  /// false as soon as a non-whitespace character is found; true otherwise,
  /// which includes the empty string.
  /// </returns>
  public static bool IsWhitespace (string str)
  {
      foreach (char c in str) {
          if (!IsWhitespace (c))
              return false;
      }
      return true;
  }
  /// <summary>
  /// Locates the first character of a string that fails IsWhitespace (int).
  /// </summary>
  /// <param name="str">String to scan; must not be null.</param>
  /// <returns>
  /// Zero-based index of the first non-whitespace character, or -1 when the
  /// string is empty or consists of whitespace only.
  /// </returns>
  public static int IndexOfNonWhitespace (string str)
  {
      int pos = 0;
      while (pos < str.Length) {
          if (!IsWhitespace (str [pos]))
              return pos;
          pos++;
      }
      return -1;
  }
  /// <summary>
  /// Tests a character value against the set described by the
  /// firstNamePages / nameBitmap tables.
  /// </summary>
  /// <param name="ch">Character value to test.</param>
  /// <returns>
  /// true for ASCII letters and for any value in 0..0xFFFF whose bit is set
  /// in the tables; false for everything else, including negative values and
  /// values above 0xFFFF.
  /// </returns>
  public static bool IsFirstNameChar (int ch)
  {
      // ASCII letters are answered without touching the tables.
      bool asciiLetter = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
      if (asciiLetter)
          return true;

      // The unsigned comparison rejects negative values as well as values
      // above 0xFFFF, which the 256-entry page table cannot index.
      if ((uint) ch > 0xFFFF)
          return false;

      // High byte selects a page of eight 32-bit words; the top three bits
      // of the low byte select the word and the bottom five bits the bit.
      int page = firstNamePages [ch >> 8];
      int word = (page << 3) + ((ch & 0xFF) >> 5);
      int bit = ch & 0x1F;
      return (nameBitmap [word] & (1 << bit)) != 0;
  }
  /// <summary>
  /// Logical negation of IsInvalid (int).
  /// </summary>
  /// <param name="ch">Character value to test.</param>
  /// <returns>true when IsInvalid (ch) is false.</returns>
  public static bool IsValid (int ch)
  {
      bool rejected = IsInvalid (ch);
      return !rejected;
  }
  /// <summary>
  /// Tests whether a character value lies outside the accepted ranges:
  /// 0x9, 0xA, 0xD, 0x20..0xD7FF, 0xE000..0xFFFD and 0x10000..0x10FFFF.
  /// </summary>
  /// <param name="ch">Character value to test.</param>
  /// <returns>
  /// true for control characters other than tab, line feed and carriage
  /// return, for the surrogate block 0xD800..0xDFFF, for 0xFFFE and 0xFFFF,
  /// for negative values and for values of 0x110000 and above.
  /// </returns>
  public static bool IsInvalid (int ch)
  {
      bool allowedControl = ch == 0x9 || ch == 0xA || ch == 0xD;
      bool belowSurrogates = ch >= 0x20 && ch <= 0xD7FF;
      bool aboveSurrogates = ch >= 0xE000 && ch <= 0xFFFD;
      bool supplementary = ch >= 0x10000 && ch <= 0x10FFFF;

      return !(allowedControl || belowSurrogates || aboveSurrogates || supplementary);
  }
  /// <summary>
  /// Finds the first character of a string that IsInvalid (int) rejects.
  /// </summary>
  /// <param name="s">String to scan; must not be null.</param>
  /// <param name="allowSurrogate">
  /// When true, a high surrogate (U+D800..U+DBFF) immediately followed by a
  /// low surrogate (U+DC00..U+DFFF) is accepted as a pair. When false, every
  /// surrogate code unit is reported.
  /// </param>
  /// <returns>
  /// Zero-based index of the first offending character, or -1 when there is
  /// none.
  /// </returns>
  public static int IndexOfInvalid (string s, bool allowSurrogate)
  {
      for (int i = 0; i < s.Length; i++) {
          if (!IsInvalid (s [i]))
              continue;

          // The character is only acceptable as the high half of a
          // well-formed surrogate pair, and only if pairs are allowed.
          if (!allowSurrogate ||
              i + 1 == s.Length ||
              s [i] < '\uD800' ||
              s [i] >= '\uDC00' ||
              s [i + 1] < '\uDC00' ||
              s [i + 1] >= '\uE000')
              return i;

          // Step over the low half of the pair. Examined on its own it is a
          // lone surrogate and would be reported as invalid.
          i++;
      }
      return -1;
  }
  /// <summary>
  /// Finds the first character in a range of a char array that
  /// IsInvalid (int) rejects.
  /// </summary>
  /// <param name="s">Array to scan; must not be null.</param>
  /// <param name="start">Index of the first character to examine.</param>
  /// <param name="length">Number of characters to examine.</param>
  /// <param name="allowSurrogate">
  /// When true, a high surrogate (U+D800..U+DBFF) immediately followed,
  /// inside the range, by a low surrogate (U+DC00..U+DFFF) is accepted as a
  /// pair. When false, every surrogate code unit is reported.
  /// </param>
  /// <returns>
  /// Index into <paramref name="s"/> of the first offending character, or -1
  /// when there is none.
  /// </returns>
  /// <exception cref="ArgumentOutOfRangeException">
  /// start + length is greater than the length of the array.
  /// </exception>
  public static int IndexOfInvalid (char [] s, int start, int length, bool allowSurrogate)
  {
      int end = start + length;
      if (s.Length < end)
          throw new ArgumentOutOfRangeException ("length");

      for (int i = start; i < end; i++) {
          if (!IsInvalid (s [i]))
              continue;

          // The look-ahead is bounded by the end of the requested range, not
          // of the whole array: a high surrogate in the last slot of the
          // range has no partner inside the range and is reported.
          if (!allowSurrogate ||
              i + 1 == end ||
              s [i] < '\uD800' ||
              s [i] >= '\uDC00' ||
              s [i + 1] < '\uDC00' ||
              s [i + 1] >= '\uE000')
              return i;

          // Step over the low half of the pair. Examined on its own it is a
          // lone surrogate and would be reported as invalid.
          i++;
      }
      return -1;
  }
  /// <summary>
  /// Tests a character value against the set described by the
  /// namePages / nameBitmap tables.
  /// </summary>
  /// <param name="ch">Character value to test.</param>
  /// <returns>
  /// true for ASCII letters and for any value in 0..0xFFFF whose bit is set
  /// in the tables; false for everything else, including negative values and
  /// values above 0xFFFF.
  /// </returns>
  public static bool IsNameChar (int ch)
  {
      // ASCII letters are answered without touching the tables.
      bool asciiLetter = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
      if (asciiLetter)
          return true;

      // The unsigned comparison rejects negative values as well as values
      // above 0xFFFF, which the 256-entry page table cannot index.
      if ((uint) ch > 0xFFFF)
          return false;

      // High byte selects a page of eight 32-bit words; the top three bits
      // of the low byte select the word and the bottom five bits the bit.
      int page = namePages [ch >> 8];
      int word = (page << 3) + ((ch & 0xFF) >> 5);
      int bit = ch & 0x1F;
      return (nameBitmap [word] & (1 << bit)) != 0;
  }
  /// <summary>
  /// Same table lookup as IsNameChar (namePages / nameBitmap), except that
  /// the colon is always rejected and there is no ASCII-letter shortcut.
  /// </summary>
  /// <param name="ch">Character value to test.</param>
  /// <returns>
  /// true when the value is in 0..0xFFFF, is not ':' and has its bit set in
  /// the tables; false otherwise.
  /// </returns>
  public static bool IsNCNameChar (int ch)
  {
      if (ch < 0 || ch > 0xFFFF || ch == ':')
          return false;

      int page = namePages [ch >> 8];
      int word = (page << 3) + ((ch & 0xFF) >> 5);
      int bit = ch & 0x1F;
      return (nameBitmap [word] & (1 << bit)) != 0;
  }
  /// <summary>
  /// Tests whether a string is non-empty, starts with a character accepted
  /// by IsFirstNameChar and continues only with characters accepted by
  /// IsNameChar.
  /// </summary>
  /// <param name="str">String to test; must not be null.</param>
  /// <returns>true when all of the conditions above hold.</returns>
  public static bool IsName (string str)
  {
      if (str.Length == 0 || !IsFirstNameChar (str [0]))
          return false;

      int pos = 1;
      while (pos < str.Length) {
          if (!IsNameChar (str [pos]))
              return false;
          pos++;
      }
      return true;
  }
  /// <summary>
  /// Tests whether a string is non-empty, starts with a character accepted
  /// by IsFirstNameChar and consists only of characters accepted by
  /// IsNCNameChar.
  /// </summary>
  /// <param name="str">String to test; must not be null.</param>
  /// <returns>true when all of the conditions above hold.</returns>
  public static bool IsNCName (string str)
  {
      if (str.Length == 0 || !IsFirstNameChar (str [0]))
          return false;

      // The first character is deliberately included in this pass too:
      // IsNCNameChar rejects ':' unconditionally, so a leading colon is
      // caught here.
      foreach (char c in str) {
          if (!IsNCNameChar (c))
              return false;
      }
      return true;
  }
  /// <summary>
  /// Tests whether a string is non-empty and consists only of characters
  /// accepted by IsNameChar.
  /// </summary>
  /// <param name="str">String to test; must not be null.</param>
  /// <returns>true when both conditions hold.</returns>
  public static bool IsNmToken (string str)
  {
      if (str.Length == 0)
          return false;

      foreach (char c in str) {
          if (!IsNameChar (c))
              return false;
      }
      return true;
  }
  /// <summary>
  /// Tests whether a character value is allowed in a public identifier:
  /// space, carriage return, line feed, an ASCII letter or digit, or one of
  /// the punctuation characters listed in the body.
  /// </summary>
  /// <param name="ch">Character value to test.</param>
  /// <returns>
  /// true for the characters above; false for everything else, including
  /// tab, negative values and values above 0xFFFF.
  /// </returns>
  public static bool IsPubidChar (int ch)
  {
      // Reject anything that does not fit in a char before the cast below.
      // Otherwise the cast would silently drop the high bits and, for
      // example, 0x1002D would be matched as '-'.
      if (ch < 0 || ch > 0xFFFF)
          return false;

      // Whitespace is allowed, with the exception of tab.
      if (ch == 0x20 || ch == 0xD || ch == 0xA)
          return true;

      if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ('0' <= ch && ch <= '9'))
          return true;

      return "-'()+,./:=?;!*#@$_%".IndexOf ((char) ch) >= 0;
  }
  /// <summary>
  /// Tests whether every character of a string is accepted by IsPubidChar.
  /// </summary>
  /// <param name="str">String to test; must not be null.</param>
  /// <returns>
  /// false as soon as a rejected character is found; true otherwise, which
  /// includes the empty string.
  /// </returns>
  public static bool IsPubid (string str)
  {
      foreach (char c in str) {
          if (!IsPubidChar (c))
              return false;
      }
      return true;
  }
  177. // encodings (copied from XmlConstructs.cs)
  /// <summary>
  /// Checks that an encoding name is well formed: an ASCII letter followed
  /// by any number of ASCII letters, digits, '.', '_' or '-'.
  /// This method does not verify that there is a decoder available
  /// for this encoding, only that the characters are valid for an
  /// IANA encoding name.
  /// </summary>
  /// <param name="ianaEncoding">The encoding name to check; may be null.</param>
  /// <returns>
  /// true when the name is well formed; false when it is null, empty, does
  /// not start with a letter or contains any other character.
  /// </returns>
  public static bool IsValidIANAEncoding (String ianaEncoding)
  {
      if (ianaEncoding == null || ianaEncoding.Length == 0)
          return false;

      char first = ianaEncoding [0];
      bool startsWithLetter = (first >= 'A' && first <= 'Z') || (first >= 'a' && first <= 'z');
      if (!startsWithLetter)
          return false;

      for (int pos = 1; pos < ianaEncoding.Length; pos++) {
          char c = ianaEncoding [pos];
          bool letterOrDigit = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9');
          bool punctuation = c == '.' || c == '_' || c == '-';
          if (!letterOrDigit && !punctuation)
              return false;
      }
      return true;
  }
  /// <summary>
  /// Maps one of the five entity names "amp", "lt", "gt", "quot" and "apos"
  /// to the character it stands for.
  /// </summary>
  /// <param name="name">Entity name without '&amp;' and ';'; compared case-sensitively.</param>
  /// <returns>
  /// The character value of the entity, or -1 when the name is not one of
  /// the five above (including when it is null).
  /// </returns>
  public static int GetPredefinedEntity (string name)
  {
      if (name == "amp")
          return '&';
      if (name == "lt")
          return '<';
      if (name == "gt")
          return '>';
      if (name == "quot")
          return '"';
      if (name == "apos")
          return '\'';
      return -1;
  }
  // Page table read by IsFirstNameChar. It has 256 entries and is indexed
  // by the high byte of a 16-bit character value (ch >> 8). Each entry is
  // the number of a page in nameBitmap; a page is eight consecutive 32-bit
  // words, i.e. one bit for each of the 256 characters sharing that high
  // byte. Page 0x00 of nameBitmap is all zeros and page 0x01 is all ones,
  // so those two entries mean "no character" and "every character" of the
  // block respectively.
  229. static readonly byte [] firstNamePages =
  230. {
  231. 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x00,
  232. 0x00, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
  233. 0x10, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  234. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x13,
  235. 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  236. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  237. 0x15, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  238. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  239. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  240. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,
  241. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
  242. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
  243. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
  244. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
  245. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
  246. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
  247. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
  248. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
  249. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
  250. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x17,
  251. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  252. 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01,
  253. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
  254. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
  255. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
  256. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
  257. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18,
  258. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  259. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  260. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  261. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  262. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
  263. };
  // Page table read by IsNameChar and IsNCNameChar. It has 256 entries and
  // is indexed by the high byte of a 16-bit character value (ch >> 8). Each
  // entry is the number of a page in nameBitmap; a page is eight
  // consecutive 32-bit words, i.e. one bit for each of the 256 characters
  // sharing that high byte. Page 0x00 of nameBitmap is all zeros and page
  // 0x01 is all ones, so those two entries mean "no character" and "every
  // character" of the block respectively.
  264. static readonly byte [] namePages =
  265. {
  266. 0x19, 0x03, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x00,
  267. 0x00, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25,
  268. 0x10, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  269. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x13,
  270. 0x26, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  271. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  272. 0x27, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  273. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  274. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  275. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,
  276. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
  277. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
  278. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
  279. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
  280. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
  281. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
  282. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
  283. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
  284. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
  285. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x17,
  286. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  287. 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01,
  288. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
  289. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
  290. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
  291. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
  292. 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18,
  293. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  294. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  295. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  296. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  297. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
  298. };
  // Bit sets shared by firstNamePages and namePages. The array holds 40
  // pages of eight 32-bit words (two source rows per page). For a character
  // value ch and a page number p taken from one of the page tables, the
  // membership flag is bit (ch & 0x1F) of word (p << 3) + ((ch & 0xFF) >> 5).
  // Page 0 (the first two rows) is all zeros and page 1 (the next two rows)
  // is all ones.
  299. static readonly uint [] nameBitmap =
  300. {
  301. 0x00000000, 0x00000000, 0x00000000, 0x00000000,
  302. 0x00000000, 0x00000000, 0x00000000, 0x00000000,
  303. 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
  304. 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
  305. 0x00000000, 0x04000000, 0x87FFFFFE, 0x07FFFFFE,
  306. 0x00000000, 0x00000000, 0xFF7FFFFF, 0xFF7FFFFF,
  307. 0xFFFFFFFF, 0x7FF3FFFF, 0xFFFFFDFE, 0x7FFFFFFF,
  308. 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE00F, 0xFC31FFFF,
  309. 0x00FFFFFF, 0x00000000, 0xFFFF0000, 0xFFFFFFFF,
  310. 0xFFFFFFFF, 0xF80001FF, 0x00000003, 0x00000000,
  311. 0x00000000, 0x00000000, 0x00000000, 0x00000000,
  312. 0xFFFFD740, 0xFFFFFFFB, 0x547F7FFF, 0x000FFFFD,
  313. 0xFFFFDFFE, 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF,
  314. 0xFFFF0003, 0xFFFFFFFF, 0xFFFF199F, 0x033FCFFF,
  315. 0x00000000, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE,
  316. 0x0000007F, 0x00000000, 0xFFFF0000, 0x000707FF,
  317. 0x00000000, 0x07FFFFFE, 0x000007FE, 0xFFFE0000,
  318. 0xFFFFFFFF, 0x7CFFFFFF, 0x002F7FFF, 0x00000060,
  319. 0xFFFFFFE0, 0x23FFFFFF, 0xFF000000, 0x00000003,
  320. 0xFFF99FE0, 0x03C5FDFF, 0xB0000000, 0x00030003,
  321. 0xFFF987E0, 0x036DFDFF, 0x5E000000, 0x001C0000,
  322. 0xFFFBAFE0, 0x23EDFDFF, 0x00000000, 0x00000001,
  323. 0xFFF99FE0, 0x23CDFDFF, 0xB0000000, 0x00000003,
  324. 0xD63DC7E0, 0x03BFC718, 0x00000000, 0x00000000,
  325. 0xFFFDDFE0, 0x03EFFDFF, 0x00000000, 0x00000003,
  326. 0xFFFDDFE0, 0x03EFFDFF, 0x40000000, 0x00000003,
  327. 0xFFFDDFE0, 0x03FFFDFF, 0x00000000, 0x00000003,
  328. 0x00000000, 0x00000000, 0x00000000, 0x00000000,
  329. 0xFFFFFFFE, 0x000D7FFF, 0x0000003F, 0x00000000,
  330. 0xFEF02596, 0x200D6CAE, 0x0000001F, 0x00000000,
  331. 0x00000000, 0x00000000, 0xFFFFFEFF, 0x000003FF,
  332. 0x00000000, 0x00000000, 0x00000000, 0x00000000,
  333. 0x00000000, 0x00000000, 0x00000000, 0x00000000,
  334. 0x00000000, 0xFFFFFFFF, 0xFFFF003F, 0x007FFFFF,
  335. 0x0007DAED, 0x50000000, 0x82315001, 0x002C62AB,
  336. 0x40000000, 0xF580C900, 0x00000007, 0x02010800,
  337. 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
  338. 0x0FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x03FFFFFF,
  339. 0x3F3FFFFF, 0xFFFFFFFF, 0xAAFF3F3F, 0x3FFFFFFF,
  340. 0xFFFFFFFF, 0x5FDFFFFF, 0x0FCF1FDC, 0x1FDC1FFF,
  341. 0x00000000, 0x00004C40, 0x00000000, 0x00000000,
  342. 0x00000007, 0x00000000, 0x00000000, 0x00000000,
  343. 0x00000080, 0x000003FE, 0xFFFFFFFE, 0xFFFFFFFF,
  344. 0x001FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0x07FFFFFF,
  345. 0xFFFFFFE0, 0x00001FFF, 0x00000000, 0x00000000,
  346. 0x00000000, 0x00000000, 0x00000000, 0x00000000,
  347. 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
  348. 0xFFFFFFFF, 0x0000003F, 0x00000000, 0x00000000,
  349. 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
  350. 0xFFFFFFFF, 0x0000000F, 0x00000000, 0x00000000,
  351. 0x00000000, 0x07FF6000, 0x87FFFFFE, 0x07FFFFFE,
  352. 0x00000000, 0x00800000, 0xFF7FFFFF, 0xFF7FFFFF,
  353. 0x00FFFFFF, 0x00000000, 0xFFFF0000, 0xFFFFFFFF,
  354. 0xFFFFFFFF, 0xF80001FF, 0x00030003, 0x00000000,
  355. 0xFFFFFFFF, 0xFFFFFFFF, 0x0000003F, 0x00000003,
  356. 0xFFFFD7C0, 0xFFFFFFFB, 0x547F7FFF, 0x000FFFFD,
  357. 0xFFFFDFFE, 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF,
  358. 0xFFFF007B, 0xFFFFFFFF, 0xFFFF199F, 0x033FCFFF,
  359. 0x00000000, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE,
  360. 0xFFFE007F, 0xBBFFFFFB, 0xFFFF0016, 0x000707FF,
  361. 0x00000000, 0x07FFFFFE, 0x0007FFFF, 0xFFFF03FF,
  362. 0xFFFFFFFF, 0x7CFFFFFF, 0xFFEF7FFF, 0x03FF3DFF,
  363. 0xFFFFFFEE, 0xF3FFFFFF, 0xFF1E3FFF, 0x0000FFCF,
  364. 0xFFF99FEE, 0xD3C5FDFF, 0xB080399F, 0x0003FFCF,
  365. 0xFFF987E4, 0xD36DFDFF, 0x5E003987, 0x001FFFC0,
  366. 0xFFFBAFEE, 0xF3EDFDFF, 0x00003BBF, 0x0000FFC1,
  367. 0xFFF99FEE, 0xF3CDFDFF, 0xB0C0398F, 0x0000FFC3,
  368. 0xD63DC7EC, 0xC3BFC718, 0x00803DC7, 0x0000FF80,
  369. 0xFFFDDFEE, 0xC3EFFDFF, 0x00603DDF, 0x0000FFC3,
  370. 0xFFFDDFEC, 0xC3EFFDFF, 0x40603DDF, 0x0000FFC3,
  371. 0xFFFDDFEC, 0xC3FFFDFF, 0x00803DCF, 0x0000FFC3,
  372. 0x00000000, 0x00000000, 0x00000000, 0x00000000,
  373. 0xFFFFFFFE, 0x07FF7FFF, 0x03FF7FFF, 0x00000000,
  374. 0xFEF02596, 0x3BFF6CAE, 0x03FF3F5F, 0x00000000,
  375. 0x03000000, 0xC2A003FF, 0xFFFFFEFF, 0xFFFE03FF,
  376. 0xFEBF0FDF, 0x02FE3FFF, 0x00000000, 0x00000000,
  377. 0x00000000, 0x00000000, 0x00000000, 0x00000000,
  378. 0x00000000, 0x00000000, 0x1FFF0000, 0x00000002,
  379. 0x000000A0, 0x003EFFFE, 0xFFFFFFFE, 0xFFFFFFFF,
  380. 0x661FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0x77FFFFFF
  381. };
  382. }
  383. }