UTF8EncodingTest.cs 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905
  //
  // UTF8EncodingTest.cs - NUnit Test Cases for System.Text.UTF8Encoding
  //
  // Authors:
  //   Patrick Kalkman [email protected]
  //   Sebastien Pouliot ([email protected])
  //
  // (C) 2003 Patrick Kalkman
  // (C) 2004 Novell (http://www.novell.com)
  //
  11. using NUnit.Framework;
  12. using System;
  13. using System.Text;
  14. #if NET_2_0
  15. using DecoderException = System.Text.DecoderFallbackException;
  16. #else
  17. using DecoderException = System.ArgumentException;
  18. #endif
  namespace MonoTests.System.Text {

      // NUnit fixture for System.Text.UTF8Encoding.
      //
      // The first group of tests checks basic encode/decode behaviour against
      // the examples quoted from RFC 2044. The T1..T5 groups follow the UTF-8
      // decoding tests from http://www.cl.cam.ac.uk/~mgk25/ ; the digits at the
      // end of each test name mirror the numbering used there (for example
      // "_311" corresponds to test 3.1.1).
      //
      // DecoderException is a using-alias declared at the top of the file: it is
      // DecoderFallbackException when NET_2_0 is defined, ArgumentException
      // otherwise.
      [TestFixture]
      public class UTF8EncodingTest : Assertion {

          // Encoding shared by the T1..T5 tests; re-created before every test.
          private UTF8Encoding utf8;

          // Builds the shared encoding with both constructor flags enabled
          // (emit the UTF-8 identifier, throw on invalid bytes).
          [SetUp]
          public void Create ()
          {
              utf8 = new UTF8Encoding (true, true);
          }

          // Re-encodes 'decoded' with the shared encoding and asserts that the
          // result is byte-for-byte identical to 'expected'. The comparison is
          // done on the BitConverter hex dumps so a failure prints readable bytes.
          private void AssertRoundTrip (string message, byte[] expected, string decoded)
          {
              string expectedHex = BitConverter.ToString (expected);
              string actualHex = BitConverter.ToString (utf8.GetBytes (decoded));
              AssertEquals (message, expectedHex, actualHex);
          }

          // Returns every byte in [first, last], each immediately followed by a
          // space (0x20): the "lonely start byte" input shape used by the
          // T3_Malformed_2 tests.
          private static byte[] LonelyStartBytes (byte first, byte last)
          {
              int leadCount = last - first + 1;
              byte[] data = new byte [leadCount * 2];
              for (int i = 0; i < leadCount; i++) {
                  data [2 * i] = (byte) (first + i);
                  data [2 * i + 1] = 0x20;
              }
              return data;
          }

          // Encodes "A<NOT IDENTICAL TO><ALPHA>." from a string and checks the
          // bytes 41 E2 89 A2 CE 91 2E (see RFC 2044).
          [Test]
          public void TestEncodingGetBytes1()
          {
              UTF8Encoding encoder = new UTF8Encoding ();
              string text = "\u0041\u2262\u0391\u002E";
              byte[] expected = { 0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E };

              byte[] actual = encoder.GetBytes (text);

              // Messages run "UTF #1" .. "UTF #7", one per expected byte.
              for (int i = 0; i < expected.Length; i++)
                  AssertEquals ("UTF #" + (i + 1), expected [i], actual [i]);
          }

          // Encodes "Hi Mom <WHITE SMILING FACE>!" from a char array into a
          // caller-supplied buffer and checks both the byte count and the bytes
          // 48 69 20 4D 6F 6D 20 E2 98 BA 21 (see RFC 2044).
          [Test]
          public void TestEncodingGetBytes2()
          {
              UTF8Encoding encoder = new UTF8Encoding ();
              string text = "\u0048\u0069\u0020\u004D\u006F\u006D\u0020\u263A\u0021";
              byte[] expected = { 0x48, 0x69, 0x20, 0x4D, 0x6F, 0x6D, 0x20, 0xE2, 0x98, 0xBA, 0x21 };
              byte[] buffer = new byte [11];

              int written = encoder.GetBytes (text.ToCharArray (), 0, text.Length, buffer, 0);

              AssertEquals ("UTF #1", 11, written);
              // "UTF #1" is taken by the count check, so byte i reports as "UTF #(i+2)".
              for (int i = 0; i < expected.Length; i++)
                  AssertEquals ("UTF #" + (i + 2), expected [i], buffer [i]);
          }

          // Decodes 41 E2 89 A2 CE 91 2E and checks the result is
          // "A<NOT IDENTICAL TO><ALPHA>." (see RFC 2044).
          [Test]
          public void TestDecodingGetChars1()
          {
              UTF8Encoding decoder = new UTF8Encoding ();
              byte[] encoded = new byte [] { 0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E };
              char[] expected = { '\u0041', '\u2262', '\u0391', '\u002E' };

              char[] actual = decoder.GetChars (encoded);

              for (int i = 0; i < expected.Length; i++)
                  AssertEquals ("UTF #" + (i + 1), expected [i], actual [i]);
          }

          // GetMaxCharCount (50) is expected to be 50.
          [Test]
          public void TestMaxCharCount()
          {
              UTF8Encoding encoding = new UTF8Encoding ();
              AssertEquals ("UTF #1", 50, encoding.GetMaxCharCount (50));
          }

          // GetMaxByteCount (50) is expected to be 200.
          [Test]
          public void TestMaxByteCount()
          {
              UTF8Encoding encoding = new UTF8Encoding ();
              AssertEquals ("UTF #1", 200, encoding.GetMaxByteCount (50));
          }

          // Regression for bug #59648.
          // Despite the test name, the encoding is created with the second
          // constructor argument (throw on invalid bytes) set to false: the test
          // expects the invalid pair C0 AF to vanish from the output, not to throw.
          [Test]
          public void TestThrowOnInvalid ()
          {
              UTF8Encoding lenient = new UTF8Encoding (true, false);

              // The invalid pair alone decodes to an empty string.
              string s = lenient.GetString (new byte [] { 0xC0, 0xAF });
              AssertEquals (0, s.Length);

              // Embedded between "01" and "02", only the four ASCII characters remain.
              s = lenient.GetString (new byte [] { 0x30, 0x31, 0xC0, 0xAF, 0x30, 0x32 });
              AssertEquals (4, s.Length);
              AssertEquals (0x30, (int) s [0]);
              AssertEquals (0x31, (int) s [1]);
              AssertEquals (0x30, (int) s [2]);
              AssertEquals (0x32, (int) s [3]);
          }

          //
          // UTF8 decoding tests from http://www.cl.cam.ac.uk/~mgk25/
          //

          // T1: a correct multi-byte text (a Greek word) must survive a
          // decode/encode round trip.
          [Test]
          public void T1_Correct_GreekWord_kosme ()
          {
              byte[] data = { 0xCE, 0xBA, 0xE1, 0xBD, 0xB9, 0xCF, 0x83, 0xCE, 0xBC, 0xCE, 0xB5 };
              string s = utf8.GetString (data);
              // Comparing against a literal would require non-ASCII source text,
              // which can be problematic, so only the re-encoding is checked.
              AssertRoundTrip ("Reconverted", data, s);
          }

          // T2.1: first possible sequence of each length from 1 to 4 bytes.
          [Test]
          public void T2_Boundary_1_FirstPossibleSequence_Pass ()
          {
              byte[] data211 = { 0x00 };
              string s = utf8.GetString (data211);
              AssertEquals ("1 byte (U-00000000)", "\0", s);
              AssertRoundTrip ("Reconverted-1", data211, s);

              byte[] data212 = { 0xC2, 0x80 };
              s = utf8.GetString (data212);
              AssertEquals ("2 bytes (U-00000080)", 128, s [0]);
              AssertRoundTrip ("Reconverted-2", data212, s);

              byte[] data213 = { 0xE0, 0xA0, 0x80 };
              s = utf8.GetString (data213);
              AssertEquals ("3 bytes (U-00000800)", 2048, s [0]);
              AssertRoundTrip ("Reconverted-3", data213, s);

              // U-00010000 is outside the BMP, so it decodes to the surrogate
              // pair D800 DC00 (55296, 56320).
              byte[] data214 = { 0xF0, 0x90, 0x80, 0x80 };
              s = utf8.GetString (data214);
              AssertEquals ("4 bytes (U-00010000)-0", 55296, s [0]);
              AssertEquals ("4 bytes (U-00010000)-1", 56320, s [1]);
              AssertRoundTrip ("Reconverted-4", data214, s);
          }

          // T2.1.5: first 5-byte sequence must be rejected.
          // Fail on MS Fx 1.1
          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T2_Boundary_1_FirstPossibleSequence_Fail_5 ()
          {
              byte[] data215 = { 0xF8, 0x88, 0x80, 0x80, 0x80 };
              string s = utf8.GetString (data215);
              // Only reached when the decoder did not throw.
              AssertNull ("5 bytes (U-00200000)", s);
              AssertRoundTrip ("Reconverted-5", data215, s);
          }

          // T2.1.6: first 6-byte sequence must be rejected.
          // Fail on MS Fx 1.1
          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T2_Boundary_1_FirstPossibleSequence_Fail_6 ()
          {
              byte[] data216 = { 0xFC, 0x84, 0x80, 0x80, 0x80, 0x80 };
              string s = utf8.GetString (data216);
              // Only reached when the decoder did not throw.
              AssertNull ("6 bytes (U-04000000)", s);
              AssertRoundTrip ("Reconverted-6", data216, s);
          }

          // T2.2: last possible sequence of each length from 1 to 3 bytes.
          [Test]
          public void T2_Boundary_2_LastPossibleSequence_Pass ()
          {
              byte[] data221 = { 0x7F };
              string s = utf8.GetString (data221);
              AssertEquals ("1 byte (U-0000007F)", 127, s [0]);
              AssertRoundTrip ("Reconverted-1", data221, s);

              byte[] data222 = { 0xDF, 0xBF };
              s = utf8.GetString (data222);
              AssertEquals ("2 bytes (U-000007FF)", 2047, s [0]);
              AssertRoundTrip ("Reconverted-2", data222, s);

              byte[] data223 = { 0xEF, 0xBF, 0xBF };
              s = utf8.GetString (data223);
              AssertEquals ("3 bytes (U-0000FFFF)", 65535, s [0]);
              AssertRoundTrip ("Reconverted-3", data223, s);
          }

          // T2.2.4: last 4-byte sequence (U-001FFFFF) must be rejected.
          // Fail on MS Fx 1.1
          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T2_Boundary_2_LastPossibleSequence_Fail_4 ()
          {
              // 0xF7 is the 4-byte lead byte with all payload bits set; together
              // with three 0xBF continuations it is the 4-byte form of U-001FFFFF.
              // (0x7F here would be plain ASCII DEL and would test something else.)
              byte[] data224 = { 0xF7, 0xBF, 0xBF, 0xBF };
              string s = utf8.GetString (data224);
              // Only reached when the decoder did not throw.
              AssertNull ("4 bytes (U-001FFFFF)", s);
              AssertRoundTrip ("Reconverted-4", data224, s);
          }

          // T2.2.5: last 5-byte sequence must be rejected.
          // Fail on MS Fx 1.1
          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T2_Boundary_2_LastPossibleSequence_Fail_5 ()
          {
              byte[] data225 = { 0xFB, 0xBF, 0xBF, 0xBF, 0xBF };
              string s = utf8.GetString (data225);
              // Only reached when the decoder did not throw.
              AssertNull ("5 bytes (U-03FFFFFF)", s);
              AssertRoundTrip ("Reconverted-5", data225, s);
          }

          // T2.2.6: last 6-byte sequence must be rejected.
          // Fail on MS Fx 1.1
          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T2_Boundary_2_LastPossibleSequence_Fail_6 ()
          {
              byte[] data226 = { 0xFD, 0xBF, 0xBF, 0xBF, 0xBF, 0xBF };
              string s = utf8.GetString (data226);
              // Only reached when the decoder did not throw.
              AssertNull ("6 bytes (U-7FFFFFFF)", s);
              AssertRoundTrip ("Reconverted-6", data226, s);
          }

          // T2.3: other boundary conditions that must decode successfully.
          [Test]
          public void T2_Boundary_3_Other_Pass ()
          {
              byte[] data231 = { 0xED, 0x9F, 0xBF };
              string s = utf8.GetString (data231);
              AssertEquals ("U-0000D7FF", 55295, s [0]);
              AssertRoundTrip ("Reconverted-1", data231, s);

              byte[] data232 = { 0xEE, 0x80, 0x80 };
              s = utf8.GetString (data232);
              AssertEquals ("U-0000E000", 57344, s [0]);
              AssertRoundTrip ("Reconverted-2", data232, s);

              byte[] data233 = { 0xEF, 0xBF, 0xBD };
              s = utf8.GetString (data233);
              AssertEquals ("U-0000FFFD", 65533, s [0]);
              AssertRoundTrip ("Reconverted-3", data233, s);

              // U-0010FFFF decodes to the surrogate pair DBFF DFFF (56319, 57343).
              byte[] data234 = { 0xF4, 0x8F, 0xBF, 0xBF };
              s = utf8.GetString (data234);
              AssertEquals ("U-0010FFFF-0", 56319, s [0]);
              AssertEquals ("U-0010FFFF-1", 57343, s [1]);
              AssertRoundTrip ("Reconverted-4", data234, s);
          }

          // T2.3.5: U-00110000 must be rejected.
          // Fail on MS Fx 1.1
          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T2_Boundary_3_Other_Fail_5 ()
          {
              byte[] data235 = { 0xF4, 0x90, 0x80, 0x80 };
              string s = utf8.GetString (data235);
              // Only reached when the decoder did not throw.
              AssertNull ("U-00110000", s);
              AssertRoundTrip ("Reconverted-5", data235, s);
          }

          //
          // T3.1: unexpected continuation bytes. Every test must throw; the
          // decoded string is never inspected.
          //

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T3_Malformed_1_UnexpectedContinuation_311 ()
          {
              byte[] data = { 0x80 };
              utf8.GetString (data);
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T3_Malformed_1_UnexpectedContinuation_312 ()
          {
              byte[] data = { 0xBF };
              utf8.GetString (data);
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T3_Malformed_1_UnexpectedContinuation_313 ()
          {
              byte[] data = { 0x80, 0xBF };
              utf8.GetString (data);
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T3_Malformed_1_UnexpectedContinuation_314 ()
          {
              byte[] data = { 0x80, 0xBF, 0x80 };
              utf8.GetString (data);
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T3_Malformed_1_UnexpectedContinuation_315 ()
          {
              byte[] data = { 0x80, 0xBF, 0x80, 0xBF };
              utf8.GetString (data);
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T3_Malformed_1_UnexpectedContinuation_316 ()
          {
              byte[] data = { 0x80, 0xBF, 0x80, 0xBF, 0x80 };
              utf8.GetString (data);
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T3_Malformed_1_UnexpectedContinuation_317 ()
          {
              byte[] data = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF };
              utf8.GetString (data);
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T3_Malformed_1_UnexpectedContinuation_318 ()
          {
              byte[] data = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80 };
              utf8.GetString (data);
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T3_Malformed_1_UnexpectedContinuation_319 ()
          {
              // All 64 different continuation bytes, 0x80 through 0xBF, in order.
              byte[] data = new byte [64];
              for (int i = 0; i < data.Length; i++)
                  data [i] = (byte) (0x80 + i);
              utf8.GetString (data);
          }

          //
          // T3.2: lonely start bytes, each followed by a space. Every test must
          // throw.
          //

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T3_Malformed_2_LonelyStart_321 ()
          {
              // 0xC0 .. 0xDF, each followed by 0x20.
              utf8.GetString (LonelyStartBytes (0xC0, 0xDF));
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T3_Malformed_2_LonelyStart_322 ()
          {
              // 0xE0 .. 0xEF, each followed by 0x20.
              utf8.GetString (LonelyStartBytes (0xE0, 0xEF));
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T3_Malformed_2_LonelyStart_323 ()
          {
              // 0xF0 .. 0xF7, each followed by 0x20.
              utf8.GetString (LonelyStartBytes (0xF0, 0xF7));
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T3_Malformed_2_LonelyStart_324 ()
          {
              // 0xF8 .. 0xFB, each followed by 0x20. This is the range between
              // test 323 (ends at 0xF7) and test 325 (starts at 0xFC); using
              // 0xF0 .. 0xF7 here would merely repeat test 323.
              utf8.GetString (LonelyStartBytes (0xF8, 0xFB));
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T3_Malformed_2_LonelyStart_325 ()
          {
              // 0xFC and 0xFD, each followed by 0x20.
              utf8.GetString (LonelyStartBytes (0xFC, 0xFD));
          }

          //
          // T3.3: sequences with the last continuation byte missing.
          //

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T3_Malformed_3_LastContinuationMissing_331 ()
          {
              byte[] data = { 0xC0 };
              utf8.GetString (data);
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T3_Malformed_3_LastContinuationMissing_332 ()
          {
              byte[] data = { 0xE0, 0x80 };
              utf8.GetString (data);
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T3_Malformed_3_LastContinuationMissing_333 ()
          {
              byte[] data = { 0xF0, 0x80, 0x80 };
              utf8.GetString (data);
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T3_Malformed_3_LastContinuationMissing_334 ()
          {
              byte[] data = { 0xF8, 0x80, 0x80, 0x80 };
              utf8.GetString (data);
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T3_Malformed_3_LastContinuationMissing_335 ()
          {
              byte[] data = { 0xFC, 0x80, 0x80, 0x80, 0x80 };
              utf8.GetString (data);
          }

          // MS Fx 1.1 accepts this input (returning an empty string) while Mono
          // throws, so [ExpectedException (typeof (DecoderException))] is not
          // applied and both outcomes are tolerated.
          [Test]
          public void T3_Malformed_3_LastContinuationMissing_336 ()
          {
              byte[] data = { 0xDF };
              try {
                  string s = utf8.GetString (data);
                  // An exception is "really" expected above; getting here is
                  // the MS behaviour.
                  AssertEquals ("MS FX 1.1 behaviour", String.Empty, s);
              }
              catch (DecoderException) {
                  // but Mono doesn't - better stick to the standard
              }
          }

          // MS Fx 1.1 accepts this input (returning an empty string) while Mono
          // throws, so [ExpectedException (typeof (DecoderException))] is not
          // applied and both outcomes are tolerated.
          [Test]
          public void T3_Malformed_3_LastContinuationMissing_337 ()
          {
              byte[] data = { 0xEF, 0xBF };
              try {
                  string s = utf8.GetString (data);
                  // An exception is "really" expected above; getting here is
                  // the MS behaviour.
                  AssertEquals ("MS FX 1.1 behaviour", String.Empty, s);
              }
              catch (DecoderException) {
                  // but Mono doesn't - better stick to the standard
              }
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T3_Malformed_3_LastContinuationMissing_338 ()
          {
              byte[] data = { 0xF7, 0xBF, 0xBF };
              utf8.GetString (data);
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T3_Malformed_3_LastContinuationMissing_339 ()
          {
              // 5-byte lead byte 0xFB followed by only three of its four
              // continuation bytes. (0x0F would be an ASCII control character
              // followed by stray continuations, which is what T3.1 covers.)
              byte[] data = { 0xFB, 0xBF, 0xBF, 0xBF };
              utf8.GetString (data);
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T3_Malformed_3_LastContinuationMissing_3310 ()
          {
              byte[] data = { 0xFD, 0xBF, 0xBF, 0xBF, 0xBF };
              utf8.GetString (data);
          }

          // T3.4: the ten incomplete sequences of T3.3 concatenated.
          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T3_Malformed_4_ConcatenationImcomplete ()
          {
              byte[] data = {
                  0xC0,
                  0xE0, 0x80,
                  0xF0, 0x80, 0x80,
                  0xF8, 0x80, 0x80, 0x80,
                  0xFC, 0x80, 0x80, 0x80, 0x80,
                  0xDF,
                  0xEF, 0xBF,
                  0xF7, 0xBF, 0xBF,
                  0xFB, 0xBF, 0xBF, 0xBF,
                  0xFD, 0xBF, 0xBF, 0xBF, 0xBF };
              utf8.GetString (data);
          }

          //
          // T3.5: impossible bytes (0xFE and 0xFF).
          //

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T3_Malformed_5_ImpossibleBytes_351 ()
          {
              byte[] data = { 0xFE };
              utf8.GetString (data);
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T3_Malformed_5_ImpossibleBytes_352 ()
          {
              byte[] data = { 0xFF };
              utf8.GetString (data);
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T3_Malformed_5_ImpossibleBytes_353 ()
          {
              byte[] data = { 0xFE, 0xFE, 0xFF, 0xFF };
              utf8.GetString (data);
          }

          //
          // T4: overlong sequences.
          // Overlong == dangerous -> "safe" decoder should reject them
          //

          // T4.1: overlong encodings of the ASCII slash (0x2F).
          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T4_Overlong_1_ASCII_Slash_411 ()
          {
              byte[] data = { 0xC0, 0xAF };
              utf8.GetString (data);
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T4_Overlong_1_ASCII_Slash_412 ()
          {
              byte[] data = { 0xE0, 0x80, 0xAF };
              utf8.GetString (data);
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T4_Overlong_1_ASCII_Slash_413 ()
          {
              byte[] data = { 0xF0, 0x80, 0x80, 0xAF };
              utf8.GetString (data);
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T4_Overlong_1_ASCII_Slash_414 ()
          {
              byte[] data = { 0xF8, 0x80, 0x80, 0x80, 0xAF };
              utf8.GetString (data);
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T4_Overlong_1_ASCII_Slash_415 ()
          {
              byte[] data = { 0xFC, 0x80, 0x80, 0x80, 0x80, 0xAF };
              utf8.GetString (data);
          }

          // T4.2: maximum overlong sequences.
          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T4_Overlong_2_MaximumBoundary_421 ()
          {
              byte[] data = { 0xC1, 0xBF };
              utf8.GetString (data);
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T4_Overlong_2_MaximumBoundary_422 ()
          {
              byte[] data = { 0xE0, 0x9F, 0xBF };
              utf8.GetString (data);
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T4_Overlong_2_MaximumBoundary_423 ()
          {
              byte[] data = { 0xF0, 0x8F, 0xBF, 0xBF };
              utf8.GetString (data);
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T4_Overlong_2_MaximumBoundary_424 ()
          {
              byte[] data = { 0xF8, 0x87, 0xBF, 0xBF, 0xBF };
              utf8.GetString (data);
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T4_Overlong_2_MaximumBoundary_425 ()
          {
              byte[] data = { 0xFC, 0x83, 0xBF, 0xBF, 0xBF, 0xBF };
              utf8.GetString (data);
          }

          // T4.3: overlong encodings of NUL.
          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T4_Overlong_3_NUL_431 ()
          {
              byte[] data = { 0xC0, 0x80 };
              utf8.GetString (data);
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T4_Overlong_3_NUL_432 ()
          {
              byte[] data = { 0xE0, 0x80, 0x80 };
              utf8.GetString (data);
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T4_Overlong_3_NUL_433 ()
          {
              byte[] data = { 0xF0, 0x80, 0x80, 0x80 };
              utf8.GetString (data);
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T4_Overlong_3_NUL_434 ()
          {
              byte[] data = { 0xF8, 0x80, 0x80, 0x80, 0x80 };
              utf8.GetString (data);
          }

          [Test]
          [ExpectedException (typeof (DecoderException))]
          public void T4_Overlong_3_NUL_435 ()
          {
              byte[] data = { 0xFC, 0x80, 0x80, 0x80, 0x80, 0x80 };
              utf8.GetString (data);
          }

          //
          // T5: illegal code positions.
          // MS Fx 1.1 accepts all of these inputs, so none of the T5 tests
          // carries [ExpectedException (typeof (DecoderException))] even though
          // an exception is "really" expected; each test instead asserts the
          // UTF-16 code unit(s) that come back.
          //

          // T5.1: single UTF-16 surrogates.
          [Test]
          public void T5_IllegalCodePosition_1_UTF16Surrogates_511 ()
          {
              byte[] data = { 0xED, 0xA0, 0x80 };
              string s = utf8.GetString (data);
              AssertEquals ("MS FX 1.1 behaviour", 55296, s [0]);
          }

          [Test]
          public void T5_IllegalCodePosition_1_UTF16Surrogates_512 ()
          {
              byte[] data = { 0xED, 0xAD, 0xBF };
              string s = utf8.GetString (data);
              AssertEquals ("MS FX 1.1 behaviour", 56191, s [0]);
          }

          [Test]
          public void T5_IllegalCodePosition_1_UTF16Surrogates_513 ()
          {
              byte[] data = { 0xED, 0xAE, 0x80 };
              string s = utf8.GetString (data);
              AssertEquals ("MS FX 1.1 behaviour", 56192, s [0]);
          }

          [Test]
          public void T5_IllegalCodePosition_1_UTF16Surrogates_514 ()
          {
              byte[] data = { 0xED, 0xAF, 0xBF };
              string s = utf8.GetString (data);
              AssertEquals ("MS FX 1.1 behaviour", 56319, s [0]);
          }

          [Test]
          public void T5_IllegalCodePosition_1_UTF16Surrogates_515 ()
          {
              byte[] data = { 0xED, 0xB0, 0x80 };
              string s = utf8.GetString (data);
              AssertEquals ("MS FX 1.1 behaviour", 56320, s [0]);
          }

          [Test]
          public void T5_IllegalCodePosition_1_UTF16Surrogates_516 ()
          {
              byte[] data = { 0xED, 0xBE, 0x80 };
              string s = utf8.GetString (data);
              AssertEquals ("MS FX 1.1 behaviour", 57216, s [0]);
          }

          [Test]
          public void T5_IllegalCodePosition_1_UTF16Surrogates_517 ()
          {
              byte[] data = { 0xED, 0xBF, 0xBF };
              string s = utf8.GetString (data);
              AssertEquals ("MS FX 1.1 behaviour", 57343, s [0]);
          }

          // T5.2: paired UTF-16 surrogates.
          [Test]
          public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_521 ()
          {
              byte[] data = { 0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80 };
              string s = utf8.GetString (data);
              AssertEquals ("MS FX 1.1 behaviour", 55296, s [0]);
              AssertEquals ("MS FX 1.1 behaviour", 56320, s [1]);
          }

          [Test]
          public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_522 ()
          {
              byte[] data = { 0xED, 0xA0, 0x80, 0xED, 0xBF, 0xBF };
              string s = utf8.GetString (data);
              AssertEquals ("MS FX 1.1 behaviour", 55296, s [0]);
              AssertEquals ("MS FX 1.1 behaviour", 57343, s [1]);
          }

          [Test]
          public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_523 ()
          {
              byte[] data = { 0xED, 0xAD, 0xBF, 0xED, 0xB0, 0x80 };
              string s = utf8.GetString (data);
              AssertEquals ("MS FX 1.1 behaviour", 56191, s [0]);
              AssertEquals ("MS FX 1.1 behaviour", 56320, s [1]);
          }

          [Test]
          public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_524 ()
          {
              byte[] data = { 0xED, 0xAD, 0xBF, 0xED, 0xBF, 0xBF };
              string s = utf8.GetString (data);
              AssertEquals ("MS FX 1.1 behaviour", 56191, s [0]);
              AssertEquals ("MS FX 1.1 behaviour", 57343, s [1]);
          }

          [Test]
          public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_525 ()
          {
              byte[] data = { 0xED, 0xAE, 0x80, 0xED, 0xB0, 0x80 };
              string s = utf8.GetString (data);
              AssertEquals ("MS FX 1.1 behaviour", 56192, s [0]);
              AssertEquals ("MS FX 1.1 behaviour", 56320, s [1]);
          }

          [Test]
          public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_526 ()
          {
              // NOTE(review): the sibling tests 522/524/528 end in ED BF BF
              // (57343); this one ends in ED BF 8F, and 57295 is the value
              // matching those bytes. Presumably 0x8F is a typo for 0xBF -
              // confirm against the reference test table before changing data
              // and expectation together.
              byte[] data = { 0xED, 0xAE, 0x80, 0xED, 0xBF, 0x8F };
              string s = utf8.GetString (data);
              AssertEquals ("MS FX 1.1 behaviour", 56192, s [0]);
              AssertEquals ("MS FX 1.1 behaviour", 57295, s [1]);
          }

          [Test]
          public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_527 ()
          {
              byte[] data = { 0xED, 0xAF, 0xBF, 0xED, 0xB0, 0x80 };
              string s = utf8.GetString (data);
              AssertEquals ("MS FX 1.1 behaviour", 56319, s [0]);
              AssertEquals ("MS FX 1.1 behaviour", 56320, s [1]);
          }

          [Test]
          public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_528 ()
          {
              byte[] data = { 0xED, 0xAF, 0xBF, 0xED, 0xBF, 0xBF };
              string s = utf8.GetString (data);
              AssertEquals ("MS FX 1.1 behaviour", 56319, s [0]);
              AssertEquals ("MS FX 1.1 behaviour", 57343, s [1]);
          }

          // T5.3: other illegal code positions (U+FFFE and U+FFFF).
          [Test]
          public void T5_IllegalCodePosition_3_Other_531 ()
          {
              byte[] data = { 0xEF, 0xBF, 0xBE };
              string s = utf8.GetString (data);
              AssertEquals ("MS FX 1.1 behaviour", 65534, s [0]);
          }

          [Test]
          public void T5_IllegalCodePosition_3_Other_532 ()
          {
              byte[] data = { 0xEF, 0xBF, 0xBF };
              string s = utf8.GetString (data);
              AssertEquals ("MS FX 1.1 behaviour", 65535, s [0]);
          }

          // Regression for bugs #75065 and #73086: the bytes EF BB BF must
          // decode to U+FEFF, and a leading U+FEFF must survive an
          // encode/decode round trip.
          [Test]
          public void GetCharsFEFF ()
          {
              byte[] data = new byte [] { 0xEF, 0xBB, 0xBF };
              Encoding enc = new UTF8Encoding (false, true);
              string s = enc.GetString (data);
              AssertEquals ("\uFEFF", s);

              Encoding utf = Encoding.UTF8;
              char[] testChars = { '\uFEFF', 'A' };
              byte[] bytes = utf.GetBytes (testChars);
              char[] chars = utf.GetChars (bytes);
              AssertEquals ("#1", '\uFEFF', chars [0]);
              AssertEquals ("#2", 'A', chars [1]);
          }
      }
  }