UTF8EncodingTest.cs 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050
  1. //
  2. // UTF8EncodingTest.cs - NUnit Test Cases for System.Text.UTF8Encoding
  3. //
  4. // Authors:
  5. // Patrick Kalkman [email protected]
  6. // Sebastien Pouliot ([email protected])
  7. //
  8. // (C) 2003 Patrick Kalkman
  9. // (C) 2004 Novell (http://www.novell.com)
  10. //
  11. using NUnit.Framework;
  12. using System;
  13. using System.Text;
  14. #if NET_2_0
  15. using DecoderException = System.Text.DecoderFallbackException;
  16. #else
  17. using DecoderException = System.ArgumentException;
  18. #endif
  19. namespace MonoTests.System.Text {
  20. [TestFixture]
  21. public class UTF8EncodingTest : Assertion {
  22. private UTF8Encoding utf8;
  // Runs before every test: rebuilds the shared encoder so that it emits the
  // UTF-8 identifier and throws when asked to decode invalid bytes.
  [SetUp]
  public void Create ()
  {
      bool emitIdentifier = true;
      bool throwOnInvalidBytes = true;
      utf8 = new UTF8Encoding (emitIdentifier, throwOnInvalidBytes);
  }
  // Encodes "A<NOT IDENTICAL TO><ALPHA>." with a default UTF8Encoding and
  // compares each produced byte with 41 E2 89 A2 CE 91 2E (see RFC 2044).
  [Test]
  public void TestEncodingGetBytes1()
  {
      byte[] expected = { 0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E };
      string text = "\u0041\u2262\u0391\u002E";

      byte[] actual = new UTF8Encoding ().GetBytes (text);

      // Assertion labels are 1-based: "UTF #1" .. "UTF #7".
      for (int i = 0; i < expected.Length; i++)
          Assertion.AssertEquals ("UTF #" + (i + 1), expected [i], actual [i]);
  }
  // Encodes "Hi Mom <WHITE SMILING FACE>!" into a caller-supplied buffer and
  // checks the byte count and every byte against
  // 48 69 20 4D 6F 6D 20 E2 98 BA 21 (see RFC 2044).
  [Test]
  public void TestEncodingGetBytes2()
  {
      byte[] expected = { 0x48, 0x69, 0x20, 0x4D, 0x6F, 0x6D, 0x20, 0xE2, 0x98, 0xBA, 0x21 };
      string text = "\u0048\u0069\u0020\u004D\u006F\u006D\u0020\u263A\u0021";
      byte[] buffer = new byte [11];

      UTF8Encoding encoder = new UTF8Encoding ();
      char[] chars = text.ToCharArray ();
      int written = encoder.GetBytes (chars, 0, text.Length, buffer, 0);

      Assertion.AssertEquals ("UTF #1", 11, written);
      // Label "UTF #1" is taken by the count check, so bytes start at "UTF #2".
      for (int i = 0; i < expected.Length; i++)
          Assertion.AssertEquals ("UTF #" + (i + 2), expected [i], buffer [i]);
  }
  // Decodes 41 E2 89 A2 CE 91 2E with a default UTF8Encoding and expects
  // "A<NOT IDENTICAL TO><ALPHA>." (see RFC 2044).
  [Test]
  public void TestDecodingGetChars1()
  {
      int[] expectedCodeUnits = { 0x0041, 0x2262, 0x0391, 0x002E };
      byte[] encoded = new byte [] { 0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E };

      char[] decoded = new UTF8Encoding ().GetChars (encoded);

      // Assertion labels are 1-based: "UTF #1" .. "UTF #4".
      for (int i = 0; i < expectedCodeUnits.Length; i++)
          Assertion.AssertEquals ("UTF #" + (i + 1), expectedCodeUnits [i], decoded [i]);
  }
  // Checks GetMaxCharCount for 50 bytes. The expected value depends on the
  // targeted profile; the test is categorised "NotWorking" under NET_2_0.
  [Test]
#if NET_2_0
  [Category ("NotWorking")]
#endif
  public void TestMaxCharCount()
  {
#if NET_2_0
      // hmm, where is this extra 1 coming from?
      int expected = 51;
#else
      int expected = 50;
#endif
      UTF8Encoding encoder = new UTF8Encoding ();
      Assertion.AssertEquals ("UTF #1", expected, encoder.GetMaxCharCount (50));
  }
  // Checks GetMaxByteCount for 50 chars. The expected value depends on the
  // targeted profile; the test is categorised "NotWorking" under NET_2_0.
  [Test]
#if NET_2_0
  [Category ("NotWorking")]
#endif
  public void TestMaxByteCount()
  {
#if NET_2_0
      // maybe under .NET 2.0 insufficient surrogate pair is just not handled,
      // and 3 is Preamble size.
      int expected = 153;
#else
      int expected = 200;
#endif
      UTF8Encoding encoder = new UTF8Encoding ();
      Assertion.AssertEquals ("UTF #1", expected, encoder.GetMaxByteCount (50));
  }
  // Regression test for bug #59648. Uses an encoder created with
  // throwOnInvalidBytes = false and expects the invalid pair C0 AF to
  // contribute no characters to the decoded string.
  [Test]
  public void TestThrowOnInvalid ()
  {
      UTF8Encoding lenient = new UTF8Encoding (true, false);

      // The invalid pair alone yields an empty string.
      string decoded = lenient.GetString (new byte [] { 0xC0, 0xAF });
      AssertEquals (0, decoded.Length);

      // Surrounded by ASCII digits, only the four digits survive.
      decoded = lenient.GetString (new byte [] { 0x30, 0x31, 0xC0, 0xAF, 0x30, 0x32 });
      AssertEquals (4, decoded.Length);

      int[] expected = { 0x30, 0x31, 0x30, 0x32 };
      for (int i = 0; i < expected.Length; i++)
          AssertEquals (expected [i], (int) decoded [i]);
  }
  123. // UTF8 decoding tests from http://www.cl.cam.ac.uk/~mgk25/
  // Decodes a valid multi-byte Greek word and verifies that re-encoding the
  // result reproduces the original bytes.
  [Test]
  public void T1_Correct_GreekWord_kosme ()
  {
      byte[] greekWord = new byte [] { 0xCE, 0xBA, 0xE1, 0xBD, 0xB9, 0xCF, 0x83, 0xCE, 0xBC, 0xCE, 0xB5 };
      string decoded = utf8.GetString (greekWord);

      // Non-ASCII literals in the source file can be problematic, so the
      // check is a byte-level round trip instead of a string comparison.
      string originalHex = BitConverter.ToString (greekWord);
      string roundTripHex = BitConverter.ToString (utf8.GetBytes (decoded));
      AssertEquals ("Reconverted", originalHex, roundTripHex);
  }
  // First code point representable at each valid sequence length (1 to 4
  // bytes): each must decode to the expected UTF-16 value(s) and re-encode
  // to the same bytes.
  [Test]
  public void T2_Boundary_1_FirstPossibleSequence_Pass ()
  {
      string decoded;
      string expectedHex;

      // 1 byte: U+0000
      byte[] oneByte = new byte [] { 0x00 };
      decoded = utf8.GetString (oneByte);
      AssertEquals ("1 byte (U-00000000)", "\0", decoded);
      expectedHex = BitConverter.ToString (oneByte);
      AssertEquals ("Reconverted-1", expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)));

      // 2 bytes: U+0080
      byte[] twoBytes = new byte [] { 0xC2, 0x80 };
      decoded = utf8.GetString (twoBytes);
      AssertEquals ("2 bytes (U-00000080)", 0x80, decoded [0]);
      expectedHex = BitConverter.ToString (twoBytes);
      AssertEquals ("Reconverted-2", expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)));

      // 3 bytes: U+0800
      byte[] threeBytes = new byte [] { 0xE0, 0xA0, 0x80 };
      decoded = utf8.GetString (threeBytes);
      AssertEquals ("3 bytes (U-00000800)", 0x800, decoded [0]);
      expectedHex = BitConverter.ToString (threeBytes);
      AssertEquals ("Reconverted-3", expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)));

      // 4 bytes: U+10000, seen as the surrogate pair D800 DC00
      byte[] fourBytes = new byte [] { 0xF0, 0x90, 0x80, 0x80 };
      decoded = utf8.GetString (fourBytes);
      AssertEquals ("4 bytes (U-00010000)-0", 0xD800, decoded [0]);
      AssertEquals ("4 bytes (U-00010000)-1", 0xDC00, decoded [1]);
      expectedHex = BitConverter.ToString (fourBytes);
      AssertEquals ("Reconverted-4", expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)));
  }
  // First 5-byte sequence (U-00200000): the strict decoder is expected to
  // throw on it.
  [Test]
  // Fail on MS Fx 1.1
  [ExpectedException (typeof (DecoderException))]
  public void T2_Boundary_1_FirstPossibleSequence_Fail_5 ()
  {
      byte[] fiveByteSequence = new byte [] { 0xF8, 0x88, 0x80, 0x80, 0x80 };
      string decoded = utf8.GetString (fiveByteSequence);

      // Only reached when the decoder did not throw.
      AssertNull ("5 bytes (U-00200000)", decoded);
      string expectedHex = BitConverter.ToString (fiveByteSequence);
      AssertEquals ("Reconverted-5", expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)));
  }
  // First 6-byte sequence (U-04000000): the strict decoder is expected to
  // throw on it.
  [Test]
  // Fail on MS Fx 1.1
  [ExpectedException (typeof (DecoderException))]
  public void T2_Boundary_1_FirstPossibleSequence_Fail_6 ()
  {
      byte[] sixByteSequence = new byte [] { 0xFC, 0x84, 0x80, 0x80, 0x80, 0x80 };
      string decoded = utf8.GetString (sixByteSequence);

      // Only reached when the decoder did not throw.
      AssertNull ("6 bytes (U-04000000)", decoded);
      string expectedHex = BitConverter.ToString (sixByteSequence);
      AssertEquals ("Reconverted-6", expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)));
  }
  // Last code point representable with 1, 2 and 3 bytes: each must decode to
  // the expected value and re-encode to the same bytes.
  [Test]
  public void T2_Boundary_2_LastPossibleSequence_Pass ()
  {
      string decoded;
      string expectedHex;

      // 1 byte: U+007F
      byte[] oneByte = new byte [] { 0x7F };
      decoded = utf8.GetString (oneByte);
      AssertEquals ("1 byte (U-0000007F)", 0x7F, decoded [0]);
      expectedHex = BitConverter.ToString (oneByte);
      AssertEquals ("Reconverted-1", expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)));

      // 2 bytes: U+07FF
      byte[] twoBytes = new byte [] { 0xDF, 0xBF };
      decoded = utf8.GetString (twoBytes);
      AssertEquals ("2 bytes (U-000007FF)", 0x7FF, decoded [0]);
      expectedHex = BitConverter.ToString (twoBytes);
      AssertEquals ("Reconverted-2", expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)));

      // 3 bytes: U+FFFF
      byte[] threeBytes = new byte [] { 0xEF, 0xBF, 0xBF };
      decoded = utf8.GetString (threeBytes);
      AssertEquals ("3 bytes (U-0000FFFF)", 0xFFFF, decoded [0]);
      expectedHex = BitConverter.ToString (threeBytes);
      AssertEquals ("Reconverted-3", expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)));
  }
  // Last 4-byte sequence (U-001FFFFF, bytes F7 BF BF BF). The value lies
  // beyond U+10FFFF, so the strict decoder is expected to throw.
  [Test]
  // Fail on MS Fx 1.1
  [ExpectedException (typeof (DecoderException))]
  public void T2_Boundary_2_LastPossibleSequence_Fail_4 ()
  {
      // The lead byte must be 0xF7. The previous 0x7F was a transposed typo:
      // it is plain ASCII DEL, so the test only exercised stray continuation
      // bytes instead of the 4-byte sequence named in the assertion message.
      byte[] data224 = { 0xF7, 0xBF, 0xBF, 0xBF };
      string s = utf8.GetString (data224);

      // Only reached when the decoder did not throw.
      AssertNull ("4 bytes (U-001FFFFF)", s);
      AssertEquals ("Reconverted-4", BitConverter.ToString (data224), BitConverter.ToString (utf8.GetBytes (s)));
  }
  // Last 5-byte sequence (U-03FFFFFF): the strict decoder is expected to
  // throw on it.
  [Test]
  // Fail on MS Fx 1.1
  [ExpectedException (typeof (DecoderException))]
  public void T2_Boundary_2_LastPossibleSequence_Fail_5 ()
  {
      byte[] fiveByteSequence = new byte [] { 0xFB, 0xBF, 0xBF, 0xBF, 0xBF };
      string decoded = utf8.GetString (fiveByteSequence);

      // Only reached when the decoder did not throw.
      AssertNull ("5 bytes (U-03FFFFFF)", decoded);
      string expectedHex = BitConverter.ToString (fiveByteSequence);
      AssertEquals ("Reconverted-5", expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)));
  }
  // Last 6-byte sequence (U-7FFFFFFF): the strict decoder is expected to
  // throw on it.
  [Test]
  // Fail on MS Fx 1.1
  [ExpectedException (typeof (DecoderException))]
  public void T2_Boundary_2_LastPossibleSequence_Fail_6 ()
  {
      byte[] sixByteSequence = new byte [] { 0xFD, 0xBF, 0xBF, 0xBF, 0xBF, 0xBF };
      string decoded = utf8.GetString (sixByteSequence);

      // Only reached when the decoder did not throw.
      AssertNull ("6 bytes (U-7FFFFFFF)", decoded);
      string expectedHex = BitConverter.ToString (sixByteSequence);
      AssertEquals ("Reconverted-6", expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)));
  }
  // Other boundary values that are valid: the code points on either side of
  // the surrogate range, U+FFFD, and U+10FFFF. Each must decode to the
  // expected UTF-16 value(s) and re-encode to the same bytes.
  [Test]
  public void T2_Boundary_3_Other_Pass ()
  {
      string decoded;
      string expectedHex;

      // U+D7FF
      byte[] beforeSurrogates = new byte [] { 0xED, 0x9F, 0xBF };
      decoded = utf8.GetString (beforeSurrogates);
      AssertEquals ("U-0000D7FF", 0xD7FF, decoded [0]);
      expectedHex = BitConverter.ToString (beforeSurrogates);
      AssertEquals ("Reconverted-1", expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)));

      // U+E000
      byte[] afterSurrogates = new byte [] { 0xEE, 0x80, 0x80 };
      decoded = utf8.GetString (afterSurrogates);
      AssertEquals ("U-0000E000", 0xE000, decoded [0]);
      expectedHex = BitConverter.ToString (afterSurrogates);
      AssertEquals ("Reconverted-2", expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)));

      // U+FFFD
      byte[] replacementChar = new byte [] { 0xEF, 0xBF, 0xBD };
      decoded = utf8.GetString (replacementChar);
      AssertEquals ("U-0000FFFD", 0xFFFD, decoded [0]);
      expectedHex = BitConverter.ToString (replacementChar);
      AssertEquals ("Reconverted-3", expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)));

      // U+10FFFF, seen as the surrogate pair DBFF DFFF
      byte[] lastCodePoint = new byte [] { 0xF4, 0x8F, 0xBF, 0xBF };
      decoded = utf8.GetString (lastCodePoint);
      AssertEquals ("U-0010FFFF-0", 0xDBFF, decoded [0]);
      AssertEquals ("U-0010FFFF-1", 0xDFFF, decoded [1]);
      expectedHex = BitConverter.ToString (lastCodePoint);
      AssertEquals ("Reconverted-4", expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)));
  }
  // U-00110000 (F4 90 80 80) is the first value past U+10FFFF: the strict
  // decoder is expected to throw on it.
  [Test]
  // Fail on MS Fx 1.1
  [ExpectedException (typeof (DecoderException))]
  public void T2_Boundary_3_Other_Fail_5 ()
  {
      byte[] beyondRange = new byte [] { 0xF4, 0x90, 0x80, 0x80 };
      string decoded = utf8.GetString (beyondRange);

      // Only reached when the decoder did not throw.
      AssertNull ("U-00110000", decoded);
      string expectedHex = BitConverter.ToString (beyondRange);
      AssertEquals ("Reconverted-5", expectedHex, BitConverter.ToString (utf8.GetBytes (decoded)));
  }
  // A lone continuation byte (0x80) with no lead byte must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_1_UnexpectedContinuation_311 ()
  {
      byte[] strayContinuation = new byte [] { 0x80 };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (strayContinuation);
  }
  // A lone continuation byte (0xBF) with no lead byte must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_1_UnexpectedContinuation_312 ()
  {
      byte[] strayContinuation = new byte [] { 0xBF };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (strayContinuation);
  }
  // Two continuation bytes in a row, with no lead byte, must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_1_UnexpectedContinuation_313 ()
  {
      byte[] strayContinuations = new byte [] { 0x80, 0xBF };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (strayContinuations);
  }
  // Three continuation bytes in a row, with no lead byte, must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_1_UnexpectedContinuation_314 ()
  {
      byte[] strayContinuations = new byte [] { 0x80, 0xBF, 0x80 };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (strayContinuations);
  }
  // Four continuation bytes in a row, with no lead byte, must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_1_UnexpectedContinuation_315 ()
  {
      byte[] strayContinuations = new byte [] { 0x80, 0xBF, 0x80, 0xBF };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (strayContinuations);
  }
  // Five continuation bytes in a row, with no lead byte, must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_1_UnexpectedContinuation_316 ()
  {
      byte[] strayContinuations = new byte [] { 0x80, 0xBF, 0x80, 0xBF, 0x80 };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (strayContinuations);
  }
  // Six continuation bytes in a row, with no lead byte, must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_1_UnexpectedContinuation_317 ()
  {
      byte[] strayContinuations = new byte [] { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (strayContinuations);
  }
  // Seven continuation bytes in a row, with no lead byte, must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_1_UnexpectedContinuation_318 ()
  {
      byte[] strayContinuations = new byte [] { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80 };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (strayContinuations);
  }
  // All 64 continuation bytes (0x80 through 0xBF) in sequence, with no lead
  // byte anywhere, must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_1_UnexpectedContinuation_319 ()
  {
      byte[] continuations = new byte [64];
      for (int i = 0; i < continuations.Length; i++)
          continuations [i] = (byte) (0x80 + i);

      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (continuations);
  }
  // Every 2-byte lead byte (0xC0 through 0xDF), each followed by a space
  // instead of a continuation byte, must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_2_LonelyStart_321 ()
  {
      const int leadCount = 32;
      byte[] lonelyStarts = new byte [2 * leadCount];
      for (int i = 0; i < leadCount; i++) {
          lonelyStarts [2 * i] = (byte) (0xC0 + i);
          lonelyStarts [2 * i + 1] = 0x20;
      }

      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (lonelyStarts);
  }
  // Every 3-byte lead byte (0xE0 through 0xEF), each followed by a space
  // instead of a continuation byte, must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_2_LonelyStart_322 ()
  {
      const int leadCount = 16;
      byte[] lonelyStarts = new byte [2 * leadCount];
      for (int i = 0; i < leadCount; i++) {
          lonelyStarts [2 * i] = (byte) (0xE0 + i);
          lonelyStarts [2 * i + 1] = 0x20;
      }

      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (lonelyStarts);
  }
  // Every 4-byte lead byte (0xF0 through 0xF7), each followed by a space
  // instead of a continuation byte, must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_2_LonelyStart_323 ()
  {
      const int leadCount = 8;
      byte[] lonelyStarts = new byte [2 * leadCount];
      for (int i = 0; i < leadCount; i++) {
          lonelyStarts [2 * i] = (byte) (0xF0 + i);
          lonelyStarts [2 * i + 1] = 0x20;
      }

      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (lonelyStarts);
  }
  // Every 5-byte lead byte (0xF8 through 0xFB), each followed by a space
  // instead of a continuation byte, must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_2_LonelyStart_324 ()
  {
      // This test used to repeat the 0xF0-0xF7 data of LonelyStart_323
      // verbatim (copy/paste), which left the 5-byte lead bytes untested.
      byte[] data = { 0xF8, 0x20, 0xF9, 0x20, 0xFA, 0x20, 0xFB, 0x20 };

      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (data);
  }
  // Both 6-byte lead bytes (0xFC and 0xFD), each followed by a space instead
  // of a continuation byte, must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_2_LonelyStart_325 ()
  {
      byte[] lonelyStarts = new byte [] { 0xFC, 0x20, 0xFD, 0x20 };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (lonelyStarts);
  }
  // A 2-byte lead (0xC0) at end of input, with its continuation byte
  // missing, must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_3_LastContinuationMissing_331 ()
  {
      byte[] truncated = new byte [] { 0xC0 };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (truncated);
  }
  // A 3-byte sequence (lead 0xE0) cut off after two bytes must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_3_LastContinuationMissing_332 ()
  {
      byte[] truncated = new byte [] { 0xE0, 0x80 };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (truncated);
  }
  // A 4-byte sequence (lead 0xF0) cut off after three bytes must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_3_LastContinuationMissing_333 ()
  {
      byte[] truncated = new byte [] { 0xF0, 0x80, 0x80 };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (truncated);
  }
  // A 5-byte sequence (lead 0xF8) cut off after four bytes must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_3_LastContinuationMissing_334 ()
  {
      byte[] truncated = new byte [] { 0xF8, 0x80, 0x80, 0x80 };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (truncated);
  }
  // A 6-byte sequence (lead 0xFC) cut off after five bytes must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_3_LastContinuationMissing_335 ()
  {
      byte[] truncated = new byte [] { 0xFC, 0x80, 0x80, 0x80, 0x80 };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (truncated);
  }
  // A 2-byte lead (0xDF) at end of input with its continuation missing.
  // Two outcomes are tolerated: an empty string (MS Fx 1.1) or a
  // DecoderException (Mono, following the standard).
  [Test]
  // MS Fx 1.1 accept this
  // [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_3_LastContinuationMissing_336 ()
  {
      byte[] truncated = new byte [] { 0xDF };
      try {
          string decoded = utf8.GetString (truncated);
          // No exception: the truncated sequence must have been dropped.
          AssertEquals ("MS FX 1.1 behaviour", String.Empty, decoded);
      }
      catch (DecoderException) {
          // Rejecting the input is the standard-conforming outcome.
      }
  }
  // A 3-byte sequence (lead 0xEF) cut off after two bytes. Two outcomes are
  // tolerated: an empty string (MS Fx 1.1) or a DecoderException (Mono,
  // following the standard).
  [Test]
  // MS Fx 1.1 accept this
  // [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_3_LastContinuationMissing_337 ()
  {
      byte[] truncated = new byte [] { 0xEF, 0xBF };
      try {
          string decoded = utf8.GetString (truncated);
          // No exception: the truncated sequence must have been dropped.
          AssertEquals ("MS FX 1.1 behaviour", String.Empty, decoded);
      }
      catch (DecoderException) {
          // Rejecting the input is the standard-conforming outcome.
      }
  }
  // A 4-byte sequence (lead 0xF7) cut off after three bytes must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_3_LastContinuationMissing_338 ()
  {
      byte[] truncated = new byte [] { 0xF7, 0xBF, 0xBF };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (truncated);
  }
  // A 5-byte sequence (lead 0xFB) cut off after four bytes must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_3_LastContinuationMissing_339 ()
  {
      // The lead byte must be 0xFB. The previous value 0xF (0x0F, an ASCII
      // control character) was a typo, so the test only exercised stray
      // continuation bytes instead of a truncated 5-byte sequence.
      byte[] data = { 0xFB, 0xBF, 0xBF, 0xBF };

      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (data);
  }
  // A 6-byte sequence (lead 0xFD) cut off after five bytes must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_3_LastContinuationMissing_3310 ()
  {
      byte[] truncated = new byte [] { 0xFD, 0xBF, 0xBF, 0xBF, 0xBF };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (truncated);
  }
  // Ten truncated sequences (each missing its last continuation byte)
  // concatenated into one buffer; decoding the whole buffer must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_4_ConcatenationImcomplete ()
  {
      byte[][] fragments = new byte [][] {
          new byte [] { 0xC0 },
          new byte [] { 0xE0, 0x80 },
          new byte [] { 0xF0, 0x80, 0x80 },
          new byte [] { 0xF8, 0x80, 0x80, 0x80 },
          new byte [] { 0xFC, 0x80, 0x80, 0x80, 0x80 },
          new byte [] { 0xDF },
          new byte [] { 0xEF, 0xBF },
          new byte [] { 0xF7, 0xBF, 0xBF },
          new byte [] { 0xFB, 0xBF, 0xBF, 0xBF },
          new byte [] { 0xFD, 0xBF, 0xBF, 0xBF, 0xBF }
      };

      // Flatten the fragments, in order, into a single 30-byte input.
      int total = 0;
      foreach (byte[] fragment in fragments)
          total += fragment.Length;

      byte[] concatenated = new byte [total];
      int offset = 0;
      foreach (byte[] fragment in fragments) {
          Array.Copy (fragment, 0, concatenated, offset, fragment.Length);
          offset += fragment.Length;
      }

      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (concatenated);
  }
  // The single byte 0xFE must be rejected by the strict decoder.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_5_ImpossibleBytes_351 ()
  {
      byte[] impossible = new byte [] { 0xFE };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (impossible);
  }
  // The single byte 0xFF must be rejected by the strict decoder.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_5_ImpossibleBytes_352 ()
  {
      byte[] impossible = new byte [] { 0xFF };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (impossible);
  }
  // A run of 0xFE and 0xFF bytes must be rejected by the strict decoder.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_5_ImpossibleBytes_353 ()
  {
      byte[] impossible = new byte [] { 0xFE, 0xFE, 0xFF, 0xFF };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (impossible);
  }
  // Overlong == dangerous -> a "safe" decoder should reject them
  // Overlong 2-byte encoding of '/' (U+002F) must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_1_ASCII_Slash_411 ()
  {
      byte[] overlongSlash = new byte [] { 0xC0, 0xAF };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (overlongSlash);
  }
  // Overlong 3-byte encoding of '/' (U+002F) must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_1_ASCII_Slash_412 ()
  {
      byte[] overlongSlash = new byte [] { 0xE0, 0x80, 0xAF };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (overlongSlash);
  }
  // Overlong 4-byte encoding of '/' (U+002F) must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_1_ASCII_Slash_413 ()
  {
      byte[] overlongSlash = new byte [] { 0xF0, 0x80, 0x80, 0xAF };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (overlongSlash);
  }
  // Overlong 5-byte encoding of '/' (U+002F) must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_1_ASCII_Slash_414 ()
  {
      byte[] overlongSlash = new byte [] { 0xF8, 0x80, 0x80, 0x80, 0xAF };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (overlongSlash);
  }
  // Overlong 6-byte encoding of '/' (U+002F) must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_1_ASCII_Slash_415 ()
  {
      byte[] overlongSlash = new byte [] { 0xFC, 0x80, 0x80, 0x80, 0x80, 0xAF };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (overlongSlash);
  }
  // Highest value still overlong in 2 bytes (C1 BF, i.e. U+007F) must be
  // rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_2_MaximumBoundary_421 ()
  {
      byte[] overlongMax = new byte [] { 0xC1, 0xBF };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (overlongMax);
  }
  // Highest value still overlong in 3 bytes (E0 9F BF, i.e. U+07FF) must be
  // rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_2_MaximumBoundary_422 ()
  {
      byte[] overlongMax = new byte [] { 0xE0, 0x9F, 0xBF };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (overlongMax);
  }
  // Highest value still overlong in 4 bytes (F0 8F BF BF, i.e. U+FFFF) must
  // be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_2_MaximumBoundary_423 ()
  {
      byte[] overlongMax = new byte [] { 0xF0, 0x8F, 0xBF, 0xBF };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (overlongMax);
  }
  // Highest value still overlong in 5 bytes (F8 87 BF BF BF) must be
  // rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_2_MaximumBoundary_424 ()
  {
      byte[] overlongMax = new byte [] { 0xF8, 0x87, 0xBF, 0xBF, 0xBF };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (overlongMax);
  }
  // Highest value still overlong in 6 bytes (FC 83 BF BF BF BF) must be
  // rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_2_MaximumBoundary_425 ()
  {
      byte[] overlongMax = new byte [] { 0xFC, 0x83, 0xBF, 0xBF, 0xBF, 0xBF };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (overlongMax);
  }
  // Overlong 2-byte encoding of NUL (U+0000) must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_3_NUL_431 ()
  {
      byte[] overlongNul = new byte [] { 0xC0, 0x80 };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (overlongNul);
  }
  // Overlong 3-byte encoding of NUL (U+0000) must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_3_NUL_432 ()
  {
      byte[] overlongNul = new byte [] { 0xE0, 0x80, 0x80 };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (overlongNul);
  }
  // Overlong 4-byte encoding of NUL (U+0000) must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_3_NUL_433 ()
  {
      byte[] overlongNul = new byte [] { 0xF0, 0x80, 0x80, 0x80 };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (overlongNul);
  }
  // Overlong 5-byte encoding of NUL (U+0000) must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_3_NUL_434 ()
  {
      byte[] overlongNul = new byte [] { 0xF8, 0x80, 0x80, 0x80, 0x80 };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (overlongNul);
  }
  // Overlong 6-byte encoding of NUL (U+0000) must be rejected.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_3_NUL_435 ()
  {
      byte[] overlongNul = new byte [] { 0xFC, 0x80, 0x80, 0x80, 0x80, 0x80 };
      // The result is irrelevant: the call itself has to throw.
      utf8.GetString (overlongNul);
  }
  // ED A0 80 is the 3-byte form of the UTF-16 surrogate U+D800; the strict
  // decoder is expected to throw on it.
  [Test]
#if !NET_2_0
  // MS Fx 1.1 accept this
  [Category ("NotDotNet")]
#endif
  // DecoderException aliases DecoderFallbackException when NET_2_0 is defined.
  [ExpectedException (typeof (DecoderException))]
  public void T5_IllegalCodePosition_1_UTF16Surrogates_511 ()
  {
      byte[] encodedSurrogate = new byte [] { 0xED, 0xA0, 0x80 };
      string decoded = utf8.GetString (encodedSurrogate);
      // Only reached when the decoder did not throw.
      AssertEquals ("MS FX 1.1 behaviour", 0xD800, decoded [0]);
  }
  // ED AD BF is the 3-byte form of the UTF-16 surrogate U+DB7F; the strict
  // decoder is expected to throw on it.
  [Test]
#if !NET_2_0
  // MS Fx 1.1 accept this
  [Category ("NotDotNet")]
#endif
  // DecoderException aliases DecoderFallbackException when NET_2_0 is defined.
  [ExpectedException (typeof (DecoderException))]
  public void T5_IllegalCodePosition_1_UTF16Surrogates_512 ()
  {
      byte[] encodedSurrogate = new byte [] { 0xED, 0xAD, 0xBF };
      string decoded = utf8.GetString (encodedSurrogate);
      // Only reached when the decoder did not throw.
      AssertEquals ("MS FX 1.1 behaviour", 0xDB7F, decoded [0]);
  }
  // ED AE 80 is the 3-byte form of the UTF-16 surrogate U+DB80; the strict
  // decoder is expected to throw on it.
  [Test]
#if !NET_2_0
  // MS Fx 1.1 accept this
  [Category ("NotDotNet")]
#endif
  // DecoderException aliases DecoderFallbackException when NET_2_0 is defined.
  [ExpectedException (typeof (DecoderException))]
  public void T5_IllegalCodePosition_1_UTF16Surrogates_513 ()
  {
      byte[] encodedSurrogate = new byte [] { 0xED, 0xAE, 0x80 };
      string decoded = utf8.GetString (encodedSurrogate);
      // Only reached when the decoder did not throw.
      AssertEquals ("MS FX 1.1 behaviour", 0xDB80, decoded [0]);
  }
  // ED AF BF is the 3-byte form of the UTF-16 surrogate U+DBFF; the strict
  // decoder is expected to throw on it.
  [Test]
#if !NET_2_0
  // MS Fx 1.1 accept this
  [Category ("NotDotNet")]
#endif
  // DecoderException aliases DecoderFallbackException when NET_2_0 is defined.
  [ExpectedException (typeof (DecoderException))]
  public void T5_IllegalCodePosition_1_UTF16Surrogates_514 ()
  {
      byte[] encodedSurrogate = new byte [] { 0xED, 0xAF, 0xBF };
      string decoded = utf8.GetString (encodedSurrogate);
      // Only reached when the decoder did not throw.
      AssertEquals ("MS FX 1.1 behaviour", 0xDBFF, decoded [0]);
  }
  // ED B0 80 is the 3-byte form of the UTF-16 surrogate U+DC00; the strict
  // decoder is expected to throw on it.
  [Test]
#if !NET_2_0
  // MS Fx 1.1 accept this
  [Category ("NotDotNet")]
#endif
  // DecoderException aliases DecoderFallbackException when NET_2_0 is defined.
  [ExpectedException (typeof (DecoderException))]
  public void T5_IllegalCodePosition_1_UTF16Surrogates_515 ()
  {
      byte[] encodedSurrogate = new byte [] { 0xED, 0xB0, 0x80 };
      string decoded = utf8.GetString (encodedSurrogate);
      // Only reached when the decoder did not throw.
      AssertEquals ("MS FX 1.1 behaviour", 0xDC00, decoded [0]);
  }
  // ED BE 80 is the 3-byte form of the UTF-16 surrogate U+DF80; the strict
  // decoder is expected to throw on it.
  [Test]
#if !NET_2_0
  // MS Fx 1.1 accept this
  [Category ("NotDotNet")]
#endif
  // DecoderException aliases DecoderFallbackException when NET_2_0 is defined.
  [ExpectedException (typeof (DecoderException))]
  public void T5_IllegalCodePosition_1_UTF16Surrogates_516 ()
  {
      byte[] encodedSurrogate = new byte [] { 0xED, 0xBE, 0x80 };
      string decoded = utf8.GetString (encodedSurrogate);
      // Only reached when the decoder did not throw.
      AssertEquals ("MS FX 1.1 behaviour", 0xDF80, decoded [0]);
  }
  // ED BF BF is the 3-byte form of the UTF-16 surrogate U+DFFF; the strict
  // decoder is expected to throw on it.
  [Test]
#if !NET_2_0
  // MS Fx 1.1 accept this
  [Category ("NotDotNet")]
#endif
  // DecoderException aliases DecoderFallbackException when NET_2_0 is defined.
  [ExpectedException (typeof (DecoderException))]
  public void T5_IllegalCodePosition_1_UTF16Surrogates_517 ()
  {
      byte[] encodedSurrogate = new byte [] { 0xED, 0xBF, 0xBF };
      string decoded = utf8.GetString (encodedSurrogate);
      // Only reached when the decoder did not throw.
      AssertEquals ("MS FX 1.1 behaviour", 0xDFFF, decoded [0]);
  }
  // The surrogate pair U+D800 U+DC00, each half written as its own 3-byte
  // sequence; the strict decoder is expected to throw on it.
  [Test]
#if !NET_2_0
  // MS Fx 1.1 accept this
  [Category ("NotDotNet")]
#endif
  // DecoderException aliases DecoderFallbackException when NET_2_0 is defined.
  [ExpectedException (typeof (DecoderException))]
  public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_521 ()
  {
      byte[] encodedPair = new byte [] { 0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80 };
      string decoded = utf8.GetString (encodedPair);
      // Only reached when the decoder did not throw.
      AssertEquals ("MS FX 1.1 behaviour", 0xD800, decoded [0]);
      AssertEquals ("MS FX 1.1 behaviour", 0xDC00, decoded [1]);
  }
  // Illegal code position, paired UTF-16 surrogates (case 522): two
  // three-byte sequences spelling U+D800 followed by U+DFFF.
  [Test]
  #if NET_2_0
  [ExpectedException (typeof (DecoderFallbackException))]
  #else
  [ExpectedException (typeof (DecoderException))]
  // MS Fx 1.1 accepts this input, hence the NotDotNet category.
  [Category ("NotDotNet")]
  #endif
  public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_522 ()
  {
      byte[] encoded = new byte[] {
          0xED, 0xA0, 0x80,
          0xED, 0xBF, 0xBF
      };
      // The decode below is the call that is really expected to throw.
      string decoded = utf8.GetString (encoded);
      // Only reached when no exception was raised: U+D800 (55296), U+DFFF (57343).
      AssertEquals ("MS FX 1.1 behaviour", 0xD800, decoded [0]);
      AssertEquals ("MS FX 1.1 behaviour", 0xDFFF, decoded [1]);
  }
  // Illegal code position, paired UTF-16 surrogates (case 523): two
  // three-byte sequences spelling U+DB7F followed by U+DC00.
  [Test]
  #if NET_2_0
  [ExpectedException (typeof (DecoderFallbackException))]
  #else
  [ExpectedException (typeof (DecoderException))]
  // MS Fx 1.1 accepts this input, hence the NotDotNet category.
  [Category ("NotDotNet")]
  #endif
  public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_523 ()
  {
      byte[] encoded = new byte[] {
          0xED, 0xAD, 0xBF,
          0xED, 0xB0, 0x80
      };
      // The decode below is the call that is really expected to throw.
      string decoded = utf8.GetString (encoded);
      // Only reached when no exception was raised: U+DB7F (56191), U+DC00 (56320).
      AssertEquals ("MS FX 1.1 behaviour", 0xDB7F, decoded [0]);
      AssertEquals ("MS FX 1.1 behaviour", 0xDC00, decoded [1]);
  }
  // Illegal code position, paired UTF-16 surrogates (case 524): two
  // three-byte sequences spelling U+DB7F followed by U+DFFF.
  [Test]
  #if NET_2_0
  [ExpectedException (typeof (DecoderFallbackException))]
  #else
  [ExpectedException (typeof (DecoderException))]
  // MS Fx 1.1 accepts this input, hence the NotDotNet category.
  [Category ("NotDotNet")]
  #endif
  public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_524 ()
  {
      byte[] encoded = new byte[] {
          0xED, 0xAD, 0xBF,
          0xED, 0xBF, 0xBF
      };
      // The decode below is the call that is really expected to throw.
      string decoded = utf8.GetString (encoded);
      // Only reached when no exception was raised: U+DB7F (56191), U+DFFF (57343).
      AssertEquals ("MS FX 1.1 behaviour", 0xDB7F, decoded [0]);
      AssertEquals ("MS FX 1.1 behaviour", 0xDFFF, decoded [1]);
  }
  // Illegal code position, paired UTF-16 surrogates (case 525): two
  // three-byte sequences spelling U+DB80 followed by U+DC00.
  [Test]
  #if NET_2_0
  [ExpectedException (typeof (DecoderFallbackException))]
  #else
  [ExpectedException (typeof (DecoderException))]
  // MS Fx 1.1 accepts this input, hence the NotDotNet category.
  [Category ("NotDotNet")]
  #endif
  public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_525 ()
  {
      byte[] encoded = new byte[] {
          0xED, 0xAE, 0x80,
          0xED, 0xB0, 0x80
      };
      // The decode below is the call that is really expected to throw.
      string decoded = utf8.GetString (encoded);
      // Only reached when no exception was raised: U+DB80 (56192), U+DC00 (56320).
      AssertEquals ("MS FX 1.1 behaviour", 0xDB80, decoded [0]);
      AssertEquals ("MS FX 1.1 behaviour", 0xDC00, decoded [1]);
  }
  // Illegal code position, paired UTF-16 surrogates (case 526): two
  // three-byte sequences spelling U+DB80 followed by U+DFCF.
  // NOTE(review): the last byte is 0x8F (giving U+DFCF) whereas the sibling
  // cases 522/524/528 end in 0xBF (U+DFFF) - confirm this is intentional.
  [Test]
  #if NET_2_0
  [ExpectedException (typeof (DecoderFallbackException))]
  #else
  [ExpectedException (typeof (DecoderException))]
  // MS Fx 1.1 accepts this input, hence the NotDotNet category.
  [Category ("NotDotNet")]
  #endif
  public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_526 ()
  {
      byte[] encoded = new byte[] {
          0xED, 0xAE, 0x80,
          0xED, 0xBF, 0x8F
      };
      // The decode below is the call that is really expected to throw.
      string decoded = utf8.GetString (encoded);
      // Only reached when no exception was raised: U+DB80 (56192), U+DFCF (57295).
      AssertEquals ("MS FX 1.1 behaviour", 0xDB80, decoded [0]);
      AssertEquals ("MS FX 1.1 behaviour", 0xDFCF, decoded [1]);
  }
  // Illegal code position, paired UTF-16 surrogates (case 527): two
  // three-byte sequences spelling U+DBFF followed by U+DC00.
  [Test]
  #if NET_2_0
  [ExpectedException (typeof (DecoderFallbackException))]
  #else
  [ExpectedException (typeof (DecoderException))]
  // MS Fx 1.1 accepts this input, hence the NotDotNet category.
  [Category ("NotDotNet")]
  #endif
  public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_527 ()
  {
      byte[] encoded = new byte[] {
          0xED, 0xAF, 0xBF,
          0xED, 0xB0, 0x80
      };
      // The decode below is the call that is really expected to throw.
      string decoded = utf8.GetString (encoded);
      // Only reached when no exception was raised: U+DBFF (56319), U+DC00 (56320).
      AssertEquals ("MS FX 1.1 behaviour", 0xDBFF, decoded [0]);
      AssertEquals ("MS FX 1.1 behaviour", 0xDC00, decoded [1]);
  }
  // Illegal code position, paired UTF-16 surrogates (case 528): two
  // three-byte sequences spelling U+DBFF followed by U+DFFF.
  [Test]
  #if NET_2_0
  [ExpectedException (typeof (DecoderFallbackException))]
  #else
  [ExpectedException (typeof (DecoderException))]
  // MS Fx 1.1 accepts this input, hence the NotDotNet category.
  [Category ("NotDotNet")]
  #endif
  public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_528 ()
  {
      byte[] encoded = new byte[] {
          0xED, 0xAF, 0xBF,
          0xED, 0xBF, 0xBF
      };
      // The decode below is the call that is really expected to throw.
      string decoded = utf8.GetString (encoded);
      // Only reached when no exception was raised: U+DBFF (56319), U+DFFF (57343).
      AssertEquals ("MS FX 1.1 behaviour", 0xDBFF, decoded [0]);
      AssertEquals ("MS FX 1.1 behaviour", 0xDFFF, decoded [1]);
  }
  // Illegal code position, other (case 531): the bytes EF BF BE are the
  // three-byte form of U+FFFE.
  // No ExpectedException (typeof (DecoderException)) is declared because
  // MS Fx 1.1 accepts this input; the test pins the decoded value instead.
  [Test]
  public void T5_IllegalCodePosition_3_Other_531 ()
  {
      byte[] encoded = new byte[] { 0xEF, 0xBF, 0xBE };
      // An exception would "really" be expected from this decode.
      string decoded = utf8.GetString (encoded);
      // Accepted behaviour: the single char is U+FFFE (65534).
      AssertEquals ("MS FX 1.1 behaviour", 0xFFFE, decoded [0]);
  }
  // Illegal code position, other (case 532): the bytes EF BF BF are the
  // three-byte form of U+FFFF.
  // No ExpectedException (typeof (DecoderException)) is declared because
  // MS Fx 1.1 accepts this input; the test pins the decoded value instead.
  [Test]
  public void T5_IllegalCodePosition_3_Other_532 ()
  {
      byte[] encoded = new byte[] { 0xEF, 0xBF, 0xBF };
      // An exception would "really" be expected from this decode.
      string decoded = utf8.GetString (encoded);
      // Accepted behaviour: the single char is U+FFFF (65535).
      AssertEquals ("MS FX 1.1 behaviour", 0xFFFF, decoded [0]);
  }
  // Regression test for bugs #75065 and #73086: U+FEFF must survive decoding
  // instead of being dropped, both when decoding the raw bytes EF BB BF and
  // when round-tripping chars through Encoding.UTF8.
  [Test]
  public void GetCharsFEFF ()
  {
      // Part 1: decode the three bytes EF BB BF with a UTF8Encoding (false, true).
      byte [] bomBytes = new byte [] {0xEF, 0xBB, 0xBF};
      string decoded = new UTF8Encoding (false, true).GetString (bomBytes);
      AssertEquals ("\uFEFF", decoded);

      // Part 2: encode U+FEFF followed by 'A', decode again, compare each char.
      char[] original = new char[] {'\uFEFF', 'A'};
      Encoding shared = Encoding.UTF8;
      byte[] encoded = shared.GetBytes (original);
      char[] roundTripped = shared.GetChars (encoded);
      AssertEquals ("#1", '\uFEFF', roundTripped [0]);
      AssertEquals ("#2", 'A', roundTripped [1]);
  }
  #if NET_2_0
  // Verifies that a clone of the encoding obtained for code page 65001 is
  // writable: IsReadOnly must be false and assigning EncoderFallback must
  // not throw.
  [Test]
  public void CloneNotReadOnly ()
  {
      // Direct cast instead of "as": a result of the wrong type now fails
      // with InvalidCastException at this line rather than surfacing later
      // as a NullReferenceException on the IsReadOnly access.
      Encoding e = (Encoding) Encoding.GetEncoding (65001).Clone ();
      AssertEquals (false, e.IsReadOnly);
      // The assignment itself is the check; no assertion follows it.
      e.EncoderFallback = new EncoderExceptionFallback ();
  }
  #endif
  // Regression test for bug 77315: decoding the bytes ED A2 8C (the
  // three-byte form of the surrogate code unit U+D88C) with a
  // UTF8Encoding (false, true) must throw.
  [Test]
  #if NET_2_0
  [ExpectedException (typeof (DecoderFallbackException))]
  #else
  [Category ("NotDotNet")] // MS Bug
  [ExpectedException (typeof (ArgumentException))]
  #endif
  public void Bug77315 ()
  {
      byte [] encoded = new byte [] {0xED, 0xA2, 0x8C};
      UTF8Encoding strict = new UTF8Encoding (false, true);
      // The return value is irrelevant; the call is expected to throw.
      strict.GetString (encoded);
  }
  // Encodes a lone high surrogate (U+D800) into a zero-length byte array,
  // once through a stateful Encoder and once through Encoding.UTF8.GetBytes
  // on a string. On NET_2_0 a return value of 0 is asserted; on other
  // builds reaching the line after the call is a failure. In both builds an
  // ArgumentException from the call is accepted.
  [Test]
  public void SufficientByteArray ()
  {
      Encoder encoder = Encoding.UTF8.GetEncoder ();
      byte [] output = new byte [0];
      char [] loneHigh = new char [] {'\uD800'};

      // First call feeds the surrogate without flushing.
      encoder.GetBytes (loneHigh, 0, 1, output, 0, false);
      try {
          // Second call passes zero chars with flush = true.
          int written = encoder.GetBytes (loneHigh, 1, 0, output, 0, true);
  #if NET_2_0
          AssertEquals ("drop insufficient char in 2.0: char[]", 0, written);
  #else
          Fail ("ArgumentException is expected: char[]");
  #endif
      } catch (ArgumentException) {
          // Accepted outcome; nothing to verify.
      }

      string text = "\uD800";
      try {
          int written = Encoding.UTF8.GetBytes (text, 0, 1, output, 0);
  #if NET_2_0
          AssertEquals ("drop insufficient char in 2.0: string", 0, written);
  #else
          Fail ("ArgumentException is expected: string");
  #endif
      } catch (ArgumentException) {
          // Accepted outcome; nothing to verify.
      }
  }
  940. }
  941. }