UTF8EncodingTest.cs 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069
  1. //
  2. // UTF8EncodingTest.cs - NUnit Test Cases for System.Text.UTF8Encoding
  3. //
  4. // Authors:
  5. // Patrick Kalkman [email protected]
  6. // Sebastien Pouliot ([email protected])
  7. //
  8. // (C) 2003 Patrick Kalkman
  9. // (C) 2004 Novell (http://www.novell.com)
  10. //
  11. using NUnit.Framework;
  12. using System;
  13. using System.Text;
  14. #if NET_2_0
  15. using DecoderException = System.Text.DecoderFallbackException;
  16. #else
  17. using DecoderException = System.ArgumentException;
  18. #endif
  19. using AssertType = NUnit.Framework.Assert;
  20. namespace MonoTests.System.Text {
  21. [TestFixture]
  22. public class UTF8EncodingTest : Assertion {
  23. private UTF8Encoding utf8;
  // NUnit set-up hook: assigns the shared encoder used by the decoding tests.
  [SetUp]
  public void Create ()
  {
      // UTF8Encoding (encoderShouldEmitUTF8Identifier, throwOnInvalidBytes)
      bool emitIdentifier = true;
      bool throwOnInvalidBytes = true;
      utf8 = new UTF8Encoding (emitIdentifier, throwOnInvalidBytes);
  }
  // Encodes "A<NOT IDENTICAL TO><ALPHA>." and compares the result, byte by byte,
  // with 41 E2 89 A2 CE 91 2E (the example from RFC 2044).
  [Test]
  public void TestEncodingGetBytes1()
  {
      int[] expected = { 0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E };
      byte[] encoded = new UTF8Encoding ().GetBytes ("\u0041\u2262\u0391\u002E");
      for (int i = 0; i < expected.Length; i++) {
          // Labels run "UTF #1" .. "UTF #7", one per byte.
          AssertEquals ("UTF #" + (i + 1), expected [i], encoded [i]);
      }
  }
  // Encodes "Hi Mom <WHITE SMILING FACE>!" into a caller-supplied buffer and checks
  // the byte count plus each byte of 48 69 20 4D 6F 6D 20 E2 98 BA 21 (RFC 2044).
  [Test]
  public void TestEncodingGetBytes2()
  {
      char[] source = "\u0048\u0069\u0020\u004D\u006F\u006D\u0020\u263A\u0021".ToCharArray ();
      byte[] encoded = new byte [11];
      int written = new UTF8Encoding ().GetBytes (source, 0, source.Length, encoded, 0);
      AssertEquals ("UTF #1", 11, written);

      int[] expected = { 0x48, 0x69, 0x20, 0x4D, 0x6F, 0x6D, 0x20, 0xE2, 0x98, 0xBA, 0x21 };
      for (int i = 0; i < expected.Length; i++) {
          // Labels continue at "UTF #2" because #1 is the count check above.
          AssertEquals ("UTF #" + (i + 2), expected [i], encoded [i]);
      }
  }
  // Decodes 41 E2 89 A2 CE 91 2E and checks that the first four chars are
  // "A<NOT IDENTICAL TO><ALPHA>." (the example from RFC 2044).
  [Test]
  public void TestDecodingGetChars1()
  {
      byte[] encoded = { 0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E };
      char[] decoded = new UTF8Encoding ().GetChars (encoded);
      int[] expected = { 0x0041, 0x2262, 0x0391, 0x002E };
      for (int i = 0; i < expected.Length; i++) {
          // Labels run "UTF #1" .. "UTF #4", one per decoded char.
          AssertEquals ("UTF #" + (i + 1), expected [i], decoded [i]);
      }
  }
  // Checks GetMaxCharCount (50); the expected value depends on the profile.
  [Test]
  #if NET_2_0
  [Category ("NotWorking")]
  #endif
  public void TestMaxCharCount()
  {
  #if NET_2_0
      // hmm, where is this extra 1 coming from?
      const int expected = 51;
  #else
      const int expected = 50;
  #endif
      UTF8Encoding encoding = new UTF8Encoding ();
      AssertEquals ("UTF #1", expected, encoding.GetMaxCharCount (50));
  }
  // Checks GetMaxByteCount (50); the expected value depends on the profile.
  [Test]
  #if NET_2_0
  [Category ("NotWorking")]
  #endif
  public void TestMaxByteCount()
  {
  #if NET_2_0
      // maybe under .NET 2.0 insufficient surrogate pair is just not handled, and 3 is Preamble size.
      const int expected = 153;
  #else
      const int expected = 200;
  #endif
      UTF8Encoding encoding = new UTF8Encoding ();
      AssertEquals ("UTF #1", expected, encoding.GetMaxByteCount (50));
  }
  108. // regression for bug #59648
  // Uses an encoder built with throwOnInvalidBytes = false and expects the
  // invalid pair C0 AF to contribute no characters to the decoded string.
  [Test]
  public void TestThrowOnInvalid ()
  {
      UTF8Encoding lenient = new UTF8Encoding (true, false);

      // Invalid bytes alone: expected to decode to an empty string.
      string onlyInvalid = lenient.GetString (new byte [] { 0xC0, 0xAF });
      AssertEquals (0, onlyInvalid.Length);

      // Invalid bytes between ASCII digits: only "0102" is expected to remain.
      string mixed = lenient.GetString (new byte [] { 0x30, 0x31, 0xC0, 0xAF, 0x30, 0x32 });
      AssertEquals (4, mixed.Length);
      int[] expected = { 0x30, 0x31, 0x30, 0x32 };
      for (int i = 0; i < expected.Length; i++)
          AssertEquals (expected [i], (int) mixed [i]);
  }
  124. // UTF8 decoding tests from http://www.cl.cam.ac.uk/~mgk25/
  // Round-trips a valid Greek word: decode the bytes, re-encode the string and
  // compare hex dumps. The literal text is not compared, to avoid depending on
  // the encoding the source file is saved in.
  [Test]
  public void T1_Correct_GreekWord_kosme ()
  {
      byte[] greek = { 0xCE, 0xBA, 0xE1, 0xBD, 0xB9, 0xCF, 0x83, 0xCE, 0xBC, 0xCE, 0xB5 };
      string decoded = utf8.GetString (greek);
      string originalHex = BitConverter.ToString (greek);
      string roundTripHex = BitConverter.ToString (utf8.GetBytes (decoded));
      AssertEquals ("Reconverted", originalHex, roundTripHex);
  }
  // First code point of each sequence length (1 to 4 bytes): each must decode to
  // the expected UTF-16 value(s) and re-encode to the original bytes.
  [Test]
  public void T2_Boundary_1_FirstPossibleSequence_Pass ()
  {
      byte[] oneByte = { 0x00 };
      string text = utf8.GetString (oneByte);
      AssertEquals ("1 byte (U-00000000)", "\0", text);
      AssertEquals ("Reconverted-1", BitConverter.ToString (oneByte), BitConverter.ToString (utf8.GetBytes (text)));

      byte[] twoBytes = { 0xC2, 0x80 };
      text = utf8.GetString (twoBytes);
      AssertEquals ("2 bytes (U-00000080)", 0x0080, text [0]);
      AssertEquals ("Reconverted-2", BitConverter.ToString (twoBytes), BitConverter.ToString (utf8.GetBytes (text)));

      byte[] threeBytes = { 0xE0, 0xA0, 0x80 };
      text = utf8.GetString (threeBytes);
      AssertEquals ("3 bytes (U-00000800)", 0x0800, text [0]);
      AssertEquals ("Reconverted-3", BitConverter.ToString (threeBytes), BitConverter.ToString (utf8.GetBytes (text)));

      // U+10000 is outside the BMP, so it comes back as the surrogate pair D800 DC00.
      byte[] fourBytes = { 0xF0, 0x90, 0x80, 0x80 };
      text = utf8.GetString (fourBytes);
      AssertEquals ("4 bytes (U-00010000)-0", 0xD800, text [0]);
      AssertEquals ("4 bytes (U-00010000)-1", 0xDC00, text [1]);
      AssertEquals ("Reconverted-4", BitConverter.ToString (fourBytes), BitConverter.ToString (utf8.GetBytes (text)));
  }
  // F8 88 80 80 80 is the five-byte form of U-00200000; decoding is expected to throw.
  // Fail on MS Fx 1.1
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T2_Boundary_1_FirstPossibleSequence_Fail_5 ()
  {
      byte[] fiveBytes = { 0xF8, 0x88, 0x80, 0x80, 0x80 };
      string decoded = utf8.GetString (fiveBytes);
      // Only reached when the runtime does not throw.
      AssertNull ("5 bytes (U-00200000)", decoded);
      string originalHex = BitConverter.ToString (fiveBytes);
      string roundTripHex = BitConverter.ToString (utf8.GetBytes (decoded));
      AssertEquals ("Reconverted-5", originalHex, roundTripHex);
  }
  // FC 84 80 80 80 80 is the six-byte form of U-04000000; decoding is expected to throw.
  // Fail on MS Fx 1.1
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T2_Boundary_1_FirstPossibleSequence_Fail_6 ()
  {
      byte[] sixBytes = { 0xFC, 0x84, 0x80, 0x80, 0x80, 0x80 };
      string decoded = utf8.GetString (sixBytes);
      // Only reached when the runtime does not throw.
      AssertNull ("6 bytes (U-04000000)", decoded);
      string originalHex = BitConverter.ToString (sixBytes);
      string roundTripHex = BitConverter.ToString (utf8.GetBytes (decoded));
      AssertEquals ("Reconverted-6", originalHex, roundTripHex);
  }
  // Last code point of the 1-, 2- and 3-byte sequence lengths: each must decode
  // to the expected value and re-encode to the original bytes.
  [Test]
  public void T2_Boundary_2_LastPossibleSequence_Pass ()
  {
      byte[] oneByte = { 0x7F };
      string text = utf8.GetString (oneByte);
      AssertEquals ("1 byte (U-0000007F)", 0x007F, text [0]);
      AssertEquals ("Reconverted-1", BitConverter.ToString (oneByte), BitConverter.ToString (utf8.GetBytes (text)));

      byte[] twoBytes = { 0xDF, 0xBF };
      text = utf8.GetString (twoBytes);
      AssertEquals ("2 bytes (U-000007FF)", 0x07FF, text [0]);
      AssertEquals ("Reconverted-2", BitConverter.ToString (twoBytes), BitConverter.ToString (utf8.GetBytes (text)));

      byte[] threeBytes = { 0xEF, 0xBF, 0xBF };
      text = utf8.GetString (threeBytes);
      AssertEquals ("3 bytes (U-0000FFFF)", 0xFFFF, text [0]);
      AssertEquals ("Reconverted-3", BitConverter.ToString (threeBytes), BitConverter.ToString (utf8.GetBytes (text)));
  }
  // Last possible four-byte sequence: F7 BF BF BF encodes U-001FFFFF, which is
  // beyond the Unicode range (max U+10FFFF), so decoding is expected to throw.
  // The lead byte used to be written 0x7F (ASCII DEL, digits transposed). That
  // made the test throw on the stray continuation byte 0xBF rather than on the
  // four-byte sequence the label describes.
  // Fail on MS Fx 1.1
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T2_Boundary_2_LastPossibleSequence_Fail_4 ()
  {
      byte[] data224 = { 0xF7, 0xBF, 0xBF, 0xBF };
      string s = utf8.GetString (data224);
      // Only reached when the runtime does not throw.
      AssertNull ("4 bytes (U-001FFFFF)", s);
      AssertEquals ("Reconverted-4", BitConverter.ToString (data224), BitConverter.ToString (utf8.GetBytes (s)));
  }
  // FB BF BF BF BF is the last five-byte sequence (U-03FFFFFF); decoding is expected to throw.
  // Fail on MS Fx 1.1
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T2_Boundary_2_LastPossibleSequence_Fail_5 ()
  {
      byte[] fiveBytes = { 0xFB, 0xBF, 0xBF, 0xBF, 0xBF };
      string decoded = utf8.GetString (fiveBytes);
      // Only reached when the runtime does not throw.
      AssertNull ("5 bytes (U-03FFFFFF)", decoded);
      string originalHex = BitConverter.ToString (fiveBytes);
      string roundTripHex = BitConverter.ToString (utf8.GetBytes (decoded));
      AssertEquals ("Reconverted-5", originalHex, roundTripHex);
  }
  // FD BF BF BF BF BF is the last six-byte sequence (U-7FFFFFFF); decoding is expected to throw.
  // Fail on MS Fx 1.1
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T2_Boundary_2_LastPossibleSequence_Fail_6 ()
  {
      byte[] sixBytes = { 0xFD, 0xBF, 0xBF, 0xBF, 0xBF, 0xBF };
      string decoded = utf8.GetString (sixBytes);
      // Only reached when the runtime does not throw.
      AssertNull ("6 bytes (U-7FFFFFFF)", decoded);
      string originalHex = BitConverter.ToString (sixBytes);
      string roundTripHex = BitConverter.ToString (utf8.GetBytes (decoded));
      AssertEquals ("Reconverted-6", originalHex, roundTripHex);
  }
  // Other boundary code points that are valid: the values on either side of the
  // surrogate range, U+FFFD, and the highest code point U+10FFFF.
  [Test]
  public void T2_Boundary_3_Other_Pass ()
  {
      byte[] belowSurrogates = { 0xED, 0x9F, 0xBF };
      string text = utf8.GetString (belowSurrogates);
      AssertEquals ("U-0000D7FF", 0xD7FF, text [0]);
      AssertEquals ("Reconverted-1", BitConverter.ToString (belowSurrogates), BitConverter.ToString (utf8.GetBytes (text)));

      byte[] aboveSurrogates = { 0xEE, 0x80, 0x80 };
      text = utf8.GetString (aboveSurrogates);
      AssertEquals ("U-0000E000", 0xE000, text [0]);
      AssertEquals ("Reconverted-2", BitConverter.ToString (aboveSurrogates), BitConverter.ToString (utf8.GetBytes (text)));

      byte[] replacementChar = { 0xEF, 0xBF, 0xBD };
      text = utf8.GetString (replacementChar);
      AssertEquals ("U-0000FFFD", 0xFFFD, text [0]);
      AssertEquals ("Reconverted-3", BitConverter.ToString (replacementChar), BitConverter.ToString (utf8.GetBytes (text)));

      // U+10FFFF comes back as the surrogate pair DBFF DFFF.
      byte[] highestCodePoint = { 0xF4, 0x8F, 0xBF, 0xBF };
      text = utf8.GetString (highestCodePoint);
      AssertEquals ("U-0010FFFF-0", 0xDBFF, text [0]);
      AssertEquals ("U-0010FFFF-1", 0xDFFF, text [1]);
      AssertEquals ("Reconverted-4", BitConverter.ToString (highestCodePoint), BitConverter.ToString (utf8.GetBytes (text)));
  }
  // F4 90 80 80 encodes U-00110000, one past the highest Unicode code point;
  // decoding is expected to throw.
  // Fail on MS Fx 1.1
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T2_Boundary_3_Other_Fail_5 ()
  {
      byte[] pastUnicode = { 0xF4, 0x90, 0x80, 0x80 };
      string decoded = utf8.GetString (pastUnicode);
      // Only reached when the runtime does not throw.
      AssertNull ("U-00110000", decoded);
      string originalHex = BitConverter.ToString (pastUnicode);
      string roundTripHex = BitConverter.ToString (utf8.GetBytes (decoded));
      AssertEquals ("Reconverted-5", originalHex, roundTripHex);
  }
  // A lone continuation byte (0x80, the first one) must make decoding throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_1_UnexpectedContinuation_311 ()
  {
      byte[] malformed = { 0x80 };
      utf8.GetString (malformed);
  }
  // A lone continuation byte (0xBF, the last one) must make decoding throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_1_UnexpectedContinuation_312 ()
  {
      byte[] malformed = { 0xBF };
      utf8.GetString (malformed);
  }
  // Two continuation bytes with no lead byte must make decoding throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_1_UnexpectedContinuation_313 ()
  {
      byte[] malformed = { 0x80, 0xBF };
      utf8.GetString (malformed);
  }
  // Three continuation bytes with no lead byte must make decoding throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_1_UnexpectedContinuation_314 ()
  {
      byte[] malformed = { 0x80, 0xBF, 0x80 };
      utf8.GetString (malformed);
  }
  // Four continuation bytes with no lead byte must make decoding throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_1_UnexpectedContinuation_315 ()
  {
      byte[] malformed = { 0x80, 0xBF, 0x80, 0xBF };
      utf8.GetString (malformed);
  }
  // Five continuation bytes with no lead byte must make decoding throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_1_UnexpectedContinuation_316 ()
  {
      byte[] malformed = { 0x80, 0xBF, 0x80, 0xBF, 0x80 };
      utf8.GetString (malformed);
  }
  // Six continuation bytes with no lead byte must make decoding throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_1_UnexpectedContinuation_317 ()
  {
      byte[] malformed = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF };
      utf8.GetString (malformed);
  }
  // Seven continuation bytes with no lead byte must make decoding throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_1_UnexpectedContinuation_318 ()
  {
      byte[] malformed = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80 };
      utf8.GetString (malformed);
  }
  // The full run of 64 continuation bytes (0x80..0xBF), with no lead byte,
  // must make decoding throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_1_UnexpectedContinuation_319 ()
  {
      byte[] continuations = new byte [64];
      for (int i = 0; i < continuations.Length; i++)
          continuations [i] = (byte) (0x80 + i);
      utf8.GetString (continuations);
  }
  // All 32 lead bytes of two-byte sequences (0xC0..0xDF), each followed by a
  // space instead of a continuation byte, must make decoding throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_2_LonelyStart_321 ()
  {
      const int leadCount = 32;
      byte[] lonely = new byte [leadCount * 2];
      for (int i = 0; i < leadCount; i++) {
          lonely [2 * i] = (byte) (0xC0 + i);
          lonely [2 * i + 1] = 0x20;
      }
      utf8.GetString (lonely);
  }
  // All 16 lead bytes of three-byte sequences (0xE0..0xEF), each followed by a
  // space instead of continuation bytes, must make decoding throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_2_LonelyStart_322 ()
  {
      const int leadCount = 16;
      byte[] lonely = new byte [leadCount * 2];
      for (int i = 0; i < leadCount; i++) {
          lonely [2 * i] = (byte) (0xE0 + i);
          lonely [2 * i + 1] = 0x20;
      }
      utf8.GetString (lonely);
  }
  // All 8 lead bytes of four-byte sequences (0xF0..0xF7), each followed by a
  // space instead of continuation bytes, must make decoding throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_2_LonelyStart_323 ()
  {
      const int leadCount = 8;
      byte[] lonely = new byte [leadCount * 2];
      for (int i = 0; i < leadCount; i++) {
          lonely [2 * i] = (byte) (0xF0 + i);
          lonely [2 * i + 1] = 0x20;
      }
      utf8.GetString (lonely);
  }
  // All 4 lead bytes of five-byte sequences (0xF8..0xFB), each followed by a
  // space instead of continuation bytes, must make decoding throw.
  // This is case 3.2.4 of Markus Kuhn's UTF-8 decoder stress test. The data used
  // to be a copy of the four-byte lead bytes (0xF0..0xF7) from the test before
  // it, so the five-byte lead bytes were never exercised as lonely starts.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_2_LonelyStart_324 ()
  {
      byte[] data = { 0xF8, 0x20, 0xF9, 0x20, 0xFA, 0x20, 0xFB, 0x20 };
      utf8.GetString (data);
  }
  // Both lead bytes of six-byte sequences (0xFC, 0xFD), each followed by a
  // space instead of continuation bytes, must make decoding throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_2_LonelyStart_325 ()
  {
      byte[] lonely = { 0xFC, 0x20, 0xFD, 0x20 };
      utf8.GetString (lonely);
  }
  // Two-byte lead 0xC0 with its only continuation byte missing; decoding must throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_3_LastContinuationMissing_331 ()
  {
      byte[] truncated = { 0xC0 };
      utf8.GetString (truncated);
  }
  // Three-byte sequence E0 80 with its last byte missing; decoding must throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_3_LastContinuationMissing_332 ()
  {
      byte[] truncated = { 0xE0, 0x80 };
      utf8.GetString (truncated);
  }
  // Four-byte sequence F0 80 80 with its last byte missing; decoding must throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_3_LastContinuationMissing_333 ()
  {
      byte[] truncated = { 0xF0, 0x80, 0x80 };
      utf8.GetString (truncated);
  }
  // Five-byte sequence F8 80 80 80 with its last byte missing; decoding must throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_3_LastContinuationMissing_334 ()
  {
      byte[] truncated = { 0xF8, 0x80, 0x80, 0x80 };
      utf8.GetString (truncated);
  }
  // Six-byte sequence FC 80 80 80 80 with its last byte missing; decoding must throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_3_LastContinuationMissing_335 ()
  {
      byte[] truncated = { 0xFC, 0x80, 0x80, 0x80, 0x80 };
      utf8.GetString (truncated);
  }
  // Two-byte lead 0xDF with its continuation byte missing. Two outcomes pass:
  // a DecoderException (Mono, following the standard), or an empty string
  // (MS Fx 1.1 accepts the input), so no ExpectedException attribute is used.
  [Test]
  public void T3_Malformed_3_LastContinuationMissing_336 ()
  {
      byte[] truncated = { 0xDF };
      string decoded;
      try {
          decoded = utf8.GetString (truncated);
      }
      catch (DecoderException) {
          // Rejecting the input is the preferred behaviour; nothing more to check.
          return;
      }
      AssertEquals ("MS FX 1.1 behaviour", String.Empty, decoded);
  }
  // Three-byte sequence EF BF with its last byte missing. Two outcomes pass:
  // a DecoderException (Mono, following the standard), or an empty string
  // (MS Fx 1.1 accepts the input), so no ExpectedException attribute is used.
  [Test]
  public void T3_Malformed_3_LastContinuationMissing_337 ()
  {
      byte[] truncated = { 0xEF, 0xBF };
      string decoded;
      try {
          decoded = utf8.GetString (truncated);
      }
      catch (DecoderException) {
          // Rejecting the input is the preferred behaviour; nothing more to check.
          return;
      }
      AssertEquals ("MS FX 1.1 behaviour", String.Empty, decoded);
  }
  // Four-byte sequence F7 BF BF with its last byte missing; decoding must throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_3_LastContinuationMissing_338 ()
  {
      byte[] truncated = { 0xF7, 0xBF, 0xBF };
      utf8.GetString (truncated);
  }
  // Five-byte sequence FB BF BF BF with its last byte missing; decoding must throw.
  // This is case 3.3.9 of Markus Kuhn's UTF-8 decoder stress test. The lead byte
  // used to be written 0xF (0x0F, an ASCII control character), so the test threw
  // on a stray continuation byte rather than on the truncated five-byte sequence.
  // The concatenation test in this fixture uses FB BF BF BF for the same case.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_3_LastContinuationMissing_339 ()
  {
      byte[] data = { 0xFB, 0xBF, 0xBF, 0xBF };
      utf8.GetString (data);
  }
  // Six-byte sequence FD BF BF BF BF with its last byte missing; decoding must throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_3_LastContinuationMissing_3310 ()
  {
      byte[] truncated = { 0xFD, 0xBF, 0xBF, 0xBF, 0xBF };
      utf8.GetString (truncated);
  }
  // Ten incomplete sequences back to back (the data of the
  // LastContinuationMissing cases); decoding the concatenation must throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_4_ConcatenationImcomplete ()
  {
      // One incomplete sequence per row.
      byte[] concatenated = {
          0xC0,
          0xE0, 0x80,
          0xF0, 0x80, 0x80,
          0xF8, 0x80, 0x80, 0x80,
          0xFC, 0x80, 0x80, 0x80, 0x80,
          0xDF,
          0xEF, 0xBF,
          0xF7, 0xBF, 0xBF,
          0xFB, 0xBF, 0xBF, 0xBF,
          0xFD, 0xBF, 0xBF, 0xBF, 0xBF };
      utf8.GetString (concatenated);
  }
  // 0xFE can never appear in UTF-8; decoding must throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_5_ImpossibleBytes_351 ()
  {
      byte[] impossible = { 0xFE };
      utf8.GetString (impossible);
  }
  // 0xFF can never appear in UTF-8; decoding must throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_5_ImpossibleBytes_352 ()
  {
      byte[] impossible = { 0xFF };
      utf8.GetString (impossible);
  }
  // A run of the impossible bytes FE FE FF FF; decoding must throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_5_ImpossibleBytes_353 ()
  {
      byte[] impossible = { 0xFE, 0xFE, 0xFF, 0xFF };
      utf8.GetString (impossible);
  }
  503. // Overlong == dangerous -> a "safe" decoder should reject them
  // C0 AF is an overlong two-byte encoding of '/' (U+002F); decoding must throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_1_ASCII_Slash_411 ()
  {
      byte[] overlong = { 0xC0, 0xAF };
      utf8.GetString (overlong);
  }
  // E0 80 AF is an overlong three-byte encoding of '/' (U+002F); decoding must throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_1_ASCII_Slash_412 ()
  {
      byte[] overlong = { 0xE0, 0x80, 0xAF };
      utf8.GetString (overlong);
  }
  // F0 80 80 AF is an overlong four-byte encoding of '/' (U+002F); decoding must throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_1_ASCII_Slash_413 ()
  {
      byte[] overlong = { 0xF0, 0x80, 0x80, 0xAF };
      utf8.GetString (overlong);
  }
  // F8 80 80 80 AF is an overlong five-byte encoding of '/' (U+002F); decoding must throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_1_ASCII_Slash_414 ()
  {
      byte[] overlong = { 0xF8, 0x80, 0x80, 0x80, 0xAF };
      utf8.GetString (overlong);
  }
  // FC 80 80 80 80 AF is an overlong six-byte encoding of '/' (U+002F); decoding must throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_1_ASCII_Slash_415 ()
  {
      byte[] overlong = { 0xFC, 0x80, 0x80, 0x80, 0x80, 0xAF };
      utf8.GetString (overlong);
  }
  // C1 BF is the highest overlong two-byte sequence (it encodes U+007F);
  // decoding must throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_2_MaximumBoundary_421 ()
  {
      byte[] overlong = { 0xC1, 0xBF };
      utf8.GetString (overlong);
  }
  // E0 9F BF is the highest overlong three-byte sequence (it encodes U+07FF);
  // decoding must throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_2_MaximumBoundary_422 ()
  {
      byte[] overlong = { 0xE0, 0x9F, 0xBF };
      utf8.GetString (overlong);
  }
  // F0 8F BF BF is the highest overlong four-byte sequence (it encodes U+FFFF);
  // decoding must throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_2_MaximumBoundary_423 ()
  {
      byte[] overlong = { 0xF0, 0x8F, 0xBF, 0xBF };
      utf8.GetString (overlong);
  }
  // F8 87 BF BF BF is the highest overlong five-byte sequence (it encodes
  // U-001FFFFF); decoding must throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_2_MaximumBoundary_424 ()
  {
      byte[] overlong = { 0xF8, 0x87, 0xBF, 0xBF, 0xBF };
      utf8.GetString (overlong);
  }
  // FC 83 BF BF BF BF is the highest overlong six-byte sequence (it encodes
  // U-03FFFFFF); decoding must throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_2_MaximumBoundary_425 ()
  {
      byte[] overlong = { 0xFC, 0x83, 0xBF, 0xBF, 0xBF, 0xBF };
      utf8.GetString (overlong);
  }
  // C0 80 is an overlong two-byte encoding of NUL (U+0000); decoding must throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_3_NUL_431 ()
  {
      byte[] overlongNul = { 0xC0, 0x80 };
      utf8.GetString (overlongNul);
  }
  // E0 80 80 is an overlong three-byte encoding of NUL (U+0000); decoding must throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_3_NUL_432 ()
  {
      byte[] overlongNul = { 0xE0, 0x80, 0x80 };
      utf8.GetString (overlongNul);
  }
  // F0 80 80 80 is an overlong four-byte encoding of NUL (U+0000); decoding must throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_3_NUL_433 ()
  {
      byte[] overlongNul = { 0xF0, 0x80, 0x80, 0x80 };
      utf8.GetString (overlongNul);
  }
  // F8 80 80 80 80 is an overlong five-byte encoding of NUL (U+0000); decoding must throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_3_NUL_434 ()
  {
      byte[] overlongNul = { 0xF8, 0x80, 0x80, 0x80, 0x80 };
      utf8.GetString (overlongNul);
  }
  // FC 80 80 80 80 80 is an overlong six-byte encoding of NUL (U+0000); decoding must throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_3_NUL_435 ()
  {
      byte[] overlongNul = { 0xFC, 0x80, 0x80, 0x80, 0x80, 0x80 };
      utf8.GetString (overlongNul);
  }
  // ED A0 80 would decode to the lone surrogate U+D800; decoding is expected to throw.
  // DecoderException is this file's alias: DecoderFallbackException under
  // NET_2_0, ArgumentException otherwise.
  [Test]
  #if !NET_2_0
  // MS Fx 1.1 accepts this input, hence the category.
  [Category ("NotDotNet")]
  #endif
  [ExpectedException (typeof (DecoderException))]
  public void T5_IllegalCodePosition_1_UTF16Surrogates_511 ()
  {
      string decoded = utf8.GetString (new byte[] { 0xED, 0xA0, 0x80 });
      // Only reached on a runtime that accepts the sequence.
      AssertEquals ("MS FX 1.1 behaviour", 0xD800, decoded [0]);
  }
  // ED AD BF would decode to the lone surrogate U+DB7F; decoding is expected to throw.
  // DecoderException is this file's alias: DecoderFallbackException under
  // NET_2_0, ArgumentException otherwise.
  [Test]
  #if !NET_2_0
  // MS Fx 1.1 accepts this input, hence the category.
  [Category ("NotDotNet")]
  #endif
  [ExpectedException (typeof (DecoderException))]
  public void T5_IllegalCodePosition_1_UTF16Surrogates_512 ()
  {
      string decoded = utf8.GetString (new byte[] { 0xED, 0xAD, 0xBF });
      // Only reached on a runtime that accepts the sequence.
      AssertEquals ("MS FX 1.1 behaviour", 0xDB7F, decoded [0]);
  }
  // ED AE 80 would decode to the lone surrogate U+DB80; decoding is expected to throw.
  // DecoderException is this file's alias: DecoderFallbackException under
  // NET_2_0, ArgumentException otherwise.
  [Test]
  #if !NET_2_0
  // MS Fx 1.1 accepts this input, hence the category.
  [Category ("NotDotNet")]
  #endif
  [ExpectedException (typeof (DecoderException))]
  public void T5_IllegalCodePosition_1_UTF16Surrogates_513 ()
  {
      string decoded = utf8.GetString (new byte[] { 0xED, 0xAE, 0x80 });
      // Only reached on a runtime that accepts the sequence.
      AssertEquals ("MS FX 1.1 behaviour", 0xDB80, decoded [0]);
  }
  // ED AF BF would decode to the lone surrogate U+DBFF; decoding is expected to throw.
  // DecoderException is this file's alias: DecoderFallbackException under
  // NET_2_0, ArgumentException otherwise.
  [Test]
  #if !NET_2_0
  // MS Fx 1.1 accepts this input, hence the category.
  [Category ("NotDotNet")]
  #endif
  [ExpectedException (typeof (DecoderException))]
  public void T5_IllegalCodePosition_1_UTF16Surrogates_514 ()
  {
      string decoded = utf8.GetString (new byte[] { 0xED, 0xAF, 0xBF });
      // Only reached on a runtime that accepts the sequence.
      AssertEquals ("MS FX 1.1 behaviour", 0xDBFF, decoded [0]);
  }
  // ED B0 80 would decode to the lone surrogate U+DC00; decoding is expected to throw.
  // DecoderException is this file's alias: DecoderFallbackException under
  // NET_2_0, ArgumentException otherwise.
  [Test]
  #if !NET_2_0
  // MS Fx 1.1 accepts this input, hence the category.
  [Category ("NotDotNet")]
  #endif
  [ExpectedException (typeof (DecoderException))]
  public void T5_IllegalCodePosition_1_UTF16Surrogates_515 ()
  {
      string decoded = utf8.GetString (new byte[] { 0xED, 0xB0, 0x80 });
      // Only reached on a runtime that accepts the sequence.
      AssertEquals ("MS FX 1.1 behaviour", 0xDC00, decoded [0]);
  }
  // ED BE 80 would decode to the lone surrogate U+DF80; decoding is expected to throw.
  // DecoderException is this file's alias: DecoderFallbackException under
  // NET_2_0, ArgumentException otherwise.
  [Test]
  #if !NET_2_0
  // MS Fx 1.1 accepts this input, hence the category.
  [Category ("NotDotNet")]
  #endif
  [ExpectedException (typeof (DecoderException))]
  public void T5_IllegalCodePosition_1_UTF16Surrogates_516 ()
  {
      string decoded = utf8.GetString (new byte[] { 0xED, 0xBE, 0x80 });
      // Only reached on a runtime that accepts the sequence.
      AssertEquals ("MS FX 1.1 behaviour", 0xDF80, decoded [0]);
  }
  // ED BF BF would decode to the lone surrogate U+DFFF; decoding is expected to throw.
  // DecoderException is this file's alias: DecoderFallbackException under
  // NET_2_0, ArgumentException otherwise.
  [Test]
  #if !NET_2_0
  // MS Fx 1.1 accepts this input, hence the category.
  [Category ("NotDotNet")]
  #endif
  [ExpectedException (typeof (DecoderException))]
  public void T5_IllegalCodePosition_1_UTF16Surrogates_517 ()
  {
      string decoded = utf8.GetString (new byte[] { 0xED, 0xBF, 0xBF });
      // Only reached on a runtime that accepts the sequence.
      AssertEquals ("MS FX 1.1 behaviour", 0xDFFF, decoded [0]);
  }
  // ED A0 80 ED B0 80 spells the surrogate pair U+D800 U+DC00 as two three-byte
  // sequences; decoding is expected to throw.
  // DecoderException is this file's alias: DecoderFallbackException under
  // NET_2_0, ArgumentException otherwise.
  [Test]
  #if !NET_2_0
  // MS Fx 1.1 accepts this input, hence the category.
  [Category ("NotDotNet")]
  #endif
  [ExpectedException (typeof (DecoderException))]
  public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_521 ()
  {
      byte[] encodedPair = { 0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80 };
      string decoded = utf8.GetString (encodedPair);
      // Only reached on a runtime that accepts the sequence.
      AssertEquals ("MS FX 1.1 behaviour", 0xD800, decoded [0]);
      AssertEquals ("MS FX 1.1 behaviour", 0xDC00, decoded [1]);
  }
  // Case 522: the surrogate pair U+D800 U+DFFF with each half encoded
  // separately as three UTF-8 bytes (ED A0 80, ED BF BF). Decoding is
  // expected to throw; the assertions record the values MS Fx 1.1 returns
  // and are only reached when nothing is thrown.
  // `utf8` is a fixture field declared outside this method.
  [Test]
  #if NET_2_0
  [ExpectedException (typeof (DecoderFallbackException))]
  #else
  // MS Fx 1.1 accepts this input
  [Category ("NotDotNet")]
  [ExpectedException (typeof (DecoderException))]
  #endif
  public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_522 ()
  {
      byte[] encoded = new byte[] {
          0xED, 0xA0, 0x80,   // U+D800
          0xED, 0xBF, 0xBF    // U+DFFF
      };
      string decoded = utf8.GetString (encoded);
      // an exception is "really" expected before this point
      AssertEquals ("MS FX 1.1 behaviour", 0xD800, decoded [0]);
      AssertEquals ("MS FX 1.1 behaviour", 0xDFFF, decoded [1]);
  }
  // Case 523: the surrogate pair U+DB7F U+DC00 with each half encoded
  // separately as three UTF-8 bytes (ED AD BF, ED B0 80). Decoding is
  // expected to throw; the assertions record the values MS Fx 1.1 returns
  // and are only reached when nothing is thrown.
  // `utf8` is a fixture field declared outside this method.
  [Test]
  #if NET_2_0
  [ExpectedException (typeof (DecoderFallbackException))]
  #else
  // MS Fx 1.1 accepts this input
  [Category ("NotDotNet")]
  [ExpectedException (typeof (DecoderException))]
  #endif
  public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_523 ()
  {
      byte[] encoded = new byte[] {
          0xED, 0xAD, 0xBF,   // U+DB7F
          0xED, 0xB0, 0x80    // U+DC00
      };
      string decoded = utf8.GetString (encoded);
      // an exception is "really" expected before this point
      AssertEquals ("MS FX 1.1 behaviour", 0xDB7F, decoded [0]);
      AssertEquals ("MS FX 1.1 behaviour", 0xDC00, decoded [1]);
  }
  // Case 524: the surrogate pair U+DB7F U+DFFF with each half encoded
  // separately as three UTF-8 bytes (ED AD BF, ED BF BF). Decoding is
  // expected to throw; the assertions record the values MS Fx 1.1 returns
  // and are only reached when nothing is thrown.
  // `utf8` is a fixture field declared outside this method.
  [Test]
  #if NET_2_0
  [ExpectedException (typeof (DecoderFallbackException))]
  #else
  // MS Fx 1.1 accepts this input
  [Category ("NotDotNet")]
  [ExpectedException (typeof (DecoderException))]
  #endif
  public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_524 ()
  {
      byte[] encoded = new byte[] {
          0xED, 0xAD, 0xBF,   // U+DB7F
          0xED, 0xBF, 0xBF    // U+DFFF
      };
      string decoded = utf8.GetString (encoded);
      // an exception is "really" expected before this point
      AssertEquals ("MS FX 1.1 behaviour", 0xDB7F, decoded [0]);
      AssertEquals ("MS FX 1.1 behaviour", 0xDFFF, decoded [1]);
  }
  // Case 525: the surrogate pair U+DB80 U+DC00 with each half encoded
  // separately as three UTF-8 bytes (ED AE 80, ED B0 80). Decoding is
  // expected to throw; the assertions record the values MS Fx 1.1 returns
  // and are only reached when nothing is thrown.
  // `utf8` is a fixture field declared outside this method.
  [Test]
  #if NET_2_0
  [ExpectedException (typeof (DecoderFallbackException))]
  #else
  // MS Fx 1.1 accepts this input
  [Category ("NotDotNet")]
  [ExpectedException (typeof (DecoderException))]
  #endif
  public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_525 ()
  {
      byte[] encoded = new byte[] {
          0xED, 0xAE, 0x80,   // U+DB80
          0xED, 0xB0, 0x80    // U+DC00
      };
      string decoded = utf8.GetString (encoded);
      // an exception is "really" expected before this point
      AssertEquals ("MS FX 1.1 behaviour", 0xDB80, decoded [0]);
      AssertEquals ("MS FX 1.1 behaviour", 0xDC00, decoded [1]);
  }
  // Case 526: the surrogate pair U+DB80 U+DFFF with each half encoded
  // separately as three UTF-8 bytes (ED AE 80, ED BF BF). The low surrogate
  // is U+DFFF, the same upper-boundary value used by cases 522, 524 and 528.
  // Decoding is expected to throw; the assertions record the values
  // MS Fx 1.1 returns and are only reached when nothing is thrown.
  // `utf8` is a fixture field declared outside this method.
  [Test]
  #if NET_2_0
  [ExpectedException (typeof (DecoderFallbackException))]
  #else
  // MS Fx 1.1 accepts this input
  [Category ("NotDotNet")]
  [ExpectedException (typeof (DecoderException))]
  #endif
  public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_526 ()
  {
      byte[] data = {
          0xED, 0xAE, 0x80,   // U+DB80
          0xED, 0xBF, 0xBF    // U+DFFF
      };
      string s = utf8.GetString (data);
      // an exception is "really" expected before this point
      AssertEquals ("MS FX 1.1 behaviour", 56192, s [0]);
      AssertEquals ("MS FX 1.1 behaviour", 57343, s [1]);
  }
  // Case 527: the surrogate pair U+DBFF U+DC00 with each half encoded
  // separately as three UTF-8 bytes (ED AF BF, ED B0 80). Decoding is
  // expected to throw; the assertions record the values MS Fx 1.1 returns
  // and are only reached when nothing is thrown.
  // `utf8` is a fixture field declared outside this method.
  [Test]
  #if NET_2_0
  [ExpectedException (typeof (DecoderFallbackException))]
  #else
  // MS Fx 1.1 accepts this input
  [Category ("NotDotNet")]
  [ExpectedException (typeof (DecoderException))]
  #endif
  public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_527 ()
  {
      byte[] encoded = new byte[] {
          0xED, 0xAF, 0xBF,   // U+DBFF
          0xED, 0xB0, 0x80    // U+DC00
      };
      string decoded = utf8.GetString (encoded);
      // an exception is "really" expected before this point
      AssertEquals ("MS FX 1.1 behaviour", 0xDBFF, decoded [0]);
      AssertEquals ("MS FX 1.1 behaviour", 0xDC00, decoded [1]);
  }
  // Case 528: the surrogate pair U+DBFF U+DFFF with each half encoded
  // separately as three UTF-8 bytes (ED AF BF, ED BF BF). Decoding is
  // expected to throw; the assertions record the values MS Fx 1.1 returns
  // and are only reached when nothing is thrown.
  // `utf8` is a fixture field declared outside this method.
  [Test]
  #if NET_2_0
  [ExpectedException (typeof (DecoderFallbackException))]
  #else
  // MS Fx 1.1 accepts this input
  [Category ("NotDotNet")]
  [ExpectedException (typeof (DecoderException))]
  #endif
  public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_528 ()
  {
      byte[] encoded = new byte[] {
          0xED, 0xAF, 0xBF,   // U+DBFF
          0xED, 0xBF, 0xBF    // U+DFFF
      };
      string decoded = utf8.GetString (encoded);
      // an exception is "really" expected before this point
      AssertEquals ("MS FX 1.1 behaviour", 0xDBFF, decoded [0]);
      AssertEquals ("MS FX 1.1 behaviour", 0xDFFF, decoded [1]);
  }
  // Case 531: EF BF BE is the three-byte UTF-8 form of U+FFFE. An exception
  // is "really" expected, but MS Fx 1.1 accepts the input, so the
  // ExpectedException (DecoderException) attribute is deliberately left off
  // and the decoded character is checked instead.
  // `utf8` is a fixture field declared outside this method.
  [Test]
  public void T5_IllegalCodePosition_3_Other_531 ()
  {
      byte[] encoded = new byte[] { 0xEF, 0xBF, 0xBE };
      string decoded = utf8.GetString (encoded);
      AssertEquals ("MS FX 1.1 behaviour", 0xFFFE, decoded [0]);
  }
  // Case 532: EF BF BF is the three-byte UTF-8 form of U+FFFF. An exception
  // is "really" expected, but MS Fx 1.1 accepts the input, so the
  // ExpectedException (DecoderException) attribute is deliberately left off
  // and the decoded character is checked instead.
  // `utf8` is a fixture field declared outside this method.
  [Test]
  public void T5_IllegalCodePosition_3_Other_532 ()
  {
      byte[] encoded = new byte[] { 0xEF, 0xBF, 0xBF };
      string decoded = utf8.GetString (encoded);
      AssertEquals ("MS FX 1.1 behaviour", 0xFFFF, decoded [0]);
  }
  // Regression test for bugs #75065 and #73086. Checks that EF BB BF decodes
  // to the one-character string U+FEFF with a UTF8Encoding (false, true), and
  // that the characters U+FEFF, 'A' come back unchanged after a
  // GetBytes/GetChars round trip through Encoding.UTF8.
  [Test]
  public void GetCharsFEFF ()
  {
      byte [] encodedFeff = new byte [] { 0xEF, 0xBB, 0xBF };
      Encoding explicitUtf8 = new UTF8Encoding (false, true);
      AssertEquals ("\uFEFF", explicitUtf8.GetString (encodedFeff));

      Encoding defaultUtf8 = Encoding.UTF8;
      char [] original = new char [] { '\uFEFF', 'A' };
      char [] roundTripped = defaultUtf8.GetChars (defaultUtf8.GetBytes (original));
      AssertEquals ("#1", '\uFEFF', roundTripped [0]);
      AssertEquals ("#2", 'A', roundTripped [1]);
  }
  #if NET_2_0
  // A clone of the code page 65001 encoding must report IsReadOnly == false;
  // the test then assigns a new EncoderFallback to that clone.
  [Test]
  public void CloneNotReadOnly ()
  {
      Encoding source = Encoding.GetEncoding (65001);
      Encoding copy = source.Clone () as Encoding;
      AssertEquals (false, copy.IsReadOnly);
      copy.EncoderFallback = new EncoderExceptionFallback ();
  }
  #endif
  // Decodes ED A2 8C (the three-byte UTF-8 form of the surrogate U+D88C)
  // with a UTF8Encoding (false, true). Expects DecoderFallbackException under
  // NET_2_0 and ArgumentException otherwise.
  [Test]
  #if NET_2_0
  [ExpectedException (typeof (DecoderFallbackException))]
  #else
  [ExpectedException (typeof (ArgumentException))]
  [Category ("NotDotNet")] // MS Bug
  #endif
  public void Bug77315 ()
  {
      UTF8Encoding encoding = new UTF8Encoding (false, true);
      byte [] input = new byte [] { 0xED, 0xA2, 0x8C };
      encoding.GetString (input);
  }
  // Encodes a lone high surrogate (U+D800) into a zero-length byte array,
  // first through a stateful Encoder and then through
  // Encoding.UTF8.GetBytes (string, ...). Under NET_2_0 a returned byte count
  // must be 0; otherwise reaching the line after the call is a failure.
  // An ArgumentException from either call is accepted in both configurations.
  [Test]
  public void SufficientByteArray ()
  {
      Encoder encoder = Encoding.UTF8.GetEncoder ();
      byte [] output = new byte [0];
      char [] input = new char [] { '\uD800' };

      // Hand the surrogate to the encoder without flushing, then flush with
      // no further input.
      encoder.GetBytes (input, 0, 1, output, 0, false);
      try {
          int written = encoder.GetBytes (input, 1, 0, output, 0, true);
  #if NET_2_0
          AssertEquals ("drop insufficient char in 2.0: char[]", 0, written);
  #else
          Fail ("ArgumentException is expected: char[]");
  #endif
      } catch (ArgumentException) {
          // accepted outcome
      }

      // Same input through the one-shot string overload.
      string text = "\uD800";
      try {
          int written = Encoding.UTF8.GetBytes (text, 0, 1, output, 0);
  #if NET_2_0
          AssertEquals ("drop insufficient char in 2.0: string", 0, written);
  #else
          Fail ("ArgumentException is expected: string");
  #endif
      } catch (ArgumentException) {
          // accepted outcome
      }
  }
  #if NET_2_0
  // bug #77550: decoding the single byte 183 (0xB7) with a
  // UTF8Encoding (false, false) is expected to yield zero characters from
  // Decoder.GetCharCount, Decoder.GetChars and Encoding.GetString alike.
  [Test]
  public void DecoderFallbackSimple ()
  {
      UTF8Encoding encoding = new UTF8Encoding (false, false);

      int counted = encoding.GetDecoder ().GetCharCount (
          new byte [] { 183 }, 0, 1);
      AssertType.AreEqual (0, counted, "#1");

      int produced = encoding.GetDecoder ().GetChars (
          new byte [] { 183 }, 0, 1, new char [100], 0);
      AssertType.AreEqual (0, produced, "#2");

      int length = encoding.GetString (new byte [] { 183 }).Length;
      AssertType.AreEqual (0, length, "#3");
  }
  #endif
  957. }
  958. }