UTF8EncodingTest.cs 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211
  1. //
  2. // UTF8EncodingTest.cs - NUnit Test Cases for System.Text.UTF8Encoding
  3. //
  4. // Authors:
  5. // Patrick Kalkman [email protected]
  6. // Sebastien Pouliot ([email protected])
  7. //
  8. // (C) 2003 Patrick Kalkman
  9. // (C) 2004 Novell (http://www.novell.com)
  10. //
  11. using NUnit.Framework;
  12. using System;
  13. using System.Reflection;
  14. using System.IO;
  15. using System.Text;
  16. using DecoderException = System.Text.DecoderFallbackException;
  17. using AssertType = NUnit.Framework.Assert;
  18. namespace MonoTests.System.Text
  19. {
  20. [TestFixture]
  21. public class UTF8EncodingTest
  22. {
  23. private UTF8Encoding utf8;
  // Runs before every test: stores a fresh UTF8Encoding built with (true, true)
  // in the shared utf8 field used by the decoding tests.
  [SetUp]
  public void Create ()
  {
      const bool emitIdentifier = true;
      const bool throwOnInvalid = true;
      utf8 = new UTF8Encoding (emitIdentifier, throwOnInvalid);
  }
  // The fixture's encoding must report IsBrowserDisplay as true.
  [Test]
  public void IsBrowserDisplay ()
  {
      bool flag = utf8.IsBrowserDisplay;
      Assert.IsTrue (flag);
  }
  // The fixture's encoding must report IsBrowserSave as true.
  [Test]
  public void IsBrowserSave ()
  {
      bool flag = utf8.IsBrowserSave;
      Assert.IsTrue (flag);
  }
  // The fixture's encoding must report IsMailNewsDisplay as true.
  [Test]
  public void IsMailNewsDisplay ()
  {
      bool flag = utf8.IsMailNewsDisplay;
      Assert.IsTrue (flag);
  }
  // The fixture's encoding must report IsMailNewsSave as true.
  [Test]
  public void IsMailNewsSave ()
  {
      bool flag = utf8.IsMailNewsSave;
      Assert.IsTrue (flag);
  }
  // Two default-constructed UTF8Encoding instances must compare equal.
  [Test]
  public void TestCompat ()
  {
      UTF8Encoding left = new UTF8Encoding ();
      UTF8Encoding right = new UTF8Encoding ();
      Assert.IsTrue (left.Equals (right));
  }
  // Encodes "A<NOT IDENTICAL TO><ALPHA>." with GetBytes (string) and checks
  // every produced byte against 41 E2 89 A2 CE 91 2E, see (RFC 2044).
  [Test]
  public void TestEncodingGetBytes1()
  {
      UTF8Encoding utf8Enc = new UTF8Encoding ();
      string text = "\u0041\u2262\u0391\u002E";
      int[] expected = { 0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E };

      byte[] actual = utf8Enc.GetBytes (text);

      // Without this check, extra trailing bytes would go unnoticed
      Assert.AreEqual (expected.Length, actual.Length, "UTF #length");
      for (int i = 0; i < expected.Length; i++) {
          Assert.AreEqual (expected [i], actual [i], "UTF #" + (i + 1));
      }
  }
  // Encodes "Hi Mom <WHITE SMILING FACE>!" into a caller-supplied buffer and
  // checks the byte count and content: 48 69 20 4D 6F 6D 20 E2 98 BA 21,
  // see (RFC 2044).
  [Test]
  public void TestEncodingGetBytes2()
  {
      UTF8Encoding utf8Enc = new UTF8Encoding ();
      string text = "\u0048\u0069\u0020\u004D\u006F\u006D\u0020\u263A\u0021";
      int[] expected = { 0x48, 0x69, 0x20, 0x4D, 0x6F, 0x6D, 0x20, 0xE2, 0x98, 0xBA, 0x21 };

      byte[] buffer = new byte [11];
      char[] chars = text.ToCharArray ();
      int written = utf8Enc.GetBytes (chars, 0, text.Length, buffer, 0);

      Assert.AreEqual (11, written, "UTF #1");
      // Byte i is reported under label "UTF #(i + 2)", as label #1 is the count
      for (int i = 0; i < expected.Length; i++) {
          Assert.AreEqual (expected [i], buffer [i], "UTF #" + (i + 2));
      }
  }
  // Decodes 41 E2 89 A2 CE 91 2E with GetChars (byte[]) and checks the result
  // is exactly "A<NOT IDENTICAL TO><ALPHA>.", see (RFC 2044).
  [Test]
  public void TestDecodingGetChars1()
  {
      UTF8Encoding utf8Enc = new UTF8Encoding ();
      byte[] encoded = new byte [] {0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E};
      int[] expected = { 0x0041, 0x2262, 0x0391, 0x002E };

      char[] decoded = utf8Enc.GetChars (encoded);

      // Without this check, extra trailing chars would go unnoticed
      Assert.AreEqual (expected.Length, decoded.Length, "UTF #length");
      for (int i = 0; i < expected.Length; i++) {
          Assert.AreEqual (expected [i], decoded [i], "UTF #" + (i + 1));
      }
  }
  // GetMaxCharCount (50) must be 51 for the default encoding, and must be the
  // same whether or not the encoding was constructed with the BOM flag.
  [Test]
  public void TestMaxCharCount()
  {
      UTF8Encoding UTF8enc = new UTF8Encoding ();
      Encoding UTF8encWithBOM = new UTF8Encoding(true);
      Assert.AreEqual (51, UTF8enc.GetMaxCharCount(50), "UTF #1");
      // This used to compare GetMaxByteCount (a copy of the check in
      // TestMaxByteCount); this test is about the char count.
      Assert.AreEqual (UTF8enc.GetMaxCharCount(50), UTF8encWithBOM.GetMaxCharCount(50), "UTF #2");
  }
  // With two-character replacement fallbacks on both sides, GetMaxCharCount (50)
  // is expected to be 102.
  [Test]
  public void TestMaxCharCountWithCustomFallback()
  {
      EncoderReplacementFallback encoderFallback = new EncoderReplacementFallback ("\u2047\u2047");
      DecoderReplacementFallback decoderFallback = new DecoderReplacementFallback ("\u2047\u2047");
      Encoding encoding = Encoding.GetEncoding ("utf-8", encoderFallback, decoderFallback);

      int maxChars = encoding.GetMaxCharCount (50);
      Assert.AreEqual (102, maxChars, "UTF #1");
  }
  // GetMaxByteCount (50) must be 153 for the default encoding, and must be the
  // same for an encoding constructed with the BOM flag.
  [Test]
  public void TestMaxByteCount()
  {
      UTF8Encoding plain = new UTF8Encoding ();
      Encoding withBom = new UTF8Encoding (true);

      Assert.AreEqual (153, plain.GetMaxByteCount (50), "UTF #1");

      int plainMax = plain.GetMaxByteCount (50);
      int bomMax = withBom.GetMaxByteCount (50);
      Assert.AreEqual (plainMax, bomMax, "UTF #2");
  }
  // With a two-character encoder replacement fallback, GetMaxByteCount (50)
  // is expected to be 306.
  [Test]
  public void TestMaxByteCountWithCustomFallback()
  {
      EncoderReplacementFallback encoderFallback = new EncoderReplacementFallback ("\u2047\u2047");
      DecoderReplacementFallback decoderFallback = new DecoderReplacementFallback ("?");
      Encoding encoding = Encoding.GetEncoding ("utf-8", encoderFallback, decoderFallback);

      int maxBytes = encoding.GetMaxByteCount (50);
      Assert.AreEqual (306, maxBytes, "UTF #1");
  }
  // regression for bug #59648
  // Uses an encoding constructed with (true, false) and checks that the invalid
  // pair C0 AF decodes to two U+FFFD characters, alone and between ASCII digits.
  [Test]
  public void TestThrowOnInvalid ()
  {
      UTF8Encoding lenient = new UTF8Encoding (true, false);

      byte[] invalidPair = { 0xC0, 0xAF };
      Assert.AreEqual (2, lenient.GetCharCount (invalidPair), "#A0");
      string replaced = lenient.GetString (invalidPair);
      Assert.AreEqual ("\uFFFD\uFFFD", replaced, "#A1");

      byte[] embedded = { 0x30, 0x31, 0xC0, 0xAF, 0x30, 0x32 };
      string decoded = lenient.GetString (embedded);
      Assert.AreEqual (6, decoded.Length, "#B1");

      // Character i is reported under label "#B(i + 2)", as #B1 is the length
      int[] expected = { 0x30, 0x31, 0xFFFD, 0xFFFD, 0x30, 0x32 };
      for (int i = 0; i < expected.Length; i++) {
          Assert.AreEqual (expected [i], (int) decoded [i], "#B" + (i + 2));
      }
  }
  152. //
  153. // UTF8 decoding tests are based on the test file from http://www.cl.cam.ac.uk/~mgk25/
  154. // The test file is: https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
  155. // which is licensed under CC-by-4.0: https://creativecommons.org/licenses/by/4.0/
  156. //
  157. // The file is not copied verbatim, instead individual
  158. // tests are based on individual portions of that file
  159. //
  // Decodes a valid multi-byte sequence and checks that re-encoding the
  // result reproduces the original bytes.
  [Test]
  public void T1_Correct_GreekWord_kosme ()
  {
      byte[] original = { 0xCE, 0xBA, 0xE1, 0xBD, 0xB9, 0xCF, 0x83, 0xCE, 0xBC, 0xCE, 0xB5 };
      string decoded = utf8.GetString (original);

      // cute but saving source code in unicode can be problematic
      // so we just ensure we can re-encode this
      byte[] reencoded = utf8.GetBytes (decoded);
      string expectedHex = BitConverter.ToString (original);
      string actualHex = BitConverter.ToString (reencoded);
      Assert.AreEqual (expectedHex, actualHex, "Reconverted");
  }
  // Smallest code point of each 1- to 4-byte encoded length must decode to the
  // expected UTF-16 value(s) and re-encode to the same bytes.
  [Test]
  public void T2_Boundary_1_FirstPossibleSequence_Pass ()
  {
      byte[] oneByte = { 0x00 };
      string decoded = utf8.GetString (oneByte);
      Assert.AreEqual ("\0", decoded, "1 byte (U-00000000)");
      Assert.AreEqual (BitConverter.ToString (oneByte), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-1");

      byte[] twoBytes = { 0xC2, 0x80 };
      decoded = utf8.GetString (twoBytes);
      Assert.AreEqual (0x80, decoded [0], "2 bytes (U-00000080)");
      Assert.AreEqual (BitConverter.ToString (twoBytes), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-2");

      byte[] threeBytes = { 0xE0, 0xA0, 0x80 };
      decoded = utf8.GetString (threeBytes);
      Assert.AreEqual (0x800, decoded [0], "3 bytes (U-00000800)");
      Assert.AreEqual (BitConverter.ToString (threeBytes), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-3");

      // U+10000 is expected as the surrogate pair D800 DC00
      byte[] fourBytes = { 0xF0, 0x90, 0x80, 0x80 };
      decoded = utf8.GetString (fourBytes);
      Assert.AreEqual (0xD800, decoded [0], "4 bytes (U-00010000)-0");
      Assert.AreEqual (0xDC00, decoded [1], "4 bytes (U-00010000)-1");
      Assert.AreEqual (BitConverter.ToString (fourBytes), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-4");
  }
  // Fail on MS Fx 1.1
  // The 5-byte form F8 88 80 80 80 must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T2_Boundary_1_FirstPossibleSequence_Fail_5 ()
  {
      byte[] fiveByteForm = { 0xF8, 0x88, 0x80, 0x80, 0x80 };
      string decoded = utf8.GetString (fiveByteForm);

      // Only reached if decoding did not throw
      Assert.IsNull (decoded, "5 bytes (U-00200000)");
      string originalHex = BitConverter.ToString (fiveByteForm);
      Assert.AreEqual (originalHex, BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-5");
  }
  // Fail on MS Fx 1.1
  // The 6-byte form FC 84 80 80 80 80 must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T2_Boundary_1_FirstPossibleSequence_Fail_6 ()
  {
      byte[] sixByteForm = { 0xFC, 0x84, 0x80, 0x80, 0x80, 0x80 };
      string decoded = utf8.GetString (sixByteForm);

      // Only reached if decoding did not throw
      Assert.IsNull (decoded, "6 bytes (U-04000000)");
      string originalHex = BitConverter.ToString (sixByteForm);
      Assert.AreEqual (originalHex, BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-6");
  }
  // Largest code point of each 1- to 3-byte encoded length must decode to the
  // expected value and re-encode to the same bytes.
  [Test]
  public void T2_Boundary_2_LastPossibleSequence_Pass ()
  {
      byte[] oneByte = { 0x7F };
      string decoded = utf8.GetString (oneByte);
      Assert.AreEqual (0x7F, decoded [0], "1 byte (U-0000007F)");
      Assert.AreEqual (BitConverter.ToString (oneByte), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-1");

      byte[] twoBytes = { 0xDF, 0xBF };
      decoded = utf8.GetString (twoBytes);
      Assert.AreEqual (0x7FF, decoded [0], "2 bytes (U-000007FF)");
      Assert.AreEqual (BitConverter.ToString (twoBytes), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-2");

      byte[] threeBytes = { 0xEF, 0xBF, 0xBF };
      decoded = utf8.GetString (threeBytes);
      Assert.AreEqual (0xFFFF, decoded [0], "3 bytes (U-0000FFFF)");
      Assert.AreEqual (BitConverter.ToString (threeBytes), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-3");
  }
  // Fail on MS Fx 1.1
  // The 4-byte form F7 BF BF BF (U-001FFFFF) must make the fixture's decoder throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T2_Boundary_2_LastPossibleSequence_Fail_4 ()
  {
      // The lead byte was previously written 0x7F (ASCII DEL), which turned this
      // into "DEL + stray continuation bytes" instead of the 4-byte boundary
      // sequence that the assertion message describes.
      byte[] data224 = { 0xF7, 0xBF, 0xBF, 0xBF };
      string s = utf8.GetString (data224);

      // Only reached if decoding did not throw
      Assert.IsNull (s, "4 bytes (U-001FFFFF)");
      Assert.AreEqual (BitConverter.ToString (data224), BitConverter.ToString (utf8.GetBytes (s)), "Reconverted-4");
  }
  // Fail on MS Fx 1.1
  // The 5-byte form FB BF BF BF BF must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T2_Boundary_2_LastPossibleSequence_Fail_5 ()
  {
      byte[] fiveByteForm = { 0xFB, 0xBF, 0xBF, 0xBF, 0xBF };
      string decoded = utf8.GetString (fiveByteForm);

      // Only reached if decoding did not throw
      Assert.IsNull (decoded, "5 bytes (U-03FFFFFF)");
      string originalHex = BitConverter.ToString (fiveByteForm);
      Assert.AreEqual (originalHex, BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-5");
  }
  // Fail on MS Fx 1.1
  // The 6-byte form FD BF BF BF BF BF must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T2_Boundary_2_LastPossibleSequence_Fail_6 ()
  {
      byte[] sixByteForm = { 0xFD, 0xBF, 0xBF, 0xBF, 0xBF, 0xBF };
      string decoded = utf8.GetString (sixByteForm);

      // Only reached if decoding did not throw
      Assert.IsNull (decoded, "6 bytes (U-7FFFFFFF)");
      string originalHex = BitConverter.ToString (sixByteForm);
      Assert.AreEqual (originalHex, BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-6");
  }
  // Other boundary code points (U+D7FF, U+E000, U+FFFD, U+10FFFF) must decode
  // to the expected UTF-16 value(s) and re-encode to the same bytes.
  [Test]
  public void T2_Boundary_3_Other_Pass ()
  {
      byte[] belowSurrogates = { 0xED, 0x9F, 0xBF };
      string decoded = utf8.GetString (belowSurrogates);
      Assert.AreEqual (0xD7FF, decoded [0], "U-0000D7FF");
      Assert.AreEqual (BitConverter.ToString (belowSurrogates), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-1");

      byte[] aboveSurrogates = { 0xEE, 0x80, 0x80 };
      decoded = utf8.GetString (aboveSurrogates);
      Assert.AreEqual (0xE000, decoded [0], "U-0000E000");
      Assert.AreEqual (BitConverter.ToString (aboveSurrogates), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-2");

      byte[] replacementChar = { 0xEF, 0xBF, 0xBD };
      decoded = utf8.GetString (replacementChar);
      Assert.AreEqual (0xFFFD, decoded [0], "U-0000FFFD");
      Assert.AreEqual (BitConverter.ToString (replacementChar), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-3");

      // U+10FFFF is expected as the surrogate pair DBFF DFFF
      byte[] lastCodePoint = { 0xF4, 0x8F, 0xBF, 0xBF };
      decoded = utf8.GetString (lastCodePoint);
      Assert.AreEqual (0xDBFF, decoded [0], "U-0010FFFF-0");
      Assert.AreEqual (0xDFFF, decoded [1], "U-0010FFFF-1");
      Assert.AreEqual (BitConverter.ToString (lastCodePoint), BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-4");
  }
  // Fail on MS Fx 1.1
  // F4 90 80 80 (U-00110000) must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T2_Boundary_3_Other_Fail_5 ()
  {
      byte[] beyondRange = { 0xF4, 0x90, 0x80, 0x80 };
      string decoded = utf8.GetString (beyondRange);

      // Only reached if decoding did not throw
      Assert.IsNull (decoded, "U-00110000");
      string originalHex = BitConverter.ToString (beyondRange);
      Assert.AreEqual (originalHex, BitConverter.ToString (utf8.GetBytes (decoded)), "Reconverted-5");
  }
  // A lone continuation byte 0x80 must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_1_UnexpectedContinuation_311 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0x80 });
  }
  // A lone continuation byte 0xBF must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_1_UnexpectedContinuation_312 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0xBF });
  }
  // Two continuation bytes with no lead byte must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_1_UnexpectedContinuation_313 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0x80, 0xBF });
  }
  // Three continuation bytes with no lead byte must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_1_UnexpectedContinuation_314 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0x80, 0xBF, 0x80 });
  }
  // Four continuation bytes with no lead byte must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_1_UnexpectedContinuation_315 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0x80, 0xBF, 0x80, 0xBF });
  }
  // Five continuation bytes with no lead byte must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_1_UnexpectedContinuation_316 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0x80, 0xBF, 0x80, 0xBF, 0x80 });
  }
  // Six continuation bytes with no lead byte must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_1_UnexpectedContinuation_317 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF });
  }
  // Seven continuation bytes with no lead byte must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_1_UnexpectedContinuation_318 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80 });
  }
  // All 64 continuation bytes 0x80..0xBF in ascending order, with no lead byte,
  // must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_1_UnexpectedContinuation_319 ()
  {
      // 64 different continuation characters
      byte[] continuations = new byte [64];
      for (int i = 0; i < continuations.Length; i++) {
          continuations [i] = (byte) (0x80 + i);
      }

      // exception is "really" expected here, so the result is not kept
      utf8.GetString (continuations);
  }
  // Each byte 0xC0..0xDF followed by a space (0x20) instead of a continuation
  // byte must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_2_LonelyStart_321 ()
  {
      const int leadCount = 0xDF - 0xC0 + 1;
      byte[] lonelyStarts = new byte [leadCount * 2];
      for (int i = 0; i < leadCount; i++) {
          lonelyStarts [2 * i] = (byte) (0xC0 + i);
          lonelyStarts [2 * i + 1] = 0x20;
      }

      // exception is "really" expected here, so the result is not kept
      utf8.GetString (lonelyStarts);
  }
  // Each byte 0xE0..0xEF followed by a space (0x20) instead of continuation
  // bytes must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_2_LonelyStart_322 ()
  {
      const int leadCount = 0xEF - 0xE0 + 1;
      byte[] lonelyStarts = new byte [leadCount * 2];
      for (int i = 0; i < leadCount; i++) {
          lonelyStarts [2 * i] = (byte) (0xE0 + i);
          lonelyStarts [2 * i + 1] = 0x20;
      }

      // exception is "really" expected here, so the result is not kept
      utf8.GetString (lonelyStarts);
  }
  // Each byte 0xF0..0xF7 followed by a space (0x20) instead of continuation
  // bytes must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_2_LonelyStart_323 ()
  {
      byte[] lonelyStarts = {
          0xF0, 0x20, 0xF1, 0x20, 0xF2, 0x20, 0xF3, 0x20,
          0xF4, 0x20, 0xF5, 0x20, 0xF6, 0x20, 0xF7, 0x20 };

      // exception is "really" expected here, so the result is not kept
      utf8.GetString (lonelyStarts);
  }
  // Each 5-byte lead byte 0xF8..0xFB followed by a space (0x20) instead of
  // continuation bytes must make the fixture's decoder throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_2_LonelyStart_324 ()
  {
      // This data used to be a byte-for-byte copy of LonelyStart_323 (F0..F7),
      // so the F8..FB range that sits between cases 323 and 325 was never
      // exercised.
      byte[] data = { 0xF8, 0x20, 0xF9, 0x20, 0xFA, 0x20, 0xFB, 0x20 };
      string s = utf8.GetString (data);
      // exception is "really" expected here
  }
  // The bytes 0xFC and 0xFD, each followed by a space (0x20), must make the
  // fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_2_LonelyStart_325 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0xFC, 0x20, 0xFD, 0x20 });
  }
  // The single byte 0xC0 with nothing after it must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_3_LastContinuationMissing_331 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0xC0 });
  }
  // E0 80 with its last byte missing must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_3_LastContinuationMissing_332 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0xE0, 0x80 });
  }
  // F0 80 80 with its last byte missing must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_3_LastContinuationMissing_333 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0xF0, 0x80, 0x80 });
  }
  // F8 80 80 80 with its last byte missing must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_3_LastContinuationMissing_334 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0xF8, 0x80, 0x80, 0x80 });
  }
  // FC 80 80 80 80 with its last byte missing must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_3_LastContinuationMissing_335 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0xFC, 0x80, 0x80, 0x80, 0x80 });
  }
  // MS Fx 1.1 accept this
  // Decodes the truncated sequence DF. Both outcomes are tolerated: either a
  // DecoderException, or an empty string. For that reason no
  // ExpectedException attribute is applied.
  [Test]
  public void T3_Malformed_3_LastContinuationMissing_336 ()
  {
      byte[] truncated = { 0xDF };
      string decoded;
      try {
          // exception is "really" expected here
          decoded = utf8.GetString (truncated);
      } catch (DecoderException) {
          // but Mono doesn't - better stick to the standard
          return;
      }
      Assert.AreEqual (String.Empty, decoded, "MS FX 1.1 behaviour");
  }
  // MS Fx 1.1 accept this
  // Decodes the truncated sequence EF BF. Both outcomes are tolerated: either
  // a DecoderException, or an empty string. For that reason no
  // ExpectedException attribute is applied.
  [Test]
  public void T3_Malformed_3_LastContinuationMissing_337 ()
  {
      byte[] truncated = { 0xEF, 0xBF };
      string decoded;
      try {
          // exception is "really" expected here
          decoded = utf8.GetString (truncated);
      } catch (DecoderException) {
          // but Mono doesn't - better stick to the standard
          return;
      }
      Assert.AreEqual (String.Empty, decoded, "MS FX 1.1 behaviour");
  }
  // F7 BF BF with its last byte missing must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_3_LastContinuationMissing_338 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0xF7, 0xBF, 0xBF });
  }
  // FB BF BF BF (a 5-byte sequence with its last byte missing) must make the
  // fixture's decoder throw.
  [Test]
  [ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_3_LastContinuationMissing_339 ()
  {
      // The lead byte was previously written 0xF (i.e. 0x0F, an ASCII control
      // character), which tested "control char + stray continuation bytes"
      // instead of a truncated 5-byte sequence. The same FB BF BF BF run
      // appears in T3_Malformed_4_ConcatenationImcomplete.
      byte[] data = { 0xFB, 0xBF, 0xBF, 0xBF };
      string s = utf8.GetString (data);
      // exception is "really" expected here
  }
  // FD BF BF BF BF with its last byte missing must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_3_LastContinuationMissing_3310 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0xFD, 0xBF, 0xBF, 0xBF, 0xBF });
  }
  // The ten truncated sequences from the LastContinuationMissing tests,
  // concatenated into one buffer, must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_4_ConcatenationImcomplete ()
  {
      byte[] concatenated = {
          0xC0,
          0xE0, 0x80,
          0xF0, 0x80, 0x80,
          0xF8, 0x80, 0x80, 0x80,
          0xFC, 0x80, 0x80, 0x80, 0x80,
          0xDF,
          0xEF, 0xBF,
          0xF7, 0xBF, 0xBF,
          0xFB, 0xBF, 0xBF, 0xBF,
          0xFD, 0xBF, 0xBF, 0xBF, 0xBF };

      // exception is "really" expected here, so the result is not kept
      utf8.GetString (concatenated);
  }
  // The byte 0xFE must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_5_ImpossibleBytes_351 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0xFE });
  }
  // The byte 0xFF must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_5_ImpossibleBytes_352 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0xFF });
  }
  // The byte run FE FE FF FF must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T3_Malformed_5_ImpossibleBytes_353 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0xFE, 0xFE, 0xFF, 0xFF });
  }
  538. // Overlong == dangerous -> a "safe" decoder should reject them
  // The 2-byte overlong form of '/' (C0 AF) must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_1_ASCII_Slash_411 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0xC0, 0xAF });
  }
  // The 3-byte overlong form of '/' (E0 80 AF) must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_1_ASCII_Slash_412 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0xE0, 0x80, 0xAF });
  }
  // The 4-byte overlong form of '/' (F0 80 80 AF) must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_1_ASCII_Slash_413 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0xF0, 0x80, 0x80, 0xAF });
  }
  // The 5-byte overlong form of '/' (F8 80 80 80 AF) must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_1_ASCII_Slash_414 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0xF8, 0x80, 0x80, 0x80, 0xAF });
  }
  // The 6-byte overlong form of '/' (FC 80 80 80 80 AF) must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_1_ASCII_Slash_415 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0xFC, 0x80, 0x80, 0x80, 0x80, 0xAF });
  }
  // The overlong sequence C1 BF must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_2_MaximumBoundary_421 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0xC1, 0xBF });
  }
  // The overlong sequence E0 9F BF must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_2_MaximumBoundary_422 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0xE0, 0x9F, 0xBF });
  }
  // The overlong sequence F0 8F BF BF must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_2_MaximumBoundary_423 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0xF0, 0x8F, 0xBF, 0xBF });
  }
  // The overlong sequence F8 87 BF BF BF must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_2_MaximumBoundary_424 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0xF8, 0x87, 0xBF, 0xBF, 0xBF });
  }
  // The overlong sequence FC 83 BF BF BF BF must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_2_MaximumBoundary_425 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0xFC, 0x83, 0xBF, 0xBF, 0xBF, 0xBF });
  }
  // The 2-byte overlong form of NUL (C0 80) must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_3_NUL_431 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0xC0, 0x80 });
  }
  // The 3-byte overlong form of NUL (E0 80 80) must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_3_NUL_432 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0xE0, 0x80, 0x80 });
  }
  // The 4-byte overlong form of NUL (F0 80 80 80) must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_3_NUL_433 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0xF0, 0x80, 0x80, 0x80 });
  }
  // The 5-byte overlong form of NUL (F8 80 80 80 80) must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_3_NUL_434 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0xF8, 0x80, 0x80, 0x80, 0x80 });
  }
  // The 6-byte overlong form of NUL (FC 80 80 80 80 80) must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderException))]
  public void T4_Overlong_3_NUL_435 ()
  {
      // exception is "really" expected here, so the result is not kept
      utf8.GetString (new byte[] { 0xFC, 0x80, 0x80, 0x80, 0x80, 0x80 });
  }
  // ED A0 80 (the UTF-8 form of surrogate U+D800) must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderFallbackException))]
  public void T5_IllegalCodePosition_1_UTF16Surrogates_511 ()
  {
      byte[] encodedSurrogate = { 0xED, 0xA0, 0x80 };
      // exception is "really" expected here
      string decoded = utf8.GetString (encodedSurrogate);
      // Only reached if decoding did not throw
      Assert.AreEqual (0xD800, decoded [0], "MS FX 1.1 behaviour");
  }
  // ED AD BF (the UTF-8 form of surrogate U+DB7F) must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderFallbackException))]
  public void T5_IllegalCodePosition_1_UTF16Surrogates_512 ()
  {
      byte[] encodedSurrogate = { 0xED, 0xAD, 0xBF };
      // exception is "really" expected here
      string decoded = utf8.GetString (encodedSurrogate);
      // Only reached if decoding did not throw
      Assert.AreEqual (0xDB7F, decoded [0], "MS FX 1.1 behaviour");
  }
  // ED AE 80 (the UTF-8 form of surrogate U+DB80) must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderFallbackException))]
  public void T5_IllegalCodePosition_1_UTF16Surrogates_513 ()
  {
      byte[] encodedSurrogate = { 0xED, 0xAE, 0x80 };
      // exception is "really" expected here
      string decoded = utf8.GetString (encodedSurrogate);
      // Only reached if decoding did not throw
      Assert.AreEqual (0xDB80, decoded [0], "MS FX 1.1 behaviour");
  }
  // ED AF BF (the UTF-8 form of surrogate U+DBFF) must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderFallbackException))]
  public void T5_IllegalCodePosition_1_UTF16Surrogates_514 ()
  {
      byte[] encodedSurrogate = { 0xED, 0xAF, 0xBF };
      // exception is "really" expected here
      string decoded = utf8.GetString (encodedSurrogate);
      // Only reached if decoding did not throw
      Assert.AreEqual (0xDBFF, decoded [0], "MS FX 1.1 behaviour");
  }
  // ED B0 80 (the UTF-8 form of surrogate U+DC00) must make the fixture's decoder throw.
  [Test, ExpectedException (typeof (DecoderFallbackException))]
  public void T5_IllegalCodePosition_1_UTF16Surrogates_515 ()
  {
      byte[] encodedSurrogate = { 0xED, 0xB0, 0x80 };
      // exception is "really" expected here
      string decoded = utf8.GetString (encodedSurrogate);
      // Only reached if decoding did not throw
      Assert.AreEqual (0xDC00, decoded [0], "MS FX 1.1 behaviour");
  }
  // ED BE 80 is the three-byte encoding of the lone UTF-16 surrogate U+DF80
  // (57216). The test passes only if GetString rejects it with a
  // DecoderFallbackException.
  [Test]
  [ExpectedException (typeof (DecoderFallbackException))]
  public void T5_IllegalCodePosition_1_UTF16Surrogates_516 ()
  {
      byte[] data = { 0xED, 0xBE, 0x80 };
      // Historical note: MS FX 1.1 accepted this input and returned U+DF80.
      // Nothing follows the call because it is required to throw.
      utf8.GetString (data);
  }
  // ED BF BF is the three-byte encoding of the lone UTF-16 surrogate U+DFFF
  // (57343). The test passes only if GetString rejects it with a
  // DecoderFallbackException.
  [Test]
  [ExpectedException (typeof (DecoderFallbackException))]
  public void T5_IllegalCodePosition_1_UTF16Surrogates_517 ()
  {
      byte[] data = { 0xED, 0xBF, 0xBF };
      // Historical note: MS FX 1.1 accepted this input and returned U+DFFF.
      // Nothing follows the call because it is required to throw.
      utf8.GetString (data);
  }
  // ED A0 80 + ED B0 80 encode the surrogate pair U+D800 (55296) and
  // U+DC00 (56320) as two separate three-byte sequences. The test passes
  // only if GetString rejects the input with a DecoderFallbackException.
  [Test]
  [ExpectedException (typeof (DecoderFallbackException))]
  public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_521 ()
  {
      byte[] data = { 0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80 };
      // Historical note: MS FX 1.1 accepted this input and returned the two
      // surrogates U+D800, U+DC00. Nothing follows the call because it is
      // required to throw.
      utf8.GetString (data);
  }
  // ED A0 80 + ED BF BF encode the surrogate pair U+D800 (55296) and
  // U+DFFF (57343) as two separate three-byte sequences. The test passes
  // only if GetString rejects the input with a DecoderFallbackException.
  [Test]
  [ExpectedException (typeof (DecoderFallbackException))]
  public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_522 ()
  {
      byte[] data = { 0xED, 0xA0, 0x80, 0xED, 0xBF, 0xBF };
      // Historical note: MS FX 1.1 accepted this input and returned the two
      // surrogates U+D800, U+DFFF. Nothing follows the call because it is
      // required to throw.
      utf8.GetString (data);
  }
  // ED AD BF + ED B0 80 encode the surrogate pair U+DB7F (56191) and
  // U+DC00 (56320) as two separate three-byte sequences. The test passes
  // only if GetString rejects the input with a DecoderFallbackException.
  [Test]
  [ExpectedException (typeof (DecoderFallbackException))]
  public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_523 ()
  {
      byte[] data = { 0xED, 0xAD, 0xBF, 0xED, 0xB0, 0x80 };
      // Historical note: MS FX 1.1 accepted this input and returned the two
      // surrogates U+DB7F, U+DC00. Nothing follows the call because it is
      // required to throw.
      utf8.GetString (data);
  }
  // ED AD BF + ED BF BF encode the surrogate pair U+DB7F (56191) and
  // U+DFFF (57343) as two separate three-byte sequences. The test passes
  // only if GetString rejects the input with a DecoderFallbackException.
  [Test]
  [ExpectedException (typeof (DecoderFallbackException))]
  public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_524 ()
  {
      byte[] data = { 0xED, 0xAD, 0xBF, 0xED, 0xBF, 0xBF };
      // Historical note: MS FX 1.1 accepted this input and returned the two
      // surrogates U+DB7F, U+DFFF. Nothing follows the call because it is
      // required to throw.
      utf8.GetString (data);
  }
  // ED AE 80 + ED B0 80 encode the surrogate pair U+DB80 (56192) and
  // U+DC00 (56320) as two separate three-byte sequences. The test passes
  // only if GetString rejects the input with a DecoderFallbackException.
  [Test]
  [ExpectedException (typeof (DecoderFallbackException))]
  public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_525 ()
  {
      byte[] data = { 0xED, 0xAE, 0x80, 0xED, 0xB0, 0x80 };
      // Historical note: MS FX 1.1 accepted this input and returned the two
      // surrogates U+DB80, U+DC00. Nothing follows the call because it is
      // required to throw.
      utf8.GetString (data);
  }
  // ED AE 80 + ED BF 8F encode the surrogate pair U+DB80 (56192) and
  // U+DFCF (57295) as two separate three-byte sequences. The test passes
  // only if GetString rejects the input with a DecoderFallbackException.
  [Test]
  [ExpectedException (typeof (DecoderFallbackException))]
  public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_526 ()
  {
      byte[] data = { 0xED, 0xAE, 0x80, 0xED, 0xBF, 0x8F };
      // Historical note: MS FX 1.1 accepted this input and returned the two
      // surrogates U+DB80, U+DFCF. Nothing follows the call because it is
      // required to throw.
      utf8.GetString (data);
  }
  // ED AF BF + ED B0 80 encode the surrogate pair U+DBFF (56319) and
  // U+DC00 (56320) as two separate three-byte sequences. The test passes
  // only if GetString rejects the input with a DecoderFallbackException.
  [Test]
  [ExpectedException (typeof (DecoderFallbackException))]
  public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_527 ()
  {
      byte[] data = { 0xED, 0xAF, 0xBF, 0xED, 0xB0, 0x80 };
      // Historical note: MS FX 1.1 accepted this input and returned the two
      // surrogates U+DBFF, U+DC00. Nothing follows the call because it is
      // required to throw.
      utf8.GetString (data);
  }
  // ED AF BF + ED BF BF encode the surrogate pair U+DBFF (56319) and
  // U+DFFF (57343) as two separate three-byte sequences. The test passes
  // only if GetString rejects the input with a DecoderFallbackException.
  [Test]
  [ExpectedException (typeof (DecoderFallbackException))]
  public void T5_IllegalCodePosition_2_PairedUTF16Surrogates_528 ()
  {
      byte[] data = { 0xED, 0xAF, 0xBF, 0xED, 0xBF, 0xBF };
      // Historical note: MS FX 1.1 accepted this input and returned the two
      // surrogates U+DBFF, U+DFFF. Nothing follows the call because it is
      // required to throw.
      utf8.GetString (data);
  }
  // EF BF BE decodes to U+FFFE (65534). MS Fx 1.1 accepts this input, so the
  // DecoderException expectation stays disabled and the decoded value is
  // asserted instead.
  [Test]
  // [ExpectedException (typeof (DecoderException))]
  public void T5_IllegalCodePosition_3_Other_531 ()
  {
      string decoded = utf8.GetString (new byte[] { 0xEF, 0xBF, 0xBE });
      // No exception is expected on this path; check the single decoded char.
      char first = decoded [0];
      Assert.AreEqual (65534, first, "MS FX 1.1 behaviour");
  }
  // EF BF BF decodes to U+FFFF (65535). MS Fx 1.1 accepts this input, so the
  // DecoderException expectation stays disabled and the decoded value is
  // asserted instead.
  [Test]
  // [ExpectedException (typeof (DecoderException))]
  public void T5_IllegalCodePosition_3_Other_532 ()
  {
      string decoded = utf8.GetString (new byte[] { 0xEF, 0xBF, 0xBF });
      // No exception is expected on this path; check the single decoded char.
      char first = decoded [0];
      Assert.AreEqual (65535, first, "MS FX 1.1 behaviour");
  }
  // bug #75065 and #73086.
  // U+FEFF must come through decoding as an ordinary character, both when
  // the bytes EF BB BF are decoded directly and after a GetBytes/GetChars
  // round trip.
  [Test]
  public void GetCharsFEFF ()
  {
      byte [] data = new byte [] {0xEF, 0xBB, 0xBF};
      Encoding enc = new UTF8Encoding (false, true);
      string s = enc.GetString (data);
      // Expected value first, so a failure message labels the two sides correctly.
      Assert.AreEqual ("\uFEFF", s);

      Encoding utf = Encoding.UTF8;
      char[] testChars = {'\uFEFF','A'};
      byte[] bytes = utf.GetBytes (testChars);
      char[] chars = utf.GetChars (bytes);
      Assert.AreEqual ('\uFEFF', chars [0], "#1");
      Assert.AreEqual ('A', chars [1], "#2");
  }
  // A clone of the encoding obtained for code page 65001 must be writable:
  // IsReadOnly is false and assigning a new EncoderFallback does not throw.
  [Test]
  public void CloneNotReadOnly ()
  {
      // Direct cast instead of "as": an unexpected type fails right here with
      // InvalidCastException rather than later with a NullReferenceException.
      Encoding e = (Encoding) Encoding.GetEncoding (65001).Clone ();
      Assert.IsFalse (e.IsReadOnly);
      // The test fails if this setter throws.
      e.EncoderFallback = new EncoderExceptionFallback ();
  }
  // Decoding ED A2 8C with a UTF8Encoding constructed as (false, true) is
  // expected to raise DecoderFallbackException.
  [Test]
  [ExpectedException (typeof (DecoderFallbackException))]
  public void Bug77315 ()
  {
      UTF8Encoding encoding = new UTF8Encoding (false, true);
      byte [] input = new byte [] {0xED, 0xA2, 0x8C};
      encoding.GetString (input);
  }
  // Encodes a lone high surrogate (U+D800) into a zero-length byte array,
  // once through a stateful Encoder and once through Encoding.GetBytes.
  // Each attempt may either report 0 bytes written or throw
  // ArgumentException; both outcomes are accepted.
  [Test]
  public void SufficientByteArray ()
  {
      byte [] empty = new byte [0];
      char [] loneHigh = new char [] {'\uD800'};
      Encoder encoder = Encoding.UTF8.GetEncoder ();

      // Feed the surrogate without flushing, then flush with no further input.
      encoder.GetBytes (loneHigh, 0, 1, empty, 0, false);
      try {
          int written = encoder.GetBytes (loneHigh, 1, 0, empty, 0, true);
          Assert.AreEqual (0, written, "drop insufficient char in 2.0: char[]");
      } catch (ArgumentException) {
          // Accepted alternative outcome.
      }

      string text = "\uD800";
      try {
          int written = Encoding.UTF8.GetBytes (text, 0, 1, empty, 0);
          Assert.AreEqual (0, written, "drop insufficient char in 2.0: string");
      } catch (ArgumentException) {
          // Accepted alternative outcome.
      }
  }
  // Checks the byte counts Encoding.UTF8 reports and produces for U+FFFD,
  // lone surrogates and a full surrogate pair, then encodes every UTF-16
  // code unit once, and finally verifies that a UTF8Encoding constructed as
  // (false, true) throws EncoderFallbackException for a reversed pair.
  [Test] // bug #565129
  public void SufficientByteArray2 ()
  {
      var u = Encoding.UTF8;
      Assert.AreEqual (3, u.GetByteCount ("\uFFFD"), "#1-1");
      Assert.AreEqual (3, u.GetByteCount ("\uD800"), "#1-2");
      Assert.AreEqual (3, u.GetByteCount ("\uDC00"), "#1-3");
      Assert.AreEqual (4, u.GetByteCount ("\uD800\uDC00"), "#1-4");
      byte [] bytes = new byte [10];
      Assert.AreEqual (3, u.GetBytes ("\uDC00", 0, 1, bytes, 0), "#1-5"); // was bogus
      Assert.AreEqual (3, u.GetBytes ("\uFFFD").Length, "#2-1");
      Assert.AreEqual (3, u.GetBytes ("\uD800").Length, "#2-2");
      Assert.AreEqual (3, u.GetBytes ("\uDC00").Length, "#2-3");
      Assert.AreEqual (4, u.GetBytes ("\uD800\uDC00").Length, "#2-4");

      // Encode every code unit, U+FFFF included; the only requirement is that
      // no call throws. The counter is an int because a char counter cannot
      // go past char.MaxValue, so an inclusive char loop would never end.
      for (int code = char.MinValue; code <= char.MaxValue; code++) {
          u.GetBytes (((char) code).ToString ());
      }

      try {
          new UTF8Encoding (false, true).GetBytes (new char [] {'\uDF45', '\uD808'}, 0, 2);
          Assert.Fail ("EncoderFallbackException is expected");
      } catch (EncoderFallbackException) {
          // Expected: low surrogate followed by high surrogate.
      }
  }
  // With a UTF8Encoding constructed as (false, false), the single byte 183
  // must yield exactly one char from Decoder.GetCharCount, Decoder.GetChars
  // and Encoding.GetString.
  [Test] // bug #77550
  public void DecoderFallbackSimple ()
  {
      UTF8Encoding e = new UTF8Encoding (false, false);
      byte [] single = new byte [] {(byte) 183};

      // A fresh decoder is requested for each of the first two checks.
      int counted = e.GetDecoder ().GetCharCount (single, 0, 1);
      AssertType.AreEqual (1, counted, "#1");

      int written = e.GetDecoder ().GetChars (single, 0, 1, new char [100], 0);
      AssertType.AreEqual (1, written, "#2");

      string text = e.GetString (single);
      AssertType.AreEqual (1, text.Length, "#3");
  }
  // Exercises the fallback buffer created by Encoding.UTF8.DecoderFallback:
  // after one Fallback call it cannot move back, has one char remaining,
  // and that char is U+FFFD.
  [Test]
  public void FallbackDefaultEncodingUTF8 ()
  {
      // Direct cast instead of "as": an unexpected buffer type fails right
      // here with InvalidCastException rather than with a
      // NullReferenceException on the first use below.
      DecoderReplacementFallbackBuffer b =
          (DecoderReplacementFallbackBuffer) Encoding.UTF8.DecoderFallback.CreateFallbackBuffer ();
      AssertType.IsTrue (b.Fallback (new byte [] {}, 0), "#1");
      AssertType.IsFalse (b.MovePrevious (), "#2");
      AssertType.AreEqual (1, b.Remaining, "#3");
      AssertType.AreEqual ('\uFFFD', b.GetNextChar (), "#4");
  }
  // Decodes the first 8000 bytes (at most) of Test/resources/415628.bin with
  // Encoding.UTF8; the test passes as long as GetString does not throw.
  // The resource is located relative to the executing assembly:
  // <grandparent of the assembly directory>/corlib/Test/resources/415628.bin.
  [Test]
  [Category ("MobileNotWorking")]
  public void Bug415628 ()
  {
      DirectoryInfo bcl_output_dir = Directory.GetParent (Path.GetDirectoryName (Assembly.GetExecutingAssembly ().Location));
      string namespace_dir = Path.Combine (bcl_output_dir.Parent.FullName, "corlib");
      string resource = Path.Combine (namespace_dir, "Test/resources/415628.bin");
      // OpenRead asks only for read access; File.Open (path, FileMode.Open)
      // requests read/write access with no sharing, which is more than a
      // read-only test resource needs. Both stream and reader are disposed.
      using (var f = File.OpenRead (resource))
      using (var br = new BinaryReader (f)) {
          byte [] buf = br.ReadBytes (8000);
          Encoding.UTF8.GetString (buf);
      }
  }
  // Decoding 4096 bytes into a 10-char array starting at char index 9 is
  // expected to raise ArgumentException.
  [Test]
  [ExpectedException (typeof (ArgumentException))]
  public void Bug10788()
  {
      byte[] input = new byte[4096];
      char[] output = new char[10];
      Decoder decoder = Encoding.UTF8.GetDecoder ();
      decoder.GetChars (input, 0, 4096, output, 9, false);
  }
  // Boundary checks for the char index passed to Decoder.GetChars with a
  // 10-char output array:
  //   #1 one byte at index 10  -> ArgumentException
  //   #2 one byte at index 11  -> ArgumentOutOfRangeException
  //   #3 zero bytes at index 10 -> succeeds and writes 0 chars
  [Test]
  public void Bug10789()
  {
      byte[] input = new byte[4096];
      char[] output = new char[10];

      try {
          Decoder first = Encoding.UTF8.GetDecoder ();
          first.GetChars (input, 0, 1, output, 10, false);
          Assert.Fail ("ArgumentException is expected #1");
      } catch (ArgumentException) {
          // Expected.
      }

      try {
          Decoder second = Encoding.UTF8.GetDecoder ();
          second.GetChars (input, 0, 1, output, 11, false);
          Assert.Fail ("ArgumentOutOfRangeException is expected #2");
      } catch (ArgumentOutOfRangeException) {
          // Expected.
      }

      Decoder third = Encoding.UTF8.GetDecoder ();
      int charactersWritten = third.GetChars (input, 0, 0, output, 10, false);
      Assert.AreEqual (0, charactersWritten, "#3");
  }
  // Runs a series of malformed UTF-8 inputs through
  // EncodingTester.TestDecoderFallback. Each call passes the input bytes, a
  // string, and one or more byte arrays.
  // NOTE(review): presumably the string is the expected decoded text and the
  // byte arrays are the runs expected to reach the decoder fallback; confirm
  // against EncodingTester.
  [Test]
  public void EncodingFallback ()
  {
      /* Legal UTF-8 Byte Sequences
       * 1st    2nd    3rd    4th
       * 00..7F
       * C2..DF 80..BF
       * E0     A0..BF 80..BF
       * E1..EF 80..BF 80..BF
       * F0     90..BF 80..BF 80..BF
       * F1..F3 80..BF 80..BF 80..BF
       * F4     80..8F 80..BF 80..BF
       */
      const byte ascii = 0x61; // 'a'
      const byte trail = 0xB0; // inside the 80..BF range of the table above
      const byte stray = 0xC0; // neither a legal 1st byte nor in 80..BF

      var t = new EncodingTester ("utf-8");

      // Invalid 1st byte
      for (int i = 0x80; i <= 0xC1; i++) {
          byte lead = (byte) i;
          t.TestDecoderFallback (new byte [] { lead }, "?", new byte [] { lead });
      }

      // Invalid 2nd byte
      // C2..DF followed by 'a'
      for (int i = 0xC2; i <= 0xDF; i++) {
          byte lead = (byte) i;
          t.TestDecoderFallback (new byte [] { lead, ascii }, "?a", new byte [] { lead });
      }

      // E0 followed by 0x99, which is below A0..BF
      t.TestDecoderFallback (new byte [] { 0xE0, 0x99 }, "?", new byte [] { 0xE0, 0x99 });

      // E1..EF followed by 'a'
      for (int i = 0xE1; i <= 0xEF; i++) {
          byte lead = (byte) i;
          t.TestDecoderFallback (new byte [] { lead, ascii }, "?a", new byte [] { lead });
      }

      // F0 followed by 0x8F, which is below 90..BF
      t.TestDecoderFallback (new byte [] { 0xF0, 0x8F }, "?", new byte [] { 0xF0, 0x8F });

      // F1..F4 followed by 'a'
      for (int i = 0xF1; i <= 0xF4; i++) {
          byte lead = (byte) i;
          t.TestDecoderFallback (new byte [] { lead, ascii }, "?a", new byte [] { lead });
      }

      // C2..F3 followed by 0xC0
      for (int i = 0xC2; i <= 0xF3; i++) {
          byte lead = (byte) i;
          t.TestDecoderFallback (new byte [] { lead, stray }, "??", new byte [] { lead }, new byte [] { stray });
      }

      // Invalid 3rd byte
      // E0..F3, then 0xB0, then either 'a' or 0xC0
      for (int i = 0xE0; i <= 0xF3; i++) {
          byte lead = (byte) i;
          t.TestDecoderFallback (new byte [] { lead, trail, ascii }, "?a", new byte [] { lead, trail });
          t.TestDecoderFallback (new byte [] { lead, trail, stray }, "??", new byte [] { lead, trail }, new byte [] { stray });
      }

      // F4 8F, then 0xC0
      t.TestDecoderFallback (new byte [] { 0xF4, 0x8F, stray }, "??", new byte [] { 0xF4, 0x8F }, new byte [] { stray });

      // Invalid 4th byte
      // F0..F3, then 0xB0 0xB0, then either 'a' or 0xC0
      for (int i = 0xF0; i <= 0xF3; i++) {
          byte lead = (byte) i;
          t.TestDecoderFallback (new byte [] { lead, trail, trail, ascii }, "?a", new byte [] { lead, trail, trail });
          t.TestDecoderFallback (new byte [] { lead, trail, trail, stray }, "??", new byte [] { lead, trail, trail }, new byte [] { stray });
      }

      // F4 8F B0, then 0xC0
      t.TestDecoderFallback (new byte [] { 0xF4, 0x8F, trail, stray }, "??", new byte [] { 0xF4, 0x8F, trail }, new byte [] { stray });
  }
  // Feeds the UTF-8 encoding of U+733F to a single Decoder one byte per
  // Convert call (flush = false) and checks that the concatenated output
  // equals the original one-char string.
  [Test]
  public void DecoderBug23771 ()
  {
      const string original = "\u733F"; // one UTF-16 char, 3 bytes in UTF-8.
      byte [] encoded = Encoding.UTF8.GetBytes (original);
      Decoder decoder = Encoding.UTF8.GetDecoder ();
      char [] outBuffer = new char [10]; // More than enough space to decode.
      byte [] chunk = new byte [1]; // Simulates chunked input bytes.
      StringBuilder decoded = new StringBuilder ();

      // Hand the encoded bytes to the decoder separately, one per call.
      for (int i = 0; i < encoded.Length; i++) {
          chunk [0] = encoded [i];
          int bytesUsed, charsUsed;
          bool completed;
          decoder.Convert (chunk, 0, chunk.Length, outBuffer, 0, outBuffer.Length, false, out bytesUsed, out charsUsed, out completed);
          decoded.Append (outBuffer, 0, charsUsed);
          // To trace each step (expected values are listed at the bottom):
          //Debug.Print ("bytesUsed:{0}, charsUsed:{1}, completed:{2}, result:'{3}'", bytesUsed, charsUsed, completed, decoded);
      }

      // Expected: NO assertion error.
      Assert.AreEqual (original, decoded.ToString (), "#1");

      /*
       * Expected Debug outputs are:
       * bytesUsed:1, charsUsed:0, completed:True, result:''
       * bytesUsed:1, charsUsed:0, completed:True, result:''
       * bytesUsed:1, charsUsed:1, completed:True, result:'猿'
       *
       * -- Note: '猿' is U+733F (1 char in UTF-16)
       *
       * Actual Debug outputs reported for the bug were:
       * bytesUsed:3, charsUsed:1, completed:False, result:'�'
       * bytesUsed:3, charsUsed:1, completed:False, result:'��'
       * bytesUsed:3, charsUsed:1, completed:False, result:'���'
       *
       * None of the output parameters matched.
       * -- Note: '�' is the decoder fallback char (U+FFFD)
       */
  }
  1066. }
  1067. }