TextInfo.cs 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408
  1. //
  2. // System.Globalization.TextInfo.cs
  3. //
  4. // Author:
  5. // Dick Porter ([email protected])
  6. // Duncan Mak ([email protected])
  7. // Atsushi Enomoto ([email protected])
  8. //
  9. // (C) 2002 Ximian, Inc.
  10. // (C) 2005 Novell, Inc.
  11. //
  12. // TODO:
  13. // Missing the various code page mappings.
  14. // Missing the OnDeserialization implementation.
  15. //
  16. // Copyright (C) 2004, 2005 Novell, Inc (http://www.novell.com)
  17. //
  18. // Permission is hereby granted, free of charge, to any person obtaining
  19. // a copy of this software and associated documentation files (the
  20. // "Software"), to deal in the Software without restriction, including
  21. // without limitation the rights to use, copy, modify, merge, publish,
  22. // distribute, sublicense, and/or sell copies of the Software, and to
  23. // permit persons to whom the Software is furnished to do so, subject to
  24. // the following conditions:
  25. //
  26. // The above copyright notice and this permission notice shall be
  27. // included in all copies or substantial portions of the Software.
  28. //
  29. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  30. // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  31. // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  32. // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  33. // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  34. // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  35. // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  36. //
  37. using System;
  38. using System.Globalization;
  39. using System.Runtime.Serialization;
  40. using System.Runtime.InteropServices;
  41. using System.Text;
  42. namespace System.Globalization {
  43. [Serializable]
  44. public class TextInfo: IDeserializationCallback
  45. {
  46. [StructLayout (LayoutKind.Sequential)]
  47. struct Data {
  48. public int ansi;
  49. public int ebcdic;
  50. public int mac;
  51. public int oem;
  52. public byte list_sep;
  53. }
  54. int m_win32LangID;
  55. int m_nDataItem;
  56. bool m_useUserOverride;
  57. [NonSerialized]
  58. readonly CultureInfo ci;
  59. [NonSerialized]
  60. readonly CultureInfo parentCulture;
  61. [NonSerialized]
  62. readonly bool handleDotI;
  63. [NonSerialized]
  64. readonly Data data;
  65. internal unsafe TextInfo (CultureInfo ci, int lcid, void* data)
  66. {
  67. this.m_win32LangID = lcid;
  68. this.ci = ci;
  69. if (data != null)
  70. this.data = *(Data*) data;
  71. else {
  72. this.data = new Data ();
  73. this.data.list_sep = (byte) '.';
  74. }
  75. CultureInfo tmp = ci;
  76. while (tmp.Parent != null && tmp.Parent != tmp && tmp.Parent.LCID != 0x7F)
  77. tmp = tmp.Parent;
  78. parentCulture = tmp;
  79. if (tmp != null) {
  80. switch (tmp.LCID) {
  81. case 44: // Azeri (az)
  82. case 31: // Turkish (tr)
  83. handleDotI = true;
  84. break;
  85. }
  86. }
  87. }
  88. public virtual int ANSICodePage
  89. {
  90. get {
  91. return data.ansi;
  92. }
  93. }
  94. public virtual int EBCDICCodePage
  95. {
  96. get {
  97. return data.ebcdic;
  98. }
  99. }
  100. public virtual string ListSeparator
  101. {
  102. get {
  103. return ((char) data.list_sep).ToString ();
  104. }
  105. }
  106. public virtual int MacCodePage
  107. {
  108. get {
  109. return data.mac;
  110. }
  111. }
  112. public virtual int OEMCodePage
  113. {
  114. get {
  115. return data.oem;
  116. }
  117. }
  118. public override bool Equals (object obj)
  119. {
  120. if (obj == null)
  121. return false;
  122. TextInfo other = obj as TextInfo;
  123. if (other == null)
  124. return false;
  125. if (other.m_win32LangID != m_win32LangID)
  126. return false;
  127. if (other.ci != ci)
  128. return false;
  129. return true;
  130. }
  131. public override int GetHashCode()
  132. {
  133. return (m_win32LangID);
  134. }
  135. public override string ToString()
  136. {
  137. return "TextInfo - " + m_win32LangID;
  138. }
  139. public string ToTitleCase (string str)
  140. {
  141. if(str == null)
  142. throw new ArgumentNullException("string is null");
  143. StringBuilder sb = null;
  144. int i = 0;
  145. int start = 0;
  146. while (i < str.Length) {
  147. if (!Char.IsLetter (str [i++]))
  148. continue;
  149. i--;
  150. char t = ToTitleCase (str [i]);
  151. bool capitalize = true;
  152. if (t == str [i]) {
  153. capitalize = false;
  154. bool allTitle = true;
  155. // if the word is all titlecase,
  156. // then don't capitalize it.
  157. int saved = i;
  158. while (++i < str.Length) {
  159. if (Char.IsWhiteSpace (str [i]))
  160. break;
  161. t = ToTitleCase (str [i]);
  162. if (t != str [i]) {
  163. allTitle = false;
  164. break;
  165. }
  166. }
  167. if (allTitle)
  168. continue;
  169. i = saved;
  170. // still check if all remaining
  171. // characters are lowercase,
  172. // where we don't have to modify
  173. // the source word.
  174. while (++i < str.Length) {
  175. if (Char.IsWhiteSpace (str [i]))
  176. break;
  177. if (ToLower (str [i]) != str [i]) {
  178. capitalize = true;
  179. i = saved;
  180. break;
  181. }
  182. }
  183. }
  184. if (capitalize) {
  185. if (sb == null)
  186. sb = new StringBuilder (str.Length);
  187. sb.Append (str, start, i - start);
  188. sb.Append (ToTitleCase (str [i]));
  189. start = i + 1;
  190. while (++i < str.Length) {
  191. if (Char.IsWhiteSpace (str [i]))
  192. break;
  193. sb.Append (ToLower (str [i]));
  194. }
  195. start = i;
  196. }
  197. }
  198. if (sb != null)
  199. sb.Append (str, start, str.Length - start);
  200. return sb != null ? sb.ToString () : str;
  201. }
  202. // Only Azeri and Turkish have their own special cases.
  203. // Other than them, all languages have common special case
  204. // (enumerable enough).
  205. public virtual char ToLower (char c)
  206. {
  207. // quick ASCII range check
  208. if (c < 0x40 || 0x60 < c && c < 128)
  209. return c;
  210. else if ('A' <= c && c <= 'Z' && (!handleDotI || c != 'I'))
  211. return (char) (c + 0x20);
  212. if (ci == null || ci.LCID == 0x7F)
  213. return Char.ToLowerInvariant (c);
  214. switch (c) {
  215. case '\u0049': // Latin uppercase I
  216. if (handleDotI)
  217. return '\u0131'; // I becomes dotless i
  218. break;
  219. case '\u0130': // I-dotted
  220. return '\u0069'; // i
  221. case '\u01c5': // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
  222. return '\u01c6';
  223. // \u01c7 -> \u01c9 (LJ) : invariant
  224. case '\u01c8': // LATIN CAPITAL LETTER L WITH SMALL LETTER J
  225. return '\u01c9';
  226. // \u01ca -> \u01cc (NJ) : invariant
  227. case '\u01cb': // LATIN CAPITAL LETTER N WITH SMALL LETTER J
  228. return '\u01cc';
  229. // WITH CARON : invariant
  230. // WITH DIAERESIS AND * : invariant
  231. case '\u01f2': // LATIN CAPITAL LETTER D WITH SMALL LETTER Z
  232. return '\u01f3';
  233. case '\u03d2': // ? it is not in ICU
  234. return '\u03c5';
  235. case '\u03d3': // ? it is not in ICU
  236. return '\u03cd';
  237. case '\u03d4': // ? it is not in ICU
  238. return '\u03cb';
  239. }
  240. return Char.ToLowerInvariant (c);
  241. }
  242. public virtual char ToUpper (char c)
  243. {
  244. // quick ASCII range check
  245. if (c < 0x60)
  246. return c;
  247. else if ('a' <= c && c <= 'z' && (!handleDotI || c != 'i'))
  248. return (char) (c - 0x20);
  249. if (ci == null || ci.LCID == 0x7F)
  250. return Char.ToUpperInvariant (c);
  251. switch (c) {
  252. case '\u0069': // Latin lowercase i
  253. if (handleDotI)
  254. return '\u0130'; // dotted capital I
  255. break;
  256. case '\u0131': // dotless i
  257. return '\u0049'; // I
  258. case '\u01c5': // see ToLower()
  259. return '\u01c4';
  260. case '\u01c8': // see ToLower()
  261. return '\u01c7';
  262. case '\u01cb': // see ToLower()
  263. return '\u01ca';
  264. case '\u01f2': // see ToLower()
  265. return '\u01f1';
  266. case '\u0390': // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
  267. return '\u03aa'; // it is not in ICU
  268. case '\u03b0': // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
  269. return '\u03ab'; // it is not in ICU
  270. case '\u03d0': // GREEK BETA
  271. return '\u0392';
  272. case '\u03d1': // GREEK THETA
  273. return '\u0398';
  274. case '\u03d5': // GREEK PHI
  275. return '\u03a6';
  276. case '\u03d6': // GREEK PI
  277. return '\u03a0';
  278. case '\u03f0': // GREEK KAPPA
  279. return '\u039a';
  280. case '\u03f1': // GREEK RHO
  281. return '\u03a1';
  282. // am not sure why miscellaneous GREEK symbols are
  283. // not handled here.
  284. }
  285. return Char.ToUpperInvariant (c);
  286. }
  287. private char ToTitleCase (char c)
  288. {
  289. // Handle some Latin characters.
  290. switch (c) {
  291. case '\u01c4':
  292. case '\u01c5':
  293. case '\u01c6':
  294. return '\u01c5';
  295. case '\u01c7':
  296. case '\u01c8':
  297. case '\u01c9':
  298. return '\u01c8';
  299. case '\u01ca':
  300. case '\u01cb':
  301. case '\u01cc':
  302. return '\u01cb';
  303. case '\u01f1':
  304. case '\u01f2':
  305. case '\u01f3':
  306. return '\u01f2';
  307. }
  308. if ('\u2170' <= c && c <= '\u217f' || // Roman numbers
  309. '\u24d0' <= c && c <= '\u24e9')
  310. return c;
  311. return ToUpper (c);
  312. }
  313. public virtual string ToLower (string s)
  314. {
  315. // In ICU (3.2) there are a few cases that one single
  316. // character results in multiple characters in e.g.
  317. // tr-TR culture. So I tried brute force conversion
  318. // test with single character as a string input, but
  319. // there was no such conversion. So I think it just
  320. // invokes ToLower(char).
  321. if (s == null)
  322. throw new ArgumentNullException ("string is null");
  323. StringBuilder sb = null;
  324. int start = 0;
  325. for (int i = 0; i < s.Length; i++) {
  326. if (s [i] != ToLower (s [i])) {
  327. if (sb == null)
  328. sb = new StringBuilder (s.Length);
  329. sb.Append (s, start, i - start);
  330. sb.Append (ToLower (s [i]));
  331. start = i + 1;
  332. }
  333. }
  334. if (sb != null && start < s.Length)
  335. sb.Append (s, start, s.Length - start);
  336. return sb == null ? s : sb.ToString ();
  337. }
  338. public virtual string ToUpper (string s)
  339. {
  340. // In ICU (3.2) there is a case that string
  341. // is handled beyond per-character conversion, but
  342. // it is only lt-LT culture where MS.NET does not
  343. // handle any special transliteration. So I keep
  344. // ToUpper() just as character conversion.
  345. if (s == null)
  346. throw new ArgumentNullException ("string is null");
  347. StringBuilder sb = null;
  348. int start = 0;
  349. for (int i = 0; i < s.Length; i++) {
  350. if (s [i] != ToUpper (s [i])) {
  351. if (sb == null)
  352. sb = new StringBuilder (s.Length);
  353. sb.Append (s, start, i - start);
  354. sb.Append (ToUpper (s [i]));
  355. start = i + 1;
  356. }
  357. }
  358. if (sb != null && start < s.Length)
  359. sb.Append (s, start, s.Length - start);
  360. return sb == null ? s : sb.ToString ();
  361. }
  362. /* IDeserialization interface */
  363. [MonoTODO]
  364. void IDeserializationCallback.OnDeserialization(object sender)
  365. {
  366. }
  367. }
  368. }