TextInfo.cs 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312
  1. //
  2. // System.Globalization.TextInfo.cs
  3. //
  4. // Author:
  5. // Dick Porter ([email protected])
  6. // Duncan Mak ([email protected])
  7. //
  8. // (C) 2002 Ximian, Inc.
  9. //
  10. // TODO:
  11. // Missing the various code page mappings.
  12. // Missing the OnDeserialization implementation.
  13. //
  14. // Copyright (C) 2004 Novell, Inc (http://www.novell.com)
  15. //
  16. // Permission is hereby granted, free of charge, to any person obtaining
  17. // a copy of this software and associated documentation files (the
  18. // "Software"), to deal in the Software without restriction, including
  19. // without limitation the rights to use, copy, modify, merge, publish,
  20. // distribute, sublicense, and/or sell copies of the Software, and to
  21. // permit persons to whom the Software is furnished to do so, subject to
  22. // the following conditions:
  23. //
  24. // The above copyright notice and this permission notice shall be
  25. // included in all copies or substantial portions of the Software.
  26. //
  27. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  28. // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  29. // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  30. // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  31. // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  32. // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  33. // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  34. //
  35. using System;
  36. using System.Globalization;
  37. using System.Runtime.Serialization;
  38. using System.Runtime.InteropServices;
  39. using System.Text;
  40. namespace System.Globalization {
  /// <summary>
  /// Exposes the code page identifiers, the list separator and the
  /// character casing operations associated with one culture.
  /// </summary>
  /// <remarks>
  /// Instances are created by the runtime through the internal constructor.
  /// The <c>ci</c> and <c>data</c> fields are marked <c>[NonSerialized]</c>
  /// and <see cref="IDeserializationCallback.OnDeserialization"/> is still
  /// unimplemented, so a deserialized instance has <c>ci == null</c> and
  /// zeroed code page data.
  /// </remarks>
  [Serializable]
  public class TextInfo: IDeserializationCallback
  {
      // Signature shared by ToLower (char) and ToUpper (char) so that
      // Transliterate () can be parameterized with either of them.
      private delegate char CharConverter (char c);

      // Culture text data. The constructor copies this struct verbatim from
      // an unmanaged pointer, so the field order and sizes must not change.
      // NOTE(review): presumably mirrors a native table maintained by the
      // runtime -- confirm before touching the layout.
      [StructLayout (LayoutKind.Sequential)]
      struct Data {
          public int ansi;
          public int ebcdic;
          public int mac;
          public int oem;
          public byte list_sep;
      }

      // LCIDs of the neutral cultures that use the dotted/dotless "i" pair.
      private const int AzeriLcid = 44;
      private const int TurkishLcid = 31;

      // Serialized state. Names and order are kept as they are because the
      // serialized form depends on them.
      CharConverter toLower;
      CharConverter toUpper;
      int m_win32LangID;
      // NOTE(review): the two fields below are never read or written in this
      // class; presumably kept for serialization compatibility -- confirm.
      int m_nDataItem;
      bool m_useUserOverride;

      [NonSerialized]
      readonly CultureInfo ci;
      [NonSerialized]
      readonly Data data;

      /// <summary>
      /// Creates the text info for <paramref name="ci"/>.
      /// </summary>
      /// <param name="ci">Culture this instance belongs to.</param>
      /// <param name="lcid">Culture identifier; also used as hash code.</param>
      /// <param name="data">
      /// Pointer to a <c>Data</c> struct to copy, or null to use defaults
      /// (all code pages 0).
      /// </param>
      internal unsafe TextInfo (CultureInfo ci, int lcid, void* data)
      {
          this.m_win32LangID = lcid;
          this.ci = ci;

          if (data != null) {
              this.data = *(Data*) data;
          } else {
              this.data = new Data ();
              // NOTE(review): the fallback separator is '.', while list
              // separators are more commonly ','. Kept as is -- confirm
              // whether this is intentional.
              this.data.list_sep = (byte) '.';
          }

          toLower = new CharConverter (ToLower);
          toUpper = new CharConverter (ToUpper);
      }

      /// <summary>Gets the ANSI code page identifier.</summary>
      public virtual int ANSICodePage
      {
          get { return data.ansi; }
      }

      /// <summary>Gets the EBCDIC code page identifier.</summary>
      public virtual int EBCDICCodePage
      {
          get { return data.ebcdic; }
      }

      /// <summary>
      /// Gets the list separator as a one-character string.
      /// </summary>
      public virtual string ListSeparator
      {
          get { return ((char) data.list_sep).ToString (); }
      }

      /// <summary>Gets the Macintosh code page identifier.</summary>
      public virtual int MacCodePage
      {
          get { return data.mac; }
      }

      /// <summary>Gets the OEM code page identifier.</summary>
      public virtual int OEMCodePage
      {
          get { return data.oem; }
      }

      /// <summary>
      /// Two instances are equal when they have the same LCID and refer to
      /// the very same <see cref="CultureInfo"/> object.
      /// </summary>
      /// <param name="obj">Object to compare with; may be null.</param>
      /// <returns>true when <paramref name="obj"/> is an equal TextInfo.</returns>
      public override bool Equals (object obj)
      {
          // "as" yields null for both a null argument and a foreign type.
          TextInfo other = obj as TextInfo;
          return other != null
              && other.m_win32LangID == m_win32LangID
              && other.ci == ci;
      }

      /// <summary>Returns the LCID, which is what Equals () compares first.</summary>
      public override int GetHashCode()
      {
          return m_win32LangID;
      }

      /// <summary>Returns "TextInfo - " followed by the LCID.</summary>
      public override string ToString()
      {
          return "TextInfo - " + m_win32LangID;
      }

      /// <summary>
      /// Upper-cases the first letter of every whitespace-separated word and
      /// lower-cases all its other letters.
      /// </summary>
      /// <remarks>
      /// Only whitespace starts a new word; digits and punctuation are copied
      /// unchanged and do not, so "o'neil-smith" becomes "O'neil-smith".
      /// </remarks>
      /// <param name="str">Text to convert.</param>
      /// <returns>The title-cased text.</returns>
      /// <exception cref="ArgumentNullException">
      /// <paramref name="str"/> is null.
      /// </exception>
      public string ToTitleCase (string str)
      {
          if (str == null)
              throw new ArgumentNullException ("str");

          StringBuilder result = new StringBuilder (str.Length);
          bool atWordStart = true;

          foreach (char c in str) {
              if (!Char.IsLetter (c)) {
                  result.Append (c);
                  if (Char.IsWhiteSpace (c))
                      atWordStart = true;
                  continue;
              }

              result.Append (atWordStart ? Char.ToUpper (c, ci) : Char.ToLower (c, ci));
              atWordStart = false;
          }

          return result.ToString ();
      }

      // Returns true when the root (neutral) culture of "ci" is Azeri or
      // Turkish, i.e. when "I"/"i" pair up with the dotless/dotted forms.
      // Walks up the Parent chain until the next parent is either the
      // culture itself or the invariant culture.
      private bool UsesDotlessI ()
      {
          CultureInfo root = ci;
          while (root.Parent != root && root.Parent != CultureInfo.InvariantCulture)
              root = root.Parent;

          switch (root.LCID) {
          case AzeriLcid:
          case TurkishLcid:
              return true;
          }
          return false;
      }

      /// <summary>
      /// Converts a character to lower case for this culture.
      /// </summary>
      /// <remarks>
      /// The invariant culture maps straight to
      /// <see cref="Char.ToLowerInvariant"/>. For every other culture a
      /// small table of special cases is consulted first; only Azeri and
      /// Turkish have a culture-specific entry ("I" becomes dotless i), the
      /// remaining entries are common to all non-invariant cultures.
      /// </remarks>
      /// <param name="c">Character to convert.</param>
      /// <returns>The lower-case form of <paramref name="c"/>.</returns>
      public virtual char ToLower (char c)
      {
          if (ci == CultureInfo.InvariantCulture)
              return Char.ToLowerInvariant (c);

          switch (c) {
          case '\u0049': // Latin uppercase I
              if (UsesDotlessI ())
                  return '\u0131'; // I becomes dotless i
              break;
          case '\u0130': // I-dotted
              return '\u0069'; // i

          // Title-case digraphs. Their upper-case counterparts (\u01c4,
          // \u01c7, \u01ca, \u01f1) are handled by the invariant mapping.
          case '\u01c5': // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
              return '\u01c6';
          case '\u01c8': // LATIN CAPITAL LETTER L WITH SMALL LETTER J
              return '\u01c9';
          case '\u01cb': // LATIN CAPITAL LETTER N WITH SMALL LETTER J
              return '\u01cc';
          case '\u01f2': // LATIN CAPITAL LETTER D WITH SMALL LETTER Z
              return '\u01f3';

          // Greek upsilon-with-hook symbols; these mappings are not in ICU.
          case '\u03d2':
              return '\u03c5';
          case '\u03d3':
              return '\u03cd';
          case '\u03d4':
              return '\u03cb';
          }

          return Char.ToLowerInvariant (c);
      }

      /// <summary>
      /// Converts a character to upper case for this culture.
      /// </summary>
      /// <remarks>
      /// Mirrors <see cref="ToLower(char)"/>: the invariant culture maps
      /// straight to <see cref="Char.ToUpperInvariant"/>, Azeri and Turkish
      /// turn "i" into dotted capital I, and a table of special cases shared
      /// by all non-invariant cultures is applied before falling back to the
      /// invariant mapping.
      /// </remarks>
      /// <param name="c">Character to convert.</param>
      /// <returns>The upper-case form of <paramref name="c"/>.</returns>
      public virtual char ToUpper (char c)
      {
          if (ci == CultureInfo.InvariantCulture)
              return Char.ToUpperInvariant (c);

          switch (c) {
          case '\u0069': // Latin lowercase i
              if (UsesDotlessI ())
                  return '\u0130'; // dotted capital I
              break;
          case '\u0131': // dotless i
              return '\u0049'; // I

          // Title-case digraphs, see ToLower (char).
          case '\u01c5':
              return '\u01c4';
          case '\u01c8':
              return '\u01c7';
          case '\u01cb':
              return '\u01ca';
          case '\u01f2':
              return '\u01f1';

          // These two mappings are not in ICU.
          case '\u0390': // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
              return '\u03aa';
          case '\u03b0': // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
              return '\u03ab';

          // Greek symbol variants map to the plain capital letters.
          // Other miscellaneous Greek symbols are not handled here.
          case '\u03d0': // GREEK BETA
              return '\u0392';
          case '\u03d1': // GREEK THETA
              return '\u0398';
          case '\u03d5': // GREEK PHI
              return '\u03a6';
          case '\u03d6': // GREEK PI
              return '\u03a0';
          case '\u03f0': // GREEK KAPPA
              return '\u039a';
          case '\u03f1': // GREEK RHO
              return '\u03a1';
          }

          return Char.ToUpperInvariant (c);
      }

      /// <summary>
      /// Converts every character of <paramref name="s"/> with
      /// <see cref="ToLower(char)"/>.
      /// </summary>
      /// <param name="s">Text to convert.</param>
      /// <returns>
      /// The lower-cased text, or <paramref name="s"/> itself when no
      /// character changes.
      /// </returns>
      /// <exception cref="ArgumentNullException">
      /// <paramref name="s"/> is null.
      /// </exception>
      public virtual string ToLower (string s)
      {
          // ICU (3.2) has a few one-to-many character mappings (e.g. in
          // tr-TR), but a brute force test feeding every single character
          // as a string showed no such conversion, so this is assumed to be
          // a plain per-character ToLower (char).
          return Transliterate (s, toLower);
      }

      /// <summary>
      /// Converts every character of <paramref name="s"/> with
      /// <see cref="ToUpper(char)"/>.
      /// </summary>
      /// <param name="s">Text to convert.</param>
      /// <returns>
      /// The upper-cased text, or <paramref name="s"/> itself when no
      /// character changes.
      /// </returns>
      /// <exception cref="ArgumentNullException">
      /// <paramref name="s"/> is null.
      /// </exception>
      public virtual string ToUpper (string s)
      {
          // ICU (3.2) handles one string beyond per-character conversion,
          // but only for lt-LT, where MS.NET applies no special
          // transliteration either. So this stays a per-character mapping.
          return Transliterate (s, toUpper);
      }

      // Applies "convert" to every character of "s". The input string is
      // returned as is when nothing changes; otherwise unchanged runs are
      // copied in bulk between the converted characters.
      private string Transliterate (string s, CharConverter convert)
      {
          if (s == null)
              throw new ArgumentNullException ("s");

          StringBuilder sb = null; // created lazily on the first change
          int runStart = 0;        // start of the pending unchanged run

          for (int i = 0; i < s.Length; i++) {
              char source = s [i];
              char converted = convert (source);
              if (converted == source)
                  continue;

              if (sb == null)
                  sb = new StringBuilder (s.Length);
              sb.Append (s, runStart, i - runStart);
              sb.Append (converted);
              runStart = i + 1;
          }

          if (sb == null)
              return s;

          // Flush the trailing unchanged run (possibly empty).
          sb.Append (s, runStart, s.Length - runStart);
          return sb.ToString ();
      }

      /* IDeserialization interface */

      // Not implemented yet: the [NonSerialized] fields "ci" and "data" are
      // not restored here, and being readonly they cannot simply be
      // reassigned from this method.
      [MonoTODO]
      void IDeserializationCallback.OnDeserialization(object sender)
      {
      }
  }
  279. }