HebrewNumber.cs 19 KB


  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Text;
  5. using System.Diagnostics;
  6. namespace System.Globalization
  7. {
  ////////////////////////////////////////////////////////////////////////////
  //
  // HebrewNumberParsingContext
  //
  // Carries the parser's progress between successive calls to
  // HebrewNumber.ParseByChar: the current state of the state machine and
  // the numeric value accumulated from the characters consumed so far.
  //
  ////////////////////////////////////////////////////////////////////////////
  internal struct HebrewNumberParsingContext
  {
      // Current position in the Hebrew number state machine.
      internal HebrewNumber.HS state;

      // Running value of the Hebrew number. ParseByChar adds each letter's
      // value to it; it is final once FoundEndOfHebrewNumber is reported.
      internal int result;

      // Begins a new parse: the state machine is placed in its Start state
      // and the running value is seeded with the supplied result.
      public HebrewNumberParsingContext(int result)
      {
          this.state = HebrewNumber.HS.Start;
          this.result = result;
      }
  }
  ////////////////////////////////////////////////////////////////////////////
  //
  // HebrewNumberParsingState
  //
  // Outcome reported by HebrewNumber.ParseByChar for a single character.
  // See ParseByChar() for how each outcome is produced.
  //
  ////////////////////////////////////////////////////////////////////////////
  internal enum HebrewNumberParsingState
  {
      // The character leads the state machine into its error state.
      InvalidHebrewNumber = 0,

      // The character is not a Hebrew letter that carries a numeric value.
      NotHebrewDigit = 1,

      // A terminal state was reached: a complete Hebrew number was read.
      FoundEndOfHebrewNumber = 2,

      // The character was accepted; more characters are expected.
      ContinueParsing = 3,
  }
  41. ////////////////////////////////////////////////////////////////////////////
  42. //
  43. // class HebrewNumber
  44. //
  45. // Provides static methods for formatting integer values into
  46. // Hebrew text and parsing Hebrew number text.
  47. //
  48. // Limitations:
  49. // Parse can only handle value 1 ~ 999.
  50. // Append() can only handle 1 ~ 999. If value is greater than 5000,
  51. // 5000 will be subtracted from the value.
  52. //
  53. ////////////////////////////////////////////////////////////////////////////
  54. internal static class HebrewNumber
  55. {
  ////////////////////////////////////////////////////////////////////////////
  //
  // Append
  //
  // Writes the Hebrew-letter spelling of Number to the end of outputBuffer,
  // using the numeric value of each Hebrew letter. Basically, this converts
  // the lunar year and the lunar month to letters.
  //
  // Background kept from the original header: the character of a year is
  // described by three letters of the Hebrew alphabet, the first and third
  // giving the days of the week on which the New Year occurs and Passover
  // begins, and the second being the initial of the Hebrew word for
  // defective, normal, or complete:
  //   Defective Year : Both Heshvan and Kislev are defective (353 or 383 days)
  //   Normal Year    : Heshvan is defective, Kislev is full (354 or 384 days)
  //   Complete Year  : Both Heshvan and Kislev are full (355 or 385 days)
  //
  // Parameters:
  //   outputBuffer - receives the letters; existing content is left intact.
  //   Number       - value to format. Values above 5000 are reduced by 5000
  //                  first; the result is asserted to be within 1 ~ 999.
  //
  // Punctuation:
  //   When more than one letter was written, a double quote is inserted
  //   before the last letter; otherwise a single quote is appended.
  //
  ////////////////////////////////////////////////////////////////////////////
  internal static void Append(StringBuilder outputBuffer, int Number)
  {
      Debug.Assert(outputBuffer != null);

      // Letters for 10, 20, ..., 90 in order:
      // Yod, Kaf, Lamed, Mem, Nun, Samekh, Ayin, Pe, Tsadi.
      const string TensLetters = "\u05d9\u05db\u05dc\u05de\u05e0\u05e1\u05e2\u05e4\u05e6";

      int lengthBefore = outputBuffer.Length;

      // Values above 5000 are reduced by 5000 before formatting.
      if (Number > 5000)
      {
          Number -= 5000;
      }

      Debug.Assert(Number > 0 && Number <= 999, "Number is out of range.");

      //
      // Hundreds: \x05e7 = 100, \x05e8 = 200, \x05e9 = 300, \x05ea = 400.
      // Anything above 400 is spelled as repeated 400s plus the remainder.
      //
      int hundreds = Number / 100;
      if (hundreds > 0)
      {
          Number -= hundreds * 100;

          for (int pending400s = hundreds / 4; pending400s > 0; pending400s--)
          {
              outputBuffer.Append('\x05ea');
          }

          int leftoverHundreds = hundreds % 4;
          if (leftoverHundreds > 0)
          {
              // \x05e6 + 1..3 lands on the letters for 100, 200 and 300.
              outputBuffer.Append((char)('\x05e6' + leftoverHundreds));
          }
      }

      //
      // Tens and units. A tens digit outside 1 ~ 9 produces no letter.
      //
      int tens = Number / 10;
      int units = Number % 10;

      char tensLetter = (tens >= 1 && tens <= 9) ? TensLetters[tens - 1] : '\x0';
      char unitsLetter = (units > 0) ? (char)('\x05d0' + units - 1) : '\x0';

      // 15 and 16 are not written as Yod (10) + He (5) / Vav (6); they are
      // written as Tet (9) + Vav (6) and Tet (9) + Zayin (7) instead.
      if (tens == 1 && (units == 5 || units == 6))
      {
          tensLetter = '\x05d8';                            // Hebrew Letter Tet (9)
          unitsLetter = (units == 5) ? '\x05d5' : '\x05d6'; // Vav (6) or Zayin (7)
      }

      if (tensLetter != '\x0')
      {
          outputBuffer.Append(tensLetter);
      }

      if (unitsLetter != '\x0')
      {
          outputBuffer.Append(unitsLetter);
      }

      //
      // Punctuate: double quote before the last letter of a multi-letter
      // number, single quote after a lone letter.
      //
      int lettersWritten = outputBuffer.Length - lengthBefore;
      if (lettersWritten > 1)
      {
          outputBuffer.Insert(outputBuffer.Length - 1, '"');
      }
      else
      {
          outputBuffer.Append('\'');
      }
  }
  ////////////////////////////////////////////////////////////////////////////
  //
  // HebrewToken
  //
  // Classifies each input character of a Hebrew number. The numeric value
  // of a token is the column index used to look up the next state in the
  // state machine table.
  //
  ////////////////////////////////////////////////////////////////////////////
  private enum HebrewToken : short
  {
      Invalid      = -1,  // letter without a numeric value
      Digit400     = 0,
      Digit200_300 = 1,
      Digit100     = 2,
      Digit10      = 3,   // 10 ~ 90
      Digit1       = 4,   // 1, 2, 3, 4, 5, 8
      Digit6_7     = 5,
      Digit7       = 6,   // not assigned to any letter in s_hebrewValues
      Digit9       = 7,
      SingleQuote  = 8,
      DoubleQuote  = 9,
  }
  ////////////////////////////////////////////////////////////////////////////
  //
  // HebrewValue
  //
  // Pairs the token class of a Hebrew letter with its numeric value.
  //
  ////////////////////////////////////////////////////////////////////////////
  private struct HebrewValue
  {
      // Token class fed to the state machine for this letter.
      internal HebrewToken token;

      // Numeric value of the letter (-1 when the letter has none).
      internal short value;

      internal HebrewValue(HebrewToken token, short value)
      {
          this.value = value;
          this.token = token;
      }
  }
  //
  // Maps each Hebrew character from U+05D0 to U+05EA (indexed by its offset
  // from U+05D0) to its token class and digit value.
  // The value is -1 if the Hebrew character does not have an associated value;
  // in this table those are the final letter forms.
  //
  private static readonly HebrewValue[] s_hebrewValues =
  {
      new HebrewValue(HebrewToken.Digit1, 1),           // '\x05d0' Hebrew Letter Alef
      new HebrewValue(HebrewToken.Digit1, 2),           // '\x05d1' Hebrew Letter Bet
      new HebrewValue(HebrewToken.Digit1, 3),           // '\x05d2' Hebrew Letter Gimel
      new HebrewValue(HebrewToken.Digit1, 4),           // '\x05d3' Hebrew Letter Dalet
      new HebrewValue(HebrewToken.Digit1, 5),           // '\x05d4' Hebrew Letter He
      new HebrewValue(HebrewToken.Digit6_7, 6),         // '\x05d5' Hebrew Letter Vav
      new HebrewValue(HebrewToken.Digit6_7, 7),         // '\x05d6' Hebrew Letter Zayin
      new HebrewValue(HebrewToken.Digit1, 8),           // '\x05d7' Hebrew Letter Het
      new HebrewValue(HebrewToken.Digit9, 9),           // '\x05d8' Hebrew Letter Tet
      new HebrewValue(HebrewToken.Digit10, 10),         // '\x05d9' Hebrew Letter Yod
      new HebrewValue(HebrewToken.Invalid, -1),         // '\x05da' Hebrew Letter Final Kaf
      new HebrewValue(HebrewToken.Digit10, 20),         // '\x05db' Hebrew Letter Kaf
      new HebrewValue(HebrewToken.Digit10, 30),         // '\x05dc' Hebrew Letter Lamed
      new HebrewValue(HebrewToken.Invalid, -1),         // '\x05dd' Hebrew Letter Final Mem
      new HebrewValue(HebrewToken.Digit10, 40),         // '\x05de' Hebrew Letter Mem
      new HebrewValue(HebrewToken.Invalid, -1),         // '\x05df' Hebrew Letter Final Nun
      new HebrewValue(HebrewToken.Digit10, 50),         // '\x05e0' Hebrew Letter Nun
      new HebrewValue(HebrewToken.Digit10, 60),         // '\x05e1' Hebrew Letter Samekh
      new HebrewValue(HebrewToken.Digit10, 70),         // '\x05e2' Hebrew Letter Ayin
      new HebrewValue(HebrewToken.Invalid, -1),         // '\x05e3' Hebrew Letter Final Pe
      new HebrewValue(HebrewToken.Digit10, 80),         // '\x05e4' Hebrew Letter Pe
      new HebrewValue(HebrewToken.Invalid, -1),         // '\x05e5' Hebrew Letter Final Tsadi
      new HebrewValue(HebrewToken.Digit10, 90),         // '\x05e6' Hebrew Letter Tsadi
      new HebrewValue(HebrewToken.Digit100, 100),       // '\x05e7' Hebrew Letter Qof
      new HebrewValue(HebrewToken.Digit200_300, 200),   // '\x05e8' Hebrew Letter Resh
      new HebrewValue(HebrewToken.Digit200_300, 300),   // '\x05e9' Hebrew Letter Shin
      new HebrewValue(HebrewToken.Digit400, 400),       // '\x05ea' Hebrew Letter Tav
  };

  // First character covered by s_hebrewValues.
  private const int minHebrewNumberCh = 0x05d0;

  // Last character covered by s_hebrewValues. It is derived from the table
  // length, so it must stay declared after s_hebrewValues (static field
  // initializers run in textual order). It never changes after
  // initialization, hence readonly.
  private static readonly char s_maxHebrewNumberCh = (char)(minHebrewNumberCh + s_hebrewValues.Length - 1);
  ////////////////////////////////////////////////////////////////////////////
  //
  // HS - Hebrew number parsing State
  //
  // The current state and the next token select the next state in the
  // state machine table. Values 0 ~ 16 are row indexes of that table;
  // _err and END are outcomes and have no row of their own.
  //
  // DQ = Double Quote
  //
  ////////////////////////////////////////////////////////////////////////////
  internal enum HS : sbyte
  {
      _err         = -1,   // An error state.
      Start        = 0,    // Nothing has been consumed yet.
      S400         = 1,    // A Hebrew digit 400.
      S400_400     = 2,    // Two Hebrew digits 400.
      S400_X00     = 3,    // Hebrew digit 400 followed by 100, 200 or 300.
      S400_X0      = 4,    // Hundreds followed by 10 ~ 90.
      X00_DQ       = 5,    // A hundreds number followed by a double quote.
      S400_X00_X0  = 6,    // 400, further hundreds, then 10 ~ 90.
      X0_DQ        = 7,    // A tens digit followed by a double quote.
      X            = 8,    // A single-digit Hebrew number.
      X0           = 9,    // A Hebrew tens digit (10 ~ 90).
      X00          = 10,   // A Hebrew hundreds digit (100, 200 or 300).
      S400_DQ      = 11,   // A Hebrew digit 400 followed by a double quote.
      S400_400_DQ  = 12,   // Two Hebrew digits 400 followed by a double quote.
      S400_400_100 = 13,   // Two Hebrew digits 400 followed by 100.
      S9           = 14,   // Hebrew digit 9.
      X00_S9       = 15,   // A hundreds number followed by a digit 9.
      S9_DQ        = 16,   // Hebrew digit 9 followed by a double quote.
      END          = 100,  // A terminal state is reached.
  }
  //
  // The state machine for Hebrew number parsing, stored row-major: one row
  // per HS state (Start ~ S9_DQ) and one column per HebrewToken value
  // (Digit400 ~ DoubleQuote). The entry at [state * HebrewTokenCount + token]
  // is the state entered when that token is read in that state.
  //
  private static readonly HS[] s_numberPasingState = new HS[]
  {
      // 400         300/200       100              90~10           8~1      6        7        9          '        "
      HS.S400,       HS.X00,       HS.X00,          HS.X0,          HS.X,    HS.X,    HS.X,    HS.S9,     HS._err, HS._err,        /*  0: Start        */
      HS.S400_400,   HS.S400_X00,  HS.S400_X00,     HS.S400_X0,     HS._err, HS._err, HS._err, HS.X00_S9, HS.END,  HS.S400_DQ,     /*  1: S400         */
      HS._err,       HS._err,      HS.S400_400_100, HS.S400_X0,     HS._err, HS._err, HS._err, HS.X00_S9, HS._err, HS.S400_400_DQ, /*  2: S400_400     */
      HS._err,       HS._err,      HS._err,         HS.S400_X00_X0, HS._err, HS._err, HS._err, HS.X00_S9, HS._err, HS.X00_DQ,      /*  3: S400_X00     */
      HS._err,       HS._err,      HS._err,         HS._err,        HS._err, HS._err, HS._err, HS._err,   HS._err, HS.X0_DQ,       /*  4: S400_X0      */
      HS._err,       HS._err,      HS._err,         HS.END,         HS.END,  HS.END,  HS.END,  HS.END,    HS._err, HS._err,        /*  5: X00_DQ       */
      HS._err,       HS._err,      HS._err,         HS._err,        HS._err, HS._err, HS._err, HS._err,   HS._err, HS.X0_DQ,       /*  6: S400_X00_X0  */
      HS._err,       HS._err,      HS._err,         HS._err,        HS.END,  HS.END,  HS.END,  HS.END,    HS._err, HS._err,        /*  7: X0_DQ        */
      HS._err,       HS._err,      HS._err,         HS._err,        HS._err, HS._err, HS._err, HS._err,   HS.END,  HS._err,        /*  8: X            */
      HS._err,       HS._err,      HS._err,         HS._err,        HS._err, HS._err, HS._err, HS._err,   HS.END,  HS.X0_DQ,       /*  9: X0           */
      HS._err,       HS._err,      HS._err,         HS.S400_X0,     HS._err, HS._err, HS._err, HS.X00_S9, HS.END,  HS.X00_DQ,      /* 10: X00          */
      HS.END,        HS.END,       HS.END,          HS.END,         HS.END,  HS.END,  HS.END,  HS.END,    HS._err, HS._err,        /* 11: S400_DQ      */
      HS._err,       HS._err,      HS.END,          HS.END,         HS.END,  HS.END,  HS.END,  HS.END,    HS._err, HS._err,        /* 12: S400_400_DQ  */
      HS._err,       HS._err,      HS._err,         HS.S400_X00_X0, HS._err, HS._err, HS._err, HS.X00_S9, HS._err, HS.X00_DQ,      /* 13: S400_400_100 */
      HS._err,       HS._err,      HS._err,         HS._err,        HS._err, HS._err, HS._err, HS._err,   HS.END,  HS.S9_DQ,       /* 14: S9           */
      HS._err,       HS._err,      HS._err,         HS._err,        HS._err, HS._err, HS._err, HS._err,   HS._err, HS.S9_DQ,       /* 15: X00_S9       */
      HS._err,       HS._err,      HS._err,         HS._err,        HS._err, HS.END,  HS.END,  HS._err,   HS._err, HS._err         /* 16: S9_DQ        */
  };

  // Count of valid HebrewToken values; this is the column count of s_numberPasingState.
  private const int HebrewTokenCount = 10;
  ////////////////////////////////////////////////////////////////////////
  //
  // ParseByChar
  //
  // Actions:
  //   Feeds one character of a Hebrew number into the state machine.
  //   Progress between calls is kept in the supplied
  //   HebrewNumberParsingContext: a valued Hebrew letter adds its value to
  //   context.result, and any accepted token updates context.state.
  // Returns:
  //   A HebrewNumberParsingState value.
  //     NotHebrewDigit: ch is neither a quote nor a Hebrew letter that
  //         carries a value. The context is left untouched.
  //     InvalidHebrewNumber: ch moved the state machine into its error
  //         state, i.e. the text is not a valid Hebrew number.
  //     FoundEndOfHebrewNumber: a terminal state was reached, so a valid
  //         Hebrew number has been read once ch is consumed.
  //     ContinueParsing: ch was accepted and led to a non-terminal state;
  //         keep feeding the following characters.
  // Remarks:
  //   NOTE(review): the _err (-1) and END (100) states do not correspond
  //   to a row of the state table, so a context left in one of them is
  //   presumably not meant to be passed in again - confirm against callers.
  //
  ////////////////////////////////////////////////////////////////////////
  internal static HebrewNumberParsingState ParseByChar(char ch, ref HebrewNumberParsingContext context)
  {
      Debug.Assert(s_numberPasingState.Length == HebrewTokenCount * ((int)HS.S9_DQ + 1));

      // Step 1: classify the character into a token.
      HebrewToken token;
      switch (ch)
      {
          case '\'':
          {
              token = HebrewToken.SingleQuote;
              break;
          }
          case '\"':
          {
              token = HebrewToken.DoubleQuote;
              break;
          }
          default:
          {
              int offset = ch - minHebrewNumberCh;
              if (offset < 0 || offset >= s_hebrewValues.Length)
              {
                  // Not in valid Hebrew digit range.
                  return HebrewNumberParsingState.NotHebrewDigit;
              }

              HebrewValue entry = s_hebrewValues[offset];
              if (entry.token == HebrewToken.Invalid)
              {
                  // A Hebrew letter without a numeric value.
                  return HebrewNumberParsingState.NotHebrewDigit;
              }

              token = entry.token;
              // The value is accumulated before the transition is validated.
              context.result += entry.value;
              break;
          }
      }

      // Step 2: follow the transition for (current state, token).
      HS nextState = s_numberPasingState[(int)context.state * HebrewTokenCount + (int)token];
      context.state = nextState;

      // Step 3: translate the new state into the caller-facing outcome.
      switch (nextState)
      {
          case HS._err:
              // Invalid Hebrew state. This indicates an incorrect Hebrew number.
              return HebrewNumberParsingState.InvalidHebrewNumber;

          case HS.END:
              // Reach a terminal state.
              return HebrewNumberParsingState.FoundEndOfHebrewNumber;

          default:
              // We should continue to parse.
              return HebrewNumberParsingState.ContinueParsing;
      }
  }
  ////////////////////////////////////////////////////////////////////////
  //
  // IsDigit
  //
  // Actions:
  //   Tells whether ch can be part of a Hebrew number: a Hebrew letter
  //   that has a numeric value in s_hebrewValues, a single quote, or a
  //   double quote.
  // Returns:
  //   true if the specified character is a valid Hebrew number character;
  //   otherwise false.
  //
  ////////////////////////////////////////////////////////////////////////
  internal static bool IsDigit(char ch)
  {
      // The quote characters are accepted as number punctuation.
      if (ch == '\'' || ch == '\"')
      {
          return true;
      }

      // Anything outside the table's character range cannot be a digit.
      if (ch < minHebrewNumberCh || ch > s_maxHebrewNumberCh)
      {
          return false;
      }

      // Inside the range, letters without a value are stored with -1.
      int offset = ch - minHebrewNumberCh;
      return s_hebrewValues[offset].value >= 0;
  }
  407. }
  408. }