ParseNumbers.cs 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659
  1. // Licensed to the .NET Foundation under one or more agreements.
  2. // The .NET Foundation licenses this file to you under the MIT license.
  3. // See the LICENSE file in the project root for more information.
  4. using System.Diagnostics;
  5. using System.Runtime.CompilerServices;
  6. namespace System
  7. {
  8. /// <summary>Methods for parsing numbers and strings.</summary>
  9. internal static class ParseNumbers
  10. {
  // ---- Formatting flags (read by IntToString / LongToString) ----

  /// <summary>When set, the padding characters are emitted ahead of the formatted digits.</summary>
  internal const int LeftAlign = 1 << 0;

  /// <summary>Declared for completeness; not referenced by any method in this class.</summary>
  internal const int RightAlign = 1 << 2;

  /// <summary>Base 10 only: emit a leading space for non-negative values (ignored when <see cref="PrintSign"/> is set).</summary>
  internal const int PrefixSpace = 1 << 3;

  /// <summary>Base 10 only: emit a leading '+' for non-negative values.</summary>
  internal const int PrintSign = 1 << 4;

  /// <summary>Emit a base prefix: "0x" for radix 16, "0" for radix 8.</summary>
  internal const int PrintBase = 1 << 5;

  /// <summary>Mask the value to its low 8 bits before formatting.</summary>
  internal const int PrintAsI1 = 1 << 6;

  /// <summary>Mask the value to its low 16 bits before formatting.</summary>
  internal const int PrintAsI2 = 1 << 7;

  /// <summary>Mask the value to its low 32 bits before formatting (only read by LongToString).</summary>
  internal const int PrintAsI4 = 1 << 8;

  // ---- Parsing flags (read by StringToInt / StringToLong) ----

  /// <summary>Parse as unsigned: a leading '-' is rejected and the full unsigned range is accepted.</summary>
  internal const int TreatAsUnsigned = 1 << 9;

  /// <summary>StringToInt only: the parsed bit pattern must fit in 8 bits.</summary>
  internal const int TreatAsI1 = 1 << 10;

  /// <summary>StringToInt only: the parsed bit pattern must fit in 16 bits.</summary>
  internal const int TreatAsI2 = 1 << 11;

  /// <summary>No leading whitespace is skipped and no characters may follow the digits.</summary>
  internal const int IsTight = 1 << 12;

  /// <summary>No leading whitespace is skipped.</summary>
  internal const int NoSpace = 1 << 13;

  /// <summary>
  /// LongToString only, together with <see cref="PrintBase"/>: for radices other than 8, 10 and 16,
  /// emit the two-digit radix followed by '#' ahead of the digits.
  /// </summary>
  internal const int PrintRadixBase = 1 << 14;

  // ---- Radix bounds accepted by the formatting methods ----

  private const int MinRadix = 2;
  private const int MaxRadix = 36;
  /// <summary>
  /// Parses a 64-bit integer from <paramref name="s"/>, starting at index 0.
  /// </summary>
  /// <param name="s">The characters to parse.</param>
  /// <param name="radix">2, 8, 10 or 16, or -1 to start as base 10 and switch to base 16 on a "0x"/"0X" prefix.</param>
  /// <param name="flags">Bitwise combination of the parsing flags declared on this class.</param>
  /// <returns>The parsed value.</returns>
  /// <remarks>
  /// Thin wrapper over the overload taking a <c>ref int</c> position; the final position is discarded.
  /// The method contains no pointer code, so it is not declared <c>unsafe</c>.
  /// </remarks>
  public static long StringToLong(ReadOnlySpan<char> s, int radix, int flags)
  {
      int pos = 0;
      return StringToLong(s, radix, flags, ref pos);
  }
  /// <summary>
  /// Parses a 64-bit integer from <paramref name="s"/> beginning at <paramref name="currPos"/>.
  /// </summary>
  /// <param name="s">The characters to parse.</param>
  /// <param name="radix">2, 8, 10 or 16, or -1 to start as base 10 and switch to base 16 on a "0x"/"0X" prefix.</param>
  /// <param name="flags">Bitwise combination of IsTight, NoSpace and TreatAsUnsigned.</param>
  /// <param name="currPos">In: index of the first character to examine. Out: index just past the last digit consumed.</param>
  /// <returns>The parsed value; base-10 results carry the parsed sign, other bases return the raw bit pattern.</returns>
  /// <exception cref="ArgumentException">The radix is unsupported, or a '-' sign precedes a non-decimal number.</exception>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="currPos"/> is negative or not less than the span length.</exception>
  /// <exception cref="FormatException">Only whitespace was found, no digits were found, or (IsTight) characters follow the digits.</exception>
  /// <exception cref="OverflowException">A '-' sign was supplied together with TreatAsUnsigned.</exception>
  public static long StringToLong(ReadOnlySpan<char> s, int radix, int flags, ref int currPos)
  {
      int index = currPos;

      // -1 means "detect the base": begin as decimal and upgrade to hex if a 0x prefix shows up.
      int effectiveRadix = radix;
      if (effectiveRadix == -1)
      {
          effectiveRadix = 10;
      }

      bool radixSupported = effectiveRadix == 2 || effectiveRadix == 8 || effectiveRadix == 10 || effectiveRadix == 16;
      if (!radixSupported)
      {
          throw new ArgumentException(SR.Arg_InvalidBase, nameof(radix));
      }

      int end = s.Length;
      if (index < 0 || index >= end)
      {
          // NOTE(review): the single-string constructor overload takes the parameter name, not the
          // message. Left as-is because changing it alters the exception text callers observe.
          throw new ArgumentOutOfRangeException(SR.ArgumentOutOfRange_Index);
      }

      bool tight = (flags & IsTight) != 0;
      bool unsignedParse = (flags & TreatAsUnsigned) != 0;

      // Leading whitespace is skipped only when neither IsTight nor NoSpace is requested.
      if ((flags & (IsTight | NoSpace)) == 0)
      {
          EatWhiteSpace(s, ref index);
          if (index == end)
          {
              throw new FormatException(SR.Format_EmptyInputString);
          }
      }

      // Optional sign character.
      int signMultiplier = 1;
      char lead = s[index];
      if (lead == '-')
      {
          if (effectiveRadix != 10)
          {
              throw new ArgumentException(SR.Arg_CannotHaveNegativeValue);
          }

          if (unsignedParse)
          {
              throw new OverflowException(SR.Overflow_NegativeUnsigned);
          }

          signMultiplier = -1;
          index++;
      }
      else if (lead == '+')
      {
          index++;
      }

      // Optional "0x"/"0X" prefix, honoured for an auto-detected or explicit base 16.
      if ((radix == -1 || radix == 16) && index + 1 < end && s[index] == '0')
      {
          char marker = s[index + 1];
          if (marker == 'x' || marker == 'X')
          {
              effectiveRadix = 16;
              index += 2;
          }
      }

      int digitsStart = index;
      long value = GrabLongs(effectiveRadix, s, ref index, unsignedParse);

      // Nothing consumed means there were no digits valid for this radix.
      if (index == digitsStart)
      {
          throw new FormatException(SR.Format_NoParsibleDigits);
      }

      // In tight mode the digits must run to the end of the input.
      if (tight && index < end)
      {
          throw new FormatException(SR.Format_ExtraJunkAtEnd);
      }

      // Report how far we got.
      currPos = index;

      // A signed decimal magnitude of 2^63 is only representable when it was negated.
      if ((ulong)value == 0x8000000000000000 && signMultiplier == 1 && effectiveRadix == 10 && !unsignedParse)
      {
          Number.ThrowOverflowException(TypeCode.Int64);
      }

      if (effectiveRadix == 10)
      {
          value *= signMultiplier;
      }

      return value;
  }
  /// <summary>
  /// Parses a 32-bit integer from <paramref name="s"/>, starting at index 0.
  /// </summary>
  /// <param name="s">The characters to parse.</param>
  /// <param name="radix">2, 8, 10 or 16, or -1 to start as base 10 and switch to base 16 on a "0x"/"0X" prefix.</param>
  /// <param name="flags">Bitwise combination of the parsing flags declared on this class.</param>
  /// <returns>The parsed value.</returns>
  /// <remarks>Thin wrapper over the overload taking a <c>ref int</c> position; the final position is discarded.</remarks>
  public static int StringToInt(ReadOnlySpan<char> s, int radix, int flags)
  {
      int startIndex = 0;
      return StringToInt(s, radix, flags, ref startIndex);
  }
  /// <summary>
  /// Parses a 32-bit integer from <paramref name="s"/> beginning at <paramref name="currPos"/>.
  /// </summary>
  /// <param name="s">The characters to parse.</param>
  /// <param name="radix">2, 8, 10 or 16, or -1 to start as base 10 and switch to base 16 on a "0x"/"0X" prefix.</param>
  /// <param name="flags">Bitwise combination of IsTight, NoSpace, TreatAsUnsigned, TreatAsI1 and TreatAsI2.</param>
  /// <param name="currPos">In: index of the first character to examine. Out: index just past the last digit consumed.</param>
  /// <returns>The parsed value; base-10 results carry the parsed sign, other bases return the raw bit pattern.</returns>
  /// <exception cref="ArgumentException">The radix is unsupported, or a '-' sign precedes a non-decimal number.</exception>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="currPos"/> is negative or not less than the span length.</exception>
  /// <exception cref="FormatException">Only whitespace was found, no digits were found, or (IsTight) characters follow the digits.</exception>
  /// <exception cref="OverflowException">A '-' sign was supplied together with TreatAsUnsigned.</exception>
  public static int StringToInt(ReadOnlySpan<char> s, int radix, int flags, ref int currPos)
  {
      // The caller is required to say where parsing starts.
      int index = currPos;

      // -1 means "detect the base": begin as decimal and upgrade to hex if a 0x prefix shows up.
      int effectiveRadix = radix;
      if (effectiveRadix == -1)
      {
          effectiveRadix = 10;
      }

      bool radixSupported = effectiveRadix == 2 || effectiveRadix == 8 || effectiveRadix == 10 || effectiveRadix == 16;
      if (!radixSupported)
      {
          throw new ArgumentException(SR.Arg_InvalidBase, nameof(radix));
      }

      int end = s.Length;
      if (index < 0 || index >= end)
      {
          // NOTE(review): the single-string constructor overload takes the parameter name, not the
          // message. Left as-is because changing it alters the exception text callers observe.
          throw new ArgumentOutOfRangeException(SR.ArgumentOutOfRange_Index);
      }

      bool tight = (flags & IsTight) != 0;
      bool unsignedParse = (flags & TreatAsUnsigned) != 0;

      // Leading whitespace is skipped only when neither IsTight nor NoSpace is requested.
      if ((flags & (IsTight | NoSpace)) == 0)
      {
          EatWhiteSpace(s, ref index);
          if (index == end)
          {
              throw new FormatException(SR.Format_EmptyInputString);
          }
      }

      // Optional sign character.
      int signMultiplier = 1;
      char lead = s[index];
      if (lead == '-')
      {
          if (effectiveRadix != 10)
          {
              throw new ArgumentException(SR.Arg_CannotHaveNegativeValue);
          }

          if (unsignedParse)
          {
              throw new OverflowException(SR.Overflow_NegativeUnsigned);
          }

          signMultiplier = -1;
          index++;
      }
      else if (lead == '+')
      {
          index++;
      }

      // Optional "0x"/"0X" prefix, honoured for an auto-detected or explicit base 16.
      if ((radix == -1 || radix == 16) && index + 1 < end && s[index] == '0')
      {
          char marker = s[index + 1];
          if (marker == 'x' || marker == 'X')
          {
              effectiveRadix = 16;
              index += 2;
          }
      }

      int digitsStart = index;
      int value = GrabInts(effectiveRadix, s, ref index, unsignedParse);

      // Nothing consumed means there were no digits valid for this radix.
      if (index == digitsStart)
      {
          throw new FormatException(SR.Format_NoParsibleDigits);
      }

      // In tight mode the digits must run to the end of the input.
      if (tight && index < end)
      {
          throw new FormatException(SR.Format_ExtraJunkAtEnd);
      }

      // Report how far we got.
      currPos = index;

      // Range validation depends on the requested width; exactly one of these checks applies.
      uint bits = (uint)value;
      if ((flags & TreatAsI1) != 0)
      {
          if (bits > 0xFF)
          {
              Number.ThrowOverflowException(TypeCode.SByte);
          }
      }
      else if ((flags & TreatAsI2) != 0)
      {
          if (bits > 0xFFFF)
          {
              Number.ThrowOverflowException(TypeCode.Int16);
          }
      }
      else if (bits == 0x80000000 && signMultiplier == 1 && effectiveRadix == 10 && !unsignedParse)
      {
          // A signed decimal magnitude of 2^31 is only representable when it was negated.
          Number.ThrowOverflowException(TypeCode.Int32);
      }

      if (effectiveRadix == 10)
      {
          value *= signMultiplier;
      }

      return value;
  }
  /// <summary>
  /// Formats a 32-bit integer in the given radix, padded to at least <paramref name="width"/> characters.
  /// </summary>
  /// <param name="n">The value to format.</param>
  /// <param name="radix">The base, between MinRadix and MaxRadix inclusive.</param>
  /// <param name="width">Minimum length of the result; shorter output is padded with <paramref name="paddingChar"/>.</param>
  /// <param name="paddingChar">The character used for padding.</param>
  /// <param name="flags">Bitwise combination of LeftAlign, PrefixSpace, PrintSign, PrintBase, PrintAsI1 and PrintAsI2.</param>
  /// <returns>The formatted string. Digits above 9 are lower-case letters.</returns>
  /// <exception cref="ArgumentException"><paramref name="radix"/> is outside the supported range.</exception>
  /// <remarks>
  /// Negative values are written as "-magnitude" in base 10 and as their two's complement bit pattern
  /// in every other base. When LeftAlign is set the padding precedes the digits; otherwise it follows them.
  /// </remarks>
  public static string IntToString(int n, int radix, int width, char paddingChar, int flags)
  {
      // Scratch space; characters are produced least-significant first and reversed on copy-out.
      Span<char> scratch = stackalloc char[66];

      if (radix < MinRadix || radix > MaxRadix)
      {
          throw new ArgumentException(SR.Arg_InvalidBase, nameof(radix));
      }

      bool negative = n < 0;

      // Only base 10 formats the magnitude; other bases keep the raw bits.
      // For int.MinValue the negation wraps, and the cast still yields the right magnitude.
      uint remaining;
      if (negative && radix == 10)
      {
          remaining = (uint)-n;
      }
      else
      {
          remaining = (uint)n;
      }

      // Trim to the requested width so sign extension does not leak into the output.
      if ((flags & PrintAsI1) != 0)
      {
          remaining &= 0xFF;
      }
      else if ((flags & PrintAsI2) != 0)
      {
          remaining &= 0xFFFF;
      }

      int count;
      if (remaining == 0)
      {
          // Zero would otherwise produce no digits at all.
          scratch[0] = '0';
          count = 1;
      }
      else
      {
          count = 0;
          uint divisor = (uint)radix;

          // Bounded for-loop (rather than do/while) so the JIT can drop the span bounds checks.
          for (int slot = 0; slot < scratch.Length; slot++)
          {
              uint quotient = remaining / divisor; // TODO https://github.com/dotnet/coreclr/issues/3439
              uint digit = remaining - (quotient * divisor);
              remaining = quotient;

              if (digit < 10)
              {
                  scratch[slot] = (char)(digit + '0');
              }
              else
              {
                  scratch[slot] = (char)(digit + 'a' - 10);
              }

              if (remaining == 0)
              {
                  count = slot + 1;
                  break;
              }
          }

          Debug.Assert(remaining == 0, $"Expected {remaining} == 0");
      }

      // Base prefix, stored reversed like everything else in the scratch buffer.
      if ((flags & PrintBase) != 0)
      {
          switch (radix)
          {
              case 16:
                  scratch[count++] = 'x';
                  scratch[count++] = '0';
                  break;

              case 8:
                  scratch[count++] = '0';
                  break;
          }
      }

      // Sign decoration applies to base 10 only: '-' wins, then '+', then a leading space.
      if (radix == 10)
      {
          if (negative)
          {
              scratch[count++] = '-';
          }
          else if ((flags & PrintSign) != 0)
          {
              scratch[count++] = '+';
          }
          else if ((flags & PrefixSpace) != 0)
          {
              scratch[count++] = ' ';
          }
      }

      // Allocate the final string, then write the reversed characters and the padding into it.
      string result = string.FastAllocateString(Math.Max(width, count));

      unsafe
      {
          fixed (char* start = result)
          {
              char* cursor = start;
              int padCount = result.Length - count;

              if ((flags & LeftAlign) != 0)
              {
                  for (char* padEnd = cursor + padCount; cursor < padEnd; cursor++)
                  {
                      *cursor = paddingChar;
                  }

                  for (int src = count - 1; src >= 0; src--)
                  {
                      *cursor++ = scratch[src];
                  }
              }
              else
              {
                  for (int src = count - 1; src >= 0; src--)
                  {
                      *cursor++ = scratch[src];
                  }

                  for (char* padEnd = cursor + padCount; cursor < padEnd; cursor++)
                  {
                      *cursor = paddingChar;
                  }
              }

              Debug.Assert((cursor - start) == result.Length, $"Expected {cursor - start} == {result.Length}");
          }
      }

      return result;
  }
  /// <summary>
  /// Formats a 64-bit integer in the given radix, padded to at least <paramref name="width"/> characters.
  /// </summary>
  /// <param name="n">The value to format.</param>
  /// <param name="radix">The base, between MinRadix and MaxRadix inclusive.</param>
  /// <param name="width">Minimum length of the result; shorter output is padded with <paramref name="paddingChar"/>.</param>
  /// <param name="paddingChar">The character used for padding.</param>
  /// <param name="flags">
  /// Bitwise combination of LeftAlign, PrefixSpace, PrintSign, PrintBase, PrintRadixBase,
  /// PrintAsI1, PrintAsI2 and PrintAsI4.
  /// </param>
  /// <returns>The formatted string. Digits above 9 are lower-case letters.</returns>
  /// <exception cref="ArgumentException"><paramref name="radix"/> is outside the supported range.</exception>
  /// <remarks>
  /// Negative values are written as "-magnitude" in base 10 and as their two's complement bit pattern
  /// in every other base. When LeftAlign is set the padding precedes the digits; otherwise it follows them.
  /// </remarks>
  public static string LongToString(long n, int radix, int width, char paddingChar, int flags)
  {
      // Scratch space; characters are produced least-significant first and reversed on copy-out.
      Span<char> scratch = stackalloc char[67];

      if (radix < MinRadix || radix > MaxRadix)
      {
          throw new ArgumentException(SR.Arg_InvalidBase, nameof(radix));
      }

      bool negative = n < 0;

      // Only base 10 formats the magnitude; other bases keep the raw bits.
      ulong remaining;
      if (negative && radix == 10)
      {
          remaining = (ulong)(-n);
      }
      else
      {
          remaining = (ulong)n;
      }

      // Trim to the requested width.
      if ((flags & PrintAsI1) != 0)
      {
          remaining &= 0xFF;
      }
      else if ((flags & PrintAsI2) != 0)
      {
          remaining &= 0xFFFF;
      }
      else if ((flags & PrintAsI4) != 0)
      {
          remaining &= 0xFFFFFFFF;
      }

      int count;
      if (remaining == 0)
      {
          // Zero would otherwise produce no digits at all.
          scratch[0] = '0';
          count = 1;
      }
      else
      {
          count = 0;
          ulong divisor = (ulong)radix;

          // Bounded for-loop (rather than do/while) so the JIT can drop the span bounds checks.
          for (int slot = 0; slot < scratch.Length; slot++)
          {
              ulong quotient = remaining / divisor; // TODO https://github.com/dotnet/coreclr/issues/3439
              int digit = (int)(remaining - (quotient * divisor));
              remaining = quotient;

              if (digit < 10)
              {
                  scratch[slot] = (char)(digit + '0');
              }
              else
              {
                  scratch[slot] = (char)(digit + 'a' - 10);
              }

              if (remaining == 0)
              {
                  count = slot + 1;
                  break;
              }
          }

          Debug.Assert(remaining == 0, $"Expected {remaining} == 0");
      }

      // Base prefix, stored reversed like everything else in the scratch buffer.
      if (radix != 10 && (flags & PrintBase) != 0)
      {
          switch (radix)
          {
              case 16:
                  scratch[count++] = 'x';
                  scratch[count++] = '0';
                  break;

              case 8:
                  scratch[count++] = '0';
                  break;

              default:
                  if ((flags & PrintRadixBase) != 0)
                  {
                      // Reads as "<tens><ones>#" once reversed.
                      scratch[count++] = '#';
                      scratch[count++] = (char)((radix % 10) + '0');
                      scratch[count++] = (char)((radix / 10) + '0');
                  }
                  break;
          }
      }

      // Sign decoration applies to base 10 only: '-' wins, then '+', then a leading space.
      if (radix == 10)
      {
          if (negative)
          {
              scratch[count++] = '-';
          }
          else if ((flags & PrintSign) != 0)
          {
              scratch[count++] = '+';
          }
          else if ((flags & PrefixSpace) != 0)
          {
              scratch[count++] = ' ';
          }
      }

      // Allocate the final string, then write the reversed characters and the padding into it.
      string result = string.FastAllocateString(Math.Max(width, count));

      unsafe
      {
          fixed (char* start = result)
          {
              char* cursor = start;
              int padCount = result.Length - count;

              if ((flags & LeftAlign) != 0)
              {
                  for (char* padEnd = cursor + padCount; cursor < padEnd; cursor++)
                  {
                      *cursor = paddingChar;
                  }

                  for (int src = count - 1; src >= 0; src--)
                  {
                      *cursor++ = scratch[src];
                  }
              }
              else
              {
                  for (int src = count - 1; src >= 0; src--)
                  {
                      *cursor++ = scratch[src];
                  }

                  for (char* padEnd = cursor + padCount; cursor < padEnd; cursor++)
                  {
                      *cursor = paddingChar;
                  }
              }

              Debug.Assert((cursor - start) == result.Length, $"Expected {cursor - start} == {result.Length}");
          }
      }

      return result;
  }
  /// <summary>
  /// Advances <paramref name="i"/> past any run of whitespace characters in <paramref name="s"/>.
  /// </summary>
  /// <param name="s">The characters being scanned.</param>
  /// <param name="i">
  /// In: index to start from. Out: index of the first non-whitespace character at or after the
  /// start, or <c>s.Length</c> if the scan reached the end. Unchanged if it already is at or past the end.
  /// </param>
  private static void EatWhiteSpace(ReadOnlySpan<char> s, ref int i)
  {
      // Work on a local copy and publish once at the end.
      int cursor = i;

      while (cursor < s.Length && char.IsWhiteSpace(s[cursor]))
      {
          cursor++;
      }

      i = cursor;
  }
  /// <summary>
  /// Consumes consecutive digits valid for <paramref name="radix"/> and accumulates them into a 64-bit value.
  /// </summary>
  /// <param name="radix">The base of the digits; expected to be 2, 8, 10 or 16.</param>
  /// <param name="s">The characters being parsed.</param>
  /// <param name="i">In: index of the first candidate digit. Advanced past every digit consumed.</param>
  /// <param name="isUnsigned">True to accept the full unsigned range for base 10.</param>
  /// <returns>
  /// The accumulated bits reinterpreted as <see cref="long"/>. For signed base 10 the magnitude 2^63 is
  /// let through so the caller can decide whether it is a valid negative value.
  /// </returns>
  private static long GrabLongs(int radix, ReadOnlySpan<char> s, ref int i, bool isUnsigned)
  {
      const ulong SignBit = 0x8000000000000000;

      ulong accumulated = 0;
      ulong limit;

      // Only signed decimal is range-limited to 63 bits; every other form may set the sign bit.
      if (radix == 10 && !isUnsigned)
      {
          limit = 0x7FFFFFFFFFFFFFFF / 10;

          for (; i < s.Length && IsDigit(s[i], radix, out int digit); i++)
          {
              // Reject before multiplying if the next step cannot fit, or if the previous step wrapped.
              if (accumulated > limit || (accumulated & SignBit) != 0)
              {
                  Number.ThrowOverflowException(TypeCode.Int64);
              }

              accumulated = accumulated * (ulong)radix + (ulong)digit;
          }

          // The sign bit may only be set for exactly 2^63.
          if ((accumulated & SignBit) != 0 && accumulated != SignBit)
          {
              Number.ThrowOverflowException(TypeCode.Int64);
          }
      }
      else
      {
          Debug.Assert(radix == 2 || radix == 8 || radix == 10 || radix == 16);

          switch (radix)
          {
              case 10:
                  limit = 0xffffffffffffffff / 10;
                  break;
              case 16:
                  limit = 0xffffffffffffffff / 16;
                  break;
              case 8:
                  limit = 0xffffffffffffffff / 8;
                  break;
              default:
                  limit = 0xffffffffffffffff / 2;
                  break;
          }

          for (; i < s.Length && IsDigit(s[i], radix, out int digit); i++)
          {
              // The multiplication would overflow.
              if (accumulated > limit)
              {
                  Number.ThrowOverflowException(TypeCode.UInt64);
              }

              // Adding the digit wrapped around.
              ulong next = accumulated * (ulong)radix + (ulong)digit;
              if (next < accumulated)
              {
                  Number.ThrowOverflowException(TypeCode.UInt64);
              }

              accumulated = next;
          }
      }

      return (long)accumulated;
  }
  /// <summary>
  /// Consumes consecutive digits valid for <paramref name="radix"/> and accumulates them into a 32-bit value.
  /// </summary>
  /// <param name="radix">The base of the digits; expected to be 2, 8, 10 or 16.</param>
  /// <param name="s">The characters being parsed.</param>
  /// <param name="i">In: index of the first candidate digit. Advanced past every digit consumed.</param>
  /// <param name="isUnsigned">True to accept the full unsigned range for base 10.</param>
  /// <returns>
  /// The accumulated bits reinterpreted as <see cref="int"/>. For signed base 10 the magnitude 2^31 is
  /// let through so the caller can decide whether it is a valid negative value.
  /// </returns>
  private static int GrabInts(int radix, ReadOnlySpan<char> s, ref int i, bool isUnsigned)
  {
      const uint SignBit = 0x80000000;

      uint accumulated = 0;
      uint limit;

      // Only signed decimal is range-limited to 31 bits; every other form may set the sign bit.
      if (radix == 10 && !isUnsigned)
      {
          limit = (0x7FFFFFFF / 10);

          for (; i < s.Length && IsDigit(s[i], radix, out int digit); i++)
          {
              // Reject before multiplying if the next step cannot fit, or if the previous step wrapped.
              if (accumulated > limit || (accumulated & SignBit) != 0)
              {
                  Number.ThrowOverflowException(TypeCode.Int32);
              }

              accumulated = accumulated * (uint)radix + (uint)digit;
          }

          // The sign bit may only be set for exactly 2^31.
          if ((accumulated & SignBit) != 0 && accumulated != SignBit)
          {
              Number.ThrowOverflowException(TypeCode.Int32);
          }
      }
      else
      {
          Debug.Assert(radix == 2 || radix == 8 || radix == 10 || radix == 16);

          switch (radix)
          {
              case 10:
                  limit = 0xffffffff / 10;
                  break;
              case 16:
                  limit = 0xffffffff / 16;
                  break;
              case 8:
                  limit = 0xffffffff / 8;
                  break;
              default:
                  limit = 0xffffffff / 2;
                  break;
          }

          for (; i < s.Length && IsDigit(s[i], radix, out int digit); i++)
          {
              // The multiplication would overflow.
              if (accumulated > limit)
              {
                  Number.ThrowOverflowException(TypeCode.UInt32);
              }

              // Adding the digit wrapped around.
              uint next = accumulated * (uint)radix + (uint)digit;
              if (next < accumulated)
              {
                  Number.ThrowOverflowException(TypeCode.UInt32);
              }

              accumulated = next;
          }
      }

      return (int)accumulated;
  }
  /// <summary>
  /// Decodes an ASCII digit or letter into its numeric value and reports whether it is valid for <paramref name="radix"/>.
  /// </summary>
  /// <param name="c">The character to classify.</param>
  /// <param name="radix">The base the digit must be strictly less than.</param>
  /// <param name="result">
  /// 0-9 for '0'-'9', 10-35 for 'A'-'Z' or 'a'-'z' (case-insensitive), or -1 for any other character.
  /// It is set to the decoded value even when that value is too large for the radix.
  /// </param>
  /// <returns>True when <paramref name="c"/> decodes to a value below <paramref name="radix"/>.</returns>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  private static bool IsDigit(char c, int radix, out int result)
  {
      if (c >= '0' && c <= '9')
      {
          result = c - '0';
      }
      else if (c >= 'A' && c <= 'Z')
      {
          result = c - 'A' + 10;
      }
      else if (c >= 'a' && c <= 'z')
      {
          result = c - 'a' + 10;
      }
      else
      {
          // Not alphanumeric at all.
          result = -1;
          return false;
      }

      return result < radix;
  }
  565. }
  566. }