alphabet.rs 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. //! Provides [Alphabet] and constants for alphabets commonly used in the wild.
  2. use crate::PAD_BYTE;
  3. use core::fmt;
  4. #[cfg(any(feature = "std", test))]
  5. use std::error;
/// Number of symbols in an alphabet; also the length of the `symbols` table in [`Alphabet`]
/// and the exact byte length `Alphabet::new` requires of its input.
const ALPHABET_SIZE: usize = 64;
/// An alphabet defines the 64 ASCII characters (symbols) used for base64.
///
/// Common alphabets are provided as constants, and custom alphabets
/// can be made via [`Alphabet::new`] or the `TryFrom<&str>` implementation.
///
/// ```
/// let custom = base64::alphabet::Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap();
///
/// let engine = base64::engine::GeneralPurpose::new(
///     &custom,
///     base64::engine::general_purpose::PAD);
/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Alphabet {
    /// The symbol table: `symbols[i]` is the byte found at position `i` of the string
    /// the alphabet was built from.
    pub(crate) symbols: [u8; ALPHABET_SIZE],
}
  23. impl Alphabet {
  24. /// Performs no checks so that it can be const.
  25. /// Used only for known-valid strings.
  26. const fn from_str_unchecked(alphabet: &str) -> Self {
  27. let mut symbols = [0_u8; ALPHABET_SIZE];
  28. let source_bytes = alphabet.as_bytes();
  29. // a way to copy that's allowed in const fn
  30. let mut index = 0;
  31. while index < ALPHABET_SIZE {
  32. symbols[index] = source_bytes[index];
  33. index += 1;
  34. }
  35. Self { symbols }
  36. }
  37. /// Create an `Alphabet` from a string of 64 unique printable ASCII bytes.
  38. ///
  39. /// The `=` byte is not allowed as it is used for padding.
  40. pub const fn new(alphabet: &str) -> Result<Self, ParseAlphabetError> {
  41. let bytes = alphabet.as_bytes();
  42. if bytes.len() != ALPHABET_SIZE {
  43. return Err(ParseAlphabetError::InvalidLength);
  44. }
  45. {
  46. let mut index = 0;
  47. while index < ALPHABET_SIZE {
  48. let byte = bytes[index];
  49. // must be ascii printable. 127 (DEL) is commonly considered printable
  50. // for some reason but clearly unsuitable for base64.
  51. if !(byte >= 32_u8 && byte <= 126_u8) {
  52. return Err(ParseAlphabetError::UnprintableByte(byte));
  53. }
  54. // = is assumed to be padding, so cannot be used as a symbol
  55. if byte == PAD_BYTE {
  56. return Err(ParseAlphabetError::ReservedByte(byte));
  57. }
  58. // Check for duplicates while staying within what const allows.
  59. // It's n^2, but only over 64 hot bytes, and only once, so it's likely in the single digit
  60. // microsecond range.
  61. let mut probe_index = 0;
  62. while probe_index < ALPHABET_SIZE {
  63. if probe_index == index {
  64. probe_index += 1;
  65. continue;
  66. }
  67. let probe_byte = bytes[probe_index];
  68. if byte == probe_byte {
  69. return Err(ParseAlphabetError::DuplicatedByte(byte));
  70. }
  71. probe_index += 1;
  72. }
  73. index += 1;
  74. }
  75. }
  76. Ok(Self::from_str_unchecked(alphabet))
  77. }
  78. }
  79. impl TryFrom<&str> for Alphabet {
  80. type Error = ParseAlphabetError;
  81. fn try_from(value: &str) -> Result<Self, Self::Error> {
  82. Self::new(value)
  83. }
  84. }
  85. /// Possible errors when constructing an [Alphabet] from a `str`.
  86. #[derive(Debug, Eq, PartialEq)]
  87. pub enum ParseAlphabetError {
  88. /// Alphabets must be 64 ASCII bytes
  89. InvalidLength,
  90. /// All bytes must be unique
  91. DuplicatedByte(u8),
  92. /// All bytes must be printable (in the range `[32, 126]`).
  93. UnprintableByte(u8),
  94. /// `=` cannot be used
  95. ReservedByte(u8),
  96. }
  97. impl fmt::Display for ParseAlphabetError {
  98. fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
  99. match self {
  100. Self::InvalidLength => write!(f, "Invalid length - must be 64 bytes"),
  101. Self::DuplicatedByte(b) => write!(f, "Duplicated byte: {:#04x}", b),
  102. Self::UnprintableByte(b) => write!(f, "Unprintable byte: {:#04x}", b),
  103. Self::ReservedByte(b) => write!(f, "Reserved byte: {:#04x}", b),
  104. }
  105. }
  106. }
  107. #[cfg(any(feature = "std", test))]
  108. impl error::Error for ParseAlphabetError {}
/// The standard alphabet (uses `+` and `/`).
///
/// See [RFC 4648](https://datatracker.ietf.org/doc/html/rfc4648#section-4), which obsoletes
/// [RFC 3548](https://tools.ietf.org/html/rfc3548#section-3).
pub const STANDARD: Alphabet = Alphabet::from_str_unchecked(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
);
/// The URL safe alphabet (uses `-` and `_`).
///
/// See [RFC 4648](https://datatracker.ietf.org/doc/html/rfc4648#section-5), which obsoletes
/// [RFC 3548](https://tools.ietf.org/html/rfc3548#section-4).
pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
);
/// The `crypt(3)` alphabet (uses `.` and `/` as the first two values, followed by digits,
/// uppercase letters, then lowercase letters).
///
/// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses.
pub const CRYPT: Alphabet = Alphabet::from_str_unchecked(
    "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
);
/// The bcrypt alphabet (uses `.` and `/` as the first two values, followed by uppercase
/// letters, lowercase letters, then digits).
pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked(
    "./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
);
/// The alphabet used in IMAP-modified UTF-7 (uses `+` and `,`).
///
/// Identical to the standard alphabet except that the last symbol is `,` instead of `/`.
///
/// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3).
pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
);
  137. /// The alphabet used in BinHex 4.0 files.
  138. ///
  139. /// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt)
  140. pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked(
  141. "!\"#$%&'()*+,-0123456789@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdehijklmpqr",
  142. );
  143. #[cfg(test)]
  144. mod tests {
  145. use crate::alphabet::*;
  146. use std::convert::TryFrom as _;
  147. #[test]
  148. fn detects_duplicate_start() {
  149. assert_eq!(
  150. ParseAlphabetError::DuplicatedByte(b'A'),
  151. Alphabet::new("AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
  152. .unwrap_err()
  153. );
  154. }
  155. #[test]
  156. fn detects_duplicate_end() {
  157. assert_eq!(
  158. ParseAlphabetError::DuplicatedByte(b'/'),
  159. Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//")
  160. .unwrap_err()
  161. );
  162. }
  163. #[test]
  164. fn detects_duplicate_middle() {
  165. assert_eq!(
  166. ParseAlphabetError::DuplicatedByte(b'Z'),
  167. Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/")
  168. .unwrap_err()
  169. );
  170. }
  171. #[test]
  172. fn detects_length() {
  173. assert_eq!(
  174. ParseAlphabetError::InvalidLength,
  175. Alphabet::new(
  176. "xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/",
  177. )
  178. .unwrap_err()
  179. );
  180. }
  181. #[test]
  182. fn detects_padding() {
  183. assert_eq!(
  184. ParseAlphabetError::ReservedByte(b'='),
  185. Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=")
  186. .unwrap_err()
  187. );
  188. }
  189. #[test]
  190. fn detects_unprintable() {
  191. // form feed
  192. assert_eq!(
  193. ParseAlphabetError::UnprintableByte(0xc),
  194. Alphabet::new("\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
  195. .unwrap_err()
  196. );
  197. }
  198. #[test]
  199. fn same_as_unchecked() {
  200. assert_eq!(
  201. STANDARD,
  202. Alphabet::try_from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
  203. .unwrap()
  204. );
  205. }
  206. }