//! Provides [`Alphabet`] and constants for alphabets commonly used in the wild.
- use crate::PAD_BYTE;
- use core::fmt;
- #[cfg(any(feature = "std", test))]
- use std::error;
/// Number of symbols in a base64 alphabet.
const ALPHABET_SIZE: usize = 64;

/// An alphabet defines the 64 ASCII characters (symbols) used for base64.
///
/// Common alphabets are provided as constants, and custom alphabets
/// can be made via [`Alphabet::new`] or the `TryFrom<&str>` implementation.
///
/// # Examples
///
/// ```
/// let custom = base64::alphabet::Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap();
///
/// let engine = base64::engine::GeneralPurpose::new(
///     &custom,
///     base64::engine::general_purpose::PAD);
/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Alphabet {
    /// The alphabet's bytes, stored in the same order as the string they were built from.
    pub(crate) symbols: [u8; ALPHABET_SIZE],
}
- impl Alphabet {
- /// Performs no checks so that it can be const.
- /// Used only for known-valid strings.
- const fn from_str_unchecked(alphabet: &str) -> Self {
- let mut symbols = [0_u8; ALPHABET_SIZE];
- let source_bytes = alphabet.as_bytes();
- // a way to copy that's allowed in const fn
- let mut index = 0;
- while index < ALPHABET_SIZE {
- symbols[index] = source_bytes[index];
- index += 1;
- }
- Self { symbols }
- }
- /// Create an `Alphabet` from a string of 64 unique printable ASCII bytes.
- ///
- /// The `=` byte is not allowed as it is used for padding.
- pub const fn new(alphabet: &str) -> Result<Self, ParseAlphabetError> {
- let bytes = alphabet.as_bytes();
- if bytes.len() != ALPHABET_SIZE {
- return Err(ParseAlphabetError::InvalidLength);
- }
- {
- let mut index = 0;
- while index < ALPHABET_SIZE {
- let byte = bytes[index];
- // must be ascii printable. 127 (DEL) is commonly considered printable
- // for some reason but clearly unsuitable for base64.
- if !(byte >= 32_u8 && byte <= 126_u8) {
- return Err(ParseAlphabetError::UnprintableByte(byte));
- }
- // = is assumed to be padding, so cannot be used as a symbol
- if byte == PAD_BYTE {
- return Err(ParseAlphabetError::ReservedByte(byte));
- }
- // Check for duplicates while staying within what const allows.
- // It's n^2, but only over 64 hot bytes, and only once, so it's likely in the single digit
- // microsecond range.
- let mut probe_index = 0;
- while probe_index < ALPHABET_SIZE {
- if probe_index == index {
- probe_index += 1;
- continue;
- }
- let probe_byte = bytes[probe_index];
- if byte == probe_byte {
- return Err(ParseAlphabetError::DuplicatedByte(byte));
- }
- probe_index += 1;
- }
- index += 1;
- }
- }
- Ok(Self::from_str_unchecked(alphabet))
- }
- }
- impl TryFrom<&str> for Alphabet {
- type Error = ParseAlphabetError;
- fn try_from(value: &str) -> Result<Self, Self::Error> {
- Self::new(value)
- }
- }
/// Reasons a `str` can be rejected when constructing an [Alphabet] from it.
///
/// Variants that carry a `u8` hold the byte that triggered the error.
#[derive(Debug, PartialEq, Eq)]
pub enum ParseAlphabetError {
    /// The input was not exactly 64 bytes long.
    InvalidLength,
    /// The contained byte appears more than once; every symbol must be unique.
    DuplicatedByte(u8),
    /// The contained byte is not printable, i.e. it lies outside the range `[32, 126]`.
    UnprintableByte(u8),
    /// The contained byte is `=`, which cannot be used as a symbol.
    ReservedByte(u8),
}
- impl fmt::Display for ParseAlphabetError {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- match self {
- Self::InvalidLength => write!(f, "Invalid length - must be 64 bytes"),
- Self::DuplicatedByte(b) => write!(f, "Duplicated byte: {:#04x}", b),
- Self::UnprintableByte(b) => write!(f, "Unprintable byte: {:#04x}", b),
- Self::ReservedByte(b) => write!(f, "Reserved byte: {:#04x}", b),
- }
- }
- }
- #[cfg(any(feature = "std", test))]
- impl error::Error for ParseAlphabetError {}
- /// The standard alphabet (uses `+` and `/`).
- ///
- /// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-3).
- pub const STANDARD: Alphabet = Alphabet::from_str_unchecked(
- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
- );
- /// The URL safe alphabet (uses `-` and `_`).
- ///
- /// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-4).
- pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked(
- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
- );
- /// The `crypt(3)` alphabet (uses `.` and `/` as the first two values).
- ///
- /// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses.
- pub const CRYPT: Alphabet = Alphabet::from_str_unchecked(
- "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
- );
- /// The bcrypt alphabet.
- pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked(
- "./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
- );
- /// The alphabet used in IMAP-modified UTF-7 (uses `+` and `,`).
- ///
- /// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3)
- pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked(
- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
- );
- /// The alphabet used in BinHex 4.0 files.
- ///
- /// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt)
- pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked(
- "!\"#$%&'()*+,-0123456789@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdehijklmpqr",
- );
#[cfg(test)]
mod tests {
    //! Unit tests for `Alphabet::new` validation and the `TryFrom<&str>` conversion.

    use crate::alphabet::*;
    use std::convert::TryFrom as _;

    #[test]
    fn detects_duplicate_start() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'A'),
            Alphabet::new("AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_duplicate_end() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'/'),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_duplicate_middle() {
        assert_eq!(
            ParseAlphabetError::DuplicatedByte(b'Z'),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_length() {
        // too long
        assert_eq!(
            ParseAlphabetError::InvalidLength,
            Alphabet::new(
                "xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/",
            )
            .unwrap_err()
        );
    }

    #[test]
    fn detects_length_too_short() {
        // one byte short of a full alphabet
        assert_eq!(
            ParseAlphabetError::InvalidLength,
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+")
                .unwrap_err()
        );
        // empty input
        assert_eq!(
            ParseAlphabetError::InvalidLength,
            Alphabet::new("").unwrap_err()
        );
    }

    #[test]
    fn detects_padding() {
        assert_eq!(
            ParseAlphabetError::ReservedByte(b'='),
            Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_unprintable() {
        // form feed
        assert_eq!(
            ParseAlphabetError::UnprintableByte(0xc),
            Alphabet::new("\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_unprintable_del() {
        // DEL (127) sits just above the accepted range and must be rejected
        assert_eq!(
            ParseAlphabetError::UnprintableByte(0x7f),
            Alphabet::new("\x7fBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn detects_non_ascii() {
        // U+00E9 encodes as the two bytes 0xc3 0xa9, so the input is still 64 bytes long;
        // the first of those bytes is outside the printable range.
        assert_eq!(
            ParseAlphabetError::UnprintableByte(0xc3),
            Alphabet::new("\u{e9}CDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap_err()
        );
    }

    #[test]
    fn accepts_printable_range_endpoints() {
        // space (32) and tilde (126) are the inclusive bounds of the accepted range
        let alphabet =
            Alphabet::new(" ~CDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap();
        assert_eq!(b' ', alphabet.symbols[0]);
        assert_eq!(b'~', alphabet.symbols[1]);
    }

    #[test]
    fn same_as_unchecked() {
        assert_eq!(
            STANDARD,
            Alphabet::try_from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
                .unwrap()
        );
    }
}
|