make_tables.rs 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. use std::collections::{HashMap, HashSet};
  2. use std::iter::Iterator;
  3. fn main() {
  4. println!("pub const INVALID_VALUE: u8 = 255;");
  5. // A-Z
  6. let standard_alphabet: Vec<u8> = (0x41..0x5B)
  7. // a-z
  8. .chain(0x61..0x7B)
  9. // 0-9
  10. .chain(0x30..0x3A)
  11. // +
  12. .chain(0x2B..0x2C)
  13. // /
  14. .chain(0x2F..0x30)
  15. .collect();
  16. print_encode_table(&standard_alphabet, "STANDARD_ENCODE", 0);
  17. print_decode_table(&standard_alphabet, "STANDARD_DECODE", 0);
  18. // A-Z
  19. let url_alphabet: Vec<u8> = (0x41..0x5B)
  20. // a-z
  21. .chain(0x61..0x7B)
  22. // 0-9
  23. .chain(0x30..0x3A)
  24. // -
  25. .chain(0x2D..0x2E)
  26. // _
  27. .chain(0x5F..0x60)
  28. .collect();
  29. print_encode_table(&url_alphabet, "URL_SAFE_ENCODE", 0);
  30. print_decode_table(&url_alphabet, "URL_SAFE_DECODE", 0);
  31. // ./0123456789
  32. let crypt_alphabet: Vec<u8> = (b'.'..(b'9' + 1))
  33. // A-Z
  34. .chain(b'A'..(b'Z' + 1))
  35. // a-z
  36. .chain(b'a'..(b'z' + 1))
  37. .collect();
  38. print_encode_table(&crypt_alphabet, "CRYPT_ENCODE", 0);
  39. print_decode_table(&crypt_alphabet, "CRYPT_DECODE", 0);
  40. // ./
  41. let bcrypt_alphabet: Vec<u8> = (b'.'..(b'/' + 1))
  42. // A-Z
  43. .chain(b'A'..(b'Z' + 1))
  44. // a-z
  45. .chain(b'a'..(b'z' + 1))
  46. // 0-9
  47. .chain(b'0'..(b'9' + 1))
  48. .collect();
  49. print_encode_table(&bcrypt_alphabet, "BCRYPT_ENCODE", 0);
  50. print_decode_table(&bcrypt_alphabet, "BCRYPT_DECODE", 0);
  51. // A-Z
  52. let imap_alphabet: Vec<u8> = (0x41..0x5B)
  53. // a-z
  54. .chain(0x61..0x7B)
  55. // 0-9
  56. .chain(0x30..0x3A)
  57. // +
  58. .chain(0x2B..0x2C)
  59. // ,
  60. .chain(0x2C..0x2D)
  61. .collect();
  62. print_encode_table(&imap_alphabet, "IMAP_MUTF7_ENCODE", 0);
  63. print_decode_table(&imap_alphabet, "IMAP_MUTF7_DECODE", 0);
  64. // '!' - '-'
  65. let binhex_alphabet: Vec<u8> = (0x21..0x2E)
  66. // 0-9
  67. .chain(0x30..0x3A)
  68. // @-N
  69. .chain(0x40..0x4F)
  70. // P-V
  71. .chain(0x50..0x57)
  72. // X-[
  73. .chain(0x58..0x5C)
  74. // `-f
  75. .chain(0x60..0x66)
  76. // h-m
  77. .chain(0x68..0x6E)
  78. // p-r
  79. .chain(0x70..0x73)
  80. .collect();
  81. print_encode_table(&binhex_alphabet, "BINHEX_ENCODE", 0);
  82. print_decode_table(&binhex_alphabet, "BINHEX_DECODE", 0);
  83. }
  84. fn print_encode_table(alphabet: &[u8], const_name: &str, indent_depth: usize) {
  85. check_alphabet(alphabet);
  86. println!("#[rustfmt::skip]");
  87. println!(
  88. "{:width$}pub const {}: &[u8; 64] = &[",
  89. "",
  90. const_name,
  91. width = indent_depth
  92. );
  93. for (i, b) in alphabet.iter().enumerate() {
  94. println!(
  95. "{:width$}{}, // input {} (0x{:X}) => '{}' (0x{:X})",
  96. "",
  97. b,
  98. i,
  99. i,
  100. String::from_utf8(vec![*b as u8]).unwrap(),
  101. b,
  102. width = indent_depth + 4
  103. );
  104. }
  105. println!("{:width$}];", "", width = indent_depth);
  106. }
  107. fn print_decode_table(alphabet: &[u8], const_name: &str, indent_depth: usize) {
  108. check_alphabet(alphabet);
  109. // map of alphabet bytes to 6-bit morsels
  110. let mut input_to_morsel = HashMap::<u8, u8>::new();
  111. // standard base64 alphabet bytes, in order
  112. for (morsel, ascii_byte) in alphabet.iter().enumerate() {
  113. // truncation cast is fine here
  114. let _ = input_to_morsel.insert(*ascii_byte, morsel as u8);
  115. }
  116. println!("#[rustfmt::skip]");
  117. println!(
  118. "{:width$}pub const {}: &[u8; 256] = &[",
  119. "",
  120. const_name,
  121. width = indent_depth
  122. );
  123. for ascii_byte in 0..256 {
  124. let (value, comment) = match input_to_morsel.get(&(ascii_byte as u8)) {
  125. None => (
  126. "INVALID_VALUE".to_string(),
  127. format!("input {} (0x{:X})", ascii_byte, ascii_byte),
  128. ),
  129. Some(v) => (
  130. format!("{}", *v),
  131. format!(
  132. "input {} (0x{:X} char '{}') => {} (0x{:X})",
  133. ascii_byte,
  134. ascii_byte,
  135. String::from_utf8(vec![ascii_byte as u8]).unwrap(),
  136. *v,
  137. *v
  138. ),
  139. ),
  140. };
  141. println!(
  142. "{:width$}{}, // {}",
  143. "",
  144. value,
  145. comment,
  146. width = indent_depth + 4
  147. );
  148. }
  149. println!("{:width$}];", "", width = indent_depth);
  150. }
  /// Validates that `alphabet` can serve as a base64 alphabet.
  ///
  /// The checks run in this order:
  /// 1. the alphabet has exactly 64 symbols;
  /// 2. no symbol occurs twice;
  /// 3. every symbol is ASCII (so it is a valid single-byte UTF-8 character)
  ///    and is not `=`, which is assumed to be the padding byte.
  ///
  /// # Panics
  ///
  /// Panics on the first violated rule, with a message naming the offending
  /// symbol and its index where applicable.
  fn check_alphabet(alphabet: &[u8]) {
      assert_eq!(
          64,
          alphabet.len(),
          "alphabet must contain exactly 64 symbols"
      );

      // ensure all characters are distinct
      let mut seen: HashSet<u8> = HashSet::with_capacity(alphabet.len());
      for (index, &b) in alphabet.iter().enumerate() {
          assert!(
              seen.insert(b),
              "duplicate symbol 0x{:02X} at index {} in alphabet",
              b,
              index
          );
      }

      for (index, &b) in alphabet.iter().enumerate() {
          // must be ASCII to be valid as single UTF-8 bytes
          assert!(
              b <= 0x7F_u8,
              "symbol 0x{:02X} at index {} is not ASCII",
              b,
              index
          );
          // = is assumed to be padding, so cannot be used as a symbol
          assert_ne!(
              b'=', b,
              "'=' at index {} is reserved for padding and cannot be a symbol",
              index
          );
      }
  }