base2n.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. <?php
/**
 * Binary-to-text PHP Utilities
 *
 * @package binary-to-text-php
 * @link https://github.com/ademarre/binary-to-text-php
 * @author Andre DeMarre
 * @copyright 2009-2013 Andre DeMarre
 * @license http://opensource.org/licenses/MIT MIT
 */
/**
 * Class for binary-to-text encoding with a base of 2^n
 *
 * The Base2n class is for binary-to-text conversion. It employs a
 * generalization of the algorithms used by many encoding schemes that
 * use a fixed number of bits to encode each character. In other words,
 * the base is a power of 2.
 *
 * Earlier versions of this class were named
 * FixedBitNotation and FixedBitEncoding.
 *
 * @package binary-to-text-php
 */
  24. class base2n
  25. {
  26. protected $_chars;
  27. protected $_bitsPerCharacter;
  28. protected $_radix;
  29. protected $_rightPadFinalBits;
  30. protected $_padFinalGroup;
  31. protected $_padCharacter;
  32. protected $_caseSensitive;
  33. protected $_charmap;
  34. /**
  35. * Constructor
  36. *
  37. * @param integer $bitsPerCharacter Bits to use for each encoded character
  38. * @param string $chars Base character alphabet
  39. * @param boolean $caseSensitive To decode in a case-sensitive manner
  40. * @param boolean $rightPadFinalBits How to encode last character
  41. * @param boolean $padFinalGroup Add padding to end of encoded output
  42. * @param string $padCharacter Character to use for padding
  43. *
  44. * @throws InvalidArgumentException for incompatible parameters
  45. */
  46. public function __construct(
  47. $bitsPerCharacter,
  48. $chars = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-_',
  49. $caseSensitive = TRUE, $rightPadFinalBits = FALSE,
  50. $padFinalGroup = FALSE, $padCharacter = '=')
  51. {
  52. // Ensure validity of $chars
  53. if (!is_string($chars) || ($charLength = strlen($chars)) < 2) {
  54. throw new InvalidArgumentException('$chars must be a string of at least two characters');
  55. }
  56. // Ensure validity of $padCharacter
  57. if ($padFinalGroup) {
  58. if (!is_string($padCharacter) || !isset($padCharacter[0])) {
  59. throw new InvalidArgumentException('$padCharacter must be a string of one character');
  60. }
  61. if ($caseSensitive) {
  62. $padCharFound = strpos($chars, $padCharacter[0]);
  63. } else {
  64. $padCharFound = stripos($chars, $padCharacter[0]);
  65. }
  66. if ($padCharFound !== FALSE) {
  67. throw new InvalidArgumentException('$padCharacter can not be a member of $chars');
  68. }
  69. }
  70. // Ensure validity of $bitsPerCharacter
  71. if (!is_int($bitsPerCharacter)) {
  72. throw new InvalidArgumentException('$bitsPerCharacter must be an integer');
  73. }
  74. if ($bitsPerCharacter < 1) {
  75. // $bitsPerCharacter must be at least 1
  76. throw new InvalidArgumentException('$bitsPerCharacter can not be less than 1');
  77. } elseif ($charLength < 1 << $bitsPerCharacter) {
  78. // Character length of $chars is too small for $bitsPerCharacter
  79. // Find greatest acceptable value of $bitsPerCharacter
  80. $bitsPerCharacter = 1;
  81. $radix = 2;
  82. while ($charLength >= ($radix <<= 1) && $bitsPerCharacter < 8) {
  83. $bitsPerCharacter++;
  84. }
  85. $radix >>= 1;
  86. throw new InvalidArgumentException(
  87. '$bitsPerCharacter can not be more than ' . $bitsPerCharacter
  88. . ' given $chars length of ' . $charLength
  89. . ' (max radix ' . $radix . ')');
  90. } elseif ($bitsPerCharacter > 8) {
  91. // $bitsPerCharacter must not be greater than 8
  92. throw new InvalidArgumentException('$bitsPerCharacter can not be greater than 8');
  93. } else {
  94. $radix = 1 << $bitsPerCharacter;
  95. }
  96. $this->_chars = $chars;
  97. $this->_bitsPerCharacter = $bitsPerCharacter;
  98. $this->_radix = $radix;
  99. $this->_rightPadFinalBits = $rightPadFinalBits;
  100. $this->_padFinalGroup = $padFinalGroup;
  101. $this->_padCharacter = $padCharacter[0];
  102. $this->_caseSensitive = $caseSensitive;
  103. }
  104. /**
  105. * Encode a string
  106. *
  107. * @param string $rawString Binary data to encode
  108. * @return string
  109. */
  110. public function encode($rawString)
  111. {
  112. // Unpack string into an array of bytes
  113. $bytes = unpack('C*', $rawString);
  114. $byteCount = count($bytes);
  115. $encodedString = '';
  116. $byte = array_shift($bytes);
  117. $bitsRead = 0;
  118. $oldBits = 0;
  119. $chars = $this->_chars;
  120. $bitsPerCharacter = $this->_bitsPerCharacter;
  121. $rightPadFinalBits = $this->_rightPadFinalBits;
  122. $padFinalGroup = $this->_padFinalGroup;
  123. $padCharacter = $this->_padCharacter;
  124. $charsPerByte = 8 / $bitsPerCharacter;
  125. $encodedLength = $byteCount * $charsPerByte;
  126. // Generate encoded output; each loop produces one encoded character
  127. for ($c = 0; $c < $encodedLength; $c++) {
  128. // Get the bits needed for this encoded character
  129. if ($bitsRead + $bitsPerCharacter > 8) {
  130. // Not enough bits remain in this byte for the current character
  131. // Save the remaining bits before getting the next byte
  132. $oldBitCount = 8 - $bitsRead;
  133. $oldBits = $byte ^ ($byte >> $oldBitCount << $oldBitCount);
  134. $newBitCount = $bitsPerCharacter - $oldBitCount;
  135. if (!$bytes) {
  136. // Last bits; match final character and exit loop
  137. if ($rightPadFinalBits) $oldBits <<= $newBitCount;
  138. $encodedString .= $chars[$oldBits];
  139. if ($padFinalGroup) {
  140. // Array of the lowest common multiples of $bitsPerCharacter and 8, divided by 8
  141. $lcmMap = array(1 => 1, 2 => 1, 3 => 3, 4 => 1, 5 => 5, 6 => 3, 7 => 7, 8 => 1);
  142. $bytesPerGroup = $lcmMap[$bitsPerCharacter];
  143. $pads = $bytesPerGroup * $charsPerByte - ceil((strlen($rawString) % $bytesPerGroup) * $charsPerByte);
  144. $encodedString .= str_repeat($padCharacter, $pads);
  145. }
  146. break;
  147. }
  148. // Get next byte
  149. $byte = array_shift($bytes);
  150. $bitsRead = 0;
  151. } else {
  152. $oldBitCount = 0;
  153. $newBitCount = $bitsPerCharacter;
  154. }
  155. // Read only the needed bits from this byte
  156. $bits = $byte >> 8 - ($bitsRead + ($newBitCount));
  157. $bits ^= $bits >> $newBitCount << $newBitCount;
  158. $bitsRead += $newBitCount;
  159. if ($oldBitCount) {
  160. // Bits come from seperate bytes, add $oldBits to $bits
  161. $bits = ($oldBits << $newBitCount) | $bits;
  162. }
  163. $encodedString .= $chars[$bits];
  164. }
  165. return $encodedString;
  166. }
  167. /**
  168. * Decode a string
  169. *
  170. * @param string $encodedString Data to decode
  171. * @param boolean $strict Returns NULL if $encodedString contains an undecodable character
  172. * @return string
  173. */
  174. public function decode($encodedString, $strict = FALSE)
  175. {
  176. if (!$encodedString || !is_string($encodedString)) {
  177. // Empty string, nothing to decode
  178. return '';
  179. }
  180. $chars = $this->_chars;
  181. $bitsPerCharacter = $this->_bitsPerCharacter;
  182. $radix = $this->_radix;
  183. $rightPadFinalBits = $this->_rightPadFinalBits;
  184. $padFinalGroup = $this->_padFinalGroup;
  185. $padCharacter = $this->_padCharacter;
  186. $caseSensitive = $this->_caseSensitive;
  187. // Get index of encoded characters
  188. if ($this->_charmap) {
  189. $charmap = $this->_charmap;
  190. } else {
  191. $charmap = array();
  192. for ($i = 0; $i < $radix; $i++) {
  193. $charmap[$chars[$i]] = $i;
  194. }
  195. $this->_charmap = $charmap;
  196. }
  197. // The last encoded character is $encodedString[$lastNotatedIndex]
  198. $lastNotatedIndex = strlen($encodedString) - 1;
  199. // Remove trailing padding characters
  200. if ($padFinalGroup) {
  201. while ($encodedString[$lastNotatedIndex] === $padCharacter) {
  202. $encodedString = substr($encodedString, 0, $lastNotatedIndex);
  203. $lastNotatedIndex--;
  204. }
  205. }
  206. $rawString = '';
  207. $byte = 0;
  208. $bitsWritten = 0;
  209. // Convert each encoded character to a series of unencoded bits
  210. for ($c = 0; $c <= $lastNotatedIndex; $c++) {
  211. if (!$caseSensitive && !isset($charmap[$encodedString[$c]])) {
  212. // Encoded character was not found; try other case
  213. if (isset($charmap[$cUpper = strtoupper($encodedString[$c])])) {
  214. $charmap[$encodedString[$c]] = $charmap[$cUpper];
  215. } elseif (isset($charmap[$cLower = strtolower($encodedString[$c])])) {
  216. $charmap[$encodedString[$c]] = $charmap[$cLower];
  217. }
  218. }
  219. if (isset($charmap[$encodedString[$c]])) {
  220. $bitsNeeded = 8 - $bitsWritten;
  221. $unusedBitCount = $bitsPerCharacter - $bitsNeeded;
  222. // Get the new bits ready
  223. if ($bitsNeeded > $bitsPerCharacter) {
  224. // New bits aren't enough to complete a byte; shift them left into position
  225. $newBits = $charmap[$encodedString[$c]] << $bitsNeeded - $bitsPerCharacter;
  226. $bitsWritten += $bitsPerCharacter;
  227. } elseif ($c !== $lastNotatedIndex || $rightPadFinalBits) {
  228. // Zero or more too many bits to complete a byte; shift right
  229. $newBits = $charmap[$encodedString[$c]] >> $unusedBitCount;
  230. $bitsWritten = 8; //$bitsWritten += $bitsNeeded;
  231. } else {
  232. // Final bits don't need to be shifted
  233. $newBits = $charmap[$encodedString[$c]];
  234. $bitsWritten = 8;
  235. }
  236. $byte |= $newBits;
  237. if ($bitsWritten === 8 || $c === $lastNotatedIndex) {
  238. // Byte is ready to be written
  239. $rawString .= pack('C', $byte);
  240. if ($c !== $lastNotatedIndex) {
  241. // Start the next byte
  242. $bitsWritten = $unusedBitCount;
  243. $byte = ($charmap[$encodedString[$c]] ^ ($newBits << $unusedBitCount)) << 8 - $bitsWritten;
  244. }
  245. }
  246. } elseif ($strict) {
  247. // Unable to decode character; abort
  248. return NULL;
  249. }
  250. }
  251. return $rawString;
  252. }
  253. }
  254. ?>