String.php 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459
  1. <?php
  2. /**
  3. * Lithium: the most rad php framework
  4. *
  5. * @copyright Copyright 2013, Union of RAD (http://union-of-rad.org)
  6. * @license http://opensource.org/licenses/mit-license.php The MIT License
  7. */
  8. namespace lithium\util;
  9. use COM;
  10. use Closure;
  11. use Exception;
  12. /**
  13. * String manipulation utility class. Includes functionality for generating UUIDs,
  14. * {:tag} and regex replacement, and tokenization. Also includes a cryptographically-strong random
  15. * number generator, and a base64 encoder for use with DES and XDES.
  16. */
  17. class String {
  /**
   * Bit mask (`00001111`) AND-ed with the UUID version byte. Clears the four high bits,
   * where the version number is stored, and keeps the four low bits.
   */
  const UUID_CLEAR_VER = 0x0F;

  /**
   * Value (`01000000`) OR-ed into the masked version byte to mark the UUID as version 4.
   */
  const UUID_VERSION_4 = 0x40;

  /**
   * Bit mask (`00111111`) AND-ed with the UUID variant byte. Clears the two high bits,
   * where the variant is stored.
   */
  const UUID_CLEAR_VAR = 0x3F;

  /**
   * Value (`10000000`) OR-ed into the masked variant byte to select the RFC 4122 variant.
   */
  const UUID_VAR_RFC = 0x80;

  /**
   * Option flag for the `'encode'` option of `String::random()`.
   */
  const ENCODE_BASE_64 = 1;

  /**
   * A closure which, given a number of bytes, returns that amount of random bytes.
   * Populated lazily by `String::_source()`.
   *
   * @var Closure
   */
  protected static $_source;
  /**
   * Builds a random (version 4) UUID as described by RFC 4122.
   *
   * Sixteen random bytes are requested from `String::random()`; the version and variant
   * bits are then forced to their required values and the result is rendered in the
   * canonical 8-4-4-4-12 hexadecimal layout.
   *
   * @link http://www.ietf.org/rfc/rfc4122.txt RFC 4122: UUID URN Namespace
   * @return string The string representation of an RFC 4122-compliant, version 4 UUID.
   */
  public static function uuid() {
    $bytes = static::random(16);

    // Byte 6 carries the version in its high nibble, byte 8 the variant in its top bits.
    $bytes[6] = chr((ord($bytes[6]) & static::UUID_CLEAR_VER) | static::UUID_VERSION_4);
    $bytes[8] = chr((ord($bytes[8]) & static::UUID_CLEAR_VAR) | static::UUID_VAR_RFC);

    // Hex-encode once, then slice; every byte becomes two hex characters.
    $hex = bin2hex($bytes);
    $groups = array(
      substr($hex, 0, 8),
      substr($hex, 8, 4),
      substr($hex, 12, 4),
      substr($hex, 16, 4),
      substr($hex, 20, 12)
    );
    return implode('-', $groups);
  }
  /**
   * Generates random bytes for use in UUIDs and password salts, using (when available)
   * a cryptographically strong random number generator.
   *
   * {{{
   * $bits = String::random(8); // 64 bits
   * $hex = bin2hex($bits); // [0-9a-f]+
   * }}}
   *
   * The result can optionally be base64-encoded. The alphabet of `base64_encode()` is not
   * the one the target format expects, but the two are related by a bijection, and since
   * the input is random, `base64_encode()` can be used safely without losing any entropy.
   * Trailing `=` padding is stripped and every `+` is replaced with `.`.
   *
   * @param integer $bytes The number of random bytes to generate.
   * @param array $options The options used when generating random bytes:
   *        - `'encode'` _integer_: If specified, and set to `String::ENCODE_BASE_64`, the
   *          resulting value will be base64-encoded, per the notes above.
   * @return string Returns a string of random bytes (or its encoded form).
   */
  public static function random($bytes, array $options = array()) {
    $options += array('encode' => null);

    // Reuse the cached generator, initializing it on first use.
    $generator = static::$_source;
    if (!$generator) {
      $generator = static::_source();
    }
    $raw = $generator($bytes);

    if ($options['encode'] === static::ENCODE_BASE_64) {
      $encoded = rtrim(base64_encode($raw), '=');
      return strtr($encoded, '+', '.');
    }
    return $raw;
  }
  /**
   * Initializes `String::$_source` using the best available random number generator and
   * returns it. Once set, the cached closure is returned on every later call.
   *
   * Sources are tried in this order:
   *  1. `random_bytes()`, when the running PHP version provides it.
   *  2. `/dev/urandom`, when it is readable and can be opened.
   *  3. The `CAPICOM.Utilities.1` COM object, when the `COM` class is loaded
   *     ([Windows systems](http://msdn.microsoft.com/en-us/library/aa388182%28VS.85%29.aspx?ppud=4)).
   *  4. A Mersenne Twister (`mt_rand()`) loop. This last resort is NOT cryptographically
   *     secure and is only reached when none of the sources above is usable.
   *
   * @see lithium\util\String::$_source
   * @return closure Returns a closure which takes a number of bytes and returns that many
   *         random bytes.
   */
  protected static function _source() {
    if (isset(static::$_source)) {
      return static::$_source;
    }

    if (function_exists('random_bytes')) {
      return static::$_source = function($bytes) {
        // `random_bytes()` rejects lengths below 1, so short-circuit those.
        return $bytes > 0 ? random_bytes($bytes) : '';
      };
    }

    if (is_readable('/dev/urandom')) {
      $fp = fopen('/dev/urandom', 'rb');

      if ($fp) {
        // The handle is captured by the closure and kept open for later calls.
        return static::$_source = function($bytes) use ($fp) {
          return fread($fp, $bytes);
        };
      }
    }

    if (class_exists('COM', false)) {
      try {
        $com = new COM('CAPICOM.Utilities.1');

        return static::$_source = function($bytes) use ($com) {
          return base64_decode($com->GetRandom($bytes, 0));
        };
      } catch (Exception $e) {
        // CAPICOM could not be instantiated; deliberately continue to the fallback.
      }
    }

    return static::$_source = function($bytes) {
      $rand = '';

      for ($i = 0; $i < $bytes; $i++) {
        $rand .= chr(mt_rand(0, 255));
      }
      return $rand;
    };
  }
  /**
   * Hashes a string with PHP's hashing functions, according to the given options.
   * SHA-512 is used unless another algorithm is requested.
   *
   * @link http://php.net/manual/en/function.hash.php PHP Manual: `hash()`
   * @link http://php.net/manual/en/function.hash-hmac.php PHP Manual: `hash_hmac()`
   * @link http://php.net/manual/en/function.hash-algos.php PHP Manual: `hash_algos()`
   * @param string $string The string to hash.
   * @param array $options Supported options:
   *        - `'type'` _string_: Any valid hashing algorithm. See the `hash_algos()` function
   *          to determine which are available on your system. Defaults to `'sha512'`.
   *        - `'salt'` _string_: A _salt_ value which, if specified (and not empty), will be
   *          prepended to the string.
   *        - `'key'` _string_: If specified, `hash_hmac()` will be used to hash the string
   *          instead of `hash()`, with `'key'` being used as the message key.
   *        - `'raw'` _boolean_: If `true`, outputs the raw binary result of the hash
   *          operation. Defaults to `false`.
   * @return string Returns a hashed string.
   */
  public static function hash($string, array $options = array()) {
    $options += array(
      'type' => 'sha512',
      'salt' => false,
      'key' => false,
      'raw' => false
    );
    $input = $options['salt'] ? $options['salt'] . $string : $string;

    if (!$options['key']) {
      return hash($options['type'], $input, $options['raw']);
    }
    return hash_hmac($options['type'], $input, $options['key'], $options['raw']);
  }
  /**
   * Compares two strings in constant time to prevent timing attacks.
   *
   * When both arguments are strings and `hash_equals()` is available, the comparison is
   * delegated to it. Otherwise every byte pair is XOR-ed into an accumulator, so the work
   * done per byte does not depend on where (or whether) the strings differ.
   *
   * Strings of different lengths are rejected up front, so the comparison does not hide
   * the length of either argument.
   *
   * @link http://codahale.com/a-lesson-in-timing-attacks/ More about timing attacks.
   * @param string $left The left side of the comparison.
   * @param string $right The right side of the comparison.
   * @return boolean Returns a boolean indicating whether the two strings are equal.
   */
  public static function compare($left, $right) {
    if (is_string($left) && is_string($right) && function_exists('hash_equals')) {
      return hash_equals($left, $right);
    }
    if (($length = strlen($left)) !== strlen($right)) {
      return false;
    }
    $diff = 0;

    // No short-circuiting here: every position is always inspected.
    for ($i = 0; $i < $length; $i++) {
      $diff |= ord($left[$i]) ^ ord($right[$i]);
    }
    return $diff === 0;
  }
  /**
   * Replaces variable placeholders inside a string with any given data. Each key
   * in the `$data` array corresponds to a variable placeholder name in `$str`.
   *
   * Usage:
   * {{{
   * String::insert(
   *     'My name is {:name} and I am {:age} years old.',
   *     array('name' => 'Bob', 'age' => '65')
   * ); // returns 'My name is Bob and I am 65 years old.'
   * }}}
   *
   * Three replacement strategies exist:
   *  - Without `'format'` or `'escape'`, and when the first key of `$data` is not `0`,
   *    placeholders are replaced literally in a single pass. Array and closure values are
   *    replaced with an empty string.
   *  - When `$data[0]` is set and `$str` contains `?`, each `?` is replaced, in order, with
   *    the next value of `$data`.
   *  - Otherwise each key is matched with the regular expression built from `'format'`.
   *
   * @param string $str A string containing variable place-holders.
   * @param array $data A key, value array where each key stands for a place-holder variable
   *        name to be replaced with value.
   * @param array $options Available options are:
   *        - `'after'`: The character or string after the name of the variable place-holder
   *          (defaults to `'}'`).
   *        - `'before'`: The character or string in front of the name of the variable
   *          place-holder (defaults to `'{:'`).
   *        - `'clean'`: A boolean or array with instructions for `String::clean()`
   *          (defaults to `false`).
   *        - `'escape'`: The character or string used to escape the before character or
   *          string (defaults to `null`, i.e. no escaping). Setting it switches to regular
   *          expression based replacement.
   *        - `'format'`: A `sprintf()`-style regular expression, with `%s` standing for the
   *          quoted key, used for matching place-holders (defaults to `null`). The special
   *          value `'regex'` builds the expression from `'escape'`, `'before'` and
   *          `'after'`. A custom expression takes precedence over all other options
   *          except `'clean'`.
   * @return string
   */
  public static function insert($str, array $data, array $options = array()) {
    $defaults = array(
      'before' => '{:',
      'after' => '}',
      'escape' => null,
      'format' => null,
      'clean' => false
    );
    $options += $defaults;
    $format = $options['format'];

    // `key()` below reads the internal array pointer, so make sure it is at the start.
    reset($data);

    if ($format === 'regex' || (!$format && $options['escape'])) {
      // The result is itself used as a sprintf() format, hence literal `%` is doubled.
      $quote = function($value) {
        return str_replace('%', '%%', preg_quote((string) $value, '/'));
      };
      $escape = (string) $options['escape'];

      // An empty `(?<!)` can never match, so only add the look-behind for a real escape.
      $lookbehind = ($escape !== '') ? '(?<!' . $quote($escape) . ')' : '';
      $format = '/' . $lookbehind . $quote($options['before']) . '%s';
      $format .= $quote($options['after']) . '/';
    }

    if (!$format && key($data) !== 0) {
      $replace = array();

      foreach ($data as $key => $value) {
        $value = (is_array($value) || $value instanceof Closure) ? '' : $value;

        try {
          if (is_object($value) && method_exists($value, '__toString')) {
            $value = (string) $value;
          }
        } catch (Exception $e) {
          $value = '';
        }
        $replace["{$options['before']}{$key}{$options['after']}"] = $value;
      }
      $str = strtr($str, $replace);
      return $options['clean'] ? static::clean($str, $options) : $str;
    }

    if (strpos($str, '?') !== false && isset($data[0])) {
      $offset = 0;

      while (($pos = strpos($str, '?', $offset)) !== false) {
        $val = array_shift($data);
        // Continue searching after the inserted value, so a `?` inside it is left alone.
        $offset = $pos + strlen($val);
        $str = substr_replace($str, $val, $pos, 1);
      }
      return $options['clean'] ? static::clean($str, $options) : $str;
    }

    if ($format) {
      foreach ($data as $key => $value) {
        $pattern = sprintf($format, preg_quote($key, '/'));

        if (!$pattern) {
          continue;
        }
        // A callback inserts the value verbatim: `$1` or `\1` inside it are not treated
        // as back-references and no other part of the string is touched.
        $str = preg_replace_callback($pattern, function() use ($value) {
          return $value;
        }, $str);
      }
    }

    if (!isset($options['format']) && isset($options['before'])) {
      // Turn escaped place-holder openers back into plain ones.
      $str = str_replace($options['escape'] . $options['before'], $options['before'], $str);
    }
    return $options['clean'] ? static::clean($str, $options) : $str;
  }
  /**
   * Cleans up a `String::insert()` formatted string with given `$options` depending
   * on the `'clean'` option. The goal of this function is to replace all whitespace
   * and unneeded mark-up around place-holders that did not get replaced by `String::insert()`.
   *
   * The string is returned unchanged when `'clean'` is empty or names an unknown method.
   *
   * @param string $str The string to clean.
   * @param array $options Available options are:
   *        - `'after'`: characters marking the end of targeted substring (defaults to `'}'`).
   *        - `'before'`: characters marking the start of targeted substring (defaults to
   *          `'{:'`).
   *        - `'clean'`: `true`, a method name, or an array of clean options (defaults to
   *          `false`):
   *          - `'andText'`: For the `'html'` method, whether to run a `'text'` clean
   *            afterwards (defaults to `true`).
   *          - `'gap'`: Regular expression matching gaps (`'text'` method only).
   *          - `'method'`: Either `'text'` or `'html'` (defaults to `'text'`).
   *          - `'replacement'`: String to use for cleaned substrings (defaults to `''`).
   *          - `'word'`: Regular expression matching words (defaults to `'[\w,.]+'`).
   * @return string The cleaned string.
   */
  public static function clean($str, array $options = array()) {
    // Same place-holder delimiters as `String::insert()`, so the method works standalone.
    $options += array('before' => '{:', 'after' => '}', 'clean' => false);

    if (!$options['clean']) {
      return $str;
    }
    $clean = $options['clean'];

    if ($clean === true) {
      $clean = array('method' => 'text');
    } elseif (!is_array($clean)) {
      $clean = array('method' => $clean);
    }
    $clean += array('method' => 'text');

    $before = preg_quote($options['before'], '/');
    $after = preg_quote($options['after'], '/');

    switch ($clean['method']) {
      case 'html':
        $clean += array('word' => '[\w,.]+', 'andText' => true, 'replacement' => '');

        // Matches a whole attribute whose quoted value consists only of place-holders.
        $kleenex = sprintf(
          '/[\s]*[a-z]+=(")(%s%s%s[\s]*)+\\1/i',
          $before,
          $clean['word'],
          $after
        );
        $str = preg_replace($kleenex, $clean['replacement'], $str);

        if ($clean['andText']) {
          $options['clean'] = array('method' => 'text');
          $str = static::clean($str, $options);
        }
      break;
      case 'text':
        $clean += array(
          'word' => '[\w,.]+', 'gap' => '[\s]*(?:(?:and|or|,)[\s]*)?', 'replacement' => ''
        );

        // Matches a place-holder together with a gap after it, before it, or on both sides.
        $kleenex = sprintf(
          '/(%s%s%s%s|%s%s%s%s|%s%s%s%s%s)/',
          $before, $clean['word'], $after, $clean['gap'],
          $clean['gap'], $before, $clean['word'], $after,
          $clean['gap'], $before, $clean['word'], $after, $clean['gap']
        );
        $str = preg_replace($kleenex, $clean['replacement'], $str);
      break;
    }
    return $str;
  }
  /**
   * Runs the regular expression `$regex` against `$str` and returns one part of the match.
   *
   * @param string $regex The regular expression to use.
   * @param string $str The string to run the extraction on.
   * @param integer $index The index of the part to return: `0` for the whole match, or the
   *        index of a capturing group.
   * @return mixed The requested part as a string, `null` if the expression matched but has
   *         no part at `$index`, or `false` if the expression did not match.
   */
  public static function extract($regex, $str, $index = 0) {
    $parts = array();

    if (preg_match($regex, $str, $parts)) {
      return isset($parts[$index]) ? $parts[$index] : null;
    }
    return false;
  }
  /**
   * Tokenizes a string using `$options['separator']`, ignoring any instances of
   * `$options['separator']` that appear between `$options['leftBound']` and
   * `$options['rightBound']`.
   *
   * The separator and both bounds are compared against single characters of `$data`.
   * When both bounds are the same character (e.g. a quote), occurrences alternately open
   * and close the protected scope. Every resulting token is trimmed.
   *
   * @param string $data The data to tokenize.
   * @param array $options Options to use when tokenizing:
   *        - `'separator'` _string_: The token to split the data on (defaults to `','`).
   *        - `'leftBound'` _string_: Left scope-enclosing boundary (defaults to `'('`).
   *        - `'rightBound'` _string_: Right scope-enclosing boundary (defaults to `')'`).
   * @return array Returns an array of tokens. Empty values and arrays passed as `$data`
   *         are returned as they are.
   */
  public static function tokenize($data, array $options = array()) {
    $options += array('separator' => ',', 'leftBound' => '(', 'rightBound' => ')');

    // Read the options explicitly; importing them with `extract()` would allow an
    // option key to overwrite a local variable such as `$data`.
    $separator = $options['separator'];
    $leftBound = $options['leftBound'];
    $rightBound = $options['rightBound'];

    if (!$data || is_array($data)) {
      return $data;
    }
    $depth = 0;
    $offset = 0;
    $buffer = '';
    $results = array();
    $length = strlen($data);
    $open = false;

    while ($offset <= $length) {
      // Find the nearest upcoming separator or boundary, if any.
      $next = false;

      foreach (array($separator, $leftBound, $rightBound) as $marker) {
        $pos = strpos($data, $marker, $offset);

        if ($pos !== false && ($next === false || $pos < $next)) {
          $next = $pos;
        }
      }

      if ($next === false) {
        // Nothing left to interpret: the remainder belongs to the last token.
        $results[] = $buffer . substr($data, $offset);
        break;
      }
      $char = $data[$next];
      $buffer .= substr($data, $offset, $next - $offset);

      if ($char === $separator && $depth === 0) {
        $results[] = $buffer;
        $buffer = '';
      } else {
        $buffer .= $char;
      }

      if ($leftBound !== $rightBound) {
        if ($char === $leftBound) {
          $depth++;
        }
        if ($char === $rightBound) {
          $depth--;
        }
      } elseif ($char === $leftBound) {
        // Identical bounds toggle: the first one opens the scope, the next closes it.
        $depth += $open ? -1 : 1;
        $open = !$open;
      }
      $offset = $next + 1;
    }

    if (!$results && $buffer) {
      $results[] = $buffer;
    }
    return $results ? array_map('trim', $results) : array();
  }
  413. }
  414. ?>