Parser.php 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315
  1. <?php
  2. /**
  3. * Lithium: the most rad php framework
  4. *
  5. * @copyright Copyright 2012, Union of RAD (http://union-of-rad.org)
  6. * @license http://opensource.org/licenses/bsd-license.php The BSD License
  7. */
  8. namespace lithium\analysis;
  9. use lithium\util\Set;
  10. use lithium\util\Collection;
  11. /**
  12. * The parser class uses PHP's tokenizer to provide methods and tools for performing static analysis
  13. * on PHP code.
  14. */
  15. class Parser extends \lithium\core\StaticObject {
  16. /**
  17. * Convenience method to get the token name of a PHP code string. If multiple tokens are
  18. * present in the string, only the first is returned.
  19. *
  20. * @param string $string String of PHP code to get the token name of, i.e. `'=>'` or `'static'`.
  21. * @param array $options
  22. * @return mixed
  23. */
  24. public static function token($string, array $options = array()) {
  25. $defaults = array('id' => false);
  26. $options += $defaults;
  27. if (empty($string) && $string !== '0') {
  28. return false;
  29. }
  30. list($token) = static::tokenize($string);
  31. return $token[($options['id']) ? 'id' : 'name'];
  32. }
  33. /**
  34. * Splits the provided `$code` into PHP language tokens.
  35. *
  36. * @param string $code Source code to be tokenized.
  37. * @param array $options Options consists of:
  38. * -'wrap': Boolean indicating whether or not to wrap the supplied
  39. * code in PHP tags.
  40. * -'ignore': An array containing PHP language tokens to ignore.
  41. * -'include': If supplied, an array of the only language tokens
  42. * to include in the output.
  43. * @return array An array of tokens in the supplied source code.
  44. */
  45. public static function tokenize($code, array $options = array()) {
  46. $defaults = array('wrap' => true, 'ignore' => array(), 'include' => array());
  47. $options += $defaults;
  48. $tokens = array();
  49. $line = 1;
  50. if ($options['wrap']) {
  51. $code = "<?php {$code}?>";
  52. }
  53. foreach (token_get_all($code) as $token) {
  54. $token = (isset($token[1])) ? $token : array(null, $token, $line);
  55. list($id, $content, $line) = $token;
  56. $name = $id ? token_name($id) : $content;
  57. if (!empty($options['include'])) {
  58. if (!in_array($name, $options['include']) && !in_array($id, $options['include'])) {
  59. continue;
  60. }
  61. }
  62. if (!empty($options['ignore'])) {
  63. if (in_array($name, $options['ignore']) || in_array($id, $options['ignore'])) {
  64. continue;
  65. }
  66. }
  67. $tokens[] = array('id' => $id, 'name' => $name, 'content' => $content, 'line' => $line);
  68. $line += count(preg_split('/\r\n|\r|\n/', $content)) - 1;
  69. }
  70. if ($options['wrap'] && empty($options['include'])) {
  71. $tokens = array_slice($tokens, 1, count($tokens) - 2);
  72. }
  73. return $tokens;
  74. }
  75. /**
  76. * Finds a pattern in a block of code.
  77. *
  78. * @param string $code
  79. * @param string $pattern
  80. * @param array $options The list of options to be used when parsing / matching `$code`:
  81. * - 'ignore': An array of token names to ignore while parsing, defaults to
  82. * `array('T_WHITESPACE')`
  83. * - 'lineBreaks': If true, all tokens in a single pattern match must appear on the
  84. * same line of code, defaults to false
  85. * - 'startOfLine': If true, the pattern must match starting with the beginning of
  86. * the line of code to be matched, defaults to false
  87. * @return array
  88. */
  89. public static function find($code, $pattern, array $options = array()) {
  90. $defaults = array(
  91. 'all' => true, 'capture' => array(), 'ignore' => array('T_WHITESPACE'),
  92. 'return' => true, 'lineBreaks' => false, 'startOfLine' => false
  93. );
  94. $options += $defaults;
  95. $results = array();
  96. $matches = array();
  97. $patternMatch = array();
  98. $ret = $options['return'];
  99. $tokens = new Collection(array('data' => static::tokenize($code, $options)));
  100. $pattern = new Collection(array('data' => static::tokenize($pattern, $options)));
  101. $breaks = function($token) use (&$tokens, &$matches, &$patternMatch, $options) {
  102. if (!$options['lineBreaks']) {
  103. return true;
  104. }
  105. if (empty($patternMatch) && !$options['startOfLine']) {
  106. return true;
  107. }
  108. if (empty($patternMatch)) {
  109. $prev = $tokens->prev();
  110. $tokens->next();
  111. } else {
  112. $prev = reset($patternMatch);
  113. }
  114. if (empty($patternMatch) && $options['startOfLine']) {
  115. return ($token['line'] > $prev['line']);
  116. }
  117. return ($token['line'] == $prev['line']);
  118. };
  119. $capture = function($token) use (&$matches, &$patternMatch, $tokens, $breaks, $options) {
  120. if (is_null($token)) {
  121. $matches = $patternMatch = array();
  122. return false;
  123. }
  124. if (empty($patternMatch)) {
  125. $prev = $tokens->prev();
  126. $tokens->next();
  127. if ($options['startOfLine'] && $token['line'] == $prev['line']) {
  128. $patternMatch = $matches = array();
  129. return false;
  130. }
  131. }
  132. $patternMatch[] = $token;
  133. if (empty($options['capture']) || !in_array($token['name'], $options['capture'])) {
  134. return true;
  135. }
  136. if (!$breaks($token)) {
  137. $matches = array();
  138. return true;
  139. }
  140. $matches[] = $token;
  141. return true;
  142. };
  143. $executors = array(
  144. '*' => function(&$tokens, &$pattern) use ($options, $capture) {
  145. $closing = $pattern->next();
  146. $tokens->prev();
  147. while (($t = $tokens->next()) && !Parser::matchToken($closing, $t)) {
  148. $capture($t);
  149. }
  150. $pattern->next();
  151. }
  152. );
  153. $tokens->rewind();
  154. $pattern->rewind();
  155. while ($tokens->valid()) {
  156. if (!$pattern->valid()) {
  157. $pattern->rewind();
  158. if (!empty($matches)) {
  159. $results[] = array_map(
  160. function($i) use ($ret) { return isset($i[$ret]) ? $i[$ret] : $i; },
  161. $matches
  162. );
  163. }
  164. $capture(null);
  165. }
  166. $p = $pattern->current();
  167. $t = $tokens->current();
  168. switch (true) {
  169. case (static::matchToken($p, $t)):
  170. $capture($t) ? $pattern->next() : $pattern->rewind();
  171. break;
  172. case (isset($executors[$p['name']])):
  173. $exec = $executors[$p['name']];
  174. $exec($tokens, $pattern);
  175. break;
  176. default:
  177. $capture(null);
  178. $pattern->rewind();
  179. break;
  180. }
  181. $tokens->next();
  182. }
  183. return $results;
  184. }
  185. /**
  186. * Token pattern matching.
  187. *
  188. * @param string $code Source code to be analyzed.
  189. * @param string $parameters An array containing token patterns to be matched.
  190. * @param array $options The list of options to be used when matching `$code`:
  191. * - 'ignore': An array of language tokens to ignore.
  192. * - 'return': If set to 'content' returns an array of matching tokens.
  193. * @return array Array of matching tokens.
  194. */
  195. public static function match($code, $parameters, array $options = array()) {
  196. $defaults = array('ignore' => array('T_WHITESPACE'), 'return' => true);
  197. $options += $defaults;
  198. $parameters = static::_prepareMatchParams($parameters);
  199. $tokens = is_array($code) ? $code : static::tokenize($code, $options);
  200. $results = array();
  201. foreach ($tokens as $i => $token) {
  202. if (!array_key_exists($token['name'], $parameters)) {
  203. if (!in_array('*', $parameters)) {
  204. continue;
  205. }
  206. }
  207. $param = $parameters[$token['name']];
  208. if (isset($param['before']) && $i > 0) {
  209. if (!in_array($tokens[$i - 1]['name'], (array) $param['before'])) {
  210. continue;
  211. }
  212. }
  213. if (isset($param['after']) && $i + 1 < count($tokens)) {
  214. if (!in_array($tokens[$i + 1]['name'], (array) $param['after'])) {
  215. continue;
  216. }
  217. }
  218. $results[] = isset($token[$options['return']]) ? $token[$options['return']] : $token;
  219. }
  220. return $results;
  221. }
  222. /**
  223. * Compares two PHP language tokens.
  224. *
  225. * @param array $pattern Pattern token.
  226. * @param array $token Token to be compared.
  227. * @return boolean Match result.
  228. */
  229. public static function matchToken($pattern, $token) {
  230. if ($pattern['name'] != $token['name']) {
  231. return false;
  232. }
  233. if (!isset($pattern['content'])) {
  234. return true;
  235. }
  236. $match = $pattern['content'];
  237. $content = $token['content'];
  238. if ($pattern['name'] === 'T_VARIABLE') {
  239. $match = substr($match, 1);
  240. $content = substr($content, 1);
  241. }
  242. switch (true) {
  243. case ($match === '_' || $match == $content):
  244. return true;
  245. }
  246. return false;
  247. }
  248. /**
  249. * Helper function to normalize parameters for token matching.
  250. *
  251. * @see lithium\analysis\Parser::match()
  252. * @param array $parameters Params to be normalized.
  253. * @return array Normalized parameters.
  254. */
  255. protected static function _prepareMatchParams($parameters) {
  256. foreach (Set::normalize($parameters) as $token => $scope) {
  257. if (strpos($token, 'T_') !== 0) {
  258. unset($parameters[$token]);
  259. foreach (array('before', 'after') as $key) {
  260. if (!isset($scope[$key])) {
  261. continue;
  262. }
  263. $items = array();
  264. foreach ((array) $scope[$key] as $item) {
  265. $items[] = (strpos($item, 'T_') !== 0) ? static::token($item) : $item;
  266. }
  267. $scope[$key] = $items;
  268. }
  269. $parameters[static::token($token)] = $scope;
  270. }
  271. }
  272. return $parameters;
  273. }
  274. }
  275. ?>