Inflector.php 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481
  1. <?php
  2. /**
  3. * Lithium: the most rad php framework
  4. *
  5. * @copyright Copyright 2013, Union of RAD (http://union-of-rad.org)
  6. * @license http://opensource.org/licenses/mit-license.php The MIT License
  7. */
  8. namespace lithium\util;
  9. /**
  10. * Utility for modifying format of words. Change singular to plural and vice versa.
  11. * Under_score a CamelCased word and vice versa. Replace spaces and special characters.
  12. * Create a human readable word from the others. Used when consistency in naming
  13. * conventions must be enforced.
  14. */
  15. class Inflector {
  16. /**
  17. * Contains a default map of accented and special characters to ASCII characters. Can be
  18. * extended or added to using `Inflector::rules()`.
  19. *
  20. * @see lithium\util\Inflector::slug()
  21. * @see lithium\util\Inflector::rules()
  22. * @var array
  23. */
  24. protected static $_transliteration = array(
  25. '/à|á|å|â/' => 'a',
  26. '/è|é|ê|ẽ|ë/' => 'e',
  27. '/ì|í|î/' => 'i',
  28. '/ò|ó|ô|ø/' => 'o',
  29. '/ù|ú|ů|û/' => 'u',
  30. '/ç|ć|č/' => 'c',
  31. '/đ/' => 'dj',
  32. '/š/' => 's',
  33. '/ž/' => 'z',
  34. '/ñ/' => 'n',
  35. '/ä|æ/' => 'ae',
  36. '/ö/' => 'oe',
  37. '/ü/' => 'ue',
  38. '/Ä/' => 'Ae',
  39. '/Ü/' => 'Ue',
  40. '/Ö/' => 'Oe',
  41. '/ß/' => 'ss',
  42. '/Č|Ć/' => 'C',
  43. '/DŽ/' => 'Dz',
  44. '/Đ/' => 'Dj',
  45. '/Š/' => 'S',
  46. '/Ž/' => 'Z'
  47. );
  48. /**
  49. * Indexed array of words which are the same in both singular and plural form. You can add
  50. * rules to this list using `Inflector::rules()`.
  51. *
  52. * @see lithium\util\Inflector::rules()
  53. * @var array
  54. */
  55. protected static $_uninflected = array(
  56. 'Amoyese', 'bison', 'Borghese', 'bream', 'breeches', 'britches', 'buffalo', 'cantus',
  57. 'carp', 'chassis', 'clippers', 'cod', 'coitus', 'Congoese', 'contretemps', 'corps',
  58. 'debris', 'diabetes', 'djinn', 'eland', 'elk', 'equipment', 'Faroese', 'flounder',
  59. 'Foochowese', 'gallows', 'Genevese', 'Genoese', 'Gilbertese', 'graffiti',
  60. 'headquarters', 'herpes', 'hijinks', 'Hottentotese', 'information', 'innings',
  61. 'jackanapes', 'Kiplingese', 'Kongoese', 'Lucchese', 'mackerel', 'Maltese', 'media',
  62. 'mews', 'moose', 'mumps', 'Nankingese', 'news', 'nexus', 'Niasese', 'People',
  63. 'Pekingese', 'Piedmontese', 'pincers', 'Pistoiese', 'pliers', 'Portuguese',
  64. 'proceedings', 'rabies', 'rice', 'rhinoceros', 'salmon', 'Sarawakese', 'scissors',
  65. 'sea[- ]bass', 'series', 'Shavese', 'shears', 'siemens', 'species', 'swine', 'testes',
  66. 'trousers', 'trout','tuna', 'Vermontese', 'Wenchowese', 'whiting', 'wildebeest',
  67. 'Yengeese'
  68. );
  69. /**
  70. * Contains the list of pluralization rules.
  71. *
  72. * @see lithium\util\Inflector::rules()
  73. * @var array Contains the following keys:
  74. * - `'rules'`: An array of regular expression rules in the form of `'match' => 'replace'`,
  75. * which specify the matching and replacing rules for the pluralization of words.
  76. * - `'uninflected'`: A indexed array containing regex word patterns which do not get
  77. * inflected (i.e. singular and plural are the same).
  78. * - `'irregular'`: Contains key-value pairs of specific words which are not inflected
  79. * according to the rules. This is populated from `Inflector::$_plural` when the class
  80. * is loaded.
  81. */
  82. protected static $_singular = array(
  83. 'rules' => array(
  84. '/(s)tatuses$/i' => '\1\2tatus',
  85. '/^(.*)(menu)s$/i' => '\1\2',
  86. '/(quiz)zes$/i' => '\\1',
  87. '/(matr)ices$/i' => '\1ix',
  88. '/(vert|ind)ices$/i' => '\1ex',
  89. '/^(ox)en/i' => '\1',
  90. '/(alias)(es)*$/i' => '\1',
  91. '/(alumn|bacill|cact|foc|fung|nucle|radi|stimul|syllab|termin|viri?)i$/i' => '\1us',
  92. '/(cris|ax|test)es$/i' => '\1is',
  93. '/(shoe)s$/i' => '\1',
  94. '/(o)es$/i' => '\1',
  95. '/ouses$/' => 'ouse',
  96. '/([^a])uses$/' => '\1us',
  97. '/([m|l])ice$/i' => '\1ouse',
  98. '/(x|ch|ss|sh)es$/i' => '\1',
  99. '/(m)ovies$/i' => '\1\2ovie',
  100. '/(s)eries$/i' => '\1\2eries',
  101. '/([^aeiouy]|qu)ies$/i' => '\1y',
  102. '/([lr])ves$/i' => '\1f',
  103. '/(tive)s$/i' => '\1',
  104. '/(hive)s$/i' => '\1',
  105. '/(drive)s$/i' => '\1',
  106. '/([^fo])ves$/i' => '\1fe',
  107. '/(^analy)ses$/i' => '\1sis',
  108. '/((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$/i' => '\1\2sis',
  109. '/([ti])a$/i' => '\1um',
  110. '/(p)eople$/i' => '\1\2erson',
  111. '/(m)en$/i' => '\1an',
  112. '/(c)hildren$/i' => '\1\2hild',
  113. '/(n)ews$/i' => '\1\2ews',
  114. '/^(.*us)$/' => '\\1',
  115. '/s$/i' => ''
  116. ),
  117. 'irregular' => array(),
  118. 'uninflected' => array(
  119. '.*[nrlm]ese', '.*deer', '.*fish', '.*measles', '.*ois', '.*pox', '.*sheep', '.*ss'
  120. )
  121. );
  122. /**
  123. * Contains a cache map of previously singularized words.
  124. *
  125. * @var array
  126. */
  127. protected static $_singularized = array();
  128. /**
  129. * Contains the list of pluralization rules.
  130. *
  131. * @see lithium\util\Inflector::rules()
  132. * @var array Contains the following keys:
  133. * - `'rules'`: An array of regular expression rules in the form of `'match' => 'replace'`,
  134. * which specify the matching and replacing rules for the pluralization of words.
  135. * - `'uninflected'`: A indexed array containing regex word patterns which do not get
  136. * inflected (i.e. singular and plural are the same).
  137. * - `'irregular'`: Contains key-value pairs of specific words which are not inflected
  138. * according to the rules.
  139. */
  140. protected static $_plural = array(
  141. 'rules' => array(
  142. '/(s)tatus$/i' => '\1\2tatuses',
  143. '/(quiz)$/i' => '\1zes',
  144. '/^(ox)$/i' => '\1\2en',
  145. '/([m|l])ouse$/i' => '\1ice',
  146. '/(matr|vert|ind)(ix|ex)$/i' => '\1ices',
  147. '/(x|ch|ss|sh)$/i' => '\1es',
  148. '/([^aeiouy]|qu)y$/i' => '\1ies',
  149. '/(hive)$/i' => '\1s',
  150. '/(?:([^f])fe|([lr])f)$/i' => '\1\2ves',
  151. '/sis$/i' => 'ses',
  152. '/([ti])um$/i' => '\1a',
  153. '/(p)erson$/i' => '\1eople',
  154. '/(m)an$/i' => '\1en',
  155. '/(c)hild$/i' => '\1hildren',
  156. '/(buffal|tomat)o$/i' => '\1\2oes',
  157. '/(alumn|bacill|cact|foc|fung|nucle|radi|stimul|syllab|termin|vir)us$/i' => '\1i',
  158. '/us$/' => 'uses',
  159. '/(alias)$/i' => '\1es',
  160. '/(ax|cri|test)is$/i' => '\1es',
  161. '/s$/' => 's',
  162. '/^$/' => '',
  163. '/$/' => 's'
  164. ),
  165. 'irregular' => array(
  166. 'atlas' => 'atlases', 'beef' => 'beefs', 'brother' => 'brothers',
  167. 'child' => 'children', 'corpus' => 'corpuses', 'cow' => 'cows',
  168. 'ganglion' => 'ganglions', 'genie' => 'genies', 'genus' => 'genera',
  169. 'graffito' => 'graffiti', 'hoof' => 'hoofs', 'loaf' => 'loaves', 'man' => 'men',
  170. 'leaf' => 'leaves', 'money' => 'monies', 'mongoose' => 'mongooses', 'move' => 'moves',
  171. 'mythos' => 'mythoi', 'numen' => 'numina', 'occiput' => 'occiputs',
  172. 'octopus' => 'octopuses', 'opus' => 'opuses', 'ox' => 'oxen', 'penis' => 'penises',
  173. 'person' => 'people', 'sex' => 'sexes', 'soliloquy' => 'soliloquies',
  174. 'testis' => 'testes', 'trilby' => 'trilbys', 'turf' => 'turfs'
  175. ),
  176. 'uninflected' => array(
  177. '.*[nrlm]ese', '.*deer', '.*fish', '.*measles', '.*ois', '.*pox', '.*sheep'
  178. )
  179. );
  180. /**
  181. * Contains a cache map of previously pluralized words.
  182. *
  183. * @var array
  184. */
  185. protected static $_pluralized = array();
  186. /**
  187. * Contains a cache map of previously camelized words.
  188. *
  189. * @var array
  190. */
  191. protected static $_camelized = array();
  192. /**
  193. * Contains a cache map of previously underscored words.
  194. *
  195. * @var array
  196. */
  197. protected static $_underscored = array();
  198. /**
  199. * Contains a cache map of previously humanized words.
  200. *
  201. * @var array
  202. */
  203. protected static $_humanized = array();
  204. /**
  205. * Gets or adds inflection and transliteration rules.
  206. *
  207. * @param string $type Either `'transliteration'`, `'uninflected'`, `'singular'` or `'plural'`.
  208. * @param array $config
  209. * @return mixed If `$config` is empty, returns the rules list specified
  210. * by `$type`, otherwise returns `null`.
  211. */
  212. public static function rules($type, $config = array()) {
  213. $var = '_' . $type;
  214. if (!isset(static::${$var})) {
  215. return null;
  216. }
  217. if (empty($config)) {
  218. return static::${$var};
  219. }
  220. switch ($type) {
  221. case 'transliteration':
  222. $_config = array();
  223. foreach ($config as $key => $val) {
  224. if ($key[0] !== '/') {
  225. $key = '/' . join('|', array_filter(preg_split('//u', $key))) . '/';
  226. }
  227. $_config[$key] = $val;
  228. }
  229. static::$_transliteration = array_merge(
  230. $_config, static::$_transliteration, $_config
  231. );
  232. break;
  233. case 'uninflected':
  234. static::$_uninflected = array_merge(static::$_uninflected, (array) $config);
  235. static::$_plural['regexUninflected'] = null;
  236. static::$_singular['regexUninflected'] = null;
  237. foreach ((array) $config as $word) {
  238. unset(static::$_singularized[$word], static::$_pluralized[$word]);
  239. }
  240. break;
  241. case 'singular':
  242. case 'plural':
  243. if (isset(static::${$var}[key($config)])) {
  244. foreach ($config as $rType => $set) {
  245. static::${$var}[$rType] = array_merge($set, static::${$var}[$rType], $set);
  246. if ($rType === 'irregular') {
  247. $swap = ($type === 'singular' ? '_plural' : '_singular');
  248. static::${$swap}[$rType] = array_flip(static::${$var}[$rType]);
  249. }
  250. }
  251. } else {
  252. static::${$var}['rules'] = array_merge(
  253. $config, static::${$var}['rules'], $config
  254. );
  255. }
  256. break;
  257. }
  258. }
  259. /**
  260. * Changes the form of a word from singular to plural.
  261. *
  262. * @param string $word Word in singular form.
  263. * @return string Word in plural form.
  264. */
  265. public static function pluralize($word) {
  266. if (isset(static::$_pluralized[$word])) {
  267. return static::$_pluralized[$word];
  268. }
  269. extract(static::$_plural);
  270. if (!isset($regexUninflected) || !isset($regexIrregular)) {
  271. $regexUninflected = static::_enclose(join( '|', $uninflected + static::$_uninflected));
  272. $regexIrregular = static::_enclose(join( '|', array_keys($irregular)));
  273. static::$_plural += compact('regexUninflected', 'regexIrregular');
  274. }
  275. if (preg_match('/(' . $regexUninflected . ')$/i', $word, $regs)) {
  276. return static::$_pluralized[$word] = $word;
  277. }
  278. if (preg_match('/(.*)\\b(' . $regexIrregular . ')$/i', $word, $regs)) {
  279. $plural = substr($word, 0, 1) . substr($irregular[strtolower($regs[2])], 1);
  280. return static::$_pluralized[$word] = $regs[1] . $plural;
  281. }
  282. foreach ($rules as $rule => $replacement) {
  283. if (preg_match($rule, $word)) {
  284. return static::$_pluralized[$word] = preg_replace($rule, $replacement, $word);
  285. }
  286. }
  287. return static::$_pluralized[$word] = $word;
  288. }
  289. /**
  290. * Changes the form of a word from plural to singular.
  291. *
  292. * @param string $word Word in plural form.
  293. * @return string Word in singular form.
  294. */
  295. public static function singularize($word) {
  296. if (isset(static::$_singularized[$word])) {
  297. return static::$_singularized[$word];
  298. }
  299. if (empty(static::$_singular['irregular'])) {
  300. static::$_singular['irregular'] = array_flip(static::$_plural['irregular']);
  301. }
  302. extract(static::$_singular);
  303. if (!isset($regexUninflected) || !isset($regexIrregular)) {
  304. $regexUninflected = static::_enclose(join('|', $uninflected + static::$_uninflected));
  305. $regexIrregular = static::_enclose(join('|', array_keys($irregular)));
  306. static::$_singular += compact('regexUninflected', 'regexIrregular');
  307. }
  308. if (preg_match("/(.*)\\b({$regexIrregular})\$/i", $word, $regs)) {
  309. $singular = substr($word, 0, 1) . substr($irregular[strtolower($regs[2])], 1);
  310. return static::$_singularized[$word] = $regs[1] . $singular;
  311. }
  312. if (preg_match('/^(' . $regexUninflected . ')$/i', $word, $regs)) {
  313. return static::$_singularized[$word] = $word;
  314. }
  315. foreach ($rules as $rule => $replacement) {
  316. if (preg_match($rule, $word)) {
  317. return static::$_singularized[$word] = preg_replace($rule, $replacement, $word);
  318. }
  319. }
  320. return static::$_singularized[$word] = $word;
  321. }
  322. /**
  323. * Clears local in-memory caches. Can be used to force a full-cache clear when updating
  324. * inflection rules mid-way through request execution.
  325. *
  326. * @return void
  327. */
  328. public static function reset() {
  329. static::$_singularized = static::$_pluralized = array();
  330. static::$_camelized = static::$_underscored = array();
  331. static::$_humanized = array();
  332. static::$_plural['regexUninflected'] = static::$_singular['regexUninflected'] = null;
  333. static::$_plural['regexIrregular'] = static::$_singular['regexIrregular'] = null;
  334. static::$_transliteration = array(
  335. '/à|á|å|â/' => 'a',
  336. '/è|é|ê|ẽ|ë/' => 'e',
  337. '/ì|í|î/' => 'i',
  338. '/ò|ó|ô|ø/' => 'o',
  339. '/ù|ú|ů|û/' => 'u',
  340. '/ç|ć|č/' => 'c',
  341. '/đ/' => 'dj',
  342. '/š/' => 's',
  343. '/ž/' => 'z',
  344. '/ñ/' => 'n',
  345. '/ä|æ/' => 'ae',
  346. '/ö/' => 'oe',
  347. '/ü/' => 'ue',
  348. '/Ä/' => 'Ae',
  349. '/Ü/' => 'Ue',
  350. '/Ö/' => 'Oe',
  351. '/ß/' => 'ss',
  352. '/Č|Ć/' => 'C',
  353. '/DŽ/' => 'Dz',
  354. '/Đ/' => 'Dj',
  355. '/Š/' => 'S',
  356. '/Ž/' => 'Z'
  357. );
  358. }
  359. /**
  360. * Takes a under_scored word and turns it into a CamelCased or camelBack word
  361. *
  362. * @param string $word An under_scored or slugged word (i.e. `'red_bike'` or `'red-bike'`).
  363. * @param boolean $cased If false, first character is not upper cased
  364. * @return string CamelCased version of the word (i.e. `'RedBike'`).
  365. */
  366. public static function camelize($word, $cased = true) {
  367. $_word = $word;
  368. if (isset(static::$_camelized[$_word]) && $cased) {
  369. return static::$_camelized[$_word];
  370. }
  371. $word = str_replace(" ", "", ucwords(str_replace(array("_", '-'), " ", $word)));
  372. if (!$cased) {
  373. return lcfirst($word);
  374. }
  375. return static::$_camelized[$_word] = $word;
  376. }
  377. /**
  378. * Takes a CamelCased version of a word and turns it into an under_scored one.
  379. *
  380. * @param string $word CamelCased version of a word (i.e. `'RedBike'`).
  381. * @return string Under_scored version of the workd (i.e. `'red_bike'`).
  382. */
  383. public static function underscore($word) {
  384. if (isset(static::$_underscored[$word])) {
  385. return static::$_underscored[$word];
  386. }
  387. return static::$_underscored[$word] = strtolower(static::slug($word, '_'));
  388. }
  389. /**
  390. * Returns a string with all spaces converted to given replacement and
  391. * non word characters removed. Maps special characters to ASCII using
  392. * `Inflector::$_transliteration`, which can be updated using `Inflector::rules()`.
  393. *
  394. * @see lithium\util\Inflector::rules()
  395. * @param string $string An arbitrary string to convert.
  396. * @param string $replacement The replacement to use for spaces.
  397. * @return string The converted string.
  398. */
  399. public static function slug($string, $replacement = '-') {
  400. $map = static::$_transliteration + array(
  401. '/[^\w\s]/' => ' ', '/\\s+/' => $replacement,
  402. '/(?<=[a-z])([A-Z])/' => $replacement . '\\1',
  403. str_replace(':rep', preg_quote($replacement, '/'), '/^[:rep]+|[:rep]+$/') => ''
  404. );
  405. return preg_replace(array_keys($map), array_values($map), $string);
  406. }
  407. /**
  408. * Takes an under_scored version of a word and turns it into an human- readable form
  409. * by replacing underscores with a space, and by upper casing the initial character.
  410. *
  411. * @param string $word Under_scored version of a word (i.e. `'red_bike'`).
  412. * @param string $separator The separator character used in the initial string.
  413. * @return string Human readable version of the word (i.e. `'Red Bike'`).
  414. */
  415. public static function humanize($word, $separator = '_') {
  416. if (isset(static::$_humanized[$key = $word . ':' . $separator])) {
  417. return static::$_humanized[$key];
  418. }
  419. return static::$_humanized[$key] = ucwords(str_replace($separator, " ", $word));
  420. }
  421. /**
  422. * Takes a CamelCased class name and returns corresponding under_scored table name.
  423. *
  424. * @param string $className CamelCased class name (i.e. `'Post'`).
  425. * @return string Under_scored and plural table name (i.e. `'posts'`).
  426. */
  427. public static function tableize($className) {
  428. return static::pluralize(static::underscore($className));
  429. }
  430. /**
  431. * Takes a under_scored table name and returns corresponding class name.
  432. *
  433. * @param string $tableName Under_scored and plural table name (i.e. `'posts'`).
  434. * @return string CamelCased class name (i.e. `'Post'`).
  435. */
  436. public static function classify($tableName) {
  437. return static::camelize(static::singularize($tableName));
  438. }
  439. /**
  440. * Enclose a string for preg matching.
  441. *
  442. * @param string $string String to enclose
  443. * @return string Enclosed string
  444. */
  445. protected static function _enclose($string) {
  446. return '(?:' . $string . ')';
  447. }
  448. }
  449. ?>