Locale.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321
  1. <?php
  2. /**
  3. * Lithium: the most rad php framework
  4. *
  5. * @copyright Copyright 2013, Union of RAD (http://union-of-rad.org)
  6. * @license http://opensource.org/licenses/bsd-license.php The BSD License
  7. */
  8. namespace lithium\g11n;
  9. use BadMethodCallException;
  10. use InvalidArgumentException;
  11. use lithium\action\Request as ActionRequest;
  12. use lithium\console\Request as ConsoleRequest;
  13. /**
  14. * The `Locale` class provides methods to deal with locale identifiers. The locale
  15. * (here: _locale identifier_) is used to distinguish among different sets of common
  16. * preferences.
  17. *
  18. * In order to avoid unnecessary overhead all methods throughout the framework accepting
  19. * a locale require it to be well-formed according to the structure laid out below. For
  20. * assuring the correct format use `Locale::canonicalize()` once on the locale.
  21. *
  22. * However the methods within this class will also work with not-so-well-formed locales.
  23. * They accept both underscores and hyphens as separators between tags and don't care about the
  24. * case of the individual tags.
  25. *
  26. * The identifier used by Lithium is based in its structure upon Unicode's
  27. * language identifier and is compliant to BCP 47.
  28. *
  29. * `language[_Script][_TERRITORY][_VARIANT]`
  30. * - `language` The spoken language, here represented by an ISO 639-1 code
  31. * (where not available, ISO 639-3 and ISO 639-5 codes are allowed too).
  32. * The tag should be lower-cased and is required.
  33. * - `Script` The tag should have its first character capitalized, all others
  34. * lower-cased. The tag is optional.
  35. * - `TERRITORY` A geographical area, here represented by an ISO 3166-1 code.
  36. * Should be all upper-cased and is optional.
  37. * - `VARIANT` Should be all upper-cased and is optional.
  38. *
  39. * @link http://www.unicode.org/reports/tr35/tr35-12.html#Identifiers
  40. * @link http://www.rfc-editor.org/rfc/bcp/bcp47.txt
  41. * @link http://www.iana.org/assignments/language-subtag-registry
  42. */
  class Locale extends \lithium\core\StaticObject {

      /**
       * Properties for locale tags.
       *
       * The keys define the order in which tags appear inside a locale
       * (`language`, `script`, `territory`, `variant`). Each `'formatter'` is
       * either one callable name or a list of callable names applied in order
       * by `Locale::canonicalize()`.
       *
       * @var array
       */
      protected static $_tags = array(
          'language' => array('formatter' => 'strtolower'),
          'script' => array('formatter' => array('strtolower', 'ucfirst')),
          'territory' => array('formatter' => 'strtoupper'),
          'variant' => array('formatter' => 'strtoupper')
      );

      /**
       * Magic method enabling `language`, `script`, `territory` and `variant`
       * methods to parse and retrieve individual tags from a locale.
       *
       * {{{
       * Locale::language('en_US'); // returns 'en'
       * Locale::territory('en_US'); // returns 'US'
       * }}}
       *
       * @see lithium\g11n\Locale::$_tags
       * @see lithium\g11n\Locale::decompose()
       * @param string $method Name of the tag to retrieve.
       * @param array $params Arguments forwarded to `Locale::decompose()`.
       * @return mixed The tag as a string or `null` if the locale does not contain it.
       * @throws BadMethodCallException If `$method` is not a known locale tag.
       * @throws InvalidArgumentException If the locale could not be parsed.
       */
      public static function __callStatic($method, $params = array()) {
          // Validate the tag name first, so that an unknown method is always
          // reported as such, independent of the arguments it was called with.
          if (!isset(static::$_tags[$method])) {
              throw new BadMethodCallException("Invalid locale tag `{$method}`.");
          }
          $tags = static::invokeMethod('decompose', $params);

          return isset($tags[$method]) ? $tags[$method] : null;
      }

      /**
       * Custom check to determine if our given magic methods can be responded to.
       *
       * @param string $method Method name.
       * @param bool $internal Internal call or not.
       * @return bool
       */
      public static function respondsTo($method, $internal = false) {
          return isset(static::$_tags[$method]) || parent::respondsTo($method, $internal);
      }

      /**
       * Composes a locale from locale tags. This is the pendant to `Locale::decompose()`.
       *
       * Tags are emitted in the order defined by `Locale::$_tags`; keys not
       * defined there are ignored.
       *
       * @param array $tags An array as obtained from `Locale::decompose()`.
       * @return string A locale with tags separated by underscores or `null`
       *         if none of the passed tags could be used to compose a locale.
       */
      public static function compose($tags) {
          $result = array();

          foreach (array_keys(static::$_tags) as $name) {
              if (isset($tags[$name])) {
                  $result[] = $tags[$name];
              }
          }
          return $result ? implode('_', $result) : null;
      }

      /**
       * Parses a locale into locale tags. This is the pendant to `Locale::compose()`.
       *
       * The tags are returned exactly as written in `$locale` (no case
       * normalization); use `Locale::canonicalize()` for that.
       *
       * @param string $locale A locale in an arbitrary form (i.e. `'en_US'` or `'EN-US'`).
       * @return array Parsed language, script, territory and variant tags. Tags
       *         not present in the locale are omitted.
       * @throws InvalidArgumentException If the locale could not be parsed.
       */
      public static function decompose($locale) {
          $regex = '(?P<language>[a-z]{2,3})';
          $regex .= '(?:[_-](?P<script>[a-z]{4}))?';
          $regex .= '(?:[_-](?P<territory>[a-z]{2}))?';
          $regex .= '(?:[_-](?P<variant>[a-z]{5,}))?';

          if (!preg_match("/^{$regex}$/i", $locale, $matches)) {
              throw new InvalidArgumentException("Locale `{$locale}` could not be parsed.");
          }
          // Keep the named groups only and drop those that matched nothing.
          return array_filter(array_intersect_key($matches, static::$_tags));
      }

      /**
       * Returns a locale in its canonical form with tags formatted properly.
       *
       * @param string $locale A locale in an arbitrary form (i.e. `'ZH-HANS-HK_REVISED'`).
       * @return string A locale in its canonical form (i.e. `'zh_Hans_HK_REVISED'`).
       * @throws InvalidArgumentException If the locale could not be parsed.
       */
      public static function canonicalize($locale) {
          $tags = static::decompose($locale);

          // Written back by key instead of iterating by reference, so no
          // reference to the last element is left behind after the loop.
          foreach ($tags as $name => $tag) {
              foreach ((array) static::$_tags[$name]['formatter'] as $formatter) {
                  $tag = $formatter($tag);
              }
              $tags[$name] = $tag;
          }
          return static::compose($tags);
      }

      /**
       * Cascades a locale.
       *
       * Usage:
       * {{{
       * Locale::cascade('en_US');
       * // returns array('en_US', 'en', 'root')
       *
       * Locale::cascade('zh_Hans_HK_REVISED');
       * // returns array('zh_Hans_HK_REVISED', 'zh_Hans_HK', 'zh_Hans', 'zh', 'root')
       * }}}
       *
       * The first element is always `$locale` exactly as passed in; the
       * following elements are composed from its parsed tags.
       *
       * @link http://www.unicode.org/reports/tr35/tr35-13.html#Locale_Inheritance
       * @param string $locale A locale in an arbitrary form (i.e. `'en_US'` or `'EN-US'`).
       * @return array Indexed array of locales (starting with the most specific one).
       * @throws InvalidArgumentException If the locale could not be parsed.
       */
      public static function cascade($locale) {
          $locales = array($locale);

          if ($locale === 'root') {
              return $locales;
          }
          $tags = static::decompose($locale);

          // Strip the most specific tag until only the language is left.
          while (count($tags) > 1) {
              array_pop($tags);
              $locales[] = static::compose($tags);
          }
          $locales[] = 'root';

          return $locales;
      }

      /**
       * Searches an array of locales for the best match to a locale. The locale
       * is iteratively simplified until either it matches one of the locales
       * in the list or the locale can't be further simplified.
       *
       * Once the locale has been simplified down to its language tag and that
       * tag is not in the list itself, the first locale in the list starting
       * with that language (i.e. `'en_US'` for `'en'`) is returned.
       *
       * This method partially implements the lookup matching scheme as described
       * in RFC 4647, section 3.4 and thus does not strictly conform to the
       * specification.
       *
       * Differences to specification:
       * - No support for wildcards in the to-be-matched locales.
       * - No support for locales with private subtags.
       * - No support for a default return value.
       * - Passed locales are required to be in canonical form (i.e. `'ja_JP'`).
       *
       * @link http://www.ietf.org/rfc/rfc4647.txt
       * @param array $locales Locales to match against `$locale`.
       * @param string $locale A locale in its canonical form (i.e. `'zh_Hans_HK_REVISED'`).
       * @return string The matched locale or `null` if there is no match.
       * @throws InvalidArgumentException If the locale could not be parsed.
       */
      public static function lookup($locales, $locale) {
          $tags = static::decompose($locale);

          while ($tags) {
              $candidate = static::compose($tags);
              $key = array_search($candidate, $locales, true);

              if ($key !== false) {
                  return $locales[$key];
              }
              if (count($tags) === 1) {
                  // Only the language is left, so `$candidate` is the language tag.
                  $prefix = $candidate . '_';

                  foreach ($locales as $currentLocale) {
                      if (strpos($currentLocale, $prefix) === 0) {
                          return $currentLocale;
                      }
                  }
              }
              array_pop($tags);
          }
          return null;
      }

      /**
       * Determines the preferred locale from a request or array. Optionally negotiates
       * the preferred locale with available locales.
       *
       * Without `$available` the first detected locale is returned as is. With
       * `$available` the detected locales are tried in order and the first
       * result of `Locale::lookup()` is returned.
       *
       * @see lithium\g11n\Locale::_preferredAction()
       * @see lithium\g11n\Locale::_preferredConsole()
       * @see lithium\g11n\Locale::lookup()
       * @param object|array $request An action or console request object or an array of locales.
       * @param array $available A list of locales to negotiate the preferred locale with.
       * @return string The preferred locale (i.e. `'fr_CA'`) or `null` if none
       *         could be determined or `$request` is of an unsupported type.
       * @throws InvalidArgumentException If negotiating and a detected locale
       *         could not be parsed.
       * @todo Rewrite this to remove hard-coded class names.
       */
      public static function preferred($request, $available = null) {
          if (is_array($request)) {
              $result = $request;
          } elseif ($request instanceof ActionRequest) {
              $result = static::_preferredAction($request);
          } elseif ($request instanceof ConsoleRequest) {
              $result = static::_preferredConsole($request);
          } else {
              return null;
          }
          if (!$available) {
              return array_shift($result);
          }
          foreach ((array) $result as $locale) {
              if ($match = static::lookup($available, $locale)) {
                  return $match;
              }
          }
          return null;
      }

      /**
       * Detects preferred locales from an action request by looking at the
       * `'Accept-Language'` header as described by RFC 2616, section 14.4.
       *
       * Entries are ordered by descending quality; entries of equal quality keep
       * the order in which they appear in the header. An entry without a quality
       * has a quality of `1`. Entries which are not language ranges (i.e. `*`)
       * or which cannot be parsed by `Locale::decompose()` are skipped.
       *
       * @link http://www.ietf.org/rfc/rfc2616.txt
       * @param object $request An instance of `lithium\action\Request`.
       * @return array Preferred locales in their canonical form (i.e. `'fr_CA'`).
       */
      protected static function _preferredAction($request) {
          // Language range followed by an optional quality. Whitespace around
          // `;` and `=` is tolerated, as are qualities like `1.0` or `0.`.
          $regex = '/^\s*(?P<locale>[a-z]{2,3}(?:-[a-z0-9]+)*)\s*';
          $regex .= '(?:;\s*q\s*=\s*(?P<quality>0(?:\.\d*)?|1(?:\.0*)?))?\s*$/i';

          // The header may be absent; never hand a non-string to `explode()`.
          $header = (string) $request->env('HTTP_ACCEPT_LANGUAGE');
          $entries = array();

          foreach (explode(',', $header) as $index => $part) {
              if (!preg_match($regex, $part, $matches)) {
                  continue;
              }
              try {
                  $locale = static::canonicalize($matches['locale']);
              } catch (InvalidArgumentException $e) {
                  // The header is client supplied; ignore what we cannot parse.
                  continue;
              }
              $hasQuality = isset($matches['quality']) && $matches['quality'] !== '';

              $entries[] = array(
                  'quality' => $hasQuality ? (float) $matches['quality'] : 1.0,
                  'index' => $index,
                  'locale' => $locale
              );
          }

          // Highest quality first. The position within the header breaks ties,
          // which keeps the result independent of the sort being stable or not.
          usort($entries, function($a, $b) {
              if ($a['quality'] !== $b['quality']) {
                  return $a['quality'] < $b['quality'] ? 1 : -1;
              }
              return $a['index'] - $b['index'];
          });

          $return = array();

          foreach ($entries as $entry) {
              $return[] = $entry['locale'];
          }
          return $return;
      }

      /**
       * Detects preferred locales from a console request by looking at certain
       * environment variables. The environment variables may be present or not
       * depending on your system. If multiple variables are present the following
       * hierarchy is used: `'LANGUAGE'`, `'LC_ALL'`, `'LANG'`.
       *
       * The value of `'LANGUAGE'` is split on colons and returned as is.
       *
       * The locales of the `'LC_ALL'` and the `'LANG'` are formatted according
       * to the posix standard: `language(_territory)(.encoding)(@modifier)`. From
       * such a value only the part in front of the encoding/modifier is returned.
       * The values `'C'` and `'POSIX'` are ignored.
       *
       * NOTE(review): the returned locales are not passed through
       * `Locale::canonicalize()`; they keep the form found in the environment.
       *
       * @link http://www.linux.com/archive/feature/53781
       * @param object $request An instance of `lithium\console\Request`.
       * @return array Preferred locales (i.e. `'fr_CA'`), empty if none were detected.
       */
      protected static function _preferredConsole($request) {
          $regex = '(?P<locale>[\w\_]+)(\.|@|$)+';

          if ($value = $request->env('LANGUAGE')) {
              return explode(':', $value);
          }
          foreach (array('LC_ALL', 'LANG') as $variable) {
              $value = $request->env($variable);

              if (!$value || $value === 'C' || $value === 'POSIX') {
                  continue;
              }
              if (preg_match("/{$regex}/", $value, $matches)) {
                  return (array) $matches['locale'];
              }
          }
          return array();
      }
  }
  295. ?>