MessageFormatter.php 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398
  1. <?php
  2. /**
  3. * @link http://www.yiiframework.com/
  4. * @copyright Copyright (c) 2008 Yii Software LLC
  5. * @license http://www.yiiframework.com/license/
  6. */
  7. namespace yii\i18n;
  8. use yii\base\Component;
  9. use yii\base\NotSupportedException;
  10. /**
  11. * MessageFormatter allows formatting messages via [ICU message format](http://userguide.icu-project.org/formatparse/messages)
  12. *
  13. * This class enhances the message formatter class provided by the PHP intl extension.
  14. *
  15. * The following enhancements are provided:
  16. *
  17. * - It accepts named arguments and mixed numeric and named arguments.
  18. * - Issues no error when an insufficient number of arguments have been provided. Instead, the placeholders will not be
  19. * substituted.
  20. * - Fixes PHP 5.5 weird placeholder replacement in case no arguments are provided at all (https://bugs.php.net/bug.php?id=65920).
  21. * - Offers limited support for message formatting in case PHP intl extension is not installed.
  22. * However it is highly recommended that you install [PHP intl extension](http://php.net/manual/en/book.intl.php) if you want
  23. * to use MessageFormatter features.
  24. *
  25. * The fallback implementation only supports the following message formats:
  26. * - plural formatting for English ('one' and 'other' selectors)
  27. * - select format
  28. * - simple parameters
  29. * - integer number parameters
  30. *
  31. * The fallback implementation does NOT support the ['apostrophe-friendly' syntax](http://www.php.net/manual/en/messageformatter.formatmessage.php).
  32. * Also messages that are working with the fallback implementation are not necessarily compatible with the
  33. * PHP intl MessageFormatter so do not rely on the fallback if you are able to install intl extension somehow.
  34. *
  35. * @property integer $errorCode Code of the last error. This property is read-only.
  36. * @property string $errorMessage Description of the last error. This property is read-only.
  37. *
  38. * @author Alexander Makarov <[email protected]>
  39. * @author Carsten Brandt <[email protected]>
  40. * @since 2.0
  41. */
  class MessageFormatter extends Component
  {
      /**
       * @var integer code of the last error; 0 when the last operation succeeded.
       */
      private $_errorCode = 0;
      /**
       * @var string description of the last error; empty when the last operation succeeded.
       */
      private $_errorMessage = '';

      /**
       * Get the error code from the last operation
       * @link http://php.net/manual/en/messageformatter.geterrorcode.php
       * @return integer Code of the last error (0 when no error occurred, -1 for errors detected
       * by this class itself, otherwise the code reported by the intl extension).
       */
      public function getErrorCode()
      {
          return $this->_errorCode;
      }

      /**
       * Get the error text from the last operation
       * @link http://php.net/manual/en/messageformatter.geterrormessage.php
       * @return string Description of the last error.
       */
      public function getErrorMessage()
      {
          return $this->_errorMessage;
      }

      /**
       * Formats a message via [ICU message format](http://userguide.icu-project.org/formatparse/messages)
       *
       * It uses the PHP intl extension's [MessageFormatter](http://www.php.net/manual/en/class.messageformatter.php)
       * and works around some issues.
       * If PHP intl is not installed a fallback will be used that supports a subset of the ICU message format.
       *
       * The error code and message are reset at the start of every call and set again when `FALSE` is returned.
       *
       * @param string $pattern The pattern string to insert parameters into.
       * @param array $params The array of name value pairs to insert into the format string.
       * @param string $language The locale to use for formatting locale-dependent parts
       * @return string|boolean The formatted pattern string or `FALSE` if an error occurred
       */
      public function format($pattern, $params, $language)
      {
          $this->_errorCode = 0;
          $this->_errorMessage = '';

          // Without parameters there is nothing to substitute, so the pattern is returned untouched.
          if ($params === []) {
              return $pattern;
          }

          if (!class_exists('MessageFormatter', false)) {
              return $this->fallbackFormat($pattern, $params, $language);
          }

          if (version_compare(PHP_VERSION, '5.5.0', '<') || version_compare(INTL_ICU_VERSION, '4.8', '<')) {
              // For these PHP/ICU versions named placeholders are rewritten to numeric ones.
              // $newParams must start out as an array: it is only filled for placeholders that
              // actually match a given parameter and would otherwise remain null.
              $newParams = [];
              $pattern = $this->replaceNamedArguments($pattern, $params, $newParams);
              if ($pattern === false) {
                  $this->_errorCode = -1;
                  $this->_errorMessage = "Message pattern is invalid.";
                  return false;
              }
              $params = $newParams;
          }

          try {
              $formatter = new \MessageFormatter($language, $pattern);
          } catch (\Exception $e) {
              // Newer PHP versions throw (IntlException) on an invalid pattern instead of yielding null.
              $this->_errorCode = $e->getCode();
              $this->_errorMessage = "Message pattern is invalid: " . $e->getMessage();
              return false;
          }
          if ($formatter === null) {
              // PHP 5.x: the constructor of the intl class may produce null on failure.
              $this->_errorCode = intl_get_error_code();
              $this->_errorMessage = "Message pattern is invalid: " . intl_get_error_message();
              return false;
          }

          $result = $formatter->format($params);
          if ($result === false) {
              $this->_errorCode = $formatter->getErrorCode();
              $this->_errorMessage = $formatter->getErrorMessage();
              return false;
          }

          return $result;
      }

      /**
       * Parses an input string according to an [ICU message format](http://userguide.icu-project.org/formatparse/messages) pattern.
       *
       * It uses the PHP intl extension's [MessageFormatter::parse()](http://www.php.net/manual/en/messageformatter.parsemessage.php)
       * and adds support for named arguments.
       * Usage of this method requires PHP intl extension to be installed.
       *
       * @param string $pattern The pattern to use for parsing the message.
       * @param string $message The message to parse, conforming to the pattern.
       * @param string $language The locale to use for formatting locale-dependent parts
       * @return array|boolean An array containing items extracted (indexed by the placeholder names used
       * in the pattern), or `FALSE` on error.
       * @throws \yii\base\NotSupportedException when PHP intl extension is not installed.
       */
      public function parse($pattern, $message, $language)
      {
          $this->_errorCode = 0;
          $this->_errorMessage = '';

          if (!class_exists('MessageFormatter', false)) {
              throw new NotSupportedException('You have to install PHP intl extension to use this feature.');
          }

          // replace named arguments
          if (($tokens = $this->tokenizePattern($pattern)) === false) {
              $this->_errorCode = -1;
              $this->_errorMessage = "Message pattern is invalid.";
              return false;
          }

          // placeholder name => numeric index, in order of first appearance
          $map = [];
          foreach ($tokens as $i => $token) {
              if (is_array($token)) {
                  $param = trim($token[0]);
                  if (!isset($map[$param])) {
                      $map[$param] = count($map);
                  }
                  $token[0] = $map[$param];
                  $tokens[$i] = '{' . implode(',', $token) . '}';
              }
          }
          $pattern = implode('', $tokens);
          // numeric index => placeholder name, used to translate the parse result back
          $map = array_flip($map);

          try {
              $formatter = new \MessageFormatter($language, $pattern);
          } catch (\Exception $e) {
              // Newer PHP versions throw on an invalid pattern; report it like the null case below.
              $formatter = null;
          }
          if ($formatter === null) {
              $this->_errorCode = -1;
              $this->_errorMessage = "Message pattern is invalid.";
              return false;
          }

          $result = $formatter->parse($message);
          if ($result === false) {
              $this->_errorCode = $formatter->getErrorCode();
              $this->_errorMessage = $formatter->getErrorMessage();
              return false;
          }

          $values = [];
          foreach ($result as $key => $value) {
              $values[$map[$key]] = $value;
          }

          return $values;
      }

      /**
       * Replace named placeholders with numeric placeholders and quote unused.
       *
       * Placeholders for which no value is given are wrapped in apostrophes so that they
       * are left in the output instead of being substituted.
       *
       * @param string $pattern The pattern string to replace things into.
       * @param array $givenParams The array of values to insert into the format string.
       * @param array $resultingParams Modified array of parameters (numeric index => value); only
       * parameters that are referenced by the pattern are added.
       * @param array $map Mapping of placeholder name => numeric index, shared across recursive calls.
       * @return string|boolean The pattern string with placeholders replaced, or `FALSE` if the
       * pattern (or one of its plural/select sub-patterns) is invalid.
       */
      private function replaceNamedArguments($pattern, $givenParams, &$resultingParams, &$map = [])
      {
          if (($tokens = $this->tokenizePattern($pattern)) === false) {
              return false;
          }
          foreach ($tokens as $i => $token) {
              if (!is_array($token)) {
                  continue;
              }
              $param = trim($token[0]);
              if (isset($givenParams[$param])) {
                  // if param is given, replace it with a number
                  if (!isset($map[$param])) {
                      $map[$param] = count($map);
                      // make sure only used params are passed to format method
                      $resultingParams[$map[$param]] = $givenParams[$param];
                  }
                  $token[0] = $map[$param];
                  $quote = "";
              } else {
                  // quote unused token
                  $quote = "'";
              }
              $type = isset($token[1]) ? trim($token[1]) : 'none';
              // replace plural and select format recursively
              if ($type === 'plural' || $type === 'select') {
                  if (!isset($token[2])) {
                      return false;
                  }
                  $subtokens = $this->tokenizePattern($token[2]);
                  if ($subtokens === false) {
                      return false;
                  }
                  $c = count($subtokens);
                  // sub-tokens alternate: selector text, then the message for that selector
                  for ($k = 0; $k + 1 < $c; $k++) {
                      if (is_array($subtokens[$k]) || !is_array($subtokens[++$k])) {
                          return false;
                      }
                      $subpattern = $this->replaceNamedArguments(implode(',', $subtokens[$k]), $givenParams, $resultingParams, $map);
                      if ($subpattern === false) {
                          return false;
                      }
                      $subtokens[$k] = $quote . '{' . $quote . $subpattern . $quote . '}' . $quote;
                  }
                  $token[2] = implode('', $subtokens);
              }
              $tokens[$i] = $quote . '{' . $quote . implode(',', $token) . $quote . '}' . $quote;
          }

          return implode('', $tokens);
      }

      /**
       * Fallback implementation for MessageFormatter::formatMessage
       * @param string $pattern The pattern string to insert things into.
       * @param array $args The array of values to insert into the format string
       * @param string $locale The locale to use for formatting locale-dependent parts
       * @return string|boolean The formatted pattern string or `FALSE` if an error occurred
       * @throws \yii\base\NotSupportedException when the pattern uses a format the fallback does not support.
       */
      protected function fallbackFormat($pattern, $args, $locale)
      {
          if (($tokens = $this->tokenizePattern($pattern)) === false) {
              $this->_errorCode = -1;
              $this->_errorMessage = "Message pattern is invalid.";
              return false;
          }
          foreach ($tokens as $i => $token) {
              if (!is_array($token)) {
                  // plain text between placeholders is kept as is
                  continue;
              }
              if (($tokens[$i] = $this->parseToken($token, $args, $locale)) === false) {
                  $this->_errorCode = -1;
                  $this->_errorMessage = "Message pattern is invalid.";
                  return false;
              }
          }

          return implode('', $tokens);
      }

      /**
       * Tokenizes a pattern by separating normal text from replaceable patterns
       *
       * The result alternates between plain text (strings) and placeholders. Each placeholder
       * is an array with up to three elements: the content between the outermost braces split
       * at the first two commas (argument name, type, style).
       *
       * @param string $pattern pattern to tokenize
       * @return array|bool array of tokens or false on failure (unbalanced braces)
       */
      private function tokenizePattern($pattern)
      {
          $depth = 1;
          if (($start = $pos = mb_strpos($pattern, '{')) === false) {
              return [$pattern];
          }
          $tokens = [mb_substr($pattern, 0, $pos)];
          while (true) {
              $open = mb_strpos($pattern, '{', $pos + 1);
              $close = mb_strpos($pattern, '}', $pos + 1);
              if ($open === false && $close === false) {
                  break;
              }
              if ($close === false) {
                  // There is another opening brace but no closing brace left, so the pattern can
                  // never be balanced. Bail out here: continuing would reset the scan position
                  // to the beginning of the pattern and loop forever.
                  return false;
              }
              if ($open === false) {
                  $open = mb_strlen($pattern);
              }
              if ($close > $open) {
                  $depth++;
                  $pos = $open;
              } else {
                  $depth--;
                  $pos = $close;
              }
              if ($depth == 0) {
                  // a top-level placeholder has been closed: emit it and the text up to the next '{'
                  $tokens[] = explode(',', mb_substr($pattern, $start + 1, $pos - $start - 1), 3);
                  $start = $pos + 1;
                  $tokens[] = mb_substr($pattern, $start, $open - $start);
                  $start = $open;
              }
          }
          if ($depth != 0) {
              return false;
          }

          return $tokens;
      }

      /**
       * Parses a token
       *
       * Placeholders without a matching argument are returned unchanged (including braces).
       *
       * @param array $token the token to parse
       * @param array $args arguments to replace
       * @param string $locale the locale
       * @return bool|string parsed token or false on failure
       * @throws \yii\base\NotSupportedException when unsupported formatting is used.
       */
      private function parseToken($token, $args, $locale)
      {
          // parsing pattern based on ICU grammar:
          // http://icu-project.org/apiref/icu4c/classMessageFormat.html#details
          $param = trim($token[0]);
          if (!isset($args[$param])) {
              return '{' . implode(',', $token) . '}';
          }
          $arg = $args[$param];

          $type = isset($token[1]) ? trim($token[1]) : 'none';
          switch ($type) {
              case 'date':
              case 'time':
              case 'spellout':
              case 'ordinal':
              case 'duration':
              case 'choice':
              case 'selectordinal':
                  throw new NotSupportedException("Message format '$type' is not supported. You have to install PHP intl extension to use this feature.");
              case 'number':
                  if (is_int($arg) && (!isset($token[2]) || trim($token[2]) === 'integer')) {
                      return $arg;
                  }
                  throw new NotSupportedException("Message format 'number' is only supported for integer values. You have to install PHP intl extension to use this feature.");
              case 'none':
                  return $arg;
              case 'select':
                  /* http://icu-project.org/apiref/icu4c/classicu_1_1SelectFormat.html
                  selectStyle = (selector '{' message '}')+
                  */
                  if (!isset($token[2])) {
                      return false;
                  }
                  $select = $this->tokenizePattern($token[2]);
                  if ($select === false) {
                      return false;
                  }
                  $c = count($select);
                  $message = false;
                  // tokens alternate: selector text, then the message for that selector
                  for ($i = 0; $i + 1 < $c; $i++) {
                      if (is_array($select[$i]) || !is_array($select[$i + 1])) {
                          return false;
                      }
                      $selector = trim($select[$i++]);
                      // 'other' is only used as long as nothing has matched yet; an exact match always wins
                      if (($message === false && $selector === 'other') || $selector == $arg) {
                          $message = implode(',', $select[$i]);
                      }
                  }
                  if ($message !== false) {
                      return $this->fallbackFormat($message, $args, $locale);
                  }
                  break;
              case 'plural':
                  /* http://icu-project.org/apiref/icu4c/classicu_1_1PluralFormat.html
                  pluralStyle = [offsetValue] (selector '{' message '}')+
                  offsetValue = "offset:" number
                  selector = explicitValue | keyword
                  explicitValue = '=' number // adjacent, no white space in between
                  keyword = [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+
                  message: see MessageFormat
                  */
                  if (!isset($token[2])) {
                      return false;
                  }
                  $plural = $this->tokenizePattern($token[2]);
                  if ($plural === false) {
                      return false;
                  }
                  $c = count($plural);
                  $message = false;
                  $offset = 0;
                  for ($i = 0; $i + 1 < $c; $i++) {
                      if (is_array($plural[$i]) || !is_array($plural[$i + 1])) {
                          return false;
                      }
                      $selector = trim($plural[$i++]);
                      // an optional "offset:N" may precede the very first selector
                      if ($i == 1 && strncmp($selector, 'offset:', 7) === 0) {
                          $pos = mb_strpos(str_replace(["\n", "\r", "\t"], ' ', $selector), ' ', 7);
                          if ($pos === false) {
                              // no whitespace after the offset: everything left is the number, no selector follows
                              $offset = (int) trim(mb_substr($selector, 7));
                              $selector = '';
                          } else {
                              $offset = (int) trim(mb_substr($selector, 7, $pos - 7));
                              $selector = trim(mb_substr($selector, $pos + 1));
                          }
                      }
                      // strncmp() instead of $selector[0] so that an empty selector does not raise an offset error
                      if (($message === false && $selector === 'other')
                          || (strncmp($selector, '=', 1) === 0 && (int) mb_substr($selector, 1) == $arg)
                          || ($selector === 'one' && $arg - $offset == 1)
                      ) {
                          $message = implode(',', str_replace('#', $arg - $offset, $plural[$i]));
                      }
                  }
                  if ($message !== false) {
                      return $this->fallbackFormat($message, $args, $locale);
                  }
                  break;
          }

          return false;
      }
  }