MultibyteTest.php 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314
  1. <?php
  2. /**
  3. * Lithium: the most rad php framework
  4. *
  5. * @copyright Copyright 2013, Union of RAD (http://union-of-rad.org)
  6. * @license http://opensource.org/licenses/bsd-license.php The BSD License
  7. */
  8. namespace lithium\tests\cases\g11n;
  9. use lithium\g11n\Multibyte;
  10. use lithium\tests\mocks\g11n\multibyte\adapter\MockAdapter;
  11. class MultibyteTest extends \lithium\test\Unit {
  12. protected $_backup = array();
  13. public $adapter;
  14. public function setUp() {
  15. $this->_backup['multibyteConfig'] = Multibyte::config();
  16. Multibyte::reset();
  17. $this->adapter = new MockAdapter();
  18. Multibyte::config(array('default' => array('object' => $this->adapter)));
  19. }
  20. public function tearDown() {
  21. Multibyte::reset();
  22. Multibyte::config($this->_backup['multibyteConfig']);
  23. }
  24. public function testIs() {
  25. $result = Multibyte::is('äbc');
  26. $this->assertTrue($result);
  27. $result = Multibyte::is('κόσμε');
  28. $this->assertTrue($result);
  29. $result = Multibyte::is("κό\nσμε");
  30. $this->assertTrue($result);
  31. $result = Multibyte::is("ab\xe9");
  32. $this->assertFalse($result);
  33. }
  34. public function testIsQuick() {
  35. $result = Multibyte::is('äbc', array('quick' => true));
  36. $this->assertTrue($result);
  37. $result = Multibyte::is('κόσμε', array('quick' => true));
  38. $this->assertTrue($result);
  39. $result = Multibyte::is("κό\nσμε", array('quick' => true));
  40. $this->assertTrue($result);
  41. }
  42. /**
  43. * Verifies the behavior of `Multibyte::is()` when dealing with valid,
  44. * invalid UTF-8 strings as well as edge cases. This test uses the stress
  45. * test created by Markus Kuhn.
  46. *
  47. * This test is "special" in that it doesn't prove that the method returns
  48. * correct results in any case - it shows how it actually behaves. It is no
  49. * requirement that the method successfully detects each and any string as
  50. * valid/invalid UTF-8. But: following a list which could be seen as goals
  51. * we'd like to achieve. Please adapt the list when modifying the method.
  52. * Any modification should result in getting closer to our goals not adding
  53. * more to them ;)
  54. *
  55. * These items should be detected as valid UTF-8 (but currently aren't):
  56. * - lines 70, 74, 75 in section `First possible sequence of a certain length`.
  57. * - lines 79, 82, 83, 84 in section `Last possible sequence of a certain length`.
  58. *
  59. * @link http://www.cl.cam.ac.uk/~mgk25/
  60. * @link http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
  61. */
  62. public function testIsBehavioral() {
  63. $path = LITHIUM_LIBRARY_PATH . '/lithium/tests/resources/utf8_decoder_stress_test.txt';
  64. $data = file($path);
  65. $items = array(
  66. 64 => true,
  67. 70 => false,
  68. 71 => true,
  69. 72 => true,
  70. 73 => true,
  71. 74 => false,
  72. 75 => false,
  73. 79 => false,
  74. 80 => true,
  75. 81 => true,
  76. 82 => false,
  77. 83 => false,
  78. 84 => false,
  79. 101 => false,
  80. 102 => false,
  81. 104 => false,
  82. 105 => false,
  83. 106 => false,
  84. 107 => false,
  85. 108 => false,
  86. 109 => false,
  87. 113 => false,
  88. 114 => false,
  89. 115 => false,
  90. 116 => false,
  91. 123 => false,
  92. 124 => false,
  93. 129 => false,
  94. 134 => false,
  95. 139 => false,
  96. 144 => false,
  97. 154 => false,
  98. 155 => false,
  99. 156 => false,
  100. 157 => false,
  101. 158 => false,
  102. 159 => false,
  103. 160 => false,
  104. 161 => false,
  105. 168 => false,
  106. 174 => false,
  107. 176 => false,
  108. 206 => false,
  109. 207 => false,
  110. 208 => false,
  111. 209 => false,
  112. 210 => false,
  113. 219 => false,
  114. 220 => false,
  115. 221 => false,
  116. 222 => false,
  117. 223 => false,
  118. 231 => false,
  119. 232 => false,
  120. 233 => false,
  121. 234 => false,
  122. 235 => false,
  123. 246 => false,
  124. 247 => false,
  125. 248 => false,
  126. 249 => false,
  127. 250 => false,
  128. 251 => false,
  129. 252 => false,
  130. 256 => false,
  131. 257 => false,
  132. 258 => false,
  133. 259 => false,
  134. 260 => false,
  135. 261 => false,
  136. 262 => false,
  137. 263 => false,
  138. 267 => true,
  139. 268 => true
  140. );
  141. foreach ($items as $number => $expected) {
  142. $result = Multibyte::is($data[$number]);
  143. $message = "Expected item on line {$number} to be detected as ";
  144. $message .= ($expected ? 'valid' : 'invalid') . " UTF-8.\n";
  145. $this->assertEqual($expected, $result, $message);
  146. }
  147. }
  148. /**
  149. * Verifies the behavior of `Multibyte::is()` when dealing with valid,
  150. * invalid UTF-8 strings as well as edge cases. Please see the docblock for
  151. * `testIsBehaviroral` for more contextual information on the type of test
  152. * and data used here.
  153. *
  154. * This test clearly shows and accepts the limitations in which the `quick`
  155. * mode operates. The `quick` mode will obviously never get as good results
  156. * as the normal one.
  157. *
  158. * These items should be detected as *invalid* UTF-8 (but currently aren't):
  159. * - lines 101-263 in nearly all remaining sections.
  160. *
  161. * @see lithium\tests\cases\g11n\MultibyteTest::testIsBehavioral()
  162. */
  163. public function testIsQuickBehavioral() {
  164. $path = LITHIUM_LIBRARY_PATH . '/lithium/tests/resources/utf8_decoder_stress_test.txt';
  165. $data = file($path);
  166. $items = array(
  167. 64 => true,
  168. 70 => true,
  169. 71 => true,
  170. 72 => true,
  171. 73 => true,
  172. 74 => true,
  173. 75 => true,
  174. 79 => true,
  175. 80 => true,
  176. 81 => true,
  177. 82 => true,
  178. 83 => true,
  179. 84 => true,
  180. 101 => true,
  181. 102 => true,
  182. 104 => true,
  183. 105 => true,
  184. 106 => true,
  185. 107 => true,
  186. 108 => true,
  187. 109 => true,
  188. 113 => true,
  189. 114 => true,
  190. 115 => true,
  191. 116 => true,
  192. 123 => true,
  193. 124 => true,
  194. 129 => true,
  195. 134 => true,
  196. 139 => true,
  197. 144 => true,
  198. 154 => true,
  199. 155 => true,
  200. 156 => true,
  201. 157 => true,
  202. 158 => true,
  203. 159 => true,
  204. 160 => true,
  205. 161 => true,
  206. 168 => true,
  207. 174 => true,
  208. 176 => true,
  209. 206 => true,
  210. 207 => true,
  211. 208 => true,
  212. 209 => true,
  213. 210 => true,
  214. 219 => true,
  215. 220 => true,
  216. 221 => true,
  217. 222 => true,
  218. 223 => true,
  219. 231 => true,
  220. 232 => true,
  221. 233 => true,
  222. 234 => true,
  223. 235 => true,
  224. 246 => true,
  225. 247 => true,
  226. 248 => true,
  227. 249 => true,
  228. 250 => true,
  229. 251 => true,
  230. 252 => true,
  231. 256 => true,
  232. 257 => true,
  233. 258 => true,
  234. 259 => true,
  235. 260 => true,
  236. 261 => true,
  237. 262 => true,
  238. 263 => true,
  239. 267 => true,
  240. 268 => true
  241. );
  242. foreach ($items as $number => $expected) {
  243. $result = Multibyte::is($data[$number], array('quick' => true));
  244. $message = "Expected item on line {$number} to be detected as ";
  245. $message .= ($expected ? 'valid' : 'invalid') . " UTF-8.\n";
  246. $this->assertEqual($expected, $result, $message);
  247. }
  248. }
  249. public function testStrlen() {
  250. Multibyte::strlen('test');
  251. $result = $this->adapter->testStrlenArgs;
  252. $expected = array('test');
  253. $this->assertEqual($expected, $result);
  254. }
  255. public function testStrpos() {
  256. Multibyte::strpos('abcab', 'c');
  257. $result = $this->adapter->testStrposArgs;
  258. $expected = array('abcab', 'c', 0);
  259. $this->assertEqual($expected, $result);
  260. Multibyte::strpos('abcab', 'c', 23);
  261. $result = $this->adapter->testStrposArgs;
  262. $expected = array('abcab', 'c', 23);
  263. $this->assertEqual($expected, $result);
  264. }
  265. public function testStrrpos() {
  266. Multibyte::strrpos('abcab', 'c');
  267. $result = $this->adapter->testStrrposArgs;
  268. $expected = array('abcab', 'c');
  269. $this->assertEqual($expected, $result);
  270. }
  271. public function testSubstr() {
  272. Multibyte::substr('abcab', 1);
  273. $result = $this->adapter->testSubstrArgs;
  274. $expected = array('abcab', 1, null);
  275. $this->assertEqual($expected, $result);
  276. Multibyte::substr('abcab', 1, 2);
  277. $result = $this->adapter->testSubstrArgs;
  278. $expected = array('abcab', 1, 2);
  279. $this->assertEqual($expected, $result);
  280. }
  281. }
  282. ?>