// TextEncodingTest.cpp
  1. 
  2. #include "../testTools.h"
// Expected content of Latin1.txt, which the TextEncoding test loads and compares against this string.
// Every character in this text has a code of at most 255, so a plain (non-unicode) raw literal is used.
// These tests will fail if the source code document or stored files change their encoding of line breaks,
// because the line breaks inside the raw literal are taken directly from this source file.
String expected_latin1 =
R"QUOTE(Hello my friend
Hej min vän
Halló, vinur minn
Hei ystäväni
Hola mi amigo
Ciao amico
)QUOTE";
// Multilingual sample text that the expected UTF-8 and UTF-16 strings declared after it begin with.
// Warning!
//   String literals containing characters above value 255 must be stored explicitly in unicode literals using U"" instead of "".
//   Because string literals do not begin with a byte order mark to say which encoding is being used.
//   Also make sure to save the source code document using a byte order mark so that the C++ compiler receives the correct symbol.
// The line breaks inside the raw literal are part of the expected text, so do not reformat it.
String unicodeContent =
UR"QUOTE(Hello my friend
Hej min vän
Halló, vinur minn
Hei ystäväni
Hola mi amigo
Ciao amico
你好我的朋友
こんにちは、友よ
नमस्ते मेरो साथी
Talofa laʻu uo
Xin chào bạn của tôi
העלא מיין פרייַנד
안녕 내 친구
سلام دوست من
ਹੈਲੋ ਮੇਰੇ ਦੋਸਤ
ওহে, বন্ধু আমার
សួស្តី​សម្លាញ់
Γεια σου φίλε μου
Привет, мой друг
здраво пријатељу
Բարեւ իմ ընկեր
ආයුබෝවන් මාගේ යාළුවා
ಹಲೋ ನನ್ನ ಸ್ನೇಹಿತನೇ
Silav hevalê min
اهلا صديقي
)QUOTE";
  43. String expected_utf8 = unicodeContent + U"\nThis is UTF-8";
  44. String expected_utf16le = unicodeContent + U"\nThis is UTF-16 Little Endian";
  45. String expected_utf16be = unicodeContent + U"\nThis is UTF-8 Big Endian";
  46. void printCharacterCode(uint32_t value) {
  47. for (int i = 0; i < 32; i++) {
  48. if (value & 0b10000000000000000000000000000000) {
  49. printText(U"1");
  50. } else {
  51. printText(U"0");
  52. }
  53. value = value << 1;
  54. }
  55. }
  56. // Method for printing the character codes of a string for debugging
  57. void compareCharacterCodes(String textA, String textB) {
  58. int lengthA = string_length(textA);
  59. int lengthB = string_length(textB);
  60. int minLength = lengthA < lengthB ? lengthA : lengthB;
  61. printText("Character codes for strings of length ", lengthA, U" and ", lengthB, U":\n");
  62. for (int i = 0; i < minLength; i++) {
  63. uint32_t codeA = (uint32_t)textA[i];
  64. uint32_t codeB = (uint32_t)textB[i];
  65. printCharacterCode(codeA);
  66. if (codeA == codeB) {
  67. printText(U" == ");
  68. } else {
  69. printText(U" != ");
  70. }
  71. printCharacterCode(codeB);
  72. printText(U" (", textA[i], U") (", textB[i], U")\n");
  73. }
  74. if (lengthA > lengthB) {
  75. for (int i = minLength; i < lengthA; i++) {
  76. uint32_t codeA = (uint32_t)textA[i];
  77. printCharacterCode(codeA);
  78. printText(U" (", textA[i], U")\n");
  79. }
  80. } else {
  81. printText(U" ");
  82. for (int i = minLength; i < lengthB; i++) {
  83. uint32_t codeB = (uint32_t)textB[i];
  84. printCharacterCode(codeB);
  85. printText(U" (", textB[i], U")\n");
  86. }
  87. }
  88. }
// Regression test for loading and saving text files in different character encodings.
// The resource files are addressed relative to the working directory (test/tests/resources/),
// so the test presumably has to be started from the folder containing "test" — verify against the test runner.
START_TEST(TextEncoding)
	// Path to the folder with stored text files, ending with a separator so that file names can be appended directly.
	String folderPath = string_combine(U"test", file_separator(), U"tests", file_separator(), U"resources", file_separator());
	{ // Text encodings stored in memory
		// TODO: Test string_loadFromMemory
	}
	{ // Loading strings of different encodings
		// NOTE(review): The meaning of the second argument (true) to string_load is not visible from this file.
		// Latin1.txt has no unicode content and is compared against the non-unicode literal.
		String fileLatin1 = string_load(folderPath + U"Latin1.txt", true);
		//compareCharacterCodes(fileLatin1, expected_latin1);
		ASSERT_MATCH(fileLatin1, expected_latin1);
		// BomUtf8.txt is expected to decode into the multilingual text followed by "This is UTF-8".
		String fileUTF8 = string_load(folderPath + U"BomUtf8.txt", true);
		//compareCharacterCodes(fileUTF8, expected_utf8);
		ASSERT_MATCH(fileUTF8, expected_utf8);
		// The UTF-16 checks below are currently disabled.
		//String fileUTF16LE = string_load(folderPath + U"BomUtf16Le.txt", true);
		//printText("BomUtf16Le.txt contains:\n", fileUTF16LE, "\n");
		//ASSERT_MATCH(fileUTF16LE, expected_utf16le);
		//String fileUTF16BE = string_load(folderPath + U"BomUtf16Be.txt", true);
		//printText("BomUtf16Be.txt contains:\n", fileUTF16BE, "\n");
		//ASSERT_MATCH(fileUTF16BE, expected_utf16be);
	}
	{ // Saving text to files
		// One line that fits in Latin-1 followed by six Chinese characters that do not.
		String originalContent = U"Hello my friend\n你好我的朋友";
		// Both saves write to the same file inside the resource folder, which is left behind after the test.
		String tempPath = folderPath + U"Temporary.txt";
		// Latin-1 should write ? for complex characters
		//   The text is saved with CrLf line breaks but compared against \n,
		//   so loading is presumably expected to convert the line breaks back.
		string_save(tempPath, originalContent, CharacterEncoding::Raw_Latin1, LineEncoding::CrLf);
		ASSERT_MATCH(string_load(tempPath, true), U"Hello my friend\n??????");
		// UTF-8 should store the Chinese characters correctly
		string_save(tempPath, originalContent, CharacterEncoding::BOM_UTF8, LineEncoding::CrLf);
		ASSERT_MATCH(string_load(tempPath, true), originalContent);
	}
END_TEST