utencoding.pp 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157
  1. unit utencoding;
  2. {$mode delphi}{$H+}
  3. {$codepage cp1251}
  4. interface
  5. uses
  6. SysUtils, Classes;
  7. implementation
  8. uses punit, utrtl;
  9. function CheckCodePage(const B: TBytes; AEncoding: TEncoding): Boolean;
  10. var
  11. DetectedEncoding: TEncoding;
  12. begin
  13. DetectedEncoding := nil;
  14. Result :=
  15. (TEncoding.GetBufferEncoding(B, DetectedEncoding) <> 0) and
  16. (DetectedEncoding = AEncoding);
  17. end;
  18. Function DoEncodingTest : AnsiString;
  19. const
  20. UTF8Bytes: array[0..18] of byte = ($EF,$BB,$BF,$D0,$9F,$D1,$80,$D0,$BE,$D0,$B2,$D0,$B5,$D1,$80,$D0,$BA,$D0,$B0);
  21. UTF16Bytes: array[0..17] of byte = ($FF,$FE,$1F,$04,$40,$04,$3E,$04,$32,$04,$35,$04,$40,$04,$3A,$04,$30,$04);
  22. UTF16BEBytes: array[0..17] of byte = ($FE,$FF,$04,$1F,$04,$40,$04,$3E,$04,$32,$04,$35,$04,$40,$04,$3A,$04,$30);
  23. type
  24. TCp1251String = type AnsiString(1251);
  25. TCp866String = type AnsiString(866);
  26. var
  27. Cp866Encoding,
  28. Cp1251Encoding: TEncoding;
  29. Bytes: TBytes;
  30. Cp1251String,
  31. Cp1251String2: TCp1251String;
  32. Cp866String: Tcp866String;
  33. S: AnsiString;
  34. U8: UTF8String;
  35. U1, U2: UnicodeString;
  36. begin
  37. Result:='';
  38. // 1. check various conversions
  39. Cp866Encoding := TEncoding.GetEncoding('IBM866');
  40. Cp1251Encoding := TEncoding.GetEncoding('windows-1251');
  41. Cp1251String := 'Ïðèâåò çåìëÿíå!';
  42. Cp866String := Cp1251String;
  43. Bytes := Cp1251Encoding.GetBytes(Cp1251String);
  44. Bytes := TEncoding.Convert(Cp1251Encoding, Cp866Encoding, Bytes);
  45. SetString(S, PAnsiChar(Bytes), Length(Bytes));
  46. if not CompareMem(Pointer(S), Pointer(Cp866String), Length(S)) then
  47. Exit('Error at 1');
  48. if StringCodePage(S)<>CP_ACP then
  49. Exit('Error at 11');
  50. Cp1251String2:=Cp1251String;
  51. SetString(Cp1251String,pchar(Cp1251String2),length(Cp1251String2));
  52. if StringCodePage(Cp1251String)<>1251 then
  53. Exit('Error at 12');
  54. U1 := Cp866Encoding.GetString(Bytes);
  55. U2 := TEncoding.Unicode.GetString(TEncoding.Convert(Cp866Encoding, TEncoding.Unicode, Bytes));
  56. if U1 <> U2 then
  57. Exit('Error at 2');
  58. U1 := TEncoding.BigEndianUnicode.GetString(TEncoding.Convert(Cp866Encoding, TEncoding.BigEndianUnicode, Bytes));
  59. if U1 <> U2 then
  60. Exit('Error at 3');
  61. Bytes := TEncoding.Convert(Cp866Encoding, TEncoding.UTF8, Bytes);
  62. U8 := Cp866String;
  63. if not CompareMem(Pointer(U8), @Bytes[0], Length(U8)) then
  64. Exit('Error at 4');
  65. // 2. check misc functions
  66. if not (TEncoding.IsStandardEncoding(TEncoding.Unicode) or TEncoding.IsStandardEncoding(TEncoding.UTF8) or TEncoding.IsStandardEncoding(TEncoding.UTF7)) or
  67. TEncoding.IsStandardEncoding(Cp866Encoding) or TEncoding.IsStandardEncoding(Cp1251Encoding) then
  68. Exit('Error at 5');
  69. if Cp866Encoding.EncodingName = '' then
  70. Exit('Error at 6')
  71. else if ShowDebugOutput then
  72. WriteLn(Cp866Encoding.EncodingName);
  73. if TEncoding.Default.CodePage <> DefaultSystemCodePage then
  74. Exit('Error at 7');
  75. // 3. check codepage detection
  76. SetLength(Bytes, Length(UTF8Bytes));
  77. Move(UTF8Bytes[0], Bytes[0], Length(UTF8Bytes));
  78. if not CheckCodePage(Bytes, TEncoding.UTF8) then
  79. Exit('Error at 8');
  80. SetLength(Bytes, Length(UTF16Bytes));
  81. Move(UTF16Bytes[0], Bytes[0], Length(UTF16Bytes));
  82. if not CheckCodePage(Bytes, TEncoding.Unicode) then
  83. Exit('Error at 9');
  84. SetLength(Bytes, Length(UTF16BEBytes));
  85. Move(UTF16BEBytes[0], Bytes[0], Length(UTF16BEBytes));
  86. if not CheckCodePage(Bytes, TEncoding.BigEndianUnicode) then
  87. Exit('Error at 10');
  88. Cp866Encoding.Free;
  89. Cp1251Encoding.Free;
  90. Result:='';
  91. end;
  92. Function DoEncodingTest2 : AnsiString;
  93. var
  94. ACP,StartDefaultSystemCodePage: TSystemCodePage;
  95. begin
  96. StartDefaultSystemCodePage := DefaultSystemCodePage;
  97. ACP:=TEncoding.ANSI.CodePage;
  98. try
  99. // test creating ANSI when DefaultSystemCodePage is set to non-ANSI
  100. if DefaultSystemCodePage<>CP_UTF8 then
  101. DefaultSystemCodePage := CP_UTF8
  102. else
  103. DefaultSystemCodePage := 1250;
  104. if TEncoding.ANSI.CodePage<>ACP then
  105. Exit('AnsiCodePage changed when setting DefaultSystemCodePage to non-initial value');
  106. // test default
  107. DefaultSystemCodePage := StartDefaultSystemCodePage;
  108. if TEncoding.ANSI.CodePage<>TEncoding.SystemEncoding.CodePage then
  109. Exit('Ansi codepage not set to UTF8');
  110. // try utf-8
  111. DefaultSystemCodePage := CP_UTF8;
  112. if TEncoding.ANSI.CodePage<>ACP then
  113. Exit('AnsiCodePage changed when setting DefaultSystemCodePage to UTF8');
  114. if TEncoding.SystemEncoding.CodePage<>DefaultSystemCodePage then
  115. Exit('SystemEncoding differs from defaultsystemcodepage');
  116. // try a different single-byte encoding
  117. if StartDefaultSystemCodePage=1250 then
  118. DefaultSystemCodePage := 1251
  119. else
  120. DefaultSystemCodePage := 1250;
  121. if TEncoding.ANSI.CodePage<>ACP then
  122. Exit('Ansicodepage changed when setting defaultsystemcodepage to different single-byte codepage');
  123. if TEncoding.SystemEncoding.CodePage<>DefaultSystemCodePage then
  124. Exit('SystemEncoding not correctly set after changing to different single-byte codepage');
  125. // try start again
  126. DefaultSystemCodePage := StartDefaultSystemCodePage;
  127. if TEncoding.SystemEncoding.CodePage<>DefaultSystemCodePage then
  128. Exit('Systemencoding codepage not set correct when changing back to original');
  129. finally
  130. DefaultSystemCodePage:=StartDefaultSystemCodePage;
  131. end;
  132. end;
  133. begin
  134. SysUtilsTest('EncodingTest',@DoEncodingTest);
  135. SysUtilsTest('EncodingTest2',@DoEncodingTest2);
  136. end.Encodin