2
0

IdHeaderCoder2022JP.pas 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277
  1. unit IdHeaderCoder2022JP;
  2. interface
  3. {$i IdCompilerDefines.inc}
  4. {RLebeau: TODO - move this logic into an IIdTextEncoding implementation}
  5. uses
  6. IdGlobal, IdHeaderCoderBase;
  7. type
  8. TIdHeaderCoder2022JP = class(TIdHeaderCoder)
  9. public
  10. class function Decode(const ACharSet: string; const AData: TIdBytes): String; override;
  11. class function Encode(const ACharSet, AData: String): TIdBytes; override;
  12. class function CanHandle(const ACharSet: String): Boolean; override;
  13. end;
  14. // RLebeau 4/17/10: this forces C++Builder to link to this unit so
  15. // RegisterHeaderCoder can be called correctly at program startup...
  16. {$IFDEF HAS_DIRECTIVE_HPPEMIT_LINKUNIT}
  17. {$HPPEMIT LINKUNIT}
  18. {$ELSE}
  19. {$HPPEMIT '#pragma link "IdHeaderCoder2022JP"'}
  20. {$ENDIF}
  21. implementation
  22. uses
  23. SysUtils;
  24. const
  25. // RLebeau 1/7/09: using integers for #128-#255 because in D2009, the compiler
  26. // may change characters >= #128 from their Ansi codepage value to their true
  27. // Unicode codepoint value, depending on the codepage used for the source code.
  28. // For instance, #128 may become #$20AC...
  29. kana_tbl : array[161..223{#$A1..#$DF}] of Word = (
  30. $2123,$2156,$2157,$2122,$2126,$2572,$2521,$2523,$2525,$2527,
  31. $2529,$2563,$2565,$2567,$2543,$213C,$2522,$2524,$2526,$2528,
  32. $252A,$252B,$252D,$252F,$2531,$2533,$2535,$2537,$2539,$253B,
  33. $253D,$253F,$2541,$2544,$2546,$2548,$254A,$254B,$254C,$254D,
  34. $254E,$254F,$2552,$2555,$2558,$255B,$255E,$255F,$2560,$2561,
  35. $2562,$2564,$2566,$2568,$2569,$256A,$256B,$256C,$256D,$256F,
  36. $2573,$212B,$212C);
  37. vkana_tbl : array[161..223{#$A1..#$DF}] of Word = (
  38. $0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,
  39. $0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$2574,$0000,
  40. $0000,$252C,$252E,$2530,$2532,$2534,$2536,$2538,$253A,$253C,
  41. $253E,$2540,$2542,$2545,$2547,$2549,$0000,$0000,$0000,$0000,
  42. $0000,$2550,$2553,$2556,$2559,$255C,$0000,$0000,$0000,$0000,
  43. $0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,
  44. $0000,$0000,$0000);
  45. sj1_tbl : array[128..255{#128..#255}] of byte = (
  46. $00,$21,$23,$25,$27,$29,$2B,$2D,$2F,$31,$33,$35,$37,$39,$3B,$3D,
  47. $3F,$41,$43,$45,$47,$49,$4B,$4D,$4F,$51,$53,$55,$57,$59,$5B,$5D,
  48. $00,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,
  49. $01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,
  50. $01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,
  51. $01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,
  52. $5F,$61,$63,$65,$67,$69,$6B,$6D,$6F,$71,$73,$75,$77,$79,$7B,$7D,
  53. $02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$00,$00,$00);
  54. sj2_tbl : array[0..255{#0..#255}] of Word = (
  55. $0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,
  56. $0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,
  57. $0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,
  58. $0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,
  59. $0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,
  60. $0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,$0000,
  61. $0000,$0000,$0000,$0000,$0021,$0022,$0023,$0024,$0025,$0026,
  62. $0027,$0028,$0029,$002A,$002B,$002C,$002D,$002E,$002F,$0030,
  63. $0031,$0032,$0033,$0034,$0035,$0036,$0037,$0038,$0039,$003A,
  64. $003B,$003C,$003D,$003E,$003F,$0040,$0041,$0042,$0043,$0044,
  65. $0045,$0046,$0047,$0048,$0049,$004A,$004B,$004C,$004D,$004E,
  66. $004F,$0050,$0051,$0052,$0053,$0054,$0055,$0056,$0057,$0058,
  67. $0059,$005A,$005B,$005C,$005D,$005E,$005F,$0000,$0060,$0061,
  68. $0062,$0063,$0064,$0065,$0066,$0067,$0068,$0069,$006A,$006B,
  69. $006C,$006D,$006E,$006F,$0070,$0071,$0072,$0073,$0074,$0075,
  70. $0076,$0077,$0078,$0079,$007A,$007B,$007C,$007D,$007E,$0121,
  71. $0122,$0123,$0124,$0125,$0126,$0127,$0128,$0129,$012A,$012B,
  72. $012C,$012D,$012E,$012F,$0130,$0131,$0132,$0133,$0134,$0135,
  73. $0136,$0137,$0138,$0139,$013A,$013B,$013C,$013D,$013E,$013F,
  74. $0140,$0141,$0142,$0143,$0144,$0145,$0146,$0147,$0148,$0149,
  75. $014A,$014B,$014C,$014D,$014E,$014F,$0150,$0151,$0152,$0153,
  76. $0154,$0155,$0156,$0157,$0158,$0159,$015A,$015B,$015C,$015D,
  77. $015E,$015F,$0160,$0161,$0162,$0163,$0164,$0165,$0166,$0167,
  78. $0168,$0169,$016A,$016B,$016C,$016D,$016E,$016F,$0170,$0171,
  79. $0172,$0173,$0174,$0175,$0176,$0177,$0178,$0179,$017A,$017B,
  80. $017C,$017D,$017E,$0000,$0000,$0000);
  81. class function TIdHeaderCoder2022JP.Decode(const ACharSet: String; const AData: TIdBytes): String;
  82. var
  83. T : string;
  84. I, L : Integer;
  85. isK : Boolean;
  86. K1, K2 : Byte;
  87. K3 : Byte;
  88. begin
  89. T := ''; {Do not Localize}
  90. isK := False;
  91. L := Length(AData);
  92. I := 0;
  93. while I < L do
  94. begin
  95. if AData[I] = 27 then
  96. begin
  97. Inc(I);
  98. if (I+1) < L then
  99. begin
  100. if (AData[I] = Ord('$')) and (AData[I+1] = Ord('B')) then begin {do not localize}
  101. isK := True;
  102. end
  103. else if (AData[I] = Ord('(')) and (AData[I+1] = Ord('B')) then begin {do not localize}
  104. isK := False;
  105. end;
  106. Inc(I, 2); { TODO -oTArisawa : Check RFC 1468}
  107. end;
  108. end
  109. else if isK then
  110. begin
  111. if (I+1) < L then
  112. begin
  113. K1 := AData[I];
  114. K2 := AData[I+1];
  115. K3 := (K1 - 1) shr 1;
  116. if K1 < 95 then begin
  117. K3:= K3 + 113;
  118. end else begin
  119. K3 := K3 + 177;
  120. end;
  121. if (K1 mod 2) = 1 then
  122. begin
  123. if K2 < 96 then begin
  124. K2 := K2 + 31;
  125. end else begin
  126. K2 := K2 + 32;
  127. end;
  128. end
  129. else begin
  130. K2 := K2 + 126;
  131. end;
  132. T := T + Char(K3) + Char(k2);
  133. Inc(I, 2);
  134. end
  135. else begin
  136. Inc(I); { invalid DBCS }
  137. end;
  138. end
  139. else
  140. begin
  141. T := T + Char(AData[I]);
  142. Inc(I);
  143. end;
  144. end;
  145. Result := T;
  146. end;
  147. class function TIdHeaderCoder2022JP.Encode(const ACharSet, AData: String): TIdBytes;
  148. const
  149. desig_asc: array[0..2] of Byte = (27, Ord('('), Ord('B')); {Do not Localize}
  150. desig_jis: array[0..2] of Byte = (27, Ord('$'), Ord('B')); {Do not Localize}
  151. var
  152. T: TIdBytes;
  153. I, L: Integer;
  154. isK: Boolean;
  155. K1: Byte;
  156. K2, K3: Word;
  157. begin
  158. SetLength(T, 0);
  159. isK := False;
  160. L := Length(AData);
  161. I := 1;
  162. while I <= L do
  163. begin
  164. if Ord(AData[I]) < 128 then {Do not Localize}
  165. begin
  166. if isK then
  167. begin
  168. AppendByte(T, 27);
  169. AppendByte(T, Ord('(')); {Do not Localize}
  170. AppendByte(T, Ord('B')); {Do not Localize}
  171. isK := False;
  172. end;
  173. AppendByte(T, Ord(AData[I]));
  174. Inc(I);
  175. end else
  176. begin
  177. K1 := sj1_tbl[Ord(AData[I])];
  178. case K1 of
  179. 0: Inc(I); { invalid SBCS }
  180. 2: Inc(I, 2); { invalid DBCS }
  181. 1:
  182. begin { halfwidth katakana }
  183. if not isK then begin
  184. AppendByte(T, 27);
  185. AppendByte(T, Ord('$')); {Do not Localize}
  186. AppendByte(T, Ord('B')); {Do not Localize}
  187. isK := True;
  188. end;
  189. { simple SBCS -> DBCS conversion }
  190. K2 := kana_tbl[Ord(AData[I])];
  191. if (I < L) and ((Ord(AData[I+1]) and $FE) = $DE) then
  192. begin { convert kana + voiced mark to voiced kana }
  193. K3 := vkana_tbl[Ord(AData[I])];
  194. // This is an if and not a case because of a D8 bug, return to
  195. // case when d8 patch is released
  196. // RLebeau 1/7/09: using Char() for #128-#255 because in D2009, the compiler
  197. // may change characters >= #128 from their Ansi codepage value to their true
  198. // Unicode codepoint value, depending on the codepage used for the source code.
  199. // For instance, #128 may become #$20AC...
  200. if AData[I+1] = Char($DE) then begin { voiced }
  201. if K3 <> 0 then
  202. begin
  203. K2 := K3;
  204. Inc(I);
  205. end;
  206. end
  207. else if AData[I+1] = Char($DF) then begin { semivoiced }
  208. if (K3 >= $2550) and (K3 <= $255C) then
  209. begin
  210. K2 := K3 + 1;
  211. Inc(I);
  212. end;
  213. end;
  214. end;
  215. AppendByte(T, K2 shr 8);
  216. AppendByte(T, K2 and $FF);
  217. Inc(I);
  218. end;
  219. else { DBCS }
  220. if (I < L) then begin
  221. K2 := sj2_tbl[Ord(AData[I+1])];
  222. if K2 <> 0 then
  223. begin
  224. if not isK then begin
  225. AppendByte(T, 27);
  226. AppendByte(T, Ord('$')); {Do not Localize}
  227. AppendByte(T, Ord('B')); {Do not Localize}
  228. isK := True;
  229. end;
  230. AppendByte(T, K1 + K2 shr 8);
  231. AppendByte(T, K2 and $FF);
  232. end;
  233. end;
  234. Inc(I, 2);
  235. end;
  236. end;
  237. end;
  238. if isK then begin
  239. AppendByte(T, 27);
  240. AppendByte(T, Ord('(')); {Do not Localize}
  241. AppendByte(T, Ord('B')); {Do not Localize}
  242. end;
  243. Result := T;
  244. end;
  245. class function TIdHeaderCoder2022JP.CanHandle(const ACharSet: String): Boolean;
  246. begin
  247. Result := TextIsSame(ACharSet, 'ISO-2022-JP'); {do not localize}
  248. end;
  249. initialization
  250. RegisterHeaderCoder(TIdHeaderCoder2022JP);
  251. finalization
  252. UnregisterHeaderCoder(TIdHeaderCoder2022JP);
  253. end.