fpwidestring.pp 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897
  1. {$IFNDEF FPC_DOTTEDUNITS}
  2. unit fpwidestring;
  3. {$ENDIF FPC_DOTTEDUNITS}
  4. {$mode objfpc}
  5. interface
  6. {$IFDEF FPC_DOTTEDUNITS}
  7. uses
  8. System.CodePages.unicodedata;
  9. {$ELSE FPC_DOTTEDUNITS}
  10. uses
  11. unicodedata;
  12. {$ENDIF FPC_DOTTEDUNITS}
  13. {$i rtldefs.inc}
  14. function SetActiveCollation(const AName : UnicodeString) : Boolean;
  15. function SetActiveCollation(const ACollation : PUCA_DataBook) : Boolean;
  16. function GetActiveCollation() : PUCA_DataBook;
  17. var
  18. DefaultCollationName : UnicodeString = '';
  19. implementation
  20. {$IFDEF FPC_DOTTEDUNITS}
  21. uses
  22. {$ifdef MSWINDOWS}
  23. WinApi.Windows,
  24. {$endif MSWINDOWS}
  25. {$ifdef Unix}
  26. UnixApi.CP,
  27. {$endif}
  28. System.CharSet;
  29. {$ELSE FPC_DOTTEDUNITS}
  30. uses
  31. {$ifdef MSWINDOWS}
  32. Windows,
  33. {$endif MSWINDOWS}
  34. {$ifdef Unix}
  35. unixcp,
  36. {$endif}
  37. charset;
  38. {$ENDIF FPC_DOTTEDUNITS}
  39. procedure fpc_rangeerror; [external name 'FPC_RANGEERROR'];
  40. {$ifdef MSWINDOWS}
  41. function GetACP:UINT; external 'kernel32' name 'GetACP';
  42. {$endif MSWINDOWS}
  43. const
  44. IgnoreInvalidSequenceFlag = True;
  45. var
  46. OldManager : TUnicodeStringManager;
  47. {$ifdef FPC_HAS_FEATURE_THREADING}
  48. ThreadVar
  49. {$else FPC_HAS_FEATURE_THREADING}
  50. Var
  51. {$endif FPC_HAS_FEATURE_THREADING}
  52. current_DefaultSystemCodePage : TSystemCodePage;
  53. current_Map : punicodemap;
  54. current_Collation : record
  55. DataPtr : PUCA_DataBook;
  56. Data : TUCA_DataBook;
  57. end;
  58. function SetActiveCollation(const ACollation : PUCA_DataBook) : Boolean;
  59. begin
  60. Result := (ACollation <> nil);
  61. if Result then begin
  62. current_Collation.Data := ACollation^;
  63. current_Collation.DataPtr := @current_Collation.Data;
  64. end;
  65. end;
  66. function SetActiveCollation(const AName : UnicodeString) : Boolean;
  67. var
  68. c : PUCA_DataBook;
  69. begin
  70. c:=FindCollation(AName);
  71. Result := (c <> nil);
  72. if Result then
  73. Result := SetActiveCollation(c);
  74. end;
  75. function GetActiveCollation() : PUCA_DataBook;
  76. begin
  77. Result := current_Collation.DataPtr;
  78. end;
  79. {procedure error_CpNotFound(ACodePage:TSystemCodePage);
  80. begin
  81. System.error(reCodesetConversion);
  82. end;}
  83. procedure InitThread;
  84. var
  85. c : PUCA_DataBook;
  86. begin
  87. current_DefaultSystemCodePage:=DefaultSystemCodePage;
  88. current_Map:=getmap(current_DefaultSystemCodePage);
  89. c:=nil;
  90. if (DefaultCollationName<>'') then
  91. c:=FindCollation(DefaultCollationName);
  92. if (c=nil) and (GetCollationCount()>0) then
  93. c:=FindCollation(0);
  94. if (c<>nil) then
  95. SetActiveCollation(c);
  96. end;
  97. procedure FiniThread;
  98. begin
  99. current_Map:=nil;
  100. end;
  101. function FindMap(const cp: TSystemCodePage): punicodemap;inline;
  102. begin
  103. if (cp=DefaultSystemCodePage) then
  104. begin
  105. { update current_Map in case the DefaultSystemCodePage has been changed }
  106. if (current_DefaultSystemCodePage<>DefaultSystemCodePage) or not Assigned(current_Map) then
  107. begin
  108. FiniThread;
  109. InitThread;
  110. end;
  111. Result:=current_Map;
  112. end
  113. else
  114. Result:=getmap(cp);
  115. end;
  116. { return value:
  117. -1 if incomplete or invalid code point
  118. 0 if NULL character,
  119. > 0 if that's the length in bytes of the code point }
  120. function UTF8CodePointLength(const Str: PAnsiChar; MaxLookAead: PtrInt): Ptrint;
  121. {... taken from ustrings.inc}
  122. var
  123. p: PByte;
  124. TempBYTE: Byte;
  125. CharLen: SizeUint;
  126. LookAhead: SizeUInt;
  127. UC: SizeUInt;
  128. begin
  129. if (Str=nil) then
  130. exit(0);
  131. p:=PByte(Str);
  132. if (p^=0) then
  133. exit(0);
  134. p:=PByte(Str);
  135. if (p^ and $80) = 0 then //One character US-ASCII,
  136. exit(1);
  137. TempByte:=p^;
  138. CharLen:=0;
  139. while (TempByte and $80)<>0 do
  140. begin
  141. TempByte:=(TempByte shl 1) and $FE;
  142. Inc(CharLen);
  143. end;
  144. //Test for the "CharLen" conforms UTF-8 string
  145. //This means the 10xxxxxx pattern.
  146. if SizeUInt(CharLen-1)>MaxLookAead then //Insuficient chars in string to decode UTF-8 array
  147. exit(-1);
  148. for LookAhead := 1 to CharLen-1 do
  149. begin
  150. if ((p[LookAhead] and $80)<>$80) or
  151. ((p[LookAhead] and $40)<>$00)
  152. then
  153. begin
  154. //Invalid UTF-8 sequence, fallback.
  155. exit(-1);
  156. end;
  157. end;
  158. Result:=CharLen;
  159. case CharLen of
  160. 1: begin
  161. //Not valid UTF-8 sequence
  162. Result:=-1;
  163. end;
  164. 2: begin
  165. //Two bytes UTF, convert it
  166. UC:=(p^ and $1F) shl 6;
  167. UC:=UC or (p[1] and $3F);
  168. if UC <= $7F then
  169. begin
  170. //Invalid UTF sequence.
  171. Result:=-1;
  172. end;
  173. end;
  174. 3: begin
  175. //Three bytes, convert it to unicode
  176. UC:= (p^ and $0F) shl 12;
  177. UC:= UC or ((p[1] and $3F) shl 6);
  178. UC:= UC or ((p[2] and $3F));
  179. if (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
  180. begin
  181. //Invalid UTF-8 sequence
  182. Result:=-1;
  183. End;
  184. end;
  185. 4: begin
  186. //Four bytes, convert it to two unicode characters
  187. UC:= (p^ and $07) shl 18;
  188. UC:= UC or ((p[1] and $3F) shl 12);
  189. UC:= UC or ((p[2] and $3F) shl 6);
  190. UC:= UC or ((p[3] and $3F));
  191. if (UC < $10000) or (UC > $10FFFF) then
  192. begin
  193. Result:=-1;
  194. end
  195. end;
  196. 5,6,7: begin
  197. //Invalid UTF8 to unicode conversion,
  198. //mask it as invalid UNICODE too.
  199. Result:=-1;
  200. end;
  201. end;
  202. end;
  203. { return value:
  204. -1 if incomplete or invalid code point
  205. 0 if NULL character,
  206. > 0 if that's the length in bytes of the code point }
  207. function CodePointLength(const Str: PAnsiChar; MaxLookAead: PtrInt): PtrInt;
  208. var
  209. p : PByte;
  210. begin
  211. if (current_DefaultSystemCodePage=CP_UTF8) then
  212. exit(UTF8CodePointLength(Str,MaxLookAead));
  213. if (Str=nil) then
  214. exit(0);
  215. p:=PByte(Str);
  216. if (p^=0) then
  217. exit(0);
  218. if (current_Map=nil) then
  219. exit(1);
  220. if (p^>current_Map^.lastchar) then
  221. exit(-1);
  222. case current_Map^.map[p^].flag of
  223. umf_undefined : Result:=-1;
  224. umf_leadbyte :
  225. begin
  226. if (MaxLookAead>0) then
  227. Result:=2
  228. else
  229. Result:=-1;
  230. end;
  231. else
  232. Result:=1;
  233. end;
  234. end;
  235. procedure Unicode2AnsiMove(source:punicodechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt);
  236. var
  237. locSource : punicodechar;
  238. locMap : punicodemap;
  239. destBuffer : PAnsiChar;
  240. destLen,actualLen, i : SizeInt;
  241. blockLen : SizeInt;
  242. begin
  243. if (len=0) then
  244. begin
  245. SetLength(dest,0);
  246. exit;
  247. end;
  248. if (cp=CP_UTF8) then
  249. begin
  250. destLen:=UnicodeToUtf8(nil,High(SizeUInt),source,len);
  251. SetLength(dest,destLen-1);
  252. UnicodeToUtf8(@dest[1],destLen,source,len);
  253. SetCodePage(dest,cp,False);
  254. exit;
  255. end;
  256. if (cp=CP_UTF16) then
  257. begin
  258. destLen:=len*SizeOf(UnicodeChar);
  259. SetLength(dest,destLen);
  260. Move(source^,dest[1],destLen);
  261. SetCodePage(dest,cp,False);
  262. exit;
  263. end;
  264. locMap:=FindMap(cp);
  265. if (locMap=nil) then
  266. begin
  267. DefaultUnicode2AnsiMove(source,dest,DefaultSystemCodePage,len);
  268. exit;
  269. end;
  270. destLen:=3*len;
  271. SetLength(dest,destLen);
  272. destBuffer:=@dest[1];
  273. actualLen:=0;
  274. locSource:=source;
  275. for i:=1 to len do
  276. begin
  277. blockLen:=getascii(tunicodechar(locSource^),locMap,destBuffer,(destLen-actualLen));
  278. if (blockLen<0) then
  279. begin
  280. destLen:=destLen + 3*(1+len-i);
  281. SetLength(dest,destLen);
  282. destBuffer:=@dest[1];
  283. blockLen:=getascii(tunicodechar(locSource^),locMap,destBuffer,(destLen-actualLen));
  284. end;
  285. Inc(destBuffer,blockLen);
  286. actualLen:=actualLen+blockLen;
  287. Inc(locSource);
  288. end;
  289. if (actualLen<>Length(dest)) then
  290. SetLength(dest,actualLen);
  291. if (Length(dest)>0) then
  292. SetCodePage(dest,cp,False);
  293. end;
  294. procedure Ansi2UnicodeMove(source:PAnsiChar; cp:TSystemCodePage; var dest:UnicodeString; len:SizeInt);
  295. var
  296. locMap : punicodemap;
  297. destLen : SizeUInt;
  298. begin
  299. if (len<=0) then
  300. begin
  301. SetLength(dest,0);
  302. exit;
  303. end;
  304. if (cp=CP_UTF8) then
  305. begin
  306. destLen:=Utf8ToUnicode(nil,high(SizeUint),source,len);
  307. if destLen > 0 then
  308. SetLength(dest,destLen-1)
  309. else
  310. SetLength(dest,0);
  311. Utf8ToUnicode(@dest[1],destLen,source,len);
  312. exit;
  313. end;
  314. if (cp=CP_UTF16) then
  315. begin
  316. //what if (len mod 2) > 0 ?
  317. destLen:=len div SizeOf(UnicodeChar);
  318. SetLength(dest,destLen);
  319. Move(source^,dest[1],(destLen*SizeOf(UnicodeChar)));
  320. exit;
  321. end;
  322. locMap:=FindMap(cp);
  323. if (locMap=nil) then
  324. begin
  325. DefaultAnsi2UnicodeMove(source,DefaultSystemCodePage,dest,len);
  326. exit;
  327. end;
  328. destLen:=getunicode(source,len,locMap,nil);
  329. SetLength(dest,destLen);
  330. getunicode(source,len,locMap,tunicodestring(@dest[1]));
  331. end;
  332. {$ifdef MSWINDOWS}
  333. procedure Ansi2WideMove(source:PAnsiChar; cp:TSystemCodePage; var dest:WideString; len:SizeInt);
  334. var
  335. locMap : punicodemap;
  336. destLen : SizeInt;
  337. begin
  338. if (len<=0) then
  339. begin
  340. SetLength(dest,0);
  341. exit;
  342. end;
  343. locMap:=FindMap(cp);
  344. if (locMap=nil) then
  345. begin
  346. DefaultAnsi2WideMove(source,DefaultSystemCodePage,dest,len);
  347. exit;
  348. end;
  349. destLen:=getunicode(source,len,locMap,nil);
  350. SetLength(dest,destLen);
  351. getunicode(source,len,locMap,tunicodestring(@dest[1]));
  352. end;
  353. {$endif MSWINDOWS}
  354. function UpperUnicodeString(const S: UnicodeString): UnicodeString;
  355. begin
  356. if (UnicodeToUpper(S,IgnoreInvalidSequenceFlag,Result) <> 0) then
  357. system.error(reRangeError);
  358. end;
  359. function UpperWideString(const S: WideString): WideString;
  360. var
  361. u : UnicodeString;
  362. begin
  363. u:=s;
  364. Result:=UpperUnicodeString(u);
  365. end;
  366. function LowerUnicodeString(const S: UnicodeString): UnicodeString;
  367. begin
  368. if (UnicodeToLower(S,IgnoreInvalidSequenceFlag,Result) <> 0) then
  369. system.error(reRangeError);
  370. end;
  371. function LowerWideString(const S: WideString): WideString;
  372. var
  373. u : UnicodeString;
  374. begin
  375. u:=s;
  376. Result:=LowerUnicodeString(u);
  377. end;
  378. function CompareUnicodeStringUCA(p1,p2:PUnicodeChar; l1,l2:PtrInt) : PtrInt;inline;
  379. begin
  380. Result := IncrementalCompareString(p1,l1,p2,l2,current_Collation.DataPtr);
  381. end;
  382. function CompareUnicodeString(p1,p2:PUnicodeChar; l1,l2:PtrInt) : PtrInt;inline;
  383. begin
  384. if (Pointer(p1)=Pointer(p2)) then
  385. exit(0);
  386. if (l1=0) then
  387. exit(-l2);
  388. if (l2=0) then
  389. exit(l1);
  390. Result := CompareUnicodeStringUCA(p1,p2,l1,l2);
  391. end;
  392. type
  393. TChangedPropsRecord = record
  394. ComparisonStrength : Byte;
  395. end;
  396. const
  397. SECONDARY_STRENGTH_LEVEL = 2;
  398. function CompareUnicodeString(const s1, s2 : UnicodeString;Options : TCompareOptions) : PtrInt;
  399. function DoCompare() : PtrInt;
  400. var
  401. changedProps : TChangedPropsRecord;
  402. begin
  403. changedProps.ComparisonStrength := current_Collation.Data.ComparisonStrength;
  404. try
  405. if (coIgnoreCase in Options) then
  406. current_Collation.Data.ComparisonStrength := SECONDARY_STRENGTH_LEVEL;
  407. Result:=CompareUnicodeString(
  408. PUnicodeChar(Pointer(s1)),
  409. PUnicodeChar(Pointer(s2)),
  410. Length(s1),Length(s2)
  411. );
  412. finally
  413. current_Collation.Data.ComparisonStrength := changedProps.ComparisonStrength;
  414. end;
  415. end;
  416. begin
  417. if (current_Collation.DataPtr=nil) then
  418. exit(OldManager.CompareUnicodeStringProc(s1,s2,Options));
  419. if (Options=[]) then begin
  420. exit(
  421. CompareUnicodeString(
  422. PUnicodeChar(Pointer(s1)),
  423. PUnicodeChar(Pointer(s2)),
  424. Length(s1),Length(s2)
  425. )
  426. );
  427. end;
  428. Result:=DoCompare();
  429. end;
  430. function CompareWideString(const s1, s2 : WideString; Options : TCompareOptions) : PtrInt;
  431. function DoCompare() : PtrInt;
  432. var
  433. changedProps : TChangedPropsRecord;
  434. begin
  435. changedProps.ComparisonStrength := current_Collation.Data.ComparisonStrength;
  436. try
  437. if (coIgnoreCase in Options) then
  438. current_Collation.Data.ComparisonStrength := SECONDARY_STRENGTH_LEVEL;
  439. Result:=CompareUnicodeString(
  440. PUnicodeChar(Pointer(s1)),
  441. PUnicodeChar(Pointer(s2)),
  442. Length(s1),Length(s2)
  443. );
  444. finally
  445. current_Collation.Data.ComparisonStrength := changedProps.ComparisonStrength;
  446. end;
  447. end;
  448. begin
  449. if (current_Collation.DataPtr=nil) then
  450. exit(OldManager.CompareUnicodeStringProc(s1,s2,Options));
  451. if (Options=[]) then begin
  452. exit(
  453. CompareUnicodeString(
  454. PUnicodeChar(Pointer(s1)),
  455. PUnicodeChar(Pointer(s2)),
  456. Length(s1),Length(s2)
  457. )
  458. );
  459. end;
  460. Result:=DoCompare();
  461. end;
  462. function CompareTextUnicodeString(const s1, s2 : UnicodeString) : PtrInt;
  463. begin
  464. Result:=CompareUnicodeString(s1,s2,[coIgnoreCase]);
  465. end;
  466. function CompareTextWideString(const s1, s2 : WideString) : PtrInt;
  467. begin
  468. Result:=CompareWideString(s1,s2,[coIgnoreCase]);
  469. end;
  470. procedure EnsureAnsiLen(var S: AnsiString; const len: SizeInt); inline;
  471. begin
  472. if (len>length(s)) then
  473. if (length(s) < 10*256) then
  474. setlength(s,length(s)+10)
  475. else
  476. setlength(s,length(s)+length(s) shr 8);
  477. end;
  478. procedure ConcatCharToAnsiStr(const c: AnsiChar; var S: AnsiString; var index: SizeInt);
  479. begin
  480. EnsureAnsiLen(s,index);
  481. pansichar(@s[index])^:=c;
  482. inc(index);
  483. end;
  484. function UpperAnsiString(const s : ansistring) : ansistring;
  485. var
  486. p : PAnsiChar;
  487. i,resindex : SizeInt;
  488. mblen : SizeInt;
  489. us,usl : UnicodeString;
  490. locMap : punicodemap;
  491. ulen,slen : SizeUint;
  492. k,aalen,ai : SizeInt;
  493. aa : array[0..8] of AnsiChar;
  494. begin
  495. if (Length(s)=0) then
  496. exit('');
  497. if (DefaultSystemCodePage=CP_UTF8) then
  498. begin
  499. //convert to UnicodeString,uppercase,convert back to utf8
  500. ulen:=Utf8ToUnicode(nil,high(SizeUint),@s[1],Length(s));
  501. if ulen>0 then
  502. SetLength(us,ulen-1);
  503. Utf8ToUnicode(@us[1],ulen,@s[1],Length(s));
  504. us:=UpperUnicodeString(us);
  505. ulen:=Length(us);
  506. slen:=UnicodeToUtf8(nil,high(SizeUInt),@us[1],ulen);
  507. SetLength(Result,slen);
  508. UnicodeToUtf8(@Result[1],slen,@us[1],ulen);
  509. exit;
  510. end;
  511. locMap:=FindMap(DefaultSystemCodePage);
  512. if (locMap=nil) then
  513. exit(System.UpCase(s));
  514. SetLength(us,2);
  515. p:=@s[1];
  516. slen:=length(s);
  517. SetLength(result,slen+10);
  518. i:=1;
  519. resindex:=1;
  520. while (i<=slen) do
  521. begin
  522. mblen:=CodePointLength(p,slen-i);
  523. if (mblen<=0) then
  524. begin
  525. ConcatCharToAnsiStr(p^,result,resindex);
  526. mblen:=1;
  527. end
  528. else
  529. begin
  530. SetLength(us,2);
  531. ulen:=getunicode(p,mblen,locMap,@us[1]);
  532. if (Length(us)<>ulen) then
  533. SetLength(us,ulen);
  534. usl:=UpperUnicodeString(us);
  535. for k:=1 to Length(usl) do
  536. begin
  537. aalen:=getascii(tunicodechar(us[k]),locMap,@aa[Low(aa)],Length(aa));
  538. for ai:=0 to aalen-1 do
  539. ConcatCharToAnsiStr(aa[ai],result,resindex);
  540. end;
  541. end;
  542. Inc(p,mblen);
  543. end;
  544. SetLength(result,resindex-1);
  545. end;
  546. function LowerAnsiString(const s : ansistring) : ansistring;
  547. var
  548. p : PAnsiChar;
  549. i,resindex : SizeInt;
  550. mblen : SizeInt;
  551. us,usl : UnicodeString;
  552. locMap : punicodemap;
  553. k,aalen,ai : SizeInt;
  554. slen, ulen : SizeUInt;
  555. aa : array[0..8] of AnsiChar;
  556. begin
  557. if (Length(s)=0) then
  558. exit('');
  559. if (DefaultSystemCodePage=CP_UTF8) then
  560. begin
  561. //convert to UnicodeString,lowercase,convert back to utf8
  562. ulen:=Utf8ToUnicode(nil,high(SizeUInt),@s[1],Length(s));
  563. if ulen>0 then
  564. SetLength(us,ulen-1);
  565. Utf8ToUnicode(@us[1],ulen,@s[1],Length(s));
  566. us:=LowerUnicodeString(us);
  567. ulen:=Length(us);
  568. slen:=UnicodeToUtf8(nil,high(SizeUInt),@us[1],ulen);
  569. SetLength(Result,slen);
  570. UnicodeToUtf8(@Result[1],slen,@us[1],ulen);
  571. exit;
  572. end;
  573. locMap:=FindMap(DefaultSystemCodePage);
  574. if (locMap=nil) then
  575. exit(System.LowerCase(s));
  576. SetLength(us,2);
  577. p:=@s[1];
  578. slen:=length(s);
  579. SetLength(result,slen+10);
  580. i:=1;
  581. resindex:=1;
  582. while (i<=slen) do
  583. begin
  584. mblen:=CodePointLength(p,slen-i);
  585. if (mblen<=0) then
  586. begin
  587. ConcatCharToAnsiStr(p^,result,resindex);
  588. mblen:=1;
  589. end
  590. else
  591. begin
  592. SetLength(us,2);
  593. ulen:=getunicode(p,mblen,locMap,@us[1]);
  594. if (Length(us)<>ulen) then
  595. SetLength(us,ulen);
  596. usl:=LowerUnicodeString(us);
  597. for k:=1 to Length(usl) do
  598. begin
  599. aalen:=getascii(tunicodechar(us[k]),locMap,@aa[Low(aa)],Length(aa));
  600. for ai:=0 to aalen-1 do
  601. ConcatCharToAnsiStr(aa[ai],result,resindex);
  602. end;
  603. end;
  604. Inc(p,mblen);
  605. end;
  606. SetLength(result,resindex-1);
  607. end;
  608. procedure ansi2pchar(const s: ansistring; const orgp: pansichar; out p: pansichar);
  609. var
  610. newlen: sizeint;
  611. begin
  612. newlen:=length(s);
  613. if newlen>strlen(orgp) then
  614. fpc_rangeerror;
  615. p:=orgp;
  616. if (newlen>0) then
  617. move(s[1],p[0],newlen);
  618. p[newlen]:=#0;
  619. end;
  620. function AnsiStrLower(Str: PAnsiChar): PAnsiChar;
  621. var
  622. temp: ansistring;
  623. begin
  624. temp:=LowerAnsiString(str);
  625. ansi2pchar(temp,str,result);
  626. end;
  627. function AnsiStrUpper(Str: PAnsiChar): PAnsiChar;
  628. var
  629. temp: ansistring;
  630. begin
  631. temp:=UpperAnsiString(str);
  632. ansi2pchar(temp,str,result);
  633. end;
  634. function CharLengthPChar(const Str: PAnsiChar): PtrInt;
  635. var
  636. len:PtrInt;
  637. nextlen: ptrint;
  638. s: PAnsiChar;
  639. begin
  640. Result:=0;
  641. if (Str=nil) or (Byte(Str^)=0) then
  642. exit;
  643. s:=str;
  644. len:=strlen(s);
  645. repeat
  646. nextlen:=CodePointLength(s,len);
  647. { skip invalid/incomplete sequences }
  648. if (nextlen<0) then
  649. nextlen:=1;
  650. Inc(result,nextlen);
  651. Inc(s,nextlen);
  652. Dec(len,nextlen);
  653. until (nextlen=0);
  654. end;
  655. function InternalCompareStrAnsiString(
  656. const S1, S2 : PAnsiChar;
  657. const Len1, Len2 : PtrUInt
  658. ) : PtrInt;inline;
  659. var
  660. a, b : UnicodeString;
  661. begin
  662. a := '';
  663. Ansi2UnicodeMove(S1,DefaultSystemCodePage,a,Len1);
  664. b := '';
  665. Ansi2UnicodeMove(S2,DefaultSystemCodePage,b,Len2);
  666. Result := CompareUnicodeString(a,b,[]);
  667. end;
  668. function StrLCompAnsiString(S1, S2: PAnsiChar; MaxLen: PtrUInt): PtrInt;
  669. begin
  670. if (current_Collation.DataPtr=nil) then
  671. exit(OldManager.StrLCompAnsiStringProc(s1,s2,MaxLen));
  672. if (MaxLen=0) then
  673. exit(0);
  674. Result := InternalCompareStrAnsiString(S1,S2,MaxLen,MaxLen);
  675. end;
  676. function CompareStrAnsiString(const S1, S2: ansistring): PtrInt;
  677. var
  678. l1, l2 : PtrInt;
  679. begin
  680. if (current_Collation.DataPtr=nil) and Assigned(OldManager.CompareStrAnsiStringProc) then
  681. Exit(OldManager.CompareStrAnsiStringProc(s1,s2));
  682. if (Pointer(S1)=Pointer(S2)) then
  683. Exit(0);
  684. l1:=Length(S1);
  685. l2:=Length(S2);
  686. if (l1=0) or (l2=0) then
  687. Exit(l1-l2);
  688. Result := InternalCompareStrAnsiString(@S1[1],@S2[1],l1,l2);
  689. end;
  690. function CompareTextAnsiString(const S1, S2: ansistring): PtrInt;
  691. var
  692. a,b : ansistring;
  693. begin
  694. a:=UpperAnsistring(s1);
  695. b:=UpperAnsistring(s2);
  696. Result:=CompareStrAnsiString(a,b);
  697. end;
  698. function StrCompAnsiString(S1, S2: PAnsiChar): PtrInt;
  699. var
  700. l1,l2 : PtrInt;
  701. begin
  702. if (current_Collation.DataPtr=nil) then
  703. exit(OldManager.StrCompAnsiStringProc(s1,s2));
  704. l1:=strlen(S1);
  705. l2:=strlen(S2);
  706. Result := InternalCompareStrAnsiString(S1,S2,l1,l2);
  707. end;
  708. function StrLICompAnsiString(S1, S2: PAnsiChar; MaxLen: PtrUInt): PtrInt;
  709. var
  710. a, b: ansistring;
  711. begin
  712. if (MaxLen=0) then
  713. exit(0);
  714. SetLength(a,MaxLen);
  715. Move(s1^,a[1],MaxLen);
  716. SetLength(b,MaxLen);
  717. Move(s2^,b[1],MaxLen);
  718. Result:=CompareTextAnsiString(a,b);
  719. end;
  720. function StrICompAnsiString(S1, S2: PAnsiChar): PtrInt;
  721. begin
  722. Result:=CompareTextAnsiString(ansistring(s1),ansistring(s2));
  723. end;
  724. function StrLowerAnsiString(Str: PAnsiChar): PAnsiChar;
  725. var
  726. temp: ansistring;
  727. begin
  728. temp:=LowerAnsiString(str);
  729. ansi2pchar(temp,str,result);
  730. end;
  731. function StrUpperAnsiString(Str: PAnsiChar): PAnsiChar;
  732. var
  733. temp: ansistring;
  734. begin
  735. temp:=UpperAnsiString(str);
  736. ansi2pchar(temp,str,result);
  737. end;
  738. //------------------------------------------------------------------------------
  739. procedure SetPascalWideStringManager();
  740. var
  741. locWideStringManager : TUnicodeStringManager;
  742. begin
  743. OldManager := widestringmanager;
  744. locWideStringManager:=widestringmanager;
  745. With locWideStringManager do
  746. begin
  747. Wide2AnsiMoveProc:=@Unicode2AnsiMove;
  748. {$ifndef FPC_WIDESTRING_EQUAL_UNICODESTRING}
  749. Ansi2WideMoveProc:=@Ansi2WideMove;
  750. UpperWideStringProc:=@UpperWideString;
  751. LowerWideStringProc:=@LowerWideString;
  752. CompareWideStringProc:=@CompareWideString;
  753. {$else FPC_WIDESTRING_EQUAL_UNICODESTRING}
  754. Ansi2WideMoveProc:=@Ansi2UnicodeMove;
  755. UpperWideStringProc:=@UpperUnicodeString;
  756. LowerWideStringProc:=@LowerUnicodeString;
  757. CompareWideStringProc:=@CompareUnicodeString;
  758. {$endif FPC_WIDESTRING_EQUAL_UNICODESTRING}
  759. CharLengthPCharProc:=@CharLengthPChar;
  760. CodePointLengthProc:=@CodePointLength;
  761. UpperAnsiStringProc:=@UpperAnsiString;
  762. LowerAnsiStringProc:=@LowerAnsiString;
  763. CompareStrAnsiStringProc:=@CompareStrAnsiString;
  764. CompareTextAnsiStringProc:=@CompareTextAnsiString;
  765. StrCompAnsiStringProc:=@StrCompAnsiString;
  766. StrICompAnsiStringProc:=@StrICompAnsiString;
  767. StrLCompAnsiStringProc:=@StrLCompAnsiString;
  768. StrLICompAnsiStringProc:=@StrLICompAnsiString;
  769. StrLowerAnsiStringProc:=@StrLowerAnsiString;
  770. StrUpperAnsiStringProc:=@StrUpperAnsiString;
  771. ThreadInitProc:=@InitThread;
  772. ThreadFiniProc:=@FiniThread;
  773. { Unicode }
  774. Unicode2AnsiMoveProc:=@Unicode2AnsiMove;
  775. Ansi2UnicodeMoveProc:=@Ansi2UnicodeMove;
  776. UpperUnicodeStringProc:=@UpperUnicodeString;
  777. LowerUnicodeStringProc:=@LowerUnicodeString;
  778. CompareUnicodeStringProc:=@CompareUnicodeString;
  779. end;
  780. SetUnicodeStringManager(locWideStringManager);
  781. DefaultUnicodeCodePage:=CP_UTF16;
  782. {$ifdef MSWINDOWS}
  783. DefaultSystemCodePage:=GetACP();
  784. {$ELSE MSWINDOWS}
  785. {$ifdef UNIX}
  786. DefaultSystemCodePage:=GetSystemCodepage;
  787. if (DefaultSystemCodePage = CP_NONE) then
  788. DefaultSystemCodePage:=CP_UTF8;
  789. {$ifdef FPCRTL_FILESYSTEM_UTF8}
  790. DefaultFileSystemCodePage:=CP_UTF8;
  791. {$else}
  792. DefaultFileSystemCodePage:=DefaultSystemCodepage;
  793. {$endif}
  794. DefaultRTLFileSystemCodePage:=DefaultFileSystemCodePage;
  795. {$ELSE UNIX}
  796. if Assigned (WideStringManager.GetStandardCodePageProc) then
  797. DefaultSystemCodePage := WideStringManager.GetStandardCodePageProc (scpAnsi)
  798. else
  799. DefaultSystemCodePage := CP_NONE;
  800. DefaultFileSystemCodePage := DefaultSystemCodePage;
  801. DefaultRTLFileSystemCodePage := DefaultSystemCodePage;
  802. {$endif UNIX}
  803. {$endif MSWINDOWS}
  804. end;
  805. initialization
  806. current_Collation.DataPtr := nil;
  807. SetPascalWideStringManager();
  808. InitThread();
  809. finalization
  810. FiniThread();
  811. end.