fpwidestring.pp 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886
  1. unit fpwidestring;
  2. {$mode objfpc}
  3. interface
  4. uses
  5. unicodedata;
  6. {$i rtldefs.inc}
  7. function SetActiveCollation(const AName : UnicodeString) : Boolean;
  8. function SetActiveCollation(const ACollation : PUCA_DataBook) : Boolean;
  9. function GetActiveCollation() : PUCA_DataBook;
  10. var
  11. DefaultCollationName : UnicodeString = '';
  12. implementation
  13. uses
  14. {$ifdef MSWINDOWS}
  15. Windows,
  16. {$endif MSWINDOWS}
  17. {$ifdef Unix}
  18. unixcp,
  19. {$endif}
  20. charset;
  21. procedure fpc_rangeerror; [external name 'FPC_RANGEERROR'];
  22. {$ifdef MSWINDOWS}
  23. function GetACP:UINT; external 'kernel32' name 'GetACP';
  24. {$endif MSWINDOWS}
  25. const
  26. IgnoreInvalidSequenceFlag = True;
  27. var
  28. OldManager : TUnicodeStringManager;
  29. {$ifdef FPC_HAS_FEATURE_THREADING}
  30. ThreadVar
  31. {$else FPC_HAS_FEATURE_THREADING}
  32. Var
  33. {$endif FPC_HAS_FEATURE_THREADING}
  34. current_DefaultSystemCodePage : TSystemCodePage;
  35. current_Map : punicodemap;
  36. current_Collation : record
  37. DataPtr : PUCA_DataBook;
  38. Data : TUCA_DataBook;
  39. end;
  40. function SetActiveCollation(const ACollation : PUCA_DataBook) : Boolean;
  41. begin
  42. Result := (ACollation <> nil);
  43. if Result then begin
  44. current_Collation.Data := ACollation^;
  45. current_Collation.DataPtr := @current_Collation.Data;
  46. end;
  47. end;
  48. function SetActiveCollation(const AName : UnicodeString) : Boolean;
  49. var
  50. c : PUCA_DataBook;
  51. begin
  52. c:=FindCollation(AName);
  53. Result := (c <> nil);
  54. if Result then
  55. Result := SetActiveCollation(c);
  56. end;
  57. function GetActiveCollation() : PUCA_DataBook;
  58. begin
  59. Result := current_Collation.DataPtr;
  60. end;
  61. {procedure error_CpNotFound(ACodePage:TSystemCodePage);
  62. begin
  63. System.error(reCodesetConversion);
  64. end;}
  65. procedure InitThread;
  66. var
  67. c : PUCA_DataBook;
  68. begin
  69. current_DefaultSystemCodePage:=DefaultSystemCodePage;
  70. current_Map:=getmap(current_DefaultSystemCodePage);
  71. c:=nil;
  72. if (DefaultCollationName<>'') then
  73. c:=FindCollation(DefaultCollationName);
  74. if (c=nil) and (GetCollationCount()>0) then
  75. c:=FindCollation(0);
  76. if (c<>nil) then
  77. SetActiveCollation(c);
  78. end;
  79. procedure FiniThread;
  80. begin
  81. current_Map:=nil;
  82. end;
  83. function FindMap(const cp: TSystemCodePage): punicodemap;inline;
  84. begin
  85. if (cp=DefaultSystemCodePage) then
  86. begin
  87. { update current_Map in case the DefaultSystemCodePage has been changed }
  88. if (current_DefaultSystemCodePage<>DefaultSystemCodePage) or not Assigned(current_Map) then
  89. begin
  90. FiniThread;
  91. InitThread;
  92. end;
  93. Result:=current_Map;
  94. end
  95. else
  96. Result:=getmap(cp);
  97. end;
  98. { return value:
  99. -1 if incomplete or invalid code point
  100. 0 if NULL character,
  101. > 0 if that's the length in bytes of the code point }
  102. function UTF8CodePointLength(const Str: PAnsiChar; MaxLookAead: PtrInt): Ptrint;
  103. {... taken from ustrings.inc}
  104. var
  105. p: PByte;
  106. TempBYTE: Byte;
  107. CharLen: SizeUint;
  108. LookAhead: SizeUInt;
  109. UC: SizeUInt;
  110. begin
  111. if (Str=nil) then
  112. exit(0);
  113. p:=PByte(Str);
  114. if (p^=0) then
  115. exit(0);
  116. p:=PByte(Str);
  117. if (p^ and $80) = 0 then //One character US-ASCII,
  118. exit(1);
  119. TempByte:=p^;
  120. CharLen:=0;
  121. while (TempByte and $80)<>0 do
  122. begin
  123. TempByte:=(TempByte shl 1) and $FE;
  124. Inc(CharLen);
  125. end;
  126. //Test for the "CharLen" conforms UTF-8 string
  127. //This means the 10xxxxxx pattern.
  128. if SizeUInt(CharLen-1)>MaxLookAead then //Insuficient chars in string to decode UTF-8 array
  129. exit(-1);
  130. for LookAhead := 1 to CharLen-1 do
  131. begin
  132. if ((p[LookAhead] and $80)<>$80) or
  133. ((p[LookAhead] and $40)<>$00)
  134. then
  135. begin
  136. //Invalid UTF-8 sequence, fallback.
  137. exit(-1);
  138. end;
  139. end;
  140. Result:=CharLen;
  141. case CharLen of
  142. 1: begin
  143. //Not valid UTF-8 sequence
  144. Result:=-1;
  145. end;
  146. 2: begin
  147. //Two bytes UTF, convert it
  148. UC:=(p^ and $1F) shl 6;
  149. UC:=UC or (p[1] and $3F);
  150. if UC <= $7F then
  151. begin
  152. //Invalid UTF sequence.
  153. Result:=-1;
  154. end;
  155. end;
  156. 3: begin
  157. //Three bytes, convert it to unicode
  158. UC:= (p^ and $0F) shl 12;
  159. UC:= UC or ((p[1] and $3F) shl 6);
  160. UC:= UC or ((p[2] and $3F));
  161. if (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
  162. begin
  163. //Invalid UTF-8 sequence
  164. Result:=-1;
  165. End;
  166. end;
  167. 4: begin
  168. //Four bytes, convert it to two unicode characters
  169. UC:= (p^ and $07) shl 18;
  170. UC:= UC or ((p[1] and $3F) shl 12);
  171. UC:= UC or ((p[2] and $3F) shl 6);
  172. UC:= UC or ((p[3] and $3F));
  173. if (UC < $10000) or (UC > $10FFFF) then
  174. begin
  175. Result:=-1;
  176. end
  177. end;
  178. 5,6,7: begin
  179. //Invalid UTF8 to unicode conversion,
  180. //mask it as invalid UNICODE too.
  181. Result:=-1;
  182. end;
  183. end;
  184. end;
  185. { return value:
  186. -1 if incomplete or invalid code point
  187. 0 if NULL character,
  188. > 0 if that's the length in bytes of the code point }
  189. function CodePointLength(const Str: PAnsiChar; MaxLookAead: PtrInt): PtrInt;
  190. var
  191. p : PByte;
  192. begin
  193. if (current_DefaultSystemCodePage=CP_UTF8) then
  194. exit(UTF8CodePointLength(Str,MaxLookAead));
  195. if (Str=nil) then
  196. exit(0);
  197. p:=PByte(Str);
  198. if (p^=0) then
  199. exit(0);
  200. if (current_Map=nil) then
  201. exit(1);
  202. if (p^>current_Map^.lastchar) then
  203. exit(-1);
  204. case current_Map^.map[p^].flag of
  205. umf_undefined : Result:=-1;
  206. umf_leadbyte :
  207. begin
  208. if (MaxLookAead>0) then
  209. Result:=2
  210. else
  211. Result:=-1;
  212. end;
  213. else
  214. Result:=1;
  215. end;
  216. end;
  217. procedure Unicode2AnsiMove(source:punicodechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt);
  218. var
  219. locSource : punicodechar;
  220. locMap : punicodemap;
  221. destBuffer : PAnsiChar;
  222. destLen,actualLen, i : SizeInt;
  223. blockLen : SizeInt;
  224. begin
  225. if (len=0) then
  226. begin
  227. SetLength(dest,0);
  228. exit;
  229. end;
  230. if (cp=CP_UTF8) then
  231. begin
  232. destLen:=UnicodeToUtf8(nil,High(SizeUInt),source,len);
  233. SetLength(dest,destLen-1);
  234. UnicodeToUtf8(@dest[1],destLen,source,len);
  235. SetCodePage(dest,cp,False);
  236. exit;
  237. end;
  238. if (cp=CP_UTF16) then
  239. begin
  240. destLen:=len*SizeOf(UnicodeChar);
  241. SetLength(dest,destLen);
  242. Move(source^,dest[1],destLen);
  243. SetCodePage(dest,cp,False);
  244. exit;
  245. end;
  246. locMap:=FindMap(cp);
  247. if (locMap=nil) then
  248. begin
  249. DefaultUnicode2AnsiMove(source,dest,DefaultSystemCodePage,len);
  250. exit;
  251. end;
  252. destLen:=3*len;
  253. SetLength(dest,destLen);
  254. destBuffer:=@dest[1];
  255. actualLen:=0;
  256. locSource:=source;
  257. for i:=1 to len do
  258. begin
  259. blockLen:=getascii(tunicodechar(locSource^),locMap,destBuffer,(destLen-actualLen));
  260. if (blockLen<0) then
  261. begin
  262. destLen:=destLen + 3*(1+len-i);
  263. SetLength(dest,destLen);
  264. destBuffer:=@dest[1];
  265. blockLen:=getascii(tunicodechar(locSource^),locMap,destBuffer,(destLen-actualLen));
  266. end;
  267. Inc(destBuffer,blockLen);
  268. actualLen:=actualLen+blockLen;
  269. Inc(locSource);
  270. end;
  271. if (actualLen<>Length(dest)) then
  272. SetLength(dest,actualLen);
  273. if (Length(dest)>0) then
  274. SetCodePage(dest,cp,False);
  275. end;
  276. procedure Ansi2UnicodeMove(source:PAnsiChar; cp:TSystemCodePage; var dest:UnicodeString; len:SizeInt);
  277. var
  278. locMap : punicodemap;
  279. destLen : SizeInt;
  280. begin
  281. if (len<=0) then
  282. begin
  283. SetLength(dest,0);
  284. exit;
  285. end;
  286. if (cp=CP_UTF8) then
  287. begin
  288. destLen:=Utf8ToUnicode(nil,source,len);
  289. if destLen > 0 then
  290. SetLength(dest,destLen-1)
  291. else
  292. SetLength(dest,0);
  293. Utf8ToUnicode(@dest[1],source,len);
  294. exit;
  295. end;
  296. if (cp=CP_UTF16) then
  297. begin
  298. //what if (len mod 2) > 0 ?
  299. destLen:=len div SizeOf(UnicodeChar);
  300. SetLength(dest,destLen);
  301. Move(source^,dest[1],(destLen*SizeOf(UnicodeChar)));
  302. exit;
  303. end;
  304. locMap:=FindMap(cp);
  305. if (locMap=nil) then
  306. begin
  307. DefaultAnsi2UnicodeMove(source,DefaultSystemCodePage,dest,len);
  308. exit;
  309. end;
  310. destLen:=getunicode(source,len,locMap,nil);
  311. SetLength(dest,destLen);
  312. getunicode(source,len,locMap,tunicodestring(@dest[1]));
  313. end;
  314. {$ifdef MSWINDOWS}
  315. procedure Ansi2WideMove(source:PAnsiChar; cp:TSystemCodePage; var dest:WideString; len:SizeInt);
  316. var
  317. locMap : punicodemap;
  318. destLen : SizeInt;
  319. begin
  320. if (len<=0) then
  321. begin
  322. SetLength(dest,0);
  323. exit;
  324. end;
  325. locMap:=FindMap(cp);
  326. if (locMap=nil) then
  327. begin
  328. DefaultAnsi2WideMove(source,DefaultSystemCodePage,dest,len);
  329. exit;
  330. end;
  331. destLen:=getunicode(source,len,locMap,nil);
  332. SetLength(dest,destLen);
  333. getunicode(source,len,locMap,tunicodestring(@dest[1]));
  334. end;
  335. {$endif MSWINDOWS}
  336. function UpperUnicodeString(const S: UnicodeString): UnicodeString;
  337. begin
  338. if (UnicodeToUpper(S,IgnoreInvalidSequenceFlag,Result) <> 0) then
  339. system.error(reRangeError);
  340. end;
  341. function UpperWideString(const S: WideString): WideString;
  342. var
  343. u : UnicodeString;
  344. begin
  345. u:=s;
  346. Result:=UpperUnicodeString(u);
  347. end;
  348. function LowerUnicodeString(const S: UnicodeString): UnicodeString;
  349. begin
  350. if (UnicodeToLower(S,IgnoreInvalidSequenceFlag,Result) <> 0) then
  351. system.error(reRangeError);
  352. end;
  353. function LowerWideString(const S: WideString): WideString;
  354. var
  355. u : UnicodeString;
  356. begin
  357. u:=s;
  358. Result:=LowerUnicodeString(u);
  359. end;
  360. function CompareUnicodeStringUCA(p1,p2:PUnicodeChar; l1,l2:PtrInt) : PtrInt;inline;
  361. begin
  362. Result := IncrementalCompareString(p1,l1,p2,l2,current_Collation.DataPtr);
  363. end;
  364. function CompareUnicodeString(p1,p2:PUnicodeChar; l1,l2:PtrInt) : PtrInt;inline;
  365. begin
  366. if (Pointer(p1)=Pointer(p2)) then
  367. exit(0);
  368. if (l1=0) then
  369. exit(-l2);
  370. if (l2=0) then
  371. exit(l1);
  372. Result := CompareUnicodeStringUCA(p1,p2,l1,l2);
  373. end;
  374. type
  375. TChangedPropsRecord = record
  376. ComparisonStrength : Byte;
  377. end;
  378. const
  379. SECONDARY_STRENGTH_LEVEL = 2;
  380. function CompareUnicodeString(const s1, s2 : UnicodeString;Options : TCompareOptions) : PtrInt;
  381. function DoCompare() : PtrInt;
  382. var
  383. changedProps : TChangedPropsRecord;
  384. begin
  385. changedProps.ComparisonStrength := current_Collation.Data.ComparisonStrength;
  386. try
  387. if (coIgnoreCase in Options) then
  388. current_Collation.Data.ComparisonStrength := SECONDARY_STRENGTH_LEVEL;
  389. Result:=CompareUnicodeString(
  390. PUnicodeChar(Pointer(s1)),
  391. PUnicodeChar(Pointer(s2)),
  392. Length(s1),Length(s2)
  393. );
  394. finally
  395. current_Collation.Data.ComparisonStrength := changedProps.ComparisonStrength;
  396. end;
  397. end;
  398. begin
  399. if (current_Collation.DataPtr=nil) then
  400. exit(OldManager.CompareUnicodeStringProc(s1,s2,Options));
  401. if (Options=[]) then begin
  402. exit(
  403. CompareUnicodeString(
  404. PUnicodeChar(Pointer(s1)),
  405. PUnicodeChar(Pointer(s2)),
  406. Length(s1),Length(s2)
  407. )
  408. );
  409. end;
  410. Result:=DoCompare();
  411. end;
  412. function CompareWideString(const s1, s2 : WideString; Options : TCompareOptions) : PtrInt;
  413. function DoCompare() : PtrInt;
  414. var
  415. changedProps : TChangedPropsRecord;
  416. begin
  417. changedProps.ComparisonStrength := current_Collation.Data.ComparisonStrength;
  418. try
  419. if (coIgnoreCase in Options) then
  420. current_Collation.Data.ComparisonStrength := SECONDARY_STRENGTH_LEVEL;
  421. Result:=CompareUnicodeString(
  422. PUnicodeChar(Pointer(s1)),
  423. PUnicodeChar(Pointer(s2)),
  424. Length(s1),Length(s2)
  425. );
  426. finally
  427. current_Collation.Data.ComparisonStrength := changedProps.ComparisonStrength;
  428. end;
  429. end;
  430. begin
  431. if (current_Collation.DataPtr=nil) then
  432. exit(OldManager.CompareUnicodeStringProc(s1,s2,Options));
  433. if (Options=[]) then begin
  434. exit(
  435. CompareUnicodeString(
  436. PUnicodeChar(Pointer(s1)),
  437. PUnicodeChar(Pointer(s2)),
  438. Length(s1),Length(s2)
  439. )
  440. );
  441. end;
  442. Result:=DoCompare();
  443. end;
  444. function CompareTextUnicodeString(const s1, s2 : UnicodeString) : PtrInt;
  445. begin
  446. Result:=CompareUnicodeString(s1,s2,[coIgnoreCase]);
  447. end;
  448. function CompareTextWideString(const s1, s2 : WideString) : PtrInt;
  449. begin
  450. Result:=CompareWideString(s1,s2,[coIgnoreCase]);
  451. end;
  452. procedure EnsureAnsiLen(var S: AnsiString; const len: SizeInt); inline;
  453. begin
  454. if (len>length(s)) then
  455. if (length(s) < 10*256) then
  456. setlength(s,length(s)+10)
  457. else
  458. setlength(s,length(s)+length(s) shr 8);
  459. end;
  460. procedure ConcatCharToAnsiStr(const c: AnsiChar; var S: AnsiString; var index: SizeInt);
  461. begin
  462. EnsureAnsiLen(s,index);
  463. pansichar(@s[index])^:=c;
  464. inc(index);
  465. end;
  466. function UpperAnsiString(const s : ansistring) : ansistring;
  467. var
  468. p : PAnsiChar;
  469. i, slen,
  470. resindex : SizeInt;
  471. mblen : SizeInt;
  472. us,usl : UnicodeString;
  473. locMap : punicodemap;
  474. ulen,k,
  475. aalen,ai : SizeInt;
  476. aa : array[0..8] of AnsiChar;
  477. begin
  478. if (Length(s)=0) then
  479. exit('');
  480. if (DefaultSystemCodePage=CP_UTF8) then
  481. begin
  482. //convert to UnicodeString,uppercase,convert back to utf8
  483. ulen:=Utf8ToUnicode(nil,@s[1],Length(s));
  484. if ulen>0 then
  485. SetLength(us,ulen-1);
  486. Utf8ToUnicode(@us[1],@s[1],Length(s));
  487. us:=UpperUnicodeString(us);
  488. ulen:=Length(us);
  489. slen:=UnicodeToUtf8(nil,0,@us[1],ulen);
  490. SetLength(Result,slen);
  491. UnicodeToUtf8(@Result[1],slen,@us[1],ulen);
  492. exit;
  493. end;
  494. locMap:=FindMap(DefaultSystemCodePage);
  495. if (locMap=nil) then
  496. exit(System.UpCase(s));
  497. SetLength(us,2);
  498. p:=@s[1];
  499. slen:=length(s);
  500. SetLength(result,slen+10);
  501. i:=1;
  502. resindex:=1;
  503. while (i<=slen) do
  504. begin
  505. mblen:=CodePointLength(p,slen-i);
  506. if (mblen<=0) then
  507. begin
  508. ConcatCharToAnsiStr(p^,result,resindex);
  509. mblen:=1;
  510. end
  511. else
  512. begin
  513. SetLength(us,2);
  514. ulen:=getunicode(p,mblen,locMap,@us[1]);
  515. if (Length(us)<>ulen) then
  516. SetLength(us,ulen);
  517. usl:=UpperUnicodeString(us);
  518. for k:=1 to Length(usl) do
  519. begin
  520. aalen:=getascii(tunicodechar(us[k]),locMap,@aa[Low(aa)],Length(aa));
  521. for ai:=0 to aalen-1 do
  522. ConcatCharToAnsiStr(aa[ai],result,resindex);
  523. end;
  524. end;
  525. Inc(p,mblen);
  526. end;
  527. SetLength(result,resindex-1);
  528. end;
  529. function LowerAnsiString(const s : ansistring) : ansistring;
  530. var
  531. p : PAnsiChar;
  532. i, slen,
  533. resindex : SizeInt;
  534. mblen : SizeInt;
  535. us,usl : UnicodeString;
  536. locMap : punicodemap;
  537. ulen,k,
  538. aalen,ai : SizeInt;
  539. aa : array[0..8] of AnsiChar;
  540. begin
  541. if (Length(s)=0) then
  542. exit('');
  543. if (DefaultSystemCodePage=CP_UTF8) then
  544. begin
  545. //convert to UnicodeString,lowercase,convert back to utf8
  546. ulen:=Utf8ToUnicode(nil,@s[1],Length(s));
  547. if ulen>0 then
  548. SetLength(us,ulen-1);
  549. Utf8ToUnicode(@us[1],@s[1],Length(s));
  550. us:=LowerUnicodeString(us);
  551. ulen:=Length(us);
  552. slen:=UnicodeToUtf8(nil,0,@us[1],ulen);
  553. SetLength(Result,slen);
  554. UnicodeToUtf8(@Result[1],slen,@us[1],ulen);
  555. exit;
  556. end;
  557. locMap:=FindMap(DefaultSystemCodePage);
  558. if (locMap=nil) then
  559. exit(System.LowerCase(s));
  560. SetLength(us,2);
  561. p:=@s[1];
  562. slen:=length(s);
  563. SetLength(result,slen+10);
  564. i:=1;
  565. resindex:=1;
  566. while (i<=slen) do
  567. begin
  568. mblen:=CodePointLength(p,slen-i);
  569. if (mblen<=0) then
  570. begin
  571. ConcatCharToAnsiStr(p^,result,resindex);
  572. mblen:=1;
  573. end
  574. else
  575. begin
  576. SetLength(us,2);
  577. ulen:=getunicode(p,mblen,locMap,@us[1]);
  578. if (Length(us)<>ulen) then
  579. SetLength(us,ulen);
  580. usl:=LowerUnicodeString(us);
  581. for k:=1 to Length(usl) do
  582. begin
  583. aalen:=getascii(tunicodechar(us[k]),locMap,@aa[Low(aa)],Length(aa));
  584. for ai:=0 to aalen-1 do
  585. ConcatCharToAnsiStr(aa[ai],result,resindex);
  586. end;
  587. end;
  588. Inc(p,mblen);
  589. end;
  590. SetLength(result,resindex-1);
  591. end;
  592. procedure ansi2pchar(const s: ansistring; const orgp: pansichar; out p: pansichar);
  593. var
  594. newlen: sizeint;
  595. begin
  596. newlen:=length(s);
  597. if newlen>strlen(orgp) then
  598. fpc_rangeerror;
  599. p:=orgp;
  600. if (newlen>0) then
  601. move(s[1],p[0],newlen);
  602. p[newlen]:=#0;
  603. end;
  604. function AnsiStrLower(Str: PAnsiChar): PAnsiChar;
  605. var
  606. temp: ansistring;
  607. begin
  608. temp:=LowerAnsiString(str);
  609. ansi2pchar(temp,str,result);
  610. end;
  611. function AnsiStrUpper(Str: PAnsiChar): PAnsiChar;
  612. var
  613. temp: ansistring;
  614. begin
  615. temp:=UpperAnsiString(str);
  616. ansi2pchar(temp,str,result);
  617. end;
  618. function CharLengthPChar(const Str: PAnsiChar): PtrInt;
  619. var
  620. len:PtrInt;
  621. nextlen: ptrint;
  622. s: PAnsiChar;
  623. begin
  624. Result:=0;
  625. if (Str=nil) or (Byte(Str^)=0) then
  626. exit;
  627. s:=str;
  628. len:=strlen(s);
  629. repeat
  630. nextlen:=CodePointLength(s,len);
  631. { skip invalid/incomplete sequences }
  632. if (nextlen<0) then
  633. nextlen:=1;
  634. Inc(result,nextlen);
  635. Inc(s,nextlen);
  636. Dec(len,nextlen);
  637. until (nextlen=0);
  638. end;
  639. function InternalCompareStrAnsiString(
  640. const S1, S2 : PAnsiChar;
  641. const Len1, Len2 : PtrUInt
  642. ) : PtrInt;inline;
  643. var
  644. a, b : UnicodeString;
  645. begin
  646. a := '';
  647. Ansi2UnicodeMove(S1,DefaultSystemCodePage,a,Len1);
  648. b := '';
  649. Ansi2UnicodeMove(S2,DefaultSystemCodePage,b,Len2);
  650. Result := CompareUnicodeString(a,b,[]);
  651. end;
  652. function StrLCompAnsiString(S1, S2: PAnsiChar; MaxLen: PtrUInt): PtrInt;
  653. begin
  654. if (current_Collation.DataPtr=nil) then
  655. exit(OldManager.StrLCompAnsiStringProc(s1,s2,MaxLen));
  656. if (MaxLen=0) then
  657. exit(0);
  658. Result := InternalCompareStrAnsiString(S1,S2,MaxLen,MaxLen);
  659. end;
  660. function CompareStrAnsiString(const S1, S2: ansistring): PtrInt;
  661. var
  662. l1, l2 : PtrInt;
  663. begin
  664. if (current_Collation.DataPtr=nil) then
  665. exit(OldManager.CompareStrAnsiStringProc(s1,s2));
  666. if (Pointer(S1)=Pointer(S2)) then
  667. exit(0);
  668. l1:=Length(S1);
  669. l2:=Length(S2);
  670. if (l1=0) then begin
  671. if (l2=0) then
  672. exit(0);
  673. exit(-l2);
  674. end;
  675. if (l2=0) then
  676. exit(-l1);
  677. Result := InternalCompareStrAnsiString(@S1[1],@S2[2],l1,l2);
  678. end;
  679. function CompareTextAnsiString(const S1, S2: ansistring): PtrInt;
  680. var
  681. a,b : ansistring;
  682. begin
  683. a:=UpperAnsistring(s1);
  684. b:=UpperAnsistring(s2);
  685. Result:=CompareStrAnsiString(a,b);
  686. end;
  687. function StrCompAnsiString(S1, S2: PChar): PtrInt;
  688. var
  689. l1,l2 : PtrInt;
  690. begin
  691. if (current_Collation.DataPtr=nil) then
  692. exit(OldManager.StrCompAnsiStringProc(s1,s2));
  693. l1:=strlen(S1);
  694. l2:=strlen(S2);
  695. Result := InternalCompareStrAnsiString(S1,S2,l1,l2);
  696. end;
  697. function StrLICompAnsiString(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
  698. var
  699. a, b: ansistring;
  700. begin
  701. if (MaxLen=0) then
  702. exit(0);
  703. SetLength(a,MaxLen);
  704. Move(s1^,a[1],MaxLen);
  705. SetLength(b,MaxLen);
  706. Move(s2^,b[1],MaxLen);
  707. Result:=CompareTextAnsiString(a,b);
  708. end;
  709. function StrICompAnsiString(S1, S2: PChar): PtrInt;
  710. begin
  711. Result:=CompareTextAnsiString(ansistring(s1),ansistring(s2));
  712. end;
  713. function StrLowerAnsiString(Str: PChar): PChar;
  714. var
  715. temp: ansistring;
  716. begin
  717. temp:=LowerAnsiString(str);
  718. ansi2pchar(temp,str,result);
  719. end;
  720. function StrUpperAnsiString(Str: PChar): PChar;
  721. var
  722. temp: ansistring;
  723. begin
  724. temp:=UpperAnsiString(str);
  725. ansi2pchar(temp,str,result);
  726. end;
  727. //------------------------------------------------------------------------------
  728. procedure SetPascalWideStringManager();
  729. var
  730. locWideStringManager : TUnicodeStringManager;
  731. begin
  732. OldManager := widestringmanager;
  733. locWideStringManager:=widestringmanager;
  734. With locWideStringManager do
  735. begin
  736. Wide2AnsiMoveProc:=@Unicode2AnsiMove;
  737. {$ifndef FPC_WIDESTRING_EQUAL_UNICODESTRING}
  738. Ansi2WideMoveProc:=@Ansi2WideMove;
  739. UpperWideStringProc:=@UpperWideString;
  740. LowerWideStringProc:=@LowerWideString;
  741. CompareWideStringProc:=@CompareWideString;
  742. {$else FPC_WIDESTRING_EQUAL_UNICODESTRING}
  743. Ansi2WideMoveProc:=@Ansi2UnicodeMove;
  744. UpperWideStringProc:=@UpperUnicodeString;
  745. LowerWideStringProc:=@LowerUnicodeString;
  746. CompareWideStringProc:=@CompareUnicodeString;
  747. {$endif FPC_WIDESTRING_EQUAL_UNICODESTRING}
  748. CharLengthPCharProc:=@CharLengthPChar;
  749. CodePointLengthProc:=@CodePointLength;
  750. UpperAnsiStringProc:=@UpperAnsiString;
  751. LowerAnsiStringProc:=@LowerAnsiString;
  752. CompareStrAnsiStringProc:=@CompareStrAnsiString;
  753. CompareTextAnsiStringProc:=@CompareTextAnsiString;
  754. StrCompAnsiStringProc:=@StrCompAnsiString;
  755. StrICompAnsiStringProc:=@StrICompAnsiString;
  756. StrLCompAnsiStringProc:=@StrLCompAnsiString;
  757. StrLICompAnsiStringProc:=@StrLICompAnsiString;
  758. StrLowerAnsiStringProc:=@StrLowerAnsiString;
  759. StrUpperAnsiStringProc:=@StrUpperAnsiString;
  760. ThreadInitProc:=@InitThread;
  761. ThreadFiniProc:=@FiniThread;
  762. { Unicode }
  763. Unicode2AnsiMoveProc:=@Unicode2AnsiMove;
  764. Ansi2UnicodeMoveProc:=@Ansi2UnicodeMove;
  765. UpperUnicodeStringProc:=@UpperUnicodeString;
  766. LowerUnicodeStringProc:=@LowerUnicodeString;
  767. CompareUnicodeStringProc:=@CompareUnicodeString;
  768. end;
  769. SetUnicodeStringManager(locWideStringManager);
  770. DefaultUnicodeCodePage:=CP_UTF16;
  771. {$ifdef MSWINDOWS}
  772. DefaultSystemCodePage:=GetACP();
  773. {$ELSE MSWINDOWS}
  774. {$ifdef UNIX}
  775. DefaultSystemCodePage:=GetSystemCodepage;
  776. if (DefaultSystemCodePage = CP_NONE) then
  777. DefaultSystemCodePage:=CP_UTF8;
  778. {$ifdef FPCRTL_FILESYSTEM_UTF8}
  779. DefaultFileSystemCodePage:=CP_UTF8;
  780. {$else}
  781. DefaultFileSystemCodePage:=DefaultSystemCodepage;
  782. {$endif}
  783. DefaultRTLFileSystemCodePage:=DefaultFileSystemCodePage;
  784. {$ELSE UNIX}
  785. if Assigned (WideStringManager.GetStandardCodePageProc) then
  786. DefaultSystemCodePage := WideStringManager.GetStandardCodePageProc (scpAnsi)
  787. else
  788. DefaultSystemCodePage := CP_NONE;
  789. DefaultFileSystemCodePage := DefaultSystemCodePage;
  790. DefaultRTLFileSystemCodePage := DefaultSystemCodePage;
  791. {$endif UNIX}
  792. {$endif MSWINDOWS}
  793. end;
  794. initialization
  795. current_Collation.DataPtr := nil;
  796. SetPascalWideStringManager();
  797. InitThread();
  798. finalization
  799. FiniThread();
  800. end.