pas2jsutils.pp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681
  1. {$IFNDEF FPC_DOTTEDUNITS}
  2. unit Pas2JSUtils;
  3. {$ENDIF FPC_DOTTEDUNITS}
  4. {
  5. This file is part of the Free Component Library (FCL)
  6. Copyright (c) 2018 Mattias Gaertner [email protected]
  7. Pascal to Javascript converter class.
  8. See the file COPYING.FPC, included in this distribution,
  9. for details about the copyright.
  10. This program is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  13. **********************************************************************
  14. Abstract:
  15. Utility routines that do not need a filesystem or OS functionality.
  16. Filesystem-specific things should go to pas2jsfileutils instead.
  17. }
  18. {$mode objfpc}{$H+}
  19. // Check whether we need the LANG variable
  20. {$IFDEF Unix}
  21. {$IFNDEF Darwin}
  22. {$DEFINE NEEDLANG}
  23. {$ENDIF}
  24. {$ENDIF}
  25. {$IFDEF WASM32}
  26. {$DEFINE NEEDLANG}
  27. {$ENDIF}
  28. interface
  29. {$IFDEF FPC_DOTTEDUNITS}
  30. uses
  31. System.Classes, System.SysUtils;
  32. {$ELSE FPC_DOTTEDUNITS}
  33. uses
  34. Classes, SysUtils;
  35. {$ENDIF FPC_DOTTEDUNITS}
  36. function ChompPathDelim(const Path: string): string;
  37. function GetNextDelimitedItem(const List: string; Delimiter: Char;
  38. var Position: integer): string;
  39. type
  40. TChangeStamp = SizeInt;
  41. const
  42. InvalidChangeStamp = low(TChangeStamp);
  43. Function IncreaseChangeStamp(Stamp: TChangeStamp) : TChangeStamp;
  44. const
  45. EncodingUTF8 = 'UTF-8';
  46. EncodingSystem = 'System';
  47. function NormalizeEncoding(const Encoding: string): string;
  48. function IsASCII(const s: string): boolean; inline;
  49. {$IFDEF FPC_HAS_CPSTRING}
  50. const
  51. UTF8BOM = #$EF#$BB#$BF;
  52. function UTF8CharacterStrictLength(P: PAnsiChar): integer;
  53. function UTF8ToUTF16(const s: AnsiString): UnicodeString;
  54. function UTF16ToUTF8(const s: UnicodeString): AnsiString;
  55. function UTF8ToSystemCP(const s: ansistring): ansistring;
  56. function SystemCPToUTF8(const s: ansistring): ansistring;
  57. function ConsoleToUTF8(const s: ansistring): ansistring;
  58. // converts UTF8 string to console encoding (used by Write, WriteLn)
  59. function UTF8ToConsole(const s: ansistring): ansistring;
  60. {$ENDIF FPC_HAS_CPSTRING}
  61. function IsNonUTF8System: boolean;// true if system encoding is not UTF-8
  62. {$IFDEF Windows}
  63. // AConsole - If false, it is the general system encoding,
  64. // if true, it is the console encoding
  65. function GetWindowsEncoding(AConsole: Boolean = False): string;
  66. {$ENDIF}
  67. {$IF defined(Unix) and not defined(Darwin)}
  68. function GetUnixEncoding: string;
  69. {$ENDIF}
  70. Function NonUTF8System: boolean;
  71. function GetDefaultTextEncoding: string;
  72. function IsEncodingValid : Boolean;
  73. {$IFDEF NEEDLANG}
  74. function GetLang: string;
  75. {$ENDIF}
  76. function GetConsoleTextEncoding: string;
  77. procedure SplitCmdLineParams(const Params: string; ParamList: TStrings;
  78. ReadBackslash: boolean = false);
  79. implementation
  80. {$IFDEF Windows}
  81. {$IFDEF FPC_DOTTEDUNITS}
  82. uses WinApi.Windows;
  83. {$ELSE FPC_DOTTEDUNITS}
  84. uses Windows;
  85. {$ENDIF FPC_DOTTEDUNITS}
  86. {$ENDIF}
  87. Var
  88. {$IFDEF NEEDLANG}
  89. Lang: string = '';
  90. {$ENDIF}
  91. EncodingValid: boolean = false;
  92. DefaultTextEncoding: string = EncodingSystem;
  93. gNonUTF8System : Boolean = {$IFDEF FPC_HAS_CPSTRING}false{$ELSE}true{$ENDIF};
  94. Function NonUTF8System: boolean;
  95. begin
  96. Result:=gNonUTF8System;
  97. end;
  98. function IsEncodingValid : Boolean;
  99. begin
  100. Result:=EncodingValid;
  101. end;
  102. {$IFDEF NEEDLANG}
  103. function GetLang: string;
  104. begin
  105. Result:=Lang;
  106. end;
  107. {$ENDIF}
  108. function GetNextDelimitedItem(const List: string; Delimiter: Char;
  109. var Position: integer): string;
  110. var
  111. StartPos: Integer;
  112. begin
  113. StartPos:=Position;
  114. while (Position<=length(List)) and (List[Position]<>Delimiter) do
  115. inc(Position);
  116. Result:=copy(List,StartPos,Position-StartPos);
  117. if Position<=length(List) then inc(Position); // skip Delimiter
  118. end;
  119. function IncreaseChangeStamp(Stamp: TChangeStamp): TChangeStamp;
  120. begin
  121. if Stamp<High(TChangeStamp) then
  122. Result:=Stamp+1
  123. else
  124. Result:=InvalidChangeStamp+1;
  125. end;
  126. function ChompPathDelim(const Path: string): string;
  127. var
  128. Len, MinLen: Integer;
  129. begin
  130. Result:=Path;
  131. if Path = '' then
  132. exit;
  133. Len:=length(Result);
  134. if (Result[1] in AllowDirectorySeparators) then
  135. begin
  136. MinLen := 1;
  137. {$IFDEF HasUNCPaths}
  138. if (Len >= 2) and (Result[2] in AllowDirectorySeparators) then
  139. MinLen := 2; // keep UNC '\\', chomp 'a\' to 'a'
  140. {$ENDIF}
  141. {$IFDEF Pas2js}
  142. if (Len >= 2) and (Result[2]=Result[1]) and (PathDelim='\') then
  143. MinLen := 2; // keep UNC '\\', chomp 'a\' to 'a'
  144. {$ENDIF}
  145. end
  146. else begin
  147. MinLen := 0;
  148. {$IFdef MSWindows}
  149. if (Len >= 3) and (Result[1] in ['a'..'z', 'A'..'Z']) and
  150. (Result[2] = ':') and (Result[3] in AllowDirectorySeparators)
  151. then
  152. MinLen := 3;
  153. {$ENDIF}
  154. {$IFdef Pas2js}
  155. if (PathDelim='\')
  156. and (Len >= 3) and (Result[1] in ['a'..'z', 'A'..'Z'])
  157. and (Result[2] = ':') and (Result[3] in AllowDirectorySeparators)
  158. then
  159. MinLen := 3;
  160. {$ENDIF}
  161. end;
  162. while (Len > MinLen) and (Result[Len] in AllowDirectorySeparators) do dec(Len);
  163. if Len<length(Result) then
  164. SetLength(Result,Len);
  165. end;
  166. function NormalizeEncoding(const Encoding: string): string;
  167. var
  168. i: Integer;
  169. begin
  170. Result:=LowerCase(Encoding);
  171. for i:=length(Result) downto 1 do
  172. if Result[i]='-' then Delete(Result,i,1);
  173. end;
  174. {$IFDEF WINDOWS}
  175. function GetWindowsEncoding(AConsole: Boolean = False): string;
  176. var
  177. cp : UINT;
  178. {$IFDEF WinCE}
  179. // CP_UTF8 is missing in the windows unit of the Windows CE RTL
  180. const
  181. CP_UTF8 = 65001;
  182. {$ENDIF}
  183. begin
  184. if AConsole then cp := GetOEMCP
  185. else cp := GetACP;
  186. case cp of
  187. CP_UTF8: Result := EncodingUTF8;
  188. else
  189. Result:='cp'+IntToStr(cp);
  190. end;
  191. end;
  192. {$ENDIF}
  193. function IsASCII(const s: string): boolean; inline;
  194. {$IFDEF Pas2js}
  195. var
  196. i: Integer;
  197. begin
  198. for i:=1 to length(s) do
  199. if s[i]>#127 then exit(false);
  200. Result:=true;
  201. end;
  202. {$ELSE}
  203. var
  204. p: PAnsiChar;
  205. begin
  206. if s='' then exit(true);
  207. p:=PAnsiChar(s);
  208. repeat
  209. case p^ of
  210. #0: if p-PAnsiChar(s)=length(s) then exit(true);
  211. #128..#255: exit(false);
  212. end;
  213. inc(p);
  214. until false;
  215. end;
  216. {$ENDIF}
  217. {$IFDEF FPC_HAS_CPSTRING}
  218. function UTF8CharacterStrictLength(P: PAnsiChar): integer;
  219. begin
  220. if p=nil then exit(0);
  221. if ord(p^)<%10000000 then
  222. begin
  223. // regular single byte character
  224. exit(1);
  225. end
  226. else if ord(p^)<%11000000 then
  227. begin
  228. // invalid single byte character
  229. exit(0);
  230. end
  231. else if ((ord(p^) and %11100000) = %11000000) then
  232. begin
  233. // should be 2 byte character
  234. if (ord(p[1]) and %11000000) = %10000000 then
  235. exit(2)
  236. else
  237. exit(0);
  238. end
  239. else if ((ord(p^) and %11110000) = %11100000) then
  240. begin
  241. // should be 3 byte character
  242. if ((ord(p[1]) and %11000000) = %10000000)
  243. and ((ord(p[2]) and %11000000) = %10000000) then
  244. exit(3)
  245. else
  246. exit(0);
  247. end
  248. else if ((ord(p^) and %11111000) = %11110000) then
  249. begin
  250. // should be 4 byte character
  251. if ((ord(p[1]) and %11000000) = %10000000)
  252. and ((ord(p[2]) and %11000000) = %10000000)
  253. and ((ord(p[3]) and %11000000) = %10000000) then
  254. exit(4)
  255. else
  256. exit(0);
  257. end else
  258. exit(0);
  259. end;
  260. function UTF8ToUTF16(const s: AnsiString): UnicodeString;
  261. begin
  262. Result:=UTF8Decode(s);
  263. end;
  264. function UTF16ToUTF8(const s: UnicodeString): ansistring;
  265. begin
  266. if s='' then exit('');
  267. Result:=UTF8Encode(s);
  268. // prevent UTF8 codepage appear in the strings - we don't need codepage
  269. // conversion magic
  270. SetCodePage(RawByteString(Result), CP_ACP, False);
  271. end;
  272. {$ENDIF}
  273. function IsNonUTF8System: boolean;
  274. begin
  275. Result:=NonUTF8System;
  276. end;
  277. {$IFDEF UNIX}
  278. {$IFNDEF Darwin}
  279. function GetUnixEncoding: string;
  280. var
  281. i: integer;
  282. begin
  283. Result:=EncodingSystem;
  284. i:=pos('.',Lang);
  285. if (i>0) and (i<=length(Lang)) then
  286. Result:=copy(Lang,i+1,length(Lang)-i);
  287. end;
  288. {$ENDIF}
  289. {$ENDIF}
  290. function GetDefaultTextEncoding: string;
  291. begin
  292. if EncodingValid then
  293. begin
  294. Result:=DefaultTextEncoding;
  295. exit;
  296. end;
  297. {$IFDEF Pas2js}
  298. Result:=EncodingUTF8;
  299. {$ELSE}
  300. {$IFDEF Windows}
  301. Result:=GetWindowsEncoding;
  302. {$ELSE}
  303. {$IFDEF Darwin}
  304. Result:=EncodingUTF8;
  305. {$ELSE}
  306. // unix & wasm
  307. Lang := GetEnvironmentVariable('LC_ALL');
  308. if Lang='' then
  309. begin
  310. Lang := GetEnvironmentVariable('LC_MESSAGES');
  311. if Lang='' then
  312. Lang := GetEnvironmentVariable('LANG');
  313. end;
  314. {$IFNDEF CPUWASM}
  315. Result:=GetUnixEncoding;
  316. {$ELSE}
  317. // wasm
  318. Result:='UTF8'; // some choice needs to be made
  319. {$ENDIF}
  320. {$ENDIF}
  321. {$ENDIF}
  322. {$ENDIF}
  323. Result:=NormalizeEncoding(Result);
  324. DefaultTextEncoding:=Result;
  325. EncodingValid:=true;
  326. end;
  327. procedure InternalInit;
  328. begin
  329. {$IFDEF FPC_HAS_CPSTRING}
  330. SetMultiByteConversionCodePage(CP_UTF8);
  331. // SetMultiByteFileSystemCodePage(CP_UTF8); not needed, this is the default under Windows
  332. SetMultiByteRTLFileSystemCodePage(CP_UTF8);
  333. GetDefaultTextEncoding;
  334. {$IFDEF Windows}
  335. gNonUTF8System:=true;
  336. {$ELSE}
  337. gNonUTF8System:={$IFDEF FPC_DOTTEDUNITS}System.{$ENDIF}SysUtils.CompareText(DefaultTextEncoding,'UTF8')<>0;
  338. {$ENDIF}
  339. {$ENDIF}
  340. end;
  341. procedure SplitCmdLineParams(const Params: string; ParamList: TStrings;
  342. ReadBackslash: boolean = false);
  343. // split spaces, quotes are parsed as single parameter
  344. // if ReadBackslash=true then \" is replaced to " and not treated as quote
  345. // #0 is always end
  346. type
  347. TMode = (mNormal,mApostrophe,mQuote);
  348. var
  349. p: Integer;
  350. Mode: TMode;
  351. Param: String;
  352. begin
  353. p:=1;
  354. while p<=length(Params) do
  355. begin
  356. // skip whitespace
  357. while (p<=length(Params)) and (Params[p] in [' ',#9,#10,#13]) do inc(p);
  358. if (p>length(Params)) or (Params[p]=#0) then
  359. break;
  360. // read param
  361. Param:='';
  362. Mode:=mNormal;
  363. while p<=length(Params) do
  364. begin
  365. case Params[p] of
  366. #0:
  367. break;
  368. '\':
  369. begin
  370. inc(p);
  371. if ReadBackslash then
  372. begin
  373. // treat next character as normal character
  374. if (p>length(Params)) or (Params[p]=#0) then
  375. break;
  376. if ord(Params[p])<128 then
  377. begin
  378. Param+=Params[p];
  379. inc(p);
  380. end else begin
  381. // next character is already a normal character
  382. end;
  383. end else begin
  384. // treat backslash as normal character
  385. Param+='\';
  386. end;
  387. end;
  388. '''':
  389. begin
  390. inc(p);
  391. case Mode of
  392. mNormal:
  393. Mode:=mApostrophe;
  394. mApostrophe:
  395. Mode:=mNormal;
  396. mQuote:
  397. Param+='''';
  398. end;
  399. end;
  400. '"':
  401. begin
  402. inc(p);
  403. case Mode of
  404. mNormal:
  405. Mode:=mQuote;
  406. mApostrophe:
  407. Param+='"';
  408. mQuote:
  409. Mode:=mNormal;
  410. end;
  411. end;
  412. ' ',#9,#10,#13:
  413. begin
  414. if Mode=mNormal then break;
  415. Param+=Params[p];
  416. inc(p);
  417. end;
  418. else
  419. Param+=Params[p];
  420. inc(p);
  421. end;
  422. end;
  423. //writeln('SplitCmdLineParams Param=#'+Param+'#');
  424. ParamList.Add(Param);
  425. end;
  426. end;
  427. function GetConsoleTextEncoding: string;
  428. begin
  429. {$IFDEF WINDOWS}
  430. Result:=GetWindowsEncoding(True);
  431. {$ELSE}
  432. Result:=GetDefaultTextEncoding;
  433. {$ENDIF}
  434. end;
  435. {$IFDEF WINDOWS}
  436. {$ifdef WinCe}
  437. function UTF8ToSystemCP(const s: ansistring): ansistring; inline;
  438. begin
  439. Result := s;
  440. end;
  441. {$else}
  442. function UTF8ToSystemCP(const s: ansistring): ansistring;
  443. // result has codepage CP_ACP
  444. var
  445. src: UnicodeString;
  446. len: LongInt;
  447. begin
  448. Result:=s;
  449. if IsASCII(Result) then
  450. begin
  451. // prevent codepage conversion magic
  452. SetCodePage(RawByteString(Result), CP_ACP, False);
  453. exit;
  454. end;
  455. src:=UTF8Decode(s);
  456. if src='' then
  457. exit;
  458. len:=WideCharToMultiByte(CP_ACP,0,PUnicodeChar(src),length(src),nil,0,nil,nil);
  459. SetLength(Result,len);
  460. if len>0 then
  461. begin
  462. WideCharToMultiByte(CP_ACP,0,PUnicodeChar(src),length(src),@Result[1],length(Result),nil,nil);
  463. // prevent codepage conversion magic
  464. SetCodePage(RawByteString(Result), CP_ACP, False);
  465. end;
  466. end;
  467. {$endif not wince}
  468. {$ifdef WinCE}
  469. function SystemCPToUTF8(const s: ansistring): ansistring; inline;
  470. begin
  471. Result := SysToUtf8(s);
  472. end;
  473. {$else}
  474. // for all Windows supporting 8bit codepages (e.g. not WinCE)
  475. function SystemCPToUTF8(const s: ansistring): ansistring;
  476. // result has codepage CP_ACP
  477. var
  478. UTF16WordCnt: SizeInt;
  479. UTF16Str: UnicodeString;
  480. begin
  481. Result:=s;
  482. if IsASCII(Result) then
  483. begin
  484. // prevent codepage conversion magic
  485. SetCodePage(RawByteString(Result), CP_ACP, False);
  486. exit;
  487. end;
  488. UTF16WordCnt:=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, Pointer(s), length(s), nil, 0);
  489. // this will null-terminate
  490. if UTF16WordCnt>0 then
  491. begin
  492. setlength(UTF16Str{%H-}, UTF16WordCnt);
  493. MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, Pointer(s), length(s), @UTF16Str[1], UTF16WordCnt);
  494. Result:=UTF16ToUTF8(UTF16Str);
  495. end;
  496. end;
  497. {$endif not wince}
  498. {$ifdef WinCe}
  499. function UTF8ToConsole(const s: ansistring): ansistring; // converts UTF8 to console string (used by Write, WriteLn)
  500. begin
  501. Result := UTF8ToSystemCP(s);
  502. end;
  503. {$else}
  504. function UTF8ToConsole(const s: ansistring): ansistring; // converts UTF8 to console string (used by Write, WriteLn)
  505. var
  506. Dst: PAnsiChar;
  507. begin
  508. {$ifndef NO_CP_RTL}
  509. Result := UTF8ToSystemCP(s);
  510. {$else NO_CP_RTL}
  511. Result := s; // Kept for compatibility
  512. {$endif NO_CP_RTL}
  513. Dst := AllocMem((Length(Result) + 1) * SizeOf(AnsiChar));
  514. if CharToOEM(PAnsiChar(Result), Dst) then
  515. Result := StrPas(Dst);
  516. FreeMem(Dst);
  517. {$ifndef NO_CP_RTL}
  518. SetCodePage(RawByteString(Result), CP_OEMCP, False);
  519. {$endif NO_CP_RTL}
  520. end;
  521. {$endif not WinCE}
  522. {$ifdef WinCE}
  523. function ConsoleToUTF8(const s: ansistring): ansistring;// converts console encoding to UTF8
  524. begin
  525. Result := SysToUTF8(s);
  526. end;
  527. {$else}
  528. function ConsoleToUTF8(const s: ansistring): ansistring;// converts console encoding to UTF8
  529. var
  530. Dst: PAnsiChar;
  531. begin
  532. Dst := AllocMem((Length(s) + 1) * SizeOf(AnsiChar));
  533. if OemToChar(PAnsiChar(s), Dst) then
  534. Result := StrPas(Dst)
  535. else
  536. Result := s;
  537. FreeMem(Dst);
  538. Result := SystemCPToUTF8(Result);
  539. end;
  540. {$endif not wince}
  541. {$ENDIF WINDOWS}
  542. {$IFDEF UNIX}
  543. function UTF8ToSystemCP(const s: Ansistring): Ansistring;
  544. begin
  545. if NonUTF8System and not IsASCII(s) then
  546. begin
  547. Result:=UTF8ToAnsi(s);
  548. // prevent UTF8 codepage appear in the strings - we don't need codepage
  549. // conversion magic
  550. SetCodePage(RawByteString(Result), StringCodePage(s), False);
  551. end
  552. else
  553. Result:=s;
  554. end;
  555. function SystemCPToUTF8(const s: ansistring): ansistring;
  556. begin
  557. if NonUTF8System and not IsASCII(s) then
  558. begin
  559. Result:=AnsiToUTF8(s);
  560. // prevent UTF8 codepage appear in the strings - we don't need codepage
  561. // conversion magic
  562. SetCodePage(RawByteString(Result), StringCodePage(s), False);
  563. end
  564. else
  565. Result:=s;
  566. end;
  567. function ConsoleToUTF8(const s: ansistring): ansistring;
  568. begin
  569. Result:=SystemCPToUTF8(s);
  570. end;
  571. function UTF8ToConsole(const s: ansistring): ansistring;
  572. begin
  573. Result:=UTF8ToSystemCP(s);
  574. end;
  575. {$ENDIF UNIX}
  576. {$IF NOT DEFINED(UNIX) AND NOT DEFINED(WINDOWS) and defined(FPC_HAS_CPSTRING)}
  577. function UTF8ToSystemCP(const s: Ansistring): Ansistring;
  578. begin
  579. if NonUTF8System and not IsASCII(s) then
  580. begin
  581. Result:=UTF8ToAnsi(s);
  582. // prevent UTF8 codepage appear in the strings - we don't need codepage
  583. // conversion magic
  584. SetCodePage(RawByteString(Result), StringCodePage(s), False);
  585. end
  586. else
  587. Result:=s;
  588. end;
  589. function SystemCPToUTF8(const s: ansistring): ansistring;
  590. begin
  591. if NonUTF8System and not IsASCII(s) then
  592. begin
  593. Result:=AnsiToUTF8(s);
  594. // prevent UTF8 codepage appear in the strings - we don't need codepage
  595. // conversion magic
  596. SetCodePage(RawByteString(Result), StringCodePage(s), False);
  597. end
  598. else
  599. Result:=s;
  600. end;
  601. function ConsoleToUTF8(const s: ansistring): ansistring;
  602. begin
  603. Result:=SystemCPToUTF8(s);
  604. end;
  605. function UTF8ToConsole(const s: ansistring): ansistring;
  606. begin
  607. Result:=UTF8ToSystemCP(s);
  608. end;
  609. {$ENDIF}
  610. initialization
  611. InternalInit;
  612. end.