pas2jsutils.pp 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430
  1. unit pas2jsutils;
  2. {
  3. This file is part of the Free Component Library (FCL)
  4. Copyright (c) 2018 Mattias Gaertner [email protected]
  5. Pascal to Javascript converter class.
  6. See the file COPYING.FPC, included in this distribution,
  7. for details about the copyright.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  11. **********************************************************************
  12. Abstract:
  13. Utility routines that do not need a filesystem or OS functionality.
  14. Filesystem-specific things should go to pas2jsfileutils instead.
  15. }
  16. {$mode objfpc}{$H+}
  17. interface
  18. uses
  19. Classes, SysUtils;
  20. function ChompPathDelim(const Path: string): string;
  21. function GetNextDelimitedItem(const List: string; Delimiter: char;
  22. var Position: integer): string;
  23. type
  24. TChangeStamp = SizeInt;
  25. const
  26. InvalidChangeStamp = low(TChangeStamp);
  27. Function IncreaseChangeStamp(Stamp: TChangeStamp) : TChangeStamp;
  28. const
  29. EncodingUTF8 = 'UTF-8';
  30. EncodingSystem = 'System';
  31. function NormalizeEncoding(const Encoding: string): string;
  32. function IsASCII(const s: string): boolean; inline;
  33. {$IFDEF FPC_HAS_CPSTRING}
  34. const
  35. UTF8BOM = #$EF#$BB#$BF;
  36. function UTF8CharacterStrictLength(P: PChar): integer;
  37. function UTF8ToUTF16(const s: string): UnicodeString;
  38. function UTF16ToUTF8(const s: UnicodeString): string;
  39. {$ENDIF FPC_HAS_CPSTRING}
  40. function IsNonUTF8System: boolean;// true if system encoding is not UTF-8
  41. {$IFDEF Windows}
  42. // AConsole - If false, it is the general system encoding,
  43. // if true, it is the console encoding
  44. function GetWindowsEncoding(AConsole: Boolean = False): string;
  45. {$ENDIF}
  46. {$IF defined(Unix) and not defined(Darwin)}
  47. function GetUnixEncoding: string;
  48. {$ENDIF}
  49. Function NonUTF8System: boolean;
  50. function GetDefaultTextEncoding: string;
  51. procedure SplitCmdLineParams(const Params: string; ParamList: TStrings;
  52. ReadBackslash: boolean = false);
  53. implementation
  54. {$IFDEF Windows}
  55. uses Windows;
  56. {$ENDIF}
  57. Var
  58. {$IFDEF Unix}
  59. {$IFNDEF Darwin}
  60. Lang: string = '';
  61. {$ENDIF}
  62. {$ENDIF}
  63. EncodingValid: boolean = false;
  64. DefaultTextEncoding: string = EncodingSystem;
  65. gNonUTF8System : Boolean = {$IFDEF FPC_HAS_CPSTRING}false{$ELSE}true{$ENDIF};
  66. Function NonUTF8System: boolean;
  67. begin
  68. Result:=gNonUTF8System;
  69. end;
  70. function GetNextDelimitedItem(const List: string; Delimiter: char;
  71. var Position: integer): string;
  72. var
  73. StartPos: Integer;
  74. begin
  75. StartPos:=Position;
  76. while (Position<=length(List)) and (List[Position]<>Delimiter) do
  77. inc(Position);
  78. Result:=copy(List,StartPos,Position-StartPos);
  79. if Position<=length(List) then inc(Position); // skip Delimiter
  80. end;
  81. function IncreaseChangeStamp(Stamp: TChangeStamp): TChangeStamp;
  82. begin
  83. if Stamp<High(TChangeStamp) then
  84. Result:=Stamp+1
  85. else
  86. Result:=InvalidChangeStamp+1;
  87. end;
  88. function ChompPathDelim(const Path: string): string;
  89. var
  90. Len, MinLen: Integer;
  91. begin
  92. Result:=Path;
  93. if Path = '' then
  94. exit;
  95. Len:=length(Result);
  96. if (Result[1] in AllowDirectorySeparators) then
  97. begin
  98. MinLen := 1;
  99. {$IFDEF HasUNCPaths}
  100. if (Len >= 2) and (Result[2] in AllowDirectorySeparators) then
  101. MinLen := 2; // keep UNC '\\', chomp 'a\' to 'a'
  102. {$ENDIF}
  103. {$IFDEF Pas2js}
  104. if (Len >= 2) and (Result[2]=Result[1]) and (PathDelim='\') then
  105. MinLen := 2; // keep UNC '\\', chomp 'a\' to 'a'
  106. {$ENDIF}
  107. end
  108. else begin
  109. MinLen := 0;
  110. {$IFdef MSWindows}
  111. if (Len >= 3) and (Result[1] in ['a'..'z', 'A'..'Z']) and
  112. (Result[2] = ':') and (Result[3] in AllowDirectorySeparators)
  113. then
  114. MinLen := 3;
  115. {$ENDIF}
  116. {$IFdef Pas2js}
  117. if (PathDelim='\')
  118. and (Len >= 3) and (Result[1] in ['a'..'z', 'A'..'Z'])
  119. and (Result[2] = ':') and (Result[3] in AllowDirectorySeparators)
  120. then
  121. MinLen := 3;
  122. {$ENDIF}
  123. end;
  124. while (Len > MinLen) and (Result[Len] in AllowDirectorySeparators) do dec(Len);
  125. if Len<length(Result) then
  126. SetLength(Result,Len);
  127. end;
  128. function NormalizeEncoding(const Encoding: string): string;
  129. var
  130. i: Integer;
  131. begin
  132. Result:=LowerCase(Encoding);
  133. for i:=length(Result) downto 1 do
  134. if Result[i]='-' then Delete(Result,i,1);
  135. end;
  136. {$IFDEF WINDOWS}
  137. function GetWindowsEncoding(AConsole: Boolean = False): string;
  138. var
  139. cp : UINT;
  140. {$IFDEF WinCE}
  141. // CP_UTF8 is missing in the windows unit of the Windows CE RTL
  142. const
  143. CP_UTF8 = 65001;
  144. {$ENDIF}
  145. begin
  146. if AConsole then cp := GetOEMCP
  147. else cp := GetACP;
  148. case cp of
  149. CP_UTF8: Result := EncodingUTF8;
  150. else
  151. Result:='cp'+IntToStr(cp);
  152. end;
  153. end;
  154. {$ENDIF}
  155. function IsASCII(const s: string): boolean; inline;
  156. {$IFDEF Pas2js}
  157. var
  158. i: Integer;
  159. begin
  160. for i:=1 to length(s) do
  161. if s[i]>#127 then exit(false);
  162. Result:=true;
  163. end;
  164. {$ELSE}
  165. var
  166. p: PChar;
  167. begin
  168. if s='' then exit(true);
  169. p:=PChar(s);
  170. repeat
  171. case p^ of
  172. #0: if p-PChar(s)=length(s) then exit(true);
  173. #128..#255: exit(false);
  174. end;
  175. inc(p);
  176. until false;
  177. end;
  178. {$ENDIF}
  179. {$IFDEF FPC_HAS_CPSTRING}
  180. function UTF8CharacterStrictLength(P: PChar): integer;
  181. begin
  182. if p=nil then exit(0);
  183. if ord(p^)<%10000000 then
  184. begin
  185. // regular single byte character
  186. exit(1);
  187. end
  188. else if ord(p^)<%11000000 then
  189. begin
  190. // invalid single byte character
  191. exit(0);
  192. end
  193. else if ((ord(p^) and %11100000) = %11000000) then
  194. begin
  195. // should be 2 byte character
  196. if (ord(p[1]) and %11000000) = %10000000 then
  197. exit(2)
  198. else
  199. exit(0);
  200. end
  201. else if ((ord(p^) and %11110000) = %11100000) then
  202. begin
  203. // should be 3 byte character
  204. if ((ord(p[1]) and %11000000) = %10000000)
  205. and ((ord(p[2]) and %11000000) = %10000000) then
  206. exit(3)
  207. else
  208. exit(0);
  209. end
  210. else if ((ord(p^) and %11111000) = %11110000) then
  211. begin
  212. // should be 4 byte character
  213. if ((ord(p[1]) and %11000000) = %10000000)
  214. and ((ord(p[2]) and %11000000) = %10000000)
  215. and ((ord(p[3]) and %11000000) = %10000000) then
  216. exit(4)
  217. else
  218. exit(0);
  219. end else
  220. exit(0);
  221. end;
  222. function UTF8ToUTF16(const s: string): UnicodeString;
  223. begin
  224. Result:=UTF8Decode(s);
  225. end;
  226. function UTF16ToUTF8(const s: UnicodeString): string;
  227. begin
  228. if s='' then exit('');
  229. Result:=UTF8Encode(s);
  230. // prevent UTF8 codepage appear in the strings - we don't need codepage
  231. // conversion magic
  232. SetCodePage(RawByteString(Result), CP_ACP, False);
  233. end;
  234. {$ENDIF}
  235. function IsNonUTF8System: boolean;
  236. begin
  237. Result:=NonUTF8System;
  238. end;
  239. {$IFDEF UNIX}
  240. {$IFNDEF Darwin}
  241. function GetUnixEncoding: string;
  242. var
  243. i: integer;
  244. begin
  245. Result:=EncodingSystem;
  246. i:=pos('.',Lang);
  247. if (i>0) and (i<=length(Lang)) then
  248. Result:=copy(Lang,i+1,length(Lang)-i);
  249. end;
  250. {$ENDIF}
  251. {$ENDIF}
  252. function GetDefaultTextEncoding: string;
  253. begin
  254. if EncodingValid then
  255. begin
  256. Result:=DefaultTextEncoding;
  257. exit;
  258. end;
  259. {$IFDEF Pas2js}
  260. Result:=EncodingUTF8;
  261. {$ELSE}
  262. {$IFDEF Windows}
  263. Result:=GetWindowsEncoding;
  264. {$ELSE}
  265. {$IFDEF Darwin}
  266. Result:=EncodingUTF8;
  267. {$ELSE}
  268. // unix
  269. Lang := GetEnvironmentVariable('LC_ALL');
  270. if Lang='' then
  271. begin
  272. Lang := GetEnvironmentVariable('LC_MESSAGES');
  273. if Lang='' then
  274. Lang := GetEnvironmentVariable('LANG');
  275. end;
  276. Result:=GetUnixEncoding;
  277. {$ENDIF}
  278. {$ENDIF}
  279. {$ENDIF}
  280. Result:=NormalizeEncoding(Result);
  281. DefaultTextEncoding:=Result;
  282. EncodingValid:=true;
  283. end;
  284. procedure InternalInit;
  285. begin
  286. {$IFDEF FPC_HAS_CPSTRING}
  287. SetMultiByteConversionCodePage(CP_UTF8);
  288. // SetMultiByteFileSystemCodePage(CP_UTF8); not needed, this is the default under Windows
  289. SetMultiByteRTLFileSystemCodePage(CP_UTF8);
  290. GetDefaultTextEncoding;
  291. {$IFDEF Windows}
  292. gNonUTF8System:=true;
  293. {$ELSE}
  294. gNonUTF8System:=SysUtils.CompareText(DefaultTextEncoding,'UTF8')<>0;
  295. {$ENDIF}
  296. {$ENDIF}
  297. end;
  298. procedure SplitCmdLineParams(const Params: string; ParamList: TStrings;
  299. ReadBackslash: boolean = false);
  300. // split spaces, quotes are parsed as single parameter
  301. // if ReadBackslash=true then \" is replaced to " and not treated as quote
  302. // #0 is always end
  303. type
  304. TMode = (mNormal,mApostrophe,mQuote);
  305. var
  306. p: Integer;
  307. Mode: TMode;
  308. Param: String;
  309. begin
  310. p:=1;
  311. while p<=length(Params) do
  312. begin
  313. // skip whitespace
  314. while (p<=length(Params)) and (Params[p] in [' ',#9,#10,#13]) do inc(p);
  315. if (p>length(Params)) or (Params[p]=#0) then
  316. break;
  317. // read param
  318. Param:='';
  319. Mode:=mNormal;
  320. while p<=length(Params) do
  321. begin
  322. case Params[p] of
  323. #0:
  324. break;
  325. '\':
  326. begin
  327. inc(p);
  328. if ReadBackslash then
  329. begin
  330. // treat next character as normal character
  331. if (p>length(Params)) or (Params[p]=#0) then
  332. break;
  333. if ord(Params[p])<128 then
  334. begin
  335. Param+=Params[p];
  336. inc(p);
  337. end else begin
  338. // next character is already a normal character
  339. end;
  340. end else begin
  341. // treat backslash as normal character
  342. Param+='\';
  343. end;
  344. end;
  345. '''':
  346. begin
  347. inc(p);
  348. case Mode of
  349. mNormal:
  350. Mode:=mApostrophe;
  351. mApostrophe:
  352. Mode:=mNormal;
  353. mQuote:
  354. Param+='''';
  355. end;
  356. end;
  357. '"':
  358. begin
  359. inc(p);
  360. case Mode of
  361. mNormal:
  362. Mode:=mQuote;
  363. mApostrophe:
  364. Param+='"';
  365. mQuote:
  366. Mode:=mNormal;
  367. end;
  368. end;
  369. ' ',#9,#10,#13:
  370. begin
  371. if Mode=mNormal then break;
  372. Param+=Params[p];
  373. inc(p);
  374. end;
  375. else
  376. Param+=Params[p];
  377. inc(p);
  378. end;
  379. end;
  380. //writeln('SplitCmdLineParams Param=#'+Param+'#');
  381. ParamList.Add(Param);
  382. end;
  383. end;
  384. initialization
  385. InternalInit;
  386. end.