parseutils.pas 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385
  1. { This file is part of wasmbin - a collection of WebAssembly binary utils.
  2. Copyright (C) 2019, 2020 Dmitry Boyarintsev <[email protected]>
  3. Copyright (C) 2020 by the Free Pascal development team
  4. This source is free software; you can redistribute it and/or modify it under
  5. the terms of the GNU General Public License as published by the Free
  6. Software Foundation; either version 2 of the License, or (at your option)
  7. any later version.
  8. This code is distributed in the hope that it will be useful, but WITHOUT ANY
  9. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  10. FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  11. details.
  12. A copy of the GNU General Public License is available on the World Wide Web
  13. at <http://www.gnu.org/copyleft/gpl.html>. You can also obtain it by writing
  14. to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
  15. Boston, MA 02110-1335, USA.
  16. }
  17. unit parseutils;
  18. {$ifdef fpc}{$mode delphi}{$h+}{$endif}
  19. interface
  20. uses
  21. Classes, SysUtils;
  22. type
  23. TCharSet = set of Char;
  24. const
  25. EoLnChars = [#10,#13];
  26. SpaceChars = [#32,#9];
  27. InvsChars = [#0..#32];
  28. WhiteSpaceChars = SpaceChars;
  29. SpaceEolnChars = EoLnChars+SpaceChars;
  30. NumericChars = ['0'..'9'];
  31. HexChars = ['0'..'9','a'..'f','A'..'F'];
  32. SignChars = ['+','-'];
  33. SignNumericChars = NumericChars + SignChars;
  34. AlphabetChars = ['a'..'z','A'..'Z'];
  35. AlphaNumChars = AlphabetChars+NumericChars;
  36. function ScanWhileWithFirst(const s: AnsiString; var index: Integer; const first, body: TCharSet): AnsiString;
  37. function ScanWhile(const s: AnsiString; var index: Integer; const ch: TCharSet): AnsiString;
  38. function ScanTo(const s: AnsiString; var index: Integer; const ch: TCharSet): AnsiString;
  39. function SkipToEoln(const s: AnsiString; var index: Integer): AnsiString;
  40. function ScanToSubstr(const s: AnsiString; var index: Integer; const substr: string): AnsiString;
  41. // returns #10, #13, #10#13 or #13#10, if s[index] is end-of-line sequence
  42. // otherwise returns empty string
  43. function EolnStr(const s: AnsiString; index: Integer): String;
  44. function IsSubStr(const sbs, s: AnsiString; index: Integer): Boolean;
  45. // todo: not used?
  46. function SkipCommentBlock(const s: AnsiString; var index: Integer; const closecmt: AnsiString): AnsiString;
  47. function SkipLine(const s: AnsiString; var index: Integer): AnsiString;
  48. procedure OffsetToLinePos(const t: AnsiString; Offset: Integer; var P: TPoint);
  49. procedure ParseCSSValues(const s: String; css: TStrings);
  50. procedure GetCssAbsBoundsRect(Css: TStrings; var r: TRect);
  51. function CssValInt(const s: String; Def: integer): Integer;
  52. type
  53. TCNumberFormat = (nfError, nfInteger, nfHex, nfFloat, nfFloatHex);
  54. // if buf contains "nan" or "inf" it's also recognized as float numbers
  55. function ScanNumberC(const buf: string; var idx: Integer;
  56. var numberText: string): TCNumberFormat;
  57. implementation
  58. function CssValInt(const s: String; Def: integer): Integer;
  59. var
  60. i : integer;
  61. n : String;
  62. err : Integer;
  63. begin
  64. i:=1;
  65. n:=ScanWhile(s, i, ['+','-']+NumericChars);
  66. Val(n, Result, err);
  67. if err<>0 then Result:=Def;
  68. end;
  69. procedure GetCssAbsBoundsRect(Css: TStrings; var r: TRect);
  70. begin
  71. r.Left:=CssValInt(Css.Values['LEFT'], 0);
  72. r.Top:=CssValInt(Css.Values['top'], 0);
  73. r.Right:=r.Left+CssValInt(Css.Values['width'], 0);
  74. r.Bottom:=r.Top+CssValInt(Css.Values['height'], 0);
  75. end;
  76. procedure ParseCSSValues(const s: String; css: TStrings);
  77. var
  78. i : integer;
  79. n : String;
  80. v : String;
  81. begin
  82. i:=1;
  83. if (s='') or not Assigned(css) then Exit;
  84. while (i<=length(s)) do begin
  85. ScanTo(s, i, AlphaNumChars);
  86. n:=ScanWhile(s, i, AlphaNumChars+['_']);
  87. ScanTo(s, i, [':']);
  88. inc(i);
  89. ScanWhile(s, i, SpaceEolnChars);
  90. v:=ScanTo(s, i, [';']);
  91. css.Values[n]:=v;
  92. end;
  93. end;
  94. function ScanWhile(const s: AnsiString; var index: Integer; const ch: TCharSet): AnsiString;
  95. var
  96. i : Integer;
  97. begin
  98. Result := '';
  99. if (index <= 0) or (index > length(s)) then Exit;
  100. for i := index to length(s) do
  101. if not (s[i] in ch) then begin
  102. if i = index then Result := ''
  103. else Result := Copy(s, index, i - index);
  104. index := i;
  105. Exit;
  106. end;
  107. Result := Copy(s, index, length(s) - index + 1);
  108. index := length(s) + 1;
  109. end;
  110. function ScanWhileWithFirst(const s: AnsiString; var index: Integer; const first, body: TCharSet): AnsiString;
  111. var
  112. i : Integer;
  113. begin
  114. Result := '';
  115. if (index <= 0) or (index > length(s)) then Exit;
  116. i:=index;
  117. if not (s[i] in first) then Exit;
  118. inc(i);
  119. while (i<=length(s)) and (s[i] in body) do inc(i);
  120. Result := Copy(s, index, i-index);
  121. index:=i;
  122. end;
  123. function ScanTo(const s: AnsiString; var index: Integer; const ch: TCharSet): AnsiString;
  124. var
  125. i : Integer;
  126. begin
  127. Result := '';
  128. if (index <= 0) or (index > length(s)) then Exit;
  129. for i := index to length(s) do
  130. if (s[i] in ch) then begin
  131. if i = index then Result := ''
  132. else Result := Copy(s, index, i - index);
  133. index := i;
  134. Exit;
  135. end;
  136. Result := Copy(s, index, length(s) - index + 1);
  137. index := length(s) + 1;
  138. end;
  139. function EolnStr(const s: AnsiString; index: Integer): String;
  140. begin
  141. if (index<=0) or (index>length(s)) or (not (s[index] in EoLnChars)) then
  142. Result:=''
  143. else begin
  144. if (index<length(s)) and (s[index+1] in EolnChars) and (s[index]<>s[index+1]) then
  145. Result:=Copy(s, index, 2)
  146. else
  147. Result:=s[index];
  148. end;
  149. end;
  150. function SkipToEoln(const s: AnsiString; var index: Integer): AnsiString;
  151. begin
  152. Result := ScanTo(s, index, EoLnChars);
  153. end;
  154. function IsSubStr(const sbs, s: AnsiString; index: Integer): Boolean;
  155. var
  156. i : Integer;
  157. j : Integer;
  158. begin
  159. Result := false;
  160. if (sbs = '') or (length(sbs) > length(s) - index) then Exit;
  161. j := index;
  162. for i := 1 to length(sbs) do begin
  163. if sbs[i] <> s[j] then Exit;
  164. inc(j);
  165. end;
  166. Result := true;
  167. end;
  168. function SkipCommentBlock(const s: AnsiString; var index: Integer; const closecmt: AnsiString): AnsiString;
  169. begin
  170. Result := '';
  171. if closecmt = '' then begin
  172. index := length(s) + 1;
  173. Exit;
  174. end;
  175. while index <= length(s) do begin
  176. Result := Result + ScanTo(s, index, [closecmt[1]]+EoLnChars);
  177. //if (index<=length(s)) and (s in EoLnChars(
  178. if IsSubStr(closecmt, s, index) then begin
  179. inc(index, length(closecmt));
  180. Exit;
  181. end else begin
  182. Result := Result + s[index];
  183. inc(index);
  184. end;
  185. end;
  186. end;
  187. function SkipLine(const s: AnsiString; var index: Integer): AnsiString;
  188. begin
  189. Result:=ScanTo(s, index, EoLnChars);
  190. if (index<length(s)) and (s[index+1] in EoLnChars) and (s[index]<>s[index+1]) then
  191. inc(index);
  192. inc(index);
  193. end;
  194. procedure OffsetToLinePos(const t: AnsiString; Offset: Integer; var P: TPoint);
  195. var
  196. i, le : Integer;
  197. begin
  198. i := 1;
  199. le := 0;
  200. P.X := 0;
  201. P.Y := 0;
  202. while i < Offset do begin
  203. Inc(P.Y);
  204. le := i;
  205. SkipLine(t, i);
  206. end;
  207. P.X := Offset - le + 1;
  208. end;
  209. function isSubStrMatch(const s: AnsiString; index: integer; const substr: string): Boolean;
  210. var
  211. i : integer;
  212. j : integer;
  213. begin
  214. j:=index;
  215. Result:=false;
  216. for i:=1 to length(substr) do begin
  217. if s[j]<>substr[i] then Exit;
  218. inc(j);
  219. end;
  220. Result:=true;
  221. end;
  222. function ScanToSubstr(const s: AnsiString; var index: Integer; const substr: string): AnsiString;
  223. var
  224. i: integer;
  225. begin
  226. if substr='' then begin
  227. Result:='';
  228. Exit;
  229. end;
  230. i:=index;
  231. while (index<=length(s)) do begin
  232. ScanTo(s, index, [substr[1]]);
  233. if isSubStrMatch(s, index, substr) then begin
  234. inc(index, length(substr));
  235. Break;
  236. end else
  237. inc(index);
  238. end;
  239. Result:=Copy(s, i, index-i);
  240. end;
  241. function ScanHexNumber(const buf: string; var idx: Integer; var numberText: string): TCNumberFormat;
  242. var
  243. xp : char;
  244. s : string;
  245. begin
  246. Result := nfError;
  247. if (idx=length(buf)) or (buf[idx]<>'0') or (buf[idx+1]<>'x') then Exit;
  248. inc(idx, 2);
  249. numberText := ScanWhile(buf, idx, HexChars);
  250. if numberText = '' then Exit;
  251. numberText := '0x'+numberText;
  252. if ((idx<=length(buf)) and (buf[idx] in ['.','p','P'])) then begin
  253. if buf[idx]='.' then begin
  254. s := ScanWhileWithFirst(buf, idx, ['.']+HexChars, HexChars);
  255. if s = '' then Exit; // should not be empty
  256. numberText := numberText + s;
  257. end;
  258. if buf[idx] in ['p','P'] then begin
  259. // hexal exponenta is numeric, not hexidemical
  260. xp := buf[idx];
  261. inc(idx);
  262. s := ScanWhileWithFirst(buf, idx, SignNumericChars, NumericChars);
  263. if s = '' then Exit;
  264. numberText := numberText + xp+s;
  265. end;
  266. Result := nfFloathex
  267. end else
  268. Result := nfHex;
  269. end;
  270. function ScanNumeric(const buf: string; var idx: integer; var numberText: string): TCNumberFormat;
  271. var
  272. mnt : string;
  273. exp : string;
  274. xp : char;
  275. begin
  276. Result := nfError;
  277. numberText:=ScanWhile(buf, idx, NumericChars);
  278. if ((idx<=length(buf)) and (buf[idx] in ['.','e','E'])) then begin
  279. // mantissa (or fractional part) can be empty
  280. mnt := ScanWhileWithFirst(buf, idx, ['.']+NumericChars, NumericChars);
  281. if (buf[idx] in ['e','E']) then begin
  282. xp:=buf[idx];
  283. inc(idx);
  284. exp := ScanWhileWithFirst(buf, idx, SignNumericChars, NumericChars);
  285. // exponent cannot be empty, if "e" is present
  286. if exp='' then Exit;
  287. exp := xp+exp;
  288. end else
  289. exp := '';
  290. numberText:=NumberText+mnt+exp;
  291. Result := nfFloat;
  292. end else if numberText<>'' then
  293. Result := nfInteger;
  294. end;
  295. function ScanNumberC(const buf: string; var idx: Integer; var numberText: string): TCNumberFormat;
  296. var
  297. ch : char;
  298. sub : string;
  299. begin
  300. Result := nfError;
  301. if buf[idx] in SignChars then begin
  302. ch:=buf[idx];
  303. inc(idx);
  304. end else
  305. ch := #0;
  306. if (idx+2<=length(buf)) and ((buf[idx]='i') and (buf[idx+1]='n') and (buf[idx+2]='f')) then begin
  307. numberText:='inf';
  308. inc(idx, 3);
  309. Result := nfFloat;
  310. end else if (idx+2<=length(buf)) and ((buf[idx]='n') and (buf[idx+1]='a') and (buf[idx+2]='n')) then begin
  311. numberText:='nan';
  312. inc(idx, 3);
  313. if (idx < length(buf)) and (buf[idx]=':') then begin
  314. inc(idx);
  315. sub := '';
  316. if (ScanNumberC(buf, idx, sub) in [nfHex, nfInteger]) then
  317. numberText:=numberText+':'+sub
  318. else
  319. Exit; // error
  320. end;
  321. Result := nfFloat;
  322. end else if (idx<length(buf)) and (buf[idx]='0') and (buf[idx+1]='x') then begin
  323. Result := ScanHexNumber(buf, idx, numberText)
  324. end else
  325. Result := ScanNumeric(buf, idx, numberText);
  326. if Result = nfError then Exit;
  327. if (ch<>#0) then begin
  328. if (numberText = '') then Exit;
  329. numberText:=ch+numberText;
  330. end;
  331. end;
  332. end.