fpcssscanner.pp 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085
  1. {
  2. This file is part of the Free Pascal Run time library.
  3. Copyright (c) 2022- by Michael Van Canneyt ([email protected])
  4. This file contains CSS scanner and tokenizer
  5. See the File COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. {$IFNDEF FPC_DOTTEDUNITS}
  12. unit fpCSSScanner;
  13. {$ENDIF FPC_DOTTEDUNITS}
  14. {$mode ObjFPC}{$H+}
  15. interface
  16. {$IFDEF FPC_DOTTEDUNITS}
  17. uses
  18. System.Classes, System.SysUtils, FpCss.Tree.pp;
  19. {$ELSE FPC_DOTTEDUNITS}
  20. uses
  21. Classes, SysUtils, fpCSSTree;
  22. {$ENDIF FPC_DOTTEDUNITS}
Type
  { Token kinds produced by TCSSScanner. The notes give the input that the
    scanner code in this unit maps to each kind. }
  TCSSToken = (
    ctkUNKNOWN,
    ctkEOF,             // no more lines available from the line reader
    ctkWHITESPACE,      // run of spaces/tabs; also reported at the end of each line
    ctkCOMMENT,         // "// ..." or "/* ... */"
    ctkSEMICOLON,       // ;
    ctkLPARENTHESIS,    // (
    ctkRPARENTHESIS,    // )
    ctkLBRACE,          // {
    ctkRBRACE,          // }
    ctkLBRACKET,        // [
    ctkRBRACKET,        // ]
    ctkCOMMA,           // ,
    ctkEQUALS,          // =
    ctkAND,             // &
    ctkTILDE,           // ~
    ctkTILDEEQUAL,      // ~=
    ctkPLUS,            // +
    ctkCOLON,           // :
    ctkDOUBLECOLON,     // ::
    ctkDOT,             // .
    ctkDIV,             // /
    ctkGT,              // >
    ctkGE,              // >=
    ctkLT,              // <
    ctkLE,              // <=
    ctkPERCENTAGE,      // %
    ctkMINUS,           // -
    ctkSTAR,            // *
    ctkSTAREQUAL,       // *=
    ctkINTEGER,         // digits, optionally preceded by '-'
    ctkFLOAT,           // digits with a '.' fraction
    ctkHASH,            // '#' followed by name characters
    ctkSTRING,          // single- or double-quoted string (quotes stripped)
    ctkIDENTIFIER,      // plain identifier
    ctkATKEYWORD,       // identifier starting with '@'
    ctkURL,             // url( ... )
    ctkBADURL,          // unquoted url( ... ) containing control, space or non-ASCII bytes
    ctkIMPORTANT,       // the literal text "!important"
    ctkCLASSNAME,       // identifier starting with '.'
    ctkFUNCTION,        // identifier immediately followed by '('
    ctkPSEUDO,          // identifier starting with ':' or '::'
    ctkPSEUDOFUNCTION,  // pseudo identifier immediately followed by '('
    ctkSQUARED,         // ^
    ctkSQUAREDEQUAL,    // ^=
    ctkUNICODERANGE,    // text starting with "U+" / "u+"
    ctkPIPE,            // |
    ctkPIPEEQUAL,       // |=
    ctkDOLLAR,          // $
    ctkDOLLAREQUAL,     // $=
    ctkINVALID          // run of bytes above #127 outside any other token
  );
  // Set of token kinds.
  TCSSTokens = Set of TCSSToken;
  // String type used for all scanner text (file names, lines, token text).
  TCSSString = UTF8String;
  78. resourcestring
  79. SErrInvalidCharacter = 'Invalid character ''%s''';
  80. SErrOpenString = 'String exceeds end of line';
  81. SErrIncludeFileNotFound = 'Could not find include file ''%s''';
  82. SInvalidHexadecimalNumber = 'Invalid decimal number';
  83. SErrUnknownCharacter = 'Unknown character: %s';
Type
  // Exception class raised by TCSSScanner.DoError.
  ECSSScanner = Class(ECSSException);

  { Abstract source of text lines for the scanner. }
  TLineReader = class
  public
    // True when no further line can be read.
    function IsEOF: Boolean; virtual; abstract;
    // Returns the next line, without its line terminator.
    function ReadLine: TCSSString; virtual; abstract;
  end;

  { TStreamLineReader }

  { Line reader that pulls data from a TStream through a fixed-size buffer.
    The stream is only referenced; this class does not free it. }
  TStreamLineReader = class(TLineReader)
  private
    FStream : TStream;
    // FillBuffer reads at most SizeOf(Buffer)-1 bytes and stores a 0 byte after the data.
    Buffer : Array[0..1024] of Byte;
    FBufPos,                // index of the next unread byte in Buffer
    FBufLen : Integer;      // number of valid bytes in Buffer
    procedure FillBuffer;
  public
    Constructor Create(AStream : TStream);
    function IsEOF: Boolean; override;
    function ReadLine: TCSSString; override;
  end;

  { Line reader on top of a Pascal text file. }
  TFileLineReader = class(TLineReader)
  private
    FTextFile: Text;
    FileOpened: Boolean;    // set once Reset succeeded; Destroy closes the file only then
  public
    constructor Create(const AFilename: TCSSString);
    destructor Destroy; override;
    function IsEOF: Boolean; override;
    function ReadLine: TCSSString; override;
  end;
  { TCSSScanner }

  // Scanner behaviour switches.
  //   csoExtendedIdentifiers: '*' directly followed by a letter, digit or '-' starts an identifier
  //   csoReturnComments:      FetchToken returns ctkCOMMENT tokens instead of skipping them
  //   csoReturnWhiteSpace:    FetchToken returns ctkWHITESPACE tokens instead of skipping them
  TCSSScannerOption = (csoExtendedIdentifiers,csoReturnComments,csoReturnWhiteSpace);
  TCSSScannerOptions = set of TCSSScannerOption;
  TCSSScannerWarnEvent = procedure(Sender: TObject; Msg: string) of object;

  { Line-based CSS tokenizer. Lines are pulled from a TLineReader and split
    into TCSSToken values by FetchToken; the text of the last token is
    available through CurTokenString. }
  TCSSScanner = class
  private
    FDisablePseudo: Boolean;
    FOnWarn: TCSSScannerWarnEvent;
    FOptions: TCSSScannerOptions;
    FSourceFile: TLineReader;
    FSourceFilename: TCSSString;
    FCurRow: Integer;               // incremented for every line fetched
    FCurToken: TCSSToken;
    FCurTokenString: TCSSString;
    FCurLine: TCSSString;
    TokenStr: PAnsiChar;            // scan position inside FCurLine; nil before the first line and after EOF
    FSourceStream : TStream;        // stream passed to Create(AStream)
    FOwnSourceFile : Boolean;       // when True, Destroy frees FSourceFile
    function DoHash: TCSSToken;
    function DoIdentifierLike : TCSSToken;
    function DoInvalidChars : TCSSToken;
    function DoMultiLineComment: TCSSToken;
    function CommentDiv: TCSSToken;
    function DoNumericLiteral: TCSSToken;
    function DoSingleLineComment: TCSSToken;
    function DoStringLiteral: TCSSToken;
    function DoWhiteSpace: TCSSToken;
    function EatBadURL: TCSSToken;
    Function DoUnicodeRange : TCSSTOKEN;
    function FetchLine: Boolean;
    function GetCurColumn: Integer;
    function GetReturnComments: Boolean;
    function GetReturnWhiteSpace: Boolean;
    function ReadUnicodeEscape: WideChar;
    procedure SetReturnComments(AValue: Boolean);
    procedure SetReturnWhiteSpace(AValue: Boolean);
    class function UnknownCharToStr(C: AnsiChar): TCSSString;
  protected
    // Both overloads raise ECSSScanner with an "Error at (row,column): " prefix.
    procedure DoError(const Msg: TCSSString; Args: array of const); overload;
    procedure DoError(const Msg: TCSSString); overload;
    // Scans exactly one token, including whitespace and comments.
    function DoFetchToken: TCSSToken; virtual;
  public
    // The reader passed here is not freed by the scanner.
    constructor Create(ALineReader: TLineReader);
    // Wraps AStream in a TStreamLineReader that the scanner frees on Destroy.
    constructor Create(AStream : TStream);
    destructor Destroy; override;
    // Replaces the line source with a TFileLineReader on AFilename.
    procedure OpenFile(const AFilename: TCSSString);
    // Returns the next token, skipping comments/whitespace unless the options request them.
    Function FetchToken: TCSSToken;
    // True when CurTokenString is exactly the three bytes EF BB BF.
    function IsUTF8BOM: boolean;
    Property ReturnComments : Boolean Read GetReturnComments Write SetReturnComments;
    Property ReturnWhiteSpace : Boolean Read GetReturnWhiteSpace Write SetReturnWhiteSpace;
    Property Options : TCSSScannerOptions Read FOptions Write FOptions;
    property SourceFile: TLineReader read FSourceFile;
    property CurFilename: TCSSString read FSourceFilename;
    property CurLine: TCSSString read FCurLine;
    property CurRow: Integer read FCurRow;
    // Byte offset of the scan position within CurLine (0 when there is no current line).
    property CurColumn: Integer read GetCurColumn;
    property CurToken: TCSSToken read FCurToken;
    property CurTokenString: TCSSString read FCurTokenString;
    // When True, ':' is always returned as ctkCOLON and never starts a pseudo identifier.
    property DisablePseudo : Boolean Read FDisablePseudo Write FDisablePseudo;
    // NOTE(review): no code in this unit is seen invoking OnWarn - confirm against callers.
    property OnWarn: TCSSScannerWarnEvent read FOnWarn write FOnWarn;
  end;
{ Like Format, but never raises: when Format fails, the result is
  '{' + Fmt + '}[' + comma-separated arguments + ']'. }
function SafeFormat(const Fmt: string; const Args: array of const): string;
  176. implementation
Const
  // Character classes used by the scanner.
  Alpha = ['A'..'Z','a'..'z'];
  Num = ['0'..'9'];
  AlNum = Alpha+Num;
  // Letters, digits and '-'; note that '_' is not part of this set.
  AlNumIden = Alpha+Num+['-'];
  WhiteSpace = [' ',#9];
  // Whitespace plus the #0 that terminates a line buffer.
  WhiteSpaceEx = WhiteSpace+[#0];

type
  // String form of each argument of an "array of const".
  TMessageArgs = array of string;
{ Converts every element of Args to a string and stores it in MsgArgs at the
  same index. MsgArgs is resized to the number of arguments.
  Argument kinds without a conversion below (extended, pointer, object,
  class, currency, variant, interface) are skipped, so the corresponding
  entry keeps whatever SetLength left there (empty when the caller passes a
  nil array, as SafeFormat does). }
procedure CreateMsgArgs(var MsgArgs: TMessageArgs; const Args: array of const);
var
  i: Integer;
  A : AnsiString;
  U : UnicodeString;
  {$ifdef pas2js}
  v: jsvalue;
  {$endif}
begin
  SetLength(MsgArgs, High(Args)-Low(Args)+1);
  for i:=Low(Args) to High(Args) do
    {$ifdef pas2js}
    // pas2js: arguments are JS values; classify them at run time
    begin
    v:=Args[i];
    if isBoolean(v) then
      MsgArgs[i] := BoolToStr(Boolean(v))
    else if isString(v) then
      MsgArgs[i] := String(v)
    else if isNumber(v) then
      begin
      if IsInteger(v) then
        MsgArgs[i] := str(NativeInt(v))
      else
        MsgArgs[i] := str(double(v));
      end
    else
      MsgArgs[i]:='';
    end;
    {$else}
    // native compiler: dispatch on the TVarRec type tag
    case Args[i].VType of
      vtInteger:      MsgArgs[i] := IntToStr(Args[i].VInteger);
      vtBoolean:      MsgArgs[i] := BoolToStr(Args[i].VBoolean);
      vtChar:         MsgArgs[i] := Args[i].VChar;
      {$ifndef FPUNONE}
      vtExtended:     ; //  Args[i].VExtended^;
      {$ENDIF}
      vtString:       MsgArgs[i] := Args[i].VString^;
      vtPointer:      ; //  Args[i].VPointer;
      vtPChar:        MsgArgs[i] := Args[i].VPChar;
      vtObject:       ; //  Args[i].VObject;
      vtClass:        ; //  Args[i].VClass;
      vtWideChar:
        begin
        // go through a UnicodeString so the compiler inserts the string conversion
        U:=Args[i].VWideChar;
        MsgArgs[i] := U;
        end;
      vtPWideChar:
        begin
        U:=Args[i].VPWideChar;
        MsgArgs[i] := U;
        end;
      vtAnsiString:
        begin
        A:=AnsiString(Args[i].VAnsiString);
        MsgArgs[i]:=A;
        end;
      vtCurrency:     ; //  Args[i].VCurrency^);
      vtVariant:      ; //  Args[i].VVariant^);
      vtInterface:    ; //  Args[i].VInterface^);
      vtWidestring:
        begin
        U:=WideString(Args[i].VWideString);
        MsgArgs[i] := U;
        end;
      vtInt64:        MsgArgs[i] := IntToStr(Args[i].VInt64^);
      vtQWord:        MsgArgs[i] := IntToStr(Args[i].VQWord^);
      vtUnicodeString:
        begin
        U:=UnicodeString(Args[i].VUnicodeString);
        MsgArgs[i] := U;
        end;
    end;
    {$endif}
end;
  260. function SafeFormat(const Fmt: string; const Args: array of const): string;
  261. var
  262. MsgArgs: TMessageArgs;
  263. i: Integer;
  264. begin
  265. try
  266. Result:=Format(Fmt,Args);
  267. except
  268. Result:='';
  269. MsgArgs:=nil;
  270. CreateMsgArgs(MsgArgs,Args);
  271. for i:=0 to length(MsgArgs)-1 do
  272. begin
  273. if i>0 then
  274. Result:=Result+',';
  275. Result:=Result+MsgArgs[i];
  276. end;
  277. Result:='{'+Fmt+'}['+Result+']';
  278. end;
  279. end;
  280. constructor TFileLineReader.Create(const AFilename: TCSSString);
  281. begin
  282. inherited Create;
  283. Assign(FTextFile, AFilename);
  284. Reset(FTextFile);
  285. FileOpened := true;
  286. end;
  287. destructor TFileLineReader.Destroy;
  288. begin
  289. if FileOpened then
  290. Close(FTextFile);
  291. inherited Destroy;
  292. end;
  293. function TFileLineReader.IsEOF: Boolean;
  294. begin
  295. Result := EOF(FTextFile);
  296. end;
  297. function TFileLineReader.ReadLine: TCSSString;
  298. begin
  299. ReadLn(FTextFile, Result);
  300. end;
  301. constructor TCSSScanner.Create(ALineReader: TLineReader);
  302. begin
  303. inherited Create;
  304. FSourceFile := ALineReader;
  305. end;
  306. constructor TCSSScanner.Create(AStream: TStream);
  307. begin
  308. FSourceStream:=ASTream;
  309. FOwnSourceFile:=True;
  310. Create(TStreamLineReader.Create(AStream));
  311. end;
  312. destructor TCSSScanner.Destroy;
  313. begin
  314. If FOwnSourceFile then
  315. FSourceFile.Free;
  316. inherited Destroy;
  317. end;
  318. procedure TCSSScanner.OpenFile(const AFilename: TCSSString);
  319. begin
  320. FSourceFile := TFileLineReader.Create(AFilename);
  321. FSourceFilename := AFilename;
  322. end;
  323. function TCSSScanner.FetchLine: Boolean;
  324. begin
  325. if FSourceFile.IsEOF then
  326. begin
  327. FCurLine := '';
  328. TokenStr := nil;
  329. Result := false;
  330. end else
  331. begin
  332. FCurLine := FSourceFile.ReadLine;
  333. TokenStr := PAnsiChar(CurLine);
  334. Result := true;
  335. Inc(FCurRow);
  336. end;
  337. end;
  338. function TCSSScanner.DoWhiteSpace : TCSSToken;
  339. begin
  340. Result:=ctkWhitespace;
  341. repeat
  342. Inc(TokenStr);
  343. if TokenStr[0] = #0 then
  344. if not FetchLine then
  345. begin
  346. FCurToken := Result;
  347. exit;
  348. end;
  349. until not (TokenStr[0] in [#9, ' ']);
  350. end;
  351. function TCSSScanner.DoSingleLineComment : TCSSToken;
  352. Var
  353. TokenStart : PAnsiChar;
  354. Len : Integer;
  355. begin
  356. Inc(TokenStr);
  357. TokenStart := TokenStr;
  358. while TokenStr[0] <> #0 do
  359. Inc(TokenStr);
  360. Len:=TokenStr-TokenStart;
  361. SetLength(FCurTokenString, Len);
  362. if (Len>0) then
  363. Move(TokenStart^,FCurTokenString[1],Len);
  364. Result := ctkComment;
  365. end;
  366. function TCSSScanner.DoMultiLineComment : TCSSToken;
  367. Var
  368. TokenStart : PAnsiChar;
  369. Len,OLen : Integer;
  370. PrevToken : AnsiChar;
  371. begin
  372. Inc(TokenStr);
  373. TokenStart := TokenStr;
  374. FCurTokenString := '';
  375. OLen:= 0;
  376. PrevToken:=#0;
  377. while Not ((TokenStr[0]='/') and (PrevToken='*')) do
  378. begin
  379. if (TokenStr[0]=#0) then
  380. begin
  381. Len:=TokenStr-TokenStart+1;
  382. SetLength(FCurTokenString,OLen+Len);
  383. if Len>1 then
  384. Move(TokenStart^,FCurTokenString[OLen+1],Len-1);
  385. Inc(OLen,Len);
  386. FCurTokenString[OLen]:=#10;
  387. if not FetchLine then
  388. begin
  389. Result := ctkEOF;
  390. FCurToken := Result;
  391. exit;
  392. end;
  393. TokenStart := TokenStr;
  394. PrevToken:=#0;
  395. end
  396. else
  397. begin
  398. PrevToken:=TokenStr[0];
  399. Inc(TokenStr);
  400. end;
  401. end;
  402. Len:=TokenStr-TokenStart-1; // -1 for *
  403. SetLength(FCurTokenString, Olen+Len);
  404. if (Len>0) then
  405. Move(TokenStart^, FCurTokenString[Olen + 1], Len);
  406. Inc(TokenStr);
  407. Result := ctkComment;
  408. end;
  409. function TCSSScanner.CommentDiv : TCSSToken;
  410. begin
  411. FCurTokenString := '';
  412. Inc(TokenStr);
  413. if (TokenStr[0] = '/') then // Single-line comment
  414. Result:=DoSingleLineComment
  415. else if (TokenStr[0]='*') then
  416. Result:=DoMultiLineComment
  417. else
  418. Result:=ctkDiv;
  419. end;
  420. function TCSSScanner.ReadUnicodeEscape: WideChar;
  421. const
  422. Hex = ['0'..'9','A'..'F','a'..'f' ];
  423. Var
  424. S : TCSSString;
  425. I : Integer;
  426. HaveHex : Boolean;
  427. begin
  428. S:='';
  429. I:=1;
  430. Repeat
  431. S:=S+Upcase(TokenStr[0]);
  432. HaveHex:=TokenStr[1] in Hex;
  433. if HaveHex then
  434. Inc(TokenStr);
  435. Inc(I);
  436. Until (I>4) or not HaveHex;
  437. // Takes care of conversion... This needs improvement !!
  438. Result:=WideChar(StrToInt('$'+S));
  439. end;
  440. procedure TCSSScanner.SetReturnComments(AValue: Boolean);
  441. begin
  442. if AValue then
  443. Include(FOptions,csoReturnComments)
  444. else
  445. Exclude(FOptions,csoReturnComments)
  446. end;
  447. procedure TCSSScanner.SetReturnWhiteSpace(AValue: Boolean);
  448. begin
  449. if AValue then
  450. Include(FOptions,csoReturnWhiteSpace)
  451. else
  452. Exclude(FOptions,csoReturnWhiteSpace)
  453. end;
  454. function TCSSScanner.DoStringLiteral: TCSSToken;
  455. Var
  456. Delim : AnsiChar;
  457. TokenStart : PAnsiChar;
  458. Len,OLen: Integer;
  459. S : TCSSString;
  460. begin
  461. Delim:=TokenStr[0];
  462. Inc(TokenStr);
  463. TokenStart := TokenStr;
  464. OLen := 0;
  465. FCurTokenString := '';
  466. while not (TokenStr[0] in [#0,Delim]) do
  467. begin
  468. if (TokenStr[0]='\') then
  469. begin
  470. // Save length
  471. Len := TokenStr - TokenStart;
  472. Inc(TokenStr);
  473. // Read escaped token
  474. Case TokenStr[0] of
  475. '"' : S:='"';
  476. 'a'..'f',
  477. 'A'..'F',
  478. '0'..'9':
  479. begin
  480. S:=UTF8Encode(ReadUniCodeEscape);
  481. end;
  482. #0 : DoError(SErrOpenString);
  483. else
  484. DoError(SErrInvalidCharacter, [TokenStr[0]]);
  485. end;
  486. SetLength(FCurTokenString, OLen + Len+1+Length(S));
  487. if Len > 0 then
  488. Move(TokenStart^, FCurTokenString[OLen + 1], Len);
  489. Move(S[1],FCurTokenString[OLen + Len+1],Length(S));
  490. Inc(OLen, Len+Length(S));
  491. // Next AnsiChar
  492. // Inc(TokenStr);
  493. TokenStart := TokenStr+1;
  494. end;
  495. if TokenStr[0] = #0 then
  496. DoError(SErrOpenString);
  497. Inc(TokenStr);
  498. end;
  499. if TokenStr[0] = #0 then
  500. DoError(SErrOpenString);
  501. Len := TokenStr - TokenStart;
  502. SetLength(FCurTokenString, OLen + Len);
  503. if Len > 0 then
  504. Move(TokenStart^, FCurTokenString[OLen+1], Len);
  505. Inc(TokenStr);
  506. Result := ctkSTRING;
  507. end;
  508. function TCSSScanner.DoNumericLiteral :TCSSToken;
  509. Var
  510. TokenStart : PAnsiChar;
  511. Len : Integer;
  512. isEscape : Boolean;
  513. begin
  514. Result := ctkINTEGER;
  515. isEscape:=TokenStr[0]='\';
  516. if IsEscape then
  517. Inc(TokenStr);
  518. TokenStart := TokenStr;
  519. while true do
  520. begin
  521. Inc(TokenStr);
  522. case TokenStr[0] of
  523. '.':
  524. if IsEscape then
  525. Break
  526. else
  527. begin
  528. Result := ctkFLOAT;
  529. if TokenStr[1] in ['0'..'9'] then
  530. begin
  531. Inc(TokenStr);
  532. repeat
  533. Inc(TokenStr);
  534. until not (TokenStr[0] in ['0'..'9']);
  535. end;
  536. break;
  537. end;
  538. '0'..'9': ;
  539. else
  540. break;
  541. end;
  542. end;
  543. Len:=TokenStr-TokenStart;
  544. Setlength(FCurTokenString, Len);
  545. if (Len>0) then
  546. Move(TokenStart^,FCurTokenString[1],Len);
  547. if IsEscape then
  548. begin
  549. Result:=ctkString;
  550. FCurTokenString:=AnsiChar(StrToInt(FCurTokenString));
  551. end;
  552. end;
  553. function TCSSScanner.DoHash :TCSSToken;
  554. Var
  555. TokenStart : PAnsiChar;
  556. Len : Integer;
  557. begin
  558. Result := ctkHASH;
  559. TokenStart := TokenStr;
  560. Inc(TokenStr);
  561. while (TokenStr[0]<>'#') and (TokenStr[0] in AlNumIden) do
  562. inc(TokenStr);
  563. Len:=TokenStr-TokenStart;
  564. Setlength(FCurTokenString, Len);
  565. if (Len>0) then
  566. Move(TokenStart^,FCurTokenString[1],Len);
  567. end;
  568. function TCSSScanner.EatBadURL: TCSSToken;
  569. var
  570. TokenStart : PAnsiChar;
  571. C : AnsiChar;
  572. len,oldlen : integer;
  573. begin
  574. Result:=ctkURL;
  575. While not (TokenStr[0] in [#0,')']) do
  576. begin
  577. TokenStart:=TokenStr;
  578. While not (TokenStr[0] in [#0,')']) do
  579. begin
  580. C:=TokenStr[0];
  581. if (Ord(C)<=Ord(' ')) or (Ord(C)>127) then
  582. Result:=ctkBADURL;
  583. inc(TokenStr);
  584. end;
  585. Len:=TokenStr-TokenStart;
  586. oldLen:=Length(FCurTokenString);
  587. Setlength(FCurTokenString, OldLen+Len);
  588. if (Len>0) then
  589. Move(TokenStart^,FCurTokenString[OldLen+1],Len);
  590. if TokenStr[0]=#0 then
  591. if not FetchLine then
  592. Exit(ctkEOF);
  593. end;
  594. end;
  595. function TCSSScanner.DoUnicodeRange: TCSSTOKEN;
  596. Var
  597. TokenStart:PAnsiChar;
  598. Len : Integer;
  599. Tokens : Set of AnsiChar;
  600. begin
  601. Tokens:= ['A'..'F', 'a'..'f', '0'..'9', '-'];
  602. Result:=ctkUNICODERANGE;
  603. TokenStart := TokenStr;
  604. Inc(TokenStr,2); // U+
  605. repeat
  606. if (TokenStr[0]='-') then
  607. Tokens:=Tokens-['-'];
  608. Inc(TokenStr);
  609. //If (TokenStr[0]='\') and (TokenStr[1]='u') then
  610. until not (TokenStr[0] in Tokens);
  611. Len:=(TokenStr-TokenStart);
  612. SetLength(FCurTokenString,Len);
  613. if Len > 0 then
  614. Move(TokenStart^,FCurTokenString[1],Len);
  615. end;
  616. class function TCSSScanner.UnknownCharToStr(C: AnsiChar): TCSSString;
  617. begin
  618. if C=#0 then
  619. Result:='EOF'
  620. else if (C in WhiteSpace) then
  621. Result:='#'+IntToStr(Ord(C))
  622. else
  623. Result:='"'+C+'"';
  624. end;
  625. function TCSSScanner.DoIdentifierLike : TCSSToken;
  626. Var
  627. TokenStart:PAnsiChar;
  628. Len,oLen : Integer;
  629. IsEscape,IsAt, IsPseudo, IsFunc : Boolean;
  630. begin
  631. Result:=ctkIDENTIFIER;
  632. TokenStart := TokenStr;
  633. IsPseudo:=False;
  634. IsAt:=TokenStr[0]='@';
  635. IsFunc:=false;
  636. For Len:=1 to 2 do
  637. if TokenStr[0]=':' then
  638. begin
  639. IsPseudo:=True;
  640. Inc(TokenStr);
  641. end;
  642. Repeat
  643. if not (TokenStr[0]='\') then
  644. repeat
  645. Inc(TokenStr);
  646. //If (TokenStr[0]='\') and (TokenStr[1]='u') then
  647. until not (TokenStr[0] in ['A'..'Z', 'a'..'z', '0'..'9', '_','-']);
  648. IsEscape:=TokenStr[0]='\';
  649. if IsEscape then
  650. begin
  651. if ((TokenStr[0] in WhiteSpace) or (TokenStr[0]=#0)) then
  652. DoError(SErrUnknownCharacter ,[UnknownCharToStr(TokenStr[0])])
  653. end
  654. else if not IsAt then
  655. begin
  656. IsFunc:=TokenStr[0]='(';
  657. if IsFunc then
  658. Inc(TokenStr);
  659. end;
  660. Len:=(TokenStr-TokenStart);
  661. oLen:=Length(FCurTokenString);
  662. SetLength(FCurTokenString,Olen+Len);
  663. if Len > 0 then
  664. Move(TokenStart^,FCurTokenString[Olen+1],Len);
  665. if IsEscape then
  666. Inc(TokenStr);
  667. TokenStart := TokenStr;
  668. until Not IsEscape;
  669. // Some specials
  670. if (CurTokenString[1]='.') and not IsFunc then
  671. Result:=ctkCLASSNAME
  672. else if isAt then
  673. Result:=ctkATKEYWORD
  674. else if CurTokenString='!important' then
  675. Result:=ctkIMPORTANT
  676. else if (CurtokenString='url(') then
  677. begin
  678. Result:=ctkURL;
  679. If TokenStr[0] in ['"',''''] then
  680. DoStringLiteral
  681. else
  682. begin
  683. result:=EatBadURL;
  684. end;
  685. If (result<>ctkEOF) and (TokenStr[0] in [')']) then
  686. Inc(TokenStr);
  687. end
  688. else if IsPseudo then
  689. begin
  690. if IsFunc then
  691. Result:=ctkPSEUDOFUNCTION
  692. else
  693. Result:=ctkPSEUDO;
  694. end
  695. else if IsFunc then
  696. Result:=ctkFUNCTION;
  697. end;
  698. function TCSSScanner.DoInvalidChars: TCSSToken;
  699. var
  700. TokenStart: PAnsiChar;
  701. Len: SizeUInt;
  702. begin
  703. Result:=ctkINVALID;
  704. TokenStart := TokenStr;
  705. repeat
  706. writeln('TCSSScanner.DoInvalidChars ',hexstr(ord(TokenStr^),2));
  707. Inc(TokenStr);
  708. until (TokenStr[0] in [#0,#9,#10,#13,#32..#127]);
  709. Len:=TokenStr-TokenStart;
  710. SetLength(FCurTokenString,Len);
  711. if Len > 0 then
  712. Move(TokenStart^,FCurTokenString[1],Len);
  713. end;
  714. function TCSSScanner.FetchToken: TCSSToken;
  715. var
  716. CanStop : Boolean;
  717. begin
  718. Repeat
  719. Result:=DoFetchToken;
  720. if (Result=ctkINVALID) and IsUTF8BOM then
  721. CanStop:=false
  722. else
  723. CanStop:=(Not (Result in [ctkComment,ctkWhiteSpace]))
  724. or ((ReturnComments and (Result=ctkComment))
  725. or
  726. (ReturnWhiteSpace and (Result=ctkWhiteSpace))
  727. )
  728. Until CanStop;
  729. end;
  730. function TCSSScanner.IsUTF8BOM: boolean;
  731. begin
  732. Result:=(length(FCurTokenString)=3)
  733. and (FCurTokenString[1]=#$EF)
  734. and (FCurTokenString[2]=#$BB)
  735. and (FCurTokenString[3]=#$BF);
  736. end;
  737. function TCSSScanner.DoFetchToken: TCSSToken;
  738. Procedure CharToken(aToken : TCSSToken);
  739. begin
  740. FCurTokenString:=TokenStr[0];
  741. Inc(TokenStr);
  742. Result:=aToken;
  743. end;
  744. Procedure TwoCharsToken(aToken : TCSSToken);
  745. begin
  746. FCurTokenString:=TokenStr[0]+TokenStr[1];
  747. Inc(TokenStr,2);
  748. Result:=aToken;
  749. end;
  750. begin
  751. if TokenStr = nil then
  752. begin
  753. if not FetchLine then
  754. begin
  755. Result := ctkEOF;
  756. FCurToken := Result;
  757. exit;
  758. end;
  759. end;
  760. //CurPos:=TokenStr;
  761. FCurTokenString := '';
  762. case TokenStr[0] of
  763. #0: // Empty line
  764. begin
  765. FetchLine;
  766. Result := ctkWhitespace;
  767. end;
  768. '''','"':
  769. Result:=DoStringLiteral;
  770. '/' :
  771. Result:=CommentDiv;
  772. #9, ' ':
  773. Result := DoWhiteSpace;
  774. '#':
  775. Result:=DoHash;
  776. '\':
  777. begin
  778. if TokenStr[1] in ['0'..'9'] then
  779. Result:=DoNumericLiteral
  780. else
  781. begin
  782. if (TokenStr[1] in WhiteSpace) or (TokenStr[1]=#0) then
  783. DoError(SErrUnknownCharacter ,[UnknownCharToStr(TokenStr[1])])
  784. else
  785. Result:=DoIdentifierLike
  786. end;
  787. end;
  788. '0'..'9':
  789. Result:=DoNumericLiteral;
  790. '&': CharToken(ctkAnd);
  791. '{': CharToken( ctkLBRACE);
  792. '}': CharToken(ctkRBRACE);
  793. '*': if TokenStr[1]='=' then
  794. TwoCharsToken(ctkSTAREQUAL)
  795. else if (csoExtendedIdentifiers in Options) and (TokenStr[1] in AlNumIden) then
  796. Result:=DoIdentifierLike
  797. else
  798. CharToken(ctkSTAR);
  799. '^':
  800. if TokenStr[1]='=' then
  801. TwoCharsToken(ctkSQUAREDEQUAL)
  802. else
  803. CharToken(ctkSQUARED);
  804. ',': CharToken(ctkCOMMA);
  805. '~':
  806. if TokenStr[1]='=' then
  807. TwoCharsToken(ctkTILDEEQUAL)
  808. else
  809. CharToken(ctkTILDE);
  810. '|':
  811. if TokenStr[1]='=' then
  812. TwoCharsToken(ctkPIPEEQUAL)
  813. else
  814. CharToken(ctkPIPE);
  815. '$':
  816. if TokenStr[1]='=' then
  817. TwoCharsToken(ctkDOLLAREQUAL)
  818. else
  819. CharToken(ctkDOLLAR);
  820. ';': CharToken(ctkSEMICOLON);
  821. '@': Result:=DoIdentifierLike;
  822. ':':
  823. begin
  824. if DisablePseudo then
  825. CharToken(ctkCOLON)
  826. else if (TokenStr[1]=':') then
  827. begin
  828. if (TokenStr[2] in AlNumIden) then
  829. Result:=DoIdentifierLike
  830. else
  831. Result:=ctkDoubleCOLON
  832. end
  833. else if (TokenStr[1] in AlNumIden) then
  834. Result:=DoIdentifierLike
  835. else
  836. CharToken(ctkCOLON);
  837. end;
  838. '.':
  839. begin
  840. if (TokenStr[1] in AlNum) then
  841. Result:=Self.DoIdentifierLike
  842. else
  843. CharToken(ctkDOT);
  844. end;
  845. '>':
  846. if TokenStr[1]='=' then
  847. TwoCharsToken(ctkGE)
  848. else
  849. CharToken(ctkGT);
  850. '<':
  851. if TokenStr[1]='=' then
  852. TwoCharsToken(ctkLE)
  853. else
  854. CharToken(ctkLT);
  855. '(': CharToken(ctkLPARENTHESIS);
  856. ')': CharToken(ctkRPARENTHESIS);
  857. '[': CharToken(ctkLBRACKET);
  858. ']': CharToken(ctkRBRACKET);
  859. '=': CharToken(ctkEQUALS);
  860. '-':
  861. begin
  862. if (TokenStr[1] in ['0'..'9']) then
  863. Result:=DoNumericLiteral
  864. else if Not (TokenStr[1] in WhiteSpaceEx) then
  865. Result:=DoIdentifierLike
  866. else
  867. CharToken(ctkMINUS);
  868. end;
  869. '+': CharToken(ctkPLUS);
  870. '%': CharToken(ctkPERCENTAGE);
  871. '_','!',
  872. 'a'..'z',
  873. 'A'..'Z':
  874. begin
  875. if (TokenStr[0] in ['u','U']) and (TokenStr[1]='+') then
  876. Result:=DoUnicodeRange
  877. else
  878. Result:=DoIdentifierLike;
  879. end;
  880. else
  881. writeln('TCSSScanner.DoFetchToken ',Ord(TokenStr[0]));
  882. If Ord(TokenStr[0])>127 then
  883. Result:=DoInvalidChars
  884. else
  885. DoError(SErrUnknownCharacter ,['"'+TokenStr[0]+'"']);
  886. end; // Case
  887. end;
  888. procedure TCSSScanner.DoError(const Msg: TCSSString; Args: array of const);
  889. begin
  890. DoError(Format(Msg,Args));
  891. end;
  892. procedure TCSSScanner.DoError(const Msg: TCSSString);
  893. Var
  894. S : TCSSString;
  895. begin
  896. S:=Format('Error at (%d,%d): ',[CurRow,CurColumn])+Msg;
  897. Raise ECSSScanner.Create(S);
  898. end;
  899. function TCSSScanner.GetCurColumn: Integer;
  900. begin
  901. if (TokenStr=Nil) or (Length(CurLine)=0) then
  902. Result:=0
  903. else
  904. Result := TokenStr - PAnsiChar(CurLine);
  905. end;
  906. function TCSSScanner.GetReturnComments: Boolean;
  907. begin
  908. Result:=(csoReturnComments in FOptions);
  909. end;
  910. function TCSSScanner.GetReturnWhiteSpace: Boolean;
  911. begin
  912. Result:=(csoReturnWhiteSpace in FOptions);
  913. end;
  914. { TStreamLineReader }
  915. constructor TStreamLineReader.Create(AStream: TStream);
  916. begin
  917. FStream:=AStream;
  918. FBufPos:=0;
  919. FBufLen:=0;
  920. end;
  921. function TStreamLineReader.IsEOF: Boolean;
  922. begin
  923. Result:=(FBufPos>=FBufLen);
  924. If Result then
  925. begin
  926. FillBuffer;
  927. Result:=(FBufLen=0);
  928. end;
  929. end;
  930. procedure TStreamLineReader.FillBuffer;
  931. begin
  932. FBufLen:=FStream.Read(Buffer,SizeOf(Buffer)-1);
  933. Buffer[FBufLen]:=0;
  934. FBufPos:=0;
  935. end;
  936. function TStreamLineReader.ReadLine: TCSSString;
  937. Var
  938. FPos,OLen,Len: Integer;
  939. PRun : PByte;
  940. begin
  941. Result:='';
  942. FPos:=FBufPos;
  943. Repeat
  944. PRun:=@Buffer[FBufPos];
  945. While (FBufPos<FBufLen) and Not (PRun^ in [10,13]) do
  946. begin
  947. Inc(PRun);
  948. Inc(FBufPos);
  949. end;
  950. If (FBufPos=FBufLen) then
  951. begin
  952. Len:=FBufPos-FPos;
  953. If (Len>0) then
  954. begin
  955. Olen:=Length(Result);
  956. SetLength(Result,OLen+Len);
  957. Move(Buffer[FPos],Result[OLen+1],Len);
  958. end;
  959. FillBuffer;
  960. FPos:=FBufPos;
  961. end;
  962. until (FBufPos=FBufLen) or (PRun^ in [10,13]);
  963. Len:=FBufPos-FPos;
  964. If (Len>0) then
  965. begin
  966. Olen:=Length(Result);
  967. SetLength(Result,OLen+Len);
  968. Move(Buffer[FPos],Result[OLen+1],Len)
  969. end;
  970. If (PRun^ in [10,13]) and (FBufPos<FBufLen) then
  971. begin
  972. Inc(FBufPos);
  973. // Check #13#10
  974. If (PRun^=13) then
  975. begin
  976. If (FBufPos=FBufLen) then
  977. FillBuffer;
  978. If (FBufPos<FBufLen) and (Buffer[FBufpos]=10) then
  979. Inc(FBufPos);
  980. end;
  981. end;
  982. end;
  983. end.