webidlscanner.pp 15 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705
  {
      This file is part of the Free Component Library

      WEBIDL source lexical scanner
      Copyright (c) 2018 by Michael Van Canneyt [email protected]

      See the file COPYING.FPC, included in this distribution,
      for details about the copyright.

      This program is distributed in the hope that it will be useful,
      but WITHOUT ANY WARRANTY; without even the implied warranty of
      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

   **********************************************************************}
  11. {$mode objfpc}
  12. {$h+}
  13. unit webidlscanner;
  14. interface
  15. uses SysUtils, Classes;
  type
    // Selects which version-specific keyword set the scanner recognises.
    TWebIDLVersion = (v1, v2);

    // Token kinds used by the scanner.
    // The order of the members must stay in step with the TokenInfos table,
    // which is declared as array[TIDLToken]. The spelling/casing of the
    // identifiers is significant as well: GetTokenName falls back to
    // GetEnumName for members whose TokenInfos entry is empty.
    TIDLToken = (
      tkEOF,
      tkUnknown,
      tkComment,
      tkWhitespace,
      tkString,
      tkNumberInteger,
      tkNumberFloat,
      // Simple (one-character) tokens
      tkDot,                // '.'
      tkSemiColon,          // ';'
      tkComma,              // ','
      tkColon,              // ':'
      tkBracketOpen,        // '('
      tkBracketClose,       // ')'
      tkCurlyBraceOpen,     // '{'
      tkCurlyBraceClose,    // '}'
      tkSquaredBraceOpen,   // '['
      tkSquaredBraceClose,  // ']'
      tkLess,               // '<'
      tkEqual,              // '='
      tkLarger,             // '>'
      tkQuestionmark,       // '?'
      tkminus,              // '-'
      tkIdentifier,         // Any identifier
      // Everything after tkIdentifier is matched by text (see TokenInfos)
      tkTrue,
      tkFalse,
      tkNull,
      tkAny,
      tkAttribute,
      tkCallback,
      tkConst,
      tkDeleter,
      tkDictionary,
      tkEllipsis,
      tkEnum,
      tkGetter,
      tkImplements,
      tkInfinity,
      tkInherit,
      tkInterface,
      tkIterable,
      tkLegacyCaller,
      tkNan,
      tkNegInfinity,
      tkOptional,
      tkOr,
      tkPartial,
      tkReadOnly,
      tkRequired,
      tkSetter,
      tkStatic,
      tkStringifier,
      tkSerializer,
      tkTypedef,
      tkUnrestricted,
      tkPromise,
      tkFrozenArray,
      tkByteString,
      tkDOMString,
      tkUSVString,
      tkboolean,
      tkbyte,
      tkdouble,
      tkfloat,
      tklong,
      tkobject,
      tkoctet,
      tkunsigned,
      tkvoid,
      tkShort,
      tkSequence,
      tkStringToken,
      tkMixin,
      tkIncludes,
      tkMapLike,
      tkRecord,
      tkSetLike,
      tkOther,
      tkConstructor
    );

    TIDLTokens = set of TIDLToken;

    // Exception type raised by TWebIDLScanner.Error.
    EWebIDLScanner = class(EParserError);
  const
    // Tokens that DetermineToken turns back into tkIdentifier when Version is v1.
    V2Tokens = [tkFrozenArray, tkMixin, tkIncludes, tkMapLike, tkRecord,
                tkSetLike, tkConstructor];
    // Tokens that DetermineToken turns back into tkIdentifier when Version is v2.
    V1Tokens = [tkImplements];
    // Indexed by the active version: the keyword tokens NOT recognised in it.
    VersionNonTokens : array[TWebIDLVersion] of TIDLTokens = (
      V2Tokens,  // v1
      V1Tokens   // v2
    );
  type

    { TWebIDLScanner }

    // Line-oriented lexical scanner: the whole input is loaded into a string
    // list and tokens are produced one at a time through FetchToken.
    TWebIDLScanner = class
    private
      FSource : TStringList;          // input text, one entry per line
      FCurRow : Integer;              // index of the next line FetchLine will load
      FCurToken : TIDLToken;          // kind of the token fetched last
      FCurTokenString : UTF8String;   // text of the token fetched last
      FCurLine : UTF8String;          // line currently being scanned
      FVersion : TWebIDLVersion;
      TokenStr : PChar;               // cursor inside FCurLine; nil once FetchLine runs out of lines
      function DetermineToken: TIDLToken;
      function DetermineToken2: TIDLToken;
      function FetchLine: Boolean;
      function GetCurColumn: Integer;
      function ReadComment: UTF8String;
      function ReadIdent: UTF8String;
      function ReadNumber(var S: UTF8String): TIDLToken;
    protected
      function GetErrorPos : String;
      // Both overloads raise EWebIDLScanner, prefixed with GetErrorPos.
      procedure Error(const Msg: string); overload;
      procedure Error(const Msg: string; const Args: array of const); overload;
      function ReadString: UTF8String; virtual;
      function DoFetchToken: TIDLToken;
    public
      // The three constructors differ only in where the text comes from.
      constructor Create(Source: TStream); overload;
      constructor Create(const Source: UTF8String); overload;
      constructor CreateFile(const aFileName: UTF8String);
      destructor Destroy; override;
      // Scans and returns the next token (whitespace and comments included).
      function FetchToken: TIDLToken;
      property CurLine: UTF8String read FCurLine;
      property CurRow: Integer read FCurRow;
      property CurColumn: Integer read GetCurColumn;
      property CurToken: TIDLToken read FCurToken;
      property CurTokenString: UTF8String read FCurTokenString;
      property Version : TWebIDLVersion read FVersion write FVersion;
    end;
  const
    // Source text of every token, indexed by token kind. An empty entry means
    // the token has no fixed spelling; GetTokenName then derives a name from
    // the enumeration identifier instead.
    // DetermineToken compares identifiers against the entries that follow
    // tkIdentifier, so e.g. an identifier spelled 'ellipsis', 'string' or
    // 'other' matches the corresponding entry below.
    TokenInfos: array[TIDLToken] of string = (
      '',              // tkEOF
      '',              // tkUnknown
      '',              // tkComment
      '',              // tkWhitespace
      '',              // tkString
      '',              // tkNumberInteger
      '',              // tkNumberFloat
      // Simple (one-character) tokens
      '.',             // tkDot
      ';',             // tkSemiColon
      ',',             // tkComma
      ':',             // tkColon
      '(',             // tkBracketOpen
      ')',             // tkBracketClose
      '{',             // tkCurlyBraceOpen
      '}',             // tkCurlyBraceClose
      '[',             // tkSquaredBraceOpen
      ']',             // tkSquaredBraceClose
      '<',             // tkLess
      '=',             // tkEqual
      '>',             // tkLarger
      '?',             // tkQuestionmark
      '-',             // tkminus
      '',              // tkIdentifier (any identifier)
      'true',          // tkTrue
      'false',         // tkFalse
      'null',          // tkNull
      'any',           // tkAny
      'attribute',     // tkAttribute
      'callback',      // tkCallback
      'const',         // tkConst
      'deleter',       // tkDeleter
      'dictionary',    // tkDictionary
      'ellipsis',      // tkEllipsis
      'enum',          // tkEnum
      'getter',        // tkGetter
      'implements',    // tkImplements
      'Infinity',      // tkInfinity
      'inherit',       // tkInherit
      'interface',     // tkInterface
      'iterable',      // tkIterable
      'legacycaller',  // tkLegacyCaller
      'NaN',           // tkNan
      '-Infinity',     // tkNegInfinity
      'optional',      // tkOptional
      'or',            // tkOr
      'partial',       // tkPartial
      'readonly',      // tkReadOnly
      'required',      // tkRequired
      'setter',        // tkSetter
      'static',        // tkStatic
      'stringifier',   // tkStringifier
      'serializer',    // tkSerializer
      'typedef',       // tkTypedef
      'unrestricted',  // tkUnrestricted
      'Promise',       // tkPromise
      'FrozenArray',   // tkFrozenArray
      'ByteString',    // tkByteString
      'DOMString',     // tkDOMString
      'USVString',     // tkUSVString
      'boolean',       // tkboolean
      'byte',          // tkbyte
      'double',        // tkdouble
      'float',         // tkfloat
      'long',          // tklong
      'object',        // tkobject
      'octet',         // tkoctet
      'unsigned',      // tkunsigned
      'void',          // tkvoid
      'short',         // tkShort
      'sequence',      // tkSequence
      'string',        // tkStringToken
      'mixin',         // tkMixin
      'includes',      // tkIncludes
      'maplike',       // tkMapLike
      'record',        // tkRecord
      'setlike',       // tkSetLike
      'other',         // tkOther
      'constructor'    // tkConstructor
    );
  // Display text of a single token kind.
  function GetTokenName(aToken : TIDLToken) : String;
  // Comma-separated display texts of all token kinds contained in the set.
  function GetTokenNames(aTokenList : TIDLTokens) : String;
  226. implementation
  227. uses typinfo;
  resourcestring
    // Scanner messages. The %d/%s placeholders are filled in through
    // TWebIDLScanner.Error, which also prepends the text from GetErrorPos.
    SErrUnknownTerminator = 'Unknown terminator: "%s"';
    SErrInvalidCharacter = 'Invalid character at line %d, pos %d: ''%s''';
    SUnterminatedComment = 'Unterminated comment at line %d, pos %d: ''%s''';
    SErrOpenString = 'string exceeds end of line';
    SErrInvalidEllipsis = 'Invalid ellipsis token';
    SErrUnknownToken = 'Unknown token, expected number or minus : "%s"';
    // SerrExpectedTokenButWasIdentifier = 'Invalid terminator: "%s"';
  { Returns the display text of aToken: its TokenInfos entry or, when that
    entry is empty, the enumeration identifier without its first two
    characters (the 'tk' prefix). }
  function GetTokenName(aToken : TIDLToken) : String;
  var
    EnumIdent : String;
  begin
    if TokenInfos[aToken]<>'' then
      Exit(TokenInfos[aToken]);
    EnumIdent:=GetEnumName(TypeInfo(TIDLToken),Ord(aToken));
    Result:=Copy(EnumIdent,3,MaxInt);
  end;
  { Returns the names (see GetTokenName) of all tokens in aTokenList, in
    enumeration order, separated by commas. An empty set yields ''. }
  function GetTokenNames(aTokenList : TIDLTokens) : String;
  var
    Tok : TIDLToken;
  begin
    Result:='';
    for Tok:=Low(TIDLToken) to High(TIDLToken) do
      if Tok in aTokenList then
        begin
        if Result<>'' then
          Result:=Result+',';
        Result:=Result+GetTokenName(Tok);
        end;
  end;
  { Creates a scanner whose input is the complete contents of Source. }
  constructor TWebIDLScanner.Create(Source: TStream);
  var
    Lines : TStringList;
  begin
    Lines:=TStringList.Create;
    // Store the list before loading so Destroy can release it if loading fails.
    FSource:=Lines;
    Lines.LoadFromStream(Source);
  end;
  { Creates a scanner whose input is the text passed in Source. }
  constructor TWebIDLScanner.Create(const Source: UTF8String);
  var
    Lines : TStringList;
  begin
    Lines:=TStringList.Create;
    FSource:=Lines;
    // Assigning Text splits the input into separate lines.
    Lines.Text:=Source;
  end;
  { Creates a scanner whose input is read from the file aFileName. }
  constructor TWebIDLScanner.CreateFile(const aFileName: UTF8String);
  var
    Lines : TStringList;
  begin
    Lines:=TStringList.Create;
    // Store the list before loading so Destroy can release it if loading fails.
    FSource:=Lines;
    Lines.LoadFromFile(aFileName);
  end;
  { Releases the line buffer owned by the scanner. }
  destructor TWebIDLScanner.Destroy;
  begin
    FreeAndNil(FSource);
    inherited Destroy;
  end;
  { Public entry point: scans the next token by delegating to DoFetchToken. }
  function TWebIDLScanner.FetchToken: TIDLToken;
  var
    Tok : TIDLToken;
  begin
    Tok:=DoFetchToken;
    Result:=Tok;
  end;
  { Raises EWebIDLScanner with Msg prefixed by the current position text. }
  procedure TWebIDLScanner.Error(const Msg: string);
  var
    FullMessage : String;
  begin
    FullMessage:=GetErrorPos+Msg;
    raise EWebIDLScanner.Create(FullMessage);
  end;
  { Formats Msg with Args and raises EWebIDLScanner, prefixed by the current
    position text. }
  procedure TWebIDLScanner.Error(const Msg: string; const Args: array of const);
  var
    Prefix, Details : String;
  begin
    Prefix:=GetErrorPos;
    Details:=Format(Msg, Args);
    raise EWebIDLScanner.Create(Prefix+Details);
  end;
  { Reads a quoted string. On entry TokenStr points at the opening quote; the
    same character closes the string. Returns the contents with backslash
    escapes decoded and leaves TokenStr just behind the closing quote.
    Raises EWebIDLScanner (through Error) when the line ends before the
    closing quote or when an escape sequence is not recognised. }
  function TWebIDLScanner.ReadString : UTF8String;
  var
    Quote : Char;
    Digit : Integer;
    Plain, Decoded : UTF8String;
    RunStart : PChar;
  begin
    Quote:=TokenStr[0];
    Inc(TokenStr);
    RunStart:=TokenStr;   // start of the literal run not yet copied to Result
    Result:='';
    while not (TokenStr[0] in [#0,Quote]) do
      begin
      if TokenStr[0]='\' then
        begin
        // Literal characters collected since the previous escape (may be none)
        Plain:='';
        SetString(Plain,RunStart,TokenStr-RunStart);
        Inc(TokenStr);
        // Decode the character following the backslash
        case TokenStr[0] of
          '"'  : Decoded:='"';
          '''' : Decoded:='''';
          't'  : Decoded:=#9;
          'b'  : Decoded:=#8;
          'n'  : Decoded:=#10;
          'r'  : Decoded:=#13;
          'f'  : Decoded:=#12;
          '\'  : Decoded:='\';
          '/'  : Decoded:='/';
          'u'  :
            begin
            // Exactly four hexadecimal digits must follow
            Decoded:='0000';
            for Digit:=1 to 4 do
              begin
              Inc(TokenStr);
              case TokenStr[0] of
                '0'..'9','A'..'F','a'..'f' :
                  Decoded[Digit]:=Upcase(TokenStr[0]);
              else
                Error(SErrInvalidCharacter, [CurRow,CurColumn,TokenStr[0]]);
              end;
              end;
            // WideChar takes care of conversion...
            Decoded:=Utf8Encode(WideString(WideChar(StrToInt('$'+Decoded))))
            end;
          #0 : Error(SErrOpenString);
        else
          Error(SErrInvalidCharacter, [CurRow,CurColumn,TokenStr[0]]);
        end;
        Result:=Result+Plain+Decoded;
        // TokenStr still points at the last character of the escape; the
        // next literal run begins right behind it.
        RunStart:=TokenStr+1;
        end;
      if TokenStr[0]=#0 then
        Error(SErrOpenString);
      Inc(TokenStr);
      end;
    if TokenStr[0]=#0 then
      Error(SErrOpenString);
    // Append the literal run that precedes the closing quote
    Plain:='';
    SetString(Plain,RunStart,TokenStr-RunStart);
    Result:=Result+Plain;
    Inc(TokenStr);   // step over the closing quote
  end;
  { Reads an identifier starting at TokenStr. One leading underscore is
    skipped and is not part of the returned text. The first remaining
    character is always accepted; after it the identifier extends over
    letters, digits and underscores. Returns '' when the line ends right
    after the skipped underscore. }
  function TWebIDLScanner.ReadIdent: UTF8String;
  const
    IdentChars = ['A'..'Z', 'a'..'z', '0'..'9', '_'];
  var
    First : PChar;
  begin
    Result:='';
    if TokenStr[0]='_' then
      Inc(TokenStr);
    if TokenStr[0]<>#0 then
      begin
      First:=TokenStr;
      Inc(TokenStr);
      while TokenStr[0] in IdentChars do
        Inc(TokenStr);
      SetString(Result, First, TokenStr-First);
      end;
  end;
  { Makes the next source line current. Returns False when no line is left; in
    that case the current line is cleared and TokenStr becomes nil. }
  function TWebIDLScanner.FetchLine: Boolean;
  begin
    if FCurRow>=FSource.Count then
      begin
      FCurLine:='';
      TokenStr:=nil;
      Exit(False);
      end;
    FCurLine:=FSource[FCurRow];
    TokenStr:=PChar(FCurLine);
    Inc(FCurRow);
    Result:=True;
  end;
  { Scans a token that starts with a digit or with '-'.

    S receives the scanned text. The result is
      tkNumberInteger - digits only, or a hexadecimal literal (an 'x' was seen);
      tkNumberFloat   - the text contains a decimal point and/or an exponent;
      tkNegInfinity   - the text is exactly '-Infinity';
      tkMinus         - a lone '-'.
    Raises EWebIDLScanner (through Error) for a hexadecimal letter outside a
    hexadecimal literal and for a malformed '-Infinity'.

    Changes compared to the previous implementation:
      * the error for a stray hexadecimal letter now reports the offending
        text; it used to format S before S was assigned, which printed "";
      * an exponent without a decimal point ("1e5") is reported as
        tkNumberFloat instead of tkNumberInteger;
      * a decimal point that is not followed by digits ("1.") is consumed as
        part of the float instead of being left behind as a separate '.'. }
  function TWebIDLScanner.ReadNumber(var S : UTF8String) : TIDLToken;
  var
    TokenStart : PChar;
    isHex : Boolean;

    // Text from the start of the token up to and including the character
    // TokenStr points at. Only used for error messages.
    function ScannedText : UTF8String;
    begin
      Result:='';
      SetString(Result, TokenStart, (TokenStr-TokenStart)+1);
    end;

  begin
    isHex:=False;
    TokenStart:=TokenStr;
    Result:=tkNumberInteger;
    while True do
      begin
      Inc(TokenStr);
      case TokenStr[0] of
        'x':
          isHex:=True;
        'I':
          begin
          repeat
            Inc(TokenStr);
          until not (TokenStr[0] in ['i','n','f','t','y']);
          Result:=tkNegInfinity; // We'll check at the end if the string is actually correct
          break;
          end;
        '.':
          begin
          Result:=tkNumberFloat;
          // The dot always belongs to the number, also when no digit follows.
          Inc(TokenStr);
          if TokenStr[0] in ['0'..'9', 'e', 'E'] then
            repeat
              Inc(TokenStr);
            until not (TokenStr[0] in ['0'..'9', 'e', 'E','-','+']);
          break;
          end;
        '0'..'9':
          begin
          // plain digit: keep scanning
          end;
        'a'..'d','f',
        'A'..'D','F':
          begin
          if not isHex then
            Error(SErrUnknownToken,[ScannedText]);
          end;
        'e', 'E':
          begin
          // Inside a hexadecimal literal 'e' is just another digit.
          if not isHex then
            begin
            Result:=tkNumberFloat;
            Inc(TokenStr);
            if TokenStr[0] in ['-','+'] then
              Inc(TokenStr);
            while TokenStr[0] in ['0'..'9'] do
              Inc(TokenStr);
            break;
            end;
          end;
      else
        // Any other character ends the token. A '-' with nothing scanned
        // after it is the minus token.
        if (TokenStr-TokenStart=1) and (TokenStart[0]='-') then
          Result:=tkMinus;
        break;
      end;
      end;
    S:='';
    SetString(S, TokenStart, TokenStr-TokenStart);
    if (Result=tkNegInfinity) and (S<>'-Infinity') then
      Error(SErrUnknownToken,[S]);
    if (Result=tkMinus) and (S<>'-') then
      Error(SErrUnknownTerminator,[S]);
  end;
  { Builds the position prefix that Error puts in front of every message. }
  function TWebIDLScanner.GetErrorPos: String;
  var
    Row, Col : Integer;
  begin
    Row:=CurRow;
    Col:=CurColumn;
    Result:=Format('Scanner error at line %d, pos %d: ',[Row,Col]);
  end;
  { Reads a comment. On entry TokenStr points at the first '/'.

    '//' comment: returns the rest of the line and moves on to the next line.
    '/* */' comment: returns the text between the delimiters; the parts of a
    comment that spans several lines are joined without a separator, and
    TokenStr is left just behind the closing '/'.
    Raises EWebIDLScanner (through Error) when the '/' is followed by anything
    else, or when the input ends inside a block comment.

    Changes compared to the previous implementation:
      * a '//' comment no longer ends with a #0 character (the length was
        computed before stepping over the second '/', i.e. one too large);
      * an unterminated block comment raises the intended error; it used to
        evaluate TokenStr[0] after FetchLine had set TokenStr to nil;
      * empty lines inside a block comment no longer move the cursor past the
        end of the line buffer;
      * the pieces are collected in a UTF8String instead of a String, so no
        code page conversion can take place when they are appended. }
  function TWebIDLScanner.ReadComment : UTF8String;
  var
    TokenStart : PChar;
    Part : UTF8String;
    EOC, IsStar : Boolean;
  begin
    Result:='';
    Inc(TokenStr);   // step over the leading '/'
    case TokenStr[0] of
      '/' :
        begin
        Inc(TokenStr);   // step over the second '/'
        // Everything that is left on the line is the comment text.
        SetString(Result, TokenStr, Length(FCurLine)-(TokenStr-PChar(FCurLine)));
        FetchLine;
        end;
      '*' :
        begin
        Inc(TokenStr);
        TokenStart:=TokenStr;
        repeat
          // At the end of a line: keep what was collected and continue on the
          // next line. A loop, because that line can be empty as well.
          while TokenStr[0]=#0 do
            begin
            Part:='';
            SetString(Part, TokenStart, TokenStr-TokenStart);
            Result:=Result+Part;
            if not FetchLine then
              // TokenStr is nil here, so there is no character to report.
              Error(SUnterminatedComment, [CurRow,CurColumn,'']);
            TokenStart:=TokenStr;
            end;
          IsStar:=TokenStr[0]='*';
          Inc(TokenStr);
          EOC:=IsStar and (TokenStr[0]='/');
        until EOC;
        // TokenStr points at the closing '/'; the -1 drops the '*' before it.
        Part:='';
        SetString(Part, TokenStart, TokenStr-TokenStart-1);
        Result:=Result+Part;
        Inc(TokenStr);
        end;
    else
      Error(SErrInvalidCharacter, [CurRow,CurColumn,TokenStr[0]]);
    end;
  end;
  { Maps the current token text to a keyword token. The TokenInfos entries are
    searched from the last token down to (not including) tkIdentifier. Without
    a match, or when the match is excluded for the active Version, the result
    is tkIdentifier. }
  function TWebIDLScanner.DetermineToken : TIDLToken;
  var
    Candidate : TIDLToken;
  begin
    Candidate:=High(TIDLToken);
    while Candidate<>tkIdentifier do
      begin
      if TokenInfos[Candidate]=FCurTokenString then
        break;
      Candidate:=Pred(Candidate);
      end;
    if Candidate in VersionNonTokens[Version] then
      Candidate:=tkIdentifier;
    // if Candidate=tkIdentifier then
    //   Error(SErrExpectedTokenButWasIdentifier,[FCurTokenString]);
    Result:=Candidate;
  end;
  { Maps the current token text to one of the tokens in InfTokens; any other
    text yields tkIdentifier. }
  function TWebIDLScanner.DetermineToken2 : TIDLToken;
  const
    InfTokens = [tkNan,tkInfinity,tkNegInfinity,tkByteString,tkUSVString,tkDOMString,tkPromise,tkFrozenArray];
  var
    Tok : TIDLToken;
  begin
    Result:=tkIdentifier;
    for Tok in InfTokens do
      if TokenInfos[Tok]=FCurTokenString then
        Exit(Tok);
  end;
  { Scans the next token, stores its kind in FCurToken and its text in
    FCurTokenString, and returns the kind.

    Whitespace and comments are returned as tokens. tkEOF is returned once no
    line is left. A character that cannot start a token raises EWebIDLScanner
    (through Error).

    Changes compared to the previous implementation:
      * ',' is handled by SetSingleToken like every other one-character token,
        so CurTokenString is ',' instead of '' for a comma;
      * removed the assignment of tkIdentifier that was immediately
        overwritten by the DetermineToken2 result. }
  function TWebIDLScanner.DoFetchToken: TIDLToken;

    // Emits the one-character token under the cursor: the character becomes
    // the token text, the cursor advances by one and the result of the
    // enclosing function is set to tk.
    procedure SetSingleToken(tk : TIDLToken);
    begin
      FCurTokenString:=TokenStr[0];
      Inc(TokenStr);
      Result:=tk;
    end;

  begin
    // No current line: first call, or the previous fetch ran out of lines.
    if TokenStr = nil then
      if not FetchLine then
        begin
        Result := tkEOF;
        FCurToken := Result;
        exit;
        end;
    FCurTokenString := '';
    case TokenStr[0] of
      #0: // Empty line, or end of the current line
        begin
        if not FetchLine then
          Result := tkEOF
        else
          Result := tkWhitespace;
        end;
      #9, ' ':
        begin
        Result := tkWhitespace;
        repeat
          Inc(TokenStr);
          // The end of a line is whitespace too: continue on the next line.
          if TokenStr[0] = #0 then
            if not FetchLine then
              begin
              FCurToken := Result;
              exit;
              end;
        until not (TokenStr[0] in [#9, ' ']);
        end;
      '"':
        begin
        FCurTokenString := ReadString;
        Result := tkString;
        end;
      '0'..'9','-':
        begin
        // ReadNumber also yields tkMinus and tkNegInfinity
        Result := ReadNumber(FCurTokenString);
        end;
      ',': SetSingleToken(tkComma);
      ':': SetSingleToken(tkColon);
      '(': SetSingleToken(tkBracketOpen);
      ')': SetSingleToken(tkBracketClose);
      '{': SetSingleToken(tkCurlyBraceOpen);
      '}': SetSingleToken(tkCurlyBraceClose);
      '[': SetSingleToken(tkSquaredBraceOpen);
      ']': SetSingleToken(tkSquaredBraceClose);
      '<': SetSingleToken(tkLess);
      '=': SetSingleToken(tkEqual);
      '>': SetSingleToken(tkLarger);
      '?': SetSingleToken(tkQuestionmark);
      ';': SetSingleToken(tkSemicolon);
      '.':
        begin
        Inc(TokenStr);
        if TokenStr[0]<>'.' then
          begin
          Dec(TokenStr); // SetSingleToken advances
          SetSingleToken(tkDot);
          end
        else
          begin
          // Two dots seen: only a third one makes a valid token
          Inc(TokenStr);
          if TokenStr[0]<>'.' then
            Error(SErrInvalidEllipsis);
          Inc(TokenStr);
          FCurTokenString:='...';
          Result:=tkEllipsis;
          end;
        end;
      '/':
        begin
        FCurTokenString:=ReadComment;
        Result:=tkComment;
        end;
      'a'..'z':
        begin
        FCurTokenString:=ReadIdent;
        Result:=DetermineToken;
        end;
      'A'..'Z','_':
        begin
        FCurTokenString:=ReadIdent;
        Result:=DetermineToken2;
        end;
    else
      Error(SErrInvalidCharacter, [CurRow,CurColumn,TokenStr[0]]);
    end;
    FCurToken := Result;
  end;
  { Returns the zero-based offset of the scan position within the current
    line. When there is no current line (TokenStr is nil: before the first
    fetch or after the input is exhausted) the result is 0; the pointer
    difference would otherwise be a meaningless number. }
  function TWebIDLScanner.GetCurColumn: Integer;
  begin
    if TokenStr = nil then
      Result := 0
    else
      Result := TokenStr - PChar(CurLine);
  end;
  642. end.