ISPP.CTokenizer.pas 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572
  1. {
  2. Inno Setup Preprocessor
  3. Copyright (C) 2001-2002 Alex Yackimoff
  4. Inno Setup
  5. Copyright (C) 1997-2020 Jordan Russell
  6. Portions by Martijn Laan
  7. For conditions of distribution and use, see LICENSE.TXT.
  8. }
  9. unit ISPP.CTokenizer;
  10. interface
  11. uses SysUtils;
  12. type
  13. EParsingError = class(Exception)
  14. Position: Integer;
  15. end;
  16. TTokenKind = (tkError, tkEOF, tkIdent, tkNumber, tkString, opGreater,
  17. opLess,
  18. opGreaterEqual,
  19. opLessEqual,
  20. opEqual,
  21. opNotEqual,
  22. opOr,
  23. opAnd,
  24. opAdd,
  25. opSubtract,
  26. opBwOr,
  27. opXor,
  28. opMul,
  29. opDiv,
  30. opBwAnd,
  31. opShl,
  32. opShr,
  33. opMod,
  34. opNot,
  35. opBwNot,
  36. opAssign,
  37. opAgnAdd,
  38. opAgnSub,
  39. opAgnOr,
  40. opAgnXor,
  41. opAgnMul,
  42. opAgnDiv,
  43. opAgnAnd,
  44. opAgnShl,
  45. opAgnShr,
  46. opAgnMod,
  47. opInc,
  48. opDec,
  49. tkOpenParen,
  50. tkOpenBracket,
  51. tkOpenBrace,
  52. tkCloseParen,
  53. tkCloseBracket,
  54. tkCloseBrace,
  55. tkPeriod,
  56. tkComma,
  57. tkColon,
  58. tkSemicolon,
  59. tkQuestion,
  60. tkPtr);
  61. TTokenKinds = set of TTokenKind;
  62. TCTokenizer = class(TObject)
  63. private
  64. FEscapeSequences: Boolean;
  65. FExprStart: PChar;
  66. FIdent: string;
  67. FToken: TTokenKind;
  68. FNextTokenKnown: Boolean;
  69. FNextToken: TTokenKind;
  70. FNextTokenPos: PChar;
  71. FNextIdent: string;
  72. FStoredPos: PChar;
  73. procedure IllegalChar(C: Char);
  74. function InternalNextToken: TTokenKind;
  75. protected
  76. FExpr: PChar;
  77. FExprOffset: Integer;
  78. procedure EndOfExpr;
  79. procedure Error(const Message: string);
  80. procedure ErrorFmt(const Message: string; Args: array of const);
  81. public
  82. constructor Create(const Expression: string;
  83. EscapeSequences: Boolean);
  84. procedure SkipBlanks;
  85. function NextToken: TTokenKind;
  86. function NextTokenExpect(Expected: TTokenKinds): TTokenKind;
  87. function TokenInt: Longint;
  88. function PeekAtNextToken: TTokenKind;
  89. function PeekAtNextTokenString: string;
  90. procedure Store;
  91. procedure Restore;
  92. procedure SetPos(NewPos: PChar);
  93. property Token: TTokenKind read FToken;
  94. property TokenString: string read FIdent;
  95. end;
  96. const
  97. ExpressionStartTokens = [tkOpenParen, tkIdent, tkNumber, tkString, opNot,
  98. opBwNot, opAdd, opSubtract, opInc, opDec, tkPtr];
  99. implementation
  100. uses
  101. ISPP.Consts, Shared.CommonFunc;
  102. { TCTokenizer }
  103. constructor TCTokenizer.Create(const Expression: string;
  104. EscapeSequences: Boolean);
  105. begin
  106. FExpr := PChar(Expression);
  107. FExprStart := FExpr;
  108. FEscapeSequences := EscapeSequences;
  109. end;
  110. procedure TCTokenizer.SkipBlanks;
  111. begin
  112. while CharInSet(FExpr^, [#1..#32]) do Inc(FExpr);
  113. if (FExpr^ = '/') and (FExpr[1] = '*') then
  114. begin
  115. Inc(FExpr, 2);
  116. while True do
  117. begin
  118. while not CharInSet(FExpr^, [#0, '*']) do Inc(FExpr);
  119. if (FExpr^ = '*') then
  120. if FExpr[1] = '/' then
  121. begin
  122. Inc(FExpr, 2);
  123. SkipBlanks;
  124. Exit;
  125. end
  126. else
  127. Inc(FExpr)
  128. else
  129. Error('Unterminated comment');
  130. end;
  131. end
  132. end;
  133. function TCTokenizer.InternalNextToken: TTokenKind;
  134. procedure Promote(T: TTokenKind);
  135. begin
  136. Result := T;
  137. Inc(FExpr);
  138. end;
  139. function GetString(QuoteChar: Char): string;
  140. var
  141. P: PChar;
  142. S: string;
  143. I: Integer;
  144. C: Byte;
  145. procedure Unterminated;
  146. begin
  147. if FExpr^ = #0 then
  148. Error('Unterminated string');
  149. end;
  150. begin
  151. Inc(FExpr);
  152. Result := '';
  153. while True do
  154. begin
  155. P := FExpr;
  156. while not CharInSet(FExpr^, [#0, '\', QuoteChar]) do Inc(FExpr);
  157. SetString(S, P, FExpr - P);
  158. Result := Result + S;
  159. Unterminated;
  160. if FExpr^ = QuoteChar then
  161. begin
  162. Inc(FExpr);
  163. Break;
  164. end;
  165. Inc(FExpr);
  166. Unterminated;
  167. case FExpr^ of
  168. #0: Unterminated;
  169. '0'..'7':// octal 400 = $100
  170. begin
  171. C := 0;
  172. I := 0;
  173. while CharInSet(FExpr^, ['0'..'7']) and (I < 3) do
  174. begin
  175. Inc(I);
  176. C := (C shl 3) + (Ord(FExpr^) - Ord('0'));
  177. Inc(FExpr);
  178. Unterminated;
  179. end;
  180. Result := Result + Char(C);
  181. Continue;
  182. end;
  183. 'a': Result := Result + #7;
  184. 'b': Result := Result + #8;
  185. 'f': Result := Result + #12;
  186. 'n': Result := Result + #10;
  187. 'r': Result := Result + #13;
  188. 't': Result := Result + #9;
  189. 'v': Result := Result + #11;
  190. 'x':
  191. begin
  192. Inc(FExpr);
  193. C := 0;
  194. I := 0;
  195. while CharInSet(FExpr^, ['0'..'9', 'A'..'F', 'a'..'f']) and (I < 2) do
  196. begin
  197. Inc(I);
  198. C := C shl 4;
  199. case FExpr^ of
  200. '0'..'9': C := C + (Ord(FExpr^) - Ord('0'));
  201. 'A'..'F': C := C + (Ord(FExpr^) - Ord('A')) + $0A;
  202. else
  203. C := C + (Ord(FExpr^) - Ord('a')) + $0A;
  204. end;
  205. Inc(FExpr);
  206. Unterminated;
  207. end;
  208. Result := Result + Char(C);
  209. Continue;
  210. end;
  211. else
  212. Result := Result + FExpr^
  213. end;
  214. Inc(FExpr);
  215. end;
  216. SkipBlanks;
  217. if FExpr^ = QuoteChar then
  218. Result := Result + GetString(QuoteChar);
  219. end;
  220. var
  221. P: PChar;
  222. begin
  223. SkipBlanks;
  224. Result := tkError;
  225. case FExpr^ of
  226. #0:
  227. begin
  228. Result := tkEOF;
  229. Exit;
  230. end;
  231. '!': if FExpr[1] = '=' then Promote(opNotEqual) else Result := opNot;
  232. '&':
  233. case FExpr[1] of
  234. '&': Promote(opAnd);
  235. '=': Promote(opAgnAnd)
  236. else
  237. Result := opBwAnd
  238. end;
  239. '|':
  240. case FExpr[1] of
  241. '|': Promote(opOr);
  242. '=': Promote(opAgnOr)
  243. else
  244. Result := opBwOr
  245. end;
  246. '^': if FExpr[1] = '=' then Promote(opAgnXor) else Result := opXor;
  247. '=': if FExpr[1] = '=' then Promote(opEqual) else Result := opAssign;
  248. '>':
  249. case FExpr[1] of
  250. '>':
  251. begin
  252. Promote(opShr);
  253. if FExpr[1] = '=' then Promote(opAgnShr);
  254. end;
  255. '=': Promote(opGreaterEqual)
  256. else
  257. Result := opGreater
  258. end;
  259. '<':
  260. case FExpr[1] of
  261. '<':
  262. begin
  263. Promote(opShl);
  264. if FExpr[1] = '=' then Promote(opAgnShl);
  265. end;
  266. '=': Promote(opLessEqual)
  267. else
  268. Result := opLess
  269. end;
  270. '+':
  271. case FExpr[1] of
  272. '=': Promote(opAgnAdd);
  273. '+': Promote(opInc)
  274. else
  275. Result := opAdd
  276. end;
  277. '-':
  278. case FExpr[1] of
  279. '=': Promote(opAgnSub);
  280. '-': Promote(opDec)
  281. else
  282. Result := opSubtract
  283. end;
  284. '/': if FExpr[1] = '=' then Promote(opAgnDiv) else Result := opDiv;
  285. '%': if FExpr[1] = '=' then Promote(opAgnMod) else Result := opMod;
  286. '*': if FExpr[1] = '=' then Promote(opAgnMul) else Result := opMul;
  287. '?': Result := tkQuestion;
  288. ':': Result := tkColon;
  289. ';': Result := tkSemicolon;
  290. ',': Result := tkComma;
  291. '.': Result := tkPeriod;
  292. '~': Result := opBwNot;
  293. '(': Result := tkOpenParen;
  294. '[': Result := tkOpenBracket;
  295. '{': Result := tkOpenBrace;
  296. ')': Result := tkCloseParen;
  297. ']': Result := tkCloseBracket;
  298. '}': Result := tkCloseBrace;
  299. '@': Result := tkPtr;
  300. 'A'..'Z', '_', 'a'..'z':
  301. begin
  302. P := FExpr;
  303. repeat
  304. Inc(FExpr)
  305. until not CharInSet(FExpr^, ['0'..'9', 'A'..'Z', '_', 'a'..'z']);
  306. SetString(FIdent, P, FExpr - P);
  307. Result := tkIdent;
  308. Exit;
  309. end;
  310. '0'..'9':
  311. begin
  312. P := FExpr;
  313. repeat
  314. Inc(FExpr)
  315. until not CharInSet(FExpr^, ['0'..'9', 'A'..'F', 'X', 'a'..'f', 'x']);
  316. SetString(FIdent, P, FExpr - P);
  317. while CharInSet(FExpr^, ['L', 'U', 'l', 'u']) do Inc(FExpr);
  318. Result := tkNumber;
  319. Exit;
  320. end;
  321. '"', '''':
  322. begin
  323. if FEscapeSequences then
  324. FIdent := GetString(FExpr^)
  325. else
  326. FIdent := AnsiExtractQuotedStr(FExpr, FExpr^);
  327. Result := tkString;
  328. Exit;
  329. end;
  330. end;
  331. if Result = tkError then IllegalChar(FExpr^);
  332. Inc(FExpr)
  333. end;
  334. function TCTokenizer.PeekAtNextToken: TTokenKind;
  335. var
  336. P: PChar;
  337. SaveIdent: string;
  338. begin
  339. if not FNextTokenKnown then
  340. begin
  341. P := FExpr;
  342. SaveIdent := FIdent;
  343. FNextToken := InternalNextToken;
  344. FNextIdent := FIdent;
  345. FIdent := SaveIdent;
  346. FNextTokenPos := FExpr;
  347. FExpr := P;
  348. FNextTokenKnown := True;
  349. end;
  350. Result := FNextToken;
  351. end;
  352. function TCTokenizer.NextToken: TTokenKind;
  353. begin
  354. if FNextTokenKnown then
  355. begin
  356. FToken := FNextToken;
  357. FIdent := FNextIdent;
  358. FExpr := FNextTokenPos;
  359. FNextTokenKnown := False;
  360. end
  361. else
  362. FToken := InternalNextToken;
  363. Result := FToken;
  364. end;
  365. function TCTokenizer.PeekAtNextTokenString: string;
  366. begin
  367. PeekAtNextToken;
  368. Result := FNextIdent;
  369. end;
  370. function TCTokenizer.TokenInt: Longint;
  371. var
  372. E: Integer;
  373. begin
  374. Val(FIdent, Result, E);
  375. if E <> 0 then
  376. Error('Cannot convert to integer');
  377. end;
  378. procedure TCTokenizer.Restore;
  379. begin
  380. FExpr := FStoredPos;
  381. FNextTokenKnown := False;
  382. end;
  383. procedure TCTokenizer.Store;
  384. begin
  385. FStoredPos := FExpr;
  386. end;
  387. function TCTokenizer.NextTokenExpect(Expected: TTokenKinds): TTokenKind;
  388. function GetFriendlyTokenDesc(T: TTokenKind; Found: Boolean): string;
  389. const
  390. TokenNames: array[TTokenKind] of string =
  391. ('illegal character', 'end of expression', 'identifier', 'number', 'string literal',
  392. 'right angle bracket (">")',
  393. 'left angle bracket ("<")',
  394. 'greater-or-equal-to operator (">=")',
  395. 'less-or-equal-to operator ("<=")',
  396. 'equality operator ("==")',
  397. 'inequality operator ("!=")',
  398. 'logical OR operator ("||")',
  399. 'logical AND operator ("&&")',
  400. 'plus sign ("+")',
  401. 'minus sign ("-")',
  402. 'OR sign ("|")',
  403. 'XOR operator ("^")',
  404. 'star sign ("*")',
  405. 'slash ("/")',
  406. 'AND sign ("&")',
  407. 'SHL operator ("<<")',
  408. 'SHR operator (">>")',
  409. 'percent sign ("%")',
  410. 'exclamation sign ("!")',
  411. 'tilde ("~")',
  412. 'equal sign ("=")',
  413. 'compound assignment operator ("+=")',
  414. 'compound assignment operator ("-=")',
  415. 'compound assignment operator ("|=")',
  416. 'compound assignment operator ("^=")',
  417. 'compound assignment operator ("*=")',
  418. 'compound assignment operator ("/=")',
  419. 'compound assignment operator ("&=")',
  420. 'compound assignment operator ("<<=")',
  421. 'compound assignment operator (">>=")',
  422. 'compound assignment operator ("%=")',
  423. 'increment operator ("++")',
  424. 'decrement operator ("--")',
  425. 'opening parenthesis ("(")',
  426. 'opening bracket ("[")',
  427. 'opening brace ("{")',
  428. 'closing parenthesis (")")',
  429. 'closing bracket ("]")',
  430. 'closing brace ("}")',
  431. 'period (".")',
  432. 'comma (",")',
  433. 'colon (":")',
  434. 'semicolon (";")',
  435. 'question sign ("?")',
  436. 'call-context-of operator ("@")');
  437. begin
  438. case T of
  439. tkIdent:
  440. if Found then
  441. Result := Format('identifier "%s"', [TokenString])
  442. else
  443. Result := 'identifier';
  444. tkNumber:
  445. if Found then
  446. Result := Format('number %d (0x%0:.2x)', [TokenInt])
  447. else
  448. Result := 'number';
  449. else
  450. Result := TokenNames[T];
  451. end;
  452. end;
  453. function Capitalize(const S: string): string;
  454. begin
  455. if (S <> '') and CharInSet(S[1], ['a'..'z']) then
  456. Result := UpCase(S[1]) + Copy(S, 2, MaxInt)
  457. else
  458. Result := S;
  459. end;
  460. var
  461. M1, M2: string;
  462. I: TTokenKind;
  463. C: Integer;
  464. begin
  465. Result := NextToken;
  466. if not (Result in Expected) then
  467. begin
  468. C := 0;
  469. if Expected * ExpressionStartTokens = ExpressionStartTokens then
  470. begin
  471. M2 := 'expression';
  472. Expected := Expected - ExpressionStartTokens;
  473. end;
  474. for I := Low(TTokenKind) to High(TTokenKind) do
  475. if I in Expected then
  476. begin
  477. Inc(C);
  478. if M2 <> '' then
  479. begin
  480. if M1 <> '' then M1 := M1 + ', ';
  481. M1 := M1 + M2;
  482. M2 := '';
  483. end;
  484. M2 := GetFriendlyTokenDesc(I, False);
  485. end;
  486. if M2 <> '' then
  487. if M1 <> '' then
  488. begin
  489. if C > 2 then M1 := M1 + ',';
  490. M1 := M1 + ' or ' + M2
  491. end
  492. else
  493. M1 := M2;
  494. Error(Capitalize(Format('%s expected but %s found', [M1, GetFriendlyTokenDesc(Token, True)])));
  495. end;
  496. end;
  497. procedure TCTokenizer.EndOfExpr;
  498. begin
  499. NextTokenExpect([tkEOF, tkSemicolon])
  500. end;
  501. procedure TCTokenizer.Error(const Message: string);
  502. begin
  503. var E := EParsingError.Create(Message);
  504. if FExprOffset <> -1 then
  505. E.Position := FExprOffset + (FExpr - FExprStart) + 1;
  506. raise E;
  507. end;
  508. procedure TCTokenizer.ErrorFmt(const Message: string;
  509. Args: array of const);
  510. begin
  511. Error(Format(Message, Args));
  512. end;
  513. procedure TCTokenizer.IllegalChar(C: Char);
  514. begin
  515. raise EParsingError.CreateFmt(SIllegalChar, [C, Ord(C)]);
  516. end;
  517. procedure TCTokenizer.SetPos(NewPos: PChar);
  518. begin
  519. FExpr := NewPos;
  520. FNextTokenKnown := False;
  521. end;
  522. end.