123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572 |
- {
- Inno Setup Preprocessor
- Copyright (C) 2001-2002 Alex Yackimoff
- Inno Setup
- Copyright (C) 1997-2020 Jordan Russell
- Portions by Martijn Laan
- For conditions of distribution and use, see LICENSE.TXT.
- }
- unit ISPP.CTokenizer;
- interface
- uses SysUtils;
- type
- EParsingError = class(Exception)
- Position: Integer;
- end;
- TTokenKind = (tkError, tkEOF, tkIdent, tkNumber, tkString, opGreater,
- opLess,
- opGreaterEqual,
- opLessEqual,
- opEqual,
- opNotEqual,
- opOr,
- opAnd,
- opAdd,
- opSubtract,
- opBwOr,
- opXor,
- opMul,
- opDiv,
- opBwAnd,
- opShl,
- opShr,
- opMod,
- opNot,
- opBwNot,
- opAssign,
- opAgnAdd,
- opAgnSub,
- opAgnOr,
- opAgnXor,
- opAgnMul,
- opAgnDiv,
- opAgnAnd,
- opAgnShl,
- opAgnShr,
- opAgnMod,
- opInc,
- opDec,
- tkOpenParen,
- tkOpenBracket,
- tkOpenBrace,
- tkCloseParen,
- tkCloseBracket,
- tkCloseBrace,
- tkPeriod,
- tkComma,
- tkColon,
- tkSemicolon,
- tkQuestion,
- tkPtr);
- TTokenKinds = set of TTokenKind;
- TCTokenizer = class(TObject)
- private
- FEscapeSequences: Boolean;
- FExprStart: PChar;
- FIdent: string;
- FToken: TTokenKind;
- FNextTokenKnown: Boolean;
- FNextToken: TTokenKind;
- FNextTokenPos: PChar;
- FNextIdent: string;
- FStoredPos: PChar;
- procedure IllegalChar(C: Char);
- function InternalNextToken: TTokenKind;
- protected
- FExpr: PChar;
- FExprOffset: Integer;
- procedure EndOfExpr;
- procedure Error(const Message: string);
- procedure ErrorFmt(const Message: string; Args: array of const);
- public
- constructor Create(const Expression: string;
- EscapeSequences: Boolean);
- procedure SkipBlanks;
- function NextToken: TTokenKind;
- function NextTokenExpect(Expected: TTokenKinds): TTokenKind;
- function TokenInt: Longint;
- function PeekAtNextToken: TTokenKind;
- function PeekAtNextTokenString: string;
- procedure Store;
- procedure Restore;
- procedure SetPos(NewPos: PChar);
- property Token: TTokenKind read FToken;
- property TokenString: string read FIdent;
- end;
- const
- ExpressionStartTokens = [tkOpenParen, tkIdent, tkNumber, tkString, opNot,
- opBwNot, opAdd, opSubtract, opInc, opDec, tkPtr];
- implementation
- uses
- ISPP.Consts, Shared.CommonFunc;
- { TCTokenizer }
- constructor TCTokenizer.Create(const Expression: string;
- EscapeSequences: Boolean);
- begin
- FExpr := PChar(Expression);
- FExprStart := FExpr;
- FEscapeSequences := EscapeSequences;
- end;
- procedure TCTokenizer.SkipBlanks;
- begin
- while CharInSet(FExpr^, [#1..#32]) do Inc(FExpr);
- if (FExpr^ = '/') and (FExpr[1] = '*') then
- begin
- Inc(FExpr, 2);
- while True do
- begin
- while not CharInSet(FExpr^, [#0, '*']) do Inc(FExpr);
- if (FExpr^ = '*') then
- if FExpr[1] = '/' then
- begin
- Inc(FExpr, 2);
- SkipBlanks;
- Exit;
- end
- else
- Inc(FExpr)
- else
- Error('Unterminated comment');
- end;
- end
- end;
- function TCTokenizer.InternalNextToken: TTokenKind;
- procedure Promote(T: TTokenKind);
- begin
- Result := T;
- Inc(FExpr);
- end;
- function GetString(QuoteChar: Char): string;
- var
- P: PChar;
- S: string;
- I: Integer;
- C: Byte;
- procedure Unterminated;
- begin
- if FExpr^ = #0 then
- Error('Unterminated string');
- end;
- begin
- Inc(FExpr);
- Result := '';
- while True do
- begin
- P := FExpr;
- while not CharInSet(FExpr^, [#0, '\', QuoteChar]) do Inc(FExpr);
- SetString(S, P, FExpr - P);
- Result := Result + S;
- Unterminated;
- if FExpr^ = QuoteChar then
- begin
- Inc(FExpr);
- Break;
- end;
- Inc(FExpr);
- Unterminated;
- case FExpr^ of
- #0: Unterminated;
- '0'..'7':// octal 400 = $100
- begin
- C := 0;
- I := 0;
- while CharInSet(FExpr^, ['0'..'7']) and (I < 3) do
- begin
- Inc(I);
- C := (C shl 3) + (Ord(FExpr^) - Ord('0'));
- Inc(FExpr);
- Unterminated;
- end;
- Result := Result + Char(C);
- Continue;
- end;
- 'a': Result := Result + #7;
- 'b': Result := Result + #8;
- 'f': Result := Result + #12;
- 'n': Result := Result + #10;
- 'r': Result := Result + #13;
- 't': Result := Result + #9;
- 'v': Result := Result + #11;
- 'x':
- begin
- Inc(FExpr);
- C := 0;
- I := 0;
- while CharInSet(FExpr^, ['0'..'9', 'A'..'F', 'a'..'f']) and (I < 2) do
- begin
- Inc(I);
- C := C shl 4;
- case FExpr^ of
- '0'..'9': C := C + (Ord(FExpr^) - Ord('0'));
- 'A'..'F': C := C + (Ord(FExpr^) - Ord('A')) + $0A;
- else
- C := C + (Ord(FExpr^) - Ord('a')) + $0A;
- end;
- Inc(FExpr);
- Unterminated;
- end;
- Result := Result + Char(C);
- Continue;
- end;
- else
- Result := Result + FExpr^
- end;
- Inc(FExpr);
- end;
- SkipBlanks;
- if FExpr^ = QuoteChar then
- Result := Result + GetString(QuoteChar);
- end;
- var
- P: PChar;
- begin
- SkipBlanks;
- Result := tkError;
- case FExpr^ of
- #0:
- begin
- Result := tkEOF;
- Exit;
- end;
- '!': if FExpr[1] = '=' then Promote(opNotEqual) else Result := opNot;
- '&':
- case FExpr[1] of
- '&': Promote(opAnd);
- '=': Promote(opAgnAnd)
- else
- Result := opBwAnd
- end;
- '|':
- case FExpr[1] of
- '|': Promote(opOr);
- '=': Promote(opAgnOr)
- else
- Result := opBwOr
- end;
- '^': if FExpr[1] = '=' then Promote(opAgnXor) else Result := opXor;
- '=': if FExpr[1] = '=' then Promote(opEqual) else Result := opAssign;
- '>':
- case FExpr[1] of
- '>':
- begin
- Promote(opShr);
- if FExpr[1] = '=' then Promote(opAgnShr);
- end;
- '=': Promote(opGreaterEqual)
- else
- Result := opGreater
- end;
- '<':
- case FExpr[1] of
- '<':
- begin
- Promote(opShl);
- if FExpr[1] = '=' then Promote(opAgnShl);
- end;
- '=': Promote(opLessEqual)
- else
- Result := opLess
- end;
- '+':
- case FExpr[1] of
- '=': Promote(opAgnAdd);
- '+': Promote(opInc)
- else
- Result := opAdd
- end;
- '-':
- case FExpr[1] of
- '=': Promote(opAgnSub);
- '-': Promote(opDec)
- else
- Result := opSubtract
- end;
- '/': if FExpr[1] = '=' then Promote(opAgnDiv) else Result := opDiv;
- '%': if FExpr[1] = '=' then Promote(opAgnMod) else Result := opMod;
- '*': if FExpr[1] = '=' then Promote(opAgnMul) else Result := opMul;
- '?': Result := tkQuestion;
- ':': Result := tkColon;
- ';': Result := tkSemicolon;
- ',': Result := tkComma;
- '.': Result := tkPeriod;
- '~': Result := opBwNot;
- '(': Result := tkOpenParen;
- '[': Result := tkOpenBracket;
- '{': Result := tkOpenBrace;
- ')': Result := tkCloseParen;
- ']': Result := tkCloseBracket;
- '}': Result := tkCloseBrace;
- '@': Result := tkPtr;
- 'A'..'Z', '_', 'a'..'z':
- begin
- P := FExpr;
- repeat
- Inc(FExpr)
- until not CharInSet(FExpr^, ['0'..'9', 'A'..'Z', '_', 'a'..'z']);
- SetString(FIdent, P, FExpr - P);
- Result := tkIdent;
- Exit;
- end;
- '0'..'9':
- begin
- P := FExpr;
- repeat
- Inc(FExpr)
- until not CharInSet(FExpr^, ['0'..'9', 'A'..'F', 'X', 'a'..'f', 'x']);
- SetString(FIdent, P, FExpr - P);
- while CharInSet(FExpr^, ['L', 'U', 'l', 'u']) do Inc(FExpr);
- Result := tkNumber;
- Exit;
- end;
- '"', '''':
- begin
- if FEscapeSequences then
- FIdent := GetString(FExpr^)
- else
- FIdent := AnsiExtractQuotedStr(FExpr, FExpr^);
- Result := tkString;
- Exit;
- end;
- end;
- if Result = tkError then IllegalChar(FExpr^);
- Inc(FExpr)
- end;
- function TCTokenizer.PeekAtNextToken: TTokenKind;
- var
- P: PChar;
- SaveIdent: string;
- begin
- if not FNextTokenKnown then
- begin
- P := FExpr;
- SaveIdent := FIdent;
- FNextToken := InternalNextToken;
- FNextIdent := FIdent;
- FIdent := SaveIdent;
- FNextTokenPos := FExpr;
- FExpr := P;
- FNextTokenKnown := True;
- end;
- Result := FNextToken;
- end;
- function TCTokenizer.NextToken: TTokenKind;
- begin
- if FNextTokenKnown then
- begin
- FToken := FNextToken;
- FIdent := FNextIdent;
- FExpr := FNextTokenPos;
- FNextTokenKnown := False;
- end
- else
- FToken := InternalNextToken;
- Result := FToken;
- end;
- function TCTokenizer.PeekAtNextTokenString: string;
- begin
- PeekAtNextToken;
- Result := FNextIdent;
- end;
- function TCTokenizer.TokenInt: Longint;
- var
- E: Integer;
- begin
- Val(FIdent, Result, E);
- if E <> 0 then
- Error('Cannot convert to integer');
- end;
- procedure TCTokenizer.Restore;
- begin
- FExpr := FStoredPos;
- FNextTokenKnown := False;
- end;
- procedure TCTokenizer.Store;
- begin
- FStoredPos := FExpr;
- end;
- function TCTokenizer.NextTokenExpect(Expected: TTokenKinds): TTokenKind;
- function GetFriendlyTokenDesc(T: TTokenKind; Found: Boolean): string;
- const
- TokenNames: array[TTokenKind] of string =
- ('illegal character', 'end of expression', 'identifier', 'number', 'string literal',
- 'right angle bracket (">")',
- 'left angle bracket ("<")',
- 'greater-or-equal-to operator (">=")',
- 'less-or-equal-to operator ("<=")',
- 'equality operator ("==")',
- 'inequality operator ("!=")',
- 'logical OR operator ("||")',
- 'logical AND operator ("&&")',
- 'plus sign ("+")',
- 'minus sign ("-")',
- 'OR sign ("|")',
- 'XOR operator ("^")',
- 'star sign ("*")',
- 'slash ("/")',
- 'AND sign ("&")',
- 'SHL operator ("<<")',
- 'SHR operator (">>")',
- 'percent sign ("%")',
- 'exclamation sign ("!")',
- 'tilde ("~")',
- 'equal sign ("=")',
- 'compound assignment operator ("+=")',
- 'compound assignment operator ("-=")',
- 'compound assignment operator ("|=")',
- 'compound assignment operator ("^=")',
- 'compound assignment operator ("*=")',
- 'compound assignment operator ("/=")',
- 'compound assignment operator ("&=")',
- 'compound assignment operator ("<<=")',
- 'compound assignment operator (">>=")',
- 'compound assignment operator ("%=")',
- 'increment operator ("++")',
- 'decrement operator ("--")',
- 'opening parenthesis ("(")',
- 'opening bracket ("[")',
- 'opening brace ("{")',
- 'closing parenthesis (")")',
- 'closing bracket ("]")',
- 'closing brace ("}")',
- 'period (".")',
- 'comma (",")',
- 'colon (":")',
- 'semicolon (";")',
- 'question sign ("?")',
- 'call-context-of operator ("@")');
- begin
- case T of
- tkIdent:
- if Found then
- Result := Format('identifier "%s"', [TokenString])
- else
- Result := 'identifier';
- tkNumber:
- if Found then
- Result := Format('number %d (0x%0:.2x)', [TokenInt])
- else
- Result := 'number';
- else
- Result := TokenNames[T];
- end;
- end;
- function Capitalize(const S: string): string;
- begin
- if (S <> '') and CharInSet(S[1], ['a'..'z']) then
- Result := UpCase(S[1]) + Copy(S, 2, MaxInt)
- else
- Result := S;
- end;
- var
- M1, M2: string;
- I: TTokenKind;
- C: Integer;
- begin
- Result := NextToken;
- if not (Result in Expected) then
- begin
- C := 0;
- if Expected * ExpressionStartTokens = ExpressionStartTokens then
- begin
- M2 := 'expression';
- Expected := Expected - ExpressionStartTokens;
- end;
- for I := Low(TTokenKind) to High(TTokenKind) do
- if I in Expected then
- begin
- Inc(C);
- if M2 <> '' then
- begin
- if M1 <> '' then M1 := M1 + ', ';
- M1 := M1 + M2;
- M2 := '';
- end;
- M2 := GetFriendlyTokenDesc(I, False);
- end;
- if M2 <> '' then
- if M1 <> '' then
- begin
- if C > 2 then M1 := M1 + ',';
- M1 := M1 + ' or ' + M2
- end
- else
- M1 := M2;
- Error(Capitalize(Format('%s expected but %s found', [M1, GetFriendlyTokenDesc(Token, True)])));
- end;
- end;
- procedure TCTokenizer.EndOfExpr;
- begin
- NextTokenExpect([tkEOF, tkSemicolon])
- end;
- procedure TCTokenizer.Error(const Message: string);
- begin
- var E := EParsingError.Create(Message);
- if FExprOffset <> -1 then
- E.Position := FExprOffset + (FExpr - FExprStart) + 1;
- raise E;
- end;
- procedure TCTokenizer.ErrorFmt(const Message: string;
- Args: array of const);
- begin
- Error(Format(Message, Args));
- end;
- procedure TCTokenizer.IllegalChar(C: Char);
- begin
- raise EParsingError.CreateFmt(SIllegalChar, [C, Ord(C)]);
- end;
- procedure TCTokenizer.SetPos(NewPos: PChar);
- begin
- FExpr := NewPos;
- FNextTokenKnown := False;
- end;
- end.
|