| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311 | {    Copyright (c) 2012 by Sergei Gorelkin    A basic lexer for GNU ld scripts    This program is free software; you can redistribute it and/or modify    iu under the terms of the GNU General Public License as published by    the Free Software Foundation; either version 2 of the License, or    (at your option) any later version.    This program is distributed in the hope that it will be useful,    but WITHOUT ANY WARRANTY; without even the implied warranty of    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the    GNU General Public License for more details.    You should have received a copy of the GNU General Public License    along with this program; if not, write to the Free Software    Foundation, Inc., 675 Mass Ave, Cambridge- MA 02139, USA. ****************************************************************************}unit ldscript;{$i fpcdefs.inc}interfaceuses  owbase;type  TldScriptToken=char;  TScriptLexer=class(TObject)    data: ansistring;    curtoken: TldScriptToken;    curtokenstr: string;    curpos: longint;    line: longint;    linestart: longint;  public    constructor Create(aReader:TObjectReader);    procedure nextToken;    function CheckForIdent(const s:string):boolean;    function CheckFor(c:TldScriptToken):boolean;    procedure Expect(c:TldScriptToken);    property token:TldScriptToken read curtoken;    property tokenstr:string read curtokenstr;  end;const  tkEOF      = #0;  tkINVALID  = #1;  tkIDENT    = #2;  tkNUMBER   = #3;  tkLITERAL  = #4;  tkLSHIFT   = #5;   { << }  tkLE       = #6;   { <= }  tkRSHIFT   = #7;   { >> }  tkGE       = #8;   { >= }  tkANDAND   = #9;   { && }  tkANDEQ    = #10;   { &= }  tkOROR     = #11;   { || }  tkOREQ     = #12;   { |= }  tkDIVEQ    = #13;   { /= }  tkMULTEQ   = #14;   { *= }  tkMINUSEQ  = #15;   { -= }  tkPLUSEQ   = #16;   { += }  tkNE       = #17;   { != }  tkEQ       = #18;   { == }  tkRSHIFTEQ = #19;   { >>= }  tkLSHIFTEQ = #20;   { <<= }implementationuses  sysutils;const  NameChars=['A'..'Z','a'..'z','_','.','$','0'..'9','+','-','=',',','*','?','/','~','\','[',']'];{*****************************************************************************                               TSCRIPTLEXER*****************************************************************************}constructor TScriptLexer.Create(AReader:TObjectReader);begin  { Expected data size is few hundred bytes,  }  SetLength(data,AReader.size);  AReader.Read(data[1],AReader.size);  curpos:=1;end;procedure TScriptLexer.nextToken;  var    p,start: longint;  begin    p:=curpos;    repeat      { skip whitespace }      while (data[p] in [#32,#9,#13]) do        inc(p);      start:=p;      { C-style comment }      if (data[p]='/') and (data[p+1]='*') then        begin          inc(p,2);          while (data[p]<>'*') and (data[p+1]<>'/') do            begin              if (data[p]=#0) then                begin                  curtoken:=tkINVALID;                  exit;                end;              if (data[p]=#10) then                begin                  inc(line);                  linestart:=p+1;                end;              inc(p);            end;          inc(p,2);          continue;        end      else if (data[p]=#10) then        begin          inc(p);          inc(line);          linestart:=p;          continue;        end      else if (data[p]='#') then  { line comment }        begin          inc(p);          while (data[p]<>#0) and (data[p]<>#10) do            inc(p);          continue;        end;      case data[p] of        #0: curtoken:=tkEOF;        '/':          if (data[p+1] in NameChars) then            begin              inc(p);              while (data[p] in NameChars) do                inc(p);              curtoken:=tkIDENT;            end          else if (data[p+1]='=') then            curtoken:=tkDIVEQ          else            curtoken:='/';        'A'..'Z','a'..'z','_','.','$','\':          begin            inc(p);            while (data[p] in NameChars) do              inc(p);            curtoken:=tkIDENT;          end;        '0'..'9':          begin            if (data[p]='0') and (data[p+1] in ['x','X']) then              begin                inc(p,2);                while data[p] in ['0'..'9','a'..'f','A'..'F'] do                  inc(p);              end            else              while (data[p] in ['0'..'9']) do                inc(p);            curtoken:=tkNUMBER;          end;        '"':          begin            inc(p);            while (data[p]<>'"') and (data[p]<>#10) do              inc(p);            if data[p]=#10 then              begin                curtoken:=tkINVALID;                exit;              end;            inc(p);            curtoken:=tkLITERAL;          end;        '<':          if (data[p+1]='<') then            begin              if (data[p+2]='=') then                curtoken:=tkLSHIFTEQ              else                curtoken:=tkLSHIFT;            end          else if (data[p+1]='=') then            curtoken:=tkLE          else            curtoken:='<';        '>':          if (data[p+1]='>') then            begin              if (data[p+2]='=') then                curtoken:=tkRSHIFTEQ              else                curtoken:=tkRSHIFT;            end          else if (data[p+1]='=') then            curtoken:=tkGE          else            curtoken:='>';        '!':          if (data[p+1]='=') then            curtoken:=tkNE          else            curtoken:='!';        '&':          if (data[p+1]='&') then            curtoken:=tkANDAND          else if (data[p+1]='=') then            curtoken:=tkANDEQ          else            curtoken:='&';        '|':          if (data[p+1]='|') then            curtoken:=tkOROR          else if (data[p+1]='=') then            curtoken:=tkOREQ          else            curtoken:='|';        '*':          if (data[p+1]='=') then            curtoken:=tkMULTEQ          else            curtoken:='*';        '+':          if (data[p+1]='=') then            curtoken:=tkPLUSEQ          else            curtoken:='+';        '-':          if (data[p+1]='=') then            curtoken:=tkMINUSEQ          else            curtoken:='-';        '=':          if (data[p+1]='=') then            curtoken:=tkEQ          else            curtoken:='=';        '(',')','{','}','[',']',';','?',':':          curtoken:=data[p];      else        curtoken:=tkINVALID;        exit;      end;      break;    until false;    case curtoken of      tkRSHIFTEQ,tkLSHIFTEQ: inc(p,3);      tkLSHIFT..tkEQ: inc(p,2);      #32..#255: inc(p);      tkIDENT,tkNUMBER:        setstring(curtokenstr,PChar(@data[start]),p-start);      tkLITERAL:        setstring(curtokenstr,PChar(@data[start+1]),p-start-2);    end;    curpos:=p;  end;procedure TScriptLexer.Expect(c:TldScriptToken);  begin    if (curtoken=c) then      nextToken    else      {error};  end;function TScriptLexer.CheckForIdent(const s:string):boolean;  begin    result:=(curtoken=tkIDENT) and (curtokenstr=s);    if result then      nextToken;  end;function TScriptLexer.CheckFor(c:TldScriptToken):boolean;  begin    result:=(curtoken=c);    if result then      nextToken;  end;end.
 |