123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299 |
- unit TestPasUtils;
- {$mode ObjFPC}{$H+}
- interface
- uses
- Classes, SysUtils, PasTree;
- function ExtractFileUnitName(aFilename: string): string;
- function GetPasElementDesc(El: TPasElement): string;
- procedure ReadNextPascalToken(var Position: PChar; out TokenStart: PChar;
- NestedComments: boolean; SkipDirectives: boolean);
- implementation
- function ExtractFileUnitName(aFilename: string): string;
- var
- p: Integer;
- begin
- Result:=ExtractFileName(aFilename);
- if Result='' then exit;
- for p:=length(Result) downto 1 do
- case Result[p] of
- '/','\': exit;
- '.':
- begin
- Delete(Result,p,length(Result));
- exit;
- end;
- end;
- end;
- function GetPasElementDesc(El: TPasElement): string;
- begin
- if El=nil then exit('nil');
- Result:=El.Name+':'+El.ClassName+'['+El.SourceFilename+','+IntToStr(El.SourceLinenumber)+']';
- end;
- procedure ReadNextPascalToken(var Position: PChar; out TokenStart: PChar;
- NestedComments: boolean; SkipDirectives: boolean);
- const
- IdentChars = ['a'..'z','A'..'Z','_','0'..'9'];
- HexNumberChars = ['0'..'9','a'..'f','A'..'F'];
- var
- c1:char;
- CommentLvl: Integer;
- Src: PChar;
- begin
- Src:=Position;
- // read till next atom
- while true do
- begin
- case Src^ of
- #0: break;
- #1..#32: // spaces and special characters
- inc(Src);
- #$EF:
- if (Src[1]=#$BB)
- and (Src[2]=#$BF) then
- begin
- // skip UTF BOM
- inc(Src,3);
- end
- else
- break;
- '{': // comment start or compiler directive
- if (Src[1]='$') and (not SkipDirectives) then
- // compiler directive
- break
- else begin
- // Pascal comment => skip
- CommentLvl:=1;
- while true do
- begin
- inc(Src);
- case Src^ of
- #0: break;
- '{':
- if NestedComments then
- inc(CommentLvl);
- '}':
- begin
- dec(CommentLvl);
- if CommentLvl=0 then
- begin
- inc(Src);
- break;
- end;
- end;
- end;
- end;
- end;
- '/': // comment or real division
- if (Src[1]='/') then
- begin
- // comment start -> read til line end
- inc(Src);
- while not (Src^ in [#0,#10,#13]) do
- inc(Src);
- end
- else
- break;
- '(': // comment, bracket or compiler directive
- if (Src[1]='*') then
- begin
- if (Src[2]='$') and (not SkipDirectives) then
- // compiler directive
- break
- else
- begin
- // comment start -> read til comment end
- inc(Src,2);
- CommentLvl:=1;
- while true do
- begin
- case Src^ of
- #0: break;
- '(':
- if NestedComments and (Src[1]='*') then
- inc(CommentLvl);
- '*':
- if (Src[1]=')') then
- begin
- dec(CommentLvl);
- if CommentLvl=0 then
- begin
- inc(Src,2);
- break;
- end;
- inc(Position);
- end;
- end;
- inc(Src);
- end;
- end;
- end else
- // round bracket open
- break;
- else
- break;
- end;
- end;
- // read token
- TokenStart:=Src;
- c1:=Src^;
- case c1 of
- #0:
- ;
- 'A'..'Z','a'..'z','_':
- begin
- // identifier
- inc(Src);
- while Src^ in IdentChars do
- inc(Src);
- end;
- '0'..'9': // number
- begin
- inc(Src);
- // read numbers
- while (Src^ in ['0'..'9']) do
- inc(Src);
- if (Src^='.') and (Src[1]<>'.') then
- begin
- // real type number
- inc(Src);
- while (Src^ in ['0'..'9']) do
- inc(Src);
- end;
- if (Src^ in ['e','E']) then
- begin
- // read exponent
- inc(Src);
- if (Src^='-') then inc(Src);
- while (Src^ in ['0'..'9']) do
- inc(Src);
- end;
- end;
- '''','#': // string constant
- while true do
- case Src^ of
- #0: break;
- '#':
- begin
- inc(Src);
- while Src^ in ['0'..'9'] do
- inc(Src);
- end;
- '''':
- begin
- inc(Src);
- while not (Src^ in ['''',#0]) do
- inc(Src);
- if Src^='''' then
- inc(Src);
- end;
- else
- break;
- end;
- '$': // hex constant
- begin
- inc(Src);
- while Src^ in HexNumberChars do
- inc(Src);
- end;
- '&': // octal constant or keyword as identifier (e.g. &label)
- begin
- inc(Src);
- if Src^ in ['0'..'7'] then
- while Src^ in ['0'..'7'] do
- inc(Src)
- else
- while Src^ in IdentChars do
- inc(Src);
- end;
- '{': // compiler directive (it can't be a comment, because see above)
- begin
- CommentLvl:=1;
- while true do
- begin
- inc(Src);
- case Src^ of
- #0: break;
- '{':
- if NestedComments then
- inc(CommentLvl);
- '}':
- begin
- dec(CommentLvl);
- if CommentLvl=0 then
- begin
- inc(Src);
- break;
- end;
- end;
- end;
- end;
- end;
- '(': // bracket or compiler directive
- if (Src[1]='*') then
- begin
- // compiler directive -> read til comment end
- inc(Src,2);
- while (Src^<>#0) and ((Src^<>'*') or (Src[1]<>')')) do
- inc(Src);
- inc(Src,2);
- end
- else
- // round bracket open
- inc(Src);
- #192..#255:
- begin
- // read UTF8 character
- inc(Src);
- if ((ord(c1) and %11100000) = %11000000) then
- begin
- // could be 2 byte character
- if (ord(Src[0]) and %11000000) = %10000000 then
- inc(Src);
- end
- else if ((ord(c1) and %11110000) = %11100000) then
- begin
- // could be 3 byte character
- if ((ord(Src[0]) and %11000000) = %10000000)
- and ((ord(Src[1]) and %11000000) = %10000000) then
- inc(Src,2);
- end
- else if ((ord(c1) and %11111000) = %11110000) then
- begin
- // could be 4 byte character
- if ((ord(Src[0]) and %11000000) = %10000000)
- and ((ord(Src[1]) and %11000000) = %10000000)
- and ((ord(Src[2]) and %11000000) = %10000000) then
- inc(Src,3);
- end;
- end;
- else
- inc(Src);
- case c1 of
- '<': if Src^ in ['>','='] then inc(Src);
- '.': if Src^='.' then inc(Src);
- '@':
- if Src^='@' then
- begin
- // @@ label
- repeat
- inc(Src);
- until not (Src^ in IdentChars);
- end
- else
- if (Src^='=') and (c1 in [':','+','-','/','*','<','>']) then
- inc(Src);
- end;
- end;
- Position:=Src;
- end;
- end.
|