testpasutils.pas 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299
  1. unit TestPasUtils;
  2. {$mode ObjFPC}{$H+}
  3. interface
  4. uses
  5. Classes, SysUtils, PasTree;
  6. function ExtractFileUnitName(aFilename: string): string;
  7. function GetPasElementDesc(El: TPasElement): string;
  8. procedure ReadNextPascalToken(var Position: PChar; out TokenStart: PChar;
  9. NestedComments: boolean; SkipDirectives: boolean);
  10. implementation
  // Returns the file name part of aFilename (as delivered by
  // SysUtils.ExtractFileName) with its last extension removed.
  //
  // The name is scanned from its end towards its start:
  //  - at the first '.' found, the dot and everything behind it are cut off
  //  - if a '/' or '\' is met before any dot, the name is returned unchanged
  //  - a name without a dot (including the empty name) is returned unchanged
  function ExtractFileUnitName(aFilename: string): string;
  var
    Idx: Integer;
  begin
    Result:=ExtractFileName(aFilename);
    Idx:=Length(Result);
    while Idx>0 do
    begin
      // a path delimiter before any dot: there is no extension to strip
      if Result[Idx] in ['/','\'] then
        exit;
      if Result[Idx]='.' then
      begin
        // keep only the characters in front of the last dot
        SetLength(Result,Idx-1);
        exit;
      end;
      dec(Idx);
    end;
  end;
  // Builds a short description of a parser tree element of the form
  //   Name:ClassName[SourceFilename,SourceLinenumber]
  // A nil element is described by the text 'nil'.
  function GetPasElementDesc(El: TPasElement): string;
  begin
    if Assigned(El) then
      Result:=Format('%s:%s[%s,%d]',
        [El.Name,El.ClassName,El.SourceFilename,El.SourceLinenumber])
    else
      Result:='nil';
  end;
  // Reads the next Pascal token from a #0 terminated source buffer.
  //
  // Position        in:  the character where scanning starts
  //                 out: the first character behind the token that was read
  // TokenStart      out: the first character of the token. At the end of the
  //                 source TokenStart=Position and both point to the #0.
  // NestedComments  true: comment openers inside a comment of the same kind
  //                 are counted, so the comment ends at its matching closer.
  //                 Note: a round-bracket-star compiler directive returned
  //                 as a token always ends at the first closer.
  // SkipDirectives  true: compiler directives are skipped like comments
  //                 false: a compiler directive is returned as one token
  //
  // Before the token the characters #1..#32, UTF-8 byte order marks and
  // comments (curly, round-bracket-star and double-slash) are skipped.
  // Tokens are: identifiers, numbers (decimal, real with exponent, $hex,
  // &octal), &identifiers, string constants (quoted parts and #decimal parts
  // in any sequence), compiler directives, one UTF-8 encoded character,
  // @@labels, the two character symbols <> <= .. := += -= /= *= >= and any
  // other single character.
  procedure ReadNextPascalToken(var Position: PChar; out TokenStart: PChar;
    NestedComments: boolean; SkipDirectives: boolean);
  const
    IdentChars = ['a'..'z','A'..'Z','_','0'..'9'];
    HexNumberChars = ['0'..'9','a'..'f','A'..'F'];
  var
    c1: char;
    CommentLvl: Integer;
    Src: PChar;

    // Src points to a curly bracket open. Moves Src behind the matching
    // curly bracket close, or onto the terminating #0 if there is none.
    procedure SkipCurlyBlock;
    var
      Lvl: Integer;
    begin
      Lvl:=1;
      while true do
      begin
        inc(Src);
        case Src^ of
        #0: break;
        '{':
          if NestedComments then
            inc(Lvl);
        '}':
          begin
            dec(Lvl);
            if Lvl=0 then
            begin
              inc(Src);
              break;
            end;
          end;
        end;
      end;
    end;

  begin
    Src:=Position;
    // skip everything in front of the next token
    while true do
    begin
      case Src^ of
      #0: break;
      #1..#32: // spaces and special characters
        inc(Src);
      #$EF:
        if (Src[1]=#$BB) and (Src[2]=#$BF) then
          // skip UTF-8 BOM
          inc(Src,3)
        else
          break;
      '{': // comment start or compiler directive
        if (Src[1]='$') and (not SkipDirectives) then
          // compiler directive => it is the token
          break
        else
          // Pascal comment => skip
          SkipCurlyBlock;
      '/': // comment or real division
        if (Src[1]='/') then
        begin
          // line comment -> skip up to, not including, the line end
          inc(Src);
          while not (Src^ in [#0,#10,#13]) do
            inc(Src);
        end
        else
          break;
      '(': // comment, bracket or compiler directive
        if (Src[1]='*') then
        begin
          if (Src[2]='$') and (not SkipDirectives) then
            // compiler directive => it is the token
            break
          else
          begin
            // comment start -> read til comment end
            inc(Src,2);
            CommentLvl:=1;
            while true do
            begin
              case Src^ of
              #0: break;
              '(':
                if NestedComments and (Src[1]='*') then
                  inc(CommentLvl);
              '*':
                if (Src[1]=')') then
                begin
                  dec(CommentLvl);
                  if CommentLvl=0 then
                  begin
                    inc(Src,2);
                    break;
                  end;
                  // End of a nested comment: step over the ')' as well.
                  // (The old code incremented Position here, which had no
                  // effect because Position is overwritten at the end.)
                  inc(Src);
                end;
              end;
              inc(Src);
            end;
          end;
        end else
          // round bracket open
          break;
      else
        break;
      end;
    end;

    // read token
    TokenStart:=Src;
    c1:=Src^;
    case c1 of
    #0:
      ; // end of source: empty token
    'A'..'Z','a'..'z','_':
      begin
        // identifier
        inc(Src);
        while Src^ in IdentChars do
          inc(Src);
      end;
    '0'..'9': // number
      begin
        inc(Src);
        while (Src^ in ['0'..'9']) do
          inc(Src);
        // a '..' behind the digits is the range symbol, not a decimal point
        if (Src^='.') and (Src[1]<>'.') then
        begin
          // real type number
          inc(Src);
          while (Src^ in ['0'..'9']) do
            inc(Src);
        end;
        if (Src^ in ['e','E']) then
        begin
          // read exponent, the sign is optional and can be '+' or '-'
          inc(Src);
          if (Src^ in ['+','-']) then
            inc(Src);
          while (Src^ in ['0'..'9']) do
            inc(Src);
        end;
      end;
    '''','#': // string constant: any sequence of 'text' and #decimal parts
      while true do
        case Src^ of
        #0: break;
        '#':
          begin
            inc(Src);
            while Src^ in ['0'..'9'] do
              inc(Src);
          end;
        '''':
          begin
            inc(Src);
            while not (Src^ in ['''',#0]) do
              inc(Src);
            if Src^='''' then
              inc(Src);
          end;
        else
          break;
        end;
    '$': // hex constant
      begin
        inc(Src);
        while Src^ in HexNumberChars do
          inc(Src);
      end;
    '&': // octal constant or keyword as identifier (e.g. &label)
      begin
        inc(Src);
        if Src^ in ['0'..'7'] then
          while Src^ in ['0'..'7'] do
            inc(Src)
        else
          while Src^ in IdentChars do
            inc(Src);
      end;
    '{': // compiler directive (comments were already skipped above)
      SkipCurlyBlock;
    '(': // bracket or compiler directive
      if (Src[1]='*') then
      begin
        // compiler directive -> read til comment end
        inc(Src,2);
        while (Src^<>#0) and ((Src^<>'*') or (Src[1]<>')')) do
          inc(Src);
        // Step over the closer only if there is one. An unterminated
        // directive must leave Src on the #0, not behind the buffer.
        if Src^<>#0 then
          inc(Src,2);
      end
      else
        // round bracket open
        inc(Src);
    #192..#255:
      begin
        // read UTF8 character; continuation bytes have the form %10xxxxxx
        inc(Src);
        if ((ord(c1) and %11100000) = %11000000) then
        begin
          // could be 2 byte character
          if (ord(Src[0]) and %11000000) = %10000000 then
            inc(Src);
        end
        else if ((ord(c1) and %11110000) = %11100000) then
        begin
          // could be 3 byte character
          if ((ord(Src[0]) and %11000000) = %10000000)
          and ((ord(Src[1]) and %11000000) = %10000000) then
            inc(Src,2);
        end
        else if ((ord(c1) and %11111000) = %11110000) then
        begin
          // could be 4 byte character
          if ((ord(Src[0]) and %11000000) = %10000000)
          and ((ord(Src[1]) and %11000000) = %10000000)
          and ((ord(Src[2]) and %11000000) = %10000000) then
            inc(Src,3);
        end;
      end;
    else
      // symbol: one character, or one of the known two character symbols
      inc(Src);
      case c1 of
      '<': if Src^ in ['>','='] then inc(Src);
      '.': if Src^='.' then inc(Src);
      '@':
        if Src^='@' then
        begin
          // @@ label
          repeat
            inc(Src);
          until not (Src^ in IdentChars);
        end;
      // These used to be handled by an 'else' that followed the '@' branch
      // without a separating semicolon. Pascal binds such an 'else' to the
      // 'if' in front of it, so the check was never reached for these
      // characters and e.g. ':=' was returned as two tokens.
      ':','+','-','/','*','>':
        if Src^='=' then inc(Src);
      end;
    end;
    Position:=Src;
  end;
  289. end.