pascallang.l 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. %{
  2. /* This file is part of the software similarity tester SIM.
  3. Written by Dick Grune, Vrije Universiteit, Amsterdam.
  4. $Id: pascallang.l,v 2.9 2007/08/29 09:10:35 dick Exp $
  5. */
  6. /*
  7. PASCAL language front end for the similarity tester.
  8. Author: Maarten van der Meulen <[email protected]>
  9. */
  10. #include "options.h"
  11. #include "algollike.h"
  12. #include "token.h"
  13. #include "idf.h"
  14. #include "lex.h"
  15. #include "lang.h"
  16. /* Language-dependent Code */
  17. /* Data for module idf */
  18. static const struct idf ppcmd[] = {
  19. {"define", META('d')},
  20. {"else", META('e')},
  21. {"endif", META('E')},
  22. {"if", META('i')},
  23. {"ifdef", META('I')},
  24. {"ifndef", META('x')},
  25. {"include", MTCT('I')},
  26. {"line", META('l')},
  27. {"undef", META('u')}
  28. };
  29. static const struct idf reserved[] = {
  30. {"and", NORM('&')},
  31. {"array", NORM('A')},
  32. {"as", NORM('a')},
  33. {"begin", NORM('{')},
  34. {"case", NORM('c')},
  35. {"catch", META('C')},
  36. {"class", META('c')},
  37. {"continue", CTRL('C')},
  38. {"constructor", NORM('p')}, /* equal to procedure */
  39. {"const", NORM('C')},
  40. {"destructor", NORM('p')}, /* equal to procedure */
  41. {"div", NORM('/')},
  42. {"do", NORM('D')},
  43. {"downto", NORM('d')},
  44. {"else", NORM('e')},
  45. {"end", NORM('}')},
  46. {"extern", CTRL('E')},
  47. {"except", MTCT('E')},
  48. {"file", NORM('F')},
  49. {"finally", META('F')},
  50. {"for", NORM('f')},
  51. {"function", NORM('p')}, /* Equal to procedure */
  52. {"goto", NORM('g')},
  53. {"if", NORM('i')},
  54. {"in", NORM('I')},
  55. {"inherited", CTRL('I')},
  56. {"is", NORM('j')},
  57. {"label", NORM('l')},
  58. {"mod", NORM('%')},
  59. {"nil", NORM('n')},
  60. {"not", NORM('!')},
  61. {"of", SKIP},
  62. {"on", SKIP},
  63. {"or", NORM('|')},
  64. {"object", NORM('O')},
  65. {"override", NORM('o')},
  66. {"packed", NORM('P')},
  67. {"procedure", NORM('p')},
  68. {"program", SKIP},
  69. {"private", META('P')},
  70. {"protected", META('p')},
  71. {"public", CTRL('P')},
  72. {"raise", META('R')},
  73. {"record", NORM('r')},
  74. {"repeat", NORM('R')},
  75. {"set", NORM('s')},
  76. {"then", SKIP},
  77. {"to", NORM('t')},
  78. {"type", NORM('T')},
  79. {"until", NORM('u')},
  80. {"var", NORM('v')},
  81. {"virtual", NORM('V')},
  82. {"while", NORM('w')},
  83. {"with", NORM('W')}
  84. };
  85. /* Special treatment of identifiers */
  86. static void
  87. lower_case(char *str) {
  88. /* Turns upper case into lower case, since Pascal does not
  89. distinguish between them.
  90. */
  91. register char *s;
  92. for (s = str; *s; s++) {
  93. if ('A' <= *s && *s <= 'Z') {
  94. *s += (-'A' + 'a');
  95. }
  96. }
  97. }
  98. static TOKEN
  99. idf2token(int hashing) {
  100. register TOKEN tk;
  101. lower_case(yytext);
  102. tk = idf_in_list(yytext, reserved, sizeof reserved, IDF);
  103. if (TOKEN_EQ(tk, IDF) && hashing) {
  104. /* return a one-token hash code */
  105. tk = idf_hashed(yytext);
  106. }
  107. return tk;
  108. }
  109. /* Token sets for module algollike */
  110. const TOKEN NonFinals[] = {
  111. IDF, /* identifier */
  112. NORM('{'), /* also begin */
  113. NORM('('),
  114. NORM('['),
  115. NORM('A'), /* array */
  116. NORM('c'), /* case */
  117. META('C'), /* catch */
  118. META('c'), /* class */
  119. NORM('C'), /* const */
  120. NORM('/'), /* div */
  121. CTRL('E'), /* extern */
  122. NORM('F'), /* file */
  123. NORM('f'), /* for */
  124. NORM('g'), /* goto */
  125. NORM('i'), /* if */
  126. CTRL('I'), /* inherited */
  127. NORM('l'), /* label */
  128. NORM('O'), /* object */
  129. NORM('P'), /* packed */
  130. NORM('p'), /* procedure/function/constructor/destructor */
  131. META('P'), /* private */
  132. META('p'), /* protected */
  133. CTRL('p'), /* public */
  134. META('R'), /* raise */
  135. NORM('r'), /* record */
  136. NORM('R'), /* repeat */
  137. NORM('s'), /* set */
  138. NORM('T'), /* type */
  139. NORM('v'), /* var */
  140. NORM('w'), /* while */
  141. NORM('W'), /* with */
  142. NOTOKEN
  143. };
  144. const TOKEN NonInitials[] = {
  145. NORM(')'),
  146. NORM('}'),
  147. NORM(';'),
  148. NOTOKEN
  149. };
  150. const TOKEN Openers[] = {
  151. NORM('{'),
  152. NORM('('),
  153. NORM('['),
  154. NOTOKEN
  155. };
  156. const TOKEN Closers[] = {
  157. NORM('}'),
  158. NORM(')'),
  159. NORM(']'),
  160. NOTOKEN
  161. };
  162. %}
  163. %option nounput
  164. %option never-interactive
  165. %Start Comment
  166. Layout ([ \t\r\f])
  167. ASCII95 ([- !"#$%&'()*+,./0-9:;<=>?@A-Z\[\\\]^_`a-z{|}~])
  168. AnyQuoted (\\.)
  169. StrChar ([^'\n\\]|{AnyQuoted})
  170. StartComment ("{"|"(*")
  171. EndComment ("}"|"*)")
  172. SafeComChar ([^*}\n])
  173. UnsafeComChar ("*")
  174. SingleLineCom ("//".*)
  175. Digit ([0-9])
  176. Idf ([A-Za-z][A-Za-z0-9_]*)
  177. %%
  178. {StartComment} { /* See clang.l */
  179. BEGIN Comment;
  180. }
  181. <Comment>{SafeComChar}+ { /* safe comment chunk */
  182. }
  183. <Comment>{UnsafeComChar} { /* unsafe char, read one by one */
  184. }
  185. <Comment>"\n" { /* to break up long comments */
  186. return_eol();
  187. }
  188. <Comment>{EndComment} { /* end-of-comment */
  189. BEGIN INITIAL;
  190. }
  191. \'{StrChar}*\' { /* character strings */
  192. return_ch('"');
  193. }
  194. {SingleLineCom}"\n" { /* single-line comment */
  195. return_eol();
  196. }
  197. ^#{Layout}*include.* { /* ignore #include lines */
  198. }
  199. ^#{Layout}*{Idf} { /* a preprocessor line */
  200. register char *idf = yytext+1;
  201. /* skip layout in front of preprocessor identifier */
  202. while (*idf == ' ' || *idf == '\t') {
  203. idf++;
  204. }
  205. return_tk(idf_in_list(idf, ppcmd, sizeof ppcmd, NORM('#')));
  206. }
  207. {Digit}+ { /* numeral, passed as an identifier */
  208. return_tk(IDF);
  209. }
  210. {Idf}/"(" { /* identifier in front of ( */
  211. register TOKEN tk;
  212. tk = idf2token(option_set('F'));
  213. if (!TOKEN_EQ(tk, SKIP)) return_tk(tk);
  214. }
  215. {Idf} { /* identifier */
  216. register TOKEN tk;
  217. tk = idf2token(0 /* no hashing */);
  218. if (!TOKEN_EQ(tk, SKIP)) return_tk(tk);
  219. }
  220. \; { /* semicolon, conditionally ignored */
  221. if (option_set('f')) return_ch(yytext[0]);
  222. }
  223. \n { /* count newlines */
  224. return_eol();
  225. }
  226. {Layout} { /* ignore layout */
  227. }
  228. {ASCII95} { /* copy other text */
  229. return_ch(yytext[0]);
  230. }
  231. . { /* count non-ASCII chars */
  232. lex_non_ascii_cnt++;
  233. }
  234. %%
  235. /* Language-INdependent Code */
  236. void
  237. yystart(void) {
  238. BEGIN INITIAL;
  239. }
  240. int
  241. yywrap(void) {
  242. return 1;
  243. }