clang.l 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252
  1. %{
  2. /* This file is part of the software similarity tester SIM.
  3. Written by Dick Grune, Vrije Universiteit, Amsterdam.
  4. $Id: clang.l,v 2.9 2007/08/29 09:10:31 dick Exp $
  5. */
  6. /*
  7. C language front end for the similarity tester.
  8. Author: Dick Grune <[email protected]>
  9. */
  10. #include "options.h"
  11. #include "algollike.h"
  12. #include "token.h"
  13. #include "idf.h"
  14. #include "lex.h"
  15. #include "lang.h"
  16. /* Language-dependent Code */
  17. /* Data for module idf */
  18. static const struct idf ppcmd[] = {
  19. {"define", META('d')},
  20. {"else", META('e')},
  21. {"endif", META('E')},
  22. {"if", META('i')},
  23. {"ifdef", META('I')},
  24. {"ifndef", META('x')},
  25. {"include", MTCT('I')},
  26. {"line", META('l')},
  27. {"undef", META('u')}
  28. };
  29. static const struct idf reserved[] = {
  30. {"auto", NORM('a')},
  31. {"break", NORM('b')},
  32. {"case", NORM('c')},
  33. {"char", NORM('C')},
  34. {"continue", CTRL('C')},
  35. {"default", NORM('d')},
  36. {"do", NORM('D')},
  37. {"double", CTRL('D')},
  38. {"else", NORM('e')},
  39. {"enum", NORM('E')},
  40. {"extern", CTRL('E')},
  41. {"float", NORM('f')},
  42. {"for", NORM('F')},
  43. {"goto", NORM('g')},
  44. {"if", NORM('i')},
  45. {"int", NORM('I')},
  46. {"long", NORM('l')},
  47. {"register", SKIP},
  48. {"return", NORM('r')},
  49. {"short", NORM('s')},
  50. {"sizeof", NORM('S')},
  51. {"static", CTRL('S')},
  52. {"struct", META('s')},
  53. {"switch", META('S')},
  54. {"typedef", NORM('t')},
  55. {"union", NORM('u')},
  56. {"unsigned", NORM('U')},
  57. {"void", SKIP},
  58. {"while", NORM('w')}
  59. };
  60. /* Special treatment of identifiers */
  61. static TOKEN
  62. idf2token(int hashing) {
  63. register TOKEN tk;
  64. tk = idf_in_list(yytext, reserved, sizeof reserved, IDF);
  65. if (TOKEN_EQ(tk, IDF) && hashing) {
  66. /* return a one-token hash code */
  67. tk = idf_hashed(yytext);
  68. }
  69. return tk;
  70. }
  71. /* Token sets for module algollike */
  72. const TOKEN NonFinals[] = {
  73. IDF, /* identifier */
  74. NORM('{'),
  75. NORM('('),
  76. NORM('a'), /* auto */
  77. NORM('b'), /* break */
  78. NORM('c'), /* case */
  79. NORM('C'), /* char */
  80. CTRL('C'), /* continue */
  81. NORM('d'), /* default */
  82. NORM('D'), /* do */
  83. CTRL('D'), /* double */
  84. NORM('E'), /* enum */
  85. CTRL('E'), /* extern */
  86. NORM('f'), /* float */
  87. NORM('F'), /* for */
  88. NORM('g'), /* goto */
  89. NORM('i'), /* if */
  90. NORM('I'), /* int */
  91. NORM('l'), /* long */
  92. NORM('r'), /* return */
  93. NORM('s'), /* short */
  94. CTRL('S'), /* static */
  95. META('s'), /* struct */
  96. META('S'), /* switch */
  97. NORM('t'), /* typedef */
  98. NORM('u'), /* union */
  99. NORM('U'), /* unsigned */
  100. NORM('w'), /* while */
  101. NOTOKEN
  102. };
  103. const TOKEN NonInitials[] = {
  104. NORM(')'),
  105. NORM('}'),
  106. NORM(';'),
  107. NOTOKEN
  108. };
  109. const TOKEN Openers[] = {
  110. NORM('{'),
  111. NORM('('),
  112. NORM('['),
  113. NOTOKEN
  114. };
  115. const TOKEN Closers[] = {
  116. NORM('}'),
  117. NORM(')'),
  118. NORM(']'),
  119. NOTOKEN
  120. };
  121. %}
  122. %option nounput
  123. %option never-interactive
  124. %Start Comment
  125. Layout ([ \t\r\f])
  126. ASCII95 ([- !"#$%&'()*+,./0-9:;<=>?@A-Z\[\\\]^_`a-z{|}~])
  127. AnyQuoted (\\.)
  128. StrChar ([^"\n\\]|{AnyQuoted})
  129. ChrChar ([^'\n\\]|{AnyQuoted})
  130. StartComment ("/*")
  131. EndComment ("*/")
  132. SafeComChar ([^*\n])
  133. UnsafeComChar ("*")
  134. Digit ([0-9a-fA-F])
  135. Idf ([A-Za-z][A-Za-z0-9_]*)
  136. %%
  137. {StartComment} {
  138. /* We do not have one single pattern to match a comment
  139. (although one can be written), for two reasons.
  140. The matched string might overflow lex-internal buffers
  141. like yysbuf and yytext; and the pattern would be very
  142. complicated and overtax lex.
  143. So we break up the string into safe chunks and keep
  144. track of where we are in a start condition <Comment>.
  145. */
  146. BEGIN Comment;
  147. }
  148. <Comment>{SafeComChar}+ { /* safe comment chunk */
  149. }
  150. <Comment>{UnsafeComChar} { /* unsafe char, read one by one */
  151. }
  152. <Comment>"\n" { /* to break up long comments */
  153. return_eol();
  154. }
  155. <Comment>{EndComment} { /* end-of-comment */
  156. BEGIN INITIAL;
  157. }
  158. \"{StrChar}*\" { /* strings */
  159. return_ch('"');
  160. }
  161. \'{ChrChar}+\' { /* characters */
  162. return_ch('\'');
  163. }
  164. ^#{Layout}*include.* { /* ignore #include lines */
  165. }
  166. ^#{Layout}*{Idf} { /* a preprocessor line */
  167. register char *idf = yytext+1;
  168. /* skip layout in front of preprocessor identifier */
  169. while (*idf == ' ' || *idf == '\t') {
  170. idf++;
  171. }
  172. return_tk(idf_in_list(idf, ppcmd, sizeof ppcmd, NORM('#')));
  173. }
  174. (0x)?{Digit}+("l"|"L")? { /* numeral, passed as an identifier */
  175. return_tk(IDF);
  176. }
  177. {Idf}/"(" { /* identifier in front of ( */
  178. register TOKEN tk;
  179. tk = idf2token(option_set('F'));
  180. if (!TOKEN_EQ(tk, SKIP)) return_tk(tk);
  181. }
  182. {Idf} { /* identifier */
  183. register TOKEN tk;
  184. tk = idf2token(0 /* no hashing */);
  185. if (!TOKEN_EQ(tk, SKIP)) return_tk(tk);
  186. }
  187. \; { /* semicolon, conditionally ignored */
  188. if (option_set('f')) return_ch(yytext[0]);
  189. }
  190. \n { /* count newlines */
  191. return_eol();
  192. }
  193. {Layout} { /* ignore layout */
  194. }
  195. {ASCII95} { /* copy other text */
  196. return_ch(yytext[0]);
  197. }
  198. . { /* count non-ASCII chars */
  199. lex_non_ascii_cnt++;
  200. }
  201. %%
  202. /* Language-INdependent Code */
  203. void
  204. yystart(void) {
  205. BEGIN INITIAL;
  206. }
  207. int
  208. yywrap(void) {
  209. return 1;
  210. }