MILexer.cpp 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. //===- MILexer.cpp - Machine instructions lexer implementation ----------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file implements the lexing of machine instructions.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "MILexer.h"
  14. #include "llvm/ADT/StringSwitch.h"
  15. #include "llvm/ADT/Twine.h"
  16. #include <cctype>
  17. using namespace llvm;
  18. namespace {
  19. /// This class provides a way to iterate and get characters from the source
  20. /// string.
  21. class Cursor {
  22. const char *Ptr;
  23. const char *End;
  24. public:
  25. Cursor(NoneType) : Ptr(nullptr), End(nullptr) {}
  26. explicit Cursor(StringRef Str) {
  27. Ptr = Str.data();
  28. End = Ptr + Str.size();
  29. }
  30. bool isEOF() const { return Ptr == End; }
  31. char peek(int I = 0) const { return End - Ptr <= I ? 0 : Ptr[I]; }
  32. void advance(unsigned I = 1) { Ptr += I; }
  33. StringRef remaining() const { return StringRef(Ptr, End - Ptr); }
  34. StringRef upto(Cursor C) const {
  35. assert(C.Ptr >= Ptr && C.Ptr <= End);
  36. return StringRef(Ptr, C.Ptr - Ptr);
  37. }
  38. StringRef::iterator location() const { return Ptr; }
  39. operator bool() const { return Ptr != nullptr; }
  40. };
  41. } // end anonymous namespace
  42. /// Skip the leading whitespace characters and return the updated cursor.
  43. static Cursor skipWhitespace(Cursor C) {
  44. while (isspace(C.peek()))
  45. C.advance();
  46. return C;
  47. }
  /// Return true if \p C may appear inside an identifier: a letter, a digit,
  /// or one of '_', '-' and '.'.
  static bool isIdentifierChar(char C) {
    // The <cctype> classifiers have undefined behaviour for negative values
    // other than EOF, so convert the (possibly signed) char first. isalnum()
    // is true exactly when isalpha() or isdigit() is.
    const unsigned char UC = static_cast<unsigned char>(C);
    return isalnum(UC) || C == '_' || C == '-' || C == '.';
  }
  51. static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
  52. return StringSwitch<MIToken::TokenKind>(Identifier)
  53. .Case("_", MIToken::underscore)
  54. .Case("implicit", MIToken::kw_implicit)
  55. .Case("implicit-def", MIToken::kw_implicit_define)
  56. .Case("dead", MIToken::kw_dead)
  57. .Case("killed", MIToken::kw_killed)
  58. .Case("undef", MIToken::kw_undef)
  59. .Default(MIToken::Identifier);
  60. }
  61. static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) {
  62. if (!isalpha(C.peek()) && C.peek() != '_')
  63. return None;
  64. auto Range = C;
  65. while (isIdentifierChar(C.peek()))
  66. C.advance();
  67. auto Identifier = Range.upto(C);
  68. Token = MIToken(getIdentifierKind(Identifier), Identifier);
  69. return C;
  70. }
  71. static Cursor maybeLexMachineBasicBlock(
  72. Cursor C, MIToken &Token,
  73. function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
  74. if (!C.remaining().startswith("%bb."))
  75. return None;
  76. auto Range = C;
  77. C.advance(4); // Skip '%bb.'
  78. if (!isdigit(C.peek())) {
  79. Token = MIToken(MIToken::Error, C.remaining());
  80. ErrorCallback(C.location(), "expected a number after '%bb.'");
  81. return C;
  82. }
  83. auto NumberRange = C;
  84. while (isdigit(C.peek()))
  85. C.advance();
  86. StringRef Number = NumberRange.upto(C);
  87. unsigned StringOffset = 4 + Number.size(); // Drop '%bb.<id>'
  88. if (C.peek() == '.') {
  89. C.advance(); // Skip '.'
  90. ++StringOffset;
  91. while (isIdentifierChar(C.peek()))
  92. C.advance();
  93. }
  94. Token = MIToken(MIToken::MachineBasicBlock, Range.upto(C), APSInt(Number),
  95. StringOffset);
  96. return C;
  97. }
  98. static Cursor lexVirtualRegister(Cursor C, MIToken &Token) {
  99. auto Range = C;
  100. C.advance(); // Skip '%'
  101. auto NumberRange = C;
  102. while (isdigit(C.peek()))
  103. C.advance();
  104. Token = MIToken(MIToken::VirtualRegister, Range.upto(C),
  105. APSInt(NumberRange.upto(C)));
  106. return C;
  107. }
  108. static Cursor maybeLexRegister(Cursor C, MIToken &Token) {
  109. if (C.peek() != '%')
  110. return None;
  111. if (isdigit(C.peek(1)))
  112. return lexVirtualRegister(C, Token);
  113. auto Range = C;
  114. C.advance(); // Skip '%'
  115. while (isIdentifierChar(C.peek()))
  116. C.advance();
  117. Token = MIToken(MIToken::NamedRegister, Range.upto(C),
  118. /*StringOffset=*/1); // Drop the '%'
  119. return C;
  120. }
  121. static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token) {
  122. if (C.peek() != '@')
  123. return None;
  124. auto Range = C;
  125. C.advance(); // Skip the '@'
  126. // TODO: add support for quoted names.
  127. if (!isdigit(C.peek())) {
  128. while (isIdentifierChar(C.peek()))
  129. C.advance();
  130. Token = MIToken(MIToken::NamedGlobalValue, Range.upto(C),
  131. /*StringOffset=*/1); // Drop the '@'
  132. return C;
  133. }
  134. auto NumberRange = C;
  135. while (isdigit(C.peek()))
  136. C.advance();
  137. Token =
  138. MIToken(MIToken::GlobalValue, Range.upto(C), APSInt(NumberRange.upto(C)));
  139. return C;
  140. }
  141. static Cursor maybeLexIntegerLiteral(Cursor C, MIToken &Token) {
  142. if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1))))
  143. return None;
  144. auto Range = C;
  145. C.advance();
  146. while (isdigit(C.peek()))
  147. C.advance();
  148. StringRef StrVal = Range.upto(C);
  149. Token = MIToken(MIToken::IntegerLiteral, StrVal, APSInt(StrVal));
  150. return C;
  151. }
  152. static MIToken::TokenKind symbolToken(char C) {
  153. switch (C) {
  154. case ',':
  155. return MIToken::comma;
  156. case '=':
  157. return MIToken::equal;
  158. case ':':
  159. return MIToken::colon;
  160. default:
  161. return MIToken::Error;
  162. }
  163. }
  164. static Cursor maybeLexSymbol(Cursor C, MIToken &Token) {
  165. auto Kind = symbolToken(C.peek());
  166. if (Kind == MIToken::Error)
  167. return None;
  168. auto Range = C;
  169. C.advance();
  170. Token = MIToken(Kind, Range.upto(C));
  171. return C;
  172. }
  173. StringRef llvm::lexMIToken(
  174. StringRef Source, MIToken &Token,
  175. function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) {
  176. auto C = skipWhitespace(Cursor(Source));
  177. if (C.isEOF()) {
  178. Token = MIToken(MIToken::Eof, C.remaining());
  179. return C.remaining();
  180. }
  181. if (Cursor R = maybeLexIdentifier(C, Token))
  182. return R.remaining();
  183. if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback))
  184. return R.remaining();
  185. if (Cursor R = maybeLexRegister(C, Token))
  186. return R.remaining();
  187. if (Cursor R = maybeLexGlobalValue(C, Token))
  188. return R.remaining();
  189. if (Cursor R = maybeLexIntegerLiteral(C, Token))
  190. return R.remaining();
  191. if (Cursor R = maybeLexSymbol(C, Token))
  192. return R.remaining();
  193. Token = MIToken(MIToken::Error, C.remaining());
  194. ErrorCallback(C.location(),
  195. Twine("unexpected character '") + Twine(C.peek()) + "'");
  196. return C.remaining();
  197. }