AsmLexer.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598
  1. //===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This class implements the lexer for assembly files.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "llvm/MC/MCParser/AsmLexer.h"
  14. #include "llvm/MC/MCAsmInfo.h"
  15. #include "llvm/Support/MemoryBuffer.h"
  16. #include "llvm/Support/SMLoc.h"
  17. #include <cctype>
  18. #include <cerrno>
  19. #include <cstdio>
  20. #include <cstdlib>
  21. using namespace llvm;
  22. AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
  23. CurPtr = nullptr;
  24. isAtStartOfLine = true;
  25. AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@");
  26. }
  27. AsmLexer::~AsmLexer() {
  28. }
  29. void AsmLexer::setBuffer(StringRef Buf, const char *ptr) {
  30. CurBuf = Buf;
  31. if (ptr)
  32. CurPtr = ptr;
  33. else
  34. CurPtr = CurBuf.begin();
  35. TokStart = nullptr;
  36. }
  37. /// ReturnError - Set the error to the specified string at the specified
  38. /// location. This is defined to always return AsmToken::Error.
  39. AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
  40. SetError(SMLoc::getFromPointer(Loc), Msg);
  41. return AsmToken(AsmToken::Error, StringRef(Loc, 0));
  42. }
  43. int AsmLexer::getNextChar() {
  44. char CurChar = *CurPtr++;
  45. switch (CurChar) {
  46. default:
  47. return (unsigned char)CurChar;
  48. case 0:
  49. // A nul character in the stream is either the end of the current buffer or
  50. // a random nul in the file. Disambiguate that here.
  51. if (CurPtr - 1 != CurBuf.end())
  52. return 0; // Just whitespace.
  53. // Otherwise, return end of file.
  54. --CurPtr; // Another call to lex will return EOF again.
  55. return EOF;
  56. }
  57. }
  58. /// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)?
  59. ///
  60. /// The leading integral digit sequence and dot should have already been
  61. /// consumed, some or all of the fractional digit sequence *can* have been
  62. /// consumed.
  63. AsmToken AsmLexer::LexFloatLiteral() {
  64. // Skip the fractional digit sequence.
  65. while (isdigit(*CurPtr))
  66. ++CurPtr;
  67. // Check for exponent; we intentionally accept a slighlty wider set of
  68. // literals here and rely on the upstream client to reject invalid ones (e.g.,
  69. // "1e+").
  70. if (*CurPtr == 'e' || *CurPtr == 'E') {
  71. ++CurPtr;
  72. if (*CurPtr == '-' || *CurPtr == '+')
  73. ++CurPtr;
  74. while (isdigit(*CurPtr))
  75. ++CurPtr;
  76. }
  77. return AsmToken(AsmToken::Real,
  78. StringRef(TokStart, CurPtr - TokStart));
  79. }
  80. /// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+
  81. /// while making sure there are enough actual digits around for the constant to
  82. /// be valid.
  83. ///
  84. /// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed
  85. /// before we get here.
  86. AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) {
  87. assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') &&
  88. "unexpected parse state in floating hex");
  89. bool NoFracDigits = true;
  90. // Skip the fractional part if there is one
  91. if (*CurPtr == '.') {
  92. ++CurPtr;
  93. const char *FracStart = CurPtr;
  94. while (isxdigit(*CurPtr))
  95. ++CurPtr;
  96. NoFracDigits = CurPtr == FracStart;
  97. }
  98. if (NoIntDigits && NoFracDigits)
  99. return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
  100. "expected at least one significand digit");
  101. // Make sure we do have some kind of proper exponent part
  102. if (*CurPtr != 'p' && *CurPtr != 'P')
  103. return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
  104. "expected exponent part 'p'");
  105. ++CurPtr;
  106. if (*CurPtr == '+' || *CurPtr == '-')
  107. ++CurPtr;
  108. // N.b. exponent digits are *not* hex
  109. const char *ExpStart = CurPtr;
  110. while (isdigit(*CurPtr))
  111. ++CurPtr;
  112. if (CurPtr == ExpStart)
  113. return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
  114. "expected at least one exponent digit");
  115. return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
  116. }
  /// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*
  ///
  /// IsIdentifierChar - Return true if \p c may appear in the tail of an
  /// identifier: an alphanumeric character or one of '_', '$', '.', '?'. The
  /// '@' character only qualifies when \p AllowAt is set.
  static bool IsIdentifierChar(char c, bool AllowAt) {
    // isalnum() is undefined for negative arguments, which is what a non-ASCII
    // byte becomes when plain char is signed; classify the unsigned value.
    return isalnum(static_cast<unsigned char>(c)) || c == '_' || c == '$' ||
           c == '.' || (c == '@' && AllowAt) || c == '?';
  }
  122. AsmToken AsmLexer::LexIdentifier() {
  123. // Check for floating point literals.
  124. if (CurPtr[-1] == '.' && isdigit(*CurPtr)) {
  125. // Disambiguate a .1243foo identifier from a floating literal.
  126. while (isdigit(*CurPtr))
  127. ++CurPtr;
  128. if (*CurPtr == 'e' || *CurPtr == 'E' ||
  129. !IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
  130. return LexFloatLiteral();
  131. }
  132. while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier))
  133. ++CurPtr;
  134. // Handle . as a special case.
  135. if (CurPtr == TokStart+1 && TokStart[0] == '.')
  136. return AsmToken(AsmToken::Dot, StringRef(TokStart, 1));
  137. return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart));
  138. }
  139. /// LexSlash: Slash: /
  140. /// C-Style Comment: /* ... */
  141. AsmToken AsmLexer::LexSlash() {
  142. switch (*CurPtr) {
  143. case '*': break; // C style comment.
  144. case '/': return ++CurPtr, LexLineComment();
  145. default: return AsmToken(AsmToken::Slash, StringRef(CurPtr-1, 1));
  146. }
  147. // C Style comment.
  148. ++CurPtr; // skip the star.
  149. while (1) {
  150. int CurChar = getNextChar();
  151. switch (CurChar) {
  152. case EOF:
  153. return ReturnError(TokStart, "unterminated comment");
  154. case '*':
  155. // End of the comment?
  156. if (CurPtr[0] != '/') break;
  157. ++CurPtr; // End the */.
  158. return LexToken();
  159. }
  160. }
  161. }
  162. /// LexLineComment: Comment: #[^\n]*
  163. /// : //[^\n]*
  164. AsmToken AsmLexer::LexLineComment() {
  165. // FIXME: This is broken if we happen to a comment at the end of a file, which
  166. // was .included, and which doesn't end with a newline.
  167. int CurChar = getNextChar();
  168. while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
  169. CurChar = getNextChar();
  170. if (CurChar == EOF)
  171. return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
  172. return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 0));
  173. }
  /// Advance CurPtr past an optional 'U' followed by at most two 'L'
  /// characters, i.e. the ULL, UL, U, L and LL suffixes.
  static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
    if (*CurPtr == 'U')
      ++CurPtr;

    // At most two 'L's are skipped; the scan stops at the first non-'L'.
    for (int Remaining = 2; Remaining > 0 && *CurPtr == 'L'; --Remaining)
      ++CurPtr;
  }
  // Look ahead to search for first non-hex digit, if it's [hH], then we treat
  // the integer as a hexadecimal, possibly with leading zeroes.
  //
  // On return CurPtr points at the [hH] suffix when one is found. Otherwise it
  // points at the first hex letter (a-f/A-F) in the run, or just past the run
  // when it consists of decimal digits only. Returns 16 when the suffix is
  // present and DefaultRadix otherwise.
  static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) {
    const char *FirstHex = nullptr;
    const char *LookAhead = CurPtr;

    for (;; ++LookAhead) {
      // The <cctype> classifiers are undefined for negative arguments, so the
      // byte is converted to unsigned char before it is classified.
      const unsigned char C = static_cast<unsigned char>(*LookAhead);
      if (isdigit(C))
        continue;
      if (!isxdigit(C))
        break;
      if (!FirstHex)
        FirstHex = LookAhead;
    }

    const bool IsHex = *LookAhead == 'h' || *LookAhead == 'H';
    CurPtr = (IsHex || !FirstHex) ? LookAhead : FirstHex;
    return IsHex ? 16 : DefaultRadix;
  }
  205. static AsmToken intToken(StringRef Ref, APInt &Value)
  206. {
  207. if (Value.isIntN(64))
  208. return AsmToken(AsmToken::Integer, Ref, Value);
  209. return AsmToken(AsmToken::BigNum, Ref, Value);
  210. }
  211. /// LexDigit: First character is [0-9].
  212. /// Local Label: [0-9][:]
  213. /// Forward/Backward Label: [0-9][fb]
  214. /// Binary integer: 0b[01]+
  215. /// Octal integer: 0[0-7]+
  216. /// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
  217. /// Decimal integer: [1-9][0-9]*
  218. AsmToken AsmLexer::LexDigit() {
  219. // Decimal integer: [1-9][0-9]*
  220. if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
  221. unsigned Radix = doLookAhead(CurPtr, 10);
  222. bool isHex = Radix == 16;
  223. // Check for floating point literals.
  224. if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) {
  225. ++CurPtr;
  226. return LexFloatLiteral();
  227. }
  228. StringRef Result(TokStart, CurPtr - TokStart);
  229. APInt Value(128, 0, true);
  230. if (Result.getAsInteger(Radix, Value))
  231. return ReturnError(TokStart, !isHex ? "invalid decimal number" :
  232. "invalid hexdecimal number");
  233. // Consume the [bB][hH].
  234. if (Radix == 2 || Radix == 16)
  235. ++CurPtr;
  236. // The darwin/x86 (and x86-64) assembler accepts and ignores type
  237. // suffices on integer literals.
  238. SkipIgnoredIntegerSuffix(CurPtr);
  239. return intToken(Result, Value);
  240. }
  241. if (*CurPtr == 'b') {
  242. ++CurPtr;
  243. // See if we actually have "0b" as part of something like "jmp 0b\n"
  244. if (!isdigit(CurPtr[0])) {
  245. --CurPtr;
  246. StringRef Result(TokStart, CurPtr - TokStart);
  247. return AsmToken(AsmToken::Integer, Result, 0);
  248. }
  249. const char *NumStart = CurPtr;
  250. while (CurPtr[0] == '0' || CurPtr[0] == '1')
  251. ++CurPtr;
  252. // Requires at least one binary digit.
  253. if (CurPtr == NumStart)
  254. return ReturnError(TokStart, "invalid binary number");
  255. StringRef Result(TokStart, CurPtr - TokStart);
  256. APInt Value(128, 0, true);
  257. if (Result.substr(2).getAsInteger(2, Value))
  258. return ReturnError(TokStart, "invalid binary number");
  259. // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
  260. // suffixes on integer literals.
  261. SkipIgnoredIntegerSuffix(CurPtr);
  262. return intToken(Result, Value);
  263. }
  264. if (*CurPtr == 'x') {
  265. ++CurPtr;
  266. const char *NumStart = CurPtr;
  267. while (isxdigit(CurPtr[0]))
  268. ++CurPtr;
  269. // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be
  270. // diagnosed by LexHexFloatLiteral).
  271. if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P')
  272. return LexHexFloatLiteral(NumStart == CurPtr);
  273. // Otherwise requires at least one hex digit.
  274. if (CurPtr == NumStart)
  275. return ReturnError(CurPtr-2, "invalid hexadecimal number");
  276. APInt Result(128, 0);
  277. if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
  278. return ReturnError(TokStart, "invalid hexadecimal number");
  279. // Consume the optional [hH].
  280. if (*CurPtr == 'h' || *CurPtr == 'H')
  281. ++CurPtr;
  282. // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
  283. // suffixes on integer literals.
  284. SkipIgnoredIntegerSuffix(CurPtr);
  285. return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
  286. }
  287. // Either octal or hexadecimal.
  288. APInt Value(128, 0, true);
  289. unsigned Radix = doLookAhead(CurPtr, 8);
  290. bool isHex = Radix == 16;
  291. StringRef Result(TokStart, CurPtr - TokStart);
  292. if (Result.getAsInteger(Radix, Value))
  293. return ReturnError(TokStart, !isHex ? "invalid octal number" :
  294. "invalid hexdecimal number");
  295. // Consume the [hH].
  296. if (Radix == 16)
  297. ++CurPtr;
  298. // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
  299. // suffixes on integer literals.
  300. SkipIgnoredIntegerSuffix(CurPtr);
  301. return intToken(Result, Value);
  302. }
  303. /// LexSingleQuote: Integer: 'b'
  304. AsmToken AsmLexer::LexSingleQuote() {
  305. int CurChar = getNextChar();
  306. if (CurChar == '\\')
  307. CurChar = getNextChar();
  308. if (CurChar == EOF)
  309. return ReturnError(TokStart, "unterminated single quote");
  310. CurChar = getNextChar();
  311. if (CurChar != '\'')
  312. return ReturnError(TokStart, "single quote way too long");
  313. // The idea here being that 'c' is basically just an integral
  314. // constant.
  315. StringRef Res = StringRef(TokStart,CurPtr - TokStart);
  316. long long Value;
  317. if (Res.startswith("\'\\")) {
  318. char theChar = Res[2];
  319. switch (theChar) {
  320. default: Value = theChar; break;
  321. case '\'': Value = '\''; break;
  322. case 't': Value = '\t'; break;
  323. case 'n': Value = '\n'; break;
  324. case 'b': Value = '\b'; break;
  325. }
  326. } else
  327. Value = TokStart[1];
  328. return AsmToken(AsmToken::Integer, Res, Value);
  329. }
  330. /// LexQuote: String: "..."
  331. AsmToken AsmLexer::LexQuote() {
  332. int CurChar = getNextChar();
  333. // TODO: does gas allow multiline string constants?
  334. while (CurChar != '"') {
  335. if (CurChar == '\\') {
  336. // Allow \", etc.
  337. CurChar = getNextChar();
  338. }
  339. if (CurChar == EOF)
  340. return ReturnError(TokStart, "unterminated string constant");
  341. CurChar = getNextChar();
  342. }
  343. return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
  344. }
  345. StringRef AsmLexer::LexUntilEndOfStatement() {
  346. TokStart = CurPtr;
  347. while (!isAtStartOfComment(CurPtr) && // Start of line comment.
  348. !isAtStatementSeparator(CurPtr) && // End of statement marker.
  349. *CurPtr != '\n' && *CurPtr != '\r' &&
  350. (*CurPtr != 0 || CurPtr != CurBuf.end())) {
  351. ++CurPtr;
  352. }
  353. return StringRef(TokStart, CurPtr-TokStart);
  354. }
  355. StringRef AsmLexer::LexUntilEndOfLine() {
  356. TokStart = CurPtr;
  357. while (*CurPtr != '\n' && *CurPtr != '\r' &&
  358. (*CurPtr != 0 || CurPtr != CurBuf.end())) {
  359. ++CurPtr;
  360. }
  361. return StringRef(TokStart, CurPtr-TokStart);
  362. }
  363. const AsmToken AsmLexer::peekTok(bool ShouldSkipSpace) {
  364. const char *SavedTokStart = TokStart;
  365. const char *SavedCurPtr = CurPtr;
  366. bool SavedAtStartOfLine = isAtStartOfLine;
  367. bool SavedSkipSpace = SkipSpace;
  368. std::string SavedErr = getErr();
  369. SMLoc SavedErrLoc = getErrLoc();
  370. SkipSpace = ShouldSkipSpace;
  371. AsmToken Token = LexToken();
  372. SetError(SavedErrLoc, SavedErr);
  373. SkipSpace = SavedSkipSpace;
  374. isAtStartOfLine = SavedAtStartOfLine;
  375. CurPtr = SavedCurPtr;
  376. TokStart = SavedTokStart;
  377. return Token;
  378. }
  379. bool AsmLexer::isAtStartOfComment(const char *Ptr) {
  380. const char *CommentString = MAI.getCommentString();
  381. if (CommentString[1] == '\0')
  382. return CommentString[0] == Ptr[0];
  383. // FIXME: special case for the bogus "##" comment string in X86MCAsmInfoDarwin
  384. if (CommentString[1] == '#')
  385. return CommentString[0] == Ptr[0];
  386. return strncmp(Ptr, CommentString, strlen(CommentString)) == 0;
  387. }
  388. bool AsmLexer::isAtStatementSeparator(const char *Ptr) {
  389. return strncmp(Ptr, MAI.getSeparatorString(),
  390. strlen(MAI.getSeparatorString())) == 0;
  391. }
  392. AsmToken AsmLexer::LexToken() {
  393. TokStart = CurPtr;
  394. // This always consumes at least one character.
  395. int CurChar = getNextChar();
  396. if (isAtStartOfComment(TokStart)) {
  397. // If this comment starts with a '#', then return the Hash token and let
  398. // the assembler parser see if it can be parsed as a cpp line filename
  399. // comment. We do this only if we are at the start of a line.
  400. if (CurChar == '#' && isAtStartOfLine)
  401. return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
  402. isAtStartOfLine = true;
  403. return LexLineComment();
  404. }
  405. if (isAtStatementSeparator(TokStart)) {
  406. CurPtr += strlen(MAI.getSeparatorString()) - 1;
  407. return AsmToken(AsmToken::EndOfStatement,
  408. StringRef(TokStart, strlen(MAI.getSeparatorString())));
  409. }
  410. // If we're missing a newline at EOF, make sure we still get an
  411. // EndOfStatement token before the Eof token.
  412. if (CurChar == EOF && !isAtStartOfLine) {
  413. isAtStartOfLine = true;
  414. return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
  415. }
  416. isAtStartOfLine = false;
  417. switch (CurChar) {
  418. default:
  419. // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
  420. if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
  421. return LexIdentifier();
  422. // Unknown character, emit an error.
  423. return ReturnError(TokStart, "invalid character in input");
  424. case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
  425. case 0:
  426. case ' ':
  427. case '\t':
  428. if (SkipSpace) {
  429. // Ignore whitespace.
  430. return LexToken();
  431. } else {
  432. int len = 1;
  433. while (*CurPtr==' ' || *CurPtr=='\t') {
  434. CurPtr++;
  435. len++;
  436. }
  437. return AsmToken(AsmToken::Space, StringRef(TokStart, len));
  438. }
  439. case '\n': // FALL THROUGH.
  440. case '\r':
  441. isAtStartOfLine = true;
  442. return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
  443. case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
  444. case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
  445. case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1));
  446. case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1));
  447. case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1));
  448. case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1));
  449. case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1));
  450. case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1));
  451. case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1));
  452. case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1));
  453. case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
  454. case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
  455. case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
  456. case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1));
  457. case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1));
  458. case '=':
  459. if (*CurPtr == '=')
  460. return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2));
  461. return AsmToken(AsmToken::Equal, StringRef(TokStart, 1));
  462. case '|':
  463. if (*CurPtr == '|')
  464. return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2));
  465. return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1));
  466. case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1));
  467. case '&':
  468. if (*CurPtr == '&')
  469. return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2));
  470. return AsmToken(AsmToken::Amp, StringRef(TokStart, 1));
  471. case '!':
  472. if (*CurPtr == '=')
  473. return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2));
  474. return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1));
  475. case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
  476. case '/': return LexSlash();
  477. case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
  478. case '\'': return LexSingleQuote();
  479. case '"': return LexQuote();
  480. case '0': case '1': case '2': case '3': case '4':
  481. case '5': case '6': case '7': case '8': case '9':
  482. return LexDigit();
  483. case '<':
  484. switch (*CurPtr) {
  485. case '<': return ++CurPtr, AsmToken(AsmToken::LessLess,
  486. StringRef(TokStart, 2));
  487. case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual,
  488. StringRef(TokStart, 2));
  489. case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater,
  490. StringRef(TokStart, 2));
  491. default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1));
  492. }
  493. case '>':
  494. switch (*CurPtr) {
  495. case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater,
  496. StringRef(TokStart, 2));
  497. case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual,
  498. StringRef(TokStart, 2));
  499. default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1));
  500. }
  501. // TODO: Quoted identifiers (objc methods etc)
  502. // local labels: [0-9][:]
  503. // Forward/backward labels: [0-9][fb]
  504. // Integers, fp constants, character constants.
  505. }
  506. }