token.odin 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335
  1. package odin_tokenizer
  2. import "core:strings"
  // Token bundles a token's kind with its text and a position.
  Token :: struct {
      kind: Token_Kind, // which Token_Kind this token is
      text: string,     // text carried by the token (presumably its source spelling; confirm against the tokenizer)
      pos:  Pos,        // position record associated with the token
  }
  // Pos identifies a location: a file string plus offset, line and column.
  Pos :: struct {
      file:   string, // file the position refers to
      offset: int,    // starting at 0
      line:   int,    // starting at 1
      column: int,    // starting at 1
  }
  // pos_compare is a three-way comparison of two positions.
  //
  // Fields are compared in this order: offset, line, column. The first field
  // that differs decides the result: -1 when lhs is smaller, +1 when lhs is
  // larger. When all three are equal the result is whatever
  // strings.compare returns for the two file strings.
  pos_compare :: proc(lhs, rhs: Pos) -> int {
      if lhs.offset < rhs.offset {
          return -1;
      }
      if lhs.offset > rhs.offset {
          return +1;
      }

      if lhs.line < rhs.line {
          return -1;
      }
      if lhs.line > rhs.line {
          return +1;
      }

      if lhs.column < rhs.column {
          return -1;
      }
      if lhs.column > rhs.column {
          return +1;
      }

      // Numeric fields are all equal; fall back to the file strings.
      return strings.compare(lhs.file, rhs.file);
  }
  // Token_Kind enumerates every built-in token kind.
  //
  // Members prefixed with B_ are range markers: they bracket a group of kinds
  // so that membership can be tested with ordered comparisons. The declaration
  // order is significant, both for those range tests and because the `tokens`
  // table is indexed by the numeric value of each kind below COUNT.
  // Trailing comments give the text the `tokens` table holds for that kind.
  Token_Kind :: enum u32 {
      Invalid,
      EOF,
      Comment,

      // ---- literals ----
      B_Literal_Begin,
      Ident,   // identifier
      Integer, // integer
      Float,   // float
      Imag,    // imaginary
      Rune,    // rune
      String,  // string
      B_Literal_End,

      // ---- operators ----
      B_Operator_Begin,
      Eq,       // =
      Not,      // !
      Hash,     // #
      At,       // @
      Dollar,   // $
      Pointer,  // ^
      Question, // ?
      Add,      // +
      Sub,      // -
      Mul,      // *
      Quo,      // /
      Mod,      // %
      Mod_Mod,  // %%
      And,      // &
      Or,       // |
      Xor,      // ~
      And_Not,  // &~
      Shl,      // <<
      Shr,      // >>
      Cmp_And,  // &&
      Cmp_Or,   // ||

      // ---- compound assignment operators ----
      B_Assign_Op_Begin,
      Add_Eq,     // +=
      Sub_Eq,     // -=
      Mul_Eq,     // *=
      Quo_Eq,     // /=
      Mod_Eq,     // %=
      Mod_Mod_Eq, // %%=
      And_Eq,     // &=
      Or_Eq,      // |=
      Xor_Eq,     // ~=
      And_Not_Eq, // &~=
      Shl_Eq,     // <<=
      Shr_Eq,     // >>=
      Cmp_And_Eq, // &&=
      Cmp_Or_Eq,  // ||=
      B_Assign_Op_End,

      Arrow_Right,        // ->
      Arrow_Left,         // <-
      Double_Arrow_Right, // =>
      Undef,              // ---

      // ---- comparison operators ----
      B_Comparison_Begin,
      Cmp_Eq, // ==
      Not_Eq, // !=
      Lt,     // <
      Gt,     // >
      Lt_Eq,  // <=
      Gt_Eq,  // >=
      B_Comparison_End,

      // ---- delimiters and punctuation ----
      Open_Paren,    // (
      Close_Paren,   // )
      Open_Bracket,  // [
      Close_Bracket, // ]
      Open_Brace,    // {
      Close_Brace,   // }
      Colon,         // :
      Semicolon,     // ;
      Period,        // .
      Comma,         // ,
      Ellipsis,      // ..
      Range_Half,    // ..<
      Back_Slash,    // \
      B_Operator_End,

      // ---- keywords (table text is the lower-case member name) ----
      B_Keyword_Begin,
      Import, Foreign, Package, Typeid,
      When, Where, If, Else, For, Switch, In, Notin, Do, Case,
      Break, Continue, Fallthrough, Defer, Return,
      Proc, Macro, Struct, Union, Enum, Bit_Field, Bit_Set, Map, Dynamic,
      Auto_Cast, Cast, Transmute, Distinct, Opaque, Using,
      Inline, No_Inline, Context,
      Size_Of, Align_Of, Offset_Of, Type_Of, Const,
      B_Keyword_End,

      // Number of built-in kinds; also the length of the `tokens` table.
      COUNT,

      // Kinds strictly greater than this marker are treated as custom keywords.
      B_Custom_Keyword_Begin = COUNT+1,
      // ... Custom keywords
  };
  // tokens holds the display text of every built-in Token_Kind below COUNT,
  // in exactly the enum's declaration order (entry i belongs to the kind whose
  // numeric value is i). Range-marker kinds (B_*_Begin / B_*_End) map to "".
  tokens := [Token_Kind.COUNT]string {
      // Invalid, EOF, Comment
      "Invalid", "EOF", "Comment",

      // B_Literal_Begin, literal kinds, B_Literal_End
      "",
      "identifier", "integer", "float", "imaginary", "rune", "string",
      "",

      // B_Operator_Begin, then the plain operators
      "",
      "=", "!", "#", "@", "$", "^", "?",
      "+", "-", "*", "/", "%", "%%",
      "&", "|", "~", "&~", "<<", ">>",
      "&&", "||",

      // B_Assign_Op_Begin, compound assignments, B_Assign_Op_End
      "",
      "+=", "-=", "*=", "/=", "%=", "%%=",
      "&=", "|=", "~=", "&~=", "<<=", ">>=",
      "&&=", "||=",
      "",

      // Arrow_Right, Arrow_Left, Double_Arrow_Right, Undef
      "->", "<-", "=>", "---",

      // B_Comparison_Begin, comparisons, B_Comparison_End
      "",
      "==", "!=", "<", ">", "<=", ">=",
      "",

      // Delimiters and punctuation
      "(", ")", "[", "]", "{", "}",
      ":", ";", ".", ",", "..", "..<", "\\",

      // B_Operator_End
      "",

      // B_Keyword_Begin, keywords, B_Keyword_End
      "",
      "import", "foreign", "package", "typeid",
      "when", "where", "if", "else", "for", "switch", "in", "notin", "do", "case",
      "break", "continue", "fallthrough", "defer", "return",
      "proc", "macro", "struct", "union", "enum", "bit_field", "bit_set", "map", "dynamic",
      "auto_cast", "cast", "transmute", "distinct", "opaque", "using",
      "inline", "no_inline", "context",
      "size_of", "align_of", "offset_of", "type_of", "const",
      "",
  };
  // Display text for custom keyword kinds. to_string indexes this slice with
  // (kind - B_Custom_Keyword_Begin); nothing in this file assigns to it.
  custom_keyword_tokens: []string;
  // to_string returns the display text for a token kind.
  //
  // - Built-in kinds (numeric value below COUNT) are looked up in `tokens`.
  // - Kinds strictly greater than B_Custom_Keyword_Begin are looked up in
  //   `custom_keyword_tokens` at index (kind - B_Custom_Keyword_Begin).
  //   Because the comparison is strict, that index is at least 1, so entry 0
  //   of `custom_keyword_tokens` is never returned from here.
  // - Anything else (COUNT, B_Custom_Keyword_Begin, or a custom kind whose
  //   index is past the end of the slice) yields "Invalid".
  to_string :: proc(kind: Token_Kind) -> string {
      // Token_Kind is unsigned and Invalid is its zero value, so a single
      // upper-bound test is sufficient for the built-in range.
      if kind < Token_Kind.COUNT {
          return tokens[kind];
      }

      if Token_Kind.B_Custom_Keyword_Begin < kind {
          // Compute the offset in the enum's own backing width (u32).
          // Narrowing to u16 here would truncate kinds >= 65536 and could make
          // them alias an unrelated custom keyword entry. The subtraction
          // cannot underflow because kind is strictly greater than the marker.
          n := int(u32(kind) - u32(Token_Kind.B_Custom_Keyword_Begin));
          // The lower bound guards targets where int is narrower than the
          // u32 offset and the conversion could produce a negative value.
          if 0 <= n && n < len(custom_keyword_tokens) {
              return custom_keyword_tokens[n];
          }
      }

      return "Invalid";
  }
  // is_literal reports whether kind lies strictly between the
  // B_Literal_Begin and B_Literal_End markers (the markers themselves are
  // not literals).
  is_literal :: proc(kind: Token_Kind) -> bool {
      after_begin := kind > Token_Kind.B_Literal_Begin;
      before_end  := kind < Token_Kind.B_Literal_End;
      return after_begin && before_end;
  }
  // is_operator reports whether kind is an operator.
  //
  // True for every kind from B_Operator_Begin through B_Operator_End with
  // both bounds included (so the range markers inside that span also count),
  // and additionally for the keyword kinds In and Notin.
  is_operator :: proc(kind: Token_Kind) -> bool {
      // Keyword-spelled operators live outside the operator range.
      if kind == Token_Kind.In || kind == Token_Kind.Notin {
          return true;
      }
      return Token_Kind.B_Operator_Begin <= kind && kind <= Token_Kind.B_Operator_End;
  }
  // is_assignment_operator reports whether kind is plain assignment (Eq) or
  // lies strictly between B_Assign_Op_Begin and B_Assign_Op_End.
  is_assignment_operator :: proc(kind: Token_Kind) -> bool {
      // Eq sits outside the compound-assignment range, so test it separately.
      if kind == Token_Kind.Eq {
          return true;
      }
      return Token_Kind.B_Assign_Op_Begin < kind && kind < Token_Kind.B_Assign_Op_End;
  }
  // is_keyword reports whether kind is a keyword: either a built-in keyword
  // (strictly between B_Keyword_Begin and B_Keyword_End) or any kind strictly
  // greater than B_Custom_Keyword_Begin.
  is_keyword :: proc(kind: Token_Kind) -> bool {
      if kind > Token_Kind.B_Custom_Keyword_Begin {
          return true;
      }
      return Token_Kind.B_Keyword_Begin < kind && kind < Token_Kind.B_Keyword_End;
  }