// tokenizer.cpp
#define TOKEN_KINDS \
    TOKEN_KIND(Token_Invalid, "Invalid"), \
    TOKEN_KIND(Token_EOF, "EOF"), \
    TOKEN_KIND(Token_Comment, "Comment"), \
    \
    TOKEN_KIND(Token__LiteralBegin, ""), \
    TOKEN_KIND(Token_Ident, "identifier"), \
    TOKEN_KIND(Token_Integer, "integer"), \
    TOKEN_KIND(Token_Float, "float"), \
    TOKEN_KIND(Token_Imag, "imaginary"), \
    TOKEN_KIND(Token_Rune, "rune"), \
    TOKEN_KIND(Token_String, "string"), \
    TOKEN_KIND(Token__LiteralEnd, ""), \
    \
    TOKEN_KIND(Token__OperatorBegin, ""), \
    TOKEN_KIND(Token_Eq, "="), \
    TOKEN_KIND(Token_Not, "!"), \
    TOKEN_KIND(Token_Hash, "#"), \
    TOKEN_KIND(Token_At, "@"), \
    TOKEN_KIND(Token_Dollar, "$"), \
    TOKEN_KIND(Token_Pointer, "^"), \
    TOKEN_KIND(Token_Question, "?"), \
    TOKEN_KIND(Token_Add, "+"), \
    TOKEN_KIND(Token_Sub, "-"), \
    TOKEN_KIND(Token_Mul, "*"), \
    TOKEN_KIND(Token_Quo, "/"), \
    TOKEN_KIND(Token_Mod, "%"), \
    TOKEN_KIND(Token_ModMod, "%%"), \
    TOKEN_KIND(Token_And, "&"), \
    TOKEN_KIND(Token_Or, "|"), \
    TOKEN_KIND(Token_Xor, "~"), \
    TOKEN_KIND(Token_AndNot, "&~"), \
    TOKEN_KIND(Token_Shl, "<<"), \
    TOKEN_KIND(Token_Shr, ">>"), \
    TOKEN_KIND(Token_CmpAnd, "&&"), \
    TOKEN_KIND(Token_CmpOr, "||"), \
    \
    TOKEN_KIND(Token__AssignOpBegin, ""), \
    TOKEN_KIND(Token_AddEq, "+="), \
    TOKEN_KIND(Token_SubEq, "-="), \
    TOKEN_KIND(Token_MulEq, "*="), \
    TOKEN_KIND(Token_QuoEq, "/="), \
    TOKEN_KIND(Token_ModEq, "%="), \
    TOKEN_KIND(Token_ModModEq, "%%="), \
    TOKEN_KIND(Token_AndEq, "&="), \
    TOKEN_KIND(Token_OrEq, "|="), \
    TOKEN_KIND(Token_XorEq, "~="), \
    TOKEN_KIND(Token_AndNotEq, "&~="), \
    TOKEN_KIND(Token_ShlEq, "<<="), \
    TOKEN_KIND(Token_ShrEq, ">>="), \
    TOKEN_KIND(Token_CmpAndEq, "&&="), \
    TOKEN_KIND(Token_CmpOrEq, "||="), \
    TOKEN_KIND(Token__AssignOpEnd, ""), \
    TOKEN_KIND(Token_Increment, "++"), \
    TOKEN_KIND(Token_Decrement, "--"), \
    TOKEN_KIND(Token_ArrowRight, "->"), \
    TOKEN_KIND(Token_Undef, "---"), \
    \
    TOKEN_KIND(Token__ComparisonBegin, ""), \
    TOKEN_KIND(Token_CmpEq, "=="), \
    TOKEN_KIND(Token_NotEq, "!="), \
    TOKEN_KIND(Token_Lt, "<"), \
    TOKEN_KIND(Token_Gt, ">"), \
    TOKEN_KIND(Token_LtEq, "<="), \
    TOKEN_KIND(Token_GtEq, ">="), \
    TOKEN_KIND(Token__ComparisonEnd, ""), \
    \
    TOKEN_KIND(Token_OpenParen, "("), \
    TOKEN_KIND(Token_CloseParen, ")"), \
    TOKEN_KIND(Token_OpenBracket, "["), \
    TOKEN_KIND(Token_CloseBracket, "]"), \
    TOKEN_KIND(Token_OpenBrace, "{"), \
    TOKEN_KIND(Token_CloseBrace, "}"), \
    TOKEN_KIND(Token_Colon, ":"), \
    TOKEN_KIND(Token_Semicolon, ";"), \
    TOKEN_KIND(Token_Period, "."), \
    TOKEN_KIND(Token_Comma, ","), \
    TOKEN_KIND(Token_Ellipsis, ".."), \
    TOKEN_KIND(Token_RangeHalf, "..<"), \
    TOKEN_KIND(Token_BackSlash, "\\"), \
    TOKEN_KIND(Token__OperatorEnd, ""), \
    \
    TOKEN_KIND(Token__KeywordBegin, ""), \
    TOKEN_KIND(Token_import, "import"), \
    TOKEN_KIND(Token_foreign, "foreign"), \
    TOKEN_KIND(Token_package, "package"), \
    TOKEN_KIND(Token_typeid, "typeid"), \
    TOKEN_KIND(Token_when, "when"), \
    TOKEN_KIND(Token_where, "where"), \
    TOKEN_KIND(Token_if, "if"), \
    TOKEN_KIND(Token_else, "else"), \
    TOKEN_KIND(Token_for, "for"), \
    TOKEN_KIND(Token_switch, "switch"), \
    TOKEN_KIND(Token_in, "in"), \
    TOKEN_KIND(Token_not_in, "not_in"), \
    TOKEN_KIND(Token_do, "do"), \
    TOKEN_KIND(Token_case, "case"), \
    TOKEN_KIND(Token_break, "break"), \
    TOKEN_KIND(Token_continue, "continue"), \
    TOKEN_KIND(Token_fallthrough, "fallthrough"), \
    TOKEN_KIND(Token_defer, "defer"), \
    TOKEN_KIND(Token_return, "return"), \
    TOKEN_KIND(Token_proc, "proc"), \
    TOKEN_KIND(Token_struct, "struct"), \
    TOKEN_KIND(Token_union, "union"), \
    TOKEN_KIND(Token_enum, "enum"), \
    TOKEN_KIND(Token_bit_set, "bit_set"), \
    TOKEN_KIND(Token_map, "map"), \
    TOKEN_KIND(Token_dynamic, "dynamic"), \
    TOKEN_KIND(Token_auto_cast, "auto_cast"), \
    TOKEN_KIND(Token_cast, "cast"), \
    TOKEN_KIND(Token_transmute, "transmute"), \
    TOKEN_KIND(Token_distinct, "distinct"), \
    TOKEN_KIND(Token_using, "using"), \
    TOKEN_KIND(Token_inline, "inline"), \
    TOKEN_KIND(Token_no_inline, "no_inline"), \
    TOKEN_KIND(Token_context, "context"), \
    TOKEN_KIND(Token_asm, "asm"), \
    TOKEN_KIND(Token__KeywordEnd, ""), \
    TOKEN_KIND(Token_Count, "")
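
// The TOKEN_KINDS X-macro is expanded twice below: once with
//     #define TOKEN_KIND(e, s) e
// to produce the TokenKind enumerators, and once with
//     #define TOKEN_KIND(e, s) {cast(u8 *)s, gb_size_of(s)-1}
// to produce the parallel token_strings table, so the enum and its
// display strings can never drift out of sync.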
enum TokenKind {
#define TOKEN_KIND(e, s) e
    TOKEN_KINDS
#undef TOKEN_KIND
};

String const token_strings[] = {
#define TOKEN_KIND(e, s) {cast(u8 *)s, gb_size_of(s)-1}
    TOKEN_KINDS
#undef TOKEN_KIND
};

struct KeywordHashEntry {
    u32       hash;
    TokenKind kind;
    String    text;
};

enum {
    KEYWORD_HASH_TABLE_COUNT = 1<<9,
    KEYWORD_HASH_TABLE_MASK = KEYWORD_HASH_TABLE_COUNT-1,
};

gb_global KeywordHashEntry keyword_hash_table[KEYWORD_HASH_TABLE_COUNT] = {};
GB_STATIC_ASSERT(Token__KeywordEnd-Token__KeywordBegin <= gb_count_of(keyword_hash_table));
gb_global isize const min_keyword_size = 2;
gb_global isize max_keyword_size = 11;
gb_global bool keyword_indices[16] = {};
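
// The keyword table is a direct-indexed hash table with no probing: each
// keyword's FNV-1a hash, masked to the table size, must land in a unique
// slot, and add_keyword_hash_entry asserts at startup if two keywords ever
// collide. keyword_indices[len] records which lengths any keyword has, so
// the tokenizer can reject most identifiers by length alone before hashing.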
gb_inline u32 keyword_hash(u8 const *text, isize len) {
    return fnv32a(text, len);
    // return murmur3_32(text, len, 0x6f64696e);
}

void add_keyword_hash_entry(String const &s, TokenKind kind) {
    max_keyword_size = gb_max(max_keyword_size, s.len);

    keyword_indices[s.len] = true;

    u32 hash = keyword_hash(s.text, s.len);

    // NOTE(bill): This is a bit of an empirical hack in order to speed things up
    u32 index = hash & KEYWORD_HASH_TABLE_MASK;
    KeywordHashEntry *entry = &keyword_hash_table[index];
    GB_ASSERT_MSG(entry->kind == Token_Invalid, "Keyword hash table initialization collision: %.*s %.*s %08x %08x", LIT(s), LIT(token_strings[entry->kind]), hash, entry->hash);
    entry->hash = hash;
    entry->kind = kind;
    entry->text = s;
}

void init_keyword_hash_table(void) {
    for (i32 kind = Token__KeywordBegin+1; kind < Token__KeywordEnd; kind++) {
        add_keyword_hash_entry(token_strings[kind], cast(TokenKind)kind);
    }

    static struct {
        String    s;
        TokenKind kind;
    } const legacy_keywords[] = {
        {str_lit("notin"), Token_not_in},
    };

    for (i32 i = 0; i < gb_count_of(legacy_keywords); i++) {
        add_keyword_hash_entry(legacy_keywords[i].s, legacy_keywords[i].kind);
    }

    GB_ASSERT(max_keyword_size < 16);
}
gb_global Array<String> global_file_path_strings; // index is file id
String get_file_path_string(i32 index);

struct TokenPos {
    i32 file_id;
    i32 offset; // starting at 0
    i32 line;   // starting at 1
    i32 column; // starting at 1
};

// temporary
char *token_pos_to_string(TokenPos const &pos) {
    gbString s = gb_string_make_reserve(temporary_allocator(), 128);
    String file = get_file_path_string(pos.file_id);
    s = gb_string_append_fmt(s, "%.*s(%d:%d)", LIT(file), pos.line, pos.column);
    return s;
}

i32 token_pos_cmp(TokenPos const &a, TokenPos const &b) {
    if (a.offset != b.offset) {
        return (a.offset < b.offset) ? -1 : +1;
    }
    if (a.line != b.line) {
        return (a.line < b.line) ? -1 : +1;
    }
    if (a.column != b.column) {
        return (a.column < b.column) ? -1 : +1;
    }
    return string_compare(get_file_path_string(a.file_id), get_file_path_string(b.file_id));
}

bool operator==(TokenPos const &a, TokenPos const &b) { return token_pos_cmp(a, b) == 0; }
bool operator!=(TokenPos const &a, TokenPos const &b) { return token_pos_cmp(a, b) != 0; }
bool operator< (TokenPos const &a, TokenPos const &b) { return token_pos_cmp(a, b) <  0; }
bool operator<=(TokenPos const &a, TokenPos const &b) { return token_pos_cmp(a, b) <= 0; }
bool operator> (TokenPos const &a, TokenPos const &b) { return token_pos_cmp(a, b) >  0; }
bool operator>=(TokenPos const &a, TokenPos const &b) { return token_pos_cmp(a, b) >= 0; }

TokenPos token_pos_add_column(TokenPos pos) {
    pos.column += 1;
    pos.offset += 1;
    return pos;
}

struct Token {
    TokenKind kind;
    String    string;
    TokenPos  pos;
};

Token empty_token = {Token_Invalid};
Token blank_token = {Token_Ident, {cast(u8 *)"_", 1}};

Token make_token_ident(String s) {
    Token t = {Token_Ident, s};
    return t;
}
Token make_token_ident(char const *s) {
    Token t = {Token_Ident, make_string_c(s)};
    return t;
}

bool token_is_newline(Token const &tok) {
    return tok.kind == Token_Semicolon && tok.string == "\n";
}
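
// Automatically inserted semicolons keep "\n" as their token string (see
// tokenizer_get_token below), so token_is_newline can tell them apart from
// an explicit ';' written in the source.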
struct ErrorCollector {
    TokenPos prev;
    i64      count;
    i64      warning_count;
    bool     in_block;
    gbMutex  mutex;
    gbMutex  string_mutex;

    Array<u8>     error_buffer;
    Array<String> errors;
};

gb_global ErrorCollector global_error_collector;

#define MAX_ERROR_COLLECTOR_COUNT (36)

bool any_errors(void) {
    return global_error_collector.error_buffer.count > 0;
}

void init_global_error_collector(void) {
    gb_mutex_init(&global_error_collector.mutex);
    gb_mutex_init(&global_error_collector.string_mutex);
    array_init(&global_error_collector.errors, heap_allocator());
    array_init(&global_error_collector.error_buffer, heap_allocator());
    array_init(&global_file_path_strings, heap_allocator(), 4096);
}

bool set_file_path_string(i32 index, String const &path) {
    bool ok = false;
    GB_ASSERT(index >= 0);
    gb_mutex_lock(&global_error_collector.string_mutex);

    if (index >= global_file_path_strings.count) {
        array_resize(&global_file_path_strings, index+1); // resize must cover [index]
    }
    String prev = global_file_path_strings[index];
    if (prev.len == 0) {
        global_file_path_strings[index] = path;
        ok = true;
    }

    gb_mutex_unlock(&global_error_collector.string_mutex);
    return ok;
}

String get_file_path_string(i32 index) {
    GB_ASSERT(index >= 0);
    gb_mutex_lock(&global_error_collector.string_mutex);

    String path = {};
    if (index < global_file_path_strings.count) {
        path = global_file_path_strings[index];
    }

    gb_mutex_unlock(&global_error_collector.string_mutex);
    return path;
}
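
// begin_error_block/end_error_block hold the collector mutex across several
// error_out calls and accumulate their text in error_buffer; end_error_block
// then flushes the buffer as one contiguous string into the errors array, so
// a multi-line diagnostic cannot be interleaved with output from other threads.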
void begin_error_block(void) {
    gb_mutex_lock(&global_error_collector.mutex);
    global_error_collector.in_block = true;
}

void end_error_block(void) {
    if (global_error_collector.error_buffer.count > 0) {
        isize n = global_error_collector.error_buffer.count;
        u8 *text = gb_alloc_array(heap_allocator(), u8, n+1);
        gb_memmove(text, global_error_collector.error_buffer.data, n);
        text[n] = 0;
        String s = {text, n};
        array_add(&global_error_collector.errors, s);
        global_error_collector.error_buffer.count = 0;

        // gbFile *f = gb_file_get_standard(gbFileStandard_Error);
        // gb_file_write(f, text, n);
    }
    global_error_collector.in_block = false;
    gb_mutex_unlock(&global_error_collector.mutex);
}
#define ERROR_OUT_PROC(name) void name(char const *fmt, va_list va)
typedef ERROR_OUT_PROC(ErrorOutProc);

ERROR_OUT_PROC(default_error_out_va) {
    gbFile *f = gb_file_get_standard(gbFileStandard_Error);

    char buf[4096] = {};
    isize len = gb_snprintf_va(buf, gb_size_of(buf), fmt, va);
    isize n = len-1; // gb_snprintf_va's result appears to count the terminating NUL
    if (global_error_collector.in_block) {
        isize cap = global_error_collector.error_buffer.count + n;
        array_reserve(&global_error_collector.error_buffer, cap);
        u8 *data = global_error_collector.error_buffer.data + global_error_collector.error_buffer.count;
        gb_memmove(data, buf, n);
        global_error_collector.error_buffer.count += n;
    } else {
        gb_mutex_lock(&global_error_collector.mutex);
        {
            u8 *text = gb_alloc_array(heap_allocator(), u8, n+1);
            gb_memmove(text, buf, n);
            text[n] = 0;
            array_add(&global_error_collector.errors, make_string(text, n));
        }
        gb_mutex_unlock(&global_error_collector.mutex);
    }

    gb_file_write(f, buf, n);
}
ErrorOutProc *error_out_va = default_error_out_va;

// NOTE: defined in build_settings.cpp
bool global_warnings_as_errors(void);
bool global_ignore_warnings(void);

void error_out(char const *fmt, ...) {
    va_list va;
    va_start(va, fmt);
    error_out_va(fmt, va);
    va_end(va);
}

void error_va(Token token, char const *fmt, va_list va) {
    gb_mutex_lock(&global_error_collector.mutex);
    global_error_collector.count++;
    // NOTE(bill): Duplicate error, skip it
    if (token.pos.line == 0) {
        error_out("Error: %s\n", gb_bprintf_va(fmt, va));
    } else if (global_error_collector.prev != token.pos) {
        global_error_collector.prev = token.pos;
        error_out("%s %s\n",
                  token_pos_to_string(token.pos),
                  gb_bprintf_va(fmt, va));
    }
    gb_mutex_unlock(&global_error_collector.mutex);
    if (global_error_collector.count > MAX_ERROR_COLLECTOR_COUNT) {
        gb_exit(1);
    }
}

void warning_va(Token token, char const *fmt, va_list va) {
    if (global_warnings_as_errors()) {
        error_va(token, fmt, va);
        return;
    }

    gb_mutex_lock(&global_error_collector.mutex);
    global_error_collector.warning_count++;
    if (!global_ignore_warnings()) {
        // NOTE(bill): Duplicate error, skip it
        if (token.pos.line == 0) {
            error_out("Warning: %s\n", gb_bprintf_va(fmt, va));
        } else if (global_error_collector.prev != token.pos) {
            global_error_collector.prev = token.pos;
            error_out("%s Warning: %s\n",
                      token_pos_to_string(token.pos),
                      gb_bprintf_va(fmt, va));
        }
    }
    gb_mutex_unlock(&global_error_collector.mutex);
}

void error_line_va(char const *fmt, va_list va) {
    gb_mutex_lock(&global_error_collector.mutex);
    error_out_va(fmt, va);
    gb_mutex_unlock(&global_error_collector.mutex);
}

void error_no_newline_va(Token token, char const *fmt, va_list va) {
    gb_mutex_lock(&global_error_collector.mutex);
    global_error_collector.count++;
    // NOTE(bill): Duplicate error, skip it
    if (token.pos.line == 0) {
        error_out("Error: %s", gb_bprintf_va(fmt, va));
    } else if (global_error_collector.prev != token.pos) {
        global_error_collector.prev = token.pos;
        error_out("%s %s",
                  token_pos_to_string(token.pos),
                  gb_bprintf_va(fmt, va));
    }
    gb_mutex_unlock(&global_error_collector.mutex);
    if (global_error_collector.count > MAX_ERROR_COLLECTOR_COUNT) {
        gb_exit(1);
    }
}

void syntax_error_va(Token token, char const *fmt, va_list va) {
    gb_mutex_lock(&global_error_collector.mutex);
    global_error_collector.count++;
    // NOTE(bill): Duplicate error, skip it
    if (global_error_collector.prev != token.pos) {
        global_error_collector.prev = token.pos;
        error_out("%s Syntax Error: %s\n",
                  token_pos_to_string(token.pos),
                  gb_bprintf_va(fmt, va));
    } else if (token.pos.line == 0) {
        error_out("Syntax Error: %s\n", gb_bprintf_va(fmt, va));
    }
    gb_mutex_unlock(&global_error_collector.mutex);
    if (global_error_collector.count > MAX_ERROR_COLLECTOR_COUNT) {
        gb_exit(1);
    }
}

void syntax_warning_va(Token token, char const *fmt, va_list va) {
    if (global_warnings_as_errors()) {
        syntax_error_va(token, fmt, va);
        return;
    }

    gb_mutex_lock(&global_error_collector.mutex);
    global_error_collector.warning_count++;
    if (!global_ignore_warnings()) {
        // NOTE(bill): Duplicate error, skip it
        if (global_error_collector.prev != token.pos) {
            global_error_collector.prev = token.pos;
            error_out("%s Syntax Warning: %s\n",
                      token_pos_to_string(token.pos),
                      gb_bprintf_va(fmt, va));
        } else if (token.pos.line == 0) {
            error_out("Warning: %s\n", gb_bprintf_va(fmt, va));
        }
    }
    gb_mutex_unlock(&global_error_collector.mutex);
}

void warning(Token token, char const *fmt, ...) {
    va_list va;
    va_start(va, fmt);
    warning_va(token, fmt, va);
    va_end(va);
}

void error(Token token, char const *fmt, ...) {
    va_list va;
    va_start(va, fmt);
    error_va(token, fmt, va);
    va_end(va);
}

void error(TokenPos pos, char const *fmt, ...) {
    va_list va;
    va_start(va, fmt);
    Token token = {};
    token.pos = pos;
    error_va(token, fmt, va);
    va_end(va);
}

void error_line(char const *fmt, ...) {
    va_list va;
    va_start(va, fmt);
    error_line_va(fmt, va);
    va_end(va);
}

void syntax_error(Token token, char const *fmt, ...) {
    va_list va;
    va_start(va, fmt);
    syntax_error_va(token, fmt, va);
    va_end(va);
}

void syntax_error(TokenPos pos, char const *fmt, ...) {
    va_list va;
    va_start(va, fmt);
    Token token = {};
    token.pos = pos;
    syntax_error_va(token, fmt, va);
    va_end(va);
}

void syntax_warning(Token token, char const *fmt, ...) {
    va_list va;
    va_start(va, fmt);
    syntax_warning_va(token, fmt, va);
    va_end(va);
}

void compiler_error(char const *fmt, ...) {
    va_list va;
    va_start(va, fmt);
    gb_printf_err("Internal Compiler Error: %s\n",
                  gb_bprintf_va(fmt, va));
    va_end(va);
    gb_exit(1);
}
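
// The token_is_* predicates below rely on the Token__*Begin/Token__*End
// sentinels declared in TOKEN_KINDS: a kind belongs to a class exactly when
// it falls strictly between that class's sentinel pair.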
gb_inline bool token_is_literal(TokenKind t) {
    return gb_is_between(t, Token__LiteralBegin+1, Token__LiteralEnd-1);
}
gb_inline bool token_is_operator(TokenKind t) {
    return gb_is_between(t, Token__OperatorBegin+1, Token__OperatorEnd-1);
}
gb_inline bool token_is_keyword(TokenKind t) {
    return gb_is_between(t, Token__KeywordBegin+1, Token__KeywordEnd-1);
}
gb_inline bool token_is_comparison(TokenKind t) {
    return gb_is_between(t, Token__ComparisonBegin+1, Token__ComparisonEnd-1);
}
gb_inline bool token_is_shift(TokenKind t) {
    return t == Token_Shl || t == Token_Shr;
}

gb_inline void print_token(Token t) { gb_printf("%.*s\n", LIT(t.string)); }

enum TokenizerInitError {
    TokenizerInit_None,

    TokenizerInit_Invalid,
    TokenizerInit_NotExists,
    TokenizerInit_Permission,
    TokenizerInit_Empty,
    TokenizerInit_FileTooLarge,

    TokenizerInit_Count,
};

struct TokenizerState {
    Rune  curr_rune;  // current character
    u8 *  curr;       // character pos
    u8 *  read_curr;  // pos from start
    u8 *  line;       // current line pos
    i32   line_count;
    bool  insert_semicolon;
};

enum TokenizerFlags {
    TokenizerFlag_None = 0,
    TokenizerFlag_InsertSemicolon = 1<<0,
};

struct Tokenizer {
    i32 curr_file_id;
    String fullpath;
    u8 *start;
    u8 *end;

    Rune  curr_rune;  // current character
    u8 *  curr;       // character pos
    u8 *  read_curr;  // pos from start
    u8 *  line;       // current line pos
    i32   line_count;

    i32 error_count;
    Array<String> allocated_strings;

    TokenizerFlags flags;
    bool insert_semicolon;
};

TokenizerState save_tokenizer_state(Tokenizer *t) {
    TokenizerState state = {};
    state.curr_rune        = t->curr_rune;
    state.curr             = t->curr;
    state.read_curr        = t->read_curr;
    state.line             = t->line;
    state.line_count       = t->line_count;
    state.insert_semicolon = t->insert_semicolon;
    return state;
}

void restore_tokenizer_state(Tokenizer *t, TokenizerState *state) {
    t->curr_rune        = state->curr_rune;
    t->curr             = state->curr;
    t->read_curr        = state->read_curr;
    t->line             = state->line;
    t->line_count       = state->line_count;
    t->insert_semicolon = state->insert_semicolon;
}

void tokenizer_err(Tokenizer *t, char const *msg, ...) {
    va_list va;
    isize column = t->read_curr - t->line + 1;
    if (column < 1) {
        column = 1;
    }
    Token token = {};
    token.pos.file_id = t->curr_file_id;
    token.pos.line = t->line_count;
    token.pos.column = cast(i32)column;

    va_start(va, msg);
    syntax_error_va(token, msg, va);
    va_end(va);

    t->error_count++;
}

void tokenizer_err(Tokenizer *t, TokenPos const &pos, char const *msg, ...) {
    va_list va;
    isize column = t->read_curr - t->line + 1;
    if (column < 1) {
        column = 1;
    }

    Token token = {};
    token.pos = pos;

    va_start(va, msg);
    syntax_error_va(token, msg, va);
    va_end(va);

    t->error_count++;
}
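
// advance_to_next_rune decodes one rune at t->read_curr into t->curr_rune:
// plain ASCII advances a single byte, anything >= 0x80 goes through
// gb_utf8_decode, and NUL bytes, malformed UTF-8, and byte order marks that
// are not at the very start of the file are reported as errors.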
void advance_to_next_rune(Tokenizer *t) {
    if (t->read_curr < t->end) {
        Rune rune;
        isize width = 1;

        t->curr = t->read_curr;
        if (t->curr_rune == '\n') {
            t->line = t->curr;
            t->line_count++;
        }
        rune = *t->read_curr;
        if (rune == 0) {
            tokenizer_err(t, "Illegal character NUL");
        } else if (rune >= 0x80) { // not ASCII
            width = gb_utf8_decode(t->read_curr, t->end-t->read_curr, &rune);
            if (rune == GB_RUNE_INVALID && width == 1) {
                tokenizer_err(t, "Illegal UTF-8 encoding");
            } else if (rune == GB_RUNE_BOM && t->curr-t->start > 0) {
                tokenizer_err(t, "Illegal byte order mark");
            }
        }
        t->read_curr += width;
        t->curr_rune = rune;
    } else {
        t->curr = t->end;
        if (t->curr_rune == '\n') {
            t->line = t->curr;
            t->line_count++;
        }
        t->curr_rune = GB_RUNE_EOF;
    }
}

TokenizerInitError init_tokenizer(Tokenizer *t, String fullpath, TokenizerFlags flags = TokenizerFlag_None) {
    TokenizerInitError err = TokenizerInit_None;

    char *c_str = alloc_cstring(heap_allocator(), fullpath);
    defer (gb_free(heap_allocator(), c_str));

    // TODO(bill): Memory map rather than copy contents
    gbFileContents fc = gb_file_read_contents(heap_allocator(), true, c_str);

    t->flags = flags;
    t->fullpath = fullpath;
    t->line_count = 1;

    if (fc.size > I32_MAX) {
        err = TokenizerInit_FileTooLarge;
        gb_file_free_contents(&fc);
    } else if (fc.data != nullptr) {
        t->start = cast(u8 *)fc.data;
        t->line = t->read_curr = t->curr = t->start;
        t->end = t->start + fc.size;

        advance_to_next_rune(t);
        if (t->curr_rune == GB_RUNE_BOM) {
            advance_to_next_rune(t); // Ignore BOM at file beginning
        }
        array_init(&t->allocated_strings, heap_allocator());
    } else {
        gbFile f = {};
        gbFileError file_err = gb_file_open(&f, c_str);
        defer (gb_file_close(&f));

        switch (file_err) {
        case gbFileError_Invalid:    err = TokenizerInit_Invalid;    break;
        case gbFileError_NotExists:  err = TokenizerInit_NotExists;  break;
        case gbFileError_Permission: err = TokenizerInit_Permission; break;
        }
        if (err == TokenizerInit_None && gb_file_size(&f) == 0) {
            err = TokenizerInit_Empty;
        }
    }
    return err;
}

gb_inline void destroy_tokenizer(Tokenizer *t) {
    if (t->start != nullptr) {
        gb_free(heap_allocator(), t->start);
    }
    for_array(i, t->allocated_strings) {
        gb_free(heap_allocator(), t->allocated_strings[i].text);
    }
    array_free(&t->allocated_strings);
}

gb_inline i32 digit_value(Rune r) {
    switch (r) {
    case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
        return r - '0';
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
        return r - 'a' + 10;
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
        return r - 'A' + 10;
    }
    return 16; // NOTE(bill): Larger than highest possible
}

gb_inline void scan_mantissa(Tokenizer *t, i32 base) {
    while (digit_value(t->curr_rune) < base || t->curr_rune == '_') {
        advance_to_next_rune(t);
    }
}

u8 peek_byte(Tokenizer *t, isize offset=0) {
    if (t->read_curr+offset < t->end) {
        return t->read_curr[offset];
    }
    return 0;
}
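
// scan_number_to_token covers every numeric literal form handled below, e.g.:
//
//     0b1010     binary              0o777     octal
//     0d255      explicit decimal    0z11a     dozenal (base 12)
//     0x1fe      hexadecimal         0h3f80    hex float (4, 8, or 16 digits)
//     1_000_000  digit separators    12.5e-3   float with exponent
//     3i         imaginary suffix    .5        float via a leading '.'
//
// '_' is accepted anywhere scan_mantissa accepts a digit.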
void scan_number_to_token(Tokenizer *t, Token *token, bool seen_decimal_point) {
    token->kind = Token_Integer;
    token->string = {t->curr, 1};
    token->pos.file_id = t->curr_file_id;
    token->pos.line = t->line_count;
    token->pos.column = cast(i32)(t->curr-t->line+1);

    if (seen_decimal_point) {
        token->string.text -= 1;
        token->string.len  += 1;
        token->pos.column  -= 1;
        token->kind = Token_Float;
        scan_mantissa(t, 10);
        goto exponent;
    }

    if (t->curr_rune == '0') {
        u8 *prev = t->curr;
        advance_to_next_rune(t);
        switch (t->curr_rune) {
        case 'b': // Binary
            advance_to_next_rune(t);
            scan_mantissa(t, 2);
            if (t->curr - prev <= 2) {
                token->kind = Token_Invalid;
            }
            goto end;
        case 'o': // Octal
            advance_to_next_rune(t);
            scan_mantissa(t, 8);
            if (t->curr - prev <= 2) {
                token->kind = Token_Invalid;
            }
            goto end;
        case 'd': // Decimal
            advance_to_next_rune(t);
            scan_mantissa(t, 10);
            if (t->curr - prev <= 2) {
                token->kind = Token_Invalid;
            }
            goto end;
        case 'z': // Dozenal
            advance_to_next_rune(t);
            scan_mantissa(t, 12);
            if (t->curr - prev <= 2) {
                token->kind = Token_Invalid;
            }
            goto end;
        case 'x': // Hexadecimal
            advance_to_next_rune(t);
            scan_mantissa(t, 16);
            if (t->curr - prev <= 2) {
                token->kind = Token_Invalid;
            }
            goto end;
        case 'h': // Hexadecimal Float
            token->kind = Token_Float;
            advance_to_next_rune(t);
            scan_mantissa(t, 16);
            if (t->curr - prev <= 2) {
                token->kind = Token_Invalid;
            } else {
                u8 *start = prev+2;
                isize n = t->curr - start;
                isize digit_count = 0;
                for (isize i = 0; i < n; i++) {
                    if (start[i] != '_') {
                        digit_count += 1;
                    }
                }
                switch (digit_count) {
                case 4:
                case 8:
                case 16:
                    break;
                default:
                    tokenizer_err(t, "Invalid hexadecimal float, expected 4, 8, or 16 digits, got %td", digit_count);
                    break;
                }
            }
            goto end;
        default:
            scan_mantissa(t, 10);
            goto fraction;
        }
    }

    scan_mantissa(t, 10);

fraction:
    if (t->curr_rune == '.') {
        if (peek_byte(t) == '.') {
            // NOTE(bill): this is kind of ellipsis
            goto end;
        }
        advance_to_next_rune(t);
        token->kind = Token_Float;
        scan_mantissa(t, 10);
    }

exponent:
    if (t->curr_rune == 'e' || t->curr_rune == 'E') {
        token->kind = Token_Float;
        advance_to_next_rune(t);
        if (t->curr_rune == '-' || t->curr_rune == '+') {
            advance_to_next_rune(t);
        }
        scan_mantissa(t, 10);
    }

    switch (t->curr_rune) {
    case 'i': case 'j': case 'k':
        token->kind = Token_Imag;
        advance_to_next_rune(t);
        break;
    }

end:
    token->string.len = t->curr - token->string.text;
    return;
}
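
// scan_escape validates a single escape sequence after a '\', e.g. "\n",
// "\101" (3 octal digits), "\x41" (2 hex digits), "\u00e9" (4 hex digits),
// and "\U0001f600" (8 hex digits). It only checks digit counts here; the
// actual decoding happens later in unquote_string.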
bool scan_escape(Tokenizer *t) {
    isize len = 0;
    u32 base = 0, max = 0, x = 0;

    Rune r = t->curr_rune;
    switch (r) {
    case 'a':
    case 'b':
    case 'e':
    case 'f':
    case 'n':
    case 'r':
    case 't':
    case 'v':
    case '\\':
    case '\'':
    case '\"':
        advance_to_next_rune(t);
        return true;
    case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
        len = 3; base = 8; max = 255;
        break;
    case 'x':
        advance_to_next_rune(t);
        len = 2; base = 16; max = 255;
        break;
    case 'u':
        advance_to_next_rune(t);
        len = 4; base = 16; max = GB_RUNE_MAX;
        break;
    case 'U':
        advance_to_next_rune(t);
        len = 8; base = 16; max = GB_RUNE_MAX;
        break;
    default:
        if (t->curr_rune < 0) {
            tokenizer_err(t, "Escape sequence was not terminated");
        } else {
            tokenizer_err(t, "Unknown escape sequence");
        }
        return false;
    }

    while (len-- > 0) {
        u32 d = cast(u32)digit_value(t->curr_rune);
        if (d >= base) {
            if (t->curr_rune < 0) {
                tokenizer_err(t, "Escape sequence was not terminated");
            } else {
                tokenizer_err(t, "Illegal character %d in escape sequence", t->curr_rune);
            }
            return false;
        }
        x = x*base + d;
        advance_to_next_rune(t);
    }
    return true;
}
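
// tokenizer_get_token implements Go-style automatic semicolon insertion when
// TokenizerFlag_InsertSemicolon is set: after a token that can legally end a
// statement (an identifier, a literal, 'break'/'continue'/'fallthrough'/
// 'return', '++'/'--', '?', '^', or a closing ')', ']', '}'), the next
// newline is emitted as a Token_Semicolon whose string is "\n".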
void tokenizer_get_token(Tokenizer *t, Token *token, int repeat=0) {
    // Skip whitespace
    for (;;) {
        switch (t->curr_rune) {
        case '\n':
            if (t->insert_semicolon) {
                break;
            }
            /*fallthrough*/
        case ' ':
        case '\t':
        case '\r':
            advance_to_next_rune(t);
            continue;
        }
        break;
    }

    token->kind = Token_Invalid;
    token->string.text = t->curr;
    token->string.len = 1;
    token->pos.file_id = t->curr_file_id;
    token->pos.line = t->line_count;
    token->pos.offset = cast(i32)(t->curr - t->start);
    token->pos.column = cast(i32)(t->curr - t->line + 1);

    TokenPos current_pos = token->pos;

    bool insert_semicolon = false;

    Rune curr_rune = t->curr_rune;
    if (rune_is_letter(curr_rune)) {
        token->kind = Token_Ident;
        while (rune_is_letter_or_digit(t->curr_rune)) {
            advance_to_next_rune(t);
        }

        token->string.len = t->curr - token->string.text;

        // NOTE(bill): Heavily optimize to make it faster to find keywords
        if (1 < token->string.len && token->string.len <= max_keyword_size && keyword_indices[token->string.len]) {
            u32 hash = keyword_hash(token->string.text, token->string.len);
            u32 index = hash & KEYWORD_HASH_TABLE_MASK;
            KeywordHashEntry *entry = &keyword_hash_table[index];
            if (entry->kind != Token_Invalid && entry->hash == hash) {
                if (str_eq(entry->text, token->string)) {
                    token->kind = entry->kind;
                    if (token->kind == Token_not_in && entry->text == "notin") {
                        syntax_warning(*token, "'notin' is deprecated in favour of 'not_in'");
                    }
                }
            }
        }

        switch (token->kind) {
        case Token_Ident:
        case Token_context:
        case Token_typeid: // Dunno?
        case Token_break:
        case Token_continue:
        case Token_fallthrough:
        case Token_return:
            insert_semicolon = true;
            break;
        }

        if (t->flags & TokenizerFlag_InsertSemicolon) {
            t->insert_semicolon = insert_semicolon;
        }

        return;
    } else if (gb_is_between(curr_rune, '0', '9')) {
        insert_semicolon = true;
        scan_number_to_token(t, token, false);
    } else {
        advance_to_next_rune(t);
        switch (curr_rune) {
        case GB_RUNE_EOF:
            token->kind = Token_EOF;
            if (t->insert_semicolon) {
                t->insert_semicolon = false; // EOF consumed
                token->string = str_lit("\n");
                token->kind = Token_Semicolon;
                return;
            }
            break;

        case '\n':
            t->insert_semicolon = false;
            token->string = str_lit("\n");
            token->kind = Token_Semicolon;
            return;

        case '\\':
            if (t->flags & TokenizerFlag_InsertSemicolon) {
                t->insert_semicolon = false;
            }
            tokenizer_get_token(t, token);
            if (token->pos.line == current_pos.line) {
                tokenizer_err(t, token_pos_add_column(current_pos), "Expected a newline after \\");
            }
            // NOTE(bill): tokenizer_get_token has been called already, return early
            return;

        case '\'': // Rune Literal
        {
            insert_semicolon = true;
            token->kind = Token_Rune;
            Rune quote = curr_rune;
            bool valid = true;
            i32 n = 0, success;
            for (;;) {
                Rune r = t->curr_rune;
                if (r == '\n' || r < 0) {
                    tokenizer_err(t, "Rune literal not terminated");
                    break;
                }
                advance_to_next_rune(t);
                if (r == quote) {
                    break;
                }
                n++;
                if (r == '\\') {
                    if (!scan_escape(t)) {
                        valid = false;
                    }
                }
            }

            // TODO(bill): Better Error Handling
            if (valid && n != 1) {
                tokenizer_err(t, "Invalid rune literal");
            }
            token->string.len = t->curr - token->string.text;
            success = unquote_string(heap_allocator(), &token->string, 0);
            if (success > 0) {
                if (success == 2) {
                    array_add(&t->allocated_strings, token->string);
                }
            } else {
                tokenizer_err(t, "Invalid rune literal");
            }

            if (t->flags & TokenizerFlag_InsertSemicolon) {
                t->insert_semicolon = insert_semicolon;
            }
            return;
        } break;

        case '`': // Raw String Literal
        case '"': // String Literal
        {
            insert_semicolon = true;
            bool has_carriage_return = false;
            i32 success;
            Rune quote = curr_rune;
            token->kind = Token_String;
            if (curr_rune == '"') {
                for (;;) {
                    Rune r = t->curr_rune;
                    if (r == '\n' || r < 0) {
                        tokenizer_err(t, "String literal not terminated");
                        break;
                    }
                    advance_to_next_rune(t);
                    if (r == quote) {
                        break;
                    }
                    if (r == '\\') {
                        scan_escape(t);
                    }
                }
            } else {
                for (;;) {
                    Rune r = t->curr_rune;
                    if (r < 0) {
                        tokenizer_err(t, "String literal not terminated");
                        break;
                    }
                    advance_to_next_rune(t);
                    if (r == quote) {
                        break;
                    }
                    if (r == '\r') {
                        has_carriage_return = true;
                    }
                }
            }
            token->string.len = t->curr - token->string.text;
            success = unquote_string(heap_allocator(), &token->string, 0, has_carriage_return);
            if (success > 0) {
                if (success == 2) {
                    array_add(&t->allocated_strings, token->string);
                }
            } else {
                tokenizer_err(t, "Invalid string literal");
            }

            if (t->flags & TokenizerFlag_InsertSemicolon) {
                t->insert_semicolon = insert_semicolon;
            }
            return;
        } break;

        case '.':
            if (t->curr_rune == '.') {
                advance_to_next_rune(t);
                token->kind = Token_Ellipsis;
                if (t->curr_rune == '<') {
                    advance_to_next_rune(t);
                    token->kind = Token_RangeHalf;
                }
            } else if ('0' <= t->curr_rune && t->curr_rune <= '9') {
                scan_number_to_token(t, token, true);
            } else {
                token->kind = Token_Period;
            }
            break;

        case '@': token->kind = Token_At;     break;
        case '$': token->kind = Token_Dollar; break;
        case '?':
            insert_semicolon = true;
            token->kind = Token_Question;
            break;
        case '^':
            insert_semicolon = true;
            token->kind = Token_Pointer;
            break;
        case ';': token->kind = Token_Semicolon; break;
        case ',': token->kind = Token_Comma;     break;
        case ':': token->kind = Token_Colon;     break;
        case '(': token->kind = Token_OpenParen; break;
        case ')':
            insert_semicolon = true;
            token->kind = Token_CloseParen;
            break;
        case '[': token->kind = Token_OpenBracket; break;
        case ']':
            insert_semicolon = true;
            token->kind = Token_CloseBracket;
            break;
        case '{': token->kind = Token_OpenBrace; break;
        case '}':
            insert_semicolon = true;
            token->kind = Token_CloseBrace;
            break;

        case '%':
            token->kind = Token_Mod;
            if (t->curr_rune == '=') {
                advance_to_next_rune(t);
                token->kind = Token_ModEq;
            } else if (t->curr_rune == '%') {
                token->kind = Token_ModMod;
                advance_to_next_rune(t);
                if (t->curr_rune == '=') {
                    token->kind = Token_ModModEq;
                    advance_to_next_rune(t);
                }
            }
            break;

        case '*':
            token->kind = Token_Mul;
            if (t->curr_rune == '=') {
                advance_to_next_rune(t);
                token->kind = Token_MulEq;
            }
            break;
        case '=':
            token->kind = Token_Eq;
            if (t->curr_rune == '=') {
                advance_to_next_rune(t);
                token->kind = Token_CmpEq;
            }
            break;
        case '~':
            token->kind = Token_Xor;
            if (t->curr_rune == '=') {
                advance_to_next_rune(t);
                token->kind = Token_XorEq;
            }
            break;
        case '!':
            token->kind = Token_Not;
            if (t->curr_rune == '=') {
                advance_to_next_rune(t);
                token->kind = Token_NotEq;
            }
            break;
        case '+':
            token->kind = Token_Add;
            if (t->curr_rune == '=') {
                advance_to_next_rune(t);
                token->kind = Token_AddEq;
            } else if (t->curr_rune == '+') {
                advance_to_next_rune(t);
                token->kind = Token_Increment;
                insert_semicolon = true;
            }
            break;
        case '-':
            token->kind = Token_Sub;
            if (t->curr_rune == '=') {
                advance_to_next_rune(t);
                token->kind = Token_SubEq;
            } else if (t->curr_rune == '-' && peek_byte(t) == '-') {
                advance_to_next_rune(t);
                advance_to_next_rune(t);
                token->kind = Token_Undef;
            } else if (t->curr_rune == '-') {
                advance_to_next_rune(t);
                token->kind = Token_Decrement;
                insert_semicolon = true;
            } else if (t->curr_rune == '>') {
                advance_to_next_rune(t);
                token->kind = Token_ArrowRight;
            }
            break;

        case '#':
            if (t->curr_rune == '!') {
                insert_semicolon = t->insert_semicolon;
                token->kind = Token_Comment;
                while (t->curr_rune != '\n' && t->curr_rune != GB_RUNE_EOF) {
                    advance_to_next_rune(t);
                }
            } else {
                token->kind = Token_Hash;
            }
            break;

        case '/': {
            token->kind = Token_Quo;
            if (t->curr_rune == '/') {
                insert_semicolon = t->insert_semicolon;
                token->kind = Token_Comment;
                while (t->curr_rune != '\n' && t->curr_rune != GB_RUNE_EOF) {
                    advance_to_next_rune(t);
                }
            } else if (t->curr_rune == '*') {
                token->kind = Token_Comment;
                isize comment_scope = 1;
                advance_to_next_rune(t);
                while (comment_scope > 0) {
                    if (t->curr_rune == GB_RUNE_EOF) {
                        break;
                    } else if (t->curr_rune == '/') {
                        advance_to_next_rune(t);
                        if (t->curr_rune == '*') {
                            advance_to_next_rune(t);
                            comment_scope++;
                        }
                    } else if (t->curr_rune == '*') {
                        advance_to_next_rune(t);
                        if (t->curr_rune == '/') {
                            advance_to_next_rune(t);
                            comment_scope--;
                        }
                    } else {
                        advance_to_next_rune(t);
                    }
                }
            } else if (t->curr_rune == '=') {
                advance_to_next_rune(t);
                token->kind = Token_QuoEq;
            }
        } break;

        case '<':
            token->kind = Token_Lt;
            if (t->curr_rune == '=') {
                token->kind = Token_LtEq;
                advance_to_next_rune(t);
            } else if (t->curr_rune == '<') {
                token->kind = Token_Shl;
                advance_to_next_rune(t);
                if (t->curr_rune == '=') {
                    token->kind = Token_ShlEq;
                    advance_to_next_rune(t);
                }
            }
            break;
        case '>':
            token->kind = Token_Gt;
            if (t->curr_rune == '=') {
                token->kind = Token_GtEq;
                advance_to_next_rune(t);
            } else if (t->curr_rune == '>') {
                token->kind = Token_Shr;
                advance_to_next_rune(t);
                if (t->curr_rune == '=') {
                    token->kind = Token_ShrEq;
                    advance_to_next_rune(t);
                }
            }
            break;
        case '&':
            token->kind = Token_And;
            if (t->curr_rune == '~') {
                token->kind = Token_AndNot;
                advance_to_next_rune(t);
                if (t->curr_rune == '=') {
                    token->kind = Token_AndNotEq;
                    advance_to_next_rune(t);
                }
            } else if (t->curr_rune == '=') {
                token->kind = Token_AndEq;
                advance_to_next_rune(t);
            } else if (t->curr_rune == '&') {
                token->kind = Token_CmpAnd;
                advance_to_next_rune(t);
                if (t->curr_rune == '=') {
                    token->kind = Token_CmpAndEq;
                    advance_to_next_rune(t);
                }
            }
            break;
        case '|':
            token->kind = Token_Or;
            if (t->curr_rune == '=') {
                token->kind = Token_OrEq;
                advance_to_next_rune(t);
            } else if (t->curr_rune == '|') {
                token->kind = Token_CmpOr;
                advance_to_next_rune(t);
                if (t->curr_rune == '=') {
                    token->kind = Token_CmpOrEq;
                    advance_to_next_rune(t);
                }
            }
            break;

        default:
            if (curr_rune != GB_RUNE_BOM) {
                u8 str[4] = {};
                int len = cast(int)gb_utf8_encode_rune(str, curr_rune);
                tokenizer_err(t, "Illegal character: %.*s (%d) ", len, str, curr_rune);
            }
            insert_semicolon = t->insert_semicolon; // Preserve insert_semicolon info
            token->kind = Token_Invalid;
            break;
        }
    }

    if (t->flags & TokenizerFlag_InsertSemicolon) {
        t->insert_semicolon = insert_semicolon;
    }

    token->string.len = t->curr - token->string.text;

    return;
}
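
// A minimal driver sketch (not part of the compiler proper) showing how the
// pieces above fit together; "example.odin" is a placeholder path:
//
//     init_global_error_collector();
//     init_keyword_hash_table();
//
//     Tokenizer t = {};
//     if (init_tokenizer(&t, str_lit("example.odin"), TokenizerFlag_InsertSemicolon) == TokenizerInit_None) {
//         Token token = {};
//         do {
//             tokenizer_get_token(&t, &token);
//             print_token(token);
//         } while (token.kind != Token_EOF);
//         destroy_tokenizer(&t);
//     }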