ソースを参照

Implement lexer based syntax highlighting

rexim 2 年 前
コミット
0b01e5b14c
9 ファイル変更378 行追加24 行削除
  1. 1 1
      build.sh
  2. BIN
      iosevka-regular.ttf
  3. 14 0
      src/common.c
  4. 9 0
      src/common.h
  5. 69 17
      src/editor.c
  6. 10 0
      src/editor.h
  7. 221 0
      src/lexer.c
  8. 45 0
      src/lexer.h
  9. 9 6
      src/main.c

+ 1 - 1
build.sh

@@ -6,7 +6,7 @@ CC="${CXX:-cc}"
 PKGS="sdl2 glew freetype2"
 CFLAGS="-Wall -Wextra -std=c11 -pedantic -ggdb"
 LIBS=-lm
-SRC="src/main.c src/la.c src/editor.c src/file_browser.c src/free_glyph.c src/simple_renderer.c src/common.c"
+SRC="src/main.c src/la.c src/editor.c src/file_browser.c src/free_glyph.c src/simple_renderer.c src/common.c src/lexer.c"
 
 if [ `uname` = "Darwin" ]; then
     CFLAGS+=" -framework OpenGL"

BIN
iosevka-regular.ttf


+ 14 - 0
src/common.c

@@ -110,3 +110,17 @@ defer:
     if (f) fclose(f);
     return result;
 }
+
+// Unpacks a 0xRRGGBBAA color into a Vec4f: red goes to x, green to y, blue to z
+// and alpha to w, each divided by 255.
+Vec4f hex_to_vec4f(uint32_t color)
+{
+    const float channel_max = 255.0f;
+    Vec4f unpacked;
+    // Channels sit in successive bytes, red in the most significant one.
+    unpacked.x = ((color >> 24) & 0xFF) / channel_max;
+    unpacked.y = ((color >> 16) & 0xFF) / channel_max;
+    unpacked.z = ((color >> 8) & 0xFF) / channel_max;
+    unpacked.w = (color & 0xFF) / channel_max;
+    return unpacked;
+}

+ 9 - 0
src/common.h

@@ -3,6 +3,8 @@
 
 #include <stdlib.h>
 #include <stdio.h>
+#include <stdint.h>
+#include "./la.h"
 
 #define SCREEN_WIDTH 800
 #define SCREEN_HEIGHT 600
@@ -20,6 +22,11 @@ typedef int Errno;
         printf("%s:%d: UNIMPLEMENTED: %s \n", __FILE__, __LINE__, __VA_ARGS__); \
         exit(1);                                                                \
     } while(0)
+// Reports a code path that should never execute: prints the file, line and the
+// given message to stdout, then exits with status 1. The argument is formatted
+// with a single %s, so pass exactly one C string.
+#define UNREACHABLE(...)                                                      \
+    do {                                                                      \
+        printf("%s:%d: UNREACHABLE: %s \n", __FILE__, __LINE__, __VA_ARGS__); \
+        exit(1);                                                              \
+    } while(0)
 #define UNUSED(x) (void)(x)
 
 #define DA_INIT_CAP 256
@@ -78,4 +85,6 @@ Errno read_entire_file(const char *file_path, String_Builder *sb);
 Errno write_entire_file(const char *file_path, const char *buf, size_t buf_size);
 Errno read_entire_dir(const char *dir_path, Files *files);
 
+Vec4f hex_to_vec4f(uint32_t color);
+
 #endif // COMMON_H_

+ 69 - 17
src/editor.c

@@ -4,7 +4,7 @@
 #include <errno.h>
 #include <string.h>
 #include "./editor.h"
-#include "common.h"
+#include "./common.h"
 
 void editor_backspace(Editor *e)
 {
@@ -149,6 +149,16 @@ void editor_recompute_lines(Editor *e)
 
     line.end = e->data.count;
     da_append(&e->lines, line);
+
+    //////////////////////////////
+
+    e->tokens.count = 0;
+    Lexer l = lexer_new(e->atlas, e->data.items, e->data.count);
+    Token t = lexer_next(&l);
+    while (t.kind != TOKEN_END) {
+        da_append(&e->tokens, t);
+        t = lexer_next(&l);
+    }
 }
 
 bool editor_line_starts_with(Editor *e, size_t row, size_t col, const char *prefix)
@@ -181,22 +191,6 @@ const char *editor_line_starts_with_one_of(Editor *e, size_t row, size_t col, co
 
 
 
-const char *keywords[] = {
-    "auto", "break", "case", "char", "const", "continue", "default", "do", "double",
-    "else", "enum", "extern", "float", "for", "goto", "if", "int", "long", "register",
-    "return", "short", "signed", "sizeof", "static", "struct", "switch", "typedef",
-    "union", "unsigned", "void", "volatile", "while", "alignas", "alignof", "and",
-    "and_eq", "asm", "atomic_cancel", "atomic_commit", "atomic_noexcept", "bitand",
-    "bitor", "bool", "catch", "char16_t", "char32_t", "char8_t", "class", "co_await",
-    "co_return", "co_yield", "compl", "concept", "const_cast", "consteval", "constexpr",
-    "constinit", "decltype", "delete", "dynamic_cast", "explicit", "export", "false",
-    "friend", "inline", "mutable", "namespace", "new", "noexcept", "not", "not_eq",
-    "nullptr", "operator", "or", "or_eq", "private", "protected", "public", "reflexpr",
-    "reinterpret_cast", "requires", "static_assert", "static_cast", "synchronized",
-    "template", "this", "thread_local", "throw", "true", "try", "typeid", "typename",
-    "using", "virtual", "wchar_t", "xor", "xor_eq",
-};
-#define keywords_count (sizeof(keywords)/sizeof(keywords[0]))
 
 void editor_render(SDL_Window *window, Free_Glyph_Atlas *atlas, Simple_Renderer *sr, Editor *editor)
 {
@@ -209,6 +203,62 @@ void editor_render(SDL_Window *window, Free_Glyph_Atlas *atlas, Simple_Renderer
     sr->time = (float) SDL_GetTicks() / 1000.0f;
 
     // Render text
+#if 1
+    {
+        simple_renderer_set_shader(sr, SHADER_FOR_COLOR);
+        if (editor->selection) {
+            for (size_t row = 0; row < editor->lines.count; ++row) {
+                size_t select_begin_chr = editor->select_begin;
+                size_t select_end_chr = editor->cursor;
+                if (select_begin_chr > select_end_chr) {
+                    SWAP(size_t, select_begin_chr, select_end_chr);
+                }
+
+                Line line_chr = editor->lines.items[row];
+
+                if (select_begin_chr < line_chr.begin) {
+                    select_begin_chr = line_chr.begin;
+                }
+
+                if (select_end_chr > line_chr.end) {
+                    select_end_chr = line_chr.end;
+                }
+
+                if (select_begin_chr <= select_end_chr) {
+                    Vec2f select_begin_scr = vec2f(0, -(float)row * FREE_GLYPH_FONT_SIZE);
+                    free_glyph_atlas_measure_line_sized(
+                        atlas, editor->data.items + line_chr.begin, select_begin_chr - line_chr.begin,
+                        &select_begin_scr);
+
+                    Vec2f select_end_scr = select_begin_scr;
+                    free_glyph_atlas_measure_line_sized(
+                        atlas, editor->data.items + select_begin_chr, select_end_chr - select_begin_chr,
+                        &select_end_scr);
+
+                    Vec4f selection_color = vec4f(.25, .25, .25, 1);
+                    simple_renderer_solid_rect(sr, select_begin_scr, vec2f(select_end_scr.x - select_begin_scr.x, FREE_GLYPH_FONT_SIZE), selection_color);
+                }
+            }
+        }
+        simple_renderer_flush(sr);
+
+        simple_renderer_set_shader(sr, SHADER_FOR_TEXT);
+        for (size_t i = 0; i < editor->tokens.count; ++i) {
+            Token token = editor->tokens.items[i];
+            Vec2f pos = token.position;
+            Vec4f color = vec4fs(1);
+            switch (token.kind) {
+            case TOKEN_PREPROC: color = hex_to_vec4f(0x95A99FFF); break;
+            case TOKEN_KEYWORD: color = hex_to_vec4f(0xFFDD33FF); break;
+            case TOKEN_COMMENT: color = hex_to_vec4f(0xCC8C3CFF); break;
+            case TOKEN_STRING:  color = hex_to_vec4f(0x73c936ff); break;
+            default: {}
+            }
+            free_glyph_atlas_render_line_sized(atlas, sr, token.text, token.text_len, &pos, color);
+        }
+        simple_renderer_flush(sr);
+    }
+#else
     {
         simple_renderer_set_shader(sr, SHADER_FOR_COLOR);
         if (editor->selection) {
@@ -282,6 +332,7 @@ void editor_render(SDL_Window *window, Free_Glyph_Atlas *atlas, Simple_Renderer
 
         simple_renderer_flush(sr);
     }
+#endif
 
     Vec2f cursor_pos = vec2fs(0.0f);
     {
@@ -319,6 +370,7 @@ void editor_render(SDL_Window *window, Free_Glyph_Atlas *atlas, Simple_Renderer
     // Update camera
     {
         float target_scale = 3.0f;
+        max_line_len = 1000.0f; // TODO: fix temporary epic zoom camera action
         if (max_line_len > 1000.0f) {
             max_line_len = 1000.0f;
         }

+ 10 - 0
src/editor.h

@@ -5,6 +5,7 @@
 #include "common.h"
 #include "free_glyph.h"
 #include "simple_renderer.h"
+#include "lexer.h"
 
 #include <SDL2/SDL.h>
 
@@ -20,8 +21,17 @@ typedef struct {
 } Lines;
 
+// Growable array of tokens (items/count/capacity); editor_recompute_lines resets
+// count to 0 and refills it with da_append.
 typedef struct {
+    Token *items;
+    size_t count;
+    size_t capacity;
+} Tokens;
+
+typedef struct {
+    Free_Glyph_Atlas *atlas;
+
     String_Builder data;
     Lines lines;
+    Tokens tokens;
     String_Builder file_path;
 
     bool selection;

+ 221 - 0
src/lexer.c

@@ -0,0 +1,221 @@
+#include <assert.h>
+#include <stdbool.h>
+#include <ctype.h>
+#include <string.h>
+#include "common.h"
+#include "lexer.h"
+
+// Pairs a fixed piece of source text with the token kind it lexes to.
+typedef struct {
+    Token_Kind kind;   // kind assigned when `text` matches at the cursor
+    const char *text;  // NUL-terminated literal spelling of the token
+} Literal_Token;
+
+// Fixed-spelling tokens, tried in this order by lexer_next.
+// None of the spellings may contain a newline (lexer_next relies on that).
+Literal_Token literal_tokens[] = {
+    {.kind = TOKEN_OPEN_PAREN,  .text = "("},
+    {.kind = TOKEN_CLOSE_PAREN, .text = ")"},
+    {.kind = TOKEN_OPEN_CURLY,  .text = "{"},
+    {.kind = TOKEN_CLOSE_CURLY, .text = "}"},
+    {.kind = TOKEN_SEMICOLON,   .text = ";"},
+};
+// Number of entries in literal_tokens.
+#define literal_tokens_count (sizeof(literal_tokens)/sizeof(literal_tokens[0]))
+
+// Identifiers that lexer_next reclassifies from TOKEN_SYMBOL to TOKEN_KEYWORD.
+// The list mixes C keywords with C++ ones (e.g. "class", "template", "namespace").
+const char *keywords[] = {
+    "auto", "break", "case", "char", "const", "continue", "default", "do", "double",
+    "else", "enum", "extern", "float", "for", "goto", "if", "int", "long", "register",
+    "return", "short", "signed", "sizeof", "static", "struct", "switch", "typedef",
+    "union", "unsigned", "void", "volatile", "while", "alignas", "alignof", "and",
+    "and_eq", "asm", "atomic_cancel", "atomic_commit", "atomic_noexcept", "bitand",
+    "bitor", "bool", "catch", "char16_t", "char32_t", "char8_t", "class", "co_await",
+    "co_return", "co_yield", "compl", "concept", "const_cast", "consteval", "constexpr",
+    "constinit", "decltype", "delete", "dynamic_cast", "explicit", "export", "false",
+    "friend", "inline", "mutable", "namespace", "new", "noexcept", "not", "not_eq",
+    "nullptr", "operator", "or", "or_eq", "private", "protected", "public", "reflexpr",
+    "reinterpret_cast", "requires", "static_assert", "static_cast", "synchronized",
+    "template", "this", "thread_local", "throw", "true", "try", "typeid", "typename",
+    "using", "virtual", "wchar_t", "xor", "xor_eq",
+};
+// Number of entries in keywords.
+#define keywords_count (sizeof(keywords)/sizeof(keywords[0]))
+
+// Returns a human-readable name (a string literal) for a token kind.
+// Every enumerator of Token_Kind has a case, including TOKEN_COMMENT and
+// TOKEN_STRING, which lexer_next produces. A value outside the enum reports
+// UNREACHABLE, which prints a diagnostic and exits the process.
+const char *token_kind_name(Token_Kind kind)
+{
+    switch (kind) {
+    case TOKEN_END:
+        return "end of content";
+    case TOKEN_INVALID:
+        return "invalid token";
+    case TOKEN_PREPROC:
+        return "preprocessor directive";
+    case TOKEN_SYMBOL:
+        return "symbol";
+    case TOKEN_OPEN_PAREN:
+        return "open paren";
+    case TOKEN_CLOSE_PAREN:
+        return "close paren";
+    case TOKEN_OPEN_CURLY:
+        return "open curly";
+    case TOKEN_CLOSE_CURLY:
+        return "close curly";
+    case TOKEN_SEMICOLON:
+        return "semicolon";
+    case TOKEN_KEYWORD:
+        return "keyword";
+    case TOKEN_COMMENT:
+        return "comment";
+    case TOKEN_STRING:
+        return "string";
+    default:
+        UNREACHABLE("token_kind_name");
+    }
+    // Not reached (UNREACHABLE exits); keeps compilers from warning about a missing return.
+    return NULL;
+}
+
+// Builds a lexer over `content` (content_len bytes). The cursor, line, bol and x
+// fields start at zero. `atlas` is stored as given; lexer_chop_char skips glyph
+// advance tracking when it is NULL.
+Lexer lexer_new(Free_Glyph_Atlas *atlas, const char *content, size_t content_len)
+{
+    // Members not named here are zero-initialized.
+    return (Lexer) {
+        .atlas = atlas,
+        .content = content,
+        .content_len = content_len,
+    };
+}
+
+// Reports whether the unconsumed content at the cursor begins with `prefix`
+// (a NUL-terminated string). An empty prefix always matches. Does not advance
+// the cursor.
+bool lexer_starts_with(Lexer *l, const char *prefix)
+{
+    const size_t n = strlen(prefix);
+    if (n == 0) return true;
+
+    // The last prefix byte would land at cursor + n - 1; reject if that is past the end.
+    if (l->cursor + n - 1 >= l->content_len) return false;
+
+    return memcmp(l->content + l->cursor, prefix, n) == 0;
+}
+
+// Consumes `len` characters at the cursor, keeping line/bol/x in sync:
+// a newline bumps the line counter, records the new beginning-of-line and resets
+// x; any other character advances x by its glyph's `ax` when an atlas is set.
+void lexer_chop_char(Lexer *l, size_t len)
+{
+    while (len-- > 0) {
+        // TODO: get rid of this assert by checking the length of the chopped prefix upfront
+        assert(l->cursor < l->content_len);
+        const char ch = l->content[l->cursor++];
+
+        if (ch == '\n') {
+            l->line += 1;
+            l->bol = l->cursor;
+            l->x = 0;
+            continue;
+        }
+
+        if (!l->atlas) {
+            continue;
+        }
+
+        // TODO: support for glyphs outside of ASCII range
+        // Anything that does not index the metrics table is measured as '?'.
+        size_t glyph = ch;
+        if (glyph >= GLYPH_METRICS_CAPACITY) {
+            glyph = '?';
+        }
+        l->x += l->atlas->metrics[glyph].ax;
+    }
+}
+
+// Consumes whitespace (as classified by isspace) starting at the cursor, stopping
+// at the first non-whitespace character or at the end of the content.
+void lexer_trim_left(Lexer *l)
+{
+    // <ctype.h> classifiers require a value representable as unsigned char (or EOF).
+    // Cast so that non-ASCII bytes are not undefined behavior where char is signed.
+    while (l->cursor < l->content_len && isspace((unsigned char)l->content[l->cursor])) {
+        lexer_chop_char(l, 1);
+    }
+}
+
+// True if `x` can begin a symbol: a letter (per isalpha) or an underscore.
+bool is_symbol_start(char x)
+{
+    // Cast first: passing a negative char to isalpha is undefined behavior.
+    return isalpha((unsigned char)x) || x == '_';
+}
+
+// True if `x` can continue a symbol: a letter or digit (per isalnum) or an underscore.
+bool is_symbol(char x)
+{
+    // Cast first: passing a negative char to isalnum is undefined behavior.
+    return isalnum((unsigned char)x) || x == '_';
+}
+
+// Skips leading whitespace and lexes the next token from the content.
+//
+// The returned token's `text` points into the lexer's content (it is not a
+// separate NUL-terminated copy) and `text_len` is its length in bytes.
+// `position` is the pen position recorded before the token is consumed:
+// x is the lexer's accumulated advance on the current line, y is
+// -(line * FREE_GLYPH_FONT_SIZE).
+//
+// When no content is left, the token keeps its zero-initialized kind
+// (TOKEN_END) and a text_len of 0.
+Token lexer_next(Lexer *l)
+{
+    lexer_trim_left(l);
+
+    // Every member other than `text` starts out zero, so kind == TOKEN_END.
+    Token token = {
+        .text = &l->content[l->cursor],
+    };
+
+    token.position.x = l->x;
+    token.position.y = -(float)l->line * FREE_GLYPH_FONT_SIZE;
+
+    if (l->cursor >= l->content_len) return token;
+
+    // String literal: runs up to the closing quote or the end of the line.
+    if (l->content[l->cursor] == '"') {
+        // TODO: TOKEN_STRING should also handle escape sequences
+        token.kind = TOKEN_STRING;
+        lexer_chop_char(l, 1);
+        while (l->cursor < l->content_len && l->content[l->cursor] != '"' && l->content[l->cursor] != '\n') {
+            lexer_chop_char(l, 1);
+        }
+        // Also consume the character that stopped the scan (closing quote or newline).
+        if (l->cursor < l->content_len) {
+            lexer_chop_char(l, 1);
+        }
+        token.text_len = &l->content[l->cursor] - token.text;
+        return token;
+    }
+
+    // Preprocessor directive: the rest of the line, including its newline if present.
+    if (l->content[l->cursor] == '#') {
+        // TODO: preproc should also handle newlines
+        token.kind = TOKEN_PREPROC;
+        while (l->cursor < l->content_len && l->content[l->cursor] != '\n') {
+            lexer_chop_char(l, 1);
+        }
+        if (l->cursor < l->content_len) {
+            lexer_chop_char(l, 1);
+        }
+        token.text_len = &l->content[l->cursor] - token.text;
+        return token;
+    }
+
+    // Line comment: the rest of the line, including its newline if present.
+    if (lexer_starts_with(l, "//")) {
+        token.kind = TOKEN_COMMENT;
+        while (l->cursor < l->content_len && l->content[l->cursor] != '\n') {
+            lexer_chop_char(l, 1);
+        }
+        if (l->cursor < l->content_len) {
+            lexer_chop_char(l, 1);
+        }
+        token.text_len = &l->content[l->cursor] - token.text;
+        return token;
+    }
+    
+    // Fixed-spelling tokens; the first matching table entry wins.
+    for (size_t i = 0; i < literal_tokens_count; ++i) {
+        if (lexer_starts_with(l, literal_tokens[i].text)) {
+            // NOTE: this code assumes that there is no newlines in literal_tokens[i].text
+            size_t text_len = strlen(literal_tokens[i].text);
+            token.kind = literal_tokens[i].kind;
+            token.text_len = text_len;
+            lexer_chop_char(l, text_len);
+            return token;
+        }
+    }
+
+    // Symbol: [A-Za-z_][A-Za-z0-9_]*, promoted to TOKEN_KEYWORD on an exact keyword match.
+    if (is_symbol_start(l->content[l->cursor])) {
+        token.kind = TOKEN_SYMBOL;
+        while (l->cursor < l->content_len && is_symbol(l->content[l->cursor])) {
+            lexer_chop_char(l, 1);
+            token.text_len += 1;
+        }
+
+        for (size_t i = 0; i < keywords_count; ++i) {
+            size_t keyword_len = strlen(keywords[i]);
+            if (keyword_len == token.text_len && memcmp(keywords[i], token.text, keyword_len) == 0) {
+                token.kind = TOKEN_KEYWORD;
+                break;
+            }
+        }
+
+        return token;
+    }
+
+    // Anything else becomes a one-character TOKEN_INVALID so the lexer always makes progress.
+    lexer_chop_char(l, 1);
+    token.kind = TOKEN_INVALID;
+    token.text_len = 1;
+    return token;
+}

+ 45 - 0
src/lexer.h

@@ -0,0 +1,45 @@
+#ifndef LEXER_H_
+#define LEXER_H_
+
+#include <stddef.h>
+#include "./la.h"
+#include "./free_glyph.h"
+
+// Kinds of tokens produced by lexer_next.
+typedef enum {
+    TOKEN_END = 0,      // no content left; also the kind of a zero-initialized Token
+    TOKEN_INVALID,      // a single character that matched no other rule
+    TOKEN_PREPROC,      // line starting with '#'
+    TOKEN_SYMBOL,       // identifier that is not a keyword
+    TOKEN_OPEN_PAREN,
+    TOKEN_CLOSE_PAREN,
+    TOKEN_OPEN_CURLY,
+    TOKEN_CLOSE_CURLY,
+    TOKEN_SEMICOLON,
+    TOKEN_KEYWORD,      // identifier found in the keywords table
+    TOKEN_COMMENT,      // "//" line comment
+    TOKEN_STRING,       // double-quoted string
+} Token_Kind;
+
+const char *token_kind_name(Token_Kind kind);
+
+// A single lexed token.
+typedef struct {
+    Token_Kind kind;
+    const char *text;   // points into the lexer's content; not NUL-terminated on its own
+    size_t text_len;    // length of the token text in bytes
+    Vec2f position;     // pen position recorded by lexer_next before consuming the token
+} Token;
+
+// Lexer state over a buffer of `content_len` bytes.
+typedef struct {
+    Free_Glyph_Atlas *atlas;  // glyph metrics used to advance x; may be NULL (x is then not advanced)
+    const char *content;      // buffer being lexed
+    size_t content_len;       // number of bytes in content
+    size_t cursor;            // index of the next unconsumed byte
+    size_t line;              // number of newlines consumed so far
+    size_t bol;               // cursor value just after the most recent newline
+    float x;                  // accumulated glyph advance on the current line
+} Lexer;
+
+Lexer lexer_new(Free_Glyph_Atlas *atlas, const char *content, size_t content_len);
+Token lexer_next(Lexer *l);
+
+#endif // LEXER_H_

+ 9 - 6
src/main.c

@@ -19,6 +19,7 @@
 #include "./free_glyph.h"
 #include "./simple_renderer.h"
 #include "./common.h"
+#include "./lexer.h"
 
 // TODO: Save file dialog
 // Needed when ded is ran without any file so it does not know where to save.
@@ -119,8 +120,6 @@ int main(int argc, char **argv)
 {
     Errno err;
 
-    editor_recompute_lines(&editor);
-
     FT_Library library = {0};
 
     FT_Error error = FT_Init_FreeType(&library);
@@ -129,7 +128,8 @@ int main(int argc, char **argv)
         return 1;
     }
 
-    const char *const font_file_path = "./VictorMono-Regular.ttf";
+    // const char *const font_file_path = "./VictorMono-Regular.ttf";
+    const char *const font_file_path = "./iosevka-regular.ttf";
 
     FT_Face face;
     error = FT_New_Face(library, font_file_path, 0, &face);
@@ -150,7 +150,6 @@ int main(int argc, char **argv)
         return 1;
     }
 
-
     if (argc > 1) {
         const char *file_path = argv[1];
         err = editor_load_from_file(&editor, file_path);
@@ -228,6 +227,9 @@ int main(int argc, char **argv)
     simple_renderer_init(&sr);
     free_glyph_atlas_init(&atlas, face);
 
+    editor.atlas = &atlas;
+    editor_recompute_lines(&editor);
+
     bool quit = false;
     bool file_browser = false;
     while (!quit) {
@@ -375,8 +377,9 @@ int main(int argc, char **argv)
             // TODO(#19): update the viewport and the resolution only on actual window change
             glViewport(0, 0, w, h);
         }
-
-        glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
+        
+        Vec4f bg = hex_to_vec4f(0x181818FF);
+        glClearColor(bg.x, bg.y, bg.z, bg.w);
         glClear(GL_COLOR_BUFFER_BIT);
 
         if (file_browser) {