浏览代码

Merge pull request #134 from tsoding/69

(#69) Introduce Custom Log Formats via the power of PCRE2
Alexey Kutepov 5 年之前
父节点
当前提交
7f96dd6086
共有 41 个文件被更改,包括 124 次插入和 74 次删除
  1. 3 3
      .github/workflows/ci.yml
  2. 1 1
      Makefile
  3. 5 1
      README.md
  4. 3 0
      src/vodus.cpp
  5. 1 1
      src/vodus_main.cpp
  6. 83 57
      src/vodus_message.cpp
  7. 16 11
      src/vodus_video_params.cpp
  8. 2 0
      src/vodus_video_params.hpp
  9. 二进制
      test/utf8/expected-frames/48-frame.png
  10. 二进制
      test/utf8/expected-frames/49-frame.png
  11. 二进制
      test/utf8/expected-frames/50-frame.png
  12. 二进制
      test/utf8/expected-frames/51-frame.png
  13. 二进制
      test/utf8/expected-frames/52-frame.png
  14. 二进制
      test/utf8/expected-frames/53-frame.png
  15. 二进制
      test/utf8/expected-frames/54-frame.png
  16. 二进制
      test/utf8/expected-frames/55-frame.png
  17. 二进制
      test/utf8/expected-frames/56-frame.png
  18. 二进制
      test/utf8/expected-frames/57-frame.png
  19. 二进制
      test/utf8/expected-frames/58-frame.png
  20. 二进制
      test/utf8/expected-frames/59-frame.png
  21. 二进制
      test/utf8/expected-frames/60-frame.png
  22. 二进制
      test/utf8/expected-frames/61-frame.png
  23. 二进制
      test/utf8/expected-frames/62-frame.png
  24. 二进制
      test/utf8/expected-frames/63-frame.png
  25. 二进制
      test/utf8/expected-frames/64-frame.png
  26. 二进制
      test/utf8/expected-frames/65-frame.png
  27. 二进制
      test/utf8/expected-frames/66-frame.png
  28. 二进制
      test/utf8/expected-frames/67-frame.png
  29. 二进制
      test/utf8/expected-frames/68-frame.png
  30. 二进制
      test/utf8/expected-frames/69-frame.png
  31. 二进制
      test/utf8/expected-frames/70-frame.png
  32. 二进制
      test/utf8/expected-frames/71-frame.png
  33. 二进制
      test/utf8/expected-frames/72-frame.png
  34. 二进制
      test/utf8/expected-frames/73-frame.png
  35. 二进制
      test/utf8/expected-frames/74-frame.png
  36. 二进制
      test/utf8/expected-frames/75-frame.png
  37. 二进制
      test/utf8/expected-frames/76-frame.png
  38. 二进制
      test/utf8/expected-frames/77-frame.png
  39. 二进制
      test/utf8/expected-frames/78-frame.png
  40. 9 0
      test/utf8/renew.sh
  41. 1 0
      test/utf8/utf-8.txt

+ 3 - 3
.github/workflows/ci.yml

@@ -9,7 +9,7 @@ jobs:
       - name: install dependencies
         run: |
           sudo apt-get update
-          sudo apt-get install -qq nasm libfreetype6-dev libcurl4-openssl-dev
+          sudo apt-get install -qq nasm libfreetype6-dev libcurl4-openssl-dev libpcre2-dev
       - uses: actions/cache@v2
         with:
           # TODO(#84): centralize third party versions in the build
@@ -47,7 +47,7 @@ jobs:
       - uses: actions/checkout@v1
       - name: install dependencies
         run: |
-          brew install nasm freetype2 openssl
+          brew install nasm freetype2 openssl pcre2
       - uses: actions/cache@v2
         with:
           path: |
@@ -86,7 +86,7 @@ jobs:
       - name: install dependencies
         run: |
           sudo apt-get update
-          sudo apt-get install -qq nasm libfreetype6-dev libcurl4-openssl-dev
+          sudo apt-get install -qq nasm libfreetype6-dev libcurl4-openssl-dev libpcre2-dev
       - uses: actions/cache@v2
         with:
           path: |

+ 1 - 1
Makefile

@@ -7,7 +7,7 @@ VODUS_EXTRA_CXXFLAGS += -DVODUS_SSE -msse4
 endif
 
 # TODO(#87): we need an option to build with system libraries
-VODUS_PKGS=freetype2
+VODUS_PKGS=freetype2 libpcre2-8
 VODUS_CXXFLAGS=-Wall -fno-exceptions -std=c++17 $(VODUS_EXTRA_CXXFLAGS) -ggdb `pkg-config --cflags $(VODUS_PKGS)` -I./third_party/ffmpeg-4.3-dist/usr/local/include/ -I./third_party/giflib-5.2.1-dist/usr/local/include/
 VODUS_LIBS=`pkg-config --libs $(VODUS_PKGS)` -L./third_party/giflib-5.2.1-dist/usr/local/lib/ ./third_party/giflib-5.2.1-dist/usr/local/lib/libgif.a -L./third_party/ffmpeg-4.3-dist/usr/local/lib/ -lavcodec -lavutil -lswresample -pthread -lm -llzma -lz
 

+ 5 - 1
README.md

@@ -15,7 +15,7 @@ changed at any moment or stop working at all.**
 #### Debian
 
 ```console
-$ sudo apt-get install nasm libfreetype6-dev libcurl4-openssl-dev
+$ sudo apt-get install nasm libfreetype6-dev libcurl4-openssl-dev libpcre2-dev
 ```
 
 #### NixOS
@@ -56,3 +56,7 @@ $ ./emote_downloader
 ```console
 $ make render
 ```
+
+## Custom Log Format
+
+<!-- TODO(#135): document how to parse custom log formats with message_regex config parameter -->

+ 3 - 0
src/vodus.cpp

@@ -7,6 +7,9 @@
 
 #include <algorithm>
 
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
+
 #include <ft2build.h>
 #include FT_FREETYPE_H
 #include <gif_lib.h>

+ 1 - 1
src/vodus_main.cpp

@@ -141,7 +141,7 @@ int main(int argc, char *argv[])
     }
 
     Message *messages = nullptr;
-    size_t messages_size = parse_messages_from_string_view(input.unwrap, &messages, params);
+    size_t messages_size = parse_messages_from_string_view(input.unwrap, &messages, params, input_filepath);
     defer(delete[] messages);
 
     Encoder encoder = {};

+ 83 - 57
src/vodus_message.cpp

@@ -95,6 +95,17 @@ struct Message
     }
 };
 
+void print1(FILE *stream, Message message)
+{
+    print(
+        stream,
+        "Message { ",
+        ".timestamp = ", message.timestamp, ", ",
+        ".nickname = ", message.nickname, ", ",
+        ".message = ", message.message,
+        "}");
+}
+
 template <size_t Capacity>
 struct Message_Buffer
 {
@@ -156,61 +167,37 @@ struct Message_Buffer
     }
 };
 
-bool expect_optional_char(String_View *input, char x)
+String_View get_substring_view_by_name(pcre2_code *re,
+                                       pcre2_match_data *match_data,
+                                       const char *name,
+                                       String_View subject)
 {
-    if (input->count > 0 && *input->data == x) {
-        input->chop(1);
-        return true;
-    }
-
-    return false;
-}
-
-void expect_char(String_View *input, char x)
-{
-    if (input->count == 0 || *input->data != x) {
-        println(stderr, "Expected '", x, "'");
-        abort();
-    }
-    input->chop(1);
-}
-
-String_View chop_digits(String_View *input)
-{
-    String_View digits = { 0, input->data };
-    while (input->count > 0 && isdigit(*input->data)) {
-        digits.count++;
-        input->chop(1);
-    }
-    return digits;
+    PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data);
+    int index = pcre2_substring_number_from_name(re, (PCRE2_SPTR) name);
+    const char* substring_start = subject.data + ovector[2*index];
+    size_t substring_length = ovector[2*index+1] - ovector[2*index];
+    return {substring_length, substring_start};
 }
 
-uint64_t chop_timestamp(String_View *input)
+uint64_t get_timestamp_from_match_data(pcre2_code *re,
+                                       pcre2_match_data *match_data,
+                                       String_View subject)
 {
-    *input = input->trim();
-
-    expect_char(input, '[');
-    String_View hours = chop_digits(input);
-    expect_char(input, ':');
-    String_View minutes = chop_digits(input);
-    expect_char(input, ':');
-    String_View seconds = chop_digits(input);
+    String_View hours = get_substring_view_by_name(re, match_data, "hours", subject);
+    String_View minutes = get_substring_view_by_name(re, match_data, "minutes", subject);
+    String_View seconds = get_substring_view_by_name(re, match_data, "seconds", subject);
+    String_View mseconds = get_substring_view_by_name(re, match_data, "milliseconds", subject);
 
     uint64_t mseconds_value = 0;
-    if (expect_optional_char(input, '.')) {
-        auto mseconds = chop_digits(input);
-        for (size_t i = 0; i < 3; ++i) {
-            uint64_t x = 0;
-            if (mseconds.count > 0) {
-                x = *mseconds.data - '0';
-                mseconds.chop(1);
-            }
-            mseconds_value = mseconds_value * 10 + x;
+    for (size_t i = 0; i < 3; ++i) {
+        uint64_t x = 0;
+        if (mseconds.count > 0) {
+            x = *mseconds.data - '0';
+            mseconds.chop(1);
         }
+        mseconds_value = mseconds_value * 10 + x;
     }
 
-    expect_char(input, ']');
-
     const uint64_t timestamp =
         (hours.as_integer<uint64_t>().unwrap * 60 * 60 +
          minutes.as_integer<uint64_t>().unwrap * 60 +
@@ -220,15 +207,29 @@ uint64_t chop_timestamp(String_View *input)
     return timestamp;
 }
 
-String_View chop_nickname(String_View *input)
+size_t parse_messages_from_string_view(String_View input, Message **messages, Video_Params params, const char *input_filepath)
 {
-    *input = input->trim();
-    expect_char(input, '<');
-    return input->chop_by_delim('>');
-}
+    int errorcode = 0;
+    PCRE2_SIZE erroroffset = 0;
+    pcre2_code *re = pcre2_compile(
+        (PCRE2_SPTR) params.message_regex.data,
+        params.message_regex.count, 0,
+        &errorcode,
+        &erroroffset,
+        NULL);
+
+    if (re == NULL) {
+        PCRE2_UCHAR buffer[256];
+        pcre2_get_error_message(errorcode, buffer, sizeof(buffer));
+        // TODO(#136): better PCRE2 compilation errors
+        printf("PCRE2 compilation of message_regex failed at offset %d: %s\n", (int)erroroffset, buffer);
+        exit(1);
+    }
+    defer(pcre2_code_free(re));
+
+    pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(re, NULL);
+    defer(pcre2_match_data_free(match_data));
 
-size_t parse_messages_from_string_view(String_View input, Message **messages, Video_Params params)
-{
     size_t expected_messages_size = input.count_chars('\n') + 1;
     if (params.messages_limit.has_value) {
         expected_messages_size = min(expected_messages_size, params.messages_limit.unwrap);
@@ -237,11 +238,36 @@ size_t parse_messages_from_string_view(String_View input, Message **messages, Vi
     *messages = new Message[expected_messages_size];
 
     size_t messages_size = 0;
-    while (input.count > 0 && messages_size < expected_messages_size) {
+    for (size_t line_number = 1; input.count > 0 && messages_size < expected_messages_size; ++line_number) {
         String_View message = input.chop_by_delim('\n');
-        (*messages)[messages_size].timestamp = chop_timestamp(&message);
-        (*messages)[messages_size].nickname = chop_nickname(&message);
-        (*messages)[messages_size].message = message.trim();
+
+        int rc = pcre2_match(
+            re,                           /* the compiled pattern */
+            (PCRE2_SPTR) message.data,    /* the subject string */
+            message.count,                /* the length of the subject */
+            0,                            /* start at offset 0 in the subject */
+            0,                            /* default options */
+            match_data,                   /* block for storing the result */
+            NULL);                        /* use default match context */
+
+        if (rc < 0) {
+            print(stderr, input_filepath, ":", line_number, ": ");
+
+            switch(rc) {
+            case PCRE2_ERROR_NOMATCH:
+                println(stderr, "message_regex did not match this line");
+                break;
+            default:
+                println(stderr, "Matching error ", rc);
+                break;
+            }
+
+            exit(1);
+        }
+
+        (*messages)[messages_size].timestamp = get_timestamp_from_match_data(re, match_data, message);
+        (*messages)[messages_size].nickname = get_substring_view_by_name(re, match_data, "nickname", message);
+        (*messages)[messages_size].message = get_substring_view_by_name(re, match_data, "message", message).trim();
         messages_size++;
     }
 

+ 16 - 11
src/vodus_video_params.cpp

@@ -24,17 +24,18 @@ void print1(FILE *stream, Output_Type output_type)
 void print1(FILE *stream, Video_Params params)
 {
     println(stream, "{");
-    println(stream, "    .output_type = ", params.output_type, ",");
-    println(stream, "    .fps = ", params.fps, ",");
-    println(stream, "    .width = ", params.width, ",");
-    println(stream, "    .height = ", params.height, ",");
-    println(stream, "    .font_size = ", params.font_size, ",");
-    println(stream, "    .background_color = ", params.background_color, ",");
-    println(stream, "    .nickname_color = ", params.nickname_color, ",");
-    println(stream, "    .text_color = ", params.text_color, ",");
-    println(stream, "    .bitrate = ", params.bitrate, ",");
-    println(stream, "    .font = ", params.font, ",");
-    println(stream, "    .message_limit = ", params.messages_limit, ",");
+    println(stream, "    .output_type = ", params.output_type);
+    println(stream, "    .fps = ", params.fps);
+    println(stream, "    .width = ", params.width);
+    println(stream, "    .height = ", params.height);
+    println(stream, "    .font_size = ", params.font_size);
+    println(stream, "    .background_color = ", params.background_color);
+    println(stream, "    .nickname_color = ", params.nickname_color);
+    println(stream, "    .text_color = ", params.text_color);
+    println(stream, "    .bitrate = ", params.bitrate);
+    println(stream, "    .font = ", params.font);
+    println(stream, "    .message_limit = ", params.messages_limit);
+    println(stream, "    .message_regex = ", params.message_regex);
     print(stream, "}");
 }
 
@@ -51,6 +52,8 @@ Video_Params default_video_params() {
     params.bitrate           = 400'000;
     params.font              = ""_sv;
     params.messages_limit    = {};
+
+    params.message_regex     = "\\[(?<hours>\\d+):(?<minutes>\\d+):(?<seconds>\\d+)(\\.(?<milliseconds>\\d+))?\\] \\<(?<nickname>.+?)\\> (?<message>.*)"_sv;
     return params;
 }
 
@@ -168,6 +171,8 @@ void patch_video_params_from_flag(Video_Params *params, String_View flag, String
         params->font = value;
     } else if (flag == "messages_limit"_sv || flag == "messages-limit"_sv) {
         params->messages_limit = {true, parse_integer_flag<size_t>(flag, value)};
+    } else if (flag == "message_regex"_sv || flag == "message-regex"_sv) {
+        params->message_regex = value;
     } else if (flag == "output_type"_sv || flag == "output-type"_sv) {
         if (value == "video"_sv) {
             params->output_type = Output_Type::Video;

+ 2 - 0
src/vodus_video_params.hpp

@@ -20,6 +20,7 @@ struct Video_Params
     int bitrate;
     String_View font;
     Maybe<size_t> messages_limit;
+    String_View message_regex;
 };
 
 String_View param_names[] = {
@@ -34,6 +35,7 @@ String_View param_names[] = {
     "bitrate"_sv,
     "font"_sv,
     "messages_limit"_sv,
+    "message_regex"_sv,
 };
 const size_t param_names_count = sizeof(param_names) / sizeof(param_names[0]);
 

二进制
test/utf8/expected-frames/48-frame.png


二进制
test/utf8/expected-frames/49-frame.png


二进制
test/utf8/expected-frames/50-frame.png


二进制
test/utf8/expected-frames/51-frame.png


二进制
test/utf8/expected-frames/52-frame.png


二进制
test/utf8/expected-frames/53-frame.png


二进制
test/utf8/expected-frames/54-frame.png


二进制
test/utf8/expected-frames/55-frame.png


二进制
test/utf8/expected-frames/56-frame.png


二进制
test/utf8/expected-frames/57-frame.png


二进制
test/utf8/expected-frames/58-frame.png


二进制
test/utf8/expected-frames/59-frame.png


二进制
test/utf8/expected-frames/60-frame.png


二进制
test/utf8/expected-frames/61-frame.png


二进制
test/utf8/expected-frames/62-frame.png


二进制
test/utf8/expected-frames/63-frame.png


二进制
test/utf8/expected-frames/64-frame.png


二进制
test/utf8/expected-frames/65-frame.png


二进制
test/utf8/expected-frames/66-frame.png


二进制
test/utf8/expected-frames/67-frame.png


二进制
test/utf8/expected-frames/68-frame.png


二进制
test/utf8/expected-frames/69-frame.png


二进制
test/utf8/expected-frames/70-frame.png


二进制
test/utf8/expected-frames/71-frame.png


二进制
test/utf8/expected-frames/72-frame.png


二进制
test/utf8/expected-frames/73-frame.png


二进制
test/utf8/expected-frames/74-frame.png


二进制
test/utf8/expected-frames/75-frame.png


二进制
test/utf8/expected-frames/76-frame.png


二进制
test/utf8/expected-frames/77-frame.png


二进制
test/utf8/expected-frames/78-frame.png


+ 9 - 0
test/utf8/renew.sh

@@ -0,0 +1,9 @@
+#!/bin/sh
+
+VODUS=../../vodus.release
+
+set -xe
+
+rm -rf ./expected-frames/
+mkdir -p ./expected-frames/
+$VODUS ./utf-8.txt expected-frames/ --config ./test.conf

+ 1 - 0
test/utf8/utf-8.txt

@@ -1,3 +1,4 @@
 [0:00:00] <Tsoding> Hello, World!
 [0:00:01] <Tsoding> Привет, Мир!
 [0:00:02] <Tsoding> こんにちは世界!
+[0:00:03] <Цодинг> OPA DAVAI DAVAI