Browse Source

Merge pull request #155 from tsoding/133

(#133) Smol research towards emoji support
Alexey Kutepov 4 years ago
parent
commit
ea4addb21e
4 changed files with 94 additions and 2 deletions
  1. 3 0
      sample.txt
  2. 86 1
      src/aids.hpp
  3. 3 0
      src/emote_downloader.cpp
  4. 2 1
      src/vodus_emotes.cpp

+ 3 - 0
sample.txt

@@ -4,6 +4,9 @@
 [0:00:02] <zhiayang> AYAYA AYAYA AYAYA AYAYA AYAYA AYAYA AYAYA AYAYA AYAYA AYAYA
 [0:00:02] <zhiayang> AYAYA AYAYA AYAYA AYAYA AYAYA AYAYA AYAYA AYAYA AYAYA AYAYA
 [0:00:02.50] <Tsoding> tsodinW tsodinW tsodinW tsodinW tsodinW tsodinW tsodinW tsodinW 
 [0:00:02.50] <Tsoding> tsodinW tsodinW tsodinW tsodinW tsodinW tsodinW tsodinW tsodinW 
 [0:00:03] <herrhotzenplotz> phpHop phpHop phpHop phpHop phpHop phpHop phpHop phpHop phpHop phpHop
 [0:00:03] <herrhotzenplotz> phpHop phpHop phpHop phpHop phpHop phpHop phpHop phpHop phpHop phpHop
+[0:00:03.50] <herrhotzenplotz> 😂 😂 😂 😂 😂 😂 😂 😂 😂 😂
+[0:00:03.60] <herrhotzenplotz> 🅱 🅱 🅱 🅱 🅱 🅱 🅱 🅱 🅱 🅱 🅱 
+[0:00:03.70] <herrhotzenplotz> 🇯🇵 👩🏾 ? 
 [0:00:04] <Tsoding> HELL YEEEAH BROTHAA
 [0:00:04] <Tsoding> HELL YEEEAH BROTHAA
 [0:00:04] <igooddoog> KKoooona KKoooona KKoooona KKoooona KKoooona KKoooona KKoooona KKoooona KKoooona
 [0:00:04] <igooddoog> KKoooona KKoooona KKoooona KKoooona KKoooona KKoooona KKoooona KKoooona KKoooona
 [0:00:05] <igooddoog> KKona KKona KKona KKona KKona KKona KKona KKona KKona
 [0:00:05] <igooddoog> KKona KKona KKona KKona KKona KKona KKona KKona KKona

+ 86 - 1
src/aids.hpp

@@ -21,7 +21,7 @@
 //
 //
 // ============================================================
 // ============================================================
 //
 //
-// aids — 0.22.0 — std replacement for C++. Designed to aid developers
+// aids — 0.23.0 — std replacement for C++. Designed to aid developers
 // to a better programming experience.
 // to a better programming experience.
 //
 //
 // https://github.com/rexim/aids
 // https://github.com/rexim/aids
@@ -30,6 +30,8 @@
 //
 //
 // ChangeLog (https://semver.org/ is implied)
 // ChangeLog (https://semver.org/ is implied)
 //
 //
+//   0.23.0 code_to_utf8()
+//          struct Utf8_Char
 //   0.22.0 panic()
 //   0.22.0 panic()
 //   0.21.0 void sprint1(String_Buffer *buffer, unsigned int x)
 //   0.21.0 void sprint1(String_Buffer *buffer, unsigned int x)
 //   0.20.0 Escape
 //   0.20.0 Escape
@@ -850,6 +852,89 @@ namespace aids
     // UTF-8
     // UTF-8
     ////////////////////////////////////////////////////////////
     ////////////////////////////////////////////////////////////
 
 
+    struct Utf8_Char { // One code point in UTF-8 form; this is the value code_to_utf8() returns
+        uint8_t bytes[4]; // encoded bytes in output order; code_to_utf8() zero-fills the slots past `count`
+        size_t count; // how many leading entries of `bytes` are meaningful (code_to_utf8() sets 1 to 4)
+    };
+
+    void print1(FILE *stream, Utf8_Char uchar) // Prints the encoded bytes of `uchar` to `stream` via print()
+    {
+        print(stream, String_View {uchar.count, reinterpret_cast<const char*>(uchar.bytes)}); // wraps bytes + count as a String_View; presumably {length, data} order — String_View is declared outside this hunk, confirm
+    }
+
+    Utf8_Char code_to_utf8(uint32_t code) // Encodes `code` as 1 to 4 UTF-8 bytes chosen by range; calls panic() when code > 0x10FFFF
+    {
+        if (0x0000 <= code && code <= 0x007F) { // NOTE(review): `code` is unsigned, so the `0x0000 <= code` half is always true
+            // 0xxxxxxx
+            // 1 byte
+            Utf8_Char result = {
+                {(uint8_t) code, 0, 0, 0}, // the value is its own single byte; unused slots are zeroed
+                1,
+            };
+            return result;
+        } else if (0x0080 <= code && code <= 0x07FF) {
+            // 110xxxxx 10xxxxxx
+            // 2 bytes
+            const uint32_t header = 0b00000011000000; // 0xC0: the 110xxxxx lead-byte marker
+            const uint32_t extend = 0b00000010000000; // 0x80: the 10xxxxxx continuation marker
+            const uint32_t mask0  = 0b00111111000000; // selects bits 6..11; the range check above keeps bit 11 clear
+            const uint32_t mask1  = 0b00000000111111; // selects bits 0..5
+
+            Utf8_Char result = {
+                {
+                    (uint8_t) (((code & mask0) >> 6) | header), // lead byte: upper bits of code under the 110 marker
+                    (uint8_t) (((code & mask1) >> 0) | extend), // continuation byte: low 6 bits
+                    0,
+                    0
+                },
+                2
+            };
+
+            return result;
+        } else if (0x0800 <= code && code <= 0xFFFF) { // NOTE(review): this range contains 0xD800..0xDFFF (the surrogate code points); they are encoded like any other value — confirm that is intended
+            // 3 bytes
+            // 1110xxxx 10xxxxxx 10xxxxxx
+            const uint32_t header = 0b0000000011100000; // 0xE0: the 1110xxxx lead-byte marker
+            const uint32_t extend = 0b0000000010000000; // 0x80: the 10xxxxxx continuation marker
+            const uint32_t mask0  = 0b1111000000000000; // selects bits 12..15
+            const uint32_t mask1  = 0b0000111111000000; // selects bits 6..11
+            const uint32_t mask2  = 0b0000000000111111; // selects bits 0..5
+
+            Utf8_Char result = {
+                {
+                    (uint8_t) (((code & mask0) >> 12) | header), // lead byte: top 4 bits
+                    (uint8_t) (((code & mask1) >> 6)  | extend), // continuation: middle 6 bits
+                    (uint8_t) (((code & mask2) >> 0)  | extend), // continuation: low 6 bits
+                    0
+                },
+                3
+            };
+            return result;
+        } else if (0x10000 <= code && code <= 0x10FFFF) {
+            // 4 bytes
+            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+            const uint32_t header = 0b000000000000011110000; // 0xF0: the 11110xxx lead-byte marker
+            const uint32_t extend = 0b000000000000010000000; // 0x80: the 10xxxxxx continuation marker
+            const uint32_t mask0  = 0b111000000000000000000; // selects bits 18..20
+            const uint32_t mask1  = 0b000111111000000000000; // selects bits 12..17
+            const uint32_t mask2  = 0b000000000111111000000; // selects bits 6..11
+            const uint32_t mask3  = 0b000000000000000111111; // selects bits 0..5
+
+            Utf8_Char result = {
+                {
+                    (uint8_t) (((code & mask0) >> 18) | header), // lead byte: top 3 bits
+                    (uint8_t) (((code & mask1) >> 12) | extend), // continuation: bits 12..17
+                    (uint8_t) (((code & mask2) >> 6)  | extend), // continuation: bits 6..11
+                    (uint8_t) (((code & mask3) >> 0)  | extend), // continuation: bits 0..5
+                },
+                4
+            };
+            return result;
+        } else {
+            panic("The code point is too big"); // reached only when code > 0x10FFFF
+        } // NOTE(review): nothing is returned on this path; presumably panic() never returns — confirm against its declaration
+    }
+
     Maybe<uint32_t> utf8_get_code(String_View view, size_t *size)
     Maybe<uint32_t> utf8_get_code(String_View view, size_t *size)
     {
     {
         const uint8_t UTF8_1BYTE_MASK      = 1 << 7;
         const uint8_t UTF8_1BYTE_MASK      = 1 << 7;

+ 3 - 0
src/emote_downloader.cpp

@@ -597,5 +597,8 @@ int main(int argc, char **argv)
         if(still_alive) curl_multi_wait(cm, NULL, 0, 1000, NULL);
         if(still_alive) curl_multi_wait(cm, NULL, 0, 1000, NULL);
     } while (still_alive || (transfers < downloads.size));
     } while (still_alive || (transfers < downloads.size));
 
 
+    // TODO(#156): emote_downloader should download and setup the twitter emoji pack
+    // Twitter Emoji Pack: https://twemoji.twitter.com/
+
     return 0;
     return 0;
 }
 }

+ 2 - 1
src/vodus_emotes.cpp

@@ -89,7 +89,7 @@ struct Emote
     }
     }
 };
 };
 
 
-const size_t EMOTE_MAPPING_CAPACITY = 1021;
+const size_t EMOTE_MAPPING_CAPACITY = 5000; // cap on loaded emote mappings; the mapping-CSV loop stops reading once emote_mapping_count reaches it
 const size_t EMOTE_GIFS_CAPACITY = EMOTE_MAPPING_CAPACITY;
 const size_t EMOTE_GIFS_CAPACITY = EMOTE_MAPPING_CAPACITY;
 
 
 String_View file_extension(String_View filename)
 String_View file_extension(String_View filename)
@@ -208,6 +208,7 @@ struct Emote_Cache
             abort();
             abort();
         }
         }
 
 
+        // TODO(#157): Emote_Cache::populate_from_file should crash if we don't have enough emote capacity
         while (mapping_csv.unwrap.count > 0 && emote_mapping_count < EMOTE_MAPPING_CAPACITY) {
         while (mapping_csv.unwrap.count > 0 && emote_mapping_count < EMOTE_MAPPING_CAPACITY) {
             auto line = mapping_csv.unwrap.chop_by_delim('\n');
             auto line = mapping_csv.unwrap.chop_by_delim('\n');
             auto name = line.chop_by_delim(',');
             auto name = line.chop_by_delim(',');