Răsfoiți Sursa

Fix UTF-8 validation in static checks

Use isutf8 instead of recode to detect invalid UTF-8 sequences.

Also add the necessary dependencies to run the static checks locally
using act (https://github.com/nektos/act) with the Medium size image.
Gergely Kis 3 ani în urmă
părinte
comite
13d9a8c88f
2 fișiere modificate, cu 23 de adăugiri și 13 ștergeri
  1. 2 2
      .github/workflows/static_checks.yml
  2. 21 11
      misc/scripts/file_format.sh

+ 2 - 2
.github/workflows/static_checks.yml

@@ -24,8 +24,8 @@ jobs:
 
       - name: Install dependencies
         run: |
-          sudo apt-get install -qq dos2unix recode clang-format-13 libxml2-utils
-          sudo update-alternatives --remove-all clang-format
+          sudo apt-get install -qq dos2unix clang-format-13 libxml2-utils python3-pip moreutils
+          sudo update-alternatives --remove-all clang-format || true
           sudo update-alternatives --install /usr/bin/clang-format clang-format /usr/bin/clang-format-13 100
           sudo pip3 install black==22.3.0 pygments
 

+ 21 - 11
misc/scripts/file_format.sh

@@ -5,8 +5,8 @@
 # run before them.
 
 # We need dos2unix and recode.
-if [ ! -x "$(command -v dos2unix)" -o ! -x "$(command -v recode)" ]; then
-    printf "Install 'dos2unix' and 'recode' to use this script.\n"
+if [ ! -x "$(command -v dos2unix)" -o ! -x "$(command -v isutf8)" ]; then
+    printf "Install 'dos2unix' and 'isutf8' (from the moreutils package) to use this script.\n"
 fi
 
 set -uo pipefail
@@ -36,7 +36,7 @@ while IFS= read -rd '' f; do
         continue
     fi
     # Ensure that files are UTF-8 formatted.
-    recode UTF-8 "$f" 2> /dev/null
+    isutf8 "$f" >> utf8-validation.txt 2>&1
     # Ensure that files have LF line endings and do not contain a BOM.
     dos2unix "$f" 2> /dev/null
     # Remove trailing space characters and ensures that files end
@@ -48,17 +48,27 @@ done
 
 git diff --color > patch.patch
 
-# If no patch has been generated all is OK, clean up, and exit.
-if [ ! -s patch.patch ] ; then
+# If no UTF-8 violations were collected and no patch has been
+# generated all is OK, clean up, and exit.
+if [ ! -s utf8-validation.txt ] && [ ! -s patch.patch ] ; then
     printf "Files in this commit comply with the formatting rules.\n"
-    rm -f patch.patch
+    rm -f patch.patch utf8-validation.txt
     exit 0
 fi
 
-# A patch has been created, notify the user, clean up, and exit.
-printf "\n*** The following differences were found between the code "
-printf "and the formatting rules:\n\n"
-cat patch.patch
+# Violations detected, notify the user, clean up, and exit.
+if [ -s utf8-validation.txt ]
+then
+    printf "\n*** The following files contain invalid UTF-8 character sequences:\n\n"
+    cat utf8-validation.txt
+fi
+
+if [ -s patch.patch ]
+then
+    printf "\n*** The following differences were found between the code "
+    printf "and the formatting rules:\n\n"
+    cat patch.patch
+fi
+rm -f utf8-validation.txt patch.patch
 printf "\n*** Aborting, please fix your commit(s) with 'git commit --amend' or 'git rebase -i <hash>'\n"
-rm -f patch.patch
 exit 1