#!/bin/bash
# compare_manuals.sh
# Script to compare manual files across different languages
# Usage: ./compare_manuals.sh [-h|--help] [-s|--summary] [--remove]
set -euo pipefail

# Global variables for tracking issues and options
HAS_ISSUES=0      # set to 1 by compare_with_languages when any problem is found
REMOVE_MODE=false # switched to true by the --remove command line option

# Colors for output (stored as backslash escapes, expanded when printed)
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # No Color

# Base directory for manuals, relative to the current working directory
readonly MANUAL_DIR="manual"
# Reference language every other language directory is compared against
readonly ENGLISH_DIR="${MANUAL_DIR}/english"
  # Print a message wrapped in a color sequence, followed by a newline.
  # Globals:   NC (read) - reset sequence appended after the message
  # Arguments: $1 - color escape sequence (e.g. "$RED"); backslash escapes
  #                 in it are expanded
  #            $2 - message text, printed verbatim
  # Outputs:   the colored message on stdout
  print_colored() {
    local color=$1
    local message=$2
    # %b expands the escapes stored in the color variables, while %s keeps
    # the message literal so backslashes in paths are not reinterpreted.
    printf '%b%s%b\n' "$color" "$message" "$NC"
  }
  # List every language directory directly below MANUAL_DIR except the
  # English reference directory, one path per line, sorted.
  # Globals:   MANUAL_DIR (read), ENGLISH_DIR (read)
  # Outputs:   directory paths on stdout; nothing when English is the only
  #            language
  # Returns:   0 also when the list is empty, so callers running under
  #            `set -e -o pipefail` can handle the empty case themselves
  get_language_dirs() {
    # Exclude the English directory by exact path inside find: a grep -v
    # filter would also drop siblings such as "english_uk" and would report
    # failure whenever it filtered out every line.
    find "${MANUAL_DIR}" -mindepth 1 -maxdepth 1 -type d ! -path "${ENGLISH_DIR}" | sort
  }
  # List all directories and *.md entries below a base directory as paths
  # relative to that directory, one per line, sorted.
  # Arguments: $1 - base directory to scan
  # Outputs:   relative paths on stdout; nothing for an empty directory
  # Returns:   1 when $1 is not a directory
  get_structure() {
    local base_dir=$1
    if [[ ! -d "$base_dir" ]]; then
      return 1
    fi

    local path
    # -mindepth 1 keeps the base directory itself out of the listing, so an
    # empty tree really produces empty output. The prefix is stripped with a
    # quoted parameter expansion, which treats base_dir as a literal string.
    find "$base_dir" -mindepth 1 \( -type d -o -name '*.md' \) |
      while IFS= read -r path; do
        printf '%s\n' "${path#"${base_dir}"/}"
      done |
      sort
  }
  # Compare a translated file against its English original structurally.
  # Three checks run in order and the first failure is reported:
  #   1. both files have the same number of lines
  #   2. every line is empty in both files or non-empty in both files
  #      (carriage returns are ignored for this test)
  #   3. the sorted sets of single-line HTML comments are identical
  # Arguments: $1 - reference file, $2 - file to check
  # Outputs:   on failure one line on stdout describing the reason:
  #              "<lines1>:<lines2>"   line count mismatch
  #              "alignment:<n,n,...>" line numbers whose emptiness differs
  #              "comments:translated" HTML comments differ
  #            nothing when a path is not a regular file
  # Returns:   0 when all checks pass, 1 otherwise
  compare_files() {
    local file1=$1
    local file2=$2
    if [[ ! -f "$file1" || ! -f "$file2" ]]; then
      return 1
    fi

    # First check line count. awk counts a final line that lacks a trailing
    # newline, so files that differ only in that newline are not reported.
    local f1_lines f2_lines
    f1_lines=$(awk 'END { print NR }' < "$file1")
    f2_lines=$(awk 'END { print NR }' < "$file2")
    if ((f1_lines != f2_lines)); then
      printf '%s:%s\n' "$f1_lines" "$f2_lines"
      return 1
    fi

    # Check empty/non-empty line alignment
    local line_num=1
    local line1 line2 clean_line1 clean_line2
    local misaligned_lines=()
    while true; do
      # read fails on an unterminated last line but still fills the
      # variable; keep going in that case so the line is checked too.
      IFS= read -r line1 <&3 || [[ -n "$line1" ]] || break
      IFS= read -r line2 <&4 || [[ -n "$line2" ]] || break
      clean_line1=${line1//$'\r'/}
      clean_line2=${line2//$'\r'/}
      if [[ -z "$clean_line1" && -n "$clean_line2" ]] ||
        [[ -n "$clean_line1" && -z "$clean_line2" ]]; then
        misaligned_lines+=("$line_num")
      fi
      line_num=$((line_num + 1))
    done 3< "$file1" 4< "$file2"

    if ((${#misaligned_lines[@]} > 0)); then
      local joined
      joined=$(
        IFS=,
        printf '%s' "${misaligned_lines[*]}"
      )
      printf 'alignment:%s\n' "$joined"
      return 1
    fi

    # Check HTML comments are identical (not translated). grep exits 1 when
    # a file has no comments; that is not an error here, so it must not trip
    # errexit/pipefail in the caller.
    local comments1 comments2
    comments1=$(grep -o -e '<!--.*-->' -- "$file1" | sort) || true
    comments2=$(grep -o -e '<!--.*-->' -- "$file2" | sort) || true
    if [[ "$comments1" != "$comments2" ]]; then
      echo "comments:translated"
      return 1
    fi
    return 0
  }
  # Check whether a relative path exists inside a language directory.
  # Arguments: $1 - language directory
  #            $2 - path relative to that directory
  # Returns:   0 when "$1/$2" exists (any file type), non-zero otherwise
  path_exists() {
    local lang_dir=$1
    local relative_path=$2
    [[ -e "${lang_dir}/${relative_path}" ]]
  }
  # Print one entry of the "files with different content" report.
  # Arguments: $1 - file path relative to the language directory
  #            $2 - language name used in the line-count message
  #            $3 - reason text produced by compare_files (may be empty)
  _print_mismatch() {
    local file=$1
    local lang_name=$2
    local reason=$3
    local count_re='^[[:space:]]*([0-9]+):[[:space:]]*([0-9]+)$'
    if [[ -z "$reason" ]]; then
      printf ' %s\n' "$file"
    elif [[ "$reason" == alignment:* ]]; then
      # Empty/non-empty line alignment issue
      printf ' %s (empty/non-empty line alignment mismatch at lines: %s)\n' "$file" "${reason#alignment:}"
    elif [[ "$reason" == comments:* ]]; then
      # HTML comments translated
      printf ' %s (HTML comments <!-- --> were translated, must remain in English)\n' "$file"
    elif [[ "$reason" =~ $count_re ]]; then
      # Line count mismatch
      printf ' %s (English: %s lines, %s: %s lines)\n' "$file" "${BASH_REMATCH[1]}" "$lang_name" "${BASH_REMATCH[2]}"
    else
      printf ' %s (%s)\n' "$file" "$reason"
    fi
  }

  # Main comparison function: check every other language directory against
  # the English tree and print a per-language report.
  # Globals:   RED, GREEN, YELLOW, BLUE (read), MANUAL_DIR (read),
  #            REMOVE_MODE (read), HAS_ISSUES (set to 1 on any finding)
  # Arguments: $1 - English reference directory
  # Outputs:   report on stdout
  # Side effects: when REMOVE_MODE is "true", deletes translated files whose
  #            line count differs from the English file. Other mismatches
  #            (alignment, comments) are only reported, matching what the
  #            help text and the "REMOVED" message promise.
  # Exits:     with status 1 when $1 is not a directory
  compare_with_languages() {
    local english_dir=$1
    if [[ ! -d "$english_dir" ]]; then
      print_colored "$RED" "Error: English directory '$english_dir' not found!"
      exit 1
    fi

    print_colored "$BLUE" "=== Manual Structure Comparison ==="
    print_colored "$BLUE" "Base directory: $english_dir"
    if [[ "$REMOVE_MODE" == "true" ]]; then
      print_colored "$YELLOW" "REMOVE MODE: Files with mismatched line counts will be automatically removed"
    fi
    echo

    # Get English structure
    local english_structure
    english_structure=$(get_structure "$english_dir")
    if [[ -z "$english_structure" ]]; then
      print_colored "$YELLOW" "Warning: No .md files or directories found in $english_dir"
      return
    fi

    # Get all language directories. A failing status is tolerated because an
    # empty result is handled right below with a warning.
    local lang_dirs
    lang_dirs=$(get_language_dirs) || true
    if [[ -z "$lang_dirs" ]]; then
      print_colored "$YELLOW" "Warning: No other language directories found in $MANUAL_DIR"
      return
    fi

    print_colored "$GREEN" "Found language directories:"
    local lang_dir
    while IFS= read -r lang_dir; do
      echo " - $(basename "$lang_dir")"
    done <<< "$lang_dirs"
    echo

    local count_re='^[[:space:]]*[0-9]+:[[:space:]]*[0-9]+$'
    local lang_name item english_path lang_path diff_result compare_exit_code idx
    # different_files and different_reasons are parallel arrays, so a file
    # name never has to be packed into (and parsed back out of) one string.
    local -a missing_files=() missing_dirs=() removed_files=()
    local -a different_files=() different_reasons=()

    # Compare each language with English
    while IFS= read -r lang_dir; do
      lang_name=$(basename "$lang_dir")
      print_colored "$BLUE" "--- Comparing with $lang_name ---"
      missing_files=()
      missing_dirs=()
      removed_files=()
      different_files=()
      different_reasons=()

      # Check each item in English structure
      while IFS= read -r item; do
        english_path="${english_dir}/${item}"
        lang_path="${lang_dir}/${item}"

        if [[ ! -e "$lang_path" ]]; then
          if [[ -d "$english_path" ]]; then
            missing_dirs+=("$item")
            HAS_ISSUES=1
          elif [[ -f "$english_path" ]]; then
            missing_files+=("$item")
            HAS_ISSUES=1
          fi
          continue
        fi

        # Only regular files on both sides get a content comparison.
        if [[ ! -f "$english_path" || ! -f "$lang_path" ]]; then
          continue
        fi

        # Capture the status through `||` so errexit never has to be
        # switched off and back on around the call.
        compare_exit_code=0
        diff_result=$(compare_files "$english_path" "$lang_path") || compare_exit_code=$?
        if ((compare_exit_code == 0)); then
          continue
        fi

        HAS_ISSUES=1
        if [[ "$REMOVE_MODE" == "true" && "$diff_result" =~ $count_re ]]; then
          # Remove the file with mismatched line count
          rm -f -- "$lang_path"
          removed_files+=("$item")
          print_colored "$RED" "REMOVED: $lang_path (line count mismatch)"
        else
          different_files+=("$item")
          different_reasons+=("$diff_result")
        fi
      done <<< "$english_structure"

      # Report results
      if ((${#missing_dirs[@]} > 0)); then
        print_colored "$RED" "Missing directories in $lang_name:"
        printf ' %s\n' "${missing_dirs[@]}"
        echo
      fi
      if ((${#missing_files[@]} > 0)); then
        print_colored "$RED" "Missing files in $lang_name:"
        printf ' %s\n' "${missing_files[@]}"
        echo
      fi
      if ((${#removed_files[@]} > 0)); then
        print_colored "$RED" "Removed files with mismatched line counts in $lang_name:"
        printf ' %s\n' "${removed_files[@]}"
        echo
      fi
      if ((${#different_files[@]} > 0)); then
        print_colored "$YELLOW" "Files with different content in $lang_name:"
        for idx in "${!different_files[@]}"; do
          _print_mismatch "${different_files[idx]}" "$lang_name" "${different_reasons[idx]}"
        done
        echo
      fi
      if ((${#missing_dirs[@]} + ${#missing_files[@]} + ${#different_files[@]} + ${#removed_files[@]} == 0)); then
        print_colored "$GREEN" "✓ $lang_name structure and content matches English perfectly!"
        echo
      fi
    done <<< "$lang_dirs"
  }
  # Show a unified diff between the English and the translated version of
  # one file. Prints nothing unless both files exist as regular files.
  # Globals:   BLUE (read)
  # Arguments: $1 - English directory
  #            $2 - language directory
  #            $3 - file path relative to both directories
  # Outputs:   header lines and the diff on stdout
  # Returns:   0 in all cases
  show_detailed_diff() {
    local english_dir=$1
    local lang_dir=$2
    local file_path=$3
    local english_file="${english_dir}/${file_path}"
    local lang_file="${lang_dir}/${file_path}"

    if [[ ! -f "$english_file" || ! -f "$lang_file" ]]; then
      return 0
    fi

    print_colored "$BLUE" "--- Detailed diff for $file_path ---"
    print_colored "$BLUE" "English: $english_file"
    print_colored "$BLUE" "$(basename "$lang_dir"): $lang_file"
    echo
    # diff exits 1 when the files differ; that is the expected case here and
    # must not abort a script running with errexit.
    diff -u -- "$english_file" "$lang_file" || true
    echo
  }
  # Print how many directories and .md files the English tree and every
  # other language tree contain.
  # Globals:   BLUE (read)
  # Arguments: $1 - English reference directory
  # Outputs:   summary on stdout
  # Note:      the directory count includes the tree's root directory itself,
  #            because find lists its starting point.
  show_summary() {
    local english_dir=$1
    print_colored "$BLUE" "=== Summary Statistics ==="

    # Count English files and directories
    local english_files english_dirs
    english_files=$(find "$english_dir" -name "*.md" | wc -l | tr -d ' ')
    english_dirs=$(find "$english_dir" -type d | wc -l | tr -d ' ')
    echo "English structure:"
    echo " - Directories: $english_dirs"
    echo " - .md files: $english_files"
    echo

    # Count for each language. The list is fed through process substitution
    # instead of a pipeline so that a non-zero status from the listing helper
    # (e.g. when English is the only language) cannot become this function's
    # return status under pipefail.
    local lang_dir lang_name lang_files lang_dirs_count
    while IFS= read -r lang_dir; do
      [[ -n "$lang_dir" ]] || continue
      lang_name=$(basename "$lang_dir")
      # find errors are silenced on purpose (best effort); keep a partial
      # count instead of aborting when find reports a failure.
      lang_files=$(find "$lang_dir" -name "*.md" 2> /dev/null | wc -l | tr -d ' ') || true
      lang_dirs_count=$(find "$lang_dir" -type d 2> /dev/null | wc -l | tr -d ' ') || true
      echo "$lang_name structure:"
      echo " - Directories: $lang_dirs_count"
      echo " - .md files: $lang_files"
      echo
    done < <(get_language_dirs)
  }
  # Main execution: validate the directory layout, print the summary, run
  # the comparison and terminate the script with the overall result.
  # Globals:   MANUAL_DIR, ENGLISH_DIR (read), HAS_ISSUES (read after the
  #            comparison), RED, GREEN, YELLOW (read)
  # Exits:     0 when no issues were recorded, 1 when issues were found or a
  #            required directory is missing
  main() {
    # Check if manual directory exists
    if [[ ! -d "$MANUAL_DIR" ]]; then
      print_colored "$RED" "Error: Manual directory '$MANUAL_DIR' not found in current directory!"
      print_colored "$YELLOW" "Current directory: $(pwd)"
      exit 1
    fi

    # Check if English directory exists
    if [[ ! -d "$ENGLISH_DIR" ]]; then
      print_colored "$RED" "Error: English directory '$ENGLISH_DIR' not found!"
      exit 1
    fi

    print_colored "$GREEN" "Starting manual comparison..."
    echo "Current directory: $(pwd)"
    echo

    # Show summary first
    show_summary "$ENGLISH_DIR"

    # Compare structures and content
    compare_with_languages "$ENGLISH_DIR"

    if ((HAS_ISSUES == 0)); then
      print_colored "$GREEN" "Comparison completed! All translations are perfect."
      exit 0
    else
      print_colored "$RED" "Comparison completed with issues found."
      exit 1
    fi
  }
  # Print the help text for this script on stdout.
  usage() {
    printf '%s\n' \
      "Usage: $0 [OPTIONS]" \
      "" \
      "Compare manual files across different languages" \
      "" \
      "OPTIONS:" \
      " -h, --help Show this help message" \
      " -s, --summary Show only summary statistics" \
      " --remove Automatically remove files with mismatched line counts" \
      "" \
      "EXIT CODES:" \
      " 0 All translations perfect (no missing files or line count mismatches)" \
      " 1 Issues found (missing translations or line count mismatches)" \
      "" \
      "This script compares the structure and content of .md files" \
      "in manual/english/ with all other language directories." \
      "With --remove flag, files with different line counts are automatically deleted."
  }

  # Handle command line arguments. Only the first argument is inspected.
  case "${1:-}" in
    -h | --help)
      usage
      exit 0
      ;;
    -s | --summary)
      # The summary runs find on this directory; fail with a clear message
      # instead of a raw find error when it is missing.
      if [[ ! -d "$ENGLISH_DIR" ]]; then
        print_colored "$RED" "Error: English directory '$ENGLISH_DIR' not found!"
        exit 1
      fi
      show_summary "$ENGLISH_DIR"
      exit 0
      ;;
    --remove)
      REMOVE_MODE=true
      main
      ;;
    "")
      main
      ;;
    *)
      print_colored "$RED" "Unknown option: $1"
      print_colored "$YELLOW" "Use -h or --help for usage information"
      exit 1
      ;;
  esac