123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269 |
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- Document Consistency Checker
- Uses modules in the scripts/modules directory to implement document consistency checking functionality
- """
- import os
- import sys
- import argparse
- # Import modules
- from modules.main import main
- from modules.file_handler import setup_console_encoding
- from modules.markdown_handler import compare_markdown_syntax_trees
- from modules.file_handler import read_file_content
# Marker emitted by the syntax-tree comparison to flag a result that must be
# reported as one single error, however many lines or semicolons it contains.
_ERROR_COUNT_TAG = "[ERROR_COUNT:1]"

# Prefix of the string returned when the comparison itself failed.
_COMPARISON_ERROR_PREFIX = "Syntax tree comparison error:"


def _format_inconsistencies(inconsistencies):
    """
    Turn a raw comparison result string into a count and a printable list.

    Parameters:
    inconsistencies: Non-empty result string from the syntax tree comparison

    Returns:
    Tuple (error_count, formatted_issues). formatted_issues holds one
    single-line string per issue; error_count may be smaller than the list
    length because sub-errors of a heading summary are listed but not counted.
    """
    if _ERROR_COUNT_TAG in inconsistencies:
        # A tagged result is one error; keep it whole instead of splitting it.
        raw_issues = [inconsistencies]
    else:
        # Newlines and "; " are both separators. Drop blank fragments (e.g.
        # produced by a trailing newline) so they are neither counted nor
        # printed as empty numbered entries.
        fragments = inconsistencies.replace("\n", "; ").split("; ")
        raw_issues = [fragment for fragment in fragments if fragment.strip()]

    error_count = 0
    formatted_issues = []
    # Becomes True once an already-listed issue is a heading summary
    # ("heading node ... errors under:"); later untagged issues are then
    # treated as its sub-errors and are not counted again.
    heading_summary_seen = False

    for raw_issue in raw_issues:
        is_tagged = _ERROR_COUNT_TAG in raw_issue
        formatted_issue = raw_issue.replace(_ERROR_COUNT_TAG, "").replace("\n", "; ")

        if is_tagged or not heading_summary_seen:
            error_count += 1
        formatted_issues.append(formatted_issue)

        if "heading node" in formatted_issue and "errors under:" in formatted_issue:
            heading_summary_seen = True

    return error_count, formatted_issues


def _report_inconsistencies(inconsistencies):
    """
    Print the outcome of one Markdown syntax tree comparison.

    Parameters:
    inconsistencies: Value returned by compare_markdown_syntax_trees
                     (assumed to be a string or a falsy value - TODO confirm)
    """
    consistent_message = "No inconsistency issues found, document structure is consistent"

    if not inconsistencies or inconsistencies == "Consistent":
        print(consistent_message)
        return

    # A failure of the comparison itself is printed verbatim, not as an issue list.
    if inconsistencies.startswith(_COMPARISON_ERROR_PREFIX):
        print(inconsistencies)
        return

    error_count, formatted_issues = _format_inconsistencies(inconsistencies)
    if not formatted_issues:
        # The result contained only separators/whitespace: nothing to report.
        print(consistent_message)
        return

    print(f"Found {error_count} inconsistency issues:")
    for index, issue in enumerate(formatted_issues, 1):
        print(f"{index}. {issue}")


def _compare_file_pair(source_path, target_path, document_name, skip_warning, progress_message):
    """
    Validate, read and compare one source/target file pair, printing the result.

    Parameters:
    source_path: Path of the source version file
    target_path: Path of the target version file
    document_name: Name passed to the syntax tree comparison as third argument
    skip_warning: Message printed when the pair is not Markdown
    progress_message: Message printed right before the comparison starts
    """
    if not os.path.exists(source_path):
        print(f"Error: Source file does not exist: {source_path}")
        return

    if not os.path.exists(target_path):
        print(f"Error: Target file does not exist: {target_path}")
        return

    source_content = read_file_content(source_path)
    target_content = read_file_content(target_path)

    # read_file_content signals failure by returning None.
    if source_content is None:
        print(f"Error: Unable to read source file: {source_path}")
        return

    if target_content is None:
        print(f"Error: Unable to read target file: {target_path}")
        return

    # Only Markdown files (".md", case-sensitive) have a syntax tree to compare.
    if not (source_path.endswith('.md') and target_path.endswith('.md')):
        print(skip_warning)
        return

    print(progress_message)
    inconsistencies = compare_markdown_syntax_trees(source_content, target_content, document_name)
    _report_inconsistencies(inconsistencies)


def run_docs_consistency_check(source_dir=None, target_dir=None, output_file=None, specific_file=None, source_file=None, target_file=None):
    """
    Run document consistency check between two language versions

    Three modes are selected from the arguments, in this order:
    1. source_file and target_file both given: compare exactly these two files.
    2. specific_file given: compare that file under source_dir and target_dir.
    3. otherwise: hand both directories to modules.main.main.

    All problems are reported with print(); the function always returns None.

    Parameters:
    source_dir: Source document directory path
    target_dir: Target document directory path
    output_file: Output Excel file path (defaults to "docs_structure_comparison.xlsx")
    specific_file: Specific file path to check, joined onto source_dir and target_dir
    source_file: Specified source version file path
    target_file: Specified target version file path
    """
    # Set console encoding to resolve character display issues
    setup_console_encoding()

    # Mode 1: an explicit file pair takes precedence over every directory option.
    if source_file and target_file:
        print(f"Checking files: Source version {source_file}, Target version {target_file}")
        _compare_file_pair(
            source_file,
            target_file,
            document_name=os.path.basename(source_file),
            skip_warning="Warning: Files are not Markdown files, skipping syntax tree comparison",
            progress_message="Comparing Markdown syntax trees of files...",
        )
        return

    # With no directory given at all, fall back to ./docs/source and
    # ./docs/target when the working directory contains a "docs" folder.
    if source_dir is None and target_dir is None:
        cwd = os.getcwd()
        if not os.path.exists(os.path.join(cwd, "docs")):
            print("Error: Source and target directories must be specified.")
            print("Usage examples:")
            print(" python docs_consistency_checker.py --source-dir ./docs/en --target-dir ./docs/zh")
            print(" python docs_consistency_checker.py --source-file ./docs/en/manuals/introduction.md --target-file ./docs/zh/manuals/introduction.md")
            print(" python docs_consistency_checker.py --source-dir ./docs/en --target-dir ./docs/zh --file manuals/introduction.md")
            return

        source_dir = os.path.join(cwd, "docs", "source")
        target_dir = os.path.join(cwd, "docs", "target")
        print("Using default directories based on current working directory:")
        print(f" Source directory: {source_dir}")
        print(f" Target directory: {target_dir}")

    if source_dir and not os.path.exists(source_dir):
        print(f"Error: Source directory does not exist: {source_dir}")
        return

    if target_dir and not os.path.exists(target_dir):
        print(f"Error: Target directory does not exist: {target_dir}")
        return

    if output_file is None:
        output_file = "docs_structure_comparison.xlsx"

    # Mode 3: no single file requested, run the directory-wide check.
    if not specific_file:
        main(source_dir_path=source_dir, target_dir_path=target_dir, output_file_path=output_file)
        return

    # Mode 2: a single file inside both directories.
    print(f"Checking specific file: {specific_file}")

    # Only one directory may have been supplied; joining a missing one with
    # the file name would raise TypeError, so report it instead.
    if not source_dir or not target_dir:
        print("Error: Both source and target directories are required to check a specific file.")
        return

    _compare_file_pair(
        os.path.join(source_dir, specific_file),
        os.path.join(target_dir, specific_file),
        document_name=specific_file,
        skip_warning=f"Warning: File {specific_file} is not a Markdown file, skipping syntax tree comparison",
        progress_message=f"Comparing Markdown syntax trees of {specific_file}...",
    )
if __name__ == "__main__":
    # Command line entry point: every option is an optional string that is
    # forwarded unchanged to run_docs_consistency_check.
    cli = argparse.ArgumentParser(description="Document Consistency Checker")

    # (flag, help text) pairs, registered in this order so --help lists them
    # in the same sequence.
    option_table = (
        ("--file", "Specify the specific file path to check (relative to docs directory)"),
        ("--source-dir", "Source document directory path"),
        ("--target-dir", "Target document directory path"),
        ("--output", "Output Excel file path"),
        ("--source-file", "Specify source version file path"),
        ("--target-file", "Specify target version file path"),
    )
    for flag, help_text in option_table:
        cli.add_argument(flag, help=help_text)

    options = cli.parse_args()

    print("Starting document consistency check...")
    # Map the CLI option names onto the function's keyword arguments.
    check_kwargs = {
        "source_dir": options.source_dir,
        "target_dir": options.target_dir,
        "output_file": options.output,
        "specific_file": options.file,
        "source_file": options.source_file,
        "target_file": options.target_file,
    }
    run_docs_consistency_check(**check_kwargs)
    print("Document consistency check completed!")
|