1 month ago · a647b7c09c
--- a/scripts/ts2csv.sh
+++ b/scripts/ts2csv.sh
@@ -21,6 +21,8 @@ set -euo pipefail
 
				 # Notes:
			
 
				 # - Language is taken from TS/@language if present; otherwise guessed from filename (e.g. *_de.ts -> de).
			
 
				 # - Plural forms (numerus) are joined with " | " unless --explode-plurals is used.
			
 
				+# - Embedded newlines in fields are flattened to the literal two characters "\n" by default.
			
 
				+# - All CSV fields are quoted (RFC 4180), so commas and quotes inside fields are safe.
			
 
				 
			
 
				 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
			
 
				 REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
			
@@ -70,18 +72,18 @@ if [[ ${#FILES[@]} -eq 0 ]]; then
 
				 fi
			
 
				 
			
 
				 python3 - <<'PY' "$OUTDIR" "$MODE" "$INCLUDE_UNFINISHED" "$KEEP_OBSOLETE" "$WRITE_BOM" "$WRITE_HEADER" "$EXPLODE_PLURALS" "${FILES[@]}"
			
 
				-import sys, os, re, csv, argparse, xml.etree.ElementTree as ET
			
 
				+import sys, re, csv, xml.etree.ElementTree as ET
			
 
				 from pathlib import Path
			
 
				 
			
 
				 # Pull configuration from argv (provided by the bash wrapper for simplicity)
			
 
				-OUTDIR          = Path(sys.argv[1])
			
 
				-MODE            = sys.argv[2]           # "2col" | "1col"
			
 
				-INCL_UNFINISHED = sys.argv[3] == "1"
			
 
				-KEEP_OBSOLETE   = sys.argv[4] == "1"
			
 
				-WRITE_BOM       = sys.argv[5] == "1"
			
 
				-WRITE_HEADER    = sys.argv[6] == "1"
			
 
				-EXPLODE_PLURALS = sys.argv[7] == "1"
			
 
				-FILES           = [Path(p) for p in sys.argv[8:]]
			
 
				+OUTDIR           = Path(sys.argv[1])
			
 
				+MODE             = sys.argv[2]           # "2col" | "1col"
			
 
				+INCL_UNFINISHED  = sys.argv[3] == "1"
			
 
				+KEEP_OBSOLETE    = sys.argv[4] == "1"
			
 
				+WRITE_BOM        = sys.argv[5] == "1"
			
 
				+WRITE_HEADER     = sys.argv[6] == "1"
			
 
				+EXPLODE_PLURALS  = sys.argv[7] == "1"
			
 
				+FILES            = [Path(p) for p in sys.argv[8:]]
			
 
				 
			
 
				 OUTDIR.mkdir(parents=True, exist_ok=True)
			
 
				 
			
@@ -118,7 +120,7 @@ def plural_forms(trans_el: ET.Element):
 
				     nufs = list(trans_el.findall("./numerusform"))
			
 
				     if nufs:
			
 
				         return [text_of(n).strip() for n in nufs]
			
 
				-    return [ (text_of(trans_el) or "").strip() ]
			
 
				+    return [(text_of(trans_el) or "").strip()]
			
 
				 
			
 
				 def src_text(msg_el: ET.Element) -> str:
			
 
				     s = msg_el.find("source")
			
@@ -126,30 +128,37 @@ def src_text(msg_el: ET.Element) -> str:
 
				 
			
 
				 def is_obsolete_or_vanished(msg_el: ET.Element) -> bool:
			
 
				     t = msg_el.find("translation")
			
 
				-    if t is None: 
			
 
				+    if t is None:
			
 
				         return False
			
 
				     typ = (t.get("type") or "").lower()
			
 
				-    return typ in ("obsolete","vanished")
			
 
				+    return typ in ("obsolete", "vanished")
			
 
				 
			
 
				 def each_message(root: ET.Element):
			
 
				     for msg in root.findall(".//context/message"):
			
 
				         yield msg
			
 
				 
			
 
				-def explode_plural_rows(source: str, forms: list[str]):
			
 
				-    rows = []
			
 
				-    for idx, val in enumerate(forms):
			
 
				-        rows.append((f"{source} [plural {idx}]", val))
			
 
				-    return rows
			
 
				+def explode_plural_rows(source: str, forms):
			
 
				+    return [(f"{source} [plural {idx}]", val) for idx, val in enumerate(forms)]
			
 
				 
			
 
				 def write_csv(rows, header, out_path: Path, bom=False):
			
 
				     out_path.parent.mkdir(parents=True, exist_ok=True)
			
 
				     encoding = "utf-8-sig" if bom else "utf-8"
			
 
				+
			
 
				+    def sanitize(s: str) -> str:
			
 
				+        if s is None:
			
 
				+            return ""
			
 
				+        # normalize then flatten newlines to literal "\n"
			
 
				+        s = s.replace("\r\n", "\n").replace("\r", "\n")
			
 
				+        s = s.replace("\n", "\\n")
			
 
				+        return s
			
 
				+
			
 
				     with out_path.open("w", newline="", encoding=encoding) as f:
			
 
				-        w = csv.writer(f)  # default excels at quoting; keeps embedded newlines
			
 
				+        # Quote EVERYTHING so commas/newlines/quotes are safe; CRLF for Excel
			
 
				+        w = csv.writer(f, quoting=csv.QUOTE_ALL, lineterminator="\r\n")
			
 
				         if header:
			
 
				-            w.writerow(header)
			
 
				+            w.writerow([sanitize(h) for h in header])
			
 
				         for r in rows:
			
 
				-            w.writerow(r)
			
 
				+            w.writerow([sanitize(c) for c in r])
			
 
				 
			
 
				 for ts_path in FILES:
			
 
				     try:
			
@@ -192,6 +201,8 @@ for ts_path in FILES:
 
				             t_forms = [""]
			
 
				             unfinished = True
			
 
				         else:
			
 
				+            if should_skip_translation(t_el):
			
 
				+                continue
			
 
				             t_forms = plural_forms(t_el)
			
 
				             unfinished = is_unfinished(t_el)