csv2ts.sh 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
  1. #!/usr/bin/env bash
  2. set -euo pipefail
# csv2ts.sh — Apply CSV translations back into Qt .ts files
#
# Typical usage:
#   1) Update an existing locale using its CSV:
#        ./scripts/csv2ts.sh -i translations/csv/app_de.csv -t translations/app_de.ts -o translations/updated
#
#   2) Create a new locale from the English template + CSV:
#        ./scripts/csv2ts.sh -i translations/csv/app_fr.csv -t translations/app_en.ts -l fr_FR -o translations/updated
#
#   3) In-place update (overwrites the template .ts):
#        ./scripts/csv2ts.sh -i translations/csv/app_de.csv -t translations/app_de.ts --inplace --backup
#
# CSV expectations (auto-detected):
#   - 2-column with header: "source,<anything>" (e.g., "translation_de" or "translation")
#   - 2-column without header: column 1 = source, column 2 = translation
#   - Plurals:
#       - Joined in a single cell: plural forms separated by " | " (default)
#       - OR "exploded": one row per form, with the source suffixed by " [plural N]" (0-based)
#
# Flags:
#   -i, --input CSV           Input CSV file
#   -t, --template TS         Template .ts file to update (existing locale or English)
#   -o, --outdir DIR          Where to write the updated .ts (default: <repo>/translations/updated)
#   -l, --lang LANG           Override the TS @language attribute (e.g. de_DE, fr_FR)
#   --plural-sep SEP          Separator for joined plural forms (default: " | ")
#   --inplace                 Write changes back to the template .ts path
#   --backup                  With --inplace, create a .bak copy next to the template
#   --clear-when-empty        If the translation cell is empty, set type="unfinished" and clear the text (default: on)
#   --keep-text-when-empty    Keep the previous text if the CSV cell is empty (still marks unfinished)
#
# Notes:
#   - Messages not present in the CSV remain unchanged.
#   - If the template message is plural (numerus="yes"), the CSV can be joined or exploded; both are supported.
#   - If the CSV provides fewer plural forms than exist in the template, only the provided indices are updated; the others are kept.
  37. SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  38. REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
  39. OUTDIR="$REPO_ROOT/translations/updated"
  40. INPUT_CSV=""
  41. TEMPLATE_TS=""
  42. LANG_OVERRIDE=""
  43. PLURAL_SEP=" | "
  44. INPLACE=0
  45. BACKUP=0
  46. CLEAR_WHEN_EMPTY=1
  47. KEEP_TEXT_WHEN_EMPTY=0
  48. # Parse args
  49. ARGS=()
  50. while [[ $# -gt 0 ]]; do
  51. case "$1" in
  52. -i|--input) INPUT_CSV="$2"; shift 2;;
  53. -t|--template) TEMPLATE_TS="$2"; shift 2;;
  54. -o|--outdir) OUTDIR="$2"; shift 2;;
  55. -l|--lang) LANG_OVERRIDE="$2"; shift 2;;
  56. --plural-sep) PLURAL_SEP="$2"; shift 2;;
  57. --inplace) INPLACE=1; shift;;
  58. --backup) BACKUP=1; shift;;
  59. --clear-when-empty) CLEAR_WHEN_EMPTY=1; KEEP_TEXT_WHEN_EMPTY=0; shift;;
  60. --keep-text-when-empty) KEEP_TEXT_WHEN_EMPTY=1; CLEAR_WHEN_EMPTY=0; shift;;
  61. -h|--help)
  62. grep '^# ' "$0" | sed 's/^# //'
  63. exit 0
  64. ;;
  65. *) ARGS+=("$1"); shift;;
  66. esac
  67. done
  68. set -- "${ARGS[@]}"
  69. if [[ -z "$INPUT_CSV" ]]; then
  70. echo "Error: --input CSV is required" >&2
  71. exit 1
  72. fi
  73. if [[ -z "$TEMPLATE_TS" ]]; then
  74. echo "Error: --template TS is required" >&2
  75. exit 1
  76. fi
  77. if [[ $INPLACE -eq 0 ]]; then
  78. mkdir -p "$OUTDIR"
  79. fi
  80. if [[ $INPLACE -eq 1 && $BACKUP -eq 1 ]]; then
  81. cp -f "$TEMPLATE_TS" "$TEMPLATE_TS.bak"
  82. fi
  83. python3 - <<'PY' "$INPUT_CSV" "$TEMPLATE_TS" "$OUTDIR" "$LANG_OVERRIDE" "$PLURAL_SEP" "$INPLACE" "$CLEAR_WHEN_EMPTY" "$KEEP_TEXT_WHEN_EMPTY"
  84. import sys, os, re, csv, xml.etree.ElementTree as ET
  85. from pathlib import Path
  86. INPUT_CSV = Path(sys.argv[1])
  87. TEMPLATE_TS = Path(sys.argv[2])
  88. OUTDIR = Path(sys.argv[3])
  89. LANG_OVERRIDE = sys.argv[4]
  90. PLURAL_SEP = sys.argv[5]
  91. INPLACE = sys.argv[6] == "1"
  92. CLEAR_WHEN_EMPTY = sys.argv[7] == "1"
  93. KEEP_TEXT_WHEN_EMPTY = sys.argv[8] == "1"
  94. def read_csv_map(csv_path: Path):
  95. """
  96. Returns:
  97. mapping: dict[source] -> list[str] (plural forms) OR single-element list for non-plural
  98. exploded_indices: dict[source] -> {idx: text} (if exploded plural rows were found)
  99. """
  100. mapping = {}
  101. exploded_indices = {}
  102. def add_mapping(src, val):
  103. if src not in mapping:
  104. mapping[src] = []
  105. mapping[src] = [val] # non-plural default; may be replaced if joined contains multiple
  106. def set_plural(src, idx, val):
  107. d = exploded_indices.setdefault(src, {})
  108. d[int(idx)] = val
  109. with csv_path.open("r", newline="", encoding="utf-8-sig") as f:
  110. rdr = csv.reader(f)
  111. rows = list(rdr)
  112. if not rows:
  113. return mapping, exploded_indices
  114. # Detect header
  115. header = None
  116. if any(h.lower() == "source" for h in rows[0]):
  117. header = [h.strip() for h in rows[0]]
  118. rows = rows[1:]
  119. # Column indices
  120. if header:
  121. try:
  122. source_idx = [i for i,h in enumerate(header) if h.lower()=="source"][0]
  123. except IndexError:
  124. raise SystemExit("CSV header must contain a 'source' column")
  125. # translation column = first non-source column
  126. trans_idx_candidates = [i for i,h in enumerate(header) if i != source_idx]
  127. if not trans_idx_candidates:
  128. raise SystemExit("CSV must have a translation column")
  129. trans_idx = trans_idx_candidates[0]
  130. else:
  131. if len(rows[0]) < 2:
  132. raise SystemExit("CSV without header must have at least 2 columns: source, translation")
  133. source_idx, trans_idx = 0, 1
  134. plural_tag_re = re.compile(r"\s*\[plural\s+(\d+)\]\s*$", re.IGNORECASE)
  135. for r in rows:
  136. if not r:
  137. continue
  138. # pad short rows
  139. if len(r) <= max(source_idx, trans_idx):
  140. r = (r + [""]*(max(source_idx, trans_idx)+1 - len(r)))
  141. src = (r[source_idx] or "").strip()
  142. if not src:
  143. continue
  144. val = (r[trans_idx] or "")
  145. m = plural_tag_re.search(src)
  146. if m:
  147. base_src = plural_tag_re.sub("", src).rstrip()
  148. set_plural(base_src, m.group(1), val)
  149. continue
  150. # joined plural forms? split by PLURAL_SEP; keep raw if no sep
  151. if PLURAL_SEP in val:
  152. forms = [s for s in (p.strip() for p in val.split(PLURAL_SEP))]
  153. mapping[src] = forms
  154. else:
  155. add_mapping(src, val)
  156. # Merge exploded forms into mapping
  157. for base_src, idx_map in exploded_indices.items():
  158. max_index = max(idx_map.keys()) if idx_map else -1
  159. forms = ["" for _ in range(max_index+1)]
  160. for i, v in idx_map.items():
  161. if i < 0:
  162. continue
  163. if i >= len(forms):
  164. forms.extend([""] * (i+1-len(forms)))
  165. forms[i] = v
  166. mapping[base_src] = forms
  167. return mapping, exploded_indices
  168. def text_of(elem: ET.Element) -> str:
  169. parts = []
  170. if elem.text:
  171. parts.append(elem.text)
  172. for child in elem:
  173. parts.append(text_of(child))
  174. if child.tail:
  175. parts.append(child.tail)
  176. return "".join(parts)
  177. def set_text(elem: ET.Element, value: str):
  178. # Replace all content with a single text node
  179. for child in list(elem):
  180. elem.remove(child)
  181. elem.text = value
  182. def update_ts(template_ts: Path, mapping: dict, lang_override: str | None):
  183. tree = ET.parse(template_ts)
  184. root = tree.getroot()
  185. if lang_override:
  186. root.set("language", lang_override)
  187. # namespace cleanup if present
  188. for ctx in root.findall(".//context"):
  189. for msg in ctx.findall("message"):
  190. source_el = msg.find("source")
  191. if source_el is None:
  192. continue
  193. src = (text_of(source_el) or "").strip()
  194. if not src:
  195. continue
  196. numerus = (msg.get("numerus") or "").lower() == "yes"
  197. trans_el = msg.find("translation")
  198. if trans_el is None:
  199. trans_el = ET.SubElement(msg, "translation")
  200. if src not in mapping:
  201. # Not provided in CSV — leave untouched
  202. continue
  203. values = mapping[src] # list[str] even for non-plural
  204. if numerus:
  205. # ensure <numerusform> children
  206. existing = trans_el.findall("numerusform")
  207. if existing:
  208. # Update existing indices; if CSV gives fewer, update only provided
  209. for i, child in enumerate(existing):
  210. if i < len(values):
  211. set_text(child, values[i])
  212. # If CSV provides more, append more nodes
  213. for i in range(len(existing), len(values)):
  214. n = ET.SubElement(trans_el, "numerusform")
  215. set_text(n, values[i])
  216. else:
  217. # No children yet; create from CSV
  218. for v in values:
  219. n = ET.SubElement(trans_el, "numerusform")
  220. set_text(n, v)
  221. # Mark finished/unfinished
  222. empty_all = all((v.strip() == "") for v in values)
  223. if empty_all:
  224. if CLEAR_WHEN_EMPTY:
  225. # clear all forms
  226. for child in trans_el.findall("numerusform"):
  227. set_text(child, "")
  228. trans_el.set("type", "unfinished")
  229. else:
  230. # remove 'type' attribute if present
  231. if "type" in trans_el.attrib:
  232. del trans_el.attrib["type"]
  233. else:
  234. val = values[0] if values else ""
  235. if val.strip() == "":
  236. if CLEAR_WHEN_EMPTY:
  237. set_text(trans_el, "")
  238. # mark unfinished (or keep text if KEEP_TEXT_WHEN_EMPTY)
  239. trans_el.set("type", "unfinished")
  240. else:
  241. set_text(trans_el, val)
  242. if "type" in trans_el.attrib:
  243. del trans_el.attrib["type"]
  244. return tree, root
  245. def write_tree(tree: ET.ElementTree, out_path: Path):
  246. out_path.parent.mkdir(parents=True, exist_ok=True)
  247. tree.write(out_path, encoding="utf-8", xml_declaration=True)
  248. mapping, _ = read_csv_map(INPUT_CSV)
  249. tree, root = update_ts(TEMPLATE_TS, mapping, LANG_OVERRIDE or None)
  250. if INPLACE:
  251. out_path = TEMPLATE_TS
  252. else:
  253. out_path = OUTDIR / TEMPLATE_TS.name
  254. write_tree(tree, out_path)
  255. print(f"[OK] Applied {INPUT_CSV.name} -> {out_path}")
  256. PY
  257. echo "Done."