csv2ts.sh 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269
  1. #!/usr/bin/env bash
  2. set -euo pipefail
  3. # csv2ts.sh — Apply CSV translations back into Qt .ts files
  4. #
  5. # Typical usage:
  6. # 1) Update an existing locale using its CSV:
  7. # ./scripts/csv2ts.sh -i translations/csv/app_de.csv -t translations/app_de.ts -o translations/updated
  8. #
  9. # 2) Create a new locale from English template + CSV:
  10. # ./scripts/csv2ts.sh -i translations/csv/app_fr.csv -t translations/app_en.ts -l fr_FR -o translations/updated
  11. #
  12. # 3) In-place update (overwrites the template .ts):
  13. # ./scripts/csv2ts.sh -i translations/csv/app_de.csv -t translations/app_de.ts --inplace --backup
  14. #
  15. # CSV expectations (auto-detected):
  16. # - 2-column with header: "source,<anything>" (e.g., "translation_de" or "translation")
  17. # - 2-column without header: column 1=source, column 2=translation
  18. # - Plurals in CSV may be:
  19. # - Joined in a single cell, forms separated by " | "
  20. # - OR "exploded": rows with source suffixed by " [plural N]" (0-based)
  21. #
  22. # Notes:
  23. # - Matches ts2csv defaults: embedded newlines were flattened to "\n" during export;
  24. # this script unflattens those back to real newlines by default.
  25. # - Messages not present in CSV remain unchanged.
  26. # - If the template message is plural (numerus="yes"), CSV can be joined or exploded; both are supported.
  27. # - If CSV provides fewer plural forms than exist in template, only provided indices are updated; others kept.
  28. SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  29. REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
  30. OUTDIR="$REPO_ROOT/translations/updated"
  31. INPUT_CSV=""
  32. TEMPLATE_TS=""
  33. LANG_OVERRIDE=""
  34. PLURAL_SEP=" | "
  35. INPLACE=0
  36. BACKUP=0
  37. CLEAR_WHEN_EMPTY=1
  38. KEEP_TEXT_WHEN_EMPTY=0
  39. # Parse args
  40. ARGS=()
  41. while [[ $# -gt 0 ]]; do
  42. case "$1" in
  43. -i|--input) INPUT_CSV="$2"; shift 2;;
  44. -t|--template) TEMPLATE_TS="$2"; shift 2;;
  45. -o|--outdir) OUTDIR="$2"; shift 2;;
  46. -l|--lang) LANG_OVERRIDE="$2"; shift 2;;
  47. --plural-sep) PLURAL_SEP="$2"; shift 2;;
  48. --inplace) INPLACE=1; shift;;
  49. --backup) BACKUP=1; shift;;
  50. --clear-when-empty) CLEAR_WHEN_EMPTY=1; KEEP_TEXT_WHEN_EMPTY=0; shift;;
  51. --keep-text-when-empty) KEEP_TEXT_WHEN_EMPTY=1; CLEAR_WHEN_EMPTY=0; shift;;
  52. -h|--help)
  53. grep '^# ' "$0" | sed 's/^# //'
  54. exit 0
  55. ;;
  56. *) ARGS+=("$1"); shift;;
  57. esac
  58. done
  59. set -- "${ARGS[@]}"
  60. if [[ -z "$INPUT_CSV" ]]; then
  61. echo "Error: --input CSV is required" >&2
  62. exit 1
  63. fi
  64. if [[ -z "$TEMPLATE_TS" ]]; then
  65. echo "Error: --template TS is required" >&2
  66. exit 1
  67. fi
  68. if [[ $INPLACE -eq 0 ]]; then
  69. mkdir -p "$OUTDIR"
  70. fi
  71. if [[ $INPLACE -eq 1 && $BACKUP -eq 1 ]]; then
  72. cp -f "$TEMPLATE_TS" "$TEMPLATE_TS.bak"
  73. fi
  74. python3 - <<'PY' "$INPUT_CSV" "$TEMPLATE_TS" "$OUTDIR" "$LANG_OVERRIDE" "$PLURAL_SEP" "$INPLACE" "$CLEAR_WHEN_EMPTY" "$KEEP_TEXT_WHEN_EMPTY"
  75. import sys, re, csv, xml.etree.ElementTree as ET
  76. from pathlib import Path
  77. INPUT_CSV = Path(sys.argv[1])
  78. TEMPLATE_TS = Path(sys.argv[2])
  79. OUTDIR = Path(sys.argv[3])
  80. LANG_OVERRIDE = sys.argv[4]
  81. PLURAL_SEP = sys.argv[5]
  82. INPLACE = sys.argv[6] == "1"
  83. CLEAR_WHEN_EMPTY = sys.argv[7] == "1"
  84. KEEP_TEXT_WHEN_EMPTY = sys.argv[8] == "1"
  85. def decode_flattened_newlines(s: str) -> str:
  86. if s is None:
  87. return ""
  88. # normalize AND unflatten literal "\n" back to real newlines
  89. s = s.replace("\r\n", "\n").replace("\r", "\n")
  90. return s.replace("\\n", "\n")
  91. def read_csv_maps(csv_path: Path):
  92. """
  93. Returns two maps:
  94. raw_map[src] = raw string value from CSV (unflattened newlines)
  95. exploded[src] = {index: form_text} for any " [plural N]" rows
  96. Joined plurals (value contains PLURAL_SEP) are kept as a single string in raw_map.
  97. We only split by PLURAL_SEP later if the target message is actually plural.
  98. """
  99. raw_map: dict[str, str] = {}
  100. exploded: dict[str, dict[int, str]] = {}
  101. with csv_path.open("r", newline="", encoding="utf-8-sig") as f:
  102. rdr = csv.reader(f) # handles commas/quotes/multiline properly
  103. rows = list(rdr)
  104. if not rows:
  105. return raw_map, exploded
  106. # Detect header
  107. header = None
  108. if any(isinstance(h, str) and h.lower() == "source" for h in rows[0]):
  109. header = [h.strip() for h in rows[0]]
  110. rows = rows[1:]
  111. # Column indices
  112. if header:
  113. try:
  114. source_idx = [i for i, h in enumerate(header) if h.lower() == "source"][0]
  115. except IndexError:
  116. raise SystemExit("CSV header must contain a 'source' column")
  117. trans_idx_candidates = [i for i, h in enumerate(header) if i != source_idx]
  118. if not trans_idx_candidates:
  119. raise SystemExit("CSV must have a translation column")
  120. trans_idx = trans_idx_candidates[0]
  121. else:
  122. if len(rows[0]) < 2:
  123. raise SystemExit("CSV without header must have at least 2 columns: source, translation")
  124. source_idx, trans_idx = 0, 1
  125. plural_tag_re = re.compile(r"\s*\[plural\s+(\d+)\]\s*$", re.IGNORECASE)
  126. for r in rows:
  127. if not r:
  128. continue
  129. if len(r) <= max(source_idx, trans_idx):
  130. r = (r + [""] * (max(source_idx, trans_idx) + 1 - len(r)))
  131. src = (r[source_idx] or "").strip()
  132. if not src:
  133. continue
  134. val = decode_flattened_newlines(r[trans_idx] or "")
  135. m = plural_tag_re.search(src)
  136. if m:
  137. base_src = plural_tag_re.sub("", src).rstrip()
  138. d = exploded.setdefault(base_src, {})
  139. d[int(m.group(1))] = val
  140. continue
  141. # store raw (possibly joined) value; don't split here
  142. raw_map[src] = val
  143. return raw_map, exploded
  144. def text_of(elem: ET.Element) -> str:
  145. parts = []
  146. if elem.text:
  147. parts.append(elem.text)
  148. for child in elem:
  149. parts.append(text_of(child))
  150. if child.tail:
  151. parts.append(child.tail)
  152. return "".join(parts)
  153. def set_text(elem: ET.Element, value: str):
  154. # Replace all content with a single text node
  155. for child in list(elem):
  156. elem.remove(child)
  157. elem.text = value
  158. def update_ts(template_ts: Path, raw_map: dict, exploded: dict, lang_override: str | None):
  159. tree = ET.parse(template_ts)
  160. root = tree.getroot()
  161. if lang_override:
  162. root.set("language", lang_override)
  163. for msg in root.findall(".//context/message"):
  164. source_el = msg.find("source")
  165. if source_el is None:
  166. continue
  167. src = (text_of(source_el) or "").strip()
  168. if not src:
  169. continue
  170. numerus = (msg.get("numerus") or "").lower() == "yes"
  171. trans_el = msg.find("translation")
  172. if trans_el is None:
  173. trans_el = ET.SubElement(msg, "translation")
  174. if src not in raw_map and src not in exploded:
  175. # Not provided in CSV — leave untouched
  176. continue
  177. if numerus:
  178. # prefer exploded rows if present
  179. if src in exploded:
  180. idx_map = exploded[src]
  181. max_index = max(idx_map.keys()) if idx_map else -1
  182. forms = ["" for _ in range(max_index + 1)]
  183. for i, v in idx_map.items():
  184. if i >= 0:
  185. if i >= len(forms):
  186. forms.extend([""] * (i + 1 - len(forms)))
  187. forms[i] = v
  188. else:
  189. raw_val = raw_map.get(src, "")
  190. # only split joined forms if the message is numerus
  191. forms = [s.strip() for s in raw_val.split(PLURAL_SEP)] if PLURAL_SEP in raw_val else [raw_val]
  192. existing = trans_el.findall("numerusform")
  193. if existing:
  194. for i, child in enumerate(existing):
  195. if i < len(forms):
  196. set_text(child, forms[i])
  197. for i in range(len(existing), len(forms)):
  198. n = ET.SubElement(trans_el, "numerusform")
  199. set_text(n, forms[i])
  200. else:
  201. for v in forms:
  202. n = ET.SubElement(trans_el, "numerusform")
  203. set_text(n, v)
  204. empty_all = all((v.strip() == "") for v in forms)
  205. if empty_all:
  206. if CLEAR_WHEN_EMPTY:
  207. for child in trans_el.findall("numerusform"):
  208. set_text(child, "")
  209. trans_el.set("type", "unfinished")
  210. else:
  211. trans_el.attrib.pop("type", None)
  212. else:
  213. # non-plural: take the raw value as-is, even if it contains the plural separator
  214. val = raw_map.get(src, "")
  215. if val.strip() == "":
  216. if CLEAR_WHEN_EMPTY:
  217. set_text(trans_el, "")
  218. trans_el.set("type", "unfinished")
  219. else:
  220. set_text(trans_el, val)
  221. trans_el.attrib.pop("type", None)
  222. return tree
  223. def write_tree(tree: ET.ElementTree, out_path: Path):
  224. out_path.parent.mkdir(parents=True, exist_ok=True)
  225. tree.write(out_path, encoding="utf-8", xml_declaration=True)
  226. raw_map, exploded = read_csv_maps(INPUT_CSV)
  227. tree = update_ts(TEMPLATE_TS, raw_map, exploded, LANG_OVERRIDE or None)
  228. out_path = TEMPLATE_TS if INPLACE else (OUTDIR / TEMPLATE_TS.name)
  229. write_tree(tree, out_path)
  230. print(f"[OK] Applied {INPUT_CSV.name} -> {out_path}")
  231. PY
  232. echo "Done."