#!/usr/bin/env bash
# scripts/remove-comments.sh
# Remove comments from C/C++ source files in-place.
#
# Strict mode: -E lets the ERR trap fire inside functions and subshells,
# -e aborts on unhandled failures, -u rejects unset variables, and
# pipefail surfaces a failure from any stage of a pipeline.
set -Eeuo pipefail
trap 'echo "error: line $LINENO: $BASH_COMMAND" >&2' ERR

# Extensions scanned when -x/--ext is not given (constant).
readonly EXTS_DEFAULT="c,cc,cpp,cxx,h,hh,hpp,hxx,ipp,inl,tpp,qml"

# Run-time settings; these are the defaults used when no option overrides them.
ROOTS=(".") # paths to scan
DRY_RUN=0   # 1 = report only, write nothing
BACKUP=0    # OFF by default
QUIET=0     # 1 = suppress progress messages
EXTS="$EXTS_DEFAULT"
  12. usage() {
  13. cat <<'USAGE'
  14. remove-comments.sh - strip comments from C/C++ files.
  15. Usage:
  16. scripts/remove-comments.sh [options] [PATH ...]
  17. Options:
  18. -x, --ext Comma-separated extensions to scan (default: c,cc,cpp,cxx,h,hh,hpp,hxx,ipp,inl,tpp,qml)
  19. -n, --dry-run Show files that would be modified; don't write changes
  20. --backup Create FILE.bak before writing (default: OFF)
  21. -q, --quiet Less output
  22. -h, --help Show this help
  23. Examples:
  24. scripts/remove-comments.sh
  25. scripts/remove-comments.sh --backup src/ include/
  26. scripts/remove-comments.sh -x c,cpp,hpp
  27. USAGE
  28. }
  29. log() { (( QUIET == 0 )) && printf '%s\n' "$*"; }
  30. die() { printf 'error: %s\n' "$*" >&2; exit 1; }
  31. # --- arg parsing ---
  32. args=()
  33. while [[ $# -gt 0 ]]; do
  34. case "$1" in
  35. -x|--ext) EXTS="${2:?missing extensions}"; shift 2 ;;
  36. -n|--dry-run) DRY_RUN=1; shift ;;
  37. --backup) BACKUP=1; shift ;;
  38. -q|--quiet) QUIET=1; shift ;;
  39. -h|--help) usage; exit 0 ;;
  40. --) shift; break ;;
  41. -*) die "Unknown option: $1" ;;
  42. *) args+=("$1"); shift ;;
  43. esac
  44. done
  45. ((${#args[@]})) && ROOTS=("${args[@]}")
  46. # Build extension list (portable; no mapfile)
  47. IFS=',' read -r -a EXT_ARR <<< "$EXTS"
  48. ((${#EXT_ARR[@]})) || die "No extensions provided"
  49. # Build find predicates
  50. FIND_NAME=()
  51. for e in "${EXT_ARR[@]}"; do
  52. e="${e#.}"
  53. FIND_NAME+=(-o -iname "*.${e}")
  54. done
  55. FIND_NAME=("${FIND_NAME[@]:1}") # drop leading -o
  56. # Pick Python
  57. if command -v python3 >/dev/null 2>&1; then
  58. PYTHON_BIN=python3
  59. elif command -v python >/dev/null 2>&1; then
  60. PYTHON_BIN=python
  61. else
  62. die "Python is required but not found."
  63. fi
  64. # Python filter as a literal (processes BYTES; preserves UTF-8/Unicode)
  65. PY_FILTER=$(cat <<'PYCODE'
  66. import sys, re
  67. # Read raw bytes and operate purely on bytes so UTF-8 (and any other) is preserved.
  68. path = sys.argv[1]
  69. with open(path, 'rb') as f:
  70. data = f.read()
  71. RAW_PREFIX = re.compile(rb'(?:u8|u|U|L)?R"([^\s()\\]{0,16})\(')
  72. def isspace(b): # b is an int 0..255
  73. return b in b' \t\r\n\v\f'
  74. def strip_comments(b: bytes) -> bytes:
  75. out = bytearray()
  76. i = 0
  77. n = len(b)
  78. def prev_byte():
  79. return out[-1] if out else None
  80. while i < n:
  81. # C++ raw string?
  82. m = RAW_PREFIX.match(b, i)
  83. if m:
  84. delim = m.group(1)
  85. start = m.end()
  86. end_token = b')' + delim + b'"'
  87. j = b.find(end_token, start)
  88. if j != -1:
  89. out += b[i:j+len(end_token)]
  90. i = j + len(end_token)
  91. continue
  92. c = b[i]
  93. # Regular string / char literals
  94. if c == 0x22 or c == 0x27: # " or '
  95. quote = c
  96. out.append(c); i += 1
  97. while i < n:
  98. ch = b[i]; out.append(ch); i += 1
  99. if ch == 0x5C and i < n: # backslash -> escape next byte verbatim
  100. out.append(b[i]); i += 1
  101. elif ch == quote:
  102. break
  103. continue
  104. # Comments
  105. if c == 0x2F and i + 1 < n: # '/'
  106. nx = b[i+1]
  107. # // line comment
  108. if nx == 0x2F:
  109. i += 2
  110. while i < n and b[i] != 0x0A:
  111. i += 1
  112. if i < n and b[i] == 0x0A:
  113. # Preserve CRLF if present
  114. if i-1 >= 0 and b[i-1] == 0x0D:
  115. out += b'\r\n'
  116. else:
  117. out += b'\n'
  118. i += 1
  119. continue
  120. # /* block comment */
  121. if nx == 0x2A:
  122. i += 2
  123. had_nl = False
  124. while i < n - 1:
  125. if b[i] == 0x0A:
  126. had_nl = True
  127. if b[i] == 0x2A and b[i+1] == 0x2F:
  128. i += 2
  129. break
  130. i += 1
  131. # Insert minimal whitespace so tokens don't glue
  132. nextc = b[i] if i < n else None
  133. p = prev_byte()
  134. if had_nl:
  135. if p not in (None, 0x0A, 0x0D):
  136. out.append(0x0A) # '\n'
  137. else:
  138. if p is not None and not isspace(p) and (nextc is not None) and not isspace(nextc):
  139. out.append(0x20) # ' '
  140. continue
  141. # Default: copy byte verbatim (preserves any UTF-8 / binary)
  142. out.append(c); i += 1
  143. return bytes(out)
  144. sys.stdout.buffer.write(strip_comments(data))
  145. PYCODE
  146. )
  147. changed=0
  148. processed=0
  149. process_file() {
  150. local f="$1"
  151. log "processing: $f"
  152. # Capture current file mode (GNU and BSD)
  153. local mode
  154. mode="$(stat -c '%a' "$f" 2>/dev/null || stat -f '%Lp' "$f" 2>/dev/null || echo '')"
  155. # mktemp: handle BSD/GNU differences
  156. local tmp
  157. tmp="$(mktemp 2>/dev/null || mktemp -t rmcomments)" || die "mktemp failed"
  158. # Run the Python filter; keep argv[1] = file path
  159. if ! printf '%s\n' "$PY_FILTER" | "$PYTHON_BIN" - "$f" >"$tmp"; then
  160. rm -f "$tmp"
  161. die "Python filter failed on $f"
  162. fi
  163. if ! cmp -s "$f" "$tmp"; then
  164. if (( DRY_RUN == 1 )); then
  165. echo "would modify: $f"
  166. rm -f "$tmp"
  167. ((processed+=1))
  168. return
  169. fi
  170. if (( BACKUP == 1 )); then
  171. cp -p -- "$f" "$f.bak" 2>/dev/null || cp -p "$f" "$f.bak" || true
  172. fi
  173. # Replace file
  174. mv -- "$tmp" "$f" 2>/dev/null || mv "$tmp" "$f"
  175. # Restore original mode if we captured it
  176. [[ -n "$mode" ]] && chmod "$mode" "$f" 2>/dev/null || true
  177. ((changed+=1))
  178. else
  179. rm -f "$tmp"
  180. fi
  181. ((processed+=1))
  182. }
  183. log "Scanning: ${ROOTS[*]}"
  184. log "Extensions: $EXTS"
  185. (( DRY_RUN )) && log "(dry run)"
  186. # Find files and process
  187. while IFS= read -r -d '' f; do
  188. process_file "$f"
  189. done < <(
  190. find "${ROOTS[@]}" -type f \( "${FIND_NAME[@]}" \) \
  191. -not -path '*/.git/*' -not -path '*/.svn/*' -not -path '*/build/*' -print0
  192. )
  193. if (( DRY_RUN == 1 )); then
  194. echo "dry run complete. processed: $processed file(s); would modify: $changed"
  195. else
  196. echo "done. processed: $processed file(s); modified: $changed"
  197. fi