remove-comments.sh 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. #!/usr/bin/env bash
  2. # scripts/remove-comments.sh
  3. # Remove comments from C/C++ source files in-place.
  4. set -Eeuo pipefail
  5. trap 'echo "error: line $LINENO: $BASH_COMMAND" >&2' ERR
  6. EXTS_DEFAULT="c,cc,cpp,cxx,h,hh,hpp,hxx,ipp,inl,tpp,qml,vert,frag,glsl"
  7. ROOTS=(".")
  8. DRY_RUN=0
  9. BACKUP=0 # OFF by default
  10. QUIET=0
  11. EXTS="$EXTS_DEFAULT"
  12. usage() {
  13. cat <<'USAGE'
  14. remove-comments.sh - strip comments from C/C++ and shader files.
  15. Usage:
  16. scripts/remove-comments.sh [options] [PATH ...]
  17. Options:
  18. -x, --ext Comma-separated extensions to scan (default: c,cc,cpp,cxx,h,hh,hpp,hxx,ipp,inl,tpp,qml,vert,frag,glsl)
  19. -n, --dry-run Show files that would be modified; don't write changes
  20. --backup Create FILE.bak before writing (default: OFF)
  21. -q, --quiet Less output
  22. -h, --help Show this help
  23. Examples:
  24. scripts/remove-comments.sh
  25. scripts/remove-comments.sh --backup src/ include/
  26. scripts/remove-comments.sh -x c,cpp,hpp
  27. scripts/remove-comments.sh assets/shaders/
  28. USAGE
  29. }
  30. log() { (( QUIET == 0 )) && printf '%s\n' "$*"; }
  31. die() { printf 'error: %s\n' "$*" >&2; exit 1; }
  32. # --- arg parsing ---
  33. args=()
  34. while [[ $# -gt 0 ]]; do
  35. case "$1" in
  36. -x|--ext) EXTS="${2:?missing extensions}"; shift 2 ;;
  37. -n|--dry-run) DRY_RUN=1; shift ;;
  38. --backup) BACKUP=1; shift ;;
  39. -q|--quiet) QUIET=1; shift ;;
  40. -h|--help) usage; exit 0 ;;
  41. --) shift; break ;;
  42. -*) die "Unknown option: $1" ;;
  43. *) args+=("$1"); shift ;;
  44. esac
  45. done
  46. ((${#args[@]})) && ROOTS=("${args[@]}")
  47. # Build extension list (portable; no mapfile)
  48. IFS=',' read -r -a EXT_ARR <<< "$EXTS"
  49. ((${#EXT_ARR[@]})) || die "No extensions provided"
  50. # Build find predicates
  51. FIND_NAME=()
  52. for e in "${EXT_ARR[@]}"; do
  53. e="${e#.}"
  54. FIND_NAME+=(-o -iname "*.${e}")
  55. done
  56. FIND_NAME=("${FIND_NAME[@]:1}") # drop leading -o
  57. # Pick Python
  58. if command -v python3 >/dev/null 2>&1; then
  59. PYTHON_BIN=python3
  60. elif command -v python >/dev/null 2>&1; then
  61. PYTHON_BIN=python
  62. else
  63. die "Python is required but not found."
  64. fi
  65. # Python filter as a literal (processes BYTES; preserves UTF-8/Unicode)
  66. PY_FILTER=$(cat <<'PYCODE'
  67. import sys, re
  68. # Read raw bytes and operate purely on bytes so UTF-8 (and any other) is preserved.
  69. path = sys.argv[1]
  70. with open(path, 'rb') as f:
  71. data = f.read()
  72. RAW_PREFIX = re.compile(rb'(?:u8|u|U|L)?R"([^\s()\\]{0,16})\(')
  73. def isspace(b): # b is an int 0..255
  74. return b in b' \t\r\n\v\f'
  75. def strip_comments(b: bytes) -> bytes:
  76. out = bytearray()
  77. i = 0
  78. n = len(b)
  79. def prev_byte():
  80. return out[-1] if out else None
  81. while i < n:
  82. # C++ raw string?
  83. m = RAW_PREFIX.match(b, i)
  84. if m:
  85. delim = m.group(1)
  86. start = m.end()
  87. end_token = b')' + delim + b'"'
  88. j = b.find(end_token, start)
  89. if j != -1:
  90. out += b[i:j+len(end_token)]
  91. i = j + len(end_token)
  92. continue
  93. c = b[i]
  94. # Regular string / char literals
  95. if c == 0x22 or c == 0x27: # " or '
  96. quote = c
  97. out.append(c); i += 1
  98. while i < n:
  99. ch = b[i]; out.append(ch); i += 1
  100. if ch == 0x5C and i < n: # backslash -> escape next byte verbatim
  101. out.append(b[i]); i += 1
  102. elif ch == quote:
  103. break
  104. continue
  105. # Comments
  106. if c == 0x2F and i + 1 < n: # '/'
  107. nx = b[i+1]
  108. # // line comment
  109. if nx == 0x2F:
  110. i += 2
  111. while i < n and b[i] != 0x0A:
  112. i += 1
  113. if i < n and b[i] == 0x0A:
  114. # Preserve CRLF if present
  115. if i-1 >= 0 and b[i-1] == 0x0D:
  116. out += b'\r\n'
  117. else:
  118. out += b'\n'
  119. i += 1
  120. continue
  121. # /* block comment */
  122. if nx == 0x2A:
  123. i += 2
  124. had_nl = False
  125. while i < n - 1:
  126. if b[i] == 0x0A:
  127. had_nl = True
  128. if b[i] == 0x2A and b[i+1] == 0x2F:
  129. i += 2
  130. break
  131. i += 1
  132. # Insert minimal whitespace so tokens don't glue
  133. nextc = b[i] if i < n else None
  134. p = prev_byte()
  135. if had_nl:
  136. if p not in (None, 0x0A, 0x0D):
  137. out.append(0x0A) # '\n'
  138. else:
  139. if p is not None and not isspace(p) and (nextc is not None) and not isspace(nextc):
  140. out.append(0x20) # ' '
  141. continue
  142. # Default: copy byte verbatim (preserves any UTF-8 / binary)
  143. out.append(c); i += 1
  144. return bytes(out)
  145. sys.stdout.buffer.write(strip_comments(data))
  146. PYCODE
  147. )
  148. changed=0
  149. processed=0
  150. process_file() {
  151. local f="$1"
  152. log "processing: $f"
  153. # Capture current file mode (GNU and BSD)
  154. local mode
  155. mode="$(stat -c '%a' "$f" 2>/dev/null || stat -f '%Lp' "$f" 2>/dev/null || echo '')"
  156. # mktemp: handle BSD/GNU differences
  157. local tmp
  158. tmp="$(mktemp 2>/dev/null || mktemp -t rmcomments)" || die "mktemp failed"
  159. # Run the Python filter; keep argv[1] = file path
  160. if ! printf '%s\n' "$PY_FILTER" | "$PYTHON_BIN" - "$f" >"$tmp"; then
  161. rm -f "$tmp"
  162. die "Python filter failed on $f"
  163. fi
  164. if ! cmp -s "$f" "$tmp"; then
  165. if (( DRY_RUN == 1 )); then
  166. echo "would modify: $f"
  167. rm -f "$tmp"
  168. ((processed+=1))
  169. return
  170. fi
  171. if (( BACKUP == 1 )); then
  172. cp -p -- "$f" "$f.bak" 2>/dev/null || cp -p "$f" "$f.bak" || true
  173. fi
  174. # Replace file
  175. mv -- "$tmp" "$f" 2>/dev/null || mv "$tmp" "$f"
  176. # Restore original mode if we captured it
  177. [[ -n "$mode" ]] && chmod "$mode" "$f" 2>/dev/null || true
  178. ((changed+=1))
  179. else
  180. rm -f "$tmp"
  181. fi
  182. ((processed+=1))
  183. }
  184. log "Scanning: ${ROOTS[*]}"
  185. log "Extensions: $EXTS"
  186. (( DRY_RUN )) && log "(dry run)"
  187. # Find files and process
  188. while IFS= read -r -d '' f; do
  189. process_file "$f"
  190. done < <(
  191. find "${ROOTS[@]}" -type f \( "${FIND_NAME[@]}" \) \
  192. -not -path '*/.git/*' -not -path '*/.svn/*' -not -path '*/build/*' -print0
  193. )
  194. if (( DRY_RUN == 1 )); then
  195. echo "dry run complete. processed: $processed file(s); would modify: $changed"
  196. else
  197. echo "done. processed: $processed file(s); modified: $changed"
  198. fi