sedCommand.cxx 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341
  1. // Filename: sedCommand.cxx
  2. // Created by: drose (24Oct00)
  3. //
  4. ////////////////////////////////////////////////////////////////////
#include "sedCommand.h"
#include "sedAddress.h"
#include "sedContext.h"
#include "sedScript.h"

#include <vector>
  9. ////////////////////////////////////////////////////////////////////
  10. // Function: SedCommand::Constructor
  11. // Access: Public
  12. // Description:
  13. ////////////////////////////////////////////////////////////////////
  14. SedCommand::
  15. SedCommand() {
  16. _addr1 = (SedAddress *)NULL;
  17. _addr2 = (SedAddress *)NULL;
  18. _command = '\0';
  19. _flags = 0;
  20. _active = false;
  21. }
  22. ////////////////////////////////////////////////////////////////////
  23. // Function: SedCommand::Destructor
  24. // Access: Public
  25. // Description:
  26. ////////////////////////////////////////////////////////////////////
  27. SedCommand::
  28. ~SedCommand() {
  29. if (_addr1 != (SedAddress *)NULL) {
  30. delete _addr1;
  31. }
  32. if (_addr2 != (SedAddress *)NULL) {
  33. delete _addr2;
  34. }
  35. if ((_flags & F_have_re) != 0) {
  36. regfree(&_re);
  37. }
  38. }
  39. ////////////////////////////////////////////////////////////////////
  40. // Function: SedCommand::parse_command
  41. // Access: Public
  42. // Description: Scans the indicated string at the given character
  43. // position for a legal command. If a legal command is
  44. // found, stores it and increments p to the first
  45. // non-whitespace character after the command, returning
  46. // true. Otherwise, returns false.
  47. ////////////////////////////////////////////////////////////////////
  48. bool SedCommand::
  49. parse_command(const string &line, size_t &p) {
  50. // First, skip initial whitespace.
  51. while (p < line.length() && isspace(line[p])) {
  52. p++;
  53. }
  54. // Now, check for an address.
  55. if (p < line.length() &&
  56. (isdigit(line[p]) || line[p] == '/' || line[p] == '\\')) {
  57. _addr1 = new SedAddress;
  58. if (!_addr1->parse_address(line, p)) {
  59. return false;
  60. }
  61. if (p < line.length() && line[p] == ',') {
  62. // Another address.
  63. // Skip the comma and more whitespace.
  64. p++;
  65. while (p < line.length() && isspace(line[p])) {
  66. p++;
  67. }
  68. _addr2 = new SedAddress;
  69. if (!_addr2->parse_address(line, p)) {
  70. return false;
  71. }
  72. }
  73. }
  74. if (p >= line.length()) {
  75. // It's a null command, which is acceptable; ignore it.
  76. return true;
  77. }
  78. _command = line[p];
  79. // Skip more whitespace after the command letter.
  80. p++;
  81. while (p < line.length() && isspace(line[p])) {
  82. p++;
  83. }
  84. // At the moment, we only accept a small subset of sed commands. We
  85. // can add more later as we see the need.
  86. switch (_command) {
  87. case 'd':
  88. // No arguments.
  89. return true;
  90. case 's':
  91. // /regexp/repl/flags
  92. return parse_s_params(line, p);
  93. default:
  94. cerr << "Unknown command: " << _command << "\n";
  95. return false;
  96. }
  97. }
  98. ////////////////////////////////////////////////////////////////////
  99. // Function: SedCommand::run
  100. // Access: Public
  101. // Description: Runs the script command, modifying the context and/or
  102. // the script position as appropriate.
  103. ////////////////////////////////////////////////////////////////////
  104. void SedCommand::
  105. run(SedScript &script, SedContext &context) {
  106. // First, see if this command matches the pattern space.
  107. bool matches = false;
  108. if (_addr1 != (SedAddress *)NULL && _addr2 != (SedAddress *)NULL) {
  109. // If the user supplied two addresses, all lines inclusive between
  110. // the lines matched by the two addresses are considered matching.
  111. if (_active) {
  112. // We have previously matched _addr1. Therefore this line is
  113. // in, but are the rest of the lines following this one?
  114. matches = true;
  115. if (_addr2->matches(context)) {
  116. // If this line matches addr2, that's the end of our range for
  117. // next time.
  118. _active = false;
  119. }
  120. } else {
  121. // We have not yet matched _addr1. This line and subsequent
  122. // lines are in only if we match now.
  123. if (_addr1->matches(context)) {
  124. matches = true;
  125. if (!_addr2->precedes(context)) {
  126. _active = true;
  127. }
  128. }
  129. }
  130. } else if (_addr1 != (SedAddress *)NULL) {
  131. // If the user supplied only one address, only those lines that
  132. // exactly match the address are considered matching.
  133. matches = _addr1->matches(context);
  134. } else {
  135. // If the user supplied no addresses, all lines are considered
  136. // matching.
  137. matches = true;
  138. }
  139. if (matches) {
  140. do_command(script, context);
  141. }
  142. }
  143. ////////////////////////////////////////////////////////////////////
  144. // Function: SedCommand::parse_s_params
  145. // Access: Private
  146. // Description: Parses the /regexp/replacement/flags parameters that
  147. // follow the 's' command.
  148. ////////////////////////////////////////////////////////////////////
  149. bool SedCommand::
  150. parse_s_params(const string &line, size_t &p) {
  151. size_t p0 = p;
  152. char delimiter = line[p];
  153. p++;
  154. if (p < line.length() && delimiter == '\\') {
  155. // A backslash might escape the opening character.
  156. delimiter = line[p];
  157. p++;
  158. }
  159. size_t begin = p;
  160. while (p < line.length() && line[p] != delimiter) {
  161. if (line[p] == '\\') {
  162. p++;
  163. // A backslash could escape the closing character.
  164. }
  165. p++;
  166. }
  167. if (p >= line.length()) {
  168. cerr << "Could not find terminating character '" << delimiter
  169. << "' in regular expression: " << line.substr(p0) << "\n";
  170. return false;
  171. }
  172. string re = line.substr(begin, p - begin);
  173. p++;
  174. int error = regcomp(&_re, re.c_str(), 0);
  175. if (error != 0) {
  176. static const int errbuf_size = 512;
  177. char errbuf[errbuf_size];
  178. regerror(error, &_re, errbuf, errbuf_size);
  179. cerr << "Invalid regular expression: " << re << "\n"
  180. << errbuf << "\n";
  181. return false;
  182. }
  183. _flags |= F_have_re;
  184. // Get the replacement string.
  185. begin = p;
  186. while (p < line.length() && line[p] != delimiter) {
  187. if (line[p] == '\\') {
  188. p++;
  189. // A backslash could escape the closing character.
  190. }
  191. p++;
  192. }
  193. if (p >= line.length()) {
  194. cerr << "Could not find terminating character '" << delimiter
  195. << "' in replacement string: " << line.substr(p0) << "\n";
  196. return false;
  197. }
  198. _string2 = line.substr(begin, p - begin);
  199. // Skip the final delimiter.
  200. p++;
  201. if (p < line.length() && line[p] == 'g') {
  202. // Global flag.
  203. p++;
  204. _flags |= F_g;
  205. }
  206. // Skip any more whitespace after the parameters.
  207. while (p < line.length() && isspace(line[p])) {
  208. p++;
  209. }
  210. return true;
  211. }
  212. ////////////////////////////////////////////////////////////////////
  213. // Function: SedCommand::do_command
  214. // Access: Private
  215. // Description: Actually invokes the command, once it has been
  216. // determined that the command applied to the current
  217. // pattern space.
  218. ////////////////////////////////////////////////////////////////////
  219. void SedCommand::
  220. do_command(SedScript &script, SedContext &context) {
  221. switch (_command) {
  222. case '\0':
  223. // Null command.
  224. return;
  225. case 'd':
  226. // Delete line.
  227. context._deleted = true;
  228. script._next_command = script._commands.end();
  229. return;
  230. case 's':
  231. // Substitute.
  232. do_s_command(context);
  233. return;
  234. }
  235. cerr << "Undefined command: " << _command << "\n";
  236. }
  237. ////////////////////////////////////////////////////////////////////
  238. // Function: SedCommand::do_s_command
  239. // Access: Private
  240. // Description: Invokes the s command, which performs a
  241. // pattern/replacement substitution.
  242. ////////////////////////////////////////////////////////////////////
  243. void SedCommand::
  244. do_s_command(SedContext &context) {
  245. size_t nmatch = _re.re_nsub + 1;
  246. regmatch_t *pmatch = new regmatch_t[nmatch];
  247. string result;
  248. const char *str = context._pattern_space.c_str();
  249. int error = regexec(&_re, str, nmatch, pmatch, 0);
  250. while (error == 0) {
  251. // Here's a match. Determine the replacement.
  252. string repl;
  253. size_t p = 0;
  254. while (p < _string2.length()) {
  255. if (_string2[p] == '\\') {
  256. p++;
  257. if (p < _string2.length()) {
  258. if (isdigit(_string2[p])) {
  259. // Here's a subexpression reference.
  260. const char *numstr = _string2.c_str() + p;
  261. char *numend;
  262. int ref = strtol(numstr, &numend, 10);
  263. p += (numend - numstr);
  264. if (ref <= 0 || ref >= (int)nmatch) {
  265. cerr << "Invalid subexpression number: " << ref << "\n";
  266. } else {
  267. repl += string(str + pmatch[ref].rm_so,
  268. pmatch[ref].rm_eo - pmatch[ref].rm_so);
  269. }
  270. } else {
  271. // Here's an escaped character.
  272. repl += _string2[p];
  273. p++;
  274. }
  275. }
  276. } else {
  277. // Here's a normal character.
  278. repl += _string2[p];
  279. p++;
  280. }
  281. }
  282. // Store the result so far.
  283. result += string(str, pmatch[0].rm_so);
  284. result += repl;
  285. str += pmatch[0].rm_eo;
  286. if ((_flags & F_g) == 0) {
  287. // If we don't have the global flag set, stop after the first iteration.
  288. result += str;
  289. context._pattern_space = result;
  290. delete[] pmatch;
  291. return;
  292. }
  293. error = regexec(&_re, str, nmatch, pmatch, 0);
  294. }
  295. // All done.
  296. result += str;
  297. context._pattern_space = result;
  298. delete[] pmatch;
  299. }