tokenizecmd.cpp 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
  1. /*
  2. * Copyright 2012 Branimir Karadzic. All rights reserved.
  3. * License: http://www.opensource.org/licenses/BSD-2-Clause
  4. */
  5. #include <stdint.h>
  6. #include <stdio.h>
  7. #include <ctype.h>
  8. #include "tokenizecmd.h"
  9. // Reference:
  10. // http://msdn.microsoft.com/en-us/library/a1y7w461.aspx
  // Splits a command line into NUL-terminated arguments.
  //
  // Parsing rules implemented here:
  //  - Whitespace separates arguments, except inside a quoted section.
  //  - An argument that starts with '"' extends to the next unescaped '"'.
  //  - A '"' inside an unquoted argument toggles a quoted section; the quote
  //    character itself is not copied.
  //  - A run of backslashes that is NOT followed by '"' is copied literally.
  //  - In a run of backslashes that IS followed by '"', the first backslash
  //    escapes the character after it (so \" yields ", and \\ yields \).
  //  - A backslash that is the very last character before the end of the
  //    input (or before _term) is dropped.
  //  - A trailing empty argument is discarded.
  //
  // _commandLine  NUL-terminated input. Parsing stops at '\0' or at the first
  //               occurrence of _term.
  // _buffer       Receives the argument strings back to back. It is NOT bounds
  //               checked; strlen(_commandLine) + 1 bytes is sufficient because
  //               no step emits more characters than it consumes, plus one
  //               final terminator.
  // _bufferSize   Output only: offset of the final terminating NUL inside
  //               _buffer. The incoming value is never read.
  // _argc         Output: number of arguments stored in _argv.
  // _argv         Output: pointers into _buffer, one per argument.
  // _maxArgvs     Capacity of _argv. Parsing stops before an argument that
  //               would not fit.
  // _term         Additional character that ends the command (for example
  //               '\n' when several commands share one string).
  //
  // Returns a pointer to the character following the one at which parsing
  // stopped, or to the terminating '\0' if the end of the input was reached.
  // NOTE(review): when parsing stops because _argv is full, the returned
  // pointer lies inside the unparsed remainder of the current command.
  const char* tokenizeCommandLine(const char* _commandLine, char* _buffer, uint32_t& _bufferSize, int& _argc, char* _argv[], int _maxArgvs, char _term)
  {
      int argc = 0;
      const char* curr = _commandLine;
      char* currOut = _buffer;
      char term = ' ';
      bool sub = false;

      // Set once an argument is found that has no free slot in _argv. Checking
      // the limit when an argument *starts* (rather than in the loop condition)
      // lets the last slot be filled completely.
      bool full = _maxArgvs <= 0;

      enum ParserState
      {
          SkipWhitespace,
          SetTerm,
          Copy,
          Escape,
          End,
      };

      ParserState state = SkipWhitespace;

      while ('\0' != *curr
      &&     _term != *curr
      &&     !full)
      {
          switch (state)
          {
          case SkipWhitespace:
              // Stop at _term even when it is itself whitespace (e.g. '\n'),
              // otherwise the next command would be merged into this one.
              // The cast keeps isspace() defined for negative char values.
              for (; _term != *curr && isspace((unsigned char)*curr); ++curr) {}
              state = SetTerm;
              break;

          case SetTerm:
              if (argc >= _maxArgvs)
              {
                  full = true;
                  break;
              }

              if ('"' == *curr)
              {
                  term = '"';
                  ++curr; // skip beginning quote
              }
              else
              {
                  term = ' ';
              }

              _argv[argc] = currOut;
              ++argc;

              state = Copy;
              break;

          case Copy:
              if ('\\' == *curr)
              {
                  state = Escape;
              }
              else if ('"' == *curr
                   &&  '"' != term)
              {
                  // Quote inside an unquoted argument: toggle quoted section.
                  sub = !sub;
              }
              else if (isspace((unsigned char)*curr) && !sub)
              {
                  state = End;
              }
              else if (term != *curr || sub)
              {
                  *currOut = *curr;
                  ++currOut;
              }
              else
              {
                  // Closing quote of a quoted argument.
                  state = End;
              }

              ++curr;
              break;

          case Escape:
              {
                  // Copy already stepped past the first backslash; go back to it
                  // and measure the whole run.
                  const char* start = --curr;
                  for (; '\\' == *curr; ++curr) {}

                  if ('"' != *curr)
                  {
                      // Backslashes not followed by a quote are literal.
                      int count = (int)(curr - start);
                      curr = start;
                      for (int ii = 0; ii < count; ++ii)
                      {
                          *currOut = *curr;
                          ++currOut;
                          ++curr;
                      }
                  }
                  else
                  {
                      // Emit the character escaped by the first backslash and
                      // resume right after it; remaining pairs are handled by
                      // re-entering this state.
                      curr = start + 1;
                      *currOut = *curr;
                      ++currOut;
                      ++curr;
                  }
              }

              state = Copy;
              break;

          case End:
              *currOut = '\0';
              ++currOut;
              state = SkipWhitespace;
              break;
          }
      }

      // Terminate the argument that was in progress when the input ended.
      *currOut = '\0';

      // Drop a trailing empty argument.
      if (0 < argc
      &&  '\0' == _argv[argc - 1][0])
      {
          --argc;
      }

      _bufferSize = (uint32_t)(currOut - _buffer);
      _argc = argc;

      // Step over the character that stopped parsing, unless it is the end of
      // the input.
      if ('\0' != *curr)
      {
          ++curr;
      }

      return curr;
  }
  122. #if 0
  123. #include <string.h>
  124. int main(int _argc, const char** _argv)
  125. {
  126. const char* input[7] =
  127. {
  128. " ",
  129. "\\",
  130. "\"a b c\" d e",
  131. "\"ab\\\"c\" \"\\\\\" d",
  132. "a\\\\\\b d\"e f\"g h",
  133. "a\\\\\\\"b c d",
  134. "a\\\\\\\\\"b c\" d e",
  135. };
  136. const int expected_argc[7] =
  137. {
  138. 0, 0, 3, 3, 3, 3, 3
  139. };
  140. const char* expected_results[] =
  141. {
  142. "a b c", "d", "e",
  143. "ab\"c", "\\", "d",
  144. "a\\\\\\b", "de fg", "h",
  145. "a\\\"b", "c", "d",
  146. "a\\\\b c", "d", "e",
  147. };
  148. const char** expected_argv[7] =
  149. {
  150. NULL,
  151. NULL,
  152. &expected_results[0],
  153. &expected_results[3],
  154. &expected_results[6],
  155. &expected_results[9],
  156. &expected_results[12],
  157. };
  158. for (int ii = 0; ii < 7; ++ii)
  159. {
  160. char commandLine[1024];
  161. unsigned int size = 1023;
  162. char* argv[50];
  163. int argc = tokenizeCommandLine(input[ii], commandLine, size, argv, 50);
  164. printf("\n%d (%d): %s %s\n", ii, argc, input[ii], expected_argc[ii]==argc?"":"FAILED!");
  165. for (int jj = 0; jj < argc; ++jj)
  166. {
  167. printf("\t%d: {%s} %s\n"
  168. , jj
  169. , argv[jj]
  170. , jj<argc?(0==strcmp(argv[jj], expected_argv[ii][jj])?"":"FAILED!"):"FAILED!"
  171. );
  172. }
  173. }
  174. return 0;
  175. }
  176. #endif // 0