stringutils.C 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347
  1. /*
  2. This program is free software: you can redistribute it and/or modify
  3. it under the terms of the GNU General Public License as published by
  4. the Free Software Foundation, either version 3 of the License, or
  5. (at your option) any later version.
  6. This program is distributed in the hope that it will be useful,
  7. but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  9. GNU General Public License for more details.
  10. You should have received a copy of the GNU General Public License
  11. along with this program. If not, see <http://www.gnu.org/licenses/>.
  12. * */
  13. /*
  14. * stringutils.C
  15. *
  16. * Created on: Apr 9, 2013
  17. * Author: xaxaxa
  18. */
  19. #include <cpoll/cpoll.H>
  20. #include "include/stringutils.H"
  21. #include "include/split.H"
  22. using namespace CP;
  23. namespace cppsp
  24. {
  // Maps one hexadecimal digit character to its numeric value.
  // Anything above 'Z' is treated as a lower-case letter, anything above '9'
  // as an upper-case letter and everything else as a decimal digit. The input
  // is not validated, so non-hex characters produce meaningless results.
  inline char hexCharToInt(char ch) {
    if (ch > 'Z') return ch - 'a' + 10;
    if (ch > '9') return ch - 'A' + 10;
    return ch - '0';
  }
  // Maps a value to an upper-case hexadecimal digit character:
  // values below 10 become '0'..'9', larger values become 'A' + (i - 10).
  // The argument is not range-checked.
  inline char intToHexChar(char i) {
    return (i < 10) ? (i + '0') : (i - 10 + 'A');
  }
  34. void urlDecode(const char* in, int inLen, StreamWriter& sw) {
  35. const char* end = in + inLen;
  36. const char* ptr = in;
  37. while (true) {
  38. if (ptr >= end) goto E;
  39. const char* next = (const char*) memchr(ptr, '%', end - ptr);
  40. if (next == NULL) break;
  41. sw.write(ptr, next - ptr);
  42. if (next + 2 >= end) {
  43. sw.write(next, end - next);
  44. goto E;
  45. }
  46. char tmp = hexCharToInt(next[1]) << 4 | hexCharToInt(next[2]);
  47. sw.write(tmp);
  48. ptr = next + 3;
  49. }
  50. if (ptr < end) sw.write(ptr, end - ptr);
  51. E: ;
  52. }
  53. String urlDecode(const char* in, int inLen, StringPool& sp) {
  54. char* ch = sp.beginAdd(inLen); //output size will never exceed input size
  55. char* c = ch;
  56. const char* end = in + inLen;
  57. const char* ptr = in;
  58. while (true) {
  59. if (ptr >= end) goto E;
  60. const char* next = (const char*) memchr(ptr, '%', end - ptr);
  61. if (next == NULL) break;
  62. memcpy(c, ptr, next - ptr);
  63. c += (next - ptr);
  64. if (next + 2 >= end) {
  65. memcpy(c, next, end - next);
  66. c += (end - next);
  67. goto E;
  68. }
  69. *c = hexCharToInt(next[1]) << 4 | hexCharToInt(next[2]);
  70. c++;
  71. ptr = next + 3;
  72. }
  73. if (ptr < end) {
  74. memcpy(c, ptr, end - ptr);
  75. c += (end - ptr);
  76. }
  77. sp.endAdd(c - ch);
  78. return {ch,c-ch};
  79. E: ;
  80. return {(char*)nullptr,0};
  81. }
  82. void urlEncode(const char* in, int inLen, CP::StreamWriter& sw) {
  83. int last_i = 0;
  84. const char* c = in;
  85. char ch[3];
  86. ch[0] = '%';
  87. for (int i = 0; i < inLen; i++) {
  88. if ((48 <= c[i] && c[i] <= 57) || //0-9
  89. (65 <= c[i] && c[i] <= 90) || //abc...xyz
  90. (97 <= c[i] && c[i] <= 122) || //ABC...XYZ
  91. (c[i] == '~' || c[i] == '!' || c[i] == '*' || c[i] == '(' || c[i] == ')'
  92. || c[i] == '\'')) continue;
  93. if (i > last_i) sw.write(in + last_i, i - last_i);
  94. last_i = i + 1;
  95. ch[1] = intToHexChar(c[i] >> 4);
  96. ch[2] = intToHexChar(c[i] & (char) 0xF);
  97. sw.write(ch, 3);
  98. }
  99. if (inLen > last_i) sw.write(in + last_i, inLen - last_i);
  100. }
  101. std::string urlDecode(const char* in, int inLen) {
  102. StringStream ss;
  103. {
  104. StreamWriter sw(ss);
  105. urlDecode(in, inLen, sw);
  106. }
  107. return ss.str();
  108. }
  109. std::string urlEncode(const char* in, int inLen) {
  110. StringStream ss;
  111. {
  112. StreamWriter sw(ss);
  113. urlEncode(in, inLen, sw);
  114. }
  115. return ss.str();
  116. }
  117. std::string htmlEscape(const char* in, int inLen) {
  118. StringStream ss;
  119. {
  120. StreamWriter sw(ss);
  121. htmlEscape(in, inLen, sw);
  122. }
  123. return ss.str();
  124. }
  125. std::string htmlAttributeEscape(const char* in, int inLen) {
  126. StringStream ss;
  127. {
  128. StreamWriter sw(ss);
  129. htmlAttributeEscape(in, inLen, sw);
  130. }
  131. return ss.str();
  132. }
  133. void parseQueryString(const char* in, int inLen, queryStringCallback cb, bool decode) {
  134. if (decode) {
  135. MemoryStream ms;
  136. StreamWriter sw(ms);
  137. split spl(in, inLen, '&');
  138. while (spl.read()) {
  139. const char* s = spl.value.d;
  140. int l = spl.value.len;
  141. const char* _end = s + l;
  142. const char* tmp = (const char*) memchr(s, '=', l);
  143. if (tmp == NULL) {
  144. urlDecode(s, l, sw);
  145. sw.flush();
  146. cb((const char*) ms.data(), ms.length(), nullptr, 0);
  147. ms.clear();
  148. } else {
  149. urlDecode(s, tmp - s, sw);
  150. sw.flush();
  151. int i = ms.length();
  152. urlDecode(tmp + 1, _end - tmp - 1, sw);
  153. sw.flush();
  154. cb((const char*) ms.data(), i, (const char*) (ms.data() + i), ms.length() - i);
  155. ms.clear();
  156. }
  157. }
  158. } else {
  159. split spl(in, inLen, '&');
  160. while (spl.read()) {
  161. const char* s = spl.value.d;
  162. int l = spl.value.len;
  163. const char* _end = s + l;
  164. const char* tmp = (const char*) memchr(s, '=', l);
  165. if (tmp == NULL) cb(s, l, nullptr, 0);
  166. else cb(s, tmp - s, tmp + 1, _end - tmp - 1);
  167. }
  168. }
  169. }
  170. void htmlEscape(const char* in, int inLen, CP::StreamWriter& sw) {
  171. int sz = 0;
  172. for (int i = 0; i < inLen; i++) {
  173. switch (in[i]) {
  174. case '&':
  175. sz += 5;
  176. break;
  177. case '<':
  178. sz += 4;
  179. break;
  180. case '>':
  181. sz += 4;
  182. break;
  183. case '"':
  184. sz += 6;
  185. break;
  186. case '\'':
  187. sz += 6;
  188. break;
  189. default:
  190. sz++;
  191. break;
  192. }
  193. }
  194. char* data = sw.beginWrite(sz);
  195. char* c = data;
  196. for (int i = 0; i < inLen; i++) {
  197. switch (in[i]) {
  198. case '&':
  199. c[0] = '&';
  200. c[1] = 'a';
  201. c[2] = 'm';
  202. c[3] = 'p';
  203. c[4] = ';';
  204. c += 5;
  205. break;
  206. case '<':
  207. c[0] = '&';
  208. c[1] = 'l';
  209. c[2] = 't';
  210. c[3] = ';';
  211. c += 4;
  212. break;
  213. case '>':
  214. c[0] = '&';
  215. c[1] = 'g';
  216. c[2] = 't';
  217. c[3] = ';';
  218. c += 4;
  219. break;
  220. case '"':
  221. c[0] = '&';
  222. c[1] = 'q';
  223. c[2] = 'u';
  224. c[3] = 'o';
  225. c[4] = 't';
  226. c[5] = ';';
  227. c += 6;
  228. break;
  229. case '\'':
  230. c[0] = '&';
  231. c[1] = 'a';
  232. c[2] = 'p';
  233. c[3] = 'o';
  234. c[4] = 's';
  235. c[5] = ';';
  236. c += 6;
  237. break;
  238. default:
  239. *(c++) = in[i];
  240. }
  241. }
  242. sw.endWrite(sz);
  243. }
  244. void htmlAttributeEscape(const char* in, int inLen, CP::StreamWriter& sw) {
  245. int last_i = 0;
  246. const char* tmp;
  247. for (int i = 0; i < inLen; i++) {
  248. switch (in[i]) {
  249. case '&':
  250. tmp = "&amp;";
  251. break;
  252. case '<':
  253. tmp = "&lt;";
  254. break;
  255. case '>':
  256. tmp = "&gt;";
  257. break;
  258. case '"':
  259. tmp = "&quot;";
  260. break;
  261. case '\'':
  262. tmp = "&apos;";
  263. break;
  264. default:
  265. continue;
  266. }
  267. if (i > last_i) sw.write(in + last_i, i - last_i);
  268. last_i = i + 1;
  269. sw.write(tmp);
  270. }
  271. if (inLen > last_i) sw.write(in + last_i, inLen - last_i);
  272. }
  273. int ci_compare(String s1, String s2) {
  274. if (s1.length() > s2.length()) return 1;
  275. if (s1.length() < s2.length()) return -1;
  276. if (s1.length() == 0) return 0;
  277. char a, b;
  278. for (int i = 0; i < s1.length(); i++) {
  279. a = tolower(s1.data()[i]);
  280. b = tolower(s2.data()[i]);
  281. if (a < b) return -1;
  282. if (a > b) return 1;
  283. }
  284. return 0;
  285. }
  // Writes the decimal representation of the non-negative integer i to b,
  // followed by a terminating '\0', and returns the number of digits written
  // (not counting the terminator). b must have room for the digits plus one.
  //
  // Negative values are not supported (not needed for the intended use,
  // writing the content-length header); the output for them is meaningless.
  //
  // The digit count is obtained by integer division. The previous
  // int(log10f(i)) + 1 was inexact for values a float cannot represent
  // (e.g. 99999999 was counted as 9 digits, leaving the first byte unwritten)
  // and could under-count at powers of ten, writing before the start of b.
  static inline int itoa1(int i, char* b) {
    int len = 1;
    for (int rest = i; rest >= 10; rest /= 10) len++;
    char* p = b + len;
    *p = '\0';
    // fill the digits from least to most significant, moving backwards
    while (p != b) {
      *--p = char('0' + i % 10);
      i /= 10;
    }
    return len;
  }
  // Writes the d least-significant decimal digits of i to b[0..d), padding
  // with leading zeros; no terminator is written.
  // i: input number
  // d: number of digits to produce
  // Returns d.
  static inline int itoa2(int i, int d, char* b) {
    static char const digit[] = "0123456789";
    int pos = d;
    while (pos-- > 0) {
      b[pos] = digit[i % 10];
      i /= 10;
    }
    return d;
  }
  311. int rfctime(const tm& time, char* c) {
  312. static const char* days[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" };
  313. static const char* months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep",
  314. "Oct", "Nov", "Dec" };
  315. char* s = c;
  316. //AAA, AA AAA ???? AA:AA:AA GMT\0
  317. const char* day = days[time.tm_wday];
  318. //copy 4 bytes (includes extra null byte)
  319. *(int*) c = (*(int*) day) | int(',') << 24;
  320. c += 4;
  321. *(c++) = ' ';
  322. c += itoa1(time.tm_mday, c);
  323. *(c++) = ' ';
  324. const char* month = months[time.tm_mon];
  325. *(c++) = *(month++);
  326. *(c++) = *(month++);
  327. *(c++) = *(month++);
  328. *(c++) = ' ';
  329. c += itoa1(time.tm_year + 1900, c);
  330. *(c++) = ' ';
  331. c += itoa2(time.tm_hour, 2, c);
  332. *(c++) = ':';
  333. c += itoa2(time.tm_min, 2, c);
  334. *(c++) = ':';
  335. c += itoa2(time.tm_sec, 2, c);
  336. *(c++) = ' ';
  337. *(c++) = 'G';
  338. *(c++) = 'M';
  339. *(c++) = 'T';
  340. *(c++) = '\0';
  341. return int(c - s) - 1;
  342. }
  343. }