parser.hpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615
  1. /*
  2. * Copyright (c) 2014, Peter Thorson. All rights reserved.
  3. *
  4. * Redistribution and use in source and binary forms, with or without
  5. * modification, are permitted provided that the following conditions are met:
  6. * * Redistributions of source code must retain the above copyright
  7. * notice, this list of conditions and the following disclaimer.
  8. * * Redistributions in binary form must reproduce the above copyright
  9. * notice, this list of conditions and the following disclaimer in the
  10. * documentation and/or other materials provided with the distribution.
  11. * * Neither the name of the WebSocket++ Project nor the
  12. * names of its contributors may be used to endorse or promote products
  13. * derived from this software without specific prior written permission.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  16. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18. * ARE DISCLAIMED. IN NO EVENT SHALL PETER THORSON BE LIABLE FOR ANY
  19. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  20. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  21. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  22. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  24. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. *
  26. */
  27. #ifndef HTTP_PARSER_HPP
  28. #define HTTP_PARSER_HPP
  29. #include <algorithm>
  30. #include <map>
  31. #include <string>
  32. #include <utility>
  33. #include <websocketpp/utilities.hpp>
  34. #include <websocketpp/http/constants.hpp>
  35. namespace websocketpp {
  36. namespace http {
  37. namespace parser {
  38. namespace state {
  39. enum value {
  40. method,
  41. resource,
  42. version,
  43. headers
  44. };
  45. }
  46. namespace body_encoding {
  47. enum value {
  48. unknown,
  49. plain,
  50. chunked
  51. };
  52. }
  53. typedef std::map<std::string, std::string, utility::ci_less > header_list;
  54. /// Read and return the next token in the stream
  55. /**
  56. * Read until a non-token character is found and then return the token and
  57. * iterator to the next character to read
  58. *
  59. * @param begin An iterator to the beginning of the sequence
  60. * @param end An iterator to the end of the sequence
  61. * @return A pair containing the token and an iterator to the next character in
  62. * the stream
  63. */
  64. template <typename InputIterator>
  65. std::pair<std::string,InputIterator> extract_token(InputIterator begin,
  66. InputIterator end)
  67. {
  68. InputIterator it = std::find_if(begin,end,&is_not_token_char);
  69. return std::make_pair(std::string(begin,it),it);
  70. }
  71. /// Read and return the next quoted string in the stream
  72. /**
  73. * Read a double quoted string starting at `begin`. The quotes themselves are
  74. * stripped. The quoted value is returned along with an iterator to the next
  75. * character to read
  76. *
  77. * @param begin An iterator to the beginning of the sequence
  78. * @param end An iterator to the end of the sequence
  79. * @return A pair containing the string read and an iterator to the next
  80. * character in the stream
  81. */
  82. template <typename InputIterator>
  83. std::pair<std::string,InputIterator> extract_quoted_string(InputIterator begin,
  84. InputIterator end)
  85. {
  86. std::string s;
  87. if (end == begin) {
  88. return std::make_pair(s,begin);
  89. }
  90. if (*begin != '"') {
  91. return std::make_pair(s,begin);
  92. }
  93. InputIterator cursor = begin+1;
  94. InputIterator marker = cursor;
  95. cursor = std::find(cursor,end,'"');
  96. while (cursor != end) {
  97. // either this is the end or a quoted string
  98. if (*(cursor-1) == '\\') {
  99. s.append(marker,cursor-1);
  100. s.append(1,'"');
  101. ++cursor;
  102. marker = cursor;
  103. } else {
  104. s.append(marker,cursor);
  105. ++cursor;
  106. return std::make_pair(s,cursor);
  107. }
  108. cursor = std::find(cursor,end,'"');
  109. }
  110. return std::make_pair("",begin);
  111. }
  112. /// Read and discard one unit of linear whitespace
  113. /**
  114. * Read one unit of linear white space and return the iterator to the character
  115. * afterwards. If `begin` is returned, no whitespace was extracted.
  116. *
  117. * @param begin An iterator to the beginning of the sequence
  118. * @param end An iterator to the end of the sequence
  119. * @return An iterator to the character after the linear whitespace read
  120. */
  121. template <typename InputIterator>
  122. InputIterator extract_lws(InputIterator begin, InputIterator end) {
  123. InputIterator it = begin;
  124. // strip leading CRLF
  125. if (end-begin > 2 && *begin == '\r' && *(begin+1) == '\n' &&
  126. is_whitespace_char(static_cast<unsigned char>(*(begin+2))))
  127. {
  128. it+=3;
  129. }
  130. it = std::find_if(it,end,&is_not_whitespace_char);
  131. return it;
  132. }
  133. /// Read and discard linear whitespace
  134. /**
  135. * Read linear white space until a non-lws character is read and return an
  136. * iterator to that character. If `begin` is returned, no whitespace was
  137. * extracted.
  138. *
  139. * @param begin An iterator to the beginning of the sequence
  140. * @param end An iterator to the end of the sequence
  141. * @return An iterator to the character after the linear whitespace read
  142. */
  143. template <typename InputIterator>
  144. InputIterator extract_all_lws(InputIterator begin, InputIterator end) {
  145. InputIterator old_it;
  146. InputIterator new_it = begin;
  147. do {
  148. // Pull value from previous iteration
  149. old_it = new_it;
  150. // look ahead another pass
  151. new_it = extract_lws(old_it,end);
  152. } while (new_it != end && old_it != new_it);
  153. return new_it;
  154. }
  155. /// Extract HTTP attributes
  156. /**
  157. * An http attributes list is a semicolon delimited list of key value pairs in
  158. * the format: *( ";" attribute "=" value ) where attribute is a token and value
  159. * is a token or quoted string.
  160. *
  161. * Attributes extracted are appended to the supplied attributes list
  162. * `attributes`.
  163. *
  164. * @param [in] begin An iterator to the beginning of the sequence
  165. * @param [in] end An iterator to the end of the sequence
  166. * @param [out] attributes A reference to the attributes list to append
  167. * attribute/value pairs extracted to
  168. * @return An iterator to the character after the last atribute read
  169. */
  170. template <typename InputIterator>
  171. InputIterator extract_attributes(InputIterator begin, InputIterator end,
  172. attribute_list & attributes)
  173. {
  174. InputIterator cursor;
  175. bool first = true;
  176. if (begin == end) {
  177. return begin;
  178. }
  179. cursor = begin;
  180. std::pair<std::string,InputIterator> ret;
  181. while (cursor != end) {
  182. std::string name;
  183. cursor = http::parser::extract_all_lws(cursor,end);
  184. if (cursor == end) {
  185. break;
  186. }
  187. if (first) {
  188. // ignore this check for the very first pass
  189. first = false;
  190. } else {
  191. if (*cursor == ';') {
  192. // advance past the ';'
  193. ++cursor;
  194. } else {
  195. // non-semicolon in this position indicates end end of the
  196. // attribute list, break and return.
  197. break;
  198. }
  199. }
  200. cursor = http::parser::extract_all_lws(cursor,end);
  201. ret = http::parser::extract_token(cursor,end);
  202. if (ret.first == "") {
  203. // error: expected a token
  204. return begin;
  205. } else {
  206. name = ret.first;
  207. cursor = ret.second;
  208. }
  209. cursor = http::parser::extract_all_lws(cursor,end);
  210. if (cursor == end || *cursor != '=') {
  211. // if there is an equals sign, read the attribute value. Otherwise
  212. // record a blank value and continue
  213. attributes[name] = "";
  214. continue;
  215. }
  216. // advance past the '='
  217. ++cursor;
  218. cursor = http::parser::extract_all_lws(cursor,end);
  219. if (cursor == end) {
  220. // error: expected a token or quoted string
  221. return begin;
  222. }
  223. ret = http::parser::extract_quoted_string(cursor,end);
  224. if (ret.second != cursor) {
  225. attributes[name] = ret.first;
  226. cursor = ret.second;
  227. continue;
  228. }
  229. ret = http::parser::extract_token(cursor,end);
  230. if (ret.first == "") {
  231. // error : expected token or quoted string
  232. return begin;
  233. } else {
  234. attributes[name] = ret.first;
  235. cursor = ret.second;
  236. }
  237. }
  238. return cursor;
  239. }
  240. /// Extract HTTP parameters
  241. /**
  242. * An http parameters list is a comma delimited list of tokens followed by
  243. * optional semicolon delimited attributes lists.
  244. *
  245. * Parameters extracted are appended to the supplied parameters list
  246. * `parameters`.
  247. *
  248. * @param [in] begin An iterator to the beginning of the sequence
  249. * @param [in] end An iterator to the end of the sequence
  250. * @param [out] parameters A reference to the parameters list to append
  251. * paramter values extracted to
  252. * @return An iterator to the character after the last parameter read
  253. */
  254. template <typename InputIterator>
  255. InputIterator extract_parameters(InputIterator begin, InputIterator end,
  256. parameter_list &parameters)
  257. {
  258. InputIterator cursor;
  259. if (begin == end) {
  260. // error: expected non-zero length range
  261. return begin;
  262. }
  263. cursor = begin;
  264. std::pair<std::string,InputIterator> ret;
  265. /**
  266. * LWS
  267. * token
  268. * LWS
  269. * *(";" method-param)
  270. * LWS
  271. * ,=loop again
  272. */
  273. while (cursor != end) {
  274. std::string parameter_name;
  275. attribute_list attributes;
  276. // extract any stray whitespace
  277. cursor = http::parser::extract_all_lws(cursor,end);
  278. if (cursor == end) {break;}
  279. ret = http::parser::extract_token(cursor,end);
  280. if (ret.first == "") {
  281. // error: expected a token
  282. return begin;
  283. } else {
  284. parameter_name = ret.first;
  285. cursor = ret.second;
  286. }
  287. // Safe break point, insert parameter with blank attributes and exit
  288. cursor = http::parser::extract_all_lws(cursor,end);
  289. if (cursor == end) {
  290. //parameters[parameter_name] = attributes;
  291. parameters.push_back(std::make_pair(parameter_name,attributes));
  292. break;
  293. }
  294. // If there is an attribute list, read it in
  295. if (*cursor == ';') {
  296. InputIterator acursor;
  297. ++cursor;
  298. acursor = http::parser::extract_attributes(cursor,end,attributes);
  299. if (acursor == cursor) {
  300. // attribute extraction ended in syntax error
  301. return begin;
  302. }
  303. cursor = acursor;
  304. }
  305. // insert parameter into output list
  306. //parameters[parameter_name] = attributes;
  307. parameters.push_back(std::make_pair(parameter_name,attributes));
  308. cursor = http::parser::extract_all_lws(cursor,end);
  309. if (cursor == end) {break;}
  310. // if next char is ',' then read another parameter, else stop
  311. if (*cursor != ',') {
  312. break;
  313. }
  314. // advance past comma
  315. ++cursor;
  316. if (cursor == end) {
  317. // expected more bytes after a comma
  318. return begin;
  319. }
  320. }
  321. return cursor;
  322. }
  323. inline std::string strip_lws(std::string const & input) {
  324. std::string::const_iterator begin = extract_all_lws(input.begin(),input.end());
  325. if (begin == input.end()) {
  326. return std::string();
  327. }
  328. std::string::const_reverse_iterator end = extract_all_lws(input.rbegin(),input.rend());
  329. return std::string(begin,end.base());
  330. }
  331. /// Base HTTP parser
  332. /**
  333. * Includes methods and data elements common to all types of HTTP messages such
  334. * as headers, versions, bodies, etc.
  335. */
  336. class parser {
  337. public:
  338. parser()
  339. : m_header_bytes(0)
  340. , m_body_bytes_needed(0)
  341. , m_body_bytes_max(max_body_size)
  342. , m_body_encoding(body_encoding::unknown) {}
  343. /// Get the HTTP version string
  344. /**
  345. * @return The version string for this parser
  346. */
  347. std::string const & get_version() const {
  348. return m_version;
  349. }
  350. /// Set HTTP parser Version
  351. /**
  352. * Input should be in format: HTTP/x.y where x and y are positive integers.
  353. * @todo Does this method need any validation?
  354. *
  355. * @param [in] version The value to set the HTTP version to.
  356. */
  357. void set_version(std::string const & version);
  358. /// Get the value of an HTTP header
  359. /**
  360. * @todo Make this method case insensitive.
  361. *
  362. * @param [in] key The name/key of the header to get.
  363. * @return The value associated with the given HTTP header key.
  364. */
  365. std::string const & get_header(std::string const & key) const;
  366. /// Extract an HTTP parameter list from a parser header.
  367. /**
  368. * If the header requested doesn't exist or exists and is empty the
  369. * parameter list is valid (but empty).
  370. *
  371. * @param [in] key The name/key of the HTTP header to use as input.
  372. * @param [out] out The parameter list to store extracted parameters in.
  373. * @return Whether or not the input was a valid parameter list.
  374. */
  375. bool get_header_as_plist(std::string const & key, parameter_list & out)
  376. const;
  377. /// Append a value to an existing HTTP header
  378. /**
  379. * This method will set the value of the HTTP header `key` with the
  380. * indicated value. If a header with the name `key` already exists, `val`
  381. * will be appended to the existing value.
  382. *
  383. * @todo Make this method case insensitive.
  384. * @todo Should there be any restrictions on which keys are allowed?
  385. * @todo Exception free varient
  386. *
  387. * @see replace_header
  388. *
  389. * @param [in] key The name/key of the header to append to.
  390. * @param [in] val The value to append.
  391. */
  392. void append_header(std::string const & key, std::string const & val);
  393. /// Set a value for an HTTP header, replacing an existing value
  394. /**
  395. * This method will set the value of the HTTP header `key` with the
  396. * indicated value. If a header with the name `key` already exists, `val`
  397. * will replace the existing value.
  398. *
  399. * @todo Make this method case insensitive.
  400. * @todo Should there be any restrictions on which keys are allowed?
  401. * @todo Exception free varient
  402. *
  403. * @see append_header
  404. *
  405. * @param [in] key The name/key of the header to append to.
  406. * @param [in] val The value to append.
  407. */
  408. void replace_header(std::string const & key, std::string const & val);
  409. /// Remove a header from the parser
  410. /**
  411. * Removes the header entirely from the parser. This is different than
  412. * setting the value of the header to blank.
  413. *
  414. * @todo Make this method case insensitive.
  415. *
  416. * @param [in] key The name/key of the header to remove.
  417. */
  418. void remove_header(std::string const & key);
  419. /// Get HTTP body
  420. /**
  421. * Gets the body of the HTTP object
  422. *
  423. * @return The body of the HTTP message.
  424. */
  425. std::string const & get_body() const {
  426. return m_body;
  427. }
  428. /// Set body content
  429. /**
  430. * Set the body content of the HTTP response to the parameter string. Note
  431. * set_body will also set the Content-Length HTTP header to the appropriate
  432. * value. If you want the Content-Length header to be something else, do so
  433. * via replace_header("Content-Length") after calling set_body()
  434. *
  435. * @param value String data to include as the body content.
  436. */
  437. void set_body(std::string const & value);
  438. /// Get body size limit
  439. /**
  440. * Retrieves the maximum number of bytes to parse & buffer before canceling
  441. * a request.
  442. *
  443. * @since 0.5.0
  444. *
  445. * @return The maximum length of a message body.
  446. */
  447. size_t get_max_body_size() const {
  448. return m_body_bytes_max;
  449. }
  450. /// Set body size limit
  451. /**
  452. * Set the maximum number of bytes to parse and buffer before canceling a
  453. * request.
  454. *
  455. * @since 0.5.0
  456. *
  457. * @param value The size to set the max body length to.
  458. */
  459. void set_max_body_size(size_t value) {
  460. m_body_bytes_max = value;
  461. }
  462. /// Extract an HTTP parameter list from a string.
  463. /**
  464. * @param [in] in The input string.
  465. * @param [out] out The parameter list to store extracted parameters in.
  466. * @return Whether or not the input was a valid parameter list.
  467. */
  468. bool parse_parameter_list(std::string const & in, parameter_list & out)
  469. const;
  470. protected:
  471. /// Process a header line
  472. /**
  473. * @todo Update this method to be exception free.
  474. *
  475. * @param [in] begin An iterator to the beginning of the sequence.
  476. * @param [in] end An iterator to the end of the sequence.
  477. */
  478. void process_header(std::string::iterator begin, std::string::iterator end);
  479. /// Prepare the parser to begin parsing body data
  480. /**
  481. * Inspects headers to determine if the message has a body that needs to be
  482. * read. If so, sets up the necessary state, otherwise returns false. If
  483. * this method returns true and loading the message body is desired call
  484. * `process_body` until it returns zero bytes or an error.
  485. *
  486. * Must not be called until after all headers have been processed.
  487. *
  488. * @since 0.5.0
  489. *
  490. * @return True if more bytes are needed to load the body, false otherwise.
  491. */
  492. bool prepare_body();
  493. /// Process body data
  494. /**
  495. * Parses body data.
  496. *
  497. * @since 0.5.0
  498. *
  499. * @param [in] begin An iterator to the beginning of the sequence.
  500. * @param [in] end An iterator to the end of the sequence.
  501. * @return The number of bytes processed
  502. */
  503. size_t process_body(char const * buf, size_t len);
  504. /// Check if the parser is done parsing the body
  505. /**
  506. * Behavior before a call to `prepare_body` is undefined.
  507. *
  508. * @since 0.5.0
  509. *
  510. * @return True if the message body has been completed loaded.
  511. */
  512. bool body_ready() const {
  513. return (m_body_bytes_needed == 0);
  514. }
  515. /// Generate and return the HTTP headers as a string
  516. /**
  517. * Each headers will be followed by the \r\n sequence including the last one.
  518. * A second \r\n sequence (blank header) is not appended by this method
  519. *
  520. * @return The HTTP headers as a string.
  521. */
  522. std::string raw_headers() const;
  523. std::string m_version;
  524. header_list m_headers;
  525. size_t m_header_bytes;
  526. std::string m_body;
  527. size_t m_body_bytes_needed;
  528. size_t m_body_bytes_max;
  529. body_encoding::value m_body_encoding;
  530. };
  531. } // namespace parser
  532. } // namespace http
  533. } // namespace websocketpp
  534. #include <websocketpp/http/impl/parser.hpp>
  535. #endif // HTTP_PARSER_HPP