non-ascii.c 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336
  1. /***************************************************************************
  2. * _ _ ____ _
  3. * Project ___| | | | _ \| |
  4. * / __| | | | |_) | |
  5. * | (__| |_| | _ <| |___
  6. * \___|\___/|_| \_\_____|
  7. *
  8. * Copyright (C) 1998 - 2021, Daniel Stenberg, <[email protected]>, et al.
  9. *
  10. * This software is licensed as described in the file COPYING, which
  11. * you should have received as part of this distribution. The terms
  12. * are also available at https://curl.se/docs/copyright.html.
  13. *
  14. * You may opt to use, copy, modify, merge, publish, distribute and/or sell
  15. * copies of the Software, and permit persons to whom the Software is
  16. * furnished to do so, under the terms of the COPYING file.
  17. *
  18. * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  19. * KIND, either express or implied.
  20. *
  21. ***************************************************************************/
  22. #include "curl_setup.h"
  23. #ifdef CURL_DOES_CONVERSIONS
  24. #include <curl/curl.h>
  25. #include "non-ascii.h"
  26. #include "formdata.h"
  27. #include "sendf.h"
  28. #include "urldata.h"
  29. #include "multiif.h"
  30. #include "strerror.h"
  31. #include "curl_memory.h"
  32. /* The last #include file should be: */
  33. #include "memdebug.h"
  #ifdef HAVE_ICONV
  #include <iconv.h>

  /*
   * Default codesets handed to iconv_open(). Either macro can be predefined
   * by the build to override the default given here.
   */
  #ifndef CURL_ICONV_CODESET_OF_NETWORK
  #define CURL_ICONV_CODESET_OF_NETWORK "ISO8859-1"
  #endif
  #ifndef CURL_ICONV_CODESET_FOR_UTF8
  #define CURL_ICONV_CODESET_FOR_UTF8 "UTF-8"
  #endif

  /*
   * Value the iconv() return code is compared against to detect failure.
   * The expansion is fully parenthesized so that it always acts as a single
   * operand (without the outer parentheses, "sizeof ICONV_ERROR" would parse
   * as "sizeof(size_t) - 1").
   */
  #define ICONV_ERROR ((size_t)-1)
  #endif /* HAVE_ICONV */
  45. /*
  46. * Curl_convert_clone() returns a malloced copy of the source string (if
  47. * returning CURLE_OK), with the data converted to network format.
  48. */
  49. CURLcode Curl_convert_clone(struct Curl_easy *data,
  50. const char *indata,
  51. size_t insize,
  52. char **outbuf)
  53. {
  54. char *convbuf;
  55. CURLcode result;
  56. convbuf = malloc(insize);
  57. if(!convbuf)
  58. return CURLE_OUT_OF_MEMORY;
  59. memcpy(convbuf, indata, insize);
  60. result = Curl_convert_to_network(data, convbuf, insize);
  61. if(result) {
  62. free(convbuf);
  63. return result;
  64. }
  65. *outbuf = convbuf; /* return the converted buffer */
  66. return CURLE_OK;
  67. }
  68. /*
  69. * Curl_convert_to_network() is an internal function for performing ASCII
  70. * conversions on non-ASCII platforms. It converts the buffer _in place_.
  71. */
  72. CURLcode Curl_convert_to_network(struct Curl_easy *data,
  73. char *buffer, size_t length)
  74. {
  75. if(data && data->set.convtonetwork) {
  76. /* use translation callback */
  77. CURLcode result;
  78. Curl_set_in_callback(data, true);
  79. result = data->set.convtonetwork(buffer, length);
  80. Curl_set_in_callback(data, false);
  81. if(result) {
  82. failf(data,
  83. "CURLOPT_CONV_TO_NETWORK_FUNCTION callback returned %d: %s",
  84. (int)result, curl_easy_strerror(result));
  85. }
  86. return result;
  87. }
  88. else {
  89. #ifdef HAVE_ICONV
  90. /* do the translation ourselves */
  91. iconv_t tmpcd = (iconv_t) -1;
  92. iconv_t *cd = &tmpcd;
  93. char *input_ptr, *output_ptr;
  94. size_t in_bytes, out_bytes, rc;
  95. char ebuffer[STRERROR_LEN];
  96. /* open an iconv conversion descriptor if necessary */
  97. if(data)
  98. cd = &data->outbound_cd;
  99. if(*cd == (iconv_t)-1) {
  100. *cd = iconv_open(CURL_ICONV_CODESET_OF_NETWORK,
  101. CURL_ICONV_CODESET_OF_HOST);
  102. if(*cd == (iconv_t)-1) {
  103. failf(data,
  104. "The iconv_open(\"%s\", \"%s\") call failed with errno %i: %s",
  105. CURL_ICONV_CODESET_OF_NETWORK,
  106. CURL_ICONV_CODESET_OF_HOST,
  107. errno, Curl_strerror(errno, ebuffer, sizeof(ebuffer)));
  108. return CURLE_CONV_FAILED;
  109. }
  110. }
  111. /* call iconv */
  112. input_ptr = output_ptr = buffer;
  113. in_bytes = out_bytes = length;
  114. rc = iconv(*cd, &input_ptr, &in_bytes,
  115. &output_ptr, &out_bytes);
  116. if(!data)
  117. iconv_close(tmpcd);
  118. if((rc == ICONV_ERROR) || (in_bytes)) {
  119. failf(data,
  120. "The Curl_convert_to_network iconv call failed with errno %i: %s",
  121. errno, Curl_strerror(errno, ebuffer, sizeof(ebuffer)));
  122. return CURLE_CONV_FAILED;
  123. }
  124. #else
  125. failf(data, "CURLOPT_CONV_TO_NETWORK_FUNCTION callback required");
  126. return CURLE_CONV_REQD;
  127. #endif /* HAVE_ICONV */
  128. }
  129. return CURLE_OK;
  130. }
  131. /*
  132. * Curl_convert_from_network() is an internal function for performing ASCII
  133. * conversions on non-ASCII platforms. It converts the buffer _in place_.
  134. */
  135. CURLcode Curl_convert_from_network(struct Curl_easy *data,
  136. char *buffer, size_t length)
  137. {
  138. if(data && data->set.convfromnetwork) {
  139. /* use translation callback */
  140. CURLcode result;
  141. Curl_set_in_callback(data, true);
  142. result = data->set.convfromnetwork(buffer, length);
  143. Curl_set_in_callback(data, false);
  144. if(result) {
  145. failf(data,
  146. "CURLOPT_CONV_FROM_NETWORK_FUNCTION callback returned %d: %s",
  147. (int)result, curl_easy_strerror(result));
  148. }
  149. return result;
  150. }
  151. else {
  152. #ifdef HAVE_ICONV
  153. /* do the translation ourselves */
  154. iconv_t tmpcd = (iconv_t) -1;
  155. iconv_t *cd = &tmpcd;
  156. char *input_ptr, *output_ptr;
  157. size_t in_bytes, out_bytes, rc;
  158. char ebuffer[STRERROR_LEN];
  159. /* open an iconv conversion descriptor if necessary */
  160. if(data)
  161. cd = &data->inbound_cd;
  162. if(*cd == (iconv_t)-1) {
  163. *cd = iconv_open(CURL_ICONV_CODESET_OF_HOST,
  164. CURL_ICONV_CODESET_OF_NETWORK);
  165. if(*cd == (iconv_t)-1) {
  166. failf(data,
  167. "The iconv_open(\"%s\", \"%s\") call failed with errno %i: %s",
  168. CURL_ICONV_CODESET_OF_HOST,
  169. CURL_ICONV_CODESET_OF_NETWORK,
  170. errno, Curl_strerror(errno, ebuffer, sizeof(ebuffer)));
  171. return CURLE_CONV_FAILED;
  172. }
  173. }
  174. /* call iconv */
  175. input_ptr = output_ptr = buffer;
  176. in_bytes = out_bytes = length;
  177. rc = iconv(*cd, &input_ptr, &in_bytes,
  178. &output_ptr, &out_bytes);
  179. if(!data)
  180. iconv_close(tmpcd);
  181. if((rc == ICONV_ERROR) || (in_bytes)) {
  182. failf(data,
  183. "Curl_convert_from_network iconv call failed with errno %i: %s",
  184. errno, Curl_strerror(errno, ebuffer, sizeof(ebuffer)));
  185. return CURLE_CONV_FAILED;
  186. }
  187. #else
  188. failf(data, "CURLOPT_CONV_FROM_NETWORK_FUNCTION callback required");
  189. return CURLE_CONV_REQD;
  190. #endif /* HAVE_ICONV */
  191. }
  192. return CURLE_OK;
  193. }
  194. /*
  195. * Curl_convert_from_utf8() is an internal function for performing UTF-8
  196. * conversions on non-ASCII platforms.
  197. */
  198. CURLcode Curl_convert_from_utf8(struct Curl_easy *data,
  199. char *buffer, size_t length)
  200. {
  201. if(data && data->set.convfromutf8) {
  202. /* use translation callback */
  203. CURLcode result;
  204. Curl_set_in_callback(data, true);
  205. result = data->set.convfromutf8(buffer, length);
  206. Curl_set_in_callback(data, false);
  207. if(result) {
  208. failf(data,
  209. "CURLOPT_CONV_FROM_UTF8_FUNCTION callback returned %d: %s",
  210. (int)result, curl_easy_strerror(result));
  211. }
  212. return result;
  213. }
  214. else {
  215. #ifdef HAVE_ICONV
  216. /* do the translation ourselves */
  217. iconv_t tmpcd = (iconv_t) -1;
  218. iconv_t *cd = &tmpcd;
  219. char *input_ptr;
  220. char *output_ptr;
  221. size_t in_bytes, out_bytes, rc;
  222. char ebuffer[STRERROR_LEN];
  223. /* open an iconv conversion descriptor if necessary */
  224. if(data)
  225. cd = &data->utf8_cd;
  226. if(*cd == (iconv_t)-1) {
  227. *cd = iconv_open(CURL_ICONV_CODESET_OF_HOST,
  228. CURL_ICONV_CODESET_FOR_UTF8);
  229. if(*cd == (iconv_t)-1) {
  230. failf(data,
  231. "The iconv_open(\"%s\", \"%s\") call failed with errno %i: %s",
  232. CURL_ICONV_CODESET_OF_HOST,
  233. CURL_ICONV_CODESET_FOR_UTF8,
  234. errno, Curl_strerror(errno, ebuffer, sizeof(ebuffer)));
  235. return CURLE_CONV_FAILED;
  236. }
  237. }
  238. /* call iconv */
  239. input_ptr = output_ptr = buffer;
  240. in_bytes = out_bytes = length;
  241. rc = iconv(*cd, &input_ptr, &in_bytes,
  242. &output_ptr, &out_bytes);
  243. if(!data)
  244. iconv_close(tmpcd);
  245. if((rc == ICONV_ERROR) || (in_bytes)) {
  246. failf(data,
  247. "The Curl_convert_from_utf8 iconv call failed with errno %i: %s",
  248. errno, Curl_strerror(errno, ebuffer, sizeof(ebuffer)));
  249. return CURLE_CONV_FAILED;
  250. }
  251. if(output_ptr < input_ptr) {
  252. /* null terminate the now shorter output string */
  253. *output_ptr = 0x00;
  254. }
  255. #else
  256. failf(data, "CURLOPT_CONV_FROM_UTF8_FUNCTION callback required");
  257. return CURLE_CONV_REQD;
  258. #endif /* HAVE_ICONV */
  259. }
  260. return CURLE_OK;
  261. }
  /*
   * Curl_convert_init() puts the conversion state of a Curl_easy into its
   * initial condition: when built with iconv, all three conversion
   * descriptors are set to (iconv_t)-1. Without iconv this does nothing.
   */
  void Curl_convert_init(struct Curl_easy *data)
  {
  #if defined(CURL_DOES_CONVERSIONS) && defined(HAVE_ICONV)
    /* (iconv_t)-1 is the value this file checks for before opening a
       descriptor with iconv_open() */
    const iconv_t unopened = (iconv_t)-1;

    data->utf8_cd = unopened;
    data->inbound_cd = unopened;
    data->outbound_cd = unopened;
  #else
    (void)data;
  #endif /* CURL_DOES_CONVERSIONS && HAVE_ICONV */
  }
  /*
   * Curl_convert_setup() opens all three iconv conversion descriptors of a
   * Curl_easy up front: network->host (inbound_cd), host->network
   * (outbound_cd) and UTF-8->host (utf8_cd).
   *
   * The iconv_open() results are stored unchecked. A failed open leaves
   * (iconv_t)-1 in the field, which the conversion functions in this file
   * check for and respond to by calling iconv_open() again.
   *
   * The body is guarded by HAVE_ICONV, like Curl_convert_init() and
   * Curl_convert_close(), because <iconv.h> is only included when that macro
   * is defined. Without iconv this does nothing.
   */
  void Curl_convert_setup(struct Curl_easy *data)
  {
  #ifdef HAVE_ICONV
    data->inbound_cd = iconv_open(CURL_ICONV_CODESET_OF_HOST,
                                  CURL_ICONV_CODESET_OF_NETWORK);
    data->outbound_cd = iconv_open(CURL_ICONV_CODESET_OF_NETWORK,
                                   CURL_ICONV_CODESET_OF_HOST);
    data->utf8_cd = iconv_open(CURL_ICONV_CODESET_OF_HOST,
                               CURL_ICONV_CODESET_FOR_UTF8);
  #else
    (void)data;
  #endif /* HAVE_ICONV */
  }
  /*
   * Curl_convert_close() closes every iconv conversion descriptor of a
   * Curl_easy that is currently open (i.e. not (iconv_t)-1).
   *
   * Each closed descriptor is reset to (iconv_t)-1 so that a repeated call is
   * harmless and the conversion functions in this file, which check for that
   * value, open a fresh descriptor instead of using a closed one. Without
   * iconv this does nothing.
   */
  void Curl_convert_close(struct Curl_easy *data)
  {
  #ifdef HAVE_ICONV
    /* close iconv conversion descriptors */
    if(data->inbound_cd != (iconv_t)-1) {
      iconv_close(data->inbound_cd);
      data->inbound_cd = (iconv_t)-1;
    }
    if(data->outbound_cd != (iconv_t)-1) {
      iconv_close(data->outbound_cd);
      data->outbound_cd = (iconv_t)-1;
    }
    if(data->utf8_cd != (iconv_t)-1) {
      iconv_close(data->utf8_cd);
      data->utf8_cd = (iconv_t)-1;
    }
  #else
    (void)data;
  #endif /* HAVE_ICONV */
  }
  308. #endif /* CURL_DOES_CONVERSIONS */