SDL_iconv.c 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934
  1. /*
  2. Simple DirectMedia Layer
  3. Copyright (C) 1997-2019 Sam Lantinga <slouken@libsdl.org>
  4. This software is provided 'as-is', without any express or implied
  5. warranty. In no event will the authors be held liable for any damages
  6. arising from the use of this software.
  7. Permission is granted to anyone to use this software for any purpose,
  8. including commercial applications, and to alter it and redistribute it
  9. freely, subject to the following restrictions:
  10. 1. The origin of this software must not be misrepresented; you must not
  11. claim that you wrote the original software. If you use this software
  12. in a product, an acknowledgment in the product documentation would be
  13. appreciated but is not required.
  14. 2. Altered source versions must be plainly marked as such, and must not be
  15. misrepresented as being the original software.
  16. 3. This notice may not be removed or altered from any source distribution.
  17. */
  18. #if defined(__clang_analyzer__) && !defined(SDL_DISABLE_ANALYZE_MACROS)
  19. #define SDL_DISABLE_ANALYZE_MACROS 1
  20. #endif
  21. #include "../SDL_internal.h"
  22. /* This file contains portable iconv functions for SDL */
  23. #include "SDL_stdinc.h"
  24. #include "SDL_endian.h"
  25. #if defined(HAVE_ICONV) && defined(HAVE_ICONV_H)
  26. #include <iconv.h>
  27. /* Depending on which standard the iconv() was implemented with,
  28. iconv() may or may not use const char ** for the inbuf param.
  29. If we get this wrong, it's just a warning, so no big deal.
  30. */
  31. #if defined(_XGP6) || defined(__APPLE__) || defined(__RISCOS__) || \
  32. defined(__EMSCRIPTEN__) || \
  33. (defined(__GLIBC__) && ((__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2)) || \
  34. (defined(_NEWLIB_VERSION)))
  35. #define ICONV_INBUF_NONCONST
  36. #endif
  37. #include <errno.h>
  38. SDL_COMPILE_TIME_ASSERT(iconv_t, sizeof (iconv_t) <= sizeof (SDL_iconv_t));
  39. SDL_iconv_t
  40. SDL_iconv_open(const char *tocode, const char *fromcode)
  41. {
  42. return (SDL_iconv_t) ((size_t) iconv_open(tocode, fromcode));
  43. }
  44. int
  45. SDL_iconv_close(SDL_iconv_t cd)
  46. {
  47. return iconv_close((iconv_t) ((size_t) cd));
  48. }
  49. size_t
  50. SDL_iconv(SDL_iconv_t cd,
  51. const char **inbuf, size_t * inbytesleft,
  52. char **outbuf, size_t * outbytesleft)
  53. {
  54. size_t retCode;
  55. #ifdef ICONV_INBUF_NONCONST
  56. retCode = iconv((iconv_t) ((size_t) cd), (char **) inbuf, inbytesleft, outbuf, outbytesleft);
  57. #else
  58. retCode = iconv((iconv_t) ((size_t) cd), inbuf, inbytesleft, outbuf, outbytesleft);
  59. #endif
  60. if (retCode == (size_t) - 1) {
  61. switch (errno) {
  62. case E2BIG:
  63. return SDL_ICONV_E2BIG;
  64. case EILSEQ:
  65. return SDL_ICONV_EILSEQ;
  66. case EINVAL:
  67. return SDL_ICONV_EINVAL;
  68. default:
  69. return SDL_ICONV_ERROR;
  70. }
  71. }
  72. return retCode;
  73. }
  74. #else
  75. /* Lots of useful information on Unicode at:
  76. http://www.cl.cam.ac.uk/~mgk25/unicode.html
  77. */
  /* Code point written as a byte order mark in front of UTF-16/UTF-32 output. */
  #define UNICODE_BOM 0xFEFF
  /* Substitute emitted when a character cannot be stored in ASCII/Latin-1. */
  #define UNKNOWN_ASCII '?'
  /* Substitute code point used for undecodable or out-of-range input. */
  #define UNKNOWN_UNICODE 0xFFFD
  /*
   * Identifiers for the encodings understood by the built-in converter.
   * ENCODING_UNKNOWN is the "no match" value used while resolving names.
   */
  enum
  {
      ENCODING_UNKNOWN = 0,
      ENCODING_ASCII,
      ENCODING_LATIN1,
      ENCODING_UTF8,
      ENCODING_UTF16,     /* Needs byte order marker */
      ENCODING_UTF16BE,
      ENCODING_UTF16LE,
      ENCODING_UTF32,     /* Needs byte order marker */
      ENCODING_UTF32BE,
      ENCODING_UTF32LE,
      ENCODING_UCS2BE,
      ENCODING_UCS2LE,
      ENCODING_UCS4BE,
      ENCODING_UCS4LE,
  };
  /*
   * Aliases that resolve to the big- or little-endian variant of each
   * multi-byte encoding, chosen from SDL_BYTEORDER at compile time.
   */
  #if SDL_BYTEORDER == SDL_BIG_ENDIAN
  #define ENCODING_UTF16NATIVE    ENCODING_UTF16BE
  #define ENCODING_UTF32NATIVE    ENCODING_UTF32BE
  #define ENCODING_UCS2NATIVE     ENCODING_UCS2BE
  #define ENCODING_UCS4NATIVE     ENCODING_UCS4BE
  #else
  #define ENCODING_UTF16NATIVE    ENCODING_UTF16LE
  #define ENCODING_UTF32NATIVE    ENCODING_UTF32LE
  #define ENCODING_UCS2NATIVE     ENCODING_UCS2LE
  #define ENCODING_UCS4NATIVE     ENCODING_UCS4LE
  #endif
  /*
   * State behind an SDL_iconv_t handle for the built-in converter: the
   * ENCODING_* value to decode from and the ENCODING_* value to encode to.
   */
  struct _SDL_iconv_t
  {
      int src_fmt;    /* source encoding (ENCODING_*) */
      int dst_fmt;    /* destination encoding (ENCODING_*) */
  };
  114. static struct
  115. {
  116. const char *name;
  117. int format;
  118. } encodings[] = {
  119. /* *INDENT-OFF* */
  120. { "ASCII", ENCODING_ASCII },
  121. { "US-ASCII", ENCODING_ASCII },
  122. { "8859-1", ENCODING_LATIN1 },
  123. { "ISO-8859-1", ENCODING_LATIN1 },
  124. { "UTF8", ENCODING_UTF8 },
  125. { "UTF-8", ENCODING_UTF8 },
  126. { "UTF16", ENCODING_UTF16 },
  127. { "UTF-16", ENCODING_UTF16 },
  128. { "UTF16BE", ENCODING_UTF16BE },
  129. { "UTF-16BE", ENCODING_UTF16BE },
  130. { "UTF16LE", ENCODING_UTF16LE },
  131. { "UTF-16LE", ENCODING_UTF16LE },
  132. { "UTF32", ENCODING_UTF32 },
  133. { "UTF-32", ENCODING_UTF32 },
  134. { "UTF32BE", ENCODING_UTF32BE },
  135. { "UTF-32BE", ENCODING_UTF32BE },
  136. { "UTF32LE", ENCODING_UTF32LE },
  137. { "UTF-32LE", ENCODING_UTF32LE },
  138. { "UCS2", ENCODING_UCS2BE },
  139. { "UCS-2", ENCODING_UCS2BE },
  140. { "UCS-2LE", ENCODING_UCS2LE },
  141. { "UCS-2BE", ENCODING_UCS2BE },
  142. { "UCS-2-INTERNAL", ENCODING_UCS2NATIVE },
  143. { "UCS4", ENCODING_UCS4BE },
  144. { "UCS-4", ENCODING_UCS4BE },
  145. { "UCS-4LE", ENCODING_UCS4LE },
  146. { "UCS-4BE", ENCODING_UCS4BE },
  147. { "UCS-4-INTERNAL", ENCODING_UCS4NATIVE },
  148. /* *INDENT-ON* */
  149. };
  /*
   * Work out the character set named by the environment.
   *
   * The variables LC_ALL, LC_CTYPE, LC_MESSAGES and LANG are consulted in
   * that order and the first one that is set wins.  A missing, empty or "C"
   * value is treated as "ASCII".  Everything up to and including the first
   * '.' is dropped and anything from '@' onwards is cut off, so
   * "en_US.UTF-8@blah" becomes "UTF-8".
   *
   * buffer:  caller-provided storage that receives the result.
   * bufsize: size of buffer in bytes, passed to SDL_strlcpy().
   *
   * Returns buffer.
   */
  static const char *
  getlocale(char *buffer, size_t bufsize)
  {
      static const char *const env_names[] = {
          "LC_ALL", "LC_CTYPE", "LC_MESSAGES", "LANG"
      };
      const char *lang = NULL;
      char *mark;
      size_t i;

      /* First variable that is set (even to an empty string) is used. */
      for (i = 0; lang == NULL && i < sizeof(env_names) / sizeof(env_names[0]); ++i) {
          lang = SDL_getenv(env_names[i]);
      }

      if (lang == NULL || *lang == '\0' || SDL_strcmp(lang, "C") == 0) {
          lang = "ASCII";
      }

      /* Keep only what follows the language/territory part. */
      mark = SDL_strchr(lang, '.');
      if (mark != NULL) {
          lang = mark + 1;
      }

      SDL_strlcpy(buffer, lang, bufsize);

      /* Drop a trailing "@modifier". */
      mark = SDL_strchr(buffer, '@');
      if (mark != NULL) {
          *mark = '\0';
      }

      return buffer;
  }
  180. SDL_iconv_t
  181. SDL_iconv_open(const char *tocode, const char *fromcode)
  182. {
  183. int src_fmt = ENCODING_UNKNOWN;
  184. int dst_fmt = ENCODING_UNKNOWN;
  185. int i;
  186. char fromcode_buffer[64];
  187. char tocode_buffer[64];
  188. if (!fromcode || !*fromcode) {
  189. fromcode = getlocale(fromcode_buffer, sizeof(fromcode_buffer));
  190. }
  191. if (!tocode || !*tocode) {
  192. tocode = getlocale(tocode_buffer, sizeof(tocode_buffer));
  193. }
  194. for (i = 0; i < SDL_arraysize(encodings); ++i) {
  195. if (SDL_strcasecmp(fromcode, encodings[i].name) == 0) {
  196. src_fmt = encodings[i].format;
  197. if (dst_fmt != ENCODING_UNKNOWN) {
  198. break;
  199. }
  200. }
  201. if (SDL_strcasecmp(tocode, encodings[i].name) == 0) {
  202. dst_fmt = encodings[i].format;
  203. if (src_fmt != ENCODING_UNKNOWN) {
  204. break;
  205. }
  206. }
  207. }
  208. if (src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN) {
  209. SDL_iconv_t cd = (SDL_iconv_t) SDL_malloc(sizeof(*cd));
  210. if (cd) {
  211. cd->src_fmt = src_fmt;
  212. cd->dst_fmt = dst_fmt;
  213. return cd;
  214. }
  215. }
  216. return (SDL_iconv_t) - 1;
  217. }
  218. size_t
  219. SDL_iconv(SDL_iconv_t cd,
  220. const char **inbuf, size_t * inbytesleft,
  221. char **outbuf, size_t * outbytesleft)
  222. {
  223. /* For simplicity, we'll convert everything to and from UCS-4 */
  224. const char *src;
  225. char *dst;
  226. size_t srclen, dstlen;
  227. Uint32 ch = 0;
  228. size_t total;
  229. if (!inbuf || !*inbuf) {
  230. /* Reset the context */
  231. return 0;
  232. }
  233. if (!outbuf || !*outbuf || !outbytesleft || !*outbytesleft) {
  234. return SDL_ICONV_E2BIG;
  235. }
  236. src = *inbuf;
  237. srclen = (inbytesleft ? *inbytesleft : 0);
  238. dst = *outbuf;
  239. dstlen = *outbytesleft;
  240. switch (cd->src_fmt) {
  241. case ENCODING_UTF16:
  242. /* Scan for a byte order marker */
  243. {
  244. Uint8 *p = (Uint8 *) src;
  245. size_t n = srclen / 2;
  246. while (n) {
  247. if (p[0] == 0xFF && p[1] == 0xFE) {
  248. cd->src_fmt = ENCODING_UTF16BE;
  249. break;
  250. } else if (p[0] == 0xFE && p[1] == 0xFF) {
  251. cd->src_fmt = ENCODING_UTF16LE;
  252. break;
  253. }
  254. p += 2;
  255. --n;
  256. }
  257. if (n == 0) {
  258. /* We can't tell, default to host order */
  259. cd->src_fmt = ENCODING_UTF16NATIVE;
  260. }
  261. }
  262. break;
  263. case ENCODING_UTF32:
  264. /* Scan for a byte order marker */
  265. {
  266. Uint8 *p = (Uint8 *) src;
  267. size_t n = srclen / 4;
  268. while (n) {
  269. if (p[0] == 0xFF && p[1] == 0xFE &&
  270. p[2] == 0x00 && p[3] == 0x00) {
  271. cd->src_fmt = ENCODING_UTF32BE;
  272. break;
  273. } else if (p[0] == 0x00 && p[1] == 0x00 &&
  274. p[2] == 0xFE && p[3] == 0xFF) {
  275. cd->src_fmt = ENCODING_UTF32LE;
  276. break;
  277. }
  278. p += 4;
  279. --n;
  280. }
  281. if (n == 0) {
  282. /* We can't tell, default to host order */
  283. cd->src_fmt = ENCODING_UTF32NATIVE;
  284. }
  285. }
  286. break;
  287. }
  288. switch (cd->dst_fmt) {
  289. case ENCODING_UTF16:
  290. /* Default to host order, need to add byte order marker */
  291. if (dstlen < 2) {
  292. return SDL_ICONV_E2BIG;
  293. }
  294. *(Uint16 *) dst = UNICODE_BOM;
  295. dst += 2;
  296. dstlen -= 2;
  297. cd->dst_fmt = ENCODING_UTF16NATIVE;
  298. break;
  299. case ENCODING_UTF32:
  300. /* Default to host order, need to add byte order marker */
  301. if (dstlen < 4) {
  302. return SDL_ICONV_E2BIG;
  303. }
  304. *(Uint32 *) dst = UNICODE_BOM;
  305. dst += 4;
  306. dstlen -= 4;
  307. cd->dst_fmt = ENCODING_UTF32NATIVE;
  308. break;
  309. }
  310. total = 0;
  311. while (srclen > 0) {
  312. /* Decode a character */
  313. switch (cd->src_fmt) {
  314. case ENCODING_ASCII:
  315. {
  316. Uint8 *p = (Uint8 *) src;
  317. ch = (Uint32) (p[0] & 0x7F);
  318. ++src;
  319. --srclen;
  320. }
  321. break;
  322. case ENCODING_LATIN1:
  323. {
  324. Uint8 *p = (Uint8 *) src;
  325. ch = (Uint32) p[0];
  326. ++src;
  327. --srclen;
  328. }
  329. break;
  330. case ENCODING_UTF8: /* RFC 3629 */
  331. {
  332. Uint8 *p = (Uint8 *) src;
  333. size_t left = 0;
  334. SDL_bool overlong = SDL_FALSE;
  335. if (p[0] >= 0xFC) {
  336. if ((p[0] & 0xFE) != 0xFC) {
  337. /* Skip illegal sequences
  338. return SDL_ICONV_EILSEQ;
  339. */
  340. ch = UNKNOWN_UNICODE;
  341. } else {
  342. if (p[0] == 0xFC && srclen > 1 && (p[1] & 0xFC) == 0x80) {
  343. overlong = SDL_TRUE;
  344. }
  345. ch = (Uint32) (p[0] & 0x01);
  346. left = 5;
  347. }
  348. } else if (p[0] >= 0xF8) {
  349. if ((p[0] & 0xFC) != 0xF8) {
  350. /* Skip illegal sequences
  351. return SDL_ICONV_EILSEQ;
  352. */
  353. ch = UNKNOWN_UNICODE;
  354. } else {
  355. if (p[0] == 0xF8 && srclen > 1 && (p[1] & 0xF8) == 0x80) {
  356. overlong = SDL_TRUE;
  357. }
  358. ch = (Uint32) (p[0] & 0x03);
  359. left = 4;
  360. }
  361. } else if (p[0] >= 0xF0) {
  362. if ((p[0] & 0xF8) != 0xF0) {
  363. /* Skip illegal sequences
  364. return SDL_ICONV_EILSEQ;
  365. */
  366. ch = UNKNOWN_UNICODE;
  367. } else {
  368. if (p[0] == 0xF0 && srclen > 1 && (p[1] & 0xF0) == 0x80) {
  369. overlong = SDL_TRUE;
  370. }
  371. ch = (Uint32) (p[0] & 0x07);
  372. left = 3;
  373. }
  374. } else if (p[0] >= 0xE0) {
  375. if ((p[0] & 0xF0) != 0xE0) {
  376. /* Skip illegal sequences
  377. return SDL_ICONV_EILSEQ;
  378. */
  379. ch = UNKNOWN_UNICODE;
  380. } else {
  381. if (p[0] == 0xE0 && srclen > 1 && (p[1] & 0xE0) == 0x80) {
  382. overlong = SDL_TRUE;
  383. }
  384. ch = (Uint32) (p[0] & 0x0F);
  385. left = 2;
  386. }
  387. } else if (p[0] >= 0xC0) {
  388. if ((p[0] & 0xE0) != 0xC0) {
  389. /* Skip illegal sequences
  390. return SDL_ICONV_EILSEQ;
  391. */
  392. ch = UNKNOWN_UNICODE;
  393. } else {
  394. if ((p[0] & 0xDE) == 0xC0) {
  395. overlong = SDL_TRUE;
  396. }
  397. ch = (Uint32) (p[0] & 0x1F);
  398. left = 1;
  399. }
  400. } else {
  401. if ((p[0] & 0x80) != 0x00) {
  402. /* Skip illegal sequences
  403. return SDL_ICONV_EILSEQ;
  404. */
  405. ch = UNKNOWN_UNICODE;
  406. } else {
  407. ch = (Uint32) p[0];
  408. }
  409. }
  410. ++src;
  411. --srclen;
  412. if (srclen < left) {
  413. return SDL_ICONV_EINVAL;
  414. }
  415. while (left--) {
  416. ++p;
  417. if ((p[0] & 0xC0) != 0x80) {
  418. /* Skip illegal sequences
  419. return SDL_ICONV_EILSEQ;
  420. */
  421. ch = UNKNOWN_UNICODE;
  422. break;
  423. }
  424. ch <<= 6;
  425. ch |= (p[0] & 0x3F);
  426. ++src;
  427. --srclen;
  428. }
  429. if (overlong) {
  430. /* Potential security risk
  431. return SDL_ICONV_EILSEQ;
  432. */
  433. ch = UNKNOWN_UNICODE;
  434. }
  435. if ((ch >= 0xD800 && ch <= 0xDFFF) ||
  436. (ch == 0xFFFE || ch == 0xFFFF) || ch > 0x10FFFF) {
  437. /* Skip illegal sequences
  438. return SDL_ICONV_EILSEQ;
  439. */
  440. ch = UNKNOWN_UNICODE;
  441. }
  442. }
  443. break;
  444. case ENCODING_UTF16BE: /* RFC 2781 */
  445. {
  446. Uint8 *p = (Uint8 *) src;
  447. Uint16 W1, W2;
  448. if (srclen < 2) {
  449. return SDL_ICONV_EINVAL;
  450. }
  451. W1 = ((Uint16) p[0] << 8) | (Uint16) p[1];
  452. src += 2;
  453. srclen -= 2;
  454. if (W1 < 0xD800 || W1 > 0xDFFF) {
  455. ch = (Uint32) W1;
  456. break;
  457. }
  458. if (W1 > 0xDBFF) {
  459. /* Skip illegal sequences
  460. return SDL_ICONV_EILSEQ;
  461. */
  462. ch = UNKNOWN_UNICODE;
  463. break;
  464. }
  465. if (srclen < 2) {
  466. return SDL_ICONV_EINVAL;
  467. }
  468. p = (Uint8 *) src;
  469. W2 = ((Uint16) p[0] << 8) | (Uint16) p[1];
  470. src += 2;
  471. srclen -= 2;
  472. if (W2 < 0xDC00 || W2 > 0xDFFF) {
  473. /* Skip illegal sequences
  474. return SDL_ICONV_EILSEQ;
  475. */
  476. ch = UNKNOWN_UNICODE;
  477. break;
  478. }
  479. ch = (((Uint32) (W1 & 0x3FF) << 10) |
  480. (Uint32) (W2 & 0x3FF)) + 0x10000;
  481. }
  482. break;
  483. case ENCODING_UTF16LE: /* RFC 2781 */
  484. {
  485. Uint8 *p = (Uint8 *) src;
  486. Uint16 W1, W2;
  487. if (srclen < 2) {
  488. return SDL_ICONV_EINVAL;
  489. }
  490. W1 = ((Uint16) p[1] << 8) | (Uint16) p[0];
  491. src += 2;
  492. srclen -= 2;
  493. if (W1 < 0xD800 || W1 > 0xDFFF) {
  494. ch = (Uint32) W1;
  495. break;
  496. }
  497. if (W1 > 0xDBFF) {
  498. /* Skip illegal sequences
  499. return SDL_ICONV_EILSEQ;
  500. */
  501. ch = UNKNOWN_UNICODE;
  502. break;
  503. }
  504. if (srclen < 2) {
  505. return SDL_ICONV_EINVAL;
  506. }
  507. p = (Uint8 *) src;
  508. W2 = ((Uint16) p[1] << 8) | (Uint16) p[0];
  509. src += 2;
  510. srclen -= 2;
  511. if (W2 < 0xDC00 || W2 > 0xDFFF) {
  512. /* Skip illegal sequences
  513. return SDL_ICONV_EILSEQ;
  514. */
  515. ch = UNKNOWN_UNICODE;
  516. break;
  517. }
  518. ch = (((Uint32) (W1 & 0x3FF) << 10) |
  519. (Uint32) (W2 & 0x3FF)) + 0x10000;
  520. }
  521. break;
  522. case ENCODING_UCS2LE:
  523. {
  524. Uint8 *p = (Uint8 *) src;
  525. if (srclen < 2) {
  526. return SDL_ICONV_EINVAL;
  527. }
  528. ch = ((Uint32) p[1] << 8) | (Uint32) p[0];
  529. src += 2;
  530. srclen -= 2;
  531. }
  532. break;
  533. case ENCODING_UCS2BE:
  534. {
  535. Uint8 *p = (Uint8 *) src;
  536. if (srclen < 2) {
  537. return SDL_ICONV_EINVAL;
  538. }
  539. ch = ((Uint32) p[0] << 8) | (Uint32) p[1];
  540. src += 2;
  541. srclen -= 2;
  542. }
  543. break;
  544. case ENCODING_UCS4BE:
  545. case ENCODING_UTF32BE:
  546. {
  547. Uint8 *p = (Uint8 *) src;
  548. if (srclen < 4) {
  549. return SDL_ICONV_EINVAL;
  550. }
  551. ch = ((Uint32) p[0] << 24) |
  552. ((Uint32) p[1] << 16) |
  553. ((Uint32) p[2] << 8) | (Uint32) p[3];
  554. src += 4;
  555. srclen -= 4;
  556. }
  557. break;
  558. case ENCODING_UCS4LE:
  559. case ENCODING_UTF32LE:
  560. {
  561. Uint8 *p = (Uint8 *) src;
  562. if (srclen < 4) {
  563. return SDL_ICONV_EINVAL;
  564. }
  565. ch = ((Uint32) p[3] << 24) |
  566. ((Uint32) p[2] << 16) |
  567. ((Uint32) p[1] << 8) | (Uint32) p[0];
  568. src += 4;
  569. srclen -= 4;
  570. }
  571. break;
  572. }
  573. /* Encode a character */
  574. switch (cd->dst_fmt) {
  575. case ENCODING_ASCII:
  576. {
  577. Uint8 *p = (Uint8 *) dst;
  578. if (dstlen < 1) {
  579. return SDL_ICONV_E2BIG;
  580. }
  581. if (ch > 0x7F) {
  582. *p = UNKNOWN_ASCII;
  583. } else {
  584. *p = (Uint8) ch;
  585. }
  586. ++dst;
  587. --dstlen;
  588. }
  589. break;
  590. case ENCODING_LATIN1:
  591. {
  592. Uint8 *p = (Uint8 *) dst;
  593. if (dstlen < 1) {
  594. return SDL_ICONV_E2BIG;
  595. }
  596. if (ch > 0xFF) {
  597. *p = UNKNOWN_ASCII;
  598. } else {
  599. *p = (Uint8) ch;
  600. }
  601. ++dst;
  602. --dstlen;
  603. }
  604. break;
  605. case ENCODING_UTF8: /* RFC 3629 */
  606. {
  607. Uint8 *p = (Uint8 *) dst;
  608. if (ch > 0x10FFFF) {
  609. ch = UNKNOWN_UNICODE;
  610. }
  611. if (ch <= 0x7F) {
  612. if (dstlen < 1) {
  613. return SDL_ICONV_E2BIG;
  614. }
  615. *p = (Uint8) ch;
  616. ++dst;
  617. --dstlen;
  618. } else if (ch <= 0x7FF) {
  619. if (dstlen < 2) {
  620. return SDL_ICONV_E2BIG;
  621. }
  622. p[0] = 0xC0 | (Uint8) ((ch >> 6) & 0x1F);
  623. p[1] = 0x80 | (Uint8) (ch & 0x3F);
  624. dst += 2;
  625. dstlen -= 2;
  626. } else if (ch <= 0xFFFF) {
  627. if (dstlen < 3) {
  628. return SDL_ICONV_E2BIG;
  629. }
  630. p[0] = 0xE0 | (Uint8) ((ch >> 12) & 0x0F);
  631. p[1] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
  632. p[2] = 0x80 | (Uint8) (ch & 0x3F);
  633. dst += 3;
  634. dstlen -= 3;
  635. } else if (ch <= 0x1FFFFF) {
  636. if (dstlen < 4) {
  637. return SDL_ICONV_E2BIG;
  638. }
  639. p[0] = 0xF0 | (Uint8) ((ch >> 18) & 0x07);
  640. p[1] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
  641. p[2] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
  642. p[3] = 0x80 | (Uint8) (ch & 0x3F);
  643. dst += 4;
  644. dstlen -= 4;
  645. } else if (ch <= 0x3FFFFFF) {
  646. if (dstlen < 5) {
  647. return SDL_ICONV_E2BIG;
  648. }
  649. p[0] = 0xF8 | (Uint8) ((ch >> 24) & 0x03);
  650. p[1] = 0x80 | (Uint8) ((ch >> 18) & 0x3F);
  651. p[2] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
  652. p[3] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
  653. p[4] = 0x80 | (Uint8) (ch & 0x3F);
  654. dst += 5;
  655. dstlen -= 5;
  656. } else {
  657. if (dstlen < 6) {
  658. return SDL_ICONV_E2BIG;
  659. }
  660. p[0] = 0xFC | (Uint8) ((ch >> 30) & 0x01);
  661. p[1] = 0x80 | (Uint8) ((ch >> 24) & 0x3F);
  662. p[2] = 0x80 | (Uint8) ((ch >> 18) & 0x3F);
  663. p[3] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
  664. p[4] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
  665. p[5] = 0x80 | (Uint8) (ch & 0x3F);
  666. dst += 6;
  667. dstlen -= 6;
  668. }
  669. }
  670. break;
  671. case ENCODING_UTF16BE: /* RFC 2781 */
  672. {
  673. Uint8 *p = (Uint8 *) dst;
  674. if (ch > 0x10FFFF) {
  675. ch = UNKNOWN_UNICODE;
  676. }
  677. if (ch < 0x10000) {
  678. if (dstlen < 2) {
  679. return SDL_ICONV_E2BIG;
  680. }
  681. p[0] = (Uint8) (ch >> 8);
  682. p[1] = (Uint8) ch;
  683. dst += 2;
  684. dstlen -= 2;
  685. } else {
  686. Uint16 W1, W2;
  687. if (dstlen < 4) {
  688. return SDL_ICONV_E2BIG;
  689. }
  690. ch = ch - 0x10000;
  691. W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
  692. W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
  693. p[0] = (Uint8) (W1 >> 8);
  694. p[1] = (Uint8) W1;
  695. p[2] = (Uint8) (W2 >> 8);
  696. p[3] = (Uint8) W2;
  697. dst += 4;
  698. dstlen -= 4;
  699. }
  700. }
  701. break;
  702. case ENCODING_UTF16LE: /* RFC 2781 */
  703. {
  704. Uint8 *p = (Uint8 *) dst;
  705. if (ch > 0x10FFFF) {
  706. ch = UNKNOWN_UNICODE;
  707. }
  708. if (ch < 0x10000) {
  709. if (dstlen < 2) {
  710. return SDL_ICONV_E2BIG;
  711. }
  712. p[1] = (Uint8) (ch >> 8);
  713. p[0] = (Uint8) ch;
  714. dst += 2;
  715. dstlen -= 2;
  716. } else {
  717. Uint16 W1, W2;
  718. if (dstlen < 4) {
  719. return SDL_ICONV_E2BIG;
  720. }
  721. ch = ch - 0x10000;
  722. W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
  723. W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
  724. p[1] = (Uint8) (W1 >> 8);
  725. p[0] = (Uint8) W1;
  726. p[3] = (Uint8) (W2 >> 8);
  727. p[2] = (Uint8) W2;
  728. dst += 4;
  729. dstlen -= 4;
  730. }
  731. }
  732. break;
  733. case ENCODING_UCS2BE:
  734. {
  735. Uint8 *p = (Uint8 *) dst;
  736. if (ch > 0xFFFF) {
  737. ch = UNKNOWN_UNICODE;
  738. }
  739. if (dstlen < 2) {
  740. return SDL_ICONV_E2BIG;
  741. }
  742. p[0] = (Uint8) (ch >> 8);
  743. p[1] = (Uint8) ch;
  744. dst += 2;
  745. dstlen -= 2;
  746. }
  747. break;
  748. case ENCODING_UCS2LE:
  749. {
  750. Uint8 *p = (Uint8 *) dst;
  751. if (ch > 0xFFFF) {
  752. ch = UNKNOWN_UNICODE;
  753. }
  754. if (dstlen < 2) {
  755. return SDL_ICONV_E2BIG;
  756. }
  757. p[1] = (Uint8) (ch >> 8);
  758. p[0] = (Uint8) ch;
  759. dst += 2;
  760. dstlen -= 2;
  761. }
  762. break;
  763. case ENCODING_UTF32BE:
  764. if (ch > 0x10FFFF) {
  765. ch = UNKNOWN_UNICODE;
  766. }
  767. /* fallthrough */
  768. case ENCODING_UCS4BE:
  769. if (ch > 0x7FFFFFFF) {
  770. ch = UNKNOWN_UNICODE;
  771. }
  772. {
  773. Uint8 *p = (Uint8 *) dst;
  774. if (dstlen < 4) {
  775. return SDL_ICONV_E2BIG;
  776. }
  777. p[0] = (Uint8) (ch >> 24);
  778. p[1] = (Uint8) (ch >> 16);
  779. p[2] = (Uint8) (ch >> 8);
  780. p[3] = (Uint8) ch;
  781. dst += 4;
  782. dstlen -= 4;
  783. }
  784. break;
  785. case ENCODING_UTF32LE:
  786. if (ch > 0x10FFFF) {
  787. ch = UNKNOWN_UNICODE;
  788. }
  789. /* fallthrough */
  790. case ENCODING_UCS4LE:
  791. if (ch > 0x7FFFFFFF) {
  792. ch = UNKNOWN_UNICODE;
  793. }
  794. {
  795. Uint8 *p = (Uint8 *) dst;
  796. if (dstlen < 4) {
  797. return SDL_ICONV_E2BIG;
  798. }
  799. p[3] = (Uint8) (ch >> 24);
  800. p[2] = (Uint8) (ch >> 16);
  801. p[1] = (Uint8) (ch >> 8);
  802. p[0] = (Uint8) ch;
  803. dst += 4;
  804. dstlen -= 4;
  805. }
  806. break;
  807. }
  808. /* Update state */
  809. *inbuf = src;
  810. *inbytesleft = srclen;
  811. *outbuf = dst;
  812. *outbytesleft = dstlen;
  813. ++total;
  814. }
  815. return total;
  816. }
  817. int
  818. SDL_iconv_close(SDL_iconv_t cd)
  819. {
  820. if (cd != (SDL_iconv_t)-1) {
  821. SDL_free(cd);
  822. }
  823. return 0;
  824. }
  825. #endif /* !HAVE_ICONV */
  826. char *
  827. SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf,
  828. size_t inbytesleft)
  829. {
  830. SDL_iconv_t cd;
  831. char *string;
  832. size_t stringsize;
  833. char *outbuf;
  834. size_t outbytesleft;
  835. size_t retCode = 0;
  836. cd = SDL_iconv_open(tocode, fromcode);
  837. if (cd == (SDL_iconv_t) - 1) {
  838. /* See if we can recover here (fixes iconv on Solaris 11) */
  839. if (!tocode || !*tocode) {
  840. tocode = "UTF-8";
  841. }
  842. if (!fromcode || !*fromcode) {
  843. fromcode = "UTF-8";
  844. }
  845. cd = SDL_iconv_open(tocode, fromcode);
  846. }
  847. if (cd == (SDL_iconv_t) - 1) {
  848. return NULL;
  849. }
  850. stringsize = inbytesleft > 4 ? inbytesleft : 4;
  851. string = SDL_malloc(stringsize);
  852. if (!string) {
  853. SDL_iconv_close(cd);
  854. return NULL;
  855. }
  856. outbuf = string;
  857. outbytesleft = stringsize;
  858. SDL_memset(outbuf, 0, 4);
  859. while (inbytesleft > 0) {
  860. retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
  861. switch (retCode) {
  862. case SDL_ICONV_E2BIG:
  863. {
  864. char *oldstring = string;
  865. stringsize *= 2;
  866. string = SDL_realloc(string, stringsize);
  867. if (!string) {
  868. SDL_iconv_close(cd);
  869. return NULL;
  870. }
  871. outbuf = string + (outbuf - oldstring);
  872. outbytesleft = stringsize - (outbuf - string);
  873. SDL_memset(outbuf, 0, 4);
  874. }
  875. break;
  876. case SDL_ICONV_EILSEQ:
  877. /* Try skipping some input data - not perfect, but... */
  878. ++inbuf;
  879. --inbytesleft;
  880. break;
  881. case SDL_ICONV_EINVAL:
  882. case SDL_ICONV_ERROR:
  883. /* We can't continue... */
  884. inbytesleft = 0;
  885. break;
  886. }
  887. }
  888. SDL_iconv_close(cd);
  889. return string;
  890. }
  891. /* vi: set ts=4 sw=4 expandtab: */