SDL 2.0
SDL_iconv.c
Go to the documentation of this file.
1/*
2 Simple DirectMedia Layer
3 Copyright (C) 1997-2019 Sam Lantinga <slouken@libsdl.org>
4
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any damages
7 arising from the use of this software.
8
9 Permission is granted to anyone to use this software for any purpose,
10 including commercial applications, and to alter it and redistribute it
11 freely, subject to the following restrictions:
12
13 1. The origin of this software must not be misrepresented; you must not
14 claim that you wrote the original software. If you use this software
15 in a product, an acknowledgment in the product documentation would be
16 appreciated but is not required.
17 2. Altered source versions must be plainly marked as such, and must not be
18 misrepresented as being the original software.
19 3. This notice may not be removed or altered from any source distribution.
20*/
21
22#if defined(__clang_analyzer__) && !defined(SDL_DISABLE_ANALYZE_MACROS)
23#define SDL_DISABLE_ANALYZE_MACROS 1
24#endif
25
26#include "../SDL_internal.h"
27
28/* This file contains portable iconv functions for SDL */
29
30#include "SDL_stdinc.h"
31#include "SDL_endian.h"
32
33#if defined(HAVE_ICONV) && defined(HAVE_ICONV_H)
34#include <iconv.h>
35
36/* Depending on which standard the iconv() was implemented with,
37 iconv() may or may not use const char ** for the inbuf param.
38 If we get this wrong, it's just a warning, so no big deal.
39*/
40#if defined(_XGP6) || defined(__APPLE__) || defined(__RISCOS__) || \
41 defined(__EMSCRIPTEN__) || \
42 (defined(__GLIBC__) && ((__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2)) || \
43 (defined(_NEWLIB_VERSION)))
44#define ICONV_INBUF_NONCONST
45#endif
46
47#include <errno.h>
48
49SDL_COMPILE_TIME_ASSERT(iconv_t, sizeof (iconv_t) <= sizeof (SDL_iconv_t));
50
51SDL_iconv_t
52SDL_iconv_open(const char *tocode, const char *fromcode)
53{
54 return (SDL_iconv_t) ((size_t) iconv_open(tocode, fromcode));
55}
56
57int
58SDL_iconv_close(SDL_iconv_t cd)
59{
60 return iconv_close((iconv_t) ((size_t) cd));
61}
62
63size_t
64SDL_iconv(SDL_iconv_t cd,
65 const char **inbuf, size_t * inbytesleft,
66 char **outbuf, size_t * outbytesleft)
67{
68 size_t retCode;
69#ifdef ICONV_INBUF_NONCONST
70 retCode = iconv((iconv_t) ((size_t) cd), (char **) inbuf, inbytesleft, outbuf, outbytesleft);
71#else
72 retCode = iconv((iconv_t) ((size_t) cd), inbuf, inbytesleft, outbuf, outbytesleft);
73#endif
74 if (retCode == (size_t) - 1) {
75 switch (errno) {
76 case E2BIG:
77 return SDL_ICONV_E2BIG;
78 case EILSEQ:
79 return SDL_ICONV_EILSEQ;
80 case EINVAL:
81 return SDL_ICONV_EINVAL;
82 default:
83 return SDL_ICONV_ERROR;
84 }
85 }
86 return retCode;
87}
88
89#else
90
91/* Lots of useful information on Unicode at:
92 http://www.cl.cam.ac.uk/~mgk25/unicode.html
93*/
94
95#define UNICODE_BOM 0xFEFF
96
97#define UNKNOWN_ASCII '?'
98#define UNKNOWN_UNICODE 0xFFFD
99
/*
 * Encodings understood by the built-in converter.  ENCODING_UNKNOWN is
 * the "no match" value used while resolving encoding names.  The plain
 * UTF16/UTF32 entries carry no fixed byte order: the converter resolves
 * them to a BE/LE variant at conversion time.
 */
enum
{
    ENCODING_UNKNOWN,
    ENCODING_ASCII,
    ENCODING_LATIN1,
    ENCODING_UTF8,
    ENCODING_UTF16,             /* Needs byte order marker */
    ENCODING_UTF16BE,
    ENCODING_UTF16LE,
    ENCODING_UTF32,             /* Needs byte order marker */
    ENCODING_UTF32BE,
    ENCODING_UTF32LE,
    ENCODING_UCS2BE,
    ENCODING_UCS2LE,
    ENCODING_UCS4BE,
    ENCODING_UCS4LE
};
117#if SDL_BYTEORDER == SDL_BIG_ENDIAN
118#define ENCODING_UTF16NATIVE ENCODING_UTF16BE
119#define ENCODING_UTF32NATIVE ENCODING_UTF32BE
120#define ENCODING_UCS2NATIVE ENCODING_UCS2BE
121#define ENCODING_UCS4NATIVE ENCODING_UCS4BE
122#else
123#define ENCODING_UTF16NATIVE ENCODING_UTF16LE
124#define ENCODING_UTF32NATIVE ENCODING_UTF32LE
125#define ENCODING_UCS2NATIVE ENCODING_UCS2LE
126#define ENCODING_UCS4NATIVE ENCODING_UCS4LE
127#endif
128
130{
133};
134
135static struct
136{
137 const char *name;
139} encodings[] = {
140/* *INDENT-OFF* */
141 { "ASCII", ENCODING_ASCII },
142 { "US-ASCII", ENCODING_ASCII },
143 { "8859-1", ENCODING_LATIN1 },
144 { "ISO-8859-1", ENCODING_LATIN1 },
145 { "UTF8", ENCODING_UTF8 },
146 { "UTF-8", ENCODING_UTF8 },
147 { "UTF16", ENCODING_UTF16 },
148 { "UTF-16", ENCODING_UTF16 },
149 { "UTF16BE", ENCODING_UTF16BE },
150 { "UTF-16BE", ENCODING_UTF16BE },
151 { "UTF16LE", ENCODING_UTF16LE },
152 { "UTF-16LE", ENCODING_UTF16LE },
153 { "UTF32", ENCODING_UTF32 },
154 { "UTF-32", ENCODING_UTF32 },
155 { "UTF32BE", ENCODING_UTF32BE },
156 { "UTF-32BE", ENCODING_UTF32BE },
157 { "UTF32LE", ENCODING_UTF32LE },
158 { "UTF-32LE", ENCODING_UTF32LE },
159 { "UCS2", ENCODING_UCS2BE },
160 { "UCS-2", ENCODING_UCS2BE },
161 { "UCS-2LE", ENCODING_UCS2LE },
162 { "UCS-2BE", ENCODING_UCS2BE },
163 { "UCS-2-INTERNAL", ENCODING_UCS2NATIVE },
164 { "UCS4", ENCODING_UCS4BE },
165 { "UCS-4", ENCODING_UCS4BE },
166 { "UCS-4LE", ENCODING_UCS4LE },
167 { "UCS-4BE", ENCODING_UCS4BE },
168 { "UCS-4-INTERNAL", ENCODING_UCS4NATIVE },
169/* *INDENT-ON* */
171
/*
 * Derive an encoding name from the environment.
 *
 * The first of LC_ALL, LC_CTYPE, LC_MESSAGES and LANG that is set is
 * used; if none is set, or the value is empty or "C", "ASCII" is used.
 * Anything up to and including the first '.' is dropped and anything
 * from the first '@' onwards is cut off, so "en_US.UTF-8@blah" yields
 * "UTF-8".
 *
 * buffer/bufsize: caller-provided storage that receives the result.
 * Returns buffer.
 */
static const char *
getlocale(char *buffer, size_t bufsize)
{
    const char *lang;
    char *ptr;

    lang = SDL_getenv("LC_ALL");
    if (!lang) {
        lang = SDL_getenv("LC_CTYPE");
    }
    if (!lang) {
        lang = SDL_getenv("LC_MESSAGES");
    }
    if (!lang) {
        lang = SDL_getenv("LANG");
    }
    if (!lang || !*lang || SDL_strcmp(lang, "C") == 0) {
        lang = "ASCII";
    }

    /* We need to trim down strings like "en_US.UTF-8@blah" to "UTF-8" */
    ptr = SDL_strchr(lang, '.');
    if (ptr != NULL) {
        lang = ptr + 1;
    }

    SDL_strlcpy(buffer, lang, bufsize);
    ptr = SDL_strchr(buffer, '@');
    if (ptr != NULL) {
        *ptr = '\0';            /* chop end of string. */
    }

    return buffer;
}
206
207SDL_iconv_t
208SDL_iconv_open(const char *tocode, const char *fromcode)
209{
210 int src_fmt = ENCODING_UNKNOWN;
211 int dst_fmt = ENCODING_UNKNOWN;
212 int i;
213 char fromcode_buffer[64];
214 char tocode_buffer[64];
215
216 if (!fromcode || !*fromcode) {
217 fromcode = getlocale(fromcode_buffer, sizeof(fromcode_buffer));
218 }
219 if (!tocode || !*tocode) {
220 tocode = getlocale(tocode_buffer, sizeof(tocode_buffer));
221 }
222 for (i = 0; i < SDL_arraysize(encodings); ++i) {
223 if (SDL_strcasecmp(fromcode, encodings[i].name) == 0) {
224 src_fmt = encodings[i].format;
225 if (dst_fmt != ENCODING_UNKNOWN) {
226 break;
227 }
228 }
229 if (SDL_strcasecmp(tocode, encodings[i].name) == 0) {
230 dst_fmt = encodings[i].format;
231 if (src_fmt != ENCODING_UNKNOWN) {
232 break;
233 }
234 }
235 }
236 if (src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN) {
237 SDL_iconv_t cd = (SDL_iconv_t) SDL_malloc(sizeof(*cd));
238 if (cd) {
239 cd->src_fmt = src_fmt;
240 cd->dst_fmt = dst_fmt;
241 return cd;
242 }
243 }
244 return (SDL_iconv_t) - 1;
245}
246
247size_t
248SDL_iconv(SDL_iconv_t cd,
249 const char **inbuf, size_t * inbytesleft,
250 char **outbuf, size_t * outbytesleft)
251{
252 /* For simplicity, we'll convert everything to and from UCS-4 */
253 const char *src;
254 char *dst;
255 size_t srclen, dstlen;
256 Uint32 ch = 0;
257 size_t total;
258
259 if (!inbuf || !*inbuf) {
260 /* Reset the context */
261 return 0;
262 }
263 if (!outbuf || !*outbuf || !outbytesleft || !*outbytesleft) {
264 return SDL_ICONV_E2BIG;
265 }
266 src = *inbuf;
267 srclen = (inbytesleft ? *inbytesleft : 0);
268 dst = *outbuf;
269 dstlen = *outbytesleft;
270
271 switch (cd->src_fmt) {
272 case ENCODING_UTF16:
273 /* Scan for a byte order marker */
274 {
275 Uint8 *p = (Uint8 *) src;
276 size_t n = srclen / 2;
277 while (n) {
278 if (p[0] == 0xFF && p[1] == 0xFE) {
279 cd->src_fmt = ENCODING_UTF16BE;
280 break;
281 } else if (p[0] == 0xFE && p[1] == 0xFF) {
282 cd->src_fmt = ENCODING_UTF16LE;
283 break;
284 }
285 p += 2;
286 --n;
287 }
288 if (n == 0) {
289 /* We can't tell, default to host order */
290 cd->src_fmt = ENCODING_UTF16NATIVE;
291 }
292 }
293 break;
294 case ENCODING_UTF32:
295 /* Scan for a byte order marker */
296 {
297 Uint8 *p = (Uint8 *) src;
298 size_t n = srclen / 4;
299 while (n) {
300 if (p[0] == 0xFF && p[1] == 0xFE &&
301 p[2] == 0x00 && p[3] == 0x00) {
302 cd->src_fmt = ENCODING_UTF32BE;
303 break;
304 } else if (p[0] == 0x00 && p[1] == 0x00 &&
305 p[2] == 0xFE && p[3] == 0xFF) {
306 cd->src_fmt = ENCODING_UTF32LE;
307 break;
308 }
309 p += 4;
310 --n;
311 }
312 if (n == 0) {
313 /* We can't tell, default to host order */
314 cd->src_fmt = ENCODING_UTF32NATIVE;
315 }
316 }
317 break;
318 }
319
320 switch (cd->dst_fmt) {
321 case ENCODING_UTF16:
322 /* Default to host order, need to add byte order marker */
323 if (dstlen < 2) {
324 return SDL_ICONV_E2BIG;
325 }
326 *(Uint16 *) dst = UNICODE_BOM;
327 dst += 2;
328 dstlen -= 2;
329 cd->dst_fmt = ENCODING_UTF16NATIVE;
330 break;
331 case ENCODING_UTF32:
332 /* Default to host order, need to add byte order marker */
333 if (dstlen < 4) {
334 return SDL_ICONV_E2BIG;
335 }
336 *(Uint32 *) dst = UNICODE_BOM;
337 dst += 4;
338 dstlen -= 4;
339 cd->dst_fmt = ENCODING_UTF32NATIVE;
340 break;
341 }
342
343 total = 0;
344 while (srclen > 0) {
345 /* Decode a character */
346 switch (cd->src_fmt) {
347 case ENCODING_ASCII:
348 {
349 Uint8 *p = (Uint8 *) src;
350 ch = (Uint32) (p[0] & 0x7F);
351 ++src;
352 --srclen;
353 }
354 break;
355 case ENCODING_LATIN1:
356 {
357 Uint8 *p = (Uint8 *) src;
358 ch = (Uint32) p[0];
359 ++src;
360 --srclen;
361 }
362 break;
363 case ENCODING_UTF8: /* RFC 3629 */
364 {
365 Uint8 *p = (Uint8 *) src;
366 size_t left = 0;
367 SDL_bool overlong = SDL_FALSE;
368 if (p[0] >= 0xFC) {
369 if ((p[0] & 0xFE) != 0xFC) {
370 /* Skip illegal sequences
371 return SDL_ICONV_EILSEQ;
372 */
373 ch = UNKNOWN_UNICODE;
374 } else {
375 if (p[0] == 0xFC && srclen > 1 && (p[1] & 0xFC) == 0x80) {
376 overlong = SDL_TRUE;
377 }
378 ch = (Uint32) (p[0] & 0x01);
379 left = 5;
380 }
381 } else if (p[0] >= 0xF8) {
382 if ((p[0] & 0xFC) != 0xF8) {
383 /* Skip illegal sequences
384 return SDL_ICONV_EILSEQ;
385 */
386 ch = UNKNOWN_UNICODE;
387 } else {
388 if (p[0] == 0xF8 && srclen > 1 && (p[1] & 0xF8) == 0x80) {
389 overlong = SDL_TRUE;
390 }
391 ch = (Uint32) (p[0] & 0x03);
392 left = 4;
393 }
394 } else if (p[0] >= 0xF0) {
395 if ((p[0] & 0xF8) != 0xF0) {
396 /* Skip illegal sequences
397 return SDL_ICONV_EILSEQ;
398 */
399 ch = UNKNOWN_UNICODE;
400 } else {
401 if (p[0] == 0xF0 && srclen > 1 && (p[1] & 0xF0) == 0x80) {
402 overlong = SDL_TRUE;
403 }
404 ch = (Uint32) (p[0] & 0x07);
405 left = 3;
406 }
407 } else if (p[0] >= 0xE0) {
408 if ((p[0] & 0xF0) != 0xE0) {
409 /* Skip illegal sequences
410 return SDL_ICONV_EILSEQ;
411 */
412 ch = UNKNOWN_UNICODE;
413 } else {
414 if (p[0] == 0xE0 && srclen > 1 && (p[1] & 0xE0) == 0x80) {
415 overlong = SDL_TRUE;
416 }
417 ch = (Uint32) (p[0] & 0x0F);
418 left = 2;
419 }
420 } else if (p[0] >= 0xC0) {
421 if ((p[0] & 0xE0) != 0xC0) {
422 /* Skip illegal sequences
423 return SDL_ICONV_EILSEQ;
424 */
425 ch = UNKNOWN_UNICODE;
426 } else {
427 if ((p[0] & 0xDE) == 0xC0) {
428 overlong = SDL_TRUE;
429 }
430 ch = (Uint32) (p[0] & 0x1F);
431 left = 1;
432 }
433 } else {
434 if ((p[0] & 0x80) != 0x00) {
435 /* Skip illegal sequences
436 return SDL_ICONV_EILSEQ;
437 */
438 ch = UNKNOWN_UNICODE;
439 } else {
440 ch = (Uint32) p[0];
441 }
442 }
443 ++src;
444 --srclen;
445 if (srclen < left) {
446 return SDL_ICONV_EINVAL;
447 }
448 while (left--) {
449 ++p;
450 if ((p[0] & 0xC0) != 0x80) {
451 /* Skip illegal sequences
452 return SDL_ICONV_EILSEQ;
453 */
454 ch = UNKNOWN_UNICODE;
455 break;
456 }
457 ch <<= 6;
458 ch |= (p[0] & 0x3F);
459 ++src;
460 --srclen;
461 }
462 if (overlong) {
463 /* Potential security risk
464 return SDL_ICONV_EILSEQ;
465 */
466 ch = UNKNOWN_UNICODE;
467 }
468 if ((ch >= 0xD800 && ch <= 0xDFFF) ||
469 (ch == 0xFFFE || ch == 0xFFFF) || ch > 0x10FFFF) {
470 /* Skip illegal sequences
471 return SDL_ICONV_EILSEQ;
472 */
473 ch = UNKNOWN_UNICODE;
474 }
475 }
476 break;
477 case ENCODING_UTF16BE: /* RFC 2781 */
478 {
479 Uint8 *p = (Uint8 *) src;
480 Uint16 W1, W2;
481 if (srclen < 2) {
482 return SDL_ICONV_EINVAL;
483 }
484 W1 = ((Uint16) p[0] << 8) | (Uint16) p[1];
485 src += 2;
486 srclen -= 2;
487 if (W1 < 0xD800 || W1 > 0xDFFF) {
488 ch = (Uint32) W1;
489 break;
490 }
491 if (W1 > 0xDBFF) {
492 /* Skip illegal sequences
493 return SDL_ICONV_EILSEQ;
494 */
495 ch = UNKNOWN_UNICODE;
496 break;
497 }
498 if (srclen < 2) {
499 return SDL_ICONV_EINVAL;
500 }
501 p = (Uint8 *) src;
502 W2 = ((Uint16) p[0] << 8) | (Uint16) p[1];
503 src += 2;
504 srclen -= 2;
505 if (W2 < 0xDC00 || W2 > 0xDFFF) {
506 /* Skip illegal sequences
507 return SDL_ICONV_EILSEQ;
508 */
509 ch = UNKNOWN_UNICODE;
510 break;
511 }
512 ch = (((Uint32) (W1 & 0x3FF) << 10) |
513 (Uint32) (W2 & 0x3FF)) + 0x10000;
514 }
515 break;
516 case ENCODING_UTF16LE: /* RFC 2781 */
517 {
518 Uint8 *p = (Uint8 *) src;
519 Uint16 W1, W2;
520 if (srclen < 2) {
521 return SDL_ICONV_EINVAL;
522 }
523 W1 = ((Uint16) p[1] << 8) | (Uint16) p[0];
524 src += 2;
525 srclen -= 2;
526 if (W1 < 0xD800 || W1 > 0xDFFF) {
527 ch = (Uint32) W1;
528 break;
529 }
530 if (W1 > 0xDBFF) {
531 /* Skip illegal sequences
532 return SDL_ICONV_EILSEQ;
533 */
534 ch = UNKNOWN_UNICODE;
535 break;
536 }
537 if (srclen < 2) {
538 return SDL_ICONV_EINVAL;
539 }
540 p = (Uint8 *) src;
541 W2 = ((Uint16) p[1] << 8) | (Uint16) p[0];
542 src += 2;
543 srclen -= 2;
544 if (W2 < 0xDC00 || W2 > 0xDFFF) {
545 /* Skip illegal sequences
546 return SDL_ICONV_EILSEQ;
547 */
548 ch = UNKNOWN_UNICODE;
549 break;
550 }
551 ch = (((Uint32) (W1 & 0x3FF) << 10) |
552 (Uint32) (W2 & 0x3FF)) + 0x10000;
553 }
554 break;
555 case ENCODING_UCS2LE:
556 {
557 Uint8 *p = (Uint8 *) src;
558 if (srclen < 2) {
559 return SDL_ICONV_EINVAL;
560 }
561 ch = ((Uint32) p[1] << 8) | (Uint32) p[0];
562 src += 2;
563 srclen -= 2;
564 }
565 break;
566 case ENCODING_UCS2BE:
567 {
568 Uint8 *p = (Uint8 *) src;
569 if (srclen < 2) {
570 return SDL_ICONV_EINVAL;
571 }
572 ch = ((Uint32) p[0] << 8) | (Uint32) p[1];
573 src += 2;
574 srclen -= 2;
575 }
576 break;
577 case ENCODING_UCS4BE:
578 case ENCODING_UTF32BE:
579 {
580 Uint8 *p = (Uint8 *) src;
581 if (srclen < 4) {
582 return SDL_ICONV_EINVAL;
583 }
584 ch = ((Uint32) p[0] << 24) |
585 ((Uint32) p[1] << 16) |
586 ((Uint32) p[2] << 8) | (Uint32) p[3];
587 src += 4;
588 srclen -= 4;
589 }
590 break;
591 case ENCODING_UCS4LE:
592 case ENCODING_UTF32LE:
593 {
594 Uint8 *p = (Uint8 *) src;
595 if (srclen < 4) {
596 return SDL_ICONV_EINVAL;
597 }
598 ch = ((Uint32) p[3] << 24) |
599 ((Uint32) p[2] << 16) |
600 ((Uint32) p[1] << 8) | (Uint32) p[0];
601 src += 4;
602 srclen -= 4;
603 }
604 break;
605 }
606
607 /* Encode a character */
608 switch (cd->dst_fmt) {
609 case ENCODING_ASCII:
610 {
611 Uint8 *p = (Uint8 *) dst;
612 if (dstlen < 1) {
613 return SDL_ICONV_E2BIG;
614 }
615 if (ch > 0x7F) {
616 *p = UNKNOWN_ASCII;
617 } else {
618 *p = (Uint8) ch;
619 }
620 ++dst;
621 --dstlen;
622 }
623 break;
624 case ENCODING_LATIN1:
625 {
626 Uint8 *p = (Uint8 *) dst;
627 if (dstlen < 1) {
628 return SDL_ICONV_E2BIG;
629 }
630 if (ch > 0xFF) {
631 *p = UNKNOWN_ASCII;
632 } else {
633 *p = (Uint8) ch;
634 }
635 ++dst;
636 --dstlen;
637 }
638 break;
639 case ENCODING_UTF8: /* RFC 3629 */
640 {
641 Uint8 *p = (Uint8 *) dst;
642 if (ch > 0x10FFFF) {
643 ch = UNKNOWN_UNICODE;
644 }
645 if (ch <= 0x7F) {
646 if (dstlen < 1) {
647 return SDL_ICONV_E2BIG;
648 }
649 *p = (Uint8) ch;
650 ++dst;
651 --dstlen;
652 } else if (ch <= 0x7FF) {
653 if (dstlen < 2) {
654 return SDL_ICONV_E2BIG;
655 }
656 p[0] = 0xC0 | (Uint8) ((ch >> 6) & 0x1F);
657 p[1] = 0x80 | (Uint8) (ch & 0x3F);
658 dst += 2;
659 dstlen -= 2;
660 } else if (ch <= 0xFFFF) {
661 if (dstlen < 3) {
662 return SDL_ICONV_E2BIG;
663 }
664 p[0] = 0xE0 | (Uint8) ((ch >> 12) & 0x0F);
665 p[1] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
666 p[2] = 0x80 | (Uint8) (ch & 0x3F);
667 dst += 3;
668 dstlen -= 3;
669 } else if (ch <= 0x1FFFFF) {
670 if (dstlen < 4) {
671 return SDL_ICONV_E2BIG;
672 }
673 p[0] = 0xF0 | (Uint8) ((ch >> 18) & 0x07);
674 p[1] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
675 p[2] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
676 p[3] = 0x80 | (Uint8) (ch & 0x3F);
677 dst += 4;
678 dstlen -= 4;
679 } else if (ch <= 0x3FFFFFF) {
680 if (dstlen < 5) {
681 return SDL_ICONV_E2BIG;
682 }
683 p[0] = 0xF8 | (Uint8) ((ch >> 24) & 0x03);
684 p[1] = 0x80 | (Uint8) ((ch >> 18) & 0x3F);
685 p[2] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
686 p[3] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
687 p[4] = 0x80 | (Uint8) (ch & 0x3F);
688 dst += 5;
689 dstlen -= 5;
690 } else {
691 if (dstlen < 6) {
692 return SDL_ICONV_E2BIG;
693 }
694 p[0] = 0xFC | (Uint8) ((ch >> 30) & 0x01);
695 p[1] = 0x80 | (Uint8) ((ch >> 24) & 0x3F);
696 p[2] = 0x80 | (Uint8) ((ch >> 18) & 0x3F);
697 p[3] = 0x80 | (Uint8) ((ch >> 12) & 0x3F);
698 p[4] = 0x80 | (Uint8) ((ch >> 6) & 0x3F);
699 p[5] = 0x80 | (Uint8) (ch & 0x3F);
700 dst += 6;
701 dstlen -= 6;
702 }
703 }
704 break;
705 case ENCODING_UTF16BE: /* RFC 2781 */
706 {
707 Uint8 *p = (Uint8 *) dst;
708 if (ch > 0x10FFFF) {
709 ch = UNKNOWN_UNICODE;
710 }
711 if (ch < 0x10000) {
712 if (dstlen < 2) {
713 return SDL_ICONV_E2BIG;
714 }
715 p[0] = (Uint8) (ch >> 8);
716 p[1] = (Uint8) ch;
717 dst += 2;
718 dstlen -= 2;
719 } else {
720 Uint16 W1, W2;
721 if (dstlen < 4) {
722 return SDL_ICONV_E2BIG;
723 }
724 ch = ch - 0x10000;
725 W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
726 W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
727 p[0] = (Uint8) (W1 >> 8);
728 p[1] = (Uint8) W1;
729 p[2] = (Uint8) (W2 >> 8);
730 p[3] = (Uint8) W2;
731 dst += 4;
732 dstlen -= 4;
733 }
734 }
735 break;
736 case ENCODING_UTF16LE: /* RFC 2781 */
737 {
738 Uint8 *p = (Uint8 *) dst;
739 if (ch > 0x10FFFF) {
740 ch = UNKNOWN_UNICODE;
741 }
742 if (ch < 0x10000) {
743 if (dstlen < 2) {
744 return SDL_ICONV_E2BIG;
745 }
746 p[1] = (Uint8) (ch >> 8);
747 p[0] = (Uint8) ch;
748 dst += 2;
749 dstlen -= 2;
750 } else {
751 Uint16 W1, W2;
752 if (dstlen < 4) {
753 return SDL_ICONV_E2BIG;
754 }
755 ch = ch - 0x10000;
756 W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF);
757 W2 = 0xDC00 | (Uint16) (ch & 0x3FF);
758 p[1] = (Uint8) (W1 >> 8);
759 p[0] = (Uint8) W1;
760 p[3] = (Uint8) (W2 >> 8);
761 p[2] = (Uint8) W2;
762 dst += 4;
763 dstlen -= 4;
764 }
765 }
766 break;
767 case ENCODING_UCS2BE:
768 {
769 Uint8 *p = (Uint8 *) dst;
770 if (ch > 0xFFFF) {
771 ch = UNKNOWN_UNICODE;
772 }
773 if (dstlen < 2) {
774 return SDL_ICONV_E2BIG;
775 }
776 p[0] = (Uint8) (ch >> 8);
777 p[1] = (Uint8) ch;
778 dst += 2;
779 dstlen -= 2;
780 }
781 break;
782 case ENCODING_UCS2LE:
783 {
784 Uint8 *p = (Uint8 *) dst;
785 if (ch > 0xFFFF) {
786 ch = UNKNOWN_UNICODE;
787 }
788 if (dstlen < 2) {
789 return SDL_ICONV_E2BIG;
790 }
791 p[1] = (Uint8) (ch >> 8);
792 p[0] = (Uint8) ch;
793 dst += 2;
794 dstlen -= 2;
795 }
796 break;
797 case ENCODING_UTF32BE:
798 if (ch > 0x10FFFF) {
799 ch = UNKNOWN_UNICODE;
800 }
801 /* fallthrough */
802 case ENCODING_UCS4BE:
803 if (ch > 0x7FFFFFFF) {
804 ch = UNKNOWN_UNICODE;
805 }
806 {
807 Uint8 *p = (Uint8 *) dst;
808 if (dstlen < 4) {
809 return SDL_ICONV_E2BIG;
810 }
811 p[0] = (Uint8) (ch >> 24);
812 p[1] = (Uint8) (ch >> 16);
813 p[2] = (Uint8) (ch >> 8);
814 p[3] = (Uint8) ch;
815 dst += 4;
816 dstlen -= 4;
817 }
818 break;
819 case ENCODING_UTF32LE:
820 if (ch > 0x10FFFF) {
821 ch = UNKNOWN_UNICODE;
822 }
823 /* fallthrough */
824 case ENCODING_UCS4LE:
825 if (ch > 0x7FFFFFFF) {
826 ch = UNKNOWN_UNICODE;
827 }
828 {
829 Uint8 *p = (Uint8 *) dst;
830 if (dstlen < 4) {
831 return SDL_ICONV_E2BIG;
832 }
833 p[3] = (Uint8) (ch >> 24);
834 p[2] = (Uint8) (ch >> 16);
835 p[1] = (Uint8) (ch >> 8);
836 p[0] = (Uint8) ch;
837 dst += 4;
838 dstlen -= 4;
839 }
840 break;
841 }
842
843 /* Update state */
844 *inbuf = src;
845 *inbytesleft = srclen;
846 *outbuf = dst;
847 *outbytesleft = dstlen;
848 ++total;
849 }
850 return total;
851}
852
853int
854SDL_iconv_close(SDL_iconv_t cd)
855{
856 if (cd != (SDL_iconv_t)-1) {
857 SDL_free(cd);
858 }
859 return 0;
860}
861
862#endif /* !HAVE_ICONV */
863
864char *
865SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf,
866 size_t inbytesleft)
867{
868 SDL_iconv_t cd;
869 char *string;
870 size_t stringsize;
871 char *outbuf;
872 size_t outbytesleft;
873 size_t retCode = 0;
874
875 cd = SDL_iconv_open(tocode, fromcode);
876 if (cd == (SDL_iconv_t) - 1) {
877 /* See if we can recover here (fixes iconv on Solaris 11) */
878 if (!tocode || !*tocode) {
879 tocode = "UTF-8";
880 }
881 if (!fromcode || !*fromcode) {
882 fromcode = "UTF-8";
883 }
884 cd = SDL_iconv_open(tocode, fromcode);
885 }
886 if (cd == (SDL_iconv_t) - 1) {
887 return NULL;
888 }
889
890 stringsize = inbytesleft > 4 ? inbytesleft : 4;
891 string = SDL_malloc(stringsize);
892 if (!string) {
893 SDL_iconv_close(cd);
894 return NULL;
895 }
896 outbuf = string;
897 outbytesleft = stringsize;
898 SDL_memset(outbuf, 0, 4);
899
900 while (inbytesleft > 0) {
901 retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
902 switch (retCode) {
903 case SDL_ICONV_E2BIG:
904 {
905 char *oldstring = string;
906 stringsize *= 2;
907 string = SDL_realloc(string, stringsize);
908 if (!string) {
909 SDL_iconv_close(cd);
910 return NULL;
911 }
912 outbuf = string + (outbuf - oldstring);
913 outbytesleft = stringsize - (outbuf - string);
914 SDL_memset(outbuf, 0, 4);
915 }
916 break;
917 case SDL_ICONV_EILSEQ:
918 /* Try skipping some input data - not perfect, but... */
919 ++inbuf;
920 --inbytesleft;
921 break;
922 case SDL_ICONV_EINVAL:
923 case SDL_ICONV_ERROR:
924 /* We can't continue... */
925 inbytesleft = 0;
926 break;
927 }
928 }
929 SDL_iconv_close(cd);
930
931 return string;
932}
933
934/* vi: set ts=4 sw=4 expandtab: */
unsigned int size_t
#define SDL_memset
#define SDL_strchr
#define SDL_getenv
#define SDL_malloc
#define SDL_realloc
#define SDL_strcasecmp
#define SDL_strlcpy
#define SDL_free
#define SDL_strcmp
SDL_iconv_t SDL_iconv_open(const char *tocode, const char *fromcode)
Definition: SDL_iconv.c:208
#define ENCODING_UCS2NATIVE
Definition: SDL_iconv.c:120
static const char * getlocale(char *buffer, size_t bufsize)
Definition: SDL_iconv.c:173
size_t SDL_iconv(SDL_iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft)
Definition: SDL_iconv.c:248
#define UNICODE_BOM
Definition: SDL_iconv.c:95
int format
Definition: SDL_iconv.c:138
@ ENCODING_UNKNOWN
Definition: SDL_iconv.c:102
@ ENCODING_UTF16
Definition: SDL_iconv.c:106
@ ENCODING_LATIN1
Definition: SDL_iconv.c:104
@ ENCODING_UCS2BE
Definition: SDL_iconv.c:112
@ ENCODING_UCS4LE
Definition: SDL_iconv.c:115
@ ENCODING_UCS2LE
Definition: SDL_iconv.c:113
@ ENCODING_ASCII
Definition: SDL_iconv.c:103
@ ENCODING_UCS4BE
Definition: SDL_iconv.c:114
@ ENCODING_UTF16BE
Definition: SDL_iconv.c:107
@ ENCODING_UTF32
Definition: SDL_iconv.c:109
@ ENCODING_UTF32LE
Definition: SDL_iconv.c:111
@ ENCODING_UTF8
Definition: SDL_iconv.c:105
@ ENCODING_UTF16LE
Definition: SDL_iconv.c:108
@ ENCODING_UTF32BE
Definition: SDL_iconv.c:110
int SDL_iconv_close(SDL_iconv_t cd)
Definition: SDL_iconv.c:854
char * SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf, size_t inbytesleft)
Definition: SDL_iconv.c:865
#define UNKNOWN_UNICODE
Definition: SDL_iconv.c:98
const char * name
Definition: SDL_iconv.c:137
#define ENCODING_UTF16NATIVE
Definition: SDL_iconv.c:118
#define ENCODING_UTF32NATIVE
Definition: SDL_iconv.c:119
#define ENCODING_UCS4NATIVE
Definition: SDL_iconv.c:121
static struct @37 encodings[]
#define UNKNOWN_ASCII
Definition: SDL_iconv.c:97
GLenum src
GLint left
GLuint buffer
GLenum GLenum dst
GLuint const GLchar * name
GLfloat GLfloat p
GLenum GLuint GLsizei bufsize
GLdouble n
GLsizei const GLchar *const * string
#define SDL_ICONV_EINVAL
Definition: SDL_stdinc.h:544
SDL_bool
Definition: SDL_stdinc.h:162
@ SDL_TRUE
Definition: SDL_stdinc.h:164
@ SDL_FALSE
Definition: SDL_stdinc.h:163
#define SDL_arraysize(array)
Definition: SDL_stdinc.h:115
uint32_t Uint32
Definition: SDL_stdinc.h:203
#define SDL_ICONV_E2BIG
Definition: SDL_stdinc.h:542
#define SDL_COMPILE_TIME_ASSERT(name, x)
Definition: SDL_stdinc.h:312
#define SDL_ICONV_ERROR
Definition: SDL_stdinc.h:541
#define SDL_ICONV_EILSEQ
Definition: SDL_stdinc.h:543
uint16_t Uint16
Definition: SDL_stdinc.h:191
uint8_t Uint8
Definition: SDL_stdinc.h:179
return Display return Display Bool Bool int int int return Display XEvent Bool(*) XPointer return Display return Display Drawable _Xconst char unsigned int unsigned int return Display Pixmap Pixmap XColor XColor unsigned int unsigned int return Display _Xconst char char int char return Display Visual unsigned int int int char unsigned int unsigned int in i)
Definition: SDL_x11sym.h:50
#define NULL
Definition: begin_code.h:167