Edinburgh Speech Tools 2.4-release
 
Loading...
Searching...
No Matches
charset.h
1/*************************************************************************/
2/* */
3/* Copyright (c) 1997-98 Richard Tobin, Language Technology Group, HCRC, */
4/* University of Edinburgh. */
5/* */
6/* THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, */
7/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
8/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
9/* IN NO EVENT SHALL THE AUTHOR OR THE UNIVERSITY OF EDINBURGH BE LIABLE */
10/* FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF */
11/* CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION */
12/* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
13/* */
14/*************************************************************************/
15#ifndef CHARSET_H
16#define CHARSET_H
17
/*
 * STD_API decorates every public declaration in this header.
 * When FOR_LT is not defined it is defined here as empty, so the
 * declarations carry no extra decoration.
 * NOTE(review): when FOR_LT is defined, STD_API is not defined by this
 * header and is presumably supplied by an earlier include - confirm.
 */
18#ifndef FOR_LT
19#define STD_API
20#endif
21
/*
 * init_charset: takes no arguments and returns nothing.
 * NOTE(review): presumably sets up the character-set data declared in
 * this header and must be called before that data is used; the
 * implementation is not visible here - confirm.
 */
22STD_API void init_charset(void);
23
24/*
25 * We'd like char8 to be unsigned char, but it causes too many problems.
26 * For example:
27 * char8 *name; ...; return name ? name : "<none>"
28 * produces a warning with many compilers if char8 is unsigned.
29 */
30
/*
 * Character unit types.
 *   char8  - plain char (signedness is implementation-defined).
 *   char16 - unsigned short.
 *   char32 - unsigned int.
 * The C standard only guarantees minimum widths for short and int, so
 * the 16/32 in the names hold on typical platforms rather than by
 * definition.
 */
31typedef char char8;
32typedef unsigned short char16;
33typedef unsigned int char32;
34
/*
 * Select the Char type from CHAR_SIZE.
 * CHAR_SIZE must already be defined when this header is included
 * (this header never defines it); otherwise compilation stops here.
 */
35#if !defined(CHAR_SIZE)
36# error CHAR_SIZE not defined
37#endif
38
/*
 * CHAR_SIZE == 8  -> Char is char8.
 * CHAR_SIZE == 16 -> Char is char16.
 * Any other value is rejected at compile time.
 */
39#if CHAR_SIZE == 8
40typedef char8 Char;
41#elif CHAR_SIZE == 16
42typedef char16 Char;
43#else
44#error CHAR_SIZE must be 8 or 16
45#endif
46
47/* Character encodings */
48
/*
 * Identifiers for the character encodings known to this module.
 * No enumerator has an explicit value, so they are numbered
 * consecutively from 0 (CE_unknown) in declaration order.
 * NOTE(review): the trailing B / L on the UTF-16 and UCS-2 names
 * presumably denote big- and little-endian byte order - confirm.
 */
49enum character_encoding {
50 CE_unknown, CE_unspecified_ascii_superset,
51 CE_UTF_8, CE_ISO_646,
52 CE_ISO_8859_1,
53
54 CE_ISO_8859_2, CE_ISO_8859_3, CE_ISO_8859_4, CE_ISO_8859_5,
55 CE_ISO_8859_6, CE_ISO_8859_7, CE_ISO_8859_8, CE_ISO_8859_9,
56
57 CE_UTF_16B, CE_UTF_16L, CE_ISO_10646_UCS_2B, CE_ISO_10646_UCS_2L,
/* Not an encoding: being last, its value is the number of enumerators
 * above it (17). It is used in this header as an array dimension. */
58 CE_enum_count
59};
60
/* Convenience name for enum character_encoding. */
61typedef enum character_encoding CharacterEncoding;
62
/*
 * Global encoding value, defined in another translation unit.
 * NOTE(review): presumably the encoding that matches the Char type
 * selected by CHAR_SIZE - confirm against the definition.
 */
63extern STD_API CharacterEncoding InternalCharacterEncoding;
64
/*
 * Two tables of CE_enum_count string pointers, defined elsewhere.
 * NOTE(review): presumably indexed by CharacterEncoding value, the
 * second variant additionally naming the byte order - confirm.
 */
65extern STD_API const char8 *CharacterEncodingName[CE_enum_count];
66extern STD_API const char8 *CharacterEncodingNameAndByteOrder[CE_enum_count];
67
/*
 * One alias entry: a name string paired with the encoding it stands for.
 */
68struct character_encoding_alias {const char8 *name; CharacterEncoding enc;};
/*
 * Alias table, defined elsewhere; its size is not visible from this
 * declaration.
 * NOTE(review): CE_alias_count is presumably the number of entries in
 * CharacterEncodingAlias - confirm against the definition.
 */
69extern STD_API struct character_encoding_alias CharacterEncodingAlias[];
70extern STD_API const int CE_alias_count;
71
/*
 * Encoding query functions; implementations are not visible here.
 */

/* Takes an encoding and returns an int.
 * NOTE(review): presumably non-zero when enc is a superset of ASCII -
 * confirm. */
72STD_API int EncodingIsAsciiSuperset(CharacterEncoding enc);
/* Takes two encodings plus a pointer to a third and returns an int.
 * NOTE(review): presumably reports whether enc1 and enc2 are compatible
 * and writes a resulting encoding through enc3 - confirm, including
 * whether enc3 may be NULL. */
73STD_API int EncodingsCompatible(CharacterEncoding enc1, CharacterEncoding enc2,
74 CharacterEncoding *enc3);
/* Takes a name string and returns a CharacterEncoding.
 * The parameter is a non-const char8 *, so callers holding a const
 * string need a cast.
 * NOTE(review): presumably looks the name up among the known names and
 * aliases; the value returned for an unrecognised name is not visible
 * here - confirm. */
75STD_API CharacterEncoding FindEncoding(char8 *name);
76
77/* Translation tables for Latin-N - do this right sometime! XXX */
78
/*
 * Translation tables, defined elsewhere. Each has a first dimension of 8.
 * NOTE(review): 8 matches the number of CE_ISO_8859_2 .. CE_ISO_8859_9
 * enumerators, so each row presumably belongs to one of those
 * encodings - confirm the row order against the definition.
 *   iso_to_unicode[i][c] - 256 int entries per row, presumably the
 *                          Unicode value for 8-bit code c.
 *   iso_max_val[i]       - one int per row, presumably the largest
 *                          Unicode value that row maps to.
 *   unicode_to_iso[i]    - one char8 pointer per row, presumably a
 *                          reverse lookup table; its length is not
 *                          visible here.
 */
79extern STD_API int iso_to_unicode[8][256];
80extern STD_API int iso_max_val[8];
81extern STD_API char8 *unicode_to_iso[8];
82
83#endif /* CHARSET_H */