Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
charset.h
1 /*************************************************************************/
2 /* */
3 /* Copyright (c) 1997-98 Richard Tobin, Language Technology Group, HCRC, */
4 /* University of Edinburgh. */
5 /* */
6 /* THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, */
7 /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
8 /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
9 /* IN NO EVENT SHALL THE AUTHOR OR THE UNIVERSITY OF EDINBURGH BE LIABLE */
10 /* FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF */
11 /* CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION */
12 /* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
13 /* */
14 /*************************************************************************/
15 #ifndef CHARSET_H
16 #define CHARSET_H
17 /* Without FOR_LT, STD_API expands to nothing; with FOR_LT it must already be defined elsewhere. */
18 #ifndef FOR_LT
19 #define STD_API
20 #endif
21 /* Takes no arguments and returns nothing; presumably the one-time set-up call for this module -- confirm in the implementation. */
22 STD_API void init_charset(void);
23 
24 /*
25  * We'd like char8 to be unsigned char, but it causes too many problems.
26  * For example:
27  * char8 *name; ...; return name ? name : "<none>"
28  * produces a warning with many compilers if char8 is unsigned.
29  */
30 /* Character types named by width: char8 is plain char (see above); char16 and char32 are unsigned. */
31 typedef char char8;
32 typedef unsigned short char16; /* NOTE(review): assumes unsigned short is 16 bits wide on the target */
33 typedef unsigned int char32; /* NOTE(review): assumes unsigned int is 32 bits wide on the target */
34 /* Char is selected at compile time by CHAR_SIZE, which must be defined before this header is included. */
35 #if !defined(CHAR_SIZE)
36 # error CHAR_SIZE not defined
37 #endif
38 /* Only 8 (Char is char8) and 16 (Char is char16) are accepted; any other value stops the build. */
39 #if CHAR_SIZE == 8
40 typedef char8 Char;
41 #elif CHAR_SIZE == 16
42 typedef char16 Char;
43 #else
44 #error CHAR_SIZE must be 8 or 16
45 #endif
46 
47 /* Character encodings. Enumerators count up from 0, so the final CE_enum_count equals the number of encodings. */
48 /* CE_enum_count is also used as the array size of the CharacterEncodingName tables declared later in this header. */
49 enum character_encoding {
50  CE_unknown, CE_unspecified_ascii_superset,
51  CE_UTF_8, CE_ISO_646,
52  CE_ISO_8859_1,
53 /* ISO 8859 parts 2 to 9 */
54  CE_ISO_8859_2, CE_ISO_8859_3, CE_ISO_8859_4, CE_ISO_8859_5,
55  CE_ISO_8859_6, CE_ISO_8859_7, CE_ISO_8859_8, CE_ISO_8859_9,
56 /* NOTE(review): the B/L suffixes presumably mean big-/little-endian byte order -- confirm */
57  CE_UTF_16B, CE_UTF_16L, CE_ISO_10646_UCS_2B, CE_ISO_10646_UCS_2L,
58  CE_enum_count /* not an encoding: marks the end of the list */
59 };
60 /* Alias so the type can be written without the enum keyword. */
61 typedef enum character_encoding CharacterEncoding;
62 /* Global of type CharacterEncoding; NOTE(review): presumably the encoding used for Char data in memory -- confirm. */
63 extern STD_API CharacterEncoding InternalCharacterEncoding;
64 /* String tables sized by CE_enum_count, i.e. one slot per enumerator of character_encoding. */
65 extern STD_API const char8 *CharacterEncodingName[CE_enum_count];
66 extern STD_API const char8 *CharacterEncodingNameAndByteOrder[CE_enum_count];
67 /* Pairs a name string with an encoding value; the alias array's length is presumably CE_alias_count -- confirm. */
68 struct character_encoding_alias {const char8 *name; CharacterEncoding enc;};
69 extern STD_API struct character_encoding_alias CharacterEncodingAlias[];
70 extern STD_API const int CE_alias_count;
71 /* Encoding queries. NOTE(review): exact semantics are in the implementation; comments here only restate the signatures. */
72 STD_API int EncodingIsAsciiSuperset(CharacterEncoding enc);
73 STD_API int EncodingsCompatible(CharacterEncoding enc1, CharacterEncoding enc2,
74  CharacterEncoding *enc3); /* enc3 is a pointer, presumably an output parameter -- confirm */
75 STD_API CharacterEncoding FindEncoding(char8 *name); /* name is not const-qualified; TODO confirm whether the callee modifies it */
76 
77 /* Translation tables for Latin-N - do this right sometime! XXX */
78 /* Each array has 8 entries; NOTE(review): presumably one per ISO 8859 part 2-9 in the encoding enum -- confirm. */
79 extern STD_API int iso_to_unicode[8][256]; /* 256 ints per table, one per possible 8-bit value */
80 extern STD_API int iso_max_val[8];
81 extern STD_API char8 *unicode_to_iso[8];
82 
83 #endif /* CHARSET_H */