20 #include "lt-memory.h"
/*
 * Forward conversion tables, filled in by init_charset().
 * iso_to_unicode[t][b] holds the code stored for byte value b in 8-bit
 * table t: b itself for b < 0xa0, and latin_table[t][b-0xa0] from 0xa0 up.
 */
33 int iso_to_unicode[8][256];
/*
 * Reverse conversion tables.  init_charset() allocates unicode_to_iso[t]
 * with one entry per code up to the largest value found in table t; an
 * entry is the byte that maps to that code, or '?' where no byte does.
 */
35 char8 *unicode_to_iso[8];
/*
 * Source data for the tables built by init_charset(): eight rows of 96
 * codes each, one code per byte value 0xa0..0xff (init_charset() indexes a
 * row with byte-0xa0).
 * -00001 is an octal literal with unary minus, i.e. -1; it presumably marks
 * byte values that have no Unicode equivalent -- confirm against the code
 * that consumes the table.
 * The per-row labels below are inferred from the code points themselves.
 */
39 static int latin_table[8][96] = {
/* Row 0: code points appear to be those of ISO-8859-2, bytes 0xa0..0xff. */
43 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
44 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
45 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
46 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
47 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
48 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
49 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
50 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
51 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
52 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
53 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
54 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
/* Row 1: code points appear to be those of ISO-8859-3 (some bytes unassigned). */
59 0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, -00001, 0x0124, 0x00a7,
60 0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, -00001, 0x017b,
61 0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
62 0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, -00001, 0x017c,
63 0x00c0, 0x00c1, 0x00c2, -00001, 0x00c4, 0x010a, 0x0108, 0x00c7,
64 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
65 -00001, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
66 0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
67 0x00e0, 0x00e1, 0x00e2, -00001, 0x00e4, 0x010b, 0x0109, 0x00e7,
68 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
69 -00001, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
70 0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
/* Row 2: code points appear to be those of ISO-8859-4. */
75 0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
76 0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
77 0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
78 0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
79 0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
80 0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
81 0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
82 0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
83 0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
84 0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
85 0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
86 0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
/* Row 3: code points appear to be those of ISO-8859-5 (Cyrillic block 0x04xx). */
91 0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
92 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
93 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
94 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
95 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
96 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
97 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
98 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
99 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
100 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
101 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
102 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
/* Row 4: code points appear to be those of ISO-8859-6 (Arabic block 0x06xx; many bytes unassigned). */
107 0x00a0, -00001, -00001, -00001, 0x00a4, -00001, -00001, -00001,
108 -00001, -00001, -00001, -00001, 0x060c, 0x00ad, -00001, -00001,
109 -00001, -00001, -00001, -00001, -00001, -00001, -00001, -00001,
110 -00001, -00001, -00001, 0x061b, -00001, -00001, -00001, 0x061f,
111 -00001, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
112 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
113 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
114 0x0638, 0x0639, 0x063a, -00001, -00001, -00001, -00001, -00001,
115 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
116 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
117 0x0650, 0x0651, 0x0652, -00001, -00001, -00001, -00001, -00001,
118 -00001, -00001, -00001, -00001, -00001, -00001, -00001, -00001,
/* Row 5: code points appear to be those of ISO-8859-7 (Greek block 0x03xx). */
123 0x00a0, 0x02bd, 0x02bc, 0x00a3, -00001, -00001, 0x00a6, 0x00a7,
124 0x00a8, 0x00a9, -00001, 0x00ab, 0x00ac, 0x00ad, -00001, 0x2015,
125 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
126 0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
127 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
128 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
129 0x03a0, 0x03a1, -00001, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
130 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
131 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
132 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
133 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
134 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, -00001,
/* Row 6: code points appear to be those of ISO-8859-8 (Hebrew block 0x05xx). */
139 0x00a0, -00001, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
140 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x203e,
141 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
142 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, -00001,
143 -00001, -00001, -00001, -00001, -00001, -00001, -00001, -00001,
144 -00001, -00001, -00001, -00001, -00001, -00001, -00001, -00001,
145 -00001, -00001, -00001, -00001, -00001, -00001, -00001, -00001,
146 -00001, -00001, -00001, -00001, -00001, -00001, -00001, 0x2017,
147 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
148 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
149 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
150 0x05e8, 0x05e9, 0x05ea, -00001, -00001, -00001, -00001, -00001,
/* Row 7: code points appear to be those of ISO-8859-9 (Latin-1 with six Turkish letters). */
155 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
156 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
157 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
158 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
159 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
160 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
161 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
162 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
163 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
164 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
165 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
166 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
/*
 * Encoding names, one slot per CharacterEncoding value (the array has
 * CE_enum_count elements).  FindEncoding() compares its argument against
 * each entry with strcasecmp8() and returns the matching index as the
 * encoding, so entry order has to follow the enum.
 */
170 const char8 *CharacterEncodingName[CE_enum_count] = {
172 "unspecified-ascii-superset",
/*
 * Second name table, also with CE_enum_count elements and indexed by
 * CharacterEncoding value.  FindEncoding() searches this table first, before
 * CharacterEncodingName.  Presumably these are the name variants that also
 * spell out the byte order -- confirm against the full list of entries.
 */
193 const char8 *CharacterEncodingNameAndByteOrder[CE_enum_count] = {
195 "unspecified_ascii_superset",
217 {
"ASCII", CE_ISO_646},
218 {
"ISO-Latin-1", CE_ISO_8859_1},
219 {
"ISO-Latin-2", CE_ISO_8859_2},
220 {
"ISO-Latin-3", CE_ISO_8859_3},
221 {
"ISO-Latin-4", CE_ISO_8859_4},
222 {
"ISO-Latin-5", CE_ISO_8859_5},
223 {
"ISO-Latin-6", CE_ISO_8859_6},
224 {
"ISO-Latin-7", CE_ISO_8859_7},
225 {
"ISO-Latin-8", CE_ISO_8859_8},
226 {
"UCS-2", CE_ISO_10646_UCS_2B},
228 const int CE_alias_count =
229 sizeof(CharacterEncodingAlias)/
sizeof(CharacterEncodingAlias[0]);
/*
 * Assigned in init_charset(): either CE_unspecified_ascii_superset, or
 * CE_UTF_16B / CE_UTF_16L chosen by inspecting the first byte of a short.
 */
231 CharacterEncoding InternalCharacterEncoding;
/*
 * Initialise the character-set tables: assigns InternalCharacterEncoding and
 * fills iso_to_unicode[], iso_max_val[] and unicode_to_iso[] from
 * latin_table.  Takes no arguments and returns nothing.
 *
 * NOTE(review): the embedded line numbers skip in several places, so this
 * listing omits source lines -- among them the declarations of i, j and max,
 * the loop over table index i, the store into bytes.s, and whatever selects
 * between the two InternalCharacterEncoding assignments.  Confirm the notes
 * below against the complete file.
 */
233 void init_charset(
void)
/* A short overlaid with two chars, so the first byte in memory can be read
   back (bytes.b[0] below) to choose between CE_UTF_16B and CE_UTF_16L. */
236 union {
char b[2];
short s;} bytes;
/* Two alternative assignments; presumably only one is compiled in, selected
   by a preprocessor conditional that is not shown -- TODO confirm. */
243 InternalCharacterEncoding = CE_unspecified_ascii_superset;
245 InternalCharacterEncoding = (bytes.b[0] == 0) ? CE_UTF_16B : CE_UTF_16L;
/* Forward table: byte values below 0xa0 map to the same code. */
254 for(j=0; j<0xa0; j++)
255 iso_to_unicode[i][j] = j;
/* Bytes 0xa0..0xff take their code from latin_table; max tracks the largest
   code seen so the reverse table can be sized. */
256 for(j=0xa0; j<0x100; j++)
258 int code = latin_table[i][j-0xa0];
259 iso_to_unicode[i][j] = code;
260 if(code > max) max = code;
263 iso_max_val[i] = max;
/* The reverse table gets max+1 entries so it can be indexed by any code up
   to and including max. */
265 if(!(unicode_to_iso[i] = Malloc(max+1)))
/* NOTE(review): no exit or return is visible after this message; confirm the
   failure path does not fall through to the stores below. */
267 fprintf(stderr,
"Malloc failed in charset initialisation\n");
/* Reverse table: identity below 0xa0, then '?' as the default for every
   other code up to max. */
271 for(j=0; j<0xa0; j++)
272 unicode_to_iso[i][j] = j;
273 for(j=0xa0; j<=max; j++)
274 unicode_to_iso[i][j] =
'?';
/* Finally record, for each byte 0xa0..0xff, that its code maps back to it.
   NOTE(review): latin_table contains -1 entries and the listing skips a line
   just before the store; a guard such as "code != -1" is expected there --
   confirm, since indexing with -1 would write outside the allocation. */
275 for(j=0xa0; j<0x100; j++)
277 int code = latin_table[i][j-0xa0];
279 unicode_to_iso[i][code] = j;
287 int EncodingIsAsciiSuperset(CharacterEncoding enc)
289 return enc >= CE_unspecified_ascii_superset && enc <= CE_ISO_8859_9;
/*
 * Compare two encodings and report a resulting encoding through *enc3.
 *
 * enc1, enc2: the encodings to compare.
 * enc3:       output; written in at least the two branches shown below.
 *
 * NOTE(review): only the conditions and two of the stores to *enc3 survive
 * in this listing (the embedded line numbers skip); the return statements
 * and the remaining assignments are not shown, so the exact return values
 * and the *enc3 results for the other branches must be confirmed against
 * the complete file.
 */
298 int EncodingsCompatible(CharacterEncoding enc1, CharacterEncoding enc2,
299 CharacterEncoding *enc3)
/* 8-bit case: both encodings are tested with EncodingIsAsciiSuperset(). */
301 if(EncodingIsAsciiSuperset(enc1))
303 if(EncodingIsAsciiSuperset(enc2))
/* enc1 is one of the "B" 16-bit encodings (presumably big-endian): a UCS-2
   enc2 of either flavour yields CE_ISO_10646_UCS_2B. */
311 if(enc1 == CE_UTF_16B || enc1 == CE_ISO_10646_UCS_2B)
313 if(enc2 == CE_UTF_16B || enc2 == CE_UTF_16L)
315 else if(enc2 == CE_ISO_10646_UCS_2B || enc2 == CE_ISO_10646_UCS_2L)
316 *enc3 = CE_ISO_10646_UCS_2B;
/* enc1 is one of the "L" 16-bit encodings (presumably little-endian): a UCS-2
   enc2 of either flavour yields CE_ISO_10646_UCS_2L. */
322 if(enc1 == CE_UTF_16L || enc1 == CE_ISO_10646_UCS_2L)
324 if(enc2 == CE_UTF_16B || enc2 == CE_UTF_16L)
326 else if(enc2 == CE_ISO_10646_UCS_2B || enc2 == CE_ISO_10646_UCS_2L)
327 *enc3 = CE_ISO_10646_UCS_2L;
/*
 * Look up an encoding by name, comparing with strcasecmp8().
 *
 * name: the encoding name to search for.
 *
 * Three tables are tried in order and the first match wins:
 *   1. CharacterEncodingNameAndByteOrder -- returns the matching index cast
 *      to CharacterEncoding;
 *   2. CharacterEncodingName -- likewise;
 *   3. CharacterEncodingAlias -- returns the enc field of the matching entry.
 *
 * NOTE(review): the declaration of i and the value returned when nothing
 * matches are not visible in this listing; confirm against the full file.
 */
336 CharacterEncoding FindEncoding(char8 *name)
/* Pass 1: names that include the byte order. */
340 for(i=0; i<CE_enum_count; i++)
341 if(strcasecmp8(name, CharacterEncodingNameAndByteOrder[i]) == 0)
342 return (CharacterEncoding)i;
/* Pass 2: plain encoding names. */
344 for(i=0; i<CE_enum_count; i++)
345 if(strcasecmp8(name, CharacterEncodingName[i]) == 0)
346 return (CharacterEncoding)i;
/* Pass 3: aliases. */
348 for(i=0; i<CE_alias_count; i++)
349 if(strcasecmp8(name, CharacterEncodingAlias[i].name) == 0)
350 return CharacterEncodingAlias[i].enc;