aboutsummaryrefslogtreecommitdiffstats
path: root/libwc/ces.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--libwc/ces.c375
1 files changed, 375 insertions, 0 deletions
diff --git a/libwc/ces.c b/libwc/ces.c
new file mode 100644
index 0000000..5997121
--- /dev/null
+++ b/libwc/ces.c
@@ -0,0 +1,375 @@
+
+#include "wc.h"
+#include "iso2022.h"
+#include "sjis.h"
+#include "hz.h"
+#include "big5.h"
+#include "hkscs.h"
+#include "johab.h"
+#include "gbk.h"
+#include "gb18030.h"
+#include "uhc.h"
+#include "viet.h"
+#include "priv.h"
+#ifdef USE_UNICODE
+#include "utf8.h"
+#include "utf7.h"
+#endif
+
+static wc_gset gset_usascii[] = {
+ { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 },
+ { 0, 0, 0 },
+};
+
+#define gset_iso8859(no) \
+static wc_gset gset_iso8859##no[] = { \
+ { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 }, \
+ { WC_CCS_ISO_8859_##no, WC_C_G1_CS96 | 0x80, 1 }, \
+ { 0, 0, 0 }, \
+}
+gset_iso8859(1); gset_iso8859(2); gset_iso8859(3); gset_iso8859(4);
+gset_iso8859(5); gset_iso8859(6); gset_iso8859(7); gset_iso8859(8);
+gset_iso8859(9); gset_iso8859(10); gset_iso8859(11);
+gset_iso8859(13); gset_iso8859(14); gset_iso8859(15); gset_iso8859(16);
+
+#define gset_cp(no) gset_priv1(CP##no, cp##no)
+#define gset_priv1(ccs, ces) \
+static wc_gset gset_##ces[] = { \
+ { WC_CCS_US_ASCII, 0, 1 }, \
+ { WC_CCS_##ccs, 0x80, 1 }, \
+ { 0, 0, 0 }, \
+}
+gset_cp(437); gset_cp(737); gset_cp(775); gset_cp(850); gset_cp(852);
+gset_cp(855); gset_cp(856); gset_cp(857); gset_cp(860); gset_cp(861);
+gset_cp(862); gset_cp(863); gset_cp(864); gset_cp(865); gset_cp(866);
+gset_cp(869); gset_cp(874); gset_cp(1006);
+gset_cp(1250); gset_cp(1251); gset_cp(1252); gset_cp(1253); gset_cp(1254);
+gset_cp(1255); gset_cp(1256); gset_cp(1257);
+gset_priv1(KOI8_R, koi8r);
+gset_priv1(KOI8_U, koi8u);
+gset_priv1(NEXTSTEP, nextstep);
+
+static wc_gset gset_iso2022jp[] = {
+ { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 },
+ { WC_CCS_JIS_X_0208, WC_C_G0_CS94, 0 },
+ { 0, 0, 0 },
+};
+static wc_gset gset_iso2022jp2[] = {
+ { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 },
+ { WC_CCS_JIS_X_0208, WC_C_G0_CS94, 0 },
+ { WC_CCS_JIS_X_0212, WC_C_G0_CS94, 0 },
+ { WC_CCS_GB_2312, WC_C_G0_CS94, 0 },
+ { WC_CCS_KS_X_1001, WC_C_G0_CS94, 0 },
+ { WC_CCS_ISO_8859_1, WC_C_G2_CS96, 0 },
+ { WC_CCS_ISO_8859_7, WC_C_G2_CS96, 0 },
+ { 0, 0, 0 },
+};
+static wc_gset gset_iso2022jp3[] = {
+ { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 },
+ { WC_CCS_JIS_X_0208, WC_C_G0_CS94, 0 },
+ { WC_CCS_JIS_X_0213_1, WC_C_G0_CS94, 0 },
+ { WC_CCS_JIS_X_0213_2, WC_C_G0_CS94, 0 },
+ { 0, 0, 0 },
+};
+static wc_gset gset_iso2022cn[] = {
+ { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 },
+ { WC_CCS_GB_2312, WC_C_G1_CS94, 1 },
+ { WC_CCS_ISO_IR_165, WC_C_G1_CS94, 0 },
+ { WC_CCS_CNS_11643_1, WC_C_G1_CS94, 0 },
+ { WC_CCS_CNS_11643_2, WC_C_G2_CS94, 0 },
+ { WC_CCS_CNS_11643_3, WC_C_G3_CS94, 0 },
+ { WC_CCS_CNS_11643_4, WC_C_G3_CS94, 0 },
+ { WC_CCS_CNS_11643_5, WC_C_G3_CS94, 0 },
+ { WC_CCS_CNS_11643_6, WC_C_G3_CS94, 0 },
+ { WC_CCS_CNS_11643_7, WC_C_G3_CS94, 0 },
+ { 0, 0, 0 },
+};
+static wc_gset gset_iso2022kr[] = {
+ { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 },
+ { WC_CCS_KS_X_1001, WC_C_G1_CS94, 1 },
+ { 0, 0, 0 },
+};
+static wc_uchar gset_ext_iso2022jp[] = {
+ WC_C_G0_CS94, WC_C_G2_CS96, WC_C_G0_CS94, WC_C_G2_CS96
+};
+static wc_uchar gset_ext_iso2022cn[] = {
+ WC_C_G2_CS94, WC_C_G2_CS96, WC_C_G2_CS94, WC_C_G2_CS96
+};
+static wc_uchar gset_ext_iso2022kr[] = {
+ WC_C_G1_CS94, WC_C_G1_CS96, WC_C_G1_CS94, WC_C_G1_CS96
+};
+static wc_gset gset_eucjp[] = {
+ { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 },
+ { WC_CCS_JIS_X_0208, WC_C_G1_CS94 | 0x80, 1 },
+ { WC_CCS_JIS_X_0201K, WC_C_G2_CS94 | 0x80, 1 },
+ { WC_CCS_JIS_X_0213_1, WC_C_G1_CS94 | 0x80, 0 },
+ { WC_CCS_JIS_X_0213_2, WC_C_G3_CS94 | 0x80, 0 },
+ { WC_CCS_JIS_X_0212, WC_C_G3_CS94 | 0x80, 1 },
+ { 0, 0, 0 },
+};
+static wc_gset gset_euccn[] = {
+ { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 },
+ { WC_CCS_GB_2312, WC_C_G1_CS94 | 0x80, 1 },
+ { 0, 0, 0 },
+};
+static wc_gset gset_euctw[] = {
+ { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 },
+ { WC_CCS_CNS_11643_1, WC_C_G1_CS94 | 0x80, 1 },
+ { WC_CCS_CNS_11643_X, WC_C_G2_CS94 | 0x80, 1 },
+ { 0, 0, 0 },
+};
+static wc_gset gset_euckr[] = {
+ { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 },
+ { WC_CCS_KS_X_1001, WC_C_G1_CS94 | 0x80, 1 },
+ { 0, 0, 0 },
+};
+static wc_gset gset_sjis[] = {
+ { WC_CCS_US_ASCII, 0, 1 },
+ { WC_CCS_JIS_X_0208, 0x80, 1 },
+ { WC_CCS_JIS_X_0201K, 0x80, 1 },
+ { WC_CCS_SJIS_EXT_1, 0x80, 1 },
+ { WC_CCS_SJIS_EXT_2, 0x80, 1 },
+ { WC_CCS_SJIS_EXT, 0x80, 1 },
+ { 0, 0, 0 },
+};
+static wc_gset gset_sjisx0213[] = {
+ { WC_CCS_US_ASCII, 0, 1 },
+ { WC_CCS_JIS_X_0208, 0x80, 1 },
+ { WC_CCS_JIS_X_0201K, 0x80, 1 },
+ { WC_CCS_JIS_X_0213_1, 0x80, 1 },
+ { WC_CCS_JIS_X_0213_2, 0x80, 1 },
+ { 0, 0, 0 },
+};
+static wc_gset gset_hz[] = {
+ { WC_CCS_US_ASCII, 0, 1 },
+ { WC_CCS_GB_2312, 0, 0 },
+ { 0, 0, 0 },
+};
+static wc_gset gset_big5[] = {
+ { WC_CCS_US_ASCII, 0, 1 },
+ { WC_CCS_BIG5_1, 0x80, 1 },
+ { WC_CCS_BIG5_2, 0x80, 1 },
+ { WC_CCS_BIG5, 0x80, 1 },
+ { 0, 0, 0 },
+};
+static wc_gset gset_hkscs[] = {
+ { WC_CCS_US_ASCII, 0, 1 },
+ { WC_CCS_BIG5_1, 0x80, 1 },
+ { WC_CCS_BIG5_2, 0x80, 1 },
+ { WC_CCS_BIG5, 0x80, 1 },
+ { WC_CCS_HKSCS_1, 0x80, 1 },
+ { WC_CCS_HKSCS_2, 0x80, 1 },
+ { WC_CCS_HKSCS, 0x80, 1 },
+ { 0, 0, 0 },
+};
+static wc_gset gset_johab[] = {
+ { WC_CCS_US_ASCII, 0, 1 },
+ { WC_CCS_JOHAB_1, 0x80, 1 },
+ { WC_CCS_JOHAB_2, 0x80, 1 },
+ { WC_CCS_JOHAB_3, 0x80, 1 },
+ { WC_CCS_JOHAB, 0x80, 1 },
+ { 0, 0, 0 },
+};
+static wc_gset gset_gbk[] = {
+ { WC_CCS_US_ASCII, 0, 1 },
+ { WC_CCS_GB_2312, 0x80, 1 },
+ { WC_CCS_GBK_80, 0x80, 1 },
+ { WC_CCS_GBK_1, 0x80, 1 },
+ { WC_CCS_GBK_2, 0x80, 1 },
+ { WC_CCS_GBK, 0x80, 1 },
+ { 0, 0, 0 },
+};
+static wc_gset gset_gb18030[] = {
+ { WC_CCS_US_ASCII, 0, 1 },
+ { WC_CCS_GB_2312, 0x80, 1 },
+ { WC_CCS_GBK_1, 0x80, 1 },
+ { WC_CCS_GBK_2, 0x80, 1 },
+ { WC_CCS_GBK, 0x80, 1 },
+ { WC_CCS_GBK_EXT_1, 0x80, 1 },
+ { WC_CCS_GBK_EXT_2, 0x80, 1 },
+ { WC_CCS_GBK_EXT, 0x80, 1 },
+ { WC_CCS_GB18030, 0x80, 1 },
+ { 0, 0, 0 },
+};
+static wc_gset gset_uhc[] = {
+ { WC_CCS_US_ASCII, 0, 1 },
+ { WC_CCS_KS_X_1001, 0x80, 1 },
+ { WC_CCS_UHC_1, 0x80, 1 },
+ { WC_CCS_UHC_2, 0x80, 1 },
+ { WC_CCS_UHC, 0x80, 1 },
+ { 0, 0, 0 },
+};
+#define gset_priv2(ccs, ces) \
+static wc_gset gset_##ces[] = { \
+ { WC_CCS_US_ASCII, 0, 1 }, \
+ { WC_CCS_##ccs##_1, 0x80, 1 }, \
+ { WC_CCS_##ccs##_2, 0x80, 1 }, \
+ { 0, 0, 0 }, \
+}
+gset_priv2(CP1258, cp1258);
+gset_priv2(VISCII_11, viscii11);
+gset_priv2(VPS, vps);
+static wc_gset gset_tcvn5712[] = {
+ { WC_CCS_US_ASCII, 0, 1 },
+ { WC_CCS_TCVN_5712_1, 0x80, 1 },
+ { WC_CCS_TCVN_5712_2, 0x80, 1 },
+ { WC_CCS_TCVN_5712_3, 0x80, 1 },
+ { 0, 0, 0 },
+};
+
+#ifdef USE_UNICODE
+static wc_gset gset_utf8[] = {
+ { WC_CCS_US_ASCII, 0, 1 },
+ { WC_CCS_UCS2, 0x80, 1 },
+ { WC_CCS_UCS4, 0x80, 1 },
+ { WC_CCS_UCS_TAG, 0x80, 1 },
+ { 0, 0, 0 },
+};
+static wc_gset gset_utf7[] = {
+ { WC_CCS_US_ASCII, 0, 1 },
+ { WC_CCS_UCS2, 0x80, 1 },
+ { WC_CCS_UCS4, 0x80, 1 },
+ { WC_CCS_UCS_TAG, 0x80, 1 },
+ { 0, 0, 0 },
+};
+#endif
+
+static wc_gset gset_raw[] = {
+ { WC_CCS_US_ASCII, 0, 1 },
+ { WC_CCS_RAW, 0x80, 1 },
+ { 0, 0, 0 },
+};
+
+#define ces_ascii(id,name,desc) \
+ { WC_CES_##id, name, desc, gset_usascii, NULL, \
+ (void *)wc_conv_from_ascii, (void *)wc_push_to_iso8859, \
+ (void *)wc_char_conv_from_iso2022 }
+#define ces_iso8859(id,name,desc,no) \
+ { WC_CES_##id, name, desc, gset_iso8859##no, NULL, \
+ (void *)wc_conv_from_iso2022, (void *)wc_push_to_iso8859, \
+ (void *)wc_char_conv_from_iso2022 }
+#define ces_priv1(id,name,desc,ces) \
+ { WC_CES_##id, name, desc, gset_##ces, NULL, \
+ (void *)wc_conv_from_priv1, (void *)wc_push_to_priv1, \
+ (void *)wc_char_conv_from_priv1 }
+#define ces_iso2022(id,name,desc,terr) \
+ { WC_CES_##id, name, desc, gset_iso2022##terr, gset_ext_iso2022##terr, \
+ (void *)wc_conv_from_iso2022, (void *)wc_push_to_iso2022, \
+ (void *)wc_char_conv_from_iso2022 }
+#define ces_euc(id,name,desc,terr) \
+ { WC_CES_##id, name, desc, gset_euc##terr, NULL, \
+ (void *)wc_conv_from_iso2022, (void *)wc_push_to_euc##terr, \
+ (void *)wc_char_conv_from_iso2022 }
+#define ces_priv2(id,name,desc,ces) \
+ { WC_CES_##id, name, desc, gset_##ces, NULL, \
+ (void *)wc_conv_from_##ces, (void *)wc_push_to_##ces, \
+ (void *)wc_char_conv_from_##ces }
+
+#define gset_ext_iso2022jp2 gset_ext_iso2022jp
+#define gset_ext_iso2022jp3 gset_ext_iso2022jp
+#define wc_push_to_euckr wc_push_to_euc
+#define wc_push_to_euccn wc_push_to_euc
+#define wc_push_to_priv1 wc_push_to_iso8859
+#define wc_push_to_cp1258 wc_push_to_viet
+#define wc_push_to_tcvn5712 wc_push_to_viet
+#define wc_push_to_viscii11 wc_push_to_viet
+#define wc_push_to_vps wc_push_to_viet
+#define wc_conv_from_cp1258 wc_conv_from_priv1
+#define wc_conv_from_tcvn5712 wc_conv_from_viet
+#define wc_conv_from_viscii11 wc_conv_from_viet
+#define wc_conv_from_vps wc_conv_from_viet
+#define wc_conv_from_raw wc_conv_from_priv1
+#define wc_char_conv_from_hz wc_char_conv_from_iso2022
+#define wc_char_conv_from_cp1258 wc_char_conv_from_priv1
+#define wc_char_conv_from_tcvn5712 wc_char_conv_from_viet
+#define wc_char_conv_from_viscii11 wc_char_conv_from_viet
+#define wc_char_conv_from_vps wc_char_conv_from_viet
+#define wc_char_conv_from_raw wc_char_conv_from_priv1
+
+wc_ces_info WcCesInfo[] = {
+ ces_ascii(US_ASCII, "US-ASCII", "Latin (US-ASCII)"),
+
+ ces_iso8859(ISO_8859_1, "ISO-8859-1", "Latin 1 (ISO-8859-1)", 1),
+ ces_iso8859(ISO_8859_2, "ISO-8859-2", "Latin 2 (ISO-8859-2)", 2),
+ ces_iso8859(ISO_8859_3, "ISO-8859-3", "Latin 3 (ISO-8859-3)", 3),
+ ces_iso8859(ISO_8859_4, "ISO-8859-4", "Latin 4 (ISO-8859-4)", 4),
+ ces_iso8859(ISO_8859_5, "ISO-8859-5", "Cyrillic (ISO-8859-5)", 5),
+ ces_iso8859(ISO_8859_6, "ISO-8859-6", "Arabic (ISO-8859-6)", 6),
+ ces_iso8859(ISO_8859_7, "ISO-8859-7", "Greek (ISO-8859-7)", 7),
+ ces_iso8859(ISO_8859_8, "ISO-8859-8", "Hebrew (ISO-8859-8)", 8),
+ ces_iso8859(ISO_8859_9, "ISO-8859-9", "Turkish (ISO-8859-9)", 9),
+ ces_iso8859(ISO_8859_10, "ISO-8859-10", "Nordic (ISO-8859-10)", 10),
+ ces_iso8859(ISO_8859_11, "ISO-8859-11", "Thai (ISO-8859-11, TIS-620)", 11),
+ { WC_CES_ISO_8859_12, NULL, NULL, NULL, NULL, NULL, NULL, NULL },
+ ces_iso8859(ISO_8859_13, "ISO-8859-13", "Baltic Rim (ISO-8859-13)", 13),
+ ces_iso8859(ISO_8859_14, "ISO-8859-14", "Celtic (ISO-8859-14)", 14),
+ ces_iso8859(ISO_8859_15, "ISO-8859-15", "Latin 9 (ISO-8859-15)", 15),
+ ces_iso8859(ISO_8859_16, "ISO-8859-16", "Romanian (ISO-8859-16)", 16),
+
+ ces_iso2022(ISO_2022_JP, "ISO-2022-JP", "Japanese (ISO-2022-JP)", jp),
+ ces_iso2022(ISO_2022_JP_2, "ISO-2022-JP-2", "Japanese (ISO-2022-JP-2)", jp2),
+ ces_iso2022(ISO_2022_JP_3, "ISO-2022-JP-3", "Japanese (ISO-2022-JP-3)", jp3),
+ ces_iso2022(ISO_2022_CN, "ISO-2022-CN", "Chinese (ISO-2022-CN)", cn),
+ ces_iso2022(ISO_2022_KR, "ISO-2022-KR", "Korean (ISO-2022-KR)", kr),
+
+ ces_euc(EUC_JP, "EUC-JP", "Japanese (EUC-JP)", jp),
+ ces_euc(EUC_CN, "EUC-CN", "Chinese (EUC-CN, GB2312)", cn),
+ ces_euc(EUC_TW, "EUC-TW", "Chinese Taiwan (EUC-TW)", tw),
+ ces_euc(EUC_KR, "EUC-KR", "Korean (EUC-KR)", kr),
+
+ ces_priv1(CP437, "CP437", "Latin (CP437)", cp437),
+ ces_priv1(CP737, "CP737", "Greek (CP737)", cp737),
+ ces_priv1(CP775, "CP775", "Baltic Rim (CP775)", cp775),
+ ces_priv1(CP850, "CP850", "Latin 1 (CP850)", cp850),
+ ces_priv1(CP852, "CP852", "Latin 2 (CP852)", cp852),
+ ces_priv1(CP855, "CP855", "Cyrillic (CP855)", cp855),
+ ces_priv1(CP856, "CP856", "Hebrew (CP856)", cp856),
+ ces_priv1(CP857, "CP857", "Turkish (CP857)", cp857),
+ ces_priv1(CP860, "CP860", "Portuguese (CP860)", cp860),
+ ces_priv1(CP861, "CP861", "Icelandic (CP861)", cp861),
+ ces_priv1(CP862, "CP862", "Hebrew (CP862)", cp862),
+ ces_priv1(CP863, "CP863", "Canada French (CP863)", cp863),
+ ces_priv1(CP864, "CP864", "Arabic (CP864)", cp864),
+ ces_priv1(CP865, "CP865", "Nordic (CP865)", cp865),
+ ces_priv1(CP866, "CP866", "Cyrillic (CP866)", cp866),
+ ces_priv1(CP869, "CP869", "Greek 2 (CP869)", cp869),
+ ces_priv1(CP874, "CP874", "Thai (CP874)", cp874),
+ ces_priv1(CP1006, "CP1006", "Arabic (CP1006)", cp1006),
+ ces_priv1(CP1250, "CP1250", "Latin 2 (CP1250)", cp1250),
+ ces_priv1(CP1251, "CP1251", "Cyrillic (CP1251)", cp1251),
+ ces_priv1(CP1252, "CP1252", "Latin 1 (CP1252)", cp1252),
+ ces_priv1(CP1253, "CP1253", "Greek (CP1253)", cp1253),
+ ces_priv1(CP1254, "CP1254", "Turkish (CP1254)", cp1254),
+ ces_priv1(CP1255, "CP1255", "Hebrew (CP1255)", cp1255),
+ ces_priv1(CP1256, "CP1256", "Arabic (CP1256)", cp1256),
+ ces_priv1(CP1257, "CP1257", "Baltic Rim (CP1257)", cp1257),
+ ces_priv1(KOI8_R, "KOI8-R", "Cyrillic (KOI8-R)", koi8r),
+ ces_priv1(KOI8_U, "KOI8-U", "Ukrainian (KOI8-U)", koi8u),
+ ces_priv1(NEXTSTEP, "NeXTSTEP", "NeXTSTEP", nextstep),
+
+ ces_priv2(RAW, "Raw", "8bit Raw", raw),
+
+ ces_priv2(SHIFT_JIS, "Shift_JIS", "Japanese (Shift_JIS, CP932)", sjis),
+ ces_priv2(SHIFT_JISX0213, "Shift_JISX0213", "Japanese (Shift_JISX0213)", sjisx0213),
+ ces_priv2(GBK, "GBK", "Chinese (GBK, CP936)", gbk),
+ ces_priv2(GB18030, "GB18030", "Chinese (GB18030)", gb18030),
+ ces_priv2(HZ_GB_2312, "HZ-GB-2312", "Chinese (HZ-GB-2312)", hz),
+ ces_priv2(BIG5, "Big5", "Chinese Taiwan (Big5, CP950)", big5),
+ ces_priv2(HKSCS, "HKSCS", "Chinese Hong Kong (HKSCS)", hkscs),
+ ces_priv2(UHC, "UHC", "Korean (UHC, CP949)", uhc),
+ ces_priv2(JOHAB, "Johab", "Korean (Johab)", johab),
+
+ ces_priv2(CP1258, "CP1258", "Vietnamese (CP1258)", cp1258),
+ ces_priv2(TCVN_5712, "TCVN-5712", "Vietnamese (TCVN-5712)", tcvn5712),
+ ces_priv2(VISCII_11, "VISCII-1.1", "Vietnamese (VISCII 1.1)", viscii11),
+ ces_priv2(VPS, "VPS", "Vietnamese (VPS)", vps),
+
+#ifdef USE_UNICODE
+ ces_priv2(UTF_8, "UTF-8", "Unicode (UTF-8)", utf8),
+ ces_priv2(UTF_7, "UTF-7", "Unicode (UTF-7)", utf7),
+#else
+ { WC_CES_UTF_8, NULL, NULL, NULL, NULL, NULL, NULL, NULL },
+ { WC_CES_UTF_7, NULL, NULL, NULL, NULL, NULL, NULL, NULL },
+#endif
+ { 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL },
+};