diff options
author | Tatsuya Kinoshita <tats@vega.ocn.ne.jp> | 2011-05-04 07:05:14 +0000 |
---|---|---|
committer | Tatsuya Kinoshita <tats@vega.ocn.ne.jp> | 2011-05-04 07:05:14 +0000 |
commit | 72f72d64a422d6628c4796f5c0bf2e508f134214 (patch) | |
tree | 0c9ea90cc53310832c977265521fb44db24a515e /libwc/ces.c | |
parent | Adding upstream version 0.3 (diff) | |
download | w3m-72f72d64a422d6628c4796f5c0bf2e508f134214.tar.gz w3m-72f72d64a422d6628c4796f5c0bf2e508f134214.zip |
Adding upstream version 0.5.1upstream/0.5.1
Diffstat (limited to '')
-rw-r--r-- | libwc/ces.c | 375 |
1 files changed, 375 insertions, 0 deletions
diff --git a/libwc/ces.c b/libwc/ces.c new file mode 100644 index 0000000..5997121 --- /dev/null +++ b/libwc/ces.c @@ -0,0 +1,375 @@ + +#include "wc.h" +#include "iso2022.h" +#include "sjis.h" +#include "hz.h" +#include "big5.h" +#include "hkscs.h" +#include "johab.h" +#include "gbk.h" +#include "gb18030.h" +#include "uhc.h" +#include "viet.h" +#include "priv.h" +#ifdef USE_UNICODE +#include "utf8.h" +#include "utf7.h" +#endif + +static wc_gset gset_usascii[] = { + { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 }, + { 0, 0, 0 }, +}; + +#define gset_iso8859(no) \ +static wc_gset gset_iso8859##no[] = { \ + { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 }, \ + { WC_CCS_ISO_8859_##no, WC_C_G1_CS96 | 0x80, 1 }, \ + { 0, 0, 0 }, \ +} +gset_iso8859(1); gset_iso8859(2); gset_iso8859(3); gset_iso8859(4); +gset_iso8859(5); gset_iso8859(6); gset_iso8859(7); gset_iso8859(8); +gset_iso8859(9); gset_iso8859(10); gset_iso8859(11); +gset_iso8859(13); gset_iso8859(14); gset_iso8859(15); gset_iso8859(16); + +#define gset_cp(no) gset_priv1(CP##no, cp##no) +#define gset_priv1(ccs, ces) \ +static wc_gset gset_##ces[] = { \ + { WC_CCS_US_ASCII, 0, 1 }, \ + { WC_CCS_##ccs, 0x80, 1 }, \ + { 0, 0, 0 }, \ +} +gset_cp(437); gset_cp(737); gset_cp(775); gset_cp(850); gset_cp(852); +gset_cp(855); gset_cp(856); gset_cp(857); gset_cp(860); gset_cp(861); +gset_cp(862); gset_cp(863); gset_cp(864); gset_cp(865); gset_cp(866); +gset_cp(869); gset_cp(874); gset_cp(1006); +gset_cp(1250); gset_cp(1251); gset_cp(1252); gset_cp(1253); gset_cp(1254); +gset_cp(1255); gset_cp(1256); gset_cp(1257); +gset_priv1(KOI8_R, koi8r); +gset_priv1(KOI8_U, koi8u); +gset_priv1(NEXTSTEP, nextstep); + +static wc_gset gset_iso2022jp[] = { + { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 }, + { WC_CCS_JIS_X_0208, WC_C_G0_CS94, 0 }, + { 0, 0, 0 }, +}; +static wc_gset gset_iso2022jp2[] = { + { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 }, + { WC_CCS_JIS_X_0208, WC_C_G0_CS94, 0 }, + { WC_CCS_JIS_X_0212, WC_C_G0_CS94, 0 }, + { WC_CCS_GB_2312, WC_C_G0_CS94, 0 }, + { WC_CCS_KS_X_1001, WC_C_G0_CS94, 0 }, + { WC_CCS_ISO_8859_1, WC_C_G2_CS96, 0 }, + { WC_CCS_ISO_8859_7, WC_C_G2_CS96, 0 }, + { 0, 0, 0 }, +}; +static wc_gset gset_iso2022jp3[] = { + { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 }, + { WC_CCS_JIS_X_0208, WC_C_G0_CS94, 0 }, + { WC_CCS_JIS_X_0213_1, WC_C_G0_CS94, 0 }, + { WC_CCS_JIS_X_0213_2, WC_C_G0_CS94, 0 }, + { 0, 0, 0 }, +}; +static wc_gset gset_iso2022cn[] = { + { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 }, + { WC_CCS_GB_2312, WC_C_G1_CS94, 1 }, + { WC_CCS_ISO_IR_165, WC_C_G1_CS94, 0 }, + { WC_CCS_CNS_11643_1, WC_C_G1_CS94, 0 }, + { WC_CCS_CNS_11643_2, WC_C_G2_CS94, 0 }, + { WC_CCS_CNS_11643_3, WC_C_G3_CS94, 0 }, + { WC_CCS_CNS_11643_4, WC_C_G3_CS94, 0 }, + { WC_CCS_CNS_11643_5, WC_C_G3_CS94, 0 }, + { WC_CCS_CNS_11643_6, WC_C_G3_CS94, 0 }, + { WC_CCS_CNS_11643_7, WC_C_G3_CS94, 0 }, + { 0, 0, 0 }, +}; +static wc_gset gset_iso2022kr[] = { + { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 }, + { WC_CCS_KS_X_1001, WC_C_G1_CS94, 1 }, + { 0, 0, 0 }, +}; +static wc_uchar gset_ext_iso2022jp[] = { + WC_C_G0_CS94, WC_C_G2_CS96, WC_C_G0_CS94, WC_C_G2_CS96 +}; +static wc_uchar gset_ext_iso2022cn[] = { + WC_C_G2_CS94, WC_C_G2_CS96, WC_C_G2_CS94, WC_C_G2_CS96 +}; +static wc_uchar gset_ext_iso2022kr[] = { + WC_C_G1_CS94, WC_C_G1_CS96, WC_C_G1_CS94, WC_C_G1_CS96 +}; +static wc_gset gset_eucjp[] = { + { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 }, + { WC_CCS_JIS_X_0208, WC_C_G1_CS94 | 0x80, 1 }, + { WC_CCS_JIS_X_0201K, WC_C_G2_CS94 | 0x80, 1 }, + { WC_CCS_JIS_X_0213_1, WC_C_G1_CS94 | 0x80, 0 }, + { WC_CCS_JIS_X_0213_2, WC_C_G3_CS94 | 0x80, 0 }, + { WC_CCS_JIS_X_0212, WC_C_G3_CS94 | 0x80, 1 }, + { 0, 0, 0 }, +}; +static wc_gset gset_euccn[] = { + { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 }, + { WC_CCS_GB_2312, WC_C_G1_CS94 | 0x80, 1 }, + { 0, 0, 0 }, +}; +static wc_gset gset_euctw[] = { + { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 }, + { WC_CCS_CNS_11643_1, WC_C_G1_CS94 | 0x80, 1 }, + { WC_CCS_CNS_11643_X, WC_C_G2_CS94 | 0x80, 1 }, + { 0, 0, 0 }, +}; +static wc_gset gset_euckr[] = { + { WC_CCS_US_ASCII, WC_C_G0_CS94, 1 }, + { WC_CCS_KS_X_1001, WC_C_G1_CS94 | 0x80, 1 }, + { 0, 0, 0 }, +}; +static wc_gset gset_sjis[] = { + { WC_CCS_US_ASCII, 0, 1 }, + { WC_CCS_JIS_X_0208, 0x80, 1 }, + { WC_CCS_JIS_X_0201K, 0x80, 1 }, + { WC_CCS_SJIS_EXT_1, 0x80, 1 }, + { WC_CCS_SJIS_EXT_2, 0x80, 1 }, + { WC_CCS_SJIS_EXT, 0x80, 1 }, + { 0, 0, 0 }, +}; +static wc_gset gset_sjisx0213[] = { + { WC_CCS_US_ASCII, 0, 1 }, + { WC_CCS_JIS_X_0208, 0x80, 1 }, + { WC_CCS_JIS_X_0201K, 0x80, 1 }, + { WC_CCS_JIS_X_0213_1, 0x80, 1 }, + { WC_CCS_JIS_X_0213_2, 0x80, 1 }, + { 0, 0, 0 }, +}; +static wc_gset gset_hz[] = { + { WC_CCS_US_ASCII, 0, 1 }, + { WC_CCS_GB_2312, 0, 0 }, + { 0, 0, 0 }, +}; +static wc_gset gset_big5[] = { + { WC_CCS_US_ASCII, 0, 1 }, + { WC_CCS_BIG5_1, 0x80, 1 }, + { WC_CCS_BIG5_2, 0x80, 1 }, + { WC_CCS_BIG5, 0x80, 1 }, + { 0, 0, 0 }, +}; +static wc_gset gset_hkscs[] = { + { WC_CCS_US_ASCII, 0, 1 }, + { WC_CCS_BIG5_1, 0x80, 1 }, + { WC_CCS_BIG5_2, 0x80, 1 }, + { WC_CCS_BIG5, 0x80, 1 }, + { WC_CCS_HKSCS_1, 0x80, 1 }, + { WC_CCS_HKSCS_2, 0x80, 1 }, + { WC_CCS_HKSCS, 0x80, 1 }, + { 0, 0, 0 }, +}; +static wc_gset gset_johab[] = { + { WC_CCS_US_ASCII, 0, 1 }, + { WC_CCS_JOHAB_1, 0x80, 1 }, + { WC_CCS_JOHAB_2, 0x80, 1 }, + { WC_CCS_JOHAB_3, 0x80, 1 }, + { WC_CCS_JOHAB, 0x80, 1 }, + { 0, 0, 0 }, +}; +static wc_gset gset_gbk[] = { + { WC_CCS_US_ASCII, 0, 1 }, + { WC_CCS_GB_2312, 0x80, 1 }, + { WC_CCS_GBK_80, 0x80, 1 }, + { WC_CCS_GBK_1, 0x80, 1 }, + { WC_CCS_GBK_2, 0x80, 1 }, + { WC_CCS_GBK, 0x80, 1 }, + { 0, 0, 0 }, +}; +static wc_gset gset_gb18030[] = { + { WC_CCS_US_ASCII, 0, 1 }, + { WC_CCS_GB_2312, 0x80, 1 }, + { WC_CCS_GBK_1, 0x80, 1 }, + { WC_CCS_GBK_2, 0x80, 1 }, + { WC_CCS_GBK, 0x80, 1 }, + { WC_CCS_GBK_EXT_1, 0x80, 1 }, + { WC_CCS_GBK_EXT_2, 0x80, 1 }, + { WC_CCS_GBK_EXT, 0x80, 1 }, + { WC_CCS_GB18030, 0x80, 1 }, + { 0, 0, 0 }, +}; +static wc_gset gset_uhc[] = { + { WC_CCS_US_ASCII, 0, 1 }, + { WC_CCS_KS_X_1001, 0x80, 1 }, + { WC_CCS_UHC_1, 0x80, 1 }, + { WC_CCS_UHC_2, 0x80, 1 }, + { WC_CCS_UHC, 0x80, 1 }, + { 0, 0, 0 }, +}; +#define gset_priv2(ccs, ces) \ +static wc_gset gset_##ces[] = { \ + { WC_CCS_US_ASCII, 0, 1 }, \ + { WC_CCS_##ccs##_1, 0x80, 1 }, \ + { WC_CCS_##ccs##_2, 0x80, 1 }, \ + { 0, 0, 0 }, \ +} +gset_priv2(CP1258, cp1258); +gset_priv2(VISCII_11, viscii11); +gset_priv2(VPS, vps); +static wc_gset gset_tcvn5712[] = { + { WC_CCS_US_ASCII, 0, 1 }, + { WC_CCS_TCVN_5712_1, 0x80, 1 }, + { WC_CCS_TCVN_5712_2, 0x80, 1 }, + { WC_CCS_TCVN_5712_3, 0x80, 1 }, + { 0, 0, 0 }, +}; + +#ifdef USE_UNICODE +static wc_gset gset_utf8[] = { + { WC_CCS_US_ASCII, 0, 1 }, + { WC_CCS_UCS2, 0x80, 1 }, + { WC_CCS_UCS4, 0x80, 1 }, + { WC_CCS_UCS_TAG, 0x80, 1 }, + { 0, 0, 0 }, +}; +static wc_gset gset_utf7[] = { + { WC_CCS_US_ASCII, 0, 1 }, + { WC_CCS_UCS2, 0x80, 1 }, + { WC_CCS_UCS4, 0x80, 1 }, + { WC_CCS_UCS_TAG, 0x80, 1 }, + { 0, 0, 0 }, +}; +#endif + +static wc_gset gset_raw[] = { + { WC_CCS_US_ASCII, 0, 1 }, + { WC_CCS_RAW, 0x80, 1 }, + { 0, 0, 0 }, +}; + +#define ces_ascii(id,name,desc) \ + { WC_CES_##id, name, desc, gset_usascii, NULL, \ + (void *)wc_conv_from_ascii, (void *)wc_push_to_iso8859, \ + (void *)wc_char_conv_from_iso2022 } +#define ces_iso8859(id,name,desc,no) \ + { WC_CES_##id, name, desc, gset_iso8859##no, NULL, \ + (void *)wc_conv_from_iso2022, (void *)wc_push_to_iso8859, \ + (void *)wc_char_conv_from_iso2022 } +#define ces_priv1(id,name,desc,ces) \ + { WC_CES_##id, name, desc, gset_##ces, NULL, \ + (void *)wc_conv_from_priv1, (void *)wc_push_to_priv1, \ + (void *)wc_char_conv_from_priv1 } +#define ces_iso2022(id,name,desc,terr) \ + { WC_CES_##id, name, desc, gset_iso2022##terr, gset_ext_iso2022##terr, \ + (void *)wc_conv_from_iso2022, (void *)wc_push_to_iso2022, \ + (void *)wc_char_conv_from_iso2022 } +#define ces_euc(id,name,desc,terr) \ + { WC_CES_##id, name, desc, gset_euc##terr, NULL, \ + (void *)wc_conv_from_iso2022, (void *)wc_push_to_euc##terr, \ + (void *)wc_char_conv_from_iso2022 } +#define ces_priv2(id,name,desc,ces) \ + { WC_CES_##id, name, desc, gset_##ces, NULL, \ + (void *)wc_conv_from_##ces, (void *)wc_push_to_##ces, \ + (void *)wc_char_conv_from_##ces } + +#define gset_ext_iso2022jp2 gset_ext_iso2022jp +#define gset_ext_iso2022jp3 gset_ext_iso2022jp +#define wc_push_to_euckr wc_push_to_euc +#define wc_push_to_euccn wc_push_to_euc +#define wc_push_to_priv1 wc_push_to_iso8859 +#define wc_push_to_cp1258 wc_push_to_viet +#define wc_push_to_tcvn5712 wc_push_to_viet +#define wc_push_to_viscii11 wc_push_to_viet +#define wc_push_to_vps wc_push_to_viet +#define wc_conv_from_cp1258 wc_conv_from_priv1 +#define wc_conv_from_tcvn5712 wc_conv_from_viet +#define wc_conv_from_viscii11 wc_conv_from_viet +#define wc_conv_from_vps wc_conv_from_viet +#define wc_conv_from_raw wc_conv_from_priv1 +#define wc_char_conv_from_hz wc_char_conv_from_iso2022 +#define wc_char_conv_from_cp1258 wc_char_conv_from_priv1 +#define wc_char_conv_from_tcvn5712 wc_char_conv_from_viet +#define wc_char_conv_from_viscii11 wc_char_conv_from_viet +#define wc_char_conv_from_vps wc_char_conv_from_viet +#define wc_char_conv_from_raw wc_char_conv_from_priv1 + +wc_ces_info WcCesInfo[] = { + ces_ascii(US_ASCII, "US-ASCII", "Latin (US-ASCII)"), + + ces_iso8859(ISO_8859_1, "ISO-8859-1", "Latin 1 (ISO-8859-1)", 1), + ces_iso8859(ISO_8859_2, "ISO-8859-2", "Latin 2 (ISO-8859-2)", 2), + ces_iso8859(ISO_8859_3, "ISO-8859-3", "Latin 3 (ISO-8859-3)", 3), + ces_iso8859(ISO_8859_4, "ISO-8859-4", "Latin 4 (ISO-8859-4)", 4), + ces_iso8859(ISO_8859_5, "ISO-8859-5", "Cyrillic (ISO-8859-5)", 5), + ces_iso8859(ISO_8859_6, "ISO-8859-6", "Arabic (ISO-8859-6)", 6), + ces_iso8859(ISO_8859_7, "ISO-8859-7", "Greek (ISO-8859-7)", 7), + ces_iso8859(ISO_8859_8, "ISO-8859-8", "Hebrew (ISO-8859-8)", 8), + ces_iso8859(ISO_8859_9, "ISO-8859-9", "Turkish (ISO-8859-9)", 9), + ces_iso8859(ISO_8859_10, "ISO-8859-10", "Nordic (ISO-8859-10)", 10), + ces_iso8859(ISO_8859_11, "ISO-8859-11", "Thai (ISO-8859-11, TIS-620)", 11), + { WC_CES_ISO_8859_12, NULL, NULL, NULL, NULL, NULL, NULL, NULL }, + ces_iso8859(ISO_8859_13, "ISO-8859-13", "Baltic Rim (ISO-8859-13)", 13), + ces_iso8859(ISO_8859_14, "ISO-8859-14", "Celtic (ISO-8859-14)", 14), + ces_iso8859(ISO_8859_15, "ISO-8859-15", "Latin 9 (ISO-8859-15)", 15), + ces_iso8859(ISO_8859_16, "ISO-8859-16", "Romanian (ISO-8859-16)", 16), + + ces_iso2022(ISO_2022_JP, "ISO-2022-JP", "Japanese (ISO-2022-JP)", jp), + ces_iso2022(ISO_2022_JP_2, "ISO-2022-JP-2", "Japanese (ISO-2022-JP-2)", jp2), + ces_iso2022(ISO_2022_JP_3, "ISO-2022-JP-3", "Japanese (ISO-2022-JP-3)", jp3), + ces_iso2022(ISO_2022_CN, "ISO-2022-CN", "Chinese (ISO-2022-CN)", cn), + ces_iso2022(ISO_2022_KR, "ISO-2022-KR", "Korean (ISO-2022-KR)", kr), + + ces_euc(EUC_JP, "EUC-JP", "Japanese (EUC-JP)", jp), + ces_euc(EUC_CN, "EUC-CN", "Chinese (EUC-CN, GB2312)", cn), + ces_euc(EUC_TW, "EUC-TW", "Chinese Taiwan (EUC-TW)", tw), + ces_euc(EUC_KR, "EUC-KR", "Korean (EUC-KR)", kr), + + ces_priv1(CP437, "CP437", "Latin (CP437)", cp437), + ces_priv1(CP737, "CP737", "Greek (CP737)", cp737), + ces_priv1(CP775, "CP775", "Baltic Rim (CP775)", cp775), + ces_priv1(CP850, "CP850", "Latin 1 (CP850)", cp850), + ces_priv1(CP852, "CP852", "Latin 2 (CP852)", cp852), + ces_priv1(CP855, "CP855", "Cyrillic (CP855)", cp855), + ces_priv1(CP856, "CP856", "Hebrew (CP856)", cp856), + ces_priv1(CP857, "CP857", "Turkish (CP857)", cp857), + ces_priv1(CP860, "CP860", "Portuguese (CP860)", cp860), + ces_priv1(CP861, "CP861", "Icelandic (CP861)", cp861), + ces_priv1(CP862, "CP862", "Hebrew (CP862)", cp862), + ces_priv1(CP863, "CP863", "Canada French (CP863)", cp863), + ces_priv1(CP864, "CP864", "Arabic (CP864)", cp864), + ces_priv1(CP865, "CP865", "Nordic (CP865)", cp865), + ces_priv1(CP866, "CP866", "Cyrillic (CP866)", cp866), + ces_priv1(CP869, "CP869", "Greek 2 (CP869)", cp869), + ces_priv1(CP874, "CP874", "Thai (CP874)", cp874), + ces_priv1(CP1006, "CP1006", "Arabic (CP1006)", cp1006), + ces_priv1(CP1250, "CP1250", "Latin 2 (CP1250)", cp1250), + ces_priv1(CP1251, "CP1251", "Cyrillic (CP1251)", cp1251), + ces_priv1(CP1252, "CP1252", "Latin 1 (CP1252)", cp1252), + ces_priv1(CP1253, "CP1253", "Greek (CP1253)", cp1253), + ces_priv1(CP1254, "CP1254", "Turkish (CP1254)", cp1254), + ces_priv1(CP1255, "CP1255", "Hebrew (CP1255)", cp1255), + ces_priv1(CP1256, "CP1256", "Arabic (CP1256)", cp1256), + ces_priv1(CP1257, "CP1257", "Baltic Rim (CP1257)", cp1257), + ces_priv1(KOI8_R, "KOI8-R", "Cyrillic (KOI8-R)", koi8r), + ces_priv1(KOI8_U, "KOI8-U", "Ukrainian (KOI8-U)", koi8u), + ces_priv1(NEXTSTEP, "NeXTSTEP", "NeXTSTEP", nextstep), + + ces_priv2(RAW, "Raw", "8bit Raw", raw), + + ces_priv2(SHIFT_JIS, "Shift_JIS", "Japanese (Shift_JIS, CP932)", sjis), + ces_priv2(SHIFT_JISX0213, "Shift_JISX0213", "Japanese (Shift_JISX0213)", sjisx0213), + ces_priv2(GBK, "GBK", "Chinese (GBK, CP936)", gbk), + ces_priv2(GB18030, "GB18030", "Chinese (GB18030)", gb18030), + ces_priv2(HZ_GB_2312, "HZ-GB-2312", "Chinese (HZ-GB-2312)", hz), + ces_priv2(BIG5, "Big5", "Chinese Taiwan (Big5, CP950)", big5), + ces_priv2(HKSCS, "HKSCS", "Chinese Hong Kong (HKSCS)", hkscs), + ces_priv2(UHC, "UHC", "Korean (UHC, CP949)", uhc), + ces_priv2(JOHAB, "Johab", "Korean (Johab)", johab), + + ces_priv2(CP1258, "CP1258", "Vietnamese (CP1258)", cp1258), + ces_priv2(TCVN_5712, "TCVN-5712", "Vietnamese (TCVN-5712)", tcvn5712), + ces_priv2(VISCII_11, "VISCII-1.1", "Vietnamese (VISCII 1.1)", viscii11), + ces_priv2(VPS, "VPS", "Vietnamese (VPS)", vps), + +#ifdef USE_UNICODE + ces_priv2(UTF_8, "UTF-8", "Unicode (UTF-8)", utf8), + ces_priv2(UTF_7, "UTF-7", "Unicode (UTF-7)", utf7), +#else + { WC_CES_UTF_8, NULL, NULL, NULL, NULL, NULL, NULL, NULL }, + { WC_CES_UTF_7, NULL, NULL, NULL, NULL, NULL, NULL, NULL }, +#endif + { 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL }, +}; |