diff options
author | Tatsuya Kinoshita <tats@vega.ocn.ne.jp> | 2011-05-04 07:18:09 +0000 |
---|---|---|
committer | Tatsuya Kinoshita <tats@vega.ocn.ne.jp> | 2011-05-04 07:18:09 +0000 |
commit | 5f8e0f8ef9a422691dd72e8a953a42a41478fcb4 (patch) | |
tree | 4b2df4796a534793648b3c4fc532fc36bd0cd525 /libwc/iso2022.c | |
parent | Releasing debian version 0.3-2.4 (diff) | |
download | w3m-5f8e0f8ef9a422691dd72e8a953a42a41478fcb4.tar.gz w3m-5f8e0f8ef9a422691dd72e8a953a42a41478fcb4.zip |
Releasing debian version 0.5.1-1debian/0.5.1-1
Diffstat (limited to '')
-rw-r--r-- | libwc/iso2022.c | 1059 |
1 files changed, 1059 insertions, 0 deletions
diff --git a/libwc/iso2022.c b/libwc/iso2022.c new file mode 100644 index 0000000..33d9a19 --- /dev/null +++ b/libwc/iso2022.c @@ -0,0 +1,1059 @@ + +#include "wc.h" +#include "iso2022.h" +#include "jis.h" +#include "big5.h" +#include "johab.h" +#include "wtf.h" +#ifdef USE_UNICODE +#include "ucs.h" +#endif + +#define C0 WC_ISO_MAP_C0 +#define C1 WC_ISO_MAP_C1 +#define GL WC_ISO_MAP_GL +#define GR WC_ISO_MAP_GR +#define GL2 WC_ISO_MAP_GL96 +#define GR2 WC_ISO_MAP_GR96 +#define SO WC_ISO_MAP_SO +#define SI WC_ISO_MAP_SI +#define ESC WC_ISO_MAP_ESC +#define SS2 WC_ISO_MAP_SS2 +#define SS3 WC_ISO_MAP_SS3 + +wc_uint8 WC_ISO_MAP[ 0x100 ] = { + C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, SO, SI, + C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, ESC,C0, C0, C0, C0, + GL2,GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, + GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, + GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, + GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, + GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, + GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL2, + + C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, SS2,SS3, + C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, + GR2,GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, + GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, + GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, + GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, + GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, + GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR2, +}; + +static wc_uchar cs94_gmap[ 0x80 - WC_F_ISO_BASE ]; +static wc_uchar cs94w_gmap[ 0x80 - WC_F_ISO_BASE ]; +static wc_uchar cs96_gmap[ 0x80 - WC_F_ISO_BASE ]; +static wc_uchar cs96w_gmap[ 0x80 - WC_F_ISO_BASE ]; +static wc_uchar cs942_gmap[ 0x80 - WC_F_ISO_BASE ]; + +static void +wtf_push_iso2022(Str os, wc_ccs ccs, wc_uint32 code) +{ + switch (ccs) { + case WC_CCS_JIS_C_6226: + case WC_CCS_JIS_X_0208: + case WC_CCS_JIS_X_0213_1: + ccs = wc_jisx0208_or_jisx02131(code); + break; + case WC_CCS_JIS_X_0212: + case WC_CCS_JIS_X_0213_2: + ccs = wc_jisx0212_or_jisx02132(code); + break; + case WC_CCS_JIS_X_0201: + case WC_CCS_GB_1988: + ccs = WC_CCS_US_ASCII; + break; + } + wtf_push(os, ccs, code); +} + +Str +wc_conv_from_iso2022(Str is, wc_ces ces) +{ + Str os; + wc_uchar *sp = (wc_uchar *)is->ptr; + wc_uchar *ep = sp + is->length; + wc_uchar *p, *q = NULL; + int state = WC_ISO_NOSTATE; + wc_status st; + wc_ccs gl_ccs, gr_ccs; + + for (p = sp; p < ep && !(WC_ISO_MAP[*p] & WC_ISO_MAP_DETECT); p++) + ; + if (p == ep) + return is; + os = Strnew_size(is->length); + if (p > sp) + Strcat_charp_n(os, is->ptr, (int)(p - sp)); + + wc_input_init(ces, &st); + gl_ccs = st.design[st.gl]; + gr_ccs = st.design[st.gr]; + + for (; p < ep; p++) { + switch (state) { + case WC_ISO_NOSTATE: + switch (WC_ISO_MAP[*p]) { + case GL2: + gl_ccs = st.ss ? st.design[st.ss] + : st.design[st.gl]; + if (!(WC_CCS_TYPE(gl_ccs) & WC_CCS_A_CS96)) { + Strcat_char(os, (char)*p); + break; + } + case GL: + gl_ccs = st.ss ? st.design[st.ss] + : st.design[st.gl]; + if (WC_CCS_IS_WIDE(gl_ccs)) { + q = p; + state = WC_ISO_MBYTE1; + continue; + } else if (gl_ccs == WC_CES_US_ASCII) + Strcat_char(os, (char)*p); + else + wtf_push_iso2022(os, gl_ccs, (wc_uint32)*p); + break; + case GR2: + gr_ccs = st.ss ? st.design[st.ss] + : st.design[st.gr]; + if (!(WC_CCS_TYPE(gr_ccs) & WC_CCS_A_CS96)) { + wtf_push_unknown(os, p, 1); + break; + } + case GR: + gr_ccs = st.ss ? st.design[st.ss] + : st.design[st.gr]; + if (WC_CCS_IS_WIDE(gr_ccs)) { + q = p; + state = WC_EUC_MBYTE1; + continue; + } else if (gr_ccs) + wtf_push_iso2022(os, gr_ccs, (wc_uint32)*p); + else + wtf_push_unknown(os, p, 1); + break; + case C0: + Strcat_char(os, (char)*p); + break; + case C1: + wtf_push(os, WC_CCS_C1, (wc_uint32)*p); + break; + case ESC: + st.ss = 0; + if (wc_parse_iso2022_esc(&p, &st)) + state = st.state; + else + Strcat_char(os, (char)*p); + continue; + case SI: + st.gl = 0; + break; + case SO: + st.gl = 1; + break; + case SS2: + if (! st.design[2]) { + wtf_push_unknown(os, p, 1); + break; + } + st.ss = 2; + continue; + case SS3: + if (! st.design[3]) { + wtf_push_unknown(os, p, 1); + break; + } + st.ss = 3; + continue; + } + break; + case WC_ISO_MBYTE1: + switch (WC_ISO_MAP[*p]) { + case GL2: + if (!(WC_CCS_TYPE(gl_ccs) & WC_CCS_A_CS96)) { + Strcat_char(os, (char)*q); + Strcat_char(os, (char)*p); + break; + } + case GL: + wtf_push_iso2022(os, gl_ccs, ((wc_uint32)*q << 8) | *p); + break; + default: + wtf_push_unknown(os, q, 2); + break; + } + break; + case WC_EUC_MBYTE1: + switch (WC_ISO_MAP[*p]) { + case GR2: + if (!(WC_CCS_TYPE(gr_ccs) & WC_CCS_A_CS96)) { + wtf_push_unknown(os, q, 2); + break; + } + case GR: + if (gr_ccs == WC_CCS_CNS_11643_X) { + state = WC_EUC_TW_MBYTE2; + continue; + } + wtf_push_iso2022(os, gr_ccs, ((wc_uint32)*q << 8) | *p); + break; + default: + wtf_push_unknown(os, q, 2); + break; + } + break; + case WC_EUC_TW_MBYTE2: + if (WC_ISO_MAP[*p] == GR) { + if (0xa1 <= *q && *q <= 0xa7) { + wtf_push_iso2022(os, WC_CCS_CNS_11643_1 + (*q - 0xa1), + ((wc_uint32)*(q+1) << 8) | *p); + break; + } + if (0xa8 <= *q && *q <= 0xb0) { + wtf_push_iso2022(os, WC_CCS_CNS_11643_8 + (*q - 0xa8), + ((wc_uint32)*(q+1) << 8) | *p); + break; + } + } + wtf_push_unknown(os, q, 3); + break; + case WC_ISO_CSWSR: + if (*p == WC_C_ESC && *(p+1) == WC_C_CSWSR) { + if (*(p+2) == WC_F_ISO_BASE) { + state = st.state = WC_ISO_NOSTATE; + p += 2; + continue; + } else if (*(p+2) > WC_F_ISO_BASE && *(p+2) <= 0x7e) { + p += 2; + continue; + } + } + wtf_push_unknown(os, p, 1); + continue; + case WC_ISO_CSWOSR: + wtf_push_unknown(os, p, ep - p); + return os; + break; + } + st.ss = 0; + state = WC_ISO_NOSTATE; + } + switch (state) { + case WC_ISO_MBYTE1: + case WC_EUC_MBYTE1: + wtf_push_unknown(os, p-1, 1); + break; + case WC_EUC_TW_MBYTE1: + wtf_push_unknown(os, p-2, 2); + break; + } + return os; +} + +int +wc_parse_iso2022_esc(wc_uchar **ptr, wc_status *st) +{ + wc_uchar *p = *ptr, state, f = 0, g = 0, cs = 0; + + if (*p != WC_C_ESC) + return 0; + state = *p; + for (p++; *p && state; p++) { + switch (state) { + case WC_C_ESC: /* ESC */ + switch (*p) { + case WC_C_MBCS: /* ESC '$' */ + state = *p; + continue; + case WC_C_G0_CS94: /* ESC '(' */ + case WC_C_G1_CS94: /* ESC ')' */ + case WC_C_G2_CS94: /* ESC '*' */ + case WC_C_G3_CS94: /* ESC '+' */ + state = cs = WC_C_G0_CS94; + g = *p & 0x03; + continue; + case WC_C_G0_CS96: /* ESC ',' */ /* ISO 2022 does not permit */ + case WC_C_G1_CS96: /* ESC '-' */ + case WC_C_G2_CS96: /* ESC '.' */ + case WC_C_G3_CS96: /* ESC '/' */ + state = cs = WC_C_G0_CS96; + g = *p & 0x03; + continue; + case WC_C_C0: /* ESC '!' */ /* not suported */ + case WC_C_C1: /* ESC '"' */ /* not suported */ + case WC_C_REP: /* ESC '&' */ /* not suported */ + state = cs = WC_C_C0; + continue; + case WC_C_CSWSR: /* ESC '%' */ /* not suported */ + state = cs = WC_C_CSWSR; + continue; + case WC_C_SS2: /* ESC 'N' */ + st->ss = 2; *ptr = p; return 1; + case WC_C_SS3: /* ESC 'O' */ + st->ss = 3; *ptr = p; return 1; + case WC_C_LS2: /* ESC 'n' */ + st->gl = 2; *ptr = p; return 1; + case WC_C_LS3: /* ESC 'o' */ + st->gl = 3; *ptr = p; return 1; + case WC_C_LS1R: /* ESC '~' */ + st->gr = 1; *ptr = p; return 1; + case WC_C_LS2R: /* ESC '}' */ + st->gr = 2; *ptr = p; return 1; + case WC_C_LS3R: /* ESC '|' */ + st->gr = 3; *ptr = p; return 1; + default: + return 0; + } + break; + case WC_C_MBCS: /* ESC '$' */ + switch (*p) { + case WC_F_JIS_C_6226: /* ESC '$' @ */ + case WC_F_JIS_X_0208: /* ESC '$' B */ + case WC_F_GB_2312: /* ESC '$' A */ + state = 0; + cs = WC_C_G0_CS94 | 0x80; + g = 0; + f = *p; + break; + case WC_C_G0_CS94: /* ESC '$' '(' */ + case WC_C_G1_CS94: /* ESC '$' ')' */ + case WC_C_G2_CS94: /* ESC '$' '*' */ + case WC_C_G3_CS94: /* ESC '$' '+' */ + state = cs = WC_C_G0_CS94 | 0x80; + g = *p & 0x03; + continue; + case WC_C_G0_CS96: /* ESC '$' ',' */ /* ISO 2022 does not permit */ + case WC_C_G1_CS96: /* ESC '$' '-' */ + case WC_C_G2_CS96: /* ESC '$' '.' */ + case WC_C_G3_CS96: /* ESC '$' '/' */ + state = cs = WC_C_G0_CS96 | 0x80; + g = *p & 0x03; + continue; + default: + return 0; + } + break; + case WC_C_G0_CS94: /* ESC [()*+] F */ + if (*p == WC_C_CS942) { /* ESC [()*+] '!' */ + state = cs = WC_C_CS942 | 0x80; + g = *p & 0x03; + continue; + } + case WC_C_G0_CS96: /* ESC [,-./] F */ + case WC_C_G0_CS94 | 0x80: /* ESC '$' [()*+] F */ + case WC_C_G0_CS96 | 0x80: /* ESC '$' [,-./] F */ + case WC_C_CS942 | 0x80: /* ESC [()*+] '!' F */ + case WC_C_C0: /* ESC [!"&] F */ + case WC_C_CSWSR | 0x80: /* ESC '%' '/' F */ + state = 0; + f = *p; + break; + case WC_C_CSWSR: /* ESC '%' F */ + if (*p == WC_C_CSWOSR) { /* ESC '%' '/' */ + state = cs = WC_C_CSWSR | 0x80; + continue; + } + state = 0; + f = *p; + break; + default: + return 0; + } + } + if (f < WC_F_ISO_BASE || f > 0x7e) + return 0; + switch (cs) { + case WC_C_G0_CS94: + st->design[g] = WC_CCS_SET_CS94(f); + break; + case WC_C_G0_CS94 | 0x80: + st->design[g] = WC_CCS_SET_CS94W(f); + break; + case WC_C_G0_CS96: + st->design[g] = WC_CCS_SET_CS96(f); + break; + case WC_C_G0_CS96 | 0x80: + st->design[g] = WC_CCS_SET_CS96W(f); + break; + case WC_C_CS942 | 0x80: + st->design[g] = WC_CCS_SET_CS942(f); + break; + case WC_C_CSWSR: + if (f == WC_F_ISO_BASE) + st->state = WC_ISO_NOSTATE; + else + st->state = WC_ISO_CSWSR; + break; + case WC_C_CSWOSR: + st->state = WC_ISO_CSWOSR; + break; + } + *ptr = p - 1; + return 1; +} + +void +wc_push_to_iso2022(Str os, wc_wchar_t cc, wc_status *st) +{ + wc_uchar g = 0; + wc_bool is_wide = WC_FALSE, retry = WC_FALSE; + wc_wchar_t cc2; + + while (1) { + switch (WC_CCS_TYPE(cc.ccs)) { + case WC_CCS_A_CS94: + if (cc.ccs == WC_CCS_US_ASCII) + cc.ccs = st->g0_ccs; + g = cs94_gmap[WC_CCS_INDEX(cc.ccs) - WC_F_ISO_BASE]; + break; + case WC_CCS_A_CS94W: + is_wide = 1; + switch (cc.ccs) { +#ifdef USE_UNICODE + case WC_CCS_JIS_X_0212: + if (!WcOption.use_jisx0212 && WcOption.use_jisx0213 && + WcOption.ucs_conv) { + cc2 = wc_jisx0212_to_jisx0213(cc); + if (cc2.ccs == WC_CCS_JIS_X_0213_1 || + cc2.ccs == WC_CCS_JIS_X_0213_2) { + cc = cc2; + continue; + } + } + break; + case WC_CCS_JIS_X_0213_1: + case WC_CCS_JIS_X_0213_2: + if (!WcOption.use_jisx0213 && WcOption.use_jisx0212 && + WcOption.ucs_conv) { + cc2 = wc_jisx0213_to_jisx0212(cc); + if (cc2.ccs == WC_CCS_JIS_X_0212) { + cc = cc2; + continue; + } + } + break; +#endif + } + g = cs94w_gmap[WC_CCS_INDEX(cc.ccs) - WC_F_ISO_BASE]; + break; + case WC_CCS_A_CS96: + g = cs96_gmap[WC_CCS_INDEX(cc.ccs) - WC_F_ISO_BASE]; + break; + case WC_CCS_A_CS96W: + is_wide = 1; + g = cs96w_gmap[WC_CCS_INDEX(cc.ccs) - WC_F_ISO_BASE]; + break; + case WC_CCS_A_CS942: + g = cs942_gmap[WC_CCS_INDEX(cc.ccs) - WC_F_ISO_BASE]; + break; + case WC_CCS_A_UNKNOWN_W: + if (WcOption.no_replace) + return; + is_wide = 1; + cc.ccs = WC_CCS_US_ASCII; + g = cs94_gmap[WC_CCS_INDEX(cc.ccs) - WC_F_ISO_BASE]; + cc.code = ((wc_uint32)WC_REPLACE_W[0] << 8) | WC_REPLACE_W[1]; + break; + case WC_CCS_A_UNKNOWN: + if (WcOption.no_replace) + return; + cc.ccs = WC_CCS_US_ASCII; + g = cs94_gmap[WC_CCS_INDEX(cc.ccs) - WC_F_ISO_BASE]; + cc.code = (wc_uint32)WC_REPLACE[0]; + break; + default: + if ((cc.ccs == WC_CCS_JOHAB || WC_CCS_JOHAB_1 || + cc.ccs == WC_CCS_JOHAB_2 || cc.ccs == WC_CCS_JOHAB_3) && + cs94w_gmap[WC_F_KS_X_1001 - WC_F_ISO_BASE]) { + wc_wchar_t cc2 = wc_johab_to_ksx1001(cc); + if (cc2.ccs == WC_CCS_KS_X_1001) { + cc = cc2; + continue; + } + } +#ifdef USE_UNICODE + if (WcOption.ucs_conv) + cc = wc_any_to_iso2022(cc, st); + else +#endif + cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; + continue; + } + if (! g) { +#ifdef USE_UNICODE + if (WcOption.ucs_conv && ! retry) + cc = wc_any_to_any_ces(cc, st); + else +#endif + cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; + retry = WC_TRUE; + continue; + } + + wc_push_iso2022_esc(os, cc.ccs, g, 1, st); + if (is_wide) + Strcat_char(os, (char)((cc.code >> 8) & 0x7f)); + Strcat_char(os, (char)(cc.code & 0x7f)); + return; + } +} + +void +wc_push_to_iso2022_end(Str os, wc_status *st) +{ + if (st->design[1] != 0 && st->design[1] != st->g1_ccs) + wc_push_iso2022_esc(os, st->g1_ccs, WC_C_G1_CS94, 0, st); + wc_push_iso2022_esc(os, st->g0_ccs, WC_C_G0_CS94, 1, st); +} + +void +wc_push_iso2022_esc(Str os, wc_ccs ccs, wc_uchar g, wc_uint8 invoke, wc_status *st) +{ + wc_uint8 g_invoke = g & 0x03; + + if (st->design[g_invoke] != ccs) { + Strcat_char(os, WC_C_ESC); + if (WC_CCS_IS_WIDE(ccs)) { + Strcat_char(os, WC_C_MBCS); + if (g_invoke != 0 || + (ccs != WC_CCS_JIS_C_6226 && + ccs != WC_CCS_JIS_X_0208 && + ccs != WC_CCS_GB_2312)) + Strcat_char(os, (char)g); + } else { + Strcat_char(os, (char)g); + if ((ccs & WC_CCS_A_ISO_2022) == WC_CCS_A_CS942) + Strcat_char(os, WC_C_CS942); + } + Strcat_char(os, (char)WC_CCS_GET_F(ccs)); + st->design[g_invoke] = ccs; + } + if (! invoke) + return; + + switch (g_invoke) { + case 0: + if (st->gl != 0) { + Strcat_char(os, WC_C_SI); + st->gl = 0; + } + break; + case 1: + if (st->gl != 1) { + Strcat_char(os, WC_C_SO); + st->gl = 1; + } + break; + case 2: + Strcat_char(os, WC_C_ESC); + Strcat_char(os, WC_C_SS2); + break; + case 3: + Strcat_char(os, WC_C_ESC); + Strcat_char(os, WC_C_SS3); + break; + } +} + +void +wc_push_to_euc(Str os, wc_wchar_t cc, wc_status *st) +{ + wc_ccs g1_ccs = st->ces_info->gset[1].ccs; + + while (1) { + if (cc.ccs == g1_ccs) { + Strcat_char(os, (char)((cc.code >> 8) | 0x80)); + Strcat_char(os, (char)((cc.code & 0xff) | 0x80)); + return; + } + switch (cc.ccs) { + case WC_CCS_US_ASCII: + Strcat_char(os, (char)cc.code); + return; + case WC_CCS_C1: + Strcat_char(os, (char)(cc.code | 0x80)); + return; + case WC_CCS_UNKNOWN_W: + if (!WcOption.no_replace) + Strcat_charp(os, WC_REPLACE_W); + return; + case WC_CCS_UNKNOWN: + if (!WcOption.no_replace) + Strcat_charp(os, WC_REPLACE); + return; + case WC_CCS_JOHAB: + case WC_CCS_JOHAB_1: + case WC_CCS_JOHAB_2: + case WC_CCS_JOHAB_3: + if (st->ces_info->id == WC_CES_EUC_KR) { + cc = wc_johab_to_ksx1001(cc); + continue; + } + default: +#ifdef USE_UNICODE + if (WcOption.ucs_conv) + cc = wc_any_to_any_ces(cc, st); + else +#endif + cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; + continue; + } + } +} + +void +wc_push_to_eucjp(Str os, wc_wchar_t cc, wc_status *st) +{ + while (1) { + switch (cc.ccs) { + case WC_CCS_US_ASCII: + Strcat_char(os, (char)cc.code); + return; + case WC_CCS_JIS_X_0201K: + if (WcOption.use_jisx0201k) { + Strcat_char(os, WC_C_SS2R); + Strcat_char(os, (char)(cc.code | 0x80)); + return; + } else if (WcOption.fix_width_conv) + cc.ccs = WC_CCS_UNKNOWN; + else + cc = wc_jisx0201k_to_jisx0208(cc); + continue; + case WC_CCS_JIS_X_0208: + break; + case WC_CCS_JIS_X_0213_1: + if (WcOption.use_jisx0213) + break; +#ifdef USE_UNICODE + else if (WcOption.ucs_conv && WcOption.use_jisx0212) + cc = wc_jisx0213_to_jisx0212(cc); +#endif + else + cc.ccs = WC_CCS_UNKNOWN_W; + continue; + case WC_CCS_JIS_X_0212: + if (WcOption.use_jisx0212) { + Strcat_char(os, WC_C_SS3R); + break; + } +#ifdef USE_UNICODE + else if (WcOption.ucs_conv && WcOption.use_jisx0213) + cc = wc_jisx0212_to_jisx0213(cc); +#endif + else + cc.ccs = WC_CCS_UNKNOWN_W; + continue; + case WC_CCS_JIS_X_0213_2: + if (WcOption.use_jisx0213) { + Strcat_char(os, WC_C_SS3R); + break; + } +#ifdef USE_UNICODE + else if (WcOption.ucs_conv && WcOption.use_jisx0212) + cc = wc_jisx0213_to_jisx0212(cc); +#endif + else + cc.ccs = WC_CCS_UNKNOWN_W; + continue; + case WC_CCS_C1: + Strcat_char(os, (char)(cc.code | 0x80)); + return; + case WC_CCS_UNKNOWN_W: + if (!WcOption.no_replace) + Strcat_charp(os, WC_REPLACE_W); + return; + case WC_CCS_UNKNOWN: + if (!WcOption.no_replace) + Strcat_charp(os, WC_REPLACE); + return; + default: +#ifdef USE_UNICODE + if (WcOption.ucs_conv) + cc = wc_any_to_any_ces(cc, st); + else +#endif + cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; + continue; + } + Strcat_char(os, (char)((cc.code >> 8) | 0x80)); + Strcat_char(os, (char)((cc.code & 0xff) | 0x80)); + return; + } +} + +void +wc_push_to_euctw(Str os, wc_wchar_t cc, wc_status *st) +{ + while (1) { + switch (cc.ccs) { + case WC_CCS_US_ASCII: + Strcat_char(os, (char)cc.code); + return; + case WC_CCS_CNS_11643_1: + break; + case WC_CCS_CNS_11643_2: + case WC_CCS_CNS_11643_3: + case WC_CCS_CNS_11643_4: + case WC_CCS_CNS_11643_5: + case WC_CCS_CNS_11643_6: + case WC_CCS_CNS_11643_7: + Strcat_char(os, WC_C_SS2R); + Strcat_char(os, (char)(0xA1 + (cc.ccs - WC_CCS_CNS_11643_1))); + break; + case WC_CCS_CNS_11643_8: + case WC_CCS_CNS_11643_9: + case WC_CCS_CNS_11643_10: + case WC_CCS_CNS_11643_11: + case WC_CCS_CNS_11643_12: + case WC_CCS_CNS_11643_13: + case WC_CCS_CNS_11643_14: + case WC_CCS_CNS_11643_15: + case WC_CCS_CNS_11643_16: + Strcat_char(os, WC_C_SS2R); + Strcat_char(os, (char)(0xA8 + (cc.ccs - WC_CCS_CNS_11643_8))); + break; + case WC_CCS_C1: + Strcat_char(os, (char)(cc.code | 0x80)); + return; + case WC_CCS_UNKNOWN_W: + if (!WcOption.no_replace) + Strcat_charp(os, WC_REPLACE_W); + return; + case WC_CCS_UNKNOWN: + if (!WcOption.no_replace) + Strcat_charp(os, WC_REPLACE); + return; + default: +#ifdef USE_UNICODE + if (WcOption.ucs_conv) + cc = wc_any_to_any_ces(cc, st); + else +#endif + cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; + continue; + } + Strcat_char(os, (char)((cc.code >> 8) | 0x80)); + Strcat_char(os, (char)((cc.code & 0xff) | 0x80)); + return; + } +} + +void +wc_push_to_iso8859(Str os, wc_wchar_t cc, wc_status *st) +{ + wc_ccs g1_ccs = st->ces_info->gset[1].ccs; + + while (1) { + if (cc.ccs == g1_ccs) { + Strcat_char(os, (char)(cc.code | 0x80)); + return; + } + switch (cc.ccs) { + case WC_CCS_US_ASCII: + Strcat_char(os, (char)cc.code); + return; + case WC_CCS_C1: + Strcat_char(os, (char)(cc.code | 0x80)); + return; + case WC_CCS_UNKNOWN_W: + if (!WcOption.no_replace) + Strcat_charp(os, WC_REPLACE_W); + return; + case WC_CCS_UNKNOWN: + if (!WcOption.no_replace) + Strcat_charp(os, WC_REPLACE); + return; + default: +#ifdef USE_UNICODE + if (WcOption.ucs_conv) + cc = wc_any_to_any_ces(cc, st); + else +#endif + cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; + continue; + } + } +} + +void +wc_create_gmap(wc_status *st) +{ + wc_gset *gset = st->ces_info->gset; + wc_uchar *gset_ext = st->ces_info->gset_ext; + int i, f; + + if (WcOption.strict_iso2022) { + for (i = 0; i < WC_F_ISO_BASE; i++) { + cs94_gmap[i] = 0; + cs96_gmap[i] = 0; + cs94w_gmap[i] = 0; + cs96w_gmap[i] = 0; + cs942_gmap[i] = 0; + } + } else { + for (i = 0; i < WC_F_ISO_BASE; i++) { + cs94_gmap[i] = gset_ext[0]; + cs96_gmap[i] = gset_ext[1]; + cs94w_gmap[i] = gset_ext[2]; + cs96w_gmap[i] = gset_ext[3]; + cs942_gmap[i] = gset_ext[0]; + } + } + for (i = 0; gset[i].ccs; i++) { + f = WC_CCS_GET_F(gset[i].ccs) - WC_F_ISO_BASE; + switch (WC_CCS_TYPE(gset[i].ccs)) { + case WC_CCS_A_CS94: + switch (gset[i].ccs) { + case WC_CCS_JIS_X_0201K: + if (!WcOption.use_jisx0201k) + continue; + break; + } + cs94_gmap[f] = gset[i].g; + break; + case WC_CCS_A_CS94W: + switch (gset[i].ccs) { + case WC_CCS_JIS_X_0212: + if (!WcOption.use_jisx0212) + continue; + break; + case WC_CCS_JIS_X_0213_1: + case WC_CCS_JIS_X_0213_2: + if (!WcOption.use_jisx0213) + continue; + break; + } + cs94w_gmap[f] = gset[i].g; + break; + case WC_CCS_A_CS96: + cs96_gmap[f] = gset[i].g; + break; + case WC_CCS_A_CS96W: + cs96w_gmap[f] = gset[i].g; + break; + case WC_CCS_A_CS942: + cs942_gmap[f] = gset[i].g; + break; + } + } +} + +Str +wc_char_conv_from_iso2022(wc_uchar c, wc_status *st) +{ + static Str os; + static wc_uchar buf[4]; + static size_t nbuf; + wc_uchar *p; + wc_ccs gl_ccs, gr_ccs; + + if (st->state == -1) { + st->state = WC_ISO_NOSTATE; + os = Strnew_size(8); + nbuf = 0; + } + + gl_ccs = st->ss ? st->design[st->ss] : st->design[st->gl]; + gr_ccs = st->ss ? st->design[st->ss] : st->design[st->gr]; + + switch (st->state) { + case WC_ISO_NOSTATE: + switch (WC_ISO_MAP[c]) { + case GL2: + if (!(WC_CCS_TYPE(gl_ccs) & WC_CCS_A_CS96)) { + Strcat_char(os, (char)c); + break; + } + case GL: + if (WC_CCS_IS_WIDE(gl_ccs)) { + buf[nbuf++] = c; + st->state = WC_ISO_MBYTE1; + return NULL; + } else if (gl_ccs == WC_CES_US_ASCII) + Strcat_char(os, (char)c); + else + wtf_push_iso2022(os, gl_ccs, (wc_uint32)c); + break; + case GR2: + if (!(WC_CCS_TYPE(gr_ccs) & WC_CCS_A_CS96)) + break; + case GR: + if (WC_CCS_IS_WIDE(gr_ccs)) { + buf[nbuf++] = c; + st->state = WC_EUC_MBYTE1; + return NULL; + } else if (gr_ccs) + wtf_push_iso2022(os, gr_ccs, (wc_uint32)c); + break; + case C0: + Strcat_char(os, (char)c); + break; + case C1: + break; + case ESC: + buf[nbuf++] = c; + st->state = WC_C_ESC; + return NULL; + case SI: + st->gl = 0; + break; + case SO: + st->gl = 1; + break; + case SS2: + if (! st->design[2]) + return os; + st->ss = 2; + return NULL; + case SS3: + if (! st->design[3]) + return os; + st->ss = 3; + return NULL; + } + break; + case WC_ISO_MBYTE1: + switch (WC_ISO_MAP[c]) { + case GL2: + if (!(WC_CCS_TYPE(gl_ccs) & WC_CCS_A_CS96)) + break; + case GL: + buf[nbuf++] = c; + wtf_push_iso2022(os, gl_ccs, ((wc_uint32)buf[0] << 8) | buf[1]); + break; + } + st->state = WC_ISO_NOSTATE; + break; + case WC_EUC_MBYTE1: + switch (WC_ISO_MAP[c]) { + case GR2: + if (!(WC_CCS_TYPE(gr_ccs) & WC_CCS_A_CS96)) + break; + case GR: + if (gr_ccs == WC_CCS_CNS_11643_X) { + buf[nbuf++] = c; + st->state = WC_EUC_TW_MBYTE2; + return NULL; + } + buf[nbuf++] = c; + wtf_push_iso2022(os, gr_ccs, ((wc_uint32)buf[0] << 8) | buf[1]); + break; + } + st->state = WC_ISO_NOSTATE; + break; + case WC_EUC_TW_MBYTE2: + if (WC_ISO_MAP[c] == GR) { + buf[nbuf++] = c; + c = buf[0]; + if (0xa1 <= c && c <= 0xa7) { + wtf_push_iso2022(os, WC_CCS_CNS_11643_1 + (c - 0xa1), + ((wc_uint32)buf[1] << 8) | buf[2]); + break; + } + if (0xa8 <= c && c <= 0xb0) { + wtf_push_iso2022(os, WC_CCS_CNS_11643_8 + (c - 0xa8), + ((wc_uint32)buf[1] << 8) | buf[2]); + break; + } + } + st->state = WC_ISO_NOSTATE; + break; + case WC_C_ESC: + switch (c) { + case WC_C_G0_CS94: + case WC_C_G1_CS94: + case WC_C_G2_CS94: + case WC_C_G3_CS94: + buf[nbuf++] = c; + st->state = WC_C_G0_CS94; + return NULL; + case WC_C_G0_CS96: + case WC_C_G1_CS96: + case WC_C_G2_CS96: + case WC_C_G3_CS96: + case WC_C_C0: + case WC_C_C1: + case WC_C_REP: + buf[nbuf++] = c; + st->state = WC_C_G0_CS96; + return NULL; + case WC_C_MBCS: + case WC_C_CSWSR: + buf[nbuf++] = c; + st->state = c; + return NULL; + case WC_C_SS2: + st->ss = 2; + st->state = WC_ISO_NOSTATE; + return NULL; + case WC_C_SS3: + st->ss = 3; + st->state = WC_ISO_NOSTATE; + return NULL; + case WC_C_LS2: + st->gl = 2; + break; + case WC_C_LS3: + st->gl = 3; + break; + case WC_C_LS2R: + st->gr = 2; + break; + case WC_C_LS3R: + st->gr = 3; + break; + default: + break; + } + break; + case WC_C_MBCS: + switch (c) { + case WC_F_JIS_C_6226: + case WC_F_JIS_X_0208: + case WC_F_GB_2312: + buf[nbuf++] = c; + p = buf; + wc_parse_iso2022_esc(&p, st); + break; + case WC_C_G0_CS94: + case WC_C_G1_CS94: + case WC_C_G2_CS94: + case WC_C_G3_CS94: + case WC_C_G0_CS96: + case WC_C_G1_CS96: + case WC_C_G2_CS96: + case WC_C_G3_CS96: + buf[nbuf++] = c; + st->state = WC_C_G0_CS96; + return NULL; + } + break; + case WC_C_CSWSR: + switch (c) { + case WC_C_CSWOSR: + buf[nbuf++] = c; + st->state = WC_C_G1_CS94; + return NULL; + } + buf[nbuf++] = c; + p = buf; + wc_parse_iso2022_esc(&p, st); + break; + case WC_C_G0_CS94: + switch (c) { + case WC_C_CS942: + buf[nbuf++] = c; + st->state = WC_C_G0_CS96; + return NULL; + } + case WC_C_G0_CS96: + buf[nbuf++] = c; + p = buf; + wc_parse_iso2022_esc(&p, st); + break; + } + st->ss = 0; + st->state = -1; + return os; +} |