#include "wc.h"
#include "iso2022.h"
#include "jis.h"
#include "big5.h"
#include "johab.h"
#include "wtf.h"
#ifdef USE_UNICODE
#include "ucs.h"
#endif

/* Short aliases for the byte classes stored in WC_ISO_MAP. */
#define C0  WC_ISO_MAP_C0
#define C1  WC_ISO_MAP_C1
#define GL  WC_ISO_MAP_GL
#define GR  WC_ISO_MAP_GR
#define GL2 WC_ISO_MAP_GL96
#define GR2 WC_ISO_MAP_GR96
#define SO  WC_ISO_MAP_SO
#define SI  WC_ISO_MAP_SI
#define ESC WC_ISO_MAP_ESC
#define SS2 WC_ISO_MAP_SS2
#define SS3 WC_ISO_MAP_SS3

/* Number of final bytes (WC_F_ISO_BASE .. 0x7f) tracked per set type. */
#define GMAP_SIZE (0x80 - WC_F_ISO_BASE)

/*
 * Byte classification table indexed by the raw input byte.
 * 0x0e/0x0f are SO/SI, 0x1b is ESC, 0x8e/0x8f are SS2/SS3; 0x20, 0x7f,
 * 0xa0 and 0xff get the "96" classes because they are only graphic
 * characters when a 96-character set is in use.
 */
wc_uint8 WC_ISO_MAP[ 0x100 ] = {
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, SO, SI,
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, ESC,C0, C0, C0, C0,
    GL2,GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL2,

    C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, SS2,SS3,
    C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
    GR2,GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR,
    GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR,
    GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR,
    GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR,
    GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR,
    GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR, GR2,
};

/*
 * Per set type, the designation byte to use for a given final byte
 * (indexed by F - WC_F_ISO_BASE).  Zero means "not available".
 * Filled in by wc_create_gmap().
 */
static wc_uchar cs94_gmap[ GMAP_SIZE ];
static wc_uchar cs94w_gmap[ GMAP_SIZE ];
static wc_uchar cs96_gmap[ GMAP_SIZE ];
static wc_uchar cs96w_gmap[ GMAP_SIZE ];
static wc_uchar cs942_gmap[ GMAP_SIZE ];

/*
 * Push one decoded character to `os` via wtf_push(), first normalising
 * the character set: the JIS X 0208 family and the JIS X 0212 / 0213-2
 * family are re-classified from the code value, and JIS X 0201 / GB 1988
 * are pushed as US-ASCII.
 */
static void
wtf_push_iso2022(Str os, wc_ccs ccs, wc_uint32 code)
{
    switch (ccs) {
    case WC_CCS_JIS_C_6226:
    case WC_CCS_JIS_X_0208:
    case WC_CCS_JIS_X_0213_1:
        ccs = wc_jisx0208_or_jisx02131(code);
        break;
    case WC_CCS_JIS_X_0212:
    case WC_CCS_JIS_X_0213_2:
        ccs = wc_jisx0212_or_jisx02132(code);
        break;
    case WC_CCS_JIS_X_0201:
    case WC_CCS_GB_1988:
        ccs = WC_CCS_US_ASCII;
        break;
    default:
        break;
    }
    wtf_push(os, ccs, code);
}

/*
 * Decode an ISO-2022 style byte string into the internal representation.
 *
 * is:  input string.
 * ces: encoding scheme used to initialise the decoder status.
 *
 * Returns `is` itself when no byte in it carries WC_ISO_MAP_DETECT,
 * otherwise a newly created Str holding the converted text.  Bytes that
 * cannot be decoded are emitted through wtf_push_unknown().
 */
Str
wc_conv_from_iso2022(Str is, wc_ces ces)
{
    Str os;
    wc_uchar *sp = (wc_uchar *)is->ptr;
    wc_uchar *ep = sp + is->length;
    wc_uchar *p, *q = NULL;     /* q: first byte of a pending multibyte char */
    int state = WC_ISO_NOSTATE;
    wc_status st;
    wc_ccs gl_ccs, gr_ccs;

    /* Skip the leading run that needs no conversion at all. */
    for (p = sp; p < ep && !(WC_ISO_MAP[*p] & WC_ISO_MAP_DETECT); p++)
        ;
    if (p == ep)
        return is;
    os = Strnew_size(is->length);
    if (p > sp)
        Strcat_charp_n(os, is->ptr, (int)(p - sp));

    wc_input_init(ces, &st);
    gl_ccs = st.design[st.gl];
    gr_ccs = st.design[st.gr];

    for (; p < ep; p++) {
        switch (state) {
        case WC_ISO_NOSTATE:
            switch (WC_ISO_MAP[*p]) {
            case GL2:
                gl_ccs = st.ss ? st.design[st.ss] : st.design[st.gl];
                if (!(WC_CCS_TYPE(gl_ccs) & WC_CCS_A_CS96)) {
                    Strcat_char(os, (char)*p);
                    break;
                }
                /* fallthrough: 0x20/0x7f is graphic in a 96-set */
            case GL:
                gl_ccs = st.ss ? st.design[st.ss] : st.design[st.gl];
                if (WC_CCS_IS_WIDE(gl_ccs)) {
                    q = p;
                    state = WC_ISO_MBYTE1;
                    continue;
                }
                /*
                 * NOTE(review): this compares a CCS value against a CES
                 * constant; WC_CCS_US_ASCII may have been intended.
                 * Kept as-is because the header values are not visible
                 * here -- confirm before changing.
                 */
                else if (gl_ccs == WC_CES_US_ASCII)
                    Strcat_char(os, (char)*p);
                else
                    wtf_push_iso2022(os, gl_ccs, (wc_uint32)*p);
                break;
            case GR2:
                gr_ccs = st.ss ? st.design[st.ss] : st.design[st.gr];
                if (!(WC_CCS_TYPE(gr_ccs) & WC_CCS_A_CS96)) {
                    wtf_push_unknown(os, p, 1);
                    break;
                }
                /* fallthrough: 0xa0/0xff is graphic in a 96-set */
            case GR:
                gr_ccs = st.ss ? st.design[st.ss] : st.design[st.gr];
                if (WC_CCS_IS_WIDE(gr_ccs)) {
                    q = p;
                    state = WC_EUC_MBYTE1;
                    continue;
                }
                else if (gr_ccs)
                    wtf_push_iso2022(os, gr_ccs, (wc_uint32)*p);
                else
                    wtf_push_unknown(os, p, 1);
                break;
            case C0:
                Strcat_char(os, (char)*p);
                break;
            case C1:
                wtf_push(os, WC_CCS_C1, (wc_uint32)*p);
                break;
            case ESC:
                st.ss = 0;
                if (wc_parse_iso2022_esc(&p, &st))
                    state = st.state;
                else
                    Strcat_char(os, (char)*p);
                /* continue (not break) so a single shift set by the
                 * escape sequence survives until the next character */
                continue;
            case SI:
                st.gl = 0;
                break;
            case SO:
                st.gl = 1;
                break;
            case SS2:
                if (! st.design[2]) {
                    wtf_push_unknown(os, p, 1);
                    break;
                }
                st.ss = 2;
                continue;
            case SS3:
                if (! st.design[3]) {
                    wtf_push_unknown(os, p, 1);
                    break;
                }
                st.ss = 3;
                continue;
            }
            break;
        case WC_ISO_MBYTE1:
            switch (WC_ISO_MAP[*p]) {
            case GL2:
                if (!(WC_CCS_TYPE(gl_ccs) & WC_CCS_A_CS96)) {
                    Strcat_char(os, (char)*q);
                    Strcat_char(os, (char)*p);
                    break;
                }
                /* fallthrough */
            case GL:
                wtf_push_iso2022(os, gl_ccs, ((wc_uint32)*q << 8) | *p);
                break;
            default:
                wtf_push_unknown(os, q, 2);
                break;
            }
            break;
        case WC_EUC_MBYTE1:
            switch (WC_ISO_MAP[*p]) {
            case GR2:
                if (!(WC_CCS_TYPE(gr_ccs) & WC_CCS_A_CS96)) {
                    wtf_push_unknown(os, q, 2);
                    break;
                }
                /* fallthrough */
            case GR:
                if (gr_ccs == WC_CCS_CNS_11643_X) {
                    /* *q selects the plane; one more byte is needed */
                    state = WC_EUC_TW_MBYTE2;
                    continue;
                }
                wtf_push_iso2022(os, gr_ccs, ((wc_uint32)*q << 8) | *p);
                break;
            default:
                wtf_push_unknown(os, q, 2);
                break;
            }
            break;
        case WC_EUC_TW_MBYTE2:
            if (WC_ISO_MAP[*p] == GR) {
                if (0xa1 <= *q && *q <= 0xa7) {
                    wtf_push_iso2022(os, WC_CCS_CNS_11643_1 + (*q - 0xa1),
                                     ((wc_uint32)*(q+1) << 8) | *p);
                    break;
                }
                if (0xa8 <= *q && *q <= 0xb0) {
                    wtf_push_iso2022(os, WC_CCS_CNS_11643_8 + (*q - 0xa8),
                                     ((wc_uint32)*(q+1) << 8) | *p);
                    break;
                }
            }
            wtf_push_unknown(os, q, 3);
            break;
        case WC_ISO_CSWSR:
            /*
             * Inside another coding system: pass bytes through as
             * unknown until ESC '%' F is seen.  The length guard keeps
             * the look-ahead inside the input buffer.
             */
            if (ep - p > 2 && *p == WC_C_ESC && *(p+1) == WC_C_CSWSR) {
                if (*(p+2) == WC_F_ISO_BASE) {
                    state = st.state = WC_ISO_NOSTATE;
                    p += 2;
                    continue;
                } else if (*(p+2) > WC_F_ISO_BASE && *(p+2) <= 0x7e) {
                    p += 2;
                    continue;
                }
            }
            wtf_push_unknown(os, p, 1);
            continue;
        case WC_ISO_CSWOSR:
            /* No standard return: the rest of the input is opaque. */
            wtf_push_unknown(os, p, ep - p);
            return os;
        }
        st.ss = 0;
        state = WC_ISO_NOSTATE;
    }

    /* Input ended in the middle of a multibyte character. */
    switch (state) {
    case WC_ISO_MBYTE1:
    case WC_EUC_MBYTE1:
        wtf_push_unknown(os, q, 1);
        break;
    case WC_EUC_TW_MBYTE2:
        /* was WC_EUC_TW_MBYTE1, a state this decoder never enters, so
         * the two pending bytes were silently dropped */
        wtf_push_unknown(os, q, 2);
        break;
    }
    return os;
}

/*
 * Parse one escape sequence starting at **ptr (which must be ESC) and
 * apply it to `st` (designation, locking/single shift, or coding-system
 * switch state).
 *
 * On success returns 1 and sets *ptr to the last byte of the sequence.
 * Returns 0, leaving *ptr untouched, when the sequence is not recognised
 * or is cut short by a NUL byte.  The buffer is scanned until the
 * sequence completes or a NUL byte is met.
 */
int
wc_parse_iso2022_esc(wc_uchar **ptr, wc_status *st)
{
    wc_uchar *p = *ptr, state, f = 0, g = 0, cs = 0;

    if (*p != WC_C_ESC)
        return 0;
    state = *p;
    /*
     * `state` is tested first so that, once the final byte has been
     * consumed, the byte after it is never dereferenced (the caller's
     * buffer may end exactly there).
     */
    for (p++; state && *p; p++) {
        switch (state) {
        case WC_C_ESC:                  /* ESC */
            switch (*p) {
            case WC_C_MBCS:             /* ESC '$' */
                state = *p;
                continue;
            case WC_C_G0_CS94:          /* ESC '(' */
            case WC_C_G1_CS94:          /* ESC ')' */
            case WC_C_G2_CS94:          /* ESC '*' */
            case WC_C_G3_CS94:          /* ESC '+' */
                state = cs = WC_C_G0_CS94;
                g = *p & 0x03;
                continue;
            case WC_C_G0_CS96:          /* ESC ',' */ /* ISO 2022 does not permit */
            case WC_C_G1_CS96:          /* ESC '-' */
            case WC_C_G2_CS96:          /* ESC '.' */
            case WC_C_G3_CS96:          /* ESC '/' */
                state = cs = WC_C_G0_CS96;
                g = *p & 0x03;
                continue;
            case WC_C_C0:               /* ESC '!' */ /* not supported */
            case WC_C_C1:               /* ESC '"' */ /* not supported */
            case WC_C_REP:              /* ESC '&' */ /* not supported */
                state = cs = WC_C_C0;
                continue;
            case WC_C_CSWSR:            /* ESC '%' */ /* not supported */
                state = cs = WC_C_CSWSR;
                continue;
            case WC_C_SS2:              /* ESC 'N' */
                st->ss = 2;
                *ptr = p;
                return 1;
            case WC_C_SS3:              /* ESC 'O' */
                st->ss = 3;
                *ptr = p;
                return 1;
            case WC_C_LS2:              /* ESC 'n' */
                st->gl = 2;
                *ptr = p;
                return 1;
            case WC_C_LS3:              /* ESC 'o' */
                st->gl = 3;
                *ptr = p;
                return 1;
            case WC_C_LS1R:             /* ESC '~' */
                st->gr = 1;
                *ptr = p;
                return 1;
            case WC_C_LS2R:             /* ESC '}' */
                st->gr = 2;
                *ptr = p;
                return 1;
            case WC_C_LS3R:             /* ESC '|' */
                st->gr = 3;
                *ptr = p;
                return 1;
            default:
                return 0;
            }
            break;
        case WC_C_MBCS:                 /* ESC '$' */
            switch (*p) {
            case WC_F_JIS_C_6226:       /* ESC '$' @ */
            case WC_F_JIS_X_0208:       /* ESC '$' B */
            case WC_F_GB_2312:          /* ESC '$' A */
                state = 0;
                cs = WC_C_G0_CS94 | 0x80;
                g = 0;
                f = *p;
                break;
            case WC_C_G0_CS94:          /* ESC '$' '(' */
            case WC_C_G1_CS94:          /* ESC '$' ')' */
            case WC_C_G2_CS94:          /* ESC '$' '*' */
            case WC_C_G3_CS94:          /* ESC '$' '+' */
                state = cs = WC_C_G0_CS94 | 0x80;
                g = *p & 0x03;
                continue;
            case WC_C_G0_CS96:          /* ESC '$' ',' */ /* ISO 2022 does not permit */
            case WC_C_G1_CS96:          /* ESC '$' '-' */
            case WC_C_G2_CS96:          /* ESC '$' '.' */
            case WC_C_G3_CS96:          /* ESC '$' '/' */
                state = cs = WC_C_G0_CS96 | 0x80;
                g = *p & 0x03;
                continue;
            default:
                return 0;
            }
            break;
        case WC_C_G0_CS94:              /* ESC [()*+] F */
            if (*p == WC_C_CS942) {     /* ESC [()*+] '!' */
                /*
                 * Keep `g` from the first intermediate byte.  Deriving
                 * it from '!' here always selected G1, whatever set the
                 * sequence actually designated.
                 */
                state = cs = WC_C_CS942 | 0x80;
                continue;
            }
            /* fallthrough */
        case WC_C_G0_CS96:              /* ESC [,-./] F */
        case WC_C_G0_CS94 | 0x80:       /* ESC '$' [()*+] F */
        case WC_C_G0_CS96 | 0x80:       /* ESC '$' [,-./] F */
        case WC_C_CS942 | 0x80:         /* ESC [()*+] '!' F */
        case WC_C_C0:                   /* ESC [!"&] F */
        case WC_C_CSWSR | 0x80:         /* ESC '%' '/' F */
            state = 0;
            f = *p;
            break;
        case WC_C_CSWSR:                /* ESC '%' F */
            if (*p == WC_C_CSWOSR) {    /* ESC '%' '/' */
                state = cs = WC_C_CSWSR | 0x80;
                continue;
            }
            state = 0;
            f = *p;
            break;
        default:
            return 0;
        }
    }
    /* No (valid) final byte seen: truncated or malformed sequence. */
    if (f < WC_F_ISO_BASE || f > 0x7e)
        return 0;

    switch (cs) {
    case WC_C_G0_CS94:
        st->design[g] = WC_CCS_SET_CS94(f);
        break;
    case WC_C_G0_CS94 | 0x80:
        st->design[g] = WC_CCS_SET_CS94W(f);
        break;
    case WC_C_G0_CS96:
        st->design[g] = WC_CCS_SET_CS96(f);
        break;
    case WC_C_G0_CS96 | 0x80:
        st->design[g] = WC_CCS_SET_CS96W(f);
        break;
    case WC_C_CS942 | 0x80:
        st->design[g] = WC_CCS_SET_CS942(f);
        break;
    case WC_C_CSWSR:
        if (f == WC_F_ISO_BASE)
            st->state = WC_ISO_NOSTATE;
        else
            st->state = WC_ISO_CSWSR;
        break;
    case WC_C_CSWSR | 0x80:             /* ESC '%' '/' F */
        /* was `case WC_C_CSWOSR`, a value `cs` is never assigned, so
         * this state could not be reached */
        st->state = WC_ISO_CSWOSR;
        break;
    }
    *ptr = p - 1;
    return 1;
}

/*
 * Append character `cc` to `os` in 7-bit ISO-2022 form, emitting any
 * designation / invocation sequence needed first (tracked in `st`).
 *
 * Characters whose set is not available in the current gmap tables are
 * converted (through Unicode when enabled and WcOption.ucs_conv is set)
 * or replaced by WC_REPLACE / WC_REPLACE_W; with WcOption.no_replace set,
 * unknown characters are dropped.
 */
void
wc_push_to_iso2022(Str os, wc_wchar_t cc, wc_status *st)
{
    wc_uchar g;
    wc_bool is_wide, retry = WC_FALSE;
    wc_wchar_t cc2;

    while (1) {
        /*
         * Re-derive both per iteration: `cc` may have been replaced by
         * a character from a set of a different width on the previous
         * pass, and a stale is_wide would emit a spurious extra byte.
         */
        g = 0;
        is_wide = WC_FALSE;

        switch (WC_CCS_TYPE(cc.ccs)) {
        case WC_CCS_A_CS94:
            if (cc.ccs == WC_CCS_US_ASCII)
                cc.ccs = st->g0_ccs;
            if (WC_CCS_INDEX(cc.ccs) >= WC_F_ISO_BASE)
                g = cs94_gmap[WC_CCS_INDEX(cc.ccs) - WC_F_ISO_BASE];
            break;
        case WC_CCS_A_CS94W:
            is_wide = WC_TRUE;
            switch (cc.ccs) {
#ifdef USE_UNICODE
            case WC_CCS_JIS_X_0212:
                if (!WcOption.use_jisx0212 && WcOption.use_jisx0213 &&
                    WcOption.ucs_conv) {
                    cc2 = wc_jisx0212_to_jisx0213(cc);
                    if (cc2.ccs == WC_CCS_JIS_X_0213_1 ||
                        cc2.ccs == WC_CCS_JIS_X_0213_2) {
                        cc = cc2;
                        continue;
                    }
                }
                break;
            case WC_CCS_JIS_X_0213_1:
            case WC_CCS_JIS_X_0213_2:
                if (!WcOption.use_jisx0213 && WcOption.use_jisx0212 &&
                    WcOption.ucs_conv) {
                    cc2 = wc_jisx0213_to_jisx0212(cc);
                    if (cc2.ccs == WC_CCS_JIS_X_0212) {
                        cc = cc2;
                        continue;
                    }
                }
                break;
#endif
            }
            if (WC_CCS_INDEX(cc.ccs) >= WC_F_ISO_BASE)
                g = cs94w_gmap[WC_CCS_INDEX(cc.ccs) - WC_F_ISO_BASE];
            break;
        case WC_CCS_A_CS96:
            if (WC_CCS_INDEX(cc.ccs) >= WC_F_ISO_BASE)
                g = cs96_gmap[WC_CCS_INDEX(cc.ccs) - WC_F_ISO_BASE];
            break;
        case WC_CCS_A_CS96W:
            is_wide = WC_TRUE;
            if (WC_CCS_INDEX(cc.ccs) >= WC_F_ISO_BASE)
                g = cs96w_gmap[WC_CCS_INDEX(cc.ccs) - WC_F_ISO_BASE];
            break;
        case WC_CCS_A_CS942:
            if (WC_CCS_INDEX(cc.ccs) >= WC_F_ISO_BASE)
                g = cs942_gmap[WC_CCS_INDEX(cc.ccs) - WC_F_ISO_BASE];
            break;
        case WC_CCS_A_UNKNOWN_W:
            if (WcOption.no_replace)
                return;
            /* two-column replacement written as two US-ASCII bytes */
            is_wide = WC_TRUE;
            cc.ccs = WC_CCS_US_ASCII;
            if (WC_CCS_INDEX(cc.ccs) >= WC_F_ISO_BASE)
                g = cs94_gmap[WC_CCS_INDEX(cc.ccs) - WC_F_ISO_BASE];
            cc.code = ((wc_uint32)WC_REPLACE_W[0] << 8) | WC_REPLACE_W[1];
            break;
        case WC_CCS_A_UNKNOWN:
            if (WcOption.no_replace)
                return;
            cc.ccs = WC_CCS_US_ASCII;
            if (WC_CCS_INDEX(cc.ccs) >= WC_F_ISO_BASE)
                g = cs94_gmap[WC_CCS_INDEX(cc.ccs) - WC_F_ISO_BASE];
            cc.code = (wc_uint32)WC_REPLACE[0];
            break;
        default:
            /* Not an ISO-2022 set: try Johab -> KS X 1001 when that set
             * is available, else fall back to generic conversion. */
            if ((cc.ccs == WC_CCS_JOHAB || cc.ccs == WC_CCS_JOHAB_1 ||
                 cc.ccs == WC_CCS_JOHAB_2 || cc.ccs == WC_CCS_JOHAB_3) &&
                cs94w_gmap[WC_F_KS_X_1001 - WC_F_ISO_BASE]) {
                cc2 = wc_johab_to_ksx1001(cc);
                if (cc2.ccs == WC_CCS_KS_X_1001) {
                    cc = cc2;
                    continue;
                }
            }
#ifdef USE_UNICODE
            if (WcOption.ucs_conv)
                cc = wc_any_to_iso2022(cc, st);
            else
#endif
                cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
            continue;
        }

        if (! g) {
            /* Set not designatable here: convert once, then give up. */
#ifdef USE_UNICODE
            if (WcOption.ucs_conv && ! retry)
                cc = wc_any_to_any_ces(cc, st);
            else
#endif
                cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
            retry = WC_TRUE;
            continue;
        }

        wc_push_iso2022_esc(os, cc.ccs, g, 1, st);
        if (is_wide)
            Strcat_char(os, (char)((cc.code >> 8) & 0x7f));
        Strcat_char(os, (char)(cc.code & 0x7f));
        return;
    }
}

/*
 * Emit the sequences that return the stream to its initial designation:
 * G1 is re-designated (without invoking) if it was changed, and G0 is
 * re-designated and invoked.
 */
void
wc_push_to_iso2022_end(Str os, wc_status *st)
{
    if (st->design[1] != 0 && st->design[1] != st->g1_ccs)
        wc_push_iso2022_esc(os, st->g1_ccs, WC_C_G1_CS94, 0, st);
    wc_push_iso2022_esc(os, st->g0_ccs, WC_C_G0_CS94, 1, st);
}

/*
 * Emit the designation of `ccs` to the G-set encoded in the low two bits
 * of `g` (only if it differs from the current designation) and, when
 * `invoke` is non-zero, the shift that makes that G-set usable: SI/SO
 * for G0/G1 (only if GL changes), ESC SS2 / ESC SS3 for G2/G3.
 * `st` is updated to match what was written.
 */
void
wc_push_iso2022_esc(Str os, wc_ccs ccs, wc_uchar g, wc_uint8 invoke, wc_status *st)
{
    wc_uint8 g_invoke = g & 0x03;

    if (st->design[g_invoke] != ccs) {
        Strcat_char(os, WC_C_ESC);
        if (WC_CCS_IS_WIDE(ccs)) {
            Strcat_char(os, WC_C_MBCS);
            /* ESC '$' F short form is used for these three sets in G0 */
            if (g_invoke != 0 ||
                (ccs != WC_CCS_JIS_C_6226 &&
                 ccs != WC_CCS_JIS_X_0208 &&
                 ccs != WC_CCS_GB_2312))
                Strcat_char(os, (char)g);
        } else {
            Strcat_char(os, (char)g);
            if ((ccs & WC_CCS_A_ISO_2022) == WC_CCS_A_CS942)
                Strcat_char(os, WC_C_CS942);
        }
        Strcat_char(os, (char)WC_CCS_GET_F(ccs));
        st->design[g_invoke] = ccs;
    }
    if (! invoke)
        return;

    switch (g_invoke) {
    case 0:
        if (st->gl != 0) {
            Strcat_char(os, WC_C_SI);
            st->gl = 0;
        }
        break;
    case 1:
        if (st->gl != 1) {
            Strcat_char(os, WC_C_SO);
            st->gl = 1;
        }
        break;
    case 2:
        Strcat_char(os, WC_C_ESC);
        Strcat_char(os, WC_C_SS2);
        break;
    case 3:
        Strcat_char(os, WC_C_ESC);
        Strcat_char(os, WC_C_SS3);
        break;
    }
}

/*
 * Append `cc` to `os` for a generic EUC encoding whose G1 set is
 * st->ces_info->gset[1].ccs.  Characters of that set are written as two
 * bytes with the high bit set; other sets are converted or replaced.
 */
void
wc_push_to_euc(Str os, wc_wchar_t cc, wc_status *st)
{
    wc_ccs g1_ccs = st->ces_info->gset[1].ccs;

    while (1) {
        if (cc.ccs == g1_ccs) {
            Strcat_char(os, (char)((cc.code >> 8) | 0x80));
            Strcat_char(os, (char)((cc.code & 0xff) | 0x80));
            return;
        }
        switch (cc.ccs) {
        case WC_CCS_US_ASCII:
            Strcat_char(os, (char)cc.code);
            return;
        case WC_CCS_C1:
            Strcat_char(os, (char)(cc.code | 0x80));
            return;
        case WC_CCS_UNKNOWN_W:
            if (!WcOption.no_replace)
                Strcat_charp(os, WC_REPLACE_W);
            return;
        case WC_CCS_UNKNOWN:
            if (!WcOption.no_replace)
                Strcat_charp(os, WC_REPLACE);
            return;
        case WC_CCS_JOHAB:
        case WC_CCS_JOHAB_1:
        case WC_CCS_JOHAB_2:
        case WC_CCS_JOHAB_3:
            if (st->ces_info->id == WC_CES_EUC_KR) {
                cc = wc_johab_to_ksx1001(cc);
                continue;
            }
            /* fallthrough */
        default:
#ifdef USE_UNICODE
            if (WcOption.ucs_conv)
                cc = wc_any_to_any_ces(cc, st);
            else
#endif
                cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
            continue;
        }
    }
}

/*
 * Append `cc` to `os` in EUC-JP form: JIS X 0201 kana after SS2,
 * JIS X 0212 / JIS X 0213 plane 2 after SS3, JIS X 0208 / JIS X 0213
 * plane 1 as two high-bit bytes.  Sets disabled through WcOption are
 * converted to an enabled alternative or replaced.
 */
void
wc_push_to_eucjp(Str os, wc_wchar_t cc, wc_status *st)
{
    while (1) {
        switch (cc.ccs) {
        case WC_CCS_US_ASCII:
            Strcat_char(os, (char)cc.code);
            return;
        case WC_CCS_JIS_X_0201K:
            if (WcOption.use_jisx0201k) {
                Strcat_char(os, WC_C_SS2R);
                Strcat_char(os, (char)(cc.code | 0x80));
                return;
            } else if (WcOption.fix_width_conv)
                cc.ccs = WC_CCS_UNKNOWN;
            else
                cc = wc_jisx0201k_to_jisx0208(cc);
            continue;
        case WC_CCS_JIS_X_0208:
            break;
        case WC_CCS_JIS_X_0213_1:
            if (WcOption.use_jisx0213)
                break;
#ifdef USE_UNICODE
            else if (WcOption.ucs_conv && WcOption.use_jisx0212)
                cc = wc_jisx0213_to_jisx0212(cc);
#endif
            else
                cc.ccs = WC_CCS_UNKNOWN_W;
            continue;
        case WC_CCS_JIS_X_0212:
            if (WcOption.use_jisx0212) {
                Strcat_char(os, WC_C_SS3R);
                break;
            }
#ifdef USE_UNICODE
            else if (WcOption.ucs_conv && WcOption.use_jisx0213)
                cc = wc_jisx0212_to_jisx0213(cc);
#endif
            else
                cc.ccs = WC_CCS_UNKNOWN_W;
            continue;
        case WC_CCS_JIS_X_0213_2:
            if (WcOption.use_jisx0213) {
                Strcat_char(os, WC_C_SS3R);
                break;
            }
#ifdef USE_UNICODE
            else if (WcOption.ucs_conv && WcOption.use_jisx0212)
                cc = wc_jisx0213_to_jisx0212(cc);
#endif
            else
                cc.ccs = WC_CCS_UNKNOWN_W;
            continue;
        case WC_CCS_C1:
            Strcat_char(os, (char)(cc.code | 0x80));
            return;
        case WC_CCS_UNKNOWN_W:
            if (!WcOption.no_replace)
                Strcat_charp(os, WC_REPLACE_W);
            return;
        case WC_CCS_UNKNOWN:
            if (!WcOption.no_replace)
                Strcat_charp(os, WC_REPLACE);
            return;
        default:
#ifdef USE_UNICODE
            if (WcOption.ucs_conv)
                cc = wc_any_to_any_ces(cc, st);
            else
#endif
                cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
            continue;
        }
        /* common tail for every two-byte set that did `break` above */
        Strcat_char(os, (char)((cc.code >> 8) | 0x80));
        Strcat_char(os, (char)((cc.code & 0xff) | 0x80));
        return;
    }
}

/*
 * Append `cc` to `os` in EUC-TW form: CNS 11643 plane 1 as two high-bit
 * bytes, planes 2-16 as SS2, a plane byte (0xA1 + plane index), then the
 * two high-bit bytes.
 */
void
wc_push_to_euctw(Str os, wc_wchar_t cc, wc_status *st)
{
    while (1) {
        switch (cc.ccs) {
        case WC_CCS_US_ASCII:
            Strcat_char(os, (char)cc.code);
            return;
        case WC_CCS_CNS_11643_1:
            break;
        case WC_CCS_CNS_11643_2:
        case WC_CCS_CNS_11643_3:
        case WC_CCS_CNS_11643_4:
        case WC_CCS_CNS_11643_5:
        case WC_CCS_CNS_11643_6:
        case WC_CCS_CNS_11643_7:
            Strcat_char(os, WC_C_SS2R);
            Strcat_char(os, (char)(0xA1 + (cc.ccs - WC_CCS_CNS_11643_1)));
            break;
        case WC_CCS_CNS_11643_8:
        case WC_CCS_CNS_11643_9:
        case WC_CCS_CNS_11643_10:
        case WC_CCS_CNS_11643_11:
        case WC_CCS_CNS_11643_12:
        case WC_CCS_CNS_11643_13:
        case WC_CCS_CNS_11643_14:
        case WC_CCS_CNS_11643_15:
        case WC_CCS_CNS_11643_16:
            Strcat_char(os, WC_C_SS2R);
            Strcat_char(os, (char)(0xA8 + (cc.ccs - WC_CCS_CNS_11643_8)));
            break;
        case WC_CCS_C1:
            Strcat_char(os, (char)(cc.code | 0x80));
            return;
        case WC_CCS_UNKNOWN_W:
            if (!WcOption.no_replace)
                Strcat_charp(os, WC_REPLACE_W);
            return;
        case WC_CCS_UNKNOWN:
            if (!WcOption.no_replace)
                Strcat_charp(os, WC_REPLACE);
            return;
        default:
#ifdef USE_UNICODE
            if (WcOption.ucs_conv)
                cc = wc_any_to_any_ces(cc, st);
            else
#endif
                cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
            continue;
        }
        /* common tail for every CNS plane that did `break` above */
        Strcat_char(os, (char)((cc.code >> 8) | 0x80));
        Strcat_char(os, (char)((cc.code & 0xff) | 0x80));
        return;
    }
}

/*
 * Append `cc` to `os` for an ISO-8859 style encoding whose upper half is
 * st->ces_info->gset[1].ccs: characters of that set are written as one
 * byte with the high bit set; other sets are converted or replaced.
 */
void
wc_push_to_iso8859(Str os, wc_wchar_t cc, wc_status *st)
{
    wc_ccs g1_ccs = st->ces_info->gset[1].ccs;

    while (1) {
        if (cc.ccs == g1_ccs) {
            Strcat_char(os, (char)(cc.code | 0x80));
            return;
        }
        switch (cc.ccs) {
        case WC_CCS_US_ASCII:
            Strcat_char(os, (char)cc.code);
            return;
        case WC_CCS_C1:
            Strcat_char(os, (char)(cc.code | 0x80));
            return;
        case WC_CCS_UNKNOWN_W:
            if (!WcOption.no_replace)
                Strcat_charp(os, WC_REPLACE_W);
            return;
        case WC_CCS_UNKNOWN:
            if (!WcOption.no_replace)
                Strcat_charp(os, WC_REPLACE);
            return;
        default:
#ifdef USE_UNICODE
            if (WcOption.ucs_conv)
                cc = wc_any_to_any_ces(cc, st);
            else
#endif
                cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
            continue;
        }
    }
}

/*
 * Rebuild the *_gmap tables for the encoding described by st->ces_info.
 *
 * Every slot is first set to 0 (WcOption.strict_iso2022) or to the
 * per-type default from gset_ext; then each entry of the zero-terminated
 * gset list overrides the slot for its final byte.  Sets disabled through
 * WcOption (JIS X 0201 kana, JIS X 0212, JIS X 0213) are skipped.
 */
void
wc_create_gmap(wc_status *st)
{
    wc_gset *gset = st->ces_info->gset;
    wc_uchar *gset_ext = st->ces_info->gset_ext;
    int i, f;

    /* Loops are bounded by the array size, not by WC_F_ISO_BASE. */
    if (WcOption.strict_iso2022) {
        for (i = 0; i < GMAP_SIZE; i++) {
            cs94_gmap[i] = 0;
            cs96_gmap[i] = 0;
            cs94w_gmap[i] = 0;
            cs96w_gmap[i] = 0;
            cs942_gmap[i] = 0;
        }
    } else {
        for (i = 0; i < GMAP_SIZE; i++) {
            cs94_gmap[i] = gset_ext[0];
            cs96_gmap[i] = gset_ext[1];
            cs94w_gmap[i] = gset_ext[2];
            cs96w_gmap[i] = gset_ext[3];
            cs942_gmap[i] = gset_ext[0];
        }
    }
    for (i = 0; gset[i].ccs; i++) {
        f = WC_CCS_GET_F(gset[i].ccs) - WC_F_ISO_BASE;
        /* never index the tables with an out-of-range final byte */
        if (f < 0 || f >= GMAP_SIZE)
            continue;
        switch (WC_CCS_TYPE(gset[i].ccs)) {
        case WC_CCS_A_CS94:
            switch (gset[i].ccs) {
            case WC_CCS_JIS_X_0201K:
                if (!WcOption.use_jisx0201k)
                    continue;
                break;
            }
            cs94_gmap[f] = gset[i].g;
            break;
        case WC_CCS_A_CS94W:
            switch (gset[i].ccs) {
            case WC_CCS_JIS_X_0212:
                if (!WcOption.use_jisx0212)
                    continue;
                break;
            case WC_CCS_JIS_X_0213_1:
            case WC_CCS_JIS_X_0213_2:
                if (!WcOption.use_jisx0213)
                    continue;
                break;
            }
            cs94w_gmap[f] = gset[i].g;
            break;
        case WC_CCS_A_CS96:
            cs96_gmap[f] = gset[i].g;
            break;
        case WC_CCS_A_CS96W:
            cs96w_gmap[f] = gset[i].g;
            break;
        case WC_CCS_A_CS942:
            cs942_gmap[f] = gset[i].g;
            break;
        }
    }
}

/*
 * Byte-at-a-time ISO-2022 decoder.
 *
 * Feed one input byte `c` per call.  Returns NULL while more bytes are
 * needed to complete a character or escape sequence; otherwise returns
 * the output Str for the completed unit and sets st->state to -1 so the
 * next call starts a fresh unit.
 *
 * The pending bytes and the output Str live in function-static storage,
 * so that state is shared by all callers of this function.
 */
Str
wc_char_conv_from_iso2022(wc_uchar c, wc_status *st)
{
    static Str os;
    static wc_uchar buf[4];     /* bytes of the unit being assembled */
    static size_t nbuf;
    wc_uchar *p;
    wc_ccs gl_ccs, gr_ccs;

    if (st->state == -1) {
        st->state = WC_ISO_NOSTATE;
        os = Strnew_size(8);
        nbuf = 0;
    }

    gl_ccs = st->ss ? st->design[st->ss] : st->design[st->gl];
    gr_ccs = st->ss ? st->design[st->ss] : st->design[st->gr];

    switch (st->state) {
    case WC_ISO_NOSTATE:
        switch (WC_ISO_MAP[c]) {
        case GL2:
            if (!(WC_CCS_TYPE(gl_ccs) & WC_CCS_A_CS96)) {
                Strcat_char(os, (char)c);
                break;
            }
            /* fallthrough */
        case GL:
            if (WC_CCS_IS_WIDE(gl_ccs)) {
                buf[nbuf++] = c;
                st->state = WC_ISO_MBYTE1;
                return NULL;
            }
            /* NOTE(review): CCS value compared against a CES constant;
             * WC_CCS_US_ASCII may have been intended -- confirm. */
            else if (gl_ccs == WC_CES_US_ASCII)
                Strcat_char(os, (char)c);
            else
                wtf_push_iso2022(os, gl_ccs, (wc_uint32)c);
            break;
        case GR2:
            if (!(WC_CCS_TYPE(gr_ccs) & WC_CCS_A_CS96))
                break;
            /* fallthrough */
        case GR:
            if (WC_CCS_IS_WIDE(gr_ccs)) {
                buf[nbuf++] = c;
                st->state = WC_EUC_MBYTE1;
                return NULL;
            }
            else if (gr_ccs)
                wtf_push_iso2022(os, gr_ccs, (wc_uint32)c);
            break;
        case C0:
            Strcat_char(os, (char)c);
            break;
        case C1:
            break;
        case ESC:
            buf[nbuf++] = c;
            st->state = WC_C_ESC;
            return NULL;
        case SI:
            st->gl = 0;
            break;
        case SO:
            st->gl = 1;
            break;
        case SS2:
            if (! st->design[2])
                return os;
            st->ss = 2;
            return NULL;
        case SS3:
            if (! st->design[3])
                return os;
            st->ss = 3;
            return NULL;
        }
        break;
    case WC_ISO_MBYTE1:
        switch (WC_ISO_MAP[c]) {
        case GL2:
            if (!(WC_CCS_TYPE(gl_ccs) & WC_CCS_A_CS96))
                break;
            /* fallthrough */
        case GL:
            buf[nbuf++] = c;
            wtf_push_iso2022(os, gl_ccs, ((wc_uint32)buf[0] << 8) | buf[1]);
            break;
        }
        st->state = WC_ISO_NOSTATE;
        break;
    case WC_EUC_MBYTE1:
        switch (WC_ISO_MAP[c]) {
        case GR2:
            if (!(WC_CCS_TYPE(gr_ccs) & WC_CCS_A_CS96))
                break;
            /* fallthrough */
        case GR:
            if (gr_ccs == WC_CCS_CNS_11643_X) {
                buf[nbuf++] = c;
                st->state = WC_EUC_TW_MBYTE2;
                return NULL;
            }
            buf[nbuf++] = c;
            wtf_push_iso2022(os, gr_ccs, ((wc_uint32)buf[0] << 8) | buf[1]);
            break;
        }
        st->state = WC_ISO_NOSTATE;
        break;
    case WC_EUC_TW_MBYTE2:
        if (WC_ISO_MAP[c] == GR) {
            buf[nbuf++] = c;
            c = buf[0];         /* plane selector byte */
            if (0xa1 <= c && c <= 0xa7) {
                wtf_push_iso2022(os, WC_CCS_CNS_11643_1 + (c - 0xa1),
                                 ((wc_uint32)buf[1] << 8) | buf[2]);
                break;
            }
            if (0xa8 <= c && c <= 0xb0) {
                wtf_push_iso2022(os, WC_CCS_CNS_11643_8 + (c - 0xa8),
                                 ((wc_uint32)buf[1] << 8) | buf[2]);
                break;
            }
        }
        st->state = WC_ISO_NOSTATE;
        break;
    case WC_C_ESC:
        switch (c) {
        case WC_C_G0_CS94:
        case WC_C_G1_CS94:
        case WC_C_G2_CS94:
        case WC_C_G3_CS94:
            buf[nbuf++] = c;
            st->state = WC_C_G0_CS94;
            return NULL;
        case WC_C_G0_CS96:
        case WC_C_G1_CS96:
        case WC_C_G2_CS96:
        case WC_C_G3_CS96:
        case WC_C_C0:
        case WC_C_C1:
        case WC_C_REP:
            buf[nbuf++] = c;
            st->state = WC_C_G0_CS96;
            return NULL;
        case WC_C_MBCS:
        case WC_C_CSWSR:
            buf[nbuf++] = c;
            st->state = c;
            return NULL;
        case WC_C_SS2:
            st->ss = 2;
            st->state = WC_ISO_NOSTATE;
            /* drop the buffered ESC so the shifted character is
             * assembled from buf[0], as the MBYTE states expect */
            nbuf = 0;
            return NULL;
        case WC_C_SS3:
            st->ss = 3;
            st->state = WC_ISO_NOSTATE;
            nbuf = 0;           /* see WC_C_SS2 above */
            return NULL;
        case WC_C_LS2:
            st->gl = 2;
            break;
        case WC_C_LS3:
            st->gl = 3;
            break;
        case WC_C_LS2R:
            st->gr = 2;
            break;
        case WC_C_LS3R:
            st->gr = 3;
            break;
        default:
            break;
        }
        break;
    case WC_C_MBCS:
        switch (c) {
        case WC_F_JIS_C_6226:
        case WC_F_JIS_X_0208:
        case WC_F_GB_2312:
            buf[nbuf++] = c;
            p = buf;
            wc_parse_iso2022_esc(&p, st);
            break;
        case WC_C_G0_CS94:
        case WC_C_G1_CS94:
        case WC_C_G2_CS94:
        case WC_C_G3_CS94:
        case WC_C_G0_CS96:
        case WC_C_G1_CS96:
        case WC_C_G2_CS96:
        case WC_C_G3_CS96:
            buf[nbuf++] = c;
            st->state = WC_C_G0_CS96;
            return NULL;
        }
        break;
    case WC_C_CSWSR:
        switch (c) {
        case WC_C_CSWOSR:
            buf[nbuf++] = c;
            st->state = WC_C_G1_CS94;
            return NULL;
        }
        buf[nbuf++] = c;
        p = buf;
        wc_parse_iso2022_esc(&p, st);
        break;
    case WC_C_G0_CS94:
        switch (c) {
        case WC_C_CS942:
            buf[nbuf++] = c;
            st->state = WC_C_G0_CS96;
            return NULL;
        }
        /* fallthrough: `c` is the final byte */
    case WC_C_G0_CS96:
        buf[nbuf++] = c;
        p = buf;
        wc_parse_iso2022_esc(&p, st);
        break;
    }
    st->ss = 0;
    st->state = -1;
    return os;
}