/*
 * Shift_JIS <-> JIS X 0208 / JIS X 0213 conversion: byte classification
 * table, code-point arithmetic, whole-string converters, and the
 * byte-at-a-time converters.
 */

#include "wc.h"
#include "sjis.h"
#include "jis.h"
#include "wtf.h"
#include "ucs.h"

#include "map/jisx02132_sjis.map"

/* Exported pointer to the jisx02132_sjis_map table from the .map file. */
wc_uchar *wc_jisx0212_jisx02132_map = jisx02132_sjis_map;

/* Short local aliases for the byte classes stored in WC_SJIS_MAP. */
#define C0  WC_SJIS_MAP_C0
#define GL  WC_SJIS_MAP_GL
#define LB  WC_SJIS_MAP_LB
#define S80 WC_SJIS_MAP_80
#define SK  WC_SJIS_MAP_SK
#define SL  WC_SJIS_MAP_SL
#define SH  WC_SJIS_MAP_SH
#define SX  WC_SJIS_MAP_SX
#define C1  WC_SJIS_MAP_C1
#define SA0 WC_SJIS_MAP_A0

/*
 * Class of every possible input byte, indexed by the byte value.
 * The converters below use SL/SH/SX to recognise the first byte of a
 * two-byte sequence and test "& LB" to accept the second byte.
 */
wc_uint8 WC_SJIS_MAP[ 0x100 ] = {
    /* 0x00 - 0x0f */
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
    /* 0x10 - 0x1f */
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
    /* 0x20 - 0x2f */
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
    /* 0x30 - 0x3f */
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
    /* 0x40 - 0x4f */
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
    /* 0x50 - 0x5f */
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
    /* 0x60 - 0x6f */
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
    /* 0x70 - 0x7f */
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, C0,
    /* 0x80 - 0x8f */
    S80,SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL,
    /* 0x90 - 0x9f */
    SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL,
    /* 0xa0 - 0xaf */
    SA0,SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK,
    /* 0xb0 - 0xbf */
    SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK,
    /* 0xc0 - 0xcf */
    SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK,
    /* 0xd0 - 0xdf */
    SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK,
    /* 0xe0 - 0xef */
    SH, SH, SH, SH, SH, SH, SH, SH, SH, SH, SH, SH, SH, SH, SH, SH,
    /* 0xf0 - 0xff */
    SX, SX, SX, SX, SX, SX, SX, SX, SX, SX, SX, SX, SX, C1, C1, C1,
};

/*
 * The four macros below rewrite a byte pair IN PLACE.  Both arguments
 * must be modifiable wc_uchar lvalues without side effects, because each
 * argument is evaluated several times.
 */

/* Shift_JIS first/second byte -> JIS X 0208 7-bit row/cell bytes. */
#define sjis_to_jisx0208(ub, lb) \
    do { \
        (ub) -= ((ub) < 0xa0) ? 0x81 : 0xc1; \
        (ub) = ((ub) << 1) + 0x21; \
        if ((lb) < 0x9f) { \
            (lb) -= ((lb) > 0x7e) ? 0x20 : 0x1f; \
        } else { \
            (ub)++; \
            (lb) -= 0x7e; \
        } \
    } while (0)

/*
 * Shift_JIS pair with first byte >= 0xf0 -> JIS X 0213 plane 2 bytes.
 * The row comes from one of two lookup tables (indexed by ub - 0xf0),
 * chosen by which half of the second-byte range lb falls in.
 */
#define sjis_to_jisx02132(ub, lb) \
    do { \
        if ((lb) < 0x9f) { \
            (ub) = sjis1_jisx02132_map[(ub) - 0xf0]; \
            (lb) -= ((lb) > 0x7e) ? 0x20 : 0x1f; \
        } else { \
            (ub) = sjis2_jisx02132_map[(ub) - 0xf0]; \
            (lb) -= 0x7e; \
        } \
    } while (0)

/* JIS X 0208 7-bit row/cell bytes -> Shift_JIS first/second byte. */
#define jisx0208_to_sjis(ub, lb) \
    do { \
        (lb) += ((ub) & 1) ? 0x1f : 0x7d; \
        if ((lb) > 0x7e) \
            (lb)++; \
        (ub) = ((ub) - 0x21) >> 1; \
        (ub) += ((ub) < 0x1f) ? 0x81 : 0xc1; \
    } while (0)

/*
 * JIS X 0213 plane 2 bytes -> Shift_JIS pair.  The first byte is looked
 * up in jisx02132_sjis_map; callers treat a resulting ub of 0 as
 * "no mapping".
 */
#define jisx02132_to_sjis(ub, lb) \
    do { \
        (lb) += ((ub) & 1) ? 0x1f : 0x7d; \
        if ((lb) > 0x7e) \
            (lb)++; \
        (ub) = jisx02132_sjis_map[ (ub) ]; \
    } while (0)

/*
 * Convert a two-byte Shift_JIS code in cc.code to a JIS code.
 * First bytes below 0xf0 yield WC_CCS_JIS_X_0208, all others
 * WC_CCS_JIS_X_0213_2.  Returns the converted character.
 */
wc_wchar_t
wc_sjis_to_jis(wc_wchar_t cc)
{
    wc_uchar ub, lb;

    ub = cc.code >> 8;
    lb = cc.code & 0xff;
    if (ub < 0xf0) {
        sjis_to_jisx0208(ub, lb);
        cc.ccs = WC_CCS_JIS_X_0208;
    } else {
        sjis_to_jisx02132(ub, lb);
        cc.ccs = WC_CCS_JIS_X_0213_2;
    }
    cc.code = ((wc_uint32)ub << 8) | lb;
    return cc;
}

/*
 * Convert a JIS code to a two-byte Shift_JIS code.
 * cc.ccs selects the plane-2 table path (WC_CCS_JIS_X_0213_2) or the
 * arithmetic JIS X 0208 path (anything else).  If the plane-2 lookup
 * yields 0, cc is returned with ccs set to WC_CCS_UNKNOWN_W and its code
 * left untouched; otherwise ccs is left as it was and code is replaced.
 */
wc_wchar_t
wc_jis_to_sjis(wc_wchar_t cc)
{
    wc_uchar ub, lb;

    ub = (cc.code >> 8) & 0x7f;
    lb = cc.code & 0x7f;
    if (cc.ccs == WC_CCS_JIS_X_0213_2) {
        jisx02132_to_sjis(ub, lb);
        if (! ub) {
            cc.ccs = WC_CCS_UNKNOWN_W;
            return cc;
        }
    } else {
        jisx0208_to_sjis(ub, lb);
    }
    cc.code = ((wc_uint32)ub << 8) | lb;
    return cc;
}

/*
 * Re-express a raw Shift_JIS extension code as a row/cell pair.
 * Rows up to 0x7e after the JIS X 0208 arithmetic go to
 * WC_CCS_SJIS_EXT_1; higher rows are shifted down by 0x5e and go to
 * WC_CCS_SJIS_EXT_2.
 */
wc_wchar_t
wc_sjis_ext_to_cs94w(wc_wchar_t cc)
{
    wc_uchar ub, lb;

    ub = cc.code >> 8;
    lb = cc.code & 0xff;
    sjis_to_jisx0208(ub, lb);
    if (ub <= 0x7e) {
        cc.ccs = WC_CCS_SJIS_EXT_1;
    } else {
        ub -= 0x5e;
        cc.ccs = WC_CCS_SJIS_EXT_2;
    }
    cc.code = ((wc_uint32)ub << 8) | lb;
    return cc;
}

/*
 * Inverse of wc_sjis_ext_to_cs94w(): turn a WC_CCS_SJIS_EXT_1 or
 * WC_CCS_SJIS_EXT_2 row/cell pair back into a raw Shift_JIS code tagged
 * WC_CCS_SJIS_EXT.
 */
wc_wchar_t
wc_cs94w_to_sjis_ext(wc_wchar_t cc)
{
    wc_uchar ub, lb;

    ub = (cc.code >> 8) & 0x7f;
    lb = cc.code & 0x7f;
    if (cc.ccs == WC_CCS_SJIS_EXT_2)
        ub += 0x5e;
    jisx0208_to_sjis(ub, lb);
    cc.ccs = WC_CCS_SJIS_EXT;
    cc.code = ((wc_uint32)ub << 8) | lb;
    return cc;
}

/*
 * Map a WC_CCS_SJIS_EXT_1 row/cell code to a dense index:
 * row 0x2D becomes block 0, rows 0x79-0x7C become blocks 1-4, and each
 * block holds 0x5e cells.  Returns WC_C_SJIS_ERROR for any other row.
 */
wc_uint32
wc_sjis_ext1_to_N(wc_uint32 c)
{
    wc_uchar ub;

    ub = (c >> 8) & 0x7f;
    switch (ub) {
    case 0x2D:  /* 0x8740 - */
        ub = 0;
        break;
    case 0x79:  /* 0xED40 - */
    case 0x7A:  /* 0xED9F - */
    case 0x7B:  /* 0xEE40 - */
    case 0x7C:  /* 0xEE9F - */
        ub -= 0x78;
        break;
    default:
        return WC_C_SJIS_ERROR;
    }
    return ub * 0x5e + (c & 0x7f) - 0x21;
}

/*
 * Map a WC_CCS_SJIS_EXT_2 row/cell code to a dense index:
 * rows 0x35-0x39 become blocks 5-9 of 0x5e cells each, continuing the
 * numbering used by wc_sjis_ext1_to_N().  Returns WC_C_SJIS_ERROR for
 * any other row.
 */
wc_uint32
wc_sjis_ext2_to_N(wc_uint32 c)
{
    wc_uchar ub;

    ub = (c >> 8) & 0x7f;
    switch (ub) {
    case 0x35:  /* 0xFA40 - */
    case 0x36:  /* 0xFA9F - */
    case 0x37:  /* 0xFB40 - */
    case 0x38:  /* 0xFB9F - */
    case 0x39:  /* 0xFC40 - */
        ub -= 0x30;
        break;
    default:
        return WC_C_SJIS_ERROR;
    }
    return ub * 0x5e + (c & 0x7f) - 0x21;
}

/*
 * Convert a whole Shift_JIS string to the internal (wtf) form.
 *
 * If every byte is below 0x80 the input Str itself is returned;
 * otherwise a new Str is built.  Two-byte codes that
 * wc_jisx0208_or_jisx02131() does not classify as JIS X 0208, and all
 * codes with an SX first byte, are stored as WC_CCS_SJIS_EXT using the
 * raw Shift_JIS bytes.  Malformed or truncated sequences are passed to
 * wtf_push_unknown().  The ces argument is not used here.
 */
Str
wc_conv_from_sjis(Str is, wc_ces ces)
{
    Str os;
    wc_uchar *sp = (wc_uchar *)is->ptr;
    wc_uchar *ep = sp + is->length;
    wc_uchar *p;
    wc_uchar jis[2];
    int state = WC_SJIS_NOSTATE;
    wc_wchar_t cc;

    /* Skip the leading run of bytes below 0x80. */
    for (p = sp; p < ep && *p < 0x80; p++)
        ;
    if (p == ep)
        return is;
    os = Strnew_size(is->length);
    if (p > sp)
        Strcat_charp_n(os, is->ptr, (int)(p - sp));

    for (; p < ep; p++) {
        switch (state) {
        case WC_SJIS_NOSTATE:
            switch (WC_SJIS_MAP[*p]) {
            case SL:
                state = WC_SJIS_SHIFT_L;
                break;
            case SH:
                state = WC_SJIS_SHIFT_H;
                break;
            case SX:
                state = WC_SJIS_SHIFT_X;
                break;
            case SK:
                wtf_push(os, WC_CCS_JIS_X_0201K, (wc_uint32)*p);
                break;
            case S80:
            case SA0:
            case C1:
                wtf_push_unknown(os, p, 1);
                break;
            default:
                Strcat_char(os, (char)*p);
                break;
            }
            break;
        case WC_SJIS_SHIFT_L:
        case WC_SJIS_SHIFT_H:
            if (WC_SJIS_MAP[*p] & LB) {
                jis[0] = *(p-1);
                jis[1] = *p;
                sjis_to_jisx0208(jis[0], jis[1]);
                cc.code = ((wc_uint32)jis[0] << 8) | jis[1];
                cc.ccs = wc_jisx0208_or_jisx02131(cc.code);
                if (cc.ccs == WC_CCS_JIS_X_0208)
                    wtf_push(os, cc.ccs, cc.code);
                else
                    wtf_push(os, WC_CCS_SJIS_EXT, ((wc_uint32)*(p-1) << 8) | *p);
            } else
                wtf_push_unknown(os, p-1, 2);
            state = WC_SJIS_NOSTATE;
            break;
        case WC_SJIS_SHIFT_X:
            if (WC_SJIS_MAP[*p] & LB)
                wtf_push(os, WC_CCS_SJIS_EXT, ((wc_uint32)*(p-1) << 8) | *p);
            else
                wtf_push_unknown(os, p-1, 2);
            state = WC_SJIS_NOSTATE;
            break;
        }
    }

    /* Input ended right after a first byte: report that byte alone. */
    switch (state) {
    case WC_SJIS_SHIFT_L:
    case WC_SJIS_SHIFT_H:
    case WC_SJIS_SHIFT_X:
        wtf_push_unknown(os, p-1, 1);
        break;
    }
    return os;
}

/*
 * Convert a whole Shift_JIS string to the internal (wtf) form, treating
 * it as the JIS X 0213 variant.
 *
 * Same scanning as wc_conv_from_sjis(), except that SL/SH pairs are
 * stored under whatever set wc_jisx0208_or_jisx02131() reports, and SX
 * pairs are converted to WC_CCS_JIS_X_0213_2.  The ces argument is not
 * used here.
 */
Str
wc_conv_from_sjisx0213(Str is, wc_ces ces)
{
    Str os;
    wc_uchar *sp = (wc_uchar *)is->ptr;
    wc_uchar *ep = sp + is->length;
    wc_uchar *p;
    wc_uchar jis[2];
    int state = WC_SJIS_NOSTATE;
    wc_wchar_t cc;

    /* Skip the leading run of bytes below 0x80. */
    for (p = sp; p < ep && *p < 0x80; p++)
        ;
    if (p == ep)
        return is;
    os = Strnew_size(is->length);
    if (p > sp)
        Strcat_charp_n(os, is->ptr, (int)(p - sp));

    for (; p < ep; p++) {
        switch (state) {
        case WC_SJIS_NOSTATE:
            switch (WC_SJIS_MAP[*p]) {
            case SL:
                state = WC_SJIS_SHIFT_L;
                break;
            case SH:
                state = WC_SJIS_SHIFT_H;
                break;
            case SX:
                state = WC_SJIS_SHIFT_X;
                break;
            case SK:
                wtf_push(os, WC_CCS_JIS_X_0201K, (wc_uint32)*p);
                break;
            case S80:
            case SA0:
            case C1:
                wtf_push_unknown(os, p, 1);
                break;
            default:
                Strcat_char(os, (char)*p);
                break;
            }
            break;
        case WC_SJIS_SHIFT_L:
        case WC_SJIS_SHIFT_H:
            if (WC_SJIS_MAP[*p] & LB) {
                jis[0] = *(p-1);
                jis[1] = *p;
                sjis_to_jisx0208(jis[0], jis[1]);
                cc.code = ((wc_uint32)jis[0] << 8) | jis[1];
                cc.ccs = wc_jisx0208_or_jisx02131(cc.code);
                wtf_push(os, cc.ccs, cc.code);
            } else
                wtf_push_unknown(os, p-1, 2);
            state = WC_SJIS_NOSTATE;
            break;
        case WC_SJIS_SHIFT_X:
            if (WC_SJIS_MAP[*p] & LB) {
                jis[0] = *(p-1);
                jis[1] = *p;
                sjis_to_jisx02132(jis[0], jis[1]);
                wtf_push(os, WC_CCS_JIS_X_0213_2,
                         ((wc_uint32)jis[0] << 8) | jis[1]);
            } else
                wtf_push_unknown(os, p-1, 2);
            state = WC_SJIS_NOSTATE;
            break;
        }
    }

    /* Input ended right after a first byte: report that byte alone. */
    switch (state) {
    case WC_SJIS_SHIFT_L:
    case WC_SJIS_SHIFT_H:
    case WC_SJIS_SHIFT_X:
        wtf_push_unknown(os, p-1, 1);
        break;
    }
    return os;
}

/*
 * Append one internal character to os as Shift_JIS bytes.
 *
 * Character sets that cannot be written directly are rewritten (to
 * JIS X 0208, to an UNKNOWN set, or through wc_any_to_any_ces() when
 * USE_UNICODE and WcOption.ucs_conv are enabled) and the switch is
 * retried.  UNKNOWN sets emit the replacement string unless
 * WcOption.no_replace is set.
 */
void
wc_push_to_sjis(Str os, wc_wchar_t cc, wc_status *st)
{
    wc_uchar ub, lb;

    while (1) {
        switch (cc.ccs) {
        case WC_CCS_US_ASCII:
            Strcat_char(os, cc.code);
            return;
        case WC_CCS_JIS_X_0201K:
            if (WcOption.use_jisx0201k) {
                Strcat_char(os, cc.code | 0x80);
                return;
            } else if (WcOption.fix_width_conv)
                cc.ccs = WC_CCS_UNKNOWN;
            else
                cc = wc_jisx0201k_to_jisx0208(cc);
            continue;
        case WC_CCS_JIS_X_0208:
            ub = (cc.code >> 8) & 0x7f;
            lb = cc.code & 0x7f;
            jisx0208_to_sjis(ub, lb);
            Strcat_char(os, ub);
            Strcat_char(os, lb);
            return;
        case WC_CCS_SJIS_EXT_1:
        case WC_CCS_SJIS_EXT_2:
            cc = wc_cs94w_to_sjis_ext(cc);
            /* FALLTHROUGH: cc now holds a raw WC_CCS_SJIS_EXT code */
        case WC_CCS_SJIS_EXT:
            Strcat_char(os, (char)(cc.code >> 8));
            Strcat_char(os, (char)(cc.code & 0xff));
            return;
        case WC_CCS_UNKNOWN_W:
            if (!WcOption.no_replace)
                Strcat_charp(os, WC_REPLACE_W);
            return;
        case WC_CCS_UNKNOWN:
            if (!WcOption.no_replace)
                Strcat_charp(os, WC_REPLACE);
            return;
        default:
#ifdef USE_UNICODE
            if (WcOption.ucs_conv)
                cc = wc_any_to_any_ces(cc, st);
            else
#endif
                cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
            continue;
        }
    }
}

/*
 * Append one internal character to os as Shift_JIS (JIS X 0213 variant)
 * bytes.
 *
 * JIS X 0213 plane 1 shares the JIS X 0208 arithmetic; plane 2 uses the
 * lookup table.  Both planes are replaced by WC_CCS_UNKNOWN_W when
 * WcOption.use_jisx0213 is off, and plane 2 also when the table has no
 * entry.  Other sets are handled as in wc_push_to_sjis(), except that
 * there are no WC_CCS_SJIS_EXT* cases.
 */
void
wc_push_to_sjisx0213(Str os, wc_wchar_t cc, wc_status *st)
{
    wc_uchar ub, lb;

    while (1) {
        switch (cc.ccs) {
        case WC_CCS_US_ASCII:
            Strcat_char(os, cc.code);
            return;
        case WC_CCS_JIS_X_0201K:
            if (WcOption.use_jisx0201k) {
                Strcat_char(os, cc.code | 0x80);
                return;
            } else if (WcOption.fix_width_conv)
                cc.ccs = WC_CCS_UNKNOWN;
            else
                cc = wc_jisx0201k_to_jisx0208(cc);
            continue;
        case WC_CCS_JIS_X_0213_1:
            if (! WcOption.use_jisx0213) {
                cc.ccs = WC_CCS_UNKNOWN_W;
                continue;
            }
            /* FALLTHROUGH: plane 1 is encoded like JIS X 0208 */
        case WC_CCS_JIS_X_0208:
            ub = (cc.code >> 8) & 0x7f;
            lb = cc.code & 0x7f;
            jisx0208_to_sjis(ub, lb);
            Strcat_char(os, ub);
            Strcat_char(os, lb);
            return;
        case WC_CCS_JIS_X_0213_2:
            if (! WcOption.use_jisx0213) {
                cc.ccs = WC_CCS_UNKNOWN_W;
                continue;
            }
            ub = (cc.code >> 8) & 0x7f;
            lb = cc.code & 0x7f;
            jisx02132_to_sjis(ub, lb);
            if (ub) {
                Strcat_char(os, ub);
                Strcat_char(os, lb);
                return;
            }
            /* FALLTHROUGH: no table entry, emit the wide replacement */
        case WC_CCS_UNKNOWN_W:
            if (!WcOption.no_replace)
                Strcat_charp(os, WC_REPLACE_W);
            return;
        case WC_CCS_UNKNOWN:
            if (!WcOption.no_replace)
                Strcat_charp(os, WC_REPLACE);
            return;
        default:
#ifdef USE_UNICODE
            if (WcOption.ucs_conv)
                cc = wc_any_to_any_ces(cc, st);
            else
#endif
                cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
            continue;
        }
    }
}

/*
 * Byte-at-a-time Shift_JIS decoder.
 *
 * Call with st->state == -1 to start a fresh character; that allocates a
 * new output Str.  Returns NULL after buffering the first byte of a
 * two-byte code, otherwise resets st->state to -1 and returns the output
 * Str (left empty when the byte or pair was not accepted).
 * The output Str and the buffered bytes are function-static.
 */
Str
wc_char_conv_from_sjis(wc_uchar c, wc_status *st)
{
    static Str os;
    static wc_uchar jis[2];
    wc_wchar_t cc;

    if (st->state == -1) {
        st->state = WC_SJIS_NOSTATE;
        os = Strnew_size(8);
    }

    switch (st->state) {
    case WC_SJIS_NOSTATE:
        switch (WC_SJIS_MAP[c]) {
        case SL:
            jis[0] = c;
            st->state = WC_SJIS_SHIFT_L;
            return NULL;
        case SH:
            jis[0] = c;
            st->state = WC_SJIS_SHIFT_H;
            return NULL;
        case SX:
            jis[0] = c;
            st->state = WC_SJIS_SHIFT_X;
            return NULL;
        case SK:
            wtf_push(os, WC_CCS_JIS_X_0201K, (wc_uint32)c);
            break;
        case S80:
        case SA0:
        case C1:
            break;
        default:
            Strcat_char(os, (char)c);
            break;
        }
        break;
    case WC_SJIS_SHIFT_L:
    case WC_SJIS_SHIFT_H:
        if (WC_SJIS_MAP[c] & LB) {
            wc_uchar ub = jis[0];
            wc_uchar lb = c;

            jis[1] = c;
            /*
             * Convert a scratch copy: WC_CCS_SJIS_EXT must carry the raw
             * Shift_JIS bytes (as wc_conv_from_sjis() stores them and
             * wc_push_to_sjis() writes them back), so jis[] has to stay
             * unconverted for the fallback branch.
             */
            sjis_to_jisx0208(ub, lb);
            cc.code = ((wc_uint32)ub << 8) | lb;
            cc.ccs = wc_jisx0208_or_jisx02131(cc.code);
            if (cc.ccs == WC_CCS_JIS_X_0208)
                wtf_push(os, cc.ccs, cc.code);
            else
                wtf_push(os, WC_CCS_SJIS_EXT, ((wc_uint32)jis[0] << 8) | jis[1]);
        }
        st->state = WC_SJIS_NOSTATE;
        break;
    case WC_SJIS_SHIFT_X:
        if (WC_SJIS_MAP[c] & LB) {
            jis[1] = c;
            wtf_push(os, WC_CCS_SJIS_EXT, ((wc_uint32)jis[0] << 8) | jis[1]);
        }
        st->state = WC_SJIS_NOSTATE;
        break;
    }
    st->state = -1;
    return os;
}

/*
 * Byte-at-a-time Shift_JIS (JIS X 0213 variant) decoder.
 *
 * Same protocol as wc_char_conv_from_sjis(): start with st->state == -1,
 * NULL means "first byte buffered", a non-NULL Str ends the character
 * and resets st->state to -1.  SL/SH pairs are stored under the set
 * reported by wc_jisx0208_or_jisx02131(); SX pairs are converted to
 * WC_CCS_JIS_X_0213_2.
 */
Str
wc_char_conv_from_sjisx0213(wc_uchar c, wc_status *st)
{
    static Str os;
    static wc_uchar jis[2];
    wc_wchar_t cc;

    if (st->state == -1) {
        st->state = WC_SJIS_NOSTATE;
        os = Strnew_size(8);
    }

    switch (st->state) {
    case WC_SJIS_NOSTATE:
        switch (WC_SJIS_MAP[c]) {
        case SL:
            jis[0] = c;
            st->state = WC_SJIS_SHIFT_L;
            return NULL;
        case SH:
            jis[0] = c;
            st->state = WC_SJIS_SHIFT_H;
            return NULL;
        case SX:
            jis[0] = c;
            st->state = WC_SJIS_SHIFT_X;
            return NULL;
        case SK:
            wtf_push(os, WC_CCS_JIS_X_0201K, (wc_uint32)c);
            break;
        case S80:
        case SA0:
        case C1:
            break;
        default:
            Strcat_char(os, (char)c);
            break;
        }
        break;
    case WC_SJIS_SHIFT_L:
    case WC_SJIS_SHIFT_H:
        if (WC_SJIS_MAP[c] & LB) {
            jis[1] = c;
            sjis_to_jisx0208(jis[0], jis[1]);
            cc.code = ((wc_uint32)jis[0] << 8) | jis[1];
            cc.ccs = wc_jisx0208_or_jisx02131(cc.code);
            wtf_push(os, cc.ccs, cc.code);
        }
        st->state = WC_SJIS_NOSTATE;
        break;
    case WC_SJIS_SHIFT_X:
        if (WC_SJIS_MAP[c] & LB) {
            jis[1] = c;
            sjis_to_jisx02132(jis[0], jis[1]);
            wtf_push(os, WC_CCS_JIS_X_0213_2,
                     ((wc_uint32)jis[0] << 8) | jis[1]);
        }
        st->state = WC_SJIS_NOSTATE;
        break;
    }
    st->state = -1;
    return os;
}