diff options
Diffstat (limited to 'libwc/sjis.c')
-rw-r--r-- | libwc/sjis.c | 595 |
1 files changed, 595 insertions, 0 deletions
diff --git a/libwc/sjis.c b/libwc/sjis.c new file mode 100644 index 0000000..3b41cde --- /dev/null +++ b/libwc/sjis.c @@ -0,0 +1,595 @@ + +#include "wc.h" +#include "sjis.h" +#include "jis.h" +#include "wtf.h" +#include "ucs.h" + +#include "map/jisx02132_sjis.map" +wc_uchar *wc_jisx0212_jisx02132_map = jisx02132_sjis_map; + +#define C0 WC_SJIS_MAP_C0 +#define GL WC_SJIS_MAP_GL +#define LB WC_SJIS_MAP_LB +#define S80 WC_SJIS_MAP_80 +#define SK WC_SJIS_MAP_SK +#define SL WC_SJIS_MAP_SL +#define SH WC_SJIS_MAP_SH +#define SX WC_SJIS_MAP_SX +#define C1 WC_SJIS_MAP_C1 +#define SA0 WC_SJIS_MAP_A0 + +wc_uint8 WC_SJIS_MAP[ 0x100 ] = { + C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, + C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, + GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, + GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, + LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, + LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, + LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, + LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, C0, + + S80,SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, + SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, SL, + SA0,SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, + SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, + SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, + SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, SK, + SH, SH, SH, SH, SH, SH, SH, SH, SH, SH, SH, SH, SH, SH, SH, SH, + SX, SX, SX, SX, SX, SX, SX, SX, SX, SX, SX, SX, SX, C1, C1, C1, +}; + +#define sjis_to_jisx0208(ub, lb) \ +{ \ + ub -= (ub < 0xa0) ? 0x81 : 0xc1; \ + ub = (ub << 1) + 0x21; \ + if (lb < 0x9f) { \ + lb -= (lb > 0x7e) ? 0x20 : 0x1f; \ + } else { \ + ub++; \ + lb -= 0x7e; \ + } \ +} +#define sjis_to_jisx02132(ub, lb) \ +{ \ + if (lb < 0x9f) { \ + ub = sjis1_jisx02132_map[ub - 0xf0]; \ + lb -= (lb > 0x7e) ? 0x20 : 0x1f; \ + } else { \ + ub = sjis2_jisx02132_map[ub - 0xf0]; \ + lb -= 0x7e; \ + } \ +} +#define jisx0208_to_sjis(ub, lb) \ +{ \ + lb += (ub & 1) ? 0x1f : 0x7d; \ + if (lb > 0x7e) \ + lb++; \ + ub = (ub - 0x21) >> 1; \ + ub += (ub < 0x1f) ? 0x81 : 0xc1; \ +} +#define jisx02132_to_sjis(ub, lb) \ +{ \ + lb += (ub & 1) ? 0x1f : 0x7d; \ + if (lb > 0x7e) \ + lb++; \ + ub = jisx02132_sjis_map[ ub ]; \ +} + +wc_wchar_t +wc_sjis_to_jis(wc_wchar_t cc) +{ + wc_uchar ub, lb; + + ub = cc.code >> 8; + lb = cc.code & 0xff; + if (ub < 0xf0) { + sjis_to_jisx0208(ub, lb); + cc.ccs = WC_CCS_JIS_X_0208; + } else { + sjis_to_jisx02132(ub, lb); + cc.ccs = WC_CCS_JIS_X_0213_2; + } + cc.code = ((wc_uint32)ub << 8) | lb; + return cc; +} + +wc_wchar_t +wc_jis_to_sjis(wc_wchar_t cc) +{ + wc_uchar ub, lb; + + ub = (cc.code >> 8) & 0x7f; + lb = cc.code & 0x7f; + if (cc.ccs == WC_CCS_JIS_X_0213_2) { + jisx02132_to_sjis(ub, lb); + if (! ub) { + cc.ccs = WC_CCS_UNKNOWN_W; + return cc; + } + } else { + jisx0208_to_sjis(ub, lb); + } + cc.code = ((wc_uint32)ub << 8) | lb; + return cc; +} + +wc_wchar_t +wc_sjis_ext_to_cs94w(wc_wchar_t cc) +{ + wc_uchar ub, lb; + + ub = cc.code >> 8; + lb = cc.code & 0xff; + sjis_to_jisx0208(ub, lb); + if (ub <= 0x7e) { + cc.ccs = WC_CCS_SJIS_EXT_1; + } else { + ub -= 0x5e; + cc.ccs = WC_CCS_SJIS_EXT_2; + } + cc.code = ((wc_uint32)ub << 8) | lb; + return cc; +} + +wc_wchar_t +wc_cs94w_to_sjis_ext(wc_wchar_t cc) +{ + wc_uchar ub, lb; + + ub = (cc.code >> 8) & 0x7f; + lb = cc.code & 0x7f; + if (cc.ccs == WC_CCS_SJIS_EXT_2) + ub += 0x5e; + jisx0208_to_sjis(ub, lb); + cc.ccs = WC_CCS_SJIS_EXT; + cc.code = ((wc_uint32)ub << 8) | lb; + return cc; +} + +wc_uint32 +wc_sjis_ext1_to_N(wc_uint32 c) +{ + wc_uchar ub; + + ub = (c >> 8) & 0x7f; + switch(ub) { + case 0x2D: /* 0x8740 - */ + ub = 0; + break; + case 0x79: /* 0xED40 - */ + case 0x7A: /* 0xED9F - */ + case 0x7B: /* 0xEE40 - */ + case 0x7C: /* 0xEE9F - */ + ub -= 0x78; + break; + default: + return WC_C_SJIS_ERROR; + } + return ub * 0x5e + (c & 0x7f) - 0x21; +} + +wc_uint32 +wc_sjis_ext2_to_N(wc_uint32 c) +{ + wc_uchar ub; + + ub = (c >> 8) & 0x7f; + switch(ub) { + case 0x35: /* 0xFA40 - */ + case 0x36: /* 0xFA9F - */ + case 0x37: /* 0xFB40 - */ + case 0x38: /* 0xFB9F - */ + case 0x39: /* 0xFC40 - */ + ub -= 0x30; + break; + default: + return WC_C_SJIS_ERROR; + } + return ub * 0x5e + (c & 0x7f) - 0x21; +} + +Str +wc_conv_from_sjis(Str is, wc_ces ces) +{ + Str os; + wc_uchar *sp = (wc_uchar *)is->ptr; + wc_uchar *ep = sp + is->length; + wc_uchar *p; + wc_uchar jis[2]; + int state = WC_SJIS_NOSTATE; + wc_wchar_t cc; + + for (p = sp; p < ep && *p < 0x80; p++) + ; + if (p == ep) + return is; + os = Strnew_size(is->length); + if (p > sp) + Strcat_charp_n(os, is->ptr, (int)(p - sp)); + + for (; p < ep; p++) { + switch (state) { + case WC_SJIS_NOSTATE: + switch (WC_SJIS_MAP[*p]) { + case SL: + state = WC_SJIS_SHIFT_L; + break; + case SH: + state = WC_SJIS_SHIFT_H; + break; + case SX: + state = WC_SJIS_SHIFT_X; + break; + case SK: + wtf_push(os, WC_CCS_JIS_X_0201K, (wc_uint32)*p); + break; + case S80: + case SA0: + case C1: + wtf_push_unknown(os, p, 1); + break; + default: + Strcat_char(os, (char)*p); + break; + } + break; + case WC_SJIS_SHIFT_L: + case WC_SJIS_SHIFT_H: + if (WC_SJIS_MAP[*p] & LB) { + jis[0] = *(p-1); + jis[1] = *p; + sjis_to_jisx0208(jis[0], jis[1]); + cc.code = ((wc_uint32)jis[0] << 8) | jis[1]; + cc.ccs = wc_jisx0208_or_jisx02131(cc.code); + if (cc.ccs == WC_CCS_JIS_X_0208) + wtf_push(os, cc.ccs, cc.code); + else + wtf_push(os, WC_CCS_SJIS_EXT, ((wc_uint32)*(p-1) << 8) | *p); + } else + wtf_push_unknown(os, p-1, 2); + state = WC_SJIS_NOSTATE; + break; + case WC_SJIS_SHIFT_X: + if (WC_SJIS_MAP[*p] & LB) + wtf_push(os, WC_CCS_SJIS_EXT, ((wc_uint32)*(p-1) << 8) | *p); + else + wtf_push_unknown(os, p-1, 2); + state = WC_SJIS_NOSTATE; + break; + } + } + switch (state) { + case WC_SJIS_SHIFT_L: + case WC_SJIS_SHIFT_H: + case WC_SJIS_SHIFT_X: + wtf_push_unknown(os, p-1, 1); + break; + } + return os; +} + +Str +wc_conv_from_sjisx0213(Str is, wc_ces ces) +{ + Str os; + wc_uchar *sp = (wc_uchar *)is->ptr; + wc_uchar *ep = sp + is->length; + wc_uchar *p; + wc_uchar jis[2]; + int state = WC_SJIS_NOSTATE; + wc_wchar_t cc; + + for (p = sp; p < ep && *p < 0x80; p++) + ; + if (p == ep) + return is; + os = Strnew_size(is->length); + if (p > sp) + Strcat_charp_n(os, is->ptr, (int)(p - sp)); + + for (; p < ep; p++) { + switch (state) { + case WC_SJIS_NOSTATE: + switch (WC_SJIS_MAP[*p]) { + case SL: + state = WC_SJIS_SHIFT_L; + break; + case SH: + state = WC_SJIS_SHIFT_H; + break; + case SX: + state = WC_SJIS_SHIFT_X; + break; + case SK: + wtf_push(os, WC_CCS_JIS_X_0201K, (wc_uint32)*p); + break; + case S80: + case SA0: + case C1: + wtf_push_unknown(os, p, 1); + break; + default: + Strcat_char(os, (char)*p); + break; + } + break; + case WC_SJIS_SHIFT_L: + case WC_SJIS_SHIFT_H: + if (WC_SJIS_MAP[*p] & LB) { + jis[0] = *(p-1); + jis[1] = *p; + sjis_to_jisx0208(jis[0], jis[1]); + cc.code = ((wc_uint32)jis[0] << 8) | jis[1]; + cc.ccs = wc_jisx0208_or_jisx02131(cc.code); + wtf_push(os, cc.ccs, cc.code); + } else + wtf_push_unknown(os, p-1, 2); + state = WC_SJIS_NOSTATE; + break; + case WC_SJIS_SHIFT_X: + if (WC_SJIS_MAP[*p] & LB) { + jis[0] = *(p-1); + jis[1] = *p; + sjis_to_jisx02132(jis[0], jis[1]); + wtf_push(os, WC_CCS_JIS_X_0213_2, ((wc_uint32)jis[0] << 8) | jis[1]); + } else + wtf_push_unknown(os, p-1, 2); + state = WC_SJIS_NOSTATE; + break; + } + } + switch (state) { + case WC_SJIS_SHIFT_L: + case WC_SJIS_SHIFT_H: + case WC_SJIS_SHIFT_X: + wtf_push_unknown(os, p-1, 1); + break; + } + return os; +} + +void +wc_push_to_sjis(Str os, wc_wchar_t cc, wc_status *st) +{ + wc_uchar ub, lb; + + while (1) { + switch (cc.ccs) { + case WC_CCS_US_ASCII: + Strcat_char(os, cc.code); + return; + case WC_CCS_JIS_X_0201K: + if (WcOption.use_jisx0201k) { + Strcat_char(os, cc.code | 0x80); + return; + } else if (WcOption.fix_width_conv) + cc.ccs = WC_CCS_UNKNOWN; + else + cc = wc_jisx0201k_to_jisx0208(cc); + continue; + case WC_CCS_JIS_X_0208: + ub = (cc.code >> 8) & 0x7f; + lb = cc.code & 0x7f; + jisx0208_to_sjis(ub, lb); + Strcat_char(os, ub); + Strcat_char(os, lb); + return; + case WC_CCS_SJIS_EXT_1: + case WC_CCS_SJIS_EXT_2: + cc = wc_cs94w_to_sjis_ext(cc); + case WC_CCS_SJIS_EXT: + Strcat_char(os, (char)(cc.code >> 8)); + Strcat_char(os, (char)(cc.code & 0xff)); + return; + case WC_CCS_UNKNOWN_W: + if (!WcOption.no_replace) + Strcat_charp(os, WC_REPLACE_W); + return; + case WC_CCS_UNKNOWN: + if (!WcOption.no_replace) + Strcat_charp(os, WC_REPLACE); + return; + default: +#ifdef USE_UNICODE + if (WcOption.ucs_conv) + cc = wc_any_to_any_ces(cc, st); + else +#endif + cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; + continue; + } + } +} + +void +wc_push_to_sjisx0213(Str os, wc_wchar_t cc, wc_status *st) +{ + wc_uchar ub, lb; + + while (1) { + switch (cc.ccs) { + case WC_CCS_US_ASCII: + Strcat_char(os, cc.code); + return; + case WC_CCS_JIS_X_0201K: + if (WcOption.use_jisx0201k) { + Strcat_char(os, cc.code | 0x80); + return; + } else if (WcOption.fix_width_conv) + cc.ccs = WC_CCS_UNKNOWN; + else + cc = wc_jisx0201k_to_jisx0208(cc); + continue; + case WC_CCS_JIS_X_0213_1: + if (! WcOption.use_jisx0213) { + cc.ccs = WC_CCS_UNKNOWN_W; + continue; + } + case WC_CCS_JIS_X_0208: + ub = (cc.code >> 8) & 0x7f; + lb = cc.code & 0x7f; + jisx0208_to_sjis(ub, lb); + Strcat_char(os, ub); + Strcat_char(os, lb); + return; + case WC_CCS_JIS_X_0213_2: + if (! WcOption.use_jisx0213) { + cc.ccs = WC_CCS_UNKNOWN_W; + continue; + } + ub = (cc.code >> 8) & 0x7f; + lb = cc.code & 0x7f; + jisx02132_to_sjis(ub, lb); + if (ub) { + Strcat_char(os, ub); + Strcat_char(os, lb); + return; + } + case WC_CCS_UNKNOWN_W: + if (!WcOption.no_replace) + Strcat_charp(os, WC_REPLACE_W); + return; + case WC_CCS_UNKNOWN: + if (!WcOption.no_replace) + Strcat_charp(os, WC_REPLACE); + return; + default: +#ifdef USE_UNICODE + if (WcOption.ucs_conv) + cc = wc_any_to_any_ces(cc, st); + else +#endif + cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; + continue; + } + } +} + +Str +wc_char_conv_from_sjis(wc_uchar c, wc_status *st) +{ + static Str os; + static wc_uchar jis[2]; + wc_wchar_t cc; + + if (st->state == -1) { + st->state = WC_SJIS_NOSTATE; + os = Strnew_size(8); + } + + switch (st->state) { + case WC_SJIS_NOSTATE: + switch (WC_SJIS_MAP[c]) { + case SL: + jis[0] = c; + st->state = WC_SJIS_SHIFT_L; + return NULL; + case SH: + jis[0] = c; + st->state = WC_SJIS_SHIFT_H; + return NULL; + case SX: + jis[0] = c; + st->state = WC_SJIS_SHIFT_X; + return NULL; + case SK: + wtf_push(os, WC_CCS_JIS_X_0201K, (wc_uint32)c); + break; + case S80: + case SA0: + case C1: + break; + default: + Strcat_char(os, (char)c); + break; + } + break; + case WC_SJIS_SHIFT_L: + case WC_SJIS_SHIFT_H: + if (WC_SJIS_MAP[c] & LB) { + jis[1] = c; + sjis_to_jisx0208(jis[0], jis[1]); + cc.code = ((wc_uint32)jis[0] << 8) | jis[1]; + cc.ccs = wc_jisx0208_or_jisx02131(cc.code); + if (cc.ccs == WC_CCS_JIS_X_0208) + wtf_push(os, cc.ccs, cc.code); + else + wtf_push(os, WC_CCS_SJIS_EXT, ((wc_uint32)jis[0] << 8) | jis[1]); + } + st->state = WC_SJIS_NOSTATE; + break; + case WC_SJIS_SHIFT_X: + if (WC_SJIS_MAP[c] & LB) { + jis[1] = c; + wtf_push(os, WC_CCS_SJIS_EXT, ((wc_uint32)jis[0] << 8) | jis[1]); + } + st->state = WC_SJIS_NOSTATE; + break; + } + st->state = -1; + return os; +} + +Str +wc_char_conv_from_sjisx0213(wc_uchar c, wc_status *st) +{ + static Str os; + static wc_uchar jis[2]; + wc_wchar_t cc; + + if (st->state == -1) { + st->state = WC_SJIS_NOSTATE; + os = Strnew_size(8); + } + + switch (st->state) { + case WC_SJIS_NOSTATE: + switch (WC_SJIS_MAP[c]) { + case SL: + jis[0] = c; + st->state = WC_SJIS_SHIFT_L; + return NULL; + case SH: + jis[0] = c; + st->state = WC_SJIS_SHIFT_H; + return NULL; + case SX: + jis[0] = c; + st->state = WC_SJIS_SHIFT_X; + return NULL; + case SK: + wtf_push(os, WC_CCS_JIS_X_0201K, (wc_uint32)c); + break; + case S80: + case SA0: + case C1: + break; + default: + Strcat_char(os, (char)c); + break; + } + break; + case WC_SJIS_SHIFT_L: + case WC_SJIS_SHIFT_H: + if (WC_SJIS_MAP[c] & LB) { + jis[1] = c; + sjis_to_jisx0208(jis[0], jis[1]); + cc.code = ((wc_uint32)jis[0] << 8) | jis[1]; + cc.ccs = wc_jisx0208_or_jisx02131(cc.code); + wtf_push(os, cc.ccs, cc.code); + } + st->state = WC_SJIS_NOSTATE; + break; + case WC_SJIS_SHIFT_X: + if (WC_SJIS_MAP[c] & LB) { + jis[1] = c; + sjis_to_jisx02132(jis[0], jis[1]); + wtf_push(os, WC_CCS_JIS_X_0213_2, ((wc_uint32)jis[0] << 8) | jis[1]); + } + st->state = WC_SJIS_NOSTATE; + break; + } + st->state = -1; + return os; +} |