diff options
author | Tatsuya Kinoshita <tats@vega.ocn.ne.jp> | 2011-05-04 07:05:14 +0000 |
---|---|---|
committer | Tatsuya Kinoshita <tats@vega.ocn.ne.jp> | 2011-05-04 07:05:14 +0000 |
commit | 72f72d64a422d6628c4796f5c0bf2e508f134214 (patch) | |
tree | 0c9ea90cc53310832c977265521fb44db24a515e /libwc/johab.c | |
parent | Adding upstream version 0.3 (diff) | |
download | w3m-72f72d64a422d6628c4796f5c0bf2e508f134214.tar.gz w3m-72f72d64a422d6628c4796f5c0bf2e508f134214.zip |
Adding upstream version 0.5.1upstream/0.5.1
Diffstat (limited to 'libwc/johab.c')
-rw-r--r-- | libwc/johab.c | 389 |
1 files changed, 389 insertions, 0 deletions
diff --git a/libwc/johab.c b/libwc/johab.c new file mode 100644 index 0000000..8d587b8 --- /dev/null +++ b/libwc/johab.c @@ -0,0 +1,389 @@ + +#include "wc.h" +#include "johab.h" +#include "wtf.h" +#ifdef USE_UNICODE +#include "ucs.h" +#endif + +#define C0 WC_JOHAB_MAP_C0 +#define GL WC_JOHAB_MAP_GL +#define C1 WC_JOHAB_MAP_C1 +#define GH WC_JOHAB_MAP_GH +#define GB WC_JOHAB_MAP_GB +#define JJ WC_JOHAB_MAP_JJ +#define JB WC_JOHAB_MAP_JB +#define HB WC_JOHAB_MAP_HB +#define CJ WC_JOHAB_MAP_CJ +#define CB WC_JOHAB_MAP_CB + +/* + 00-1F 20-30 31-40 41-7E 7F 80 81-83 84-90 91-D3 D4-D7 D8-DE DF E0-F9 FA-FE FF + C0 GL GL GL C0 - - J J - H - H - - + - - J B - - J J B B B B B B - + + C0 GL GH GB C0 C1 CJ JJ JB CB HB CB HB CB C1 +*/ + +wc_uint8 WC_JOHAB_MAP[ 0x100 ] = { + C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, + C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, +/* 20 */ + GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, +/* 30 31 */ + GL, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, +/* 40 41 */ + GH, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, + GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, + GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, + GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, C0, + +/* 80 83 84 */ + C1, CJ, CJ, CJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, +/* 90 91 */ + JJ, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, + JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, + JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, + JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, +/* D3 D4 D7 D8 DF */ + JB, JB, JB, JB, CB, CB, CB, CB, HB, HB, HB, HB, HB, HB, HB, CB, + HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, +/* F9 FA FE FF */ + HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, CB, CB, CB, CB, CB, C1, +}; + +static wc_uint8 johab1_N_map[ 3 ][ 32 ] = { + { 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, + 15,16,17,18,19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 1, 2, 3, 4, 5, 0, 0, 6, 7, 8, 9,10,11, + 0, 0,12,13,14,15,16,17, 0, 0,18,19,20,21, 0, 0 }, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15, + 16,17, 0,18,19,20,21,22,23,24,25,26,27,28, 0, 0 } +}; + +static wc_uint8 N_johab1_map[ 3 ][ 32 ] = { + { 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17, + 18,19,20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 3, 4, 5, 6, 7,10,11,12,13,14,15,18,19,20,21,22, + 23,26,27,28,29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, + { 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16, + 17,19,20,21,22,23,24,25,26,27,28,29, 0, 0, 0, 0 } +}; + +wc_wchar_t +wc_johab_to_ksx1001(wc_wchar_t cc) +{ +#ifdef USE_UNICODE + static wc_table *t = NULL; +#endif + + switch (cc.ccs) { + case WC_CCS_JOHAB: + return wc_johab_to_ksx1001(wc_johab_to_cs128w(cc)); + case WC_CCS_JOHAB_1: + case WC_CCS_JOHAB_2: +#ifdef USE_UNICODE + if (WcOption.ucs_conv) { + if (t == NULL) + t = wc_get_ucs_table(WC_CCS_KS_X_1001); + cc = wc_any_to_any(cc, t); + } else +#endif + cc.ccs = WC_CCS_UNKNOWN_W; + break; + case WC_CCS_JOHAB_3: + if (cc.code >= 0x2121) + cc.ccs = WC_CCS_KS_X_1001; + else + cc.ccs = WC_CCS_UNKNOWN_W; + break; + } + return cc; +} + +wc_wchar_t +wc_ksx1001_to_johab(wc_wchar_t cc) +{ + cc.code &= 0x7f7f; + if ((cc.code >= 0x2121 && cc.code < 0x2421) || + (cc.code > 0x2453 && cc.code <= 0x2C7E) || + (cc.code >= 0x4A21 && cc.code <= 0x7D7E)) { + cc.ccs = WC_CCS_JOHAB_3; + return cc; + } +#ifdef USE_UNICODE + if (WcOption.ucs_conv) + cc = wc_ucs_to_johab(wc_any_to_ucs(cc)); + else +#endif + cc.ccs = WC_CCS_UNKNOWN_W; + return cc; +} + +#ifdef USE_UNICODE +wc_wchar_t +wc_ucs_to_johab(wc_uint32 ucs) +{ + wc_table *t; + wc_wchar_t cc; + + if (ucs >= WC_C_UCS2_HANGUL && ucs <= WC_C_UCS2_HANGUL_END) { + ucs -= WC_C_UCS2_HANGUL; + cc.code = WC_N_JOHAB1(ucs); + cc.ccs = WC_CCS_JOHAB; + } else if (ucs >= 0x3131 && ucs <= 0x3163) { + t = wc_get_ucs_table(WC_CCS_JOHAB_2); + cc = wc_ucs_to_any(ucs, t); + } else { + t = wc_get_ucs_table(WC_CCS_JOHAB_3); + cc = wc_ucs_to_any(ucs, t); + } + return cc; +} +#endif + +wc_uint32 +wc_johab1_to_N(wc_uint32 code) +{ + wc_uint32 a, b, c; + + a = johab1_N_map[0][(code >> 10) & 0x1F]; + b = johab1_N_map[1][(code >> 5) & 0x1F]; + c = johab1_N_map[2][ code & 0x1F]; + if (a && b && c) + return ((a - 1) * 21 + (b - 1)) * 28 + (c - 1); + return WC_C_JOHAB_ERROR; +} + +wc_uint32 +wc_N_to_johab1(wc_uint32 code) +{ + wc_uint32 a, b, c; + + a = N_johab1_map[0][(code / 28) / 21]; + b = N_johab1_map[1][(code / 28) % 21]; + c = N_johab1_map[2][ code % 28 ]; + return 0x8000 | (a << 10) | (b << 5) | c; +} + +/* 0x1F21 - 0x2C7E, 0x4A21 - 0x7C7E + (0x1F21 - 0x207E are not in KS X 1001) */ +#define johab3_to_ksx1001(ub, lb) \ +{ \ + if (ub < 0xe0) { \ + ub = ((ub - 0xd8) << 1) + 0x1f; \ + } else { \ + ub = ((ub - 0xe0) << 1) + 0x4a; \ + } \ + if (lb < 0xa1) { \ + lb -= (lb < 0x91) ? 0x10 : 0x22; \ + } else { \ + ub++; \ + lb -= 0x80; \ + } \ +} + +#define ksx1001_to_johab3(ub, lb) \ +{ \ + if (ub < 0x4a) { \ + ub -= 0x1f; \ + lb += (ub & 0x1) ? 0x80 : ((lb < 0x6f) ? 0x10 : 0x22); \ + ub = (ub >> 1) + 0xd8; \ + } else { \ + ub -= 0x4a; \ + lb += (ub & 0x1) ? 0x80 : ((lb < 0x6f) ? 0x10 : 0x22); \ + ub = (ub >> 1) + 0xe0; \ + } \ +} + +wc_wchar_t +wc_johab_to_cs128w(wc_wchar_t cc) +{ + wc_uint32 n; + wc_uchar ub, lb; + + if (cc.code < 0xD800) { + n = WC_JOHAB1_N(cc.code); + if (n != WC_C_JOHAB_ERROR) { + cc.code = WC_N_CS94x128(n); + cc.ccs = WC_CCS_JOHAB_1; + } else { + n = WC_JOHAB2_N(cc.code); + cc.code = WC_N_CS128W(n); + cc.ccs = WC_CCS_JOHAB_2; + } + } else { + ub = cc.code >> 8; + lb = cc.code & 0xff; + johab3_to_ksx1001(ub, lb); + cc.code = ((wc_uint32)ub << 8) | lb; + cc.ccs = WC_CCS_JOHAB_3; + } + return cc; +} + +wc_wchar_t +wc_cs128w_to_johab(wc_wchar_t cc) +{ + wc_uint32 n; + wc_uchar ub, lb; + + switch (cc.ccs) { + case WC_CCS_JOHAB_1: + n = WC_CS94x128_N(cc.code); + cc.code = WC_N_JOHAB1(n); + break; + case WC_CCS_JOHAB_2: + n = WC_CS128W_N(cc.code); + cc.code = WC_N_JOHAB2(n); + break; + case WC_CCS_JOHAB_3: + ub = (cc.code >> 8) & 0x7f; + lb = cc.code & 0x7f; + ksx1001_to_johab3(ub, lb); + cc.code = ((wc_uint32)ub << 8) | lb; + } + cc.ccs = WC_CCS_JOHAB; + return cc; +} + +Str +wc_conv_from_johab(Str is, wc_ces ces) +{ + Str os; + wc_uchar *sp = (wc_uchar *)is->ptr; + wc_uchar *ep = sp + is->length; + wc_uchar *p; + int state = WC_JOHAB_NOSTATE; + + for (p = sp; p < ep && *p < 0x80; p++) + ; + if (p == ep) + return is; + os = Strnew_size(is->length); + if (p > sp) + Strcat_charp_n(os, is->ptr, (int)(p - sp)); + + for (; p < ep; p++) { + switch (state) { + case WC_JOHAB_NOSTATE: + switch (WC_JOHAB_MAP[*p] & WC_JOHAB_MAP_1) { + case WC_JOHAB_MAP_UJ: + state = WC_JOHAB_HANGUL1; + break; + case WC_JOHAB_MAP_UH: + state = WC_JOHAB_HANJA1; + break; + case WC_JOHAB_MAP_C1: + wtf_push_unknown(os, p, 1); + break; + default: + Strcat_char(os, (char)*p); + break; + } + break; + case WC_JOHAB_HANGUL1: + if (WC_JOHAB_MAP[*p] & WC_JOHAB_MAP_LJ) + wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)*(p-1) << 8) | *p); + else + wtf_push_unknown(os, p-1, 2); + state = WC_JOHAB_NOSTATE; + break; + case WC_JOHAB_HANJA1: + if (WC_JOHAB_MAP[*p] & WC_JOHAB_MAP_LH) + wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)*(p-1) << 8) | *p); + else + wtf_push_unknown(os, p-1, 2); + state = WC_JOHAB_NOSTATE; + break; + } + } + switch (state) { + case WC_JOHAB_HANGUL1: + case WC_JOHAB_HANJA1: + wtf_push_unknown(os, p-1, 1); + break; + } + return os; +} + +void +wc_push_to_johab(Str os, wc_wchar_t cc, wc_status *st) +{ + while (1) { + switch (cc.ccs) { + case WC_CCS_US_ASCII: + Strcat_char(os, (char)cc.code); + return; + case WC_CCS_JOHAB_1: + case WC_CCS_JOHAB_2: + case WC_CCS_JOHAB_3: + cc = wc_cs128w_to_johab(cc); + case WC_CCS_JOHAB: + Strcat_char(os, (char)(cc.code >> 8)); + Strcat_char(os, (char)(cc.code & 0xff)); + return; + case WC_CCS_KS_X_1001: + cc = wc_ksx1001_to_johab(cc); + continue; + case WC_CCS_UNKNOWN_W: + if (!WcOption.no_replace) + Strcat_charp(os, WC_REPLACE_W); + return; + case WC_CCS_UNKNOWN: + if (!WcOption.no_replace) + Strcat_charp(os, WC_REPLACE); + return; + default: +#ifdef USE_UNICODE + if (WcOption.ucs_conv) + cc = wc_any_to_any_ces(cc, st); + else +#endif + cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; + continue; + } + } +} + +Str +wc_char_conv_from_johab(wc_uchar c, wc_status *st) +{ + static Str os; + static wc_uchar johabu; + + if (st->state == -1) { + st->state = WC_JOHAB_NOSTATE; + os = Strnew_size(8); + } + + switch (st->state) { + case WC_JOHAB_NOSTATE: + switch (WC_JOHAB_MAP[c] & WC_JOHAB_MAP_1) { + case WC_JOHAB_MAP_UJ: + johabu = c; + st->state = WC_JOHAB_HANGUL1; + return NULL; + case WC_JOHAB_MAP_UH: + johabu = c; + st->state = WC_JOHAB_HANJA1; + return NULL; + case WC_JOHAB_MAP_C1: + break; + default: + Strcat_char(os, (char)c); + break; + } + break; + case WC_JOHAB_HANGUL1: + if (WC_JOHAB_MAP[c] & WC_JOHAB_MAP_LJ) + wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)johabu << 8) | c); + break; + case WC_JOHAB_HANJA1: + if (WC_JOHAB_MAP[c] & WC_JOHAB_MAP_LH) + wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)johabu << 8) | c); + break; + } + st->state = -1; + return os; +} |