aboutsummaryrefslogtreecommitdiffstats
path: root/libwc/johab.c
diff options
context:
space:
mode:
author Tatsuya Kinoshita <tats@vega.ocn.ne.jp> 2011-05-04 07:05:14 +0000
committer Tatsuya Kinoshita <tats@vega.ocn.ne.jp> 2011-05-04 07:05:14 +0000
commit 72f72d64a422d6628c4796f5c0bf2e508f134214 (patch)
tree 0c9ea90cc53310832c977265521fb44db24a515e /libwc/johab.c
parent Adding upstream version 0.3 (diff)
download w3m-72f72d64a422d6628c4796f5c0bf2e508f134214.tar.gz
w3m-72f72d64a422d6628c4796f5c0bf2e508f134214.zip
Adding upstream version 0.5.1 upstream/0.5.1
Diffstat (limited to 'libwc/johab.c')
-rw-r--r-- libwc/johab.c 389
1 files changed, 389 insertions, 0 deletions
diff --git a/libwc/johab.c b/libwc/johab.c
new file mode 100644
index 0000000..8d587b8
--- /dev/null
+++ b/libwc/johab.c
@@ -0,0 +1,389 @@
+
+#include "wc.h"
+#include "johab.h"
+#include "wtf.h"
+#ifdef USE_UNICODE
+#include "ucs.h"
+#endif
+
+#define C0 WC_JOHAB_MAP_C0
+#define GL WC_JOHAB_MAP_GL
+#define C1 WC_JOHAB_MAP_C1
+#define GH WC_JOHAB_MAP_GH
+#define GB WC_JOHAB_MAP_GB
+#define JJ WC_JOHAB_MAP_JJ
+#define JB WC_JOHAB_MAP_JB
+#define HB WC_JOHAB_MAP_HB
+#define CJ WC_JOHAB_MAP_CJ
+#define CB WC_JOHAB_MAP_CB
+
+/*
+  WC_JOHAB_MAP holds one class value per byte, indexed by the byte itself.
+  The two-letter names defined above are file-local shorthands for the
+  WC_JOHAB_MAP_* values stored in the table.
+
+  Layout of the table by byte range.  The "1st" and "2nd" columns appear
+  to give the role of the byte as first and as second byte of a character
+  (J = hangul, H = hanja, B = both, - = none); "class" is the entry that
+  is actually stored.
+
+    bytes   1st  2nd  class
+    00-1F   C0   -    C0
+    20-30   GL   -    GL
+    31-40   GL   J    GH
+    41-7E   GL   B    GB
+    7F      C0   -    C0
+    80      -    -    C1
+    81-83   -    J    CJ
+    84-90   J    J    JJ
+    91-D3   J    B    JB
+    D4-D7   -    B    CB
+    D8-DE   H    B    HB
+    DF      -    B    CB
+    E0-F9   H    B    HB
+    FA-FE   -    B    CB
+    FF      -    -    C1
+
+  NOTE(review): 31-40 is listed with "J" in the 2nd column but is stored
+  as GH, while the other "J" ranges are stored as CJ/JJ; the "J" may be a
+  typo for "H" -- confirm against the WC_JOHAB_MAP_* definitions.
+*/
+
+wc_uint8 WC_JOHAB_MAP[ 0x100 ] = {
+ C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
+ C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
+/* 20-2F */
+ GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
+/* 30, then 31-3F */
+ GL, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH,
+/* 40, then 41-7E (next four rows); the last entry, 7F, is C0 */
+ GH, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB,
+ GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB,
+ GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB,
+ GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, C0,
+
+/* 80, 81-83, then 84-8F */
+ C1, CJ, CJ, CJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ,
+/* 90, then 91-CF (next four rows) */
+ JJ, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB,
+ JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB,
+ JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB,
+ JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB,
+/* D0-D3, D4-D7, D8-DE, DF; the row after it is E0-EF */
+ JB, JB, JB, JB, CB, CB, CB, CB, HB, HB, HB, HB, HB, HB, HB, CB,
+ HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB,
+/* F0-F9, FA-FE, FF */
+ HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, CB, CB, CB, CB, CB, C1,
+};
+
+/*
+ * Johab bit field -> 1-based component index; 0 marks a field value
+ * that is not assigned.  wc_johab1_to_N() indexes row 0 with bits 10-14
+ * of the code, row 1 with bits 5-9 and row 2 with bits 0-4 (presumably
+ * the initial consonant, vowel and final consonant of a hangul
+ * syllable).  Rows hold 19, 21 and 28 non-zero entries respectively.
+ * The table is only read, hence const.
+ */
+static const wc_uint8 johab1_N_map[ 3 ][ 32 ] = {
+    {  0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,
+      15,16,17,18,19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+    {  0, 0, 0, 1, 2, 3, 4, 5, 0, 0, 6, 7, 8, 9,10,11,
+       0, 0,12,13,14,15,16,17, 0, 0,18,19,20,21, 0, 0 },
+    {  0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,
+      16,17, 0,18,19,20,21,22,23,24,25,26,27,28, 0, 0 }
+};
+
+/*
+ * Inverse of johab1_N_map: 0-based component index -> Johab bit field.
+ * wc_N_to_johab1() indexes row 0 with (code / 28) / 21, row 1 with
+ * (code / 28) % 21 and row 2 with code % 28.  Entries past the 19, 21
+ * and 28 valid indices are zero padding.  Only read, hence const.
+ */
+static const wc_uint8 N_johab1_map[ 3 ][ 32 ] = {
+    {  2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,
+      18,19,20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+    {  3, 4, 5, 6, 7,10,11,12,13,14,15,18,19,20,21,22,
+      23,26,27,28,29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+    {  1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,
+      17,19,20,21,22,23,24,25,26,27,28,29, 0, 0, 0, 0 }
+};
+
+/*
+ * Map a Johab character onto KS X 1001 where that is possible.
+ *
+ * A WC_CCS_JOHAB character is first split with wc_johab_to_cs128w().
+ * WC_CCS_JOHAB_1 / WC_CCS_JOHAB_2 characters go through the KS X 1001
+ * UCS table when USE_UNICODE is compiled in and WcOption.ucs_conv is
+ * set; otherwise they are tagged WC_CCS_UNKNOWN_W.  A WC_CCS_JOHAB_3
+ * character keeps its code and is retagged WC_CCS_KS_X_1001 when the
+ * code is at least 0x2121, WC_CCS_UNKNOWN_W otherwise.  Characters of
+ * any other set are returned unchanged.
+ */
+wc_wchar_t
+wc_johab_to_ksx1001(wc_wchar_t cc)
+{
+#ifdef USE_UNICODE
+    /* KS X 1001 conversion table, looked up on first use */
+    static wc_table *ksx1001_table = NULL;
+#endif
+
+    if (cc.ccs == WC_CCS_JOHAB)
+        cc = wc_johab_to_cs128w(cc);
+
+    if (cc.ccs == WC_CCS_JOHAB_3) {
+        if (cc.code < 0x2121)
+            cc.ccs = WC_CCS_UNKNOWN_W;
+        else
+            cc.ccs = WC_CCS_KS_X_1001;
+    } else if (cc.ccs == WC_CCS_JOHAB_1 || cc.ccs == WC_CCS_JOHAB_2) {
+#ifdef USE_UNICODE
+        if (WcOption.ucs_conv) {
+            if (ksx1001_table == NULL)
+                ksx1001_table = wc_get_ucs_table(WC_CCS_KS_X_1001);
+            return wc_any_to_any(cc, ksx1001_table);
+        }
+#endif
+        cc.ccs = WC_CCS_UNKNOWN_W;
+    }
+    return cc;
+}
+
+/*
+ * Map a KS X 1001 character onto Johab.
+ *
+ * The code is first reduced to 7 bits per byte.  Codes in
+ * 0x2121-0x2420, 0x2454-0x2C7E and 0x4A21-0x7D7E are kept as they are
+ * and tagged WC_CCS_JOHAB_3.  Everything else is converted through UCS
+ * when USE_UNICODE is compiled in and WcOption.ucs_conv is set, and is
+ * tagged WC_CCS_UNKNOWN_W otherwise.
+ */
+wc_wchar_t
+wc_ksx1001_to_johab(wc_wchar_t cc)
+{
+    int in_johab3;
+
+    cc.code &= 0x7f7f;
+    in_johab3 = (cc.code >= 0x2121 && cc.code <= 0x2420)
+        || (cc.code >= 0x2454 && cc.code <= 0x2C7E)
+        || (cc.code >= 0x4A21 && cc.code <= 0x7D7E);
+
+    if (!in_johab3) {
+#ifdef USE_UNICODE
+        if (WcOption.ucs_conv)
+            return wc_ucs_to_johab(wc_any_to_ucs(cc));
+#endif
+        cc.ccs = WC_CCS_UNKNOWN_W;
+        return cc;
+    }
+
+    cc.ccs = WC_CCS_JOHAB_3;
+    return cc;
+}
+
+#ifdef USE_UNICODE
+/*
+ * Convert a UCS code point to a Johab character.
+ *
+ * Code points in WC_C_UCS2_HANGUL .. WC_C_UCS2_HANGUL_END are turned
+ * into a WC_CCS_JOHAB code arithmetically with WC_N_JOHAB1().
+ * 0x3131-0x3163 is looked up in the WC_CCS_JOHAB_2 table, and every
+ * other code point in the WC_CCS_JOHAB_3 table.
+ */
+wc_wchar_t
+wc_ucs_to_johab(wc_uint32 ucs)
+{
+    wc_wchar_t cc;
+
+    if (ucs >= WC_C_UCS2_HANGUL && ucs <= WC_C_UCS2_HANGUL_END) {
+        /* offset inside the hangul block */
+        ucs -= WC_C_UCS2_HANGUL;
+        cc.code = WC_N_JOHAB1(ucs);
+        cc.ccs = WC_CCS_JOHAB;
+        return cc;
+    }
+
+    if (ucs >= 0x3131 && ucs <= 0x3163)
+        return wc_ucs_to_any(ucs, wc_get_ucs_table(WC_CCS_JOHAB_2));
+
+    return wc_ucs_to_any(ucs, wc_get_ucs_table(WC_CCS_JOHAB_3));
+}
+#endif
+
+/*
+ * Turn a Johab code into a sequential number.
+ *
+ * The three 5-bit fields of the code (bits 10-14, 5-9 and 0-4) are
+ * looked up in johab1_N_map.  If all three map to a non-zero index the
+ * result is ((a - 1) * 21 + (b - 1)) * 28 + (c - 1); otherwise
+ * WC_C_JOHAB_ERROR is returned.
+ */
+wc_uint32
+wc_johab1_to_N(wc_uint32 code)
+{
+    wc_uint32 hi_idx = johab1_N_map[0][(code >> 10) & 0x1F];
+    wc_uint32 mid_idx = johab1_N_map[1][(code >> 5) & 0x1F];
+    wc_uint32 lo_idx = johab1_N_map[2][code & 0x1F];
+
+    if (hi_idx == 0 || mid_idx == 0 || lo_idx == 0)
+        return WC_C_JOHAB_ERROR;
+
+    return ((hi_idx - 1) * 21 + (mid_idx - 1)) * 28 + (lo_idx - 1);
+}
+
+/*
+ * Turn a sequential number (as produced by wc_johab1_to_N) back into a
+ * Johab code: 0x8000 | (a << 10) | (b << 5) | c, where a, b and c are
+ * the N_johab1_map entries for (code / 28) / 21, (code / 28) % 21 and
+ * code % 28.
+ *
+ * Only 19 * 21 * 28 numbers are valid.  Anything larger would select
+ * the zero padding of N_johab1_map or index past its 32-entry row, so
+ * WC_C_JOHAB_ERROR is returned instead, mirroring wc_johab1_to_N.
+ * NOTE(review): callers in this file store the result without checking
+ * for WC_C_JOHAB_ERROR.
+ */
+wc_uint32
+wc_N_to_johab1(wc_uint32 code)
+{
+    wc_uint32 a, b, c;
+
+    if (code >= 19 * 21 * 28)
+        return WC_C_JOHAB_ERROR;
+
+    a = N_johab1_map[0][(code / 28) / 21];
+    b = N_johab1_map[1][(code / 28) % 21];
+    c = N_johab1_map[2][ code % 28 ];
+    return 0x8000 | (a << 10) | (b << 5) | c;
+}
+
+/* 0x1F21 - 0x2C7E, 0x4A21 - 0x7C7E
+ (0x1F21 - 0x207E are not in KS X 1001) */
+#define johab3_to_ksx1001(ub, lb) \
+{ \
+ if (ub < 0xe0) { \
+ ub = ((ub - 0xd8) << 1) + 0x1f; \
+ } else { \
+ ub = ((ub - 0xe0) << 1) + 0x4a; \
+ } \
+ if (lb < 0xa1) { \
+ lb -= (lb < 0x91) ? 0x10 : 0x22; \
+ } else { \
+ ub++; \
+ lb -= 0x80; \
+ } \
+}
+
+#define ksx1001_to_johab3(ub, lb) \
+{ \
+ if (ub < 0x4a) { \
+ ub -= 0x1f; \
+ lb += (ub & 0x1) ? 0x80 : ((lb < 0x6f) ? 0x10 : 0x22); \
+ ub = (ub >> 1) + 0xd8; \
+ } else { \
+ ub -= 0x4a; \
+ lb += (ub & 0x1) ? 0x80 : ((lb < 0x6f) ? 0x10 : 0x22); \
+ ub = (ub >> 1) + 0xe0; \
+ } \
+}
+
+/*
+ * Split a raw Johab code into one of the three internal sets.
+ *
+ * Codes from 0xD800 on are converted byte-wise with
+ * johab3_to_ksx1001() and tagged WC_CCS_JOHAB_3.  Lower codes are tried
+ * with WC_JOHAB1_N() first (WC_CCS_JOHAB_1, re-encoded with
+ * WC_N_CS94x128()); when that yields WC_C_JOHAB_ERROR they are numbered
+ * with WC_JOHAB2_N() and become WC_CCS_JOHAB_2 (re-encoded with
+ * WC_N_CS128W()).
+ */
+wc_wchar_t
+wc_johab_to_cs128w(wc_wchar_t cc)
+{
+    wc_uint32 n;
+    wc_uchar hi, lo;
+
+    if (cc.code >= 0xD800) {
+        hi = cc.code >> 8;
+        lo = cc.code & 0xff;
+        johab3_to_ksx1001(hi, lo);
+        cc.code = ((wc_uint32)hi << 8) | lo;
+        cc.ccs = WC_CCS_JOHAB_3;
+        return cc;
+    }
+
+    n = WC_JOHAB1_N(cc.code);
+    if (n == WC_C_JOHAB_ERROR) {
+        n = WC_JOHAB2_N(cc.code);
+        cc.code = WC_N_CS128W(n);
+        cc.ccs = WC_CCS_JOHAB_2;
+    } else {
+        cc.code = WC_N_CS94x128(n);
+        cc.ccs = WC_CCS_JOHAB_1;
+    }
+    return cc;
+}
+
+/*
+ * Inverse of wc_johab_to_cs128w(): rebuild the raw Johab code from a
+ * WC_CCS_JOHAB_1, WC_CCS_JOHAB_2 or WC_CCS_JOHAB_3 character.
+ *
+ * The code of any other set is left untouched.  In every case the
+ * result is tagged WC_CCS_JOHAB.
+ */
+wc_wchar_t
+wc_cs128w_to_johab(wc_wchar_t cc)
+{
+    wc_uint32 n;
+    wc_uchar hi, lo;
+
+    if (cc.ccs == WC_CCS_JOHAB_1) {
+        n = WC_CS94x128_N(cc.code);
+        cc.code = WC_N_JOHAB1(n);
+    } else if (cc.ccs == WC_CCS_JOHAB_2) {
+        n = WC_CS128W_N(cc.code);
+        cc.code = WC_N_JOHAB2(n);
+    } else if (cc.ccs == WC_CCS_JOHAB_3) {
+        /* only the low 7 bits of each byte are used */
+        hi = (cc.code >> 8) & 0x7f;
+        lo = cc.code & 0x7f;
+        ksx1001_to_johab3(hi, lo);
+        cc.code = ((wc_uint32)hi << 8) | lo;
+    }
+
+    cc.ccs = WC_CCS_JOHAB;
+    return cc;
+}
+
+/*
+ * Convert a Johab byte string to the internal representation.
+ *
+ * If every byte of `is` is below 0x80 the input Str itself is returned.
+ * Otherwise a new Str is built: the leading run of bytes below 0x80 is
+ * copied as is and the rest is run through a two-state scanner driven
+ * by WC_JOHAB_MAP.  A first byte classed WC_JOHAB_MAP_UJ or
+ * WC_JOHAB_MAP_UH must be followed by a byte carrying WC_JOHAB_MAP_LJ
+ * or WC_JOHAB_MAP_LH respectively; the pair is then pushed as a
+ * WC_CCS_JOHAB character, otherwise both bytes are pushed with
+ * wtf_push_unknown().  A first byte left pending at the end of input
+ * is pushed with wtf_push_unknown() as well.  `ces` is not used.
+ */
+Str
+wc_conv_from_johab(Str is, wc_ces ces)
+{
+    wc_uchar *begin = (wc_uchar *)is->ptr;
+    wc_uchar *end = begin + is->length;
+    wc_uchar *cur = begin;
+    Str out;
+    int state = WC_JOHAB_NOSTATE;
+    int trail_ok;
+
+    /* skip the leading run of bytes below 0x80 */
+    while (cur < end && *cur < 0x80)
+        cur++;
+    if (cur == end)
+        return is;
+
+    out = Strnew_size(is->length);
+    if (cur > begin)
+        Strcat_charp_n(out, is->ptr, (int)(cur - begin));
+
+    for (; cur < end; cur++) {
+        if (state == WC_JOHAB_NOSTATE) {
+            /* first byte: decide what kind of second byte to expect */
+            switch (WC_JOHAB_MAP[*cur] & WC_JOHAB_MAP_1) {
+            case WC_JOHAB_MAP_UJ:
+                state = WC_JOHAB_HANGUL1;
+                break;
+            case WC_JOHAB_MAP_UH:
+                state = WC_JOHAB_HANJA1;
+                break;
+            case WC_JOHAB_MAP_C1:
+                wtf_push_unknown(out, cur, 1);
+                break;
+            default:
+                Strcat_char(out, (char)*cur);
+                break;
+            }
+            continue;
+        }
+
+        if (state != WC_JOHAB_HANGUL1 && state != WC_JOHAB_HANJA1)
+            continue;
+
+        /* second byte: cur[-1] is the pending first byte */
+        if (state == WC_JOHAB_HANGUL1)
+            trail_ok = (WC_JOHAB_MAP[*cur] & WC_JOHAB_MAP_LJ) != 0;
+        else
+            trail_ok = (WC_JOHAB_MAP[*cur] & WC_JOHAB_MAP_LH) != 0;
+
+        if (trail_ok)
+            wtf_push(out, WC_CCS_JOHAB, ((wc_uint32)*(cur-1) << 8) | *cur);
+        else
+            wtf_push_unknown(out, cur-1, 2);
+        state = WC_JOHAB_NOSTATE;
+    }
+
+    /* input ended right after a first byte */
+    if (state == WC_JOHAB_HANGUL1 || state == WC_JOHAB_HANJA1)
+        wtf_push_unknown(out, cur-1, 1);
+
+    return out;
+}
+
+/*
+ * Append the Johab encoding of `cc` to `os`.
+ *
+ * The character is converted step by step until it belongs to a set
+ * that can be written:
+ *   - WC_CCS_US_ASCII is written as one byte;
+ *   - WC_CCS_JOHAB_1/2/3 are turned into WC_CCS_JOHAB with
+ *     wc_cs128w_to_johab() and written as two bytes, high byte first;
+ *   - WC_CCS_KS_X_1001 is converted with wc_ksx1001_to_johab() and
+ *     examined again;
+ *   - WC_CCS_UNKNOWN_W / WC_CCS_UNKNOWN write WC_REPLACE_W / WC_REPLACE
+ *     unless WcOption.no_replace is set;
+ *   - anything else is converted with wc_any_to_any_ces() when
+ *     USE_UNICODE is compiled in and WcOption.ucs_conv is set, and is
+ *     otherwise downgraded to the matching unknown set, then examined
+ *     again.
+ * `st` is only passed on to wc_any_to_any_ces().
+ */
+void
+wc_push_to_johab(Str os, wc_wchar_t cc, wc_status *st)
+{
+    for (;;) {
+        switch (cc.ccs) {
+        case WC_CCS_UNKNOWN_W:
+            if (!WcOption.no_replace)
+                Strcat_charp(os, WC_REPLACE_W);
+            return;
+        case WC_CCS_UNKNOWN:
+            if (!WcOption.no_replace)
+                Strcat_charp(os, WC_REPLACE);
+            return;
+        case WC_CCS_US_ASCII:
+            Strcat_char(os, (char)cc.code);
+            return;
+        case WC_CCS_KS_X_1001:
+            cc = wc_ksx1001_to_johab(cc);
+            continue;
+        case WC_CCS_JOHAB_1:
+        case WC_CCS_JOHAB_2:
+        case WC_CCS_JOHAB_3:
+            cc = wc_cs128w_to_johab(cc);
+            /* fall through: cc is now a WC_CCS_JOHAB character */
+        case WC_CCS_JOHAB:
+            Strcat_char(os, (char)(cc.code >> 8));
+            Strcat_char(os, (char)(cc.code & 0xff));
+            return;
+        default:
+#ifdef USE_UNICODE
+            if (WcOption.ucs_conv) {
+                cc = wc_any_to_any_ces(cc, st);
+                continue;
+            }
+#endif
+            cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
+            continue;
+        }
+    }
+}
+
+/*
+ * Byte-at-a-time variant of the Johab decoder.
+ *
+ * With st->state == -1 a new output Str is allocated and the state is
+ * reset to WC_JOHAB_NOSTATE.  A first byte classed WC_JOHAB_MAP_UJ or
+ * WC_JOHAB_MAP_UH is remembered, the state is advanced and NULL is
+ * returned to ask for the second byte.  In all other cases the output
+ * Str is returned and st->state is set back to -1; the Str holds the
+ * decoded character, or nothing when the byte was classed
+ * WC_JOHAB_MAP_C1 or was not an acceptable second byte.
+ *
+ * The output Str and the pending first byte live in static variables
+ * shared by all callers.
+ */
+Str
+wc_char_conv_from_johab(wc_uchar c, wc_status *st)
+{
+    static Str os;
+    static wc_uchar johabu;
+
+    if (st->state == -1) {
+        st->state = WC_JOHAB_NOSTATE;
+        os = Strnew_size(8);
+    }
+
+    if (st->state == WC_JOHAB_NOSTATE) {
+        switch (WC_JOHAB_MAP[c] & WC_JOHAB_MAP_1) {
+        case WC_JOHAB_MAP_UJ:
+            johabu = c;
+            st->state = WC_JOHAB_HANGUL1;
+            return NULL;
+        case WC_JOHAB_MAP_UH:
+            johabu = c;
+            st->state = WC_JOHAB_HANJA1;
+            return NULL;
+        case WC_JOHAB_MAP_C1:
+            /* nothing is emitted for this class */
+            break;
+        default:
+            Strcat_char(os, (char)c);
+            break;
+        }
+    } else if (st->state == WC_JOHAB_HANGUL1) {
+        if (WC_JOHAB_MAP[c] & WC_JOHAB_MAP_LJ)
+            wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)johabu << 8) | c);
+    } else if (st->state == WC_JOHAB_HANJA1) {
+        if (WC_JOHAB_MAP[c] & WC_JOHAB_MAP_LH)
+            wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)johabu << 8) | c);
+    }
+
+    st->state = -1;
+    return os;
+}