diff options
Diffstat (limited to '')
-rw-r--r-- | conv.c | 697 |
1 files changed, 0 insertions, 697 deletions
@@ -1,697 +0,0 @@ -/* $Id: conv.c,v 1.7 2002/09/24 16:35:02 ukai Exp $ */ -#include <stdio.h> -#include <string.h> -#include "fm.h" - -#ifdef JP_CHARSET -#include "terms.h" -#include "Str.h" - -#ifdef DEBUG -#include <malloc.h> -#endif /* DEBUG */ - -#define uchar unsigned char -#define ushort unsigned short -#define uint unsigned int - -#ifdef TRUE -#undef TRUE -#endif -#ifdef FALSE -#undef FALSE -#endif -#define TRUE 1 -#define FALSE 0 -#ifdef ESC_CODE -#undef ESC_CODE -#endif -#define ESC_CODE '\033' - -#define CODE_NORMAL 0x00 -#define CODE_OK 0x01 -#define CODE_BROKEN 0x02 -#define CODE_ERROR 0x04 -#define EUC_NOSTATE 0x00 -#define EUC_MBYTE1 0x10 -#define EUC_SS2 0x20 -#define EUC_SS3 0x40 -#define SJIS_NOSTATE 0x00 -#define SJIS_SHIFT_L 0x10 -#define SJIS_SHIFT_H 0x20 -#define ISO_NOSTATE 0x00 -#define ISO_ESC 0x10 -#define ISO_CS94 0x20 -#define ISO_MBCS 0x40 -#define ISO_MBYTE1 0x80 -#define CODE_STATE(c) ((c) & 0x0f) -#define EUC_STATE(c) ((c) & 0xf0) -#define SJIS_STATE(c) ((c) & 0xf0) -#define ISO_STATE(c) ((c) & 0xf0) - -#define CSET_ASCII 0 -#define CSET_X0208 1 -#define CSET_X0201K 2 -#define CSET_UNKNOWN 3 - -#define JSIcode "\033$@" -#define JSOcode "\033(H" -#define J2SIcode "\033$@" -#define J2SOcode "\033(J" -#define NSIcode "\033$B" -#define NSOcode "\033(J" -#define N2SIcode "\033$B" -#define N2SOcode "\033(B" -#define N3SIcode "\033$@" -#define N3SOcode "\033(B" -#define USIcode "\033$" -#define USOcode "\033+" - -static char *SIcode, *SOcode; - -static Str cConvEE(Str is); -static Str cConvEJ(Str is); -static Str cConvES(Str is); -static Str cConvSE(Str is); -static Str cConvJE(Str is); -char checkShiftCode(Str buf, uchar); - -static char *han2zen_tab[] = { - "!!", "!#", "!V", "!W", "!\"", "!&", "%r", "%!", - "%#", "%%", "%'", "%)", "%c", "%e", "%g", "%C", - "!<", "%\"", "%$", "%&", "%(", "%*", "%+", "%-", - "%/", "%1", "%3", "%5", "%7", "%9", "%;", "%=", - "%?", "%A", "%D", "%F", "%H", "%J", "%K", "%L", - "%M", "%N", "%O", "%R", "%U", "%X", "%[", "%^", - "%_", "%`", "%a", "%b", "%d", "%f", "%h", "%i", - "%j", "%k", "%l", "%m", "%o", "%s", "!+", "!,", -}; - -typedef struct _ConvRoutine { - char key; - Str (*routine) (); - char *ShiftIn, *ShiftOut; -} ConvRoutine; - -static ConvRoutine FromEJ[] = { - {CODE_JIS_J, cConvEJ, JSIcode, JSOcode}, - {CODE_JIS_N, cConvEJ, NSIcode, NSOcode}, - {CODE_JIS_n, cConvEJ, N2SIcode, N2SOcode}, - {CODE_JIS_m, cConvEJ, N3SIcode, N3SOcode}, - {CODE_JIS_j, cConvEJ, J2SIcode, J2SOcode}, - {CODE_SJIS, cConvES, "", ""}, - {CODE_EUC, cConvEE, "", ""}, - {'\0', NULL, NULL, NULL} -}; - -static ConvRoutine ToEJ[] = { - {CODE_JIS_J, cConvJE, JSIcode, JSOcode}, - {CODE_JIS_N, cConvJE, NSIcode, NSOcode}, - {CODE_JIS_n, cConvJE, N2SIcode, N2SOcode}, - {CODE_JIS_m, cConvJE, N3SIcode, N3SOcode}, - {CODE_JIS_j, cConvJE, J2SIcode, J2SOcode}, - {CODE_SJIS, cConvSE, "", ""}, - {CODE_EUC, cConvEE, "", ""}, - {'\0', NULL, NULL, NULL} -}; - -char * -GetSICode(char key) -{ - int i; - for (i = 0; FromEJ[i].key != '\0'; i++) - if (FromEJ[i].key == key) - return FromEJ[i].ShiftIn; - return ""; -} - -char * -GetSOCode(char key) -{ - int i; - for (i = 0; FromEJ[i].key != '\0'; i++) - if (FromEJ[i].key == key) - return FromEJ[i].ShiftOut; - return ""; -} - -static void -n_impr(char s) -{ - fprintf(stderr, - "conv: option %c(0x%02x) is not implemented yet... sorry\n", s, s); - w3m_exit(1); -} - -Str -conv_str(Str is, char fc, char tc) -{ - int i; - Str os; - static char from_code = '\0'; - static char to_code = '\0'; - static Str (*conv_from) (); - static Str (*conv_to) (); - - if (fc == tc || fc == CODE_ASCII || tc == CODE_ASCII) - return is; - - if (fc == CODE_INNER_EUC) - os = is; - else { - if (from_code != fc) { - for (i = 0; ToEJ[i].key != '\0'; i++) { - if (ToEJ[i].key == fc) { - from_code = fc; - conv_from = *ToEJ[i].routine; - goto next; - } - } - n_impr(fc); - return NULL; - } - next: - os = conv_from(is); - } - if (tc == CODE_INNER_EUC || tc == CODE_EUC) - return os; - else { - if (to_code != tc) { - for (i = 0; FromEJ[i].key != '\0'; i++) { - if (FromEJ[i].key == tc) { - SIcode = FromEJ[i].ShiftIn; - SOcode = FromEJ[i].ShiftOut; - to_code = tc; - conv_to = *FromEJ[i].routine; - goto next2; - } - } - n_impr(tc); - return NULL; - } - next2: - return conv_to(os); - } -} - -Str -conv(char *is, char fc, char tc) -{ - return conv_str(Strnew_charp(is), fc, tc); -} - -static uchar -getSLb(uchar * ptr, uchar * ub) -{ /* Get Shift-JIS Lower byte */ - uchar c = *ptr; - - *ub <<= 1; - if (c < 0x9f) { - if (c > 0x7e) - c--; - *ub -= 1; - c -= 0x3f; - } - else { - c -= 0x9e; - } - return c; -} - -static Str -cConvSE(Str is) -{ /* Convert Shift-JIS to EUC-JP */ - uchar *p, ub, lb; - int state = SJIS_NOSTATE; - Str os = Strnew_size(is->length); - uchar *endp = (uchar *) & is->ptr[is->length]; - - for (p = (uchar *) is->ptr; p < endp; p++) { - switch (state) { - case SJIS_NOSTATE: - if (!(*p & 0x80)) /* ASCII */ - Strcat_char(os, (char)(*p)); - else if (0x81 <= *p && *p <= 0x9f) { /* JIS X 0208, - * 0213 */ - ub = *p & 0x7f; - state = SJIS_SHIFT_L; - } - else if (0xe0 <= *p && *p <= 0xef) { /* JIS X 0208 */ - /* } else if (0xe0 <= *p && *p <= 0xfc) { *//* JIS X 0213 */ - ub = (*p & 0x7f) - 0x40; - state = SJIS_SHIFT_H; - } - else if (0xa0 <= *p && *p <= 0xdf) { /* JIS X 0201-Kana - */ - Strcat_char(os, (char)(han2zen_tab[*p - 0xa0][0] | 0x80)); - Strcat_char(os, (char)(han2zen_tab[*p - 0xa0][1] | 0x80)); - } - break; - case SJIS_SHIFT_L: - case SJIS_SHIFT_H: - if ((0x40 <= *p && *p <= 0x7e) || (0x80 <= *p && *p <= 0xfc)) { /* JIS X 0208, 0213 */ - lb = getSLb(p, &ub); - ub += 0x20; - lb += 0x20; - Strcat_char(os, (char)(ub | 0x80)); - Strcat_char(os, (char)(lb | 0x80)); - } - else if (!(*p & 0x80)) /* broken ? */ - Strcat_char(os, (char)(*p)); - state = SJIS_NOSTATE; - break; - } - } - return os; -} - -static Str -cConvJE(Str is) -{ /* Convert ISO-2022-JP to EUC-JP */ - uchar *p, ub = 0; - char cset = CSET_ASCII; - int state = ISO_NOSTATE; - Str os = Strnew_size(is->length); - uchar *endp = (uchar *) & is->ptr[is->length]; - - for (p = (uchar *) is->ptr; p < endp; p++) { - switch (state) { - case ISO_NOSTATE: - if (*p == ESC_CODE) /* ESC sequence */ - state = ISO_ESC; - else if (cset == CSET_ASCII || *p < 0x21) - Strcat_char(os, (char)(*p)); - else if (cset == CSET_X0208 && *p <= 0x7e) { - /* JIS X 0208 */ - ub = *p; - state = ISO_MBYTE1; - } - else if (cset == CSET_X0201K && *p <= 0x5f) { - /* JIS X 0201-Kana */ - Strcat_char(os, (char)(han2zen_tab[*p - 0x20][0] | 0x80)); - Strcat_char(os, (char)(han2zen_tab[*p - 0x20][1] | 0x80)); - } - break; - case ISO_MBYTE1: - if (*p == ESC_CODE) /* ESC sequence */ - state = ISO_ESC; - else if (0x21 <= *p && *p <= 0x7e) { /* JIS X 0208 */ - Strcat_char(os, (char)(ub | 0x80)); - Strcat_char(os, (char)(*p | 0x80)); - state = ISO_NOSTATE; - } - else { - Strcat_char(os, (char)(*p)); - state = ISO_NOSTATE; - } - break; - case ISO_ESC: - if (*p == '(') /* ESC ( F */ - state = ISO_CS94; - else if (*p == '$') /* ESC $ F, ESC $ ( F */ - state = ISO_MBCS; - else { - Strcat_char(os, ESC_CODE); - Strcat_char(os, (char)(*p)); - state = ISO_NOSTATE; - } - break; - case ISO_CS94: - if (*p == 'B' || *p == 'J' || *p == 'H') - cset = CSET_ASCII; - else if (*p == 'I') - cset = CSET_X0201K; - else { - Strcat_char(os, ESC_CODE); - Strcat_char(os, '('); - Strcat_char(os, (char)(*p)); - } - state = ISO_NOSTATE; - break; - case ISO_MBCS: - if (*p == '(') { /* ESC $ ( F */ - state = ISO_MBCS | ISO_CS94; - break; - } - case ISO_MBCS | ISO_CS94: - if (*p == 'B' || *p == '@') - cset = CSET_X0208; - else { - Strcat_char(os, ESC_CODE); - Strcat_char(os, '$'); - if (state == (ISO_MBCS | ISO_CS94)) - Strcat_char(os, '('); - Strcat_char(os, (char)(*p)); - } - state = ISO_NOSTATE; - break; - } - } - return os; -} - -static Str -_cConvEE(Str is, char is_euc) -{ /* Convert EUC-JP to EUC-JP / ISO-2022-JP - * (no JIS X 0201-Kana, 0212, 0213-2) */ - uchar *p, ub = 0, euc = 0; - int state = EUC_NOSTATE; - char cset = CSET_ASCII; - Str os; - uchar *endp = (uchar *) & is->ptr[is->length]; - - if (is_euc) { - os = Strnew_size(is->length); - euc = 0x80; - } - else - os = Strnew_size(is->length * 3 / 2); - - for (p = (uchar *) is->ptr; p < endp; p++) { - switch (state) { - case EUC_NOSTATE: - if (!(*p & 0x80)) { /* ASCII */ - if (!is_euc && cset != CSET_ASCII) { - Strcat_charp(os, SOcode); - cset = CSET_ASCII; - } - Strcat_char(os, (char)(*p)); - } - else if (0xa1 <= *p && *p <= 0xfe) { /* JIS X 0208, - * 0213-1 */ - ub = *p; - state = EUC_MBYTE1; - } - else if (*p == EUC_SS2_CODE) /* SS2 + JIS X 0201-Kana */ - state = EUC_SS2; - else if (*p == EUC_SS3_CODE) /* SS3 + JIS X 0212, 0213-2 */ - state = EUC_SS3; - break; - case EUC_MBYTE1: - if (0xa1 <= *p && *p <= 0xfe) { /* JIS X 0208, 0213-1 */ - if (!is_euc && cset != CSET_X0208) { - Strcat_charp(os, SIcode); - cset = CSET_X0208; - } - Strcat_char(os, (char)((ub & 0x7f) | euc)); - Strcat_char(os, (char)((*p & 0x7f) | euc)); - } - else if (!(*p & 0x80)) { /* broken ? */ - if (!is_euc && cset != CSET_ASCII) { - Strcat_charp(os, SOcode); - cset = CSET_ASCII; - } - Strcat_char(os, (char)(*p)); - } - state = EUC_NOSTATE; - break; - case EUC_SS2: - if (0xa0 <= *p && *p <= 0xdf) { /* JIS X 0201-Kana */ - if (!is_euc && cset != CSET_X0208) { - Strcat_charp(os, SIcode); - cset = CSET_X0208; - } - Strcat_char(os, (char)(han2zen_tab[*p - 0xa0][0] | euc)); - Strcat_char(os, (char)(han2zen_tab[*p - 0xa0][1] | euc)); - } - state = EUC_NOSTATE; - break; - case EUC_SS3: - state = (EUC_SS3 | EUC_MBYTE1); - break; - case EUC_SS3 | EUC_MBYTE1: - state = EUC_NOSTATE; - break; - } - } - if (!is_euc && cset != CSET_ASCII) - Strcat_charp(os, SOcode); - return os; -} - -static Str -cConvEE(Str is) -{ - return _cConvEE(is, TRUE); -} - -static Str -cConvEJ(Str is) -{ - return _cConvEE(is, FALSE); -} - -void -put_sjis(Str os, uchar ub, uchar lb) -{ - ub -= 0x20; - lb -= 0x20; - if ((ub & 1) == 0) - lb += 94; - ub = ((ub - 1) >> 1) + 0x81; - lb += 0x3f; - if (ub > 0x9f) - ub += 0x40; - if (lb > 0x7e) - lb++; - - Strcat_char(os, (char)(ub)); - Strcat_char(os, (char)(lb)); -} - -static Str -cConvES(Str is) -{ /* Convert EUC-JP to Shift-JIS */ - uchar *p, ub = 0; - int state = EUC_NOSTATE; - Str os = Strnew_size(is->length); - uchar *endp = (uchar *) & is->ptr[is->length]; - - for (p = (uchar *) is->ptr; p < endp; p++) { - switch (state) { - case EUC_NOSTATE: - if (!(*p & 0x80)) /* ASCII */ - Strcat_char(os, (char)(*p)); - else if (0xa1 <= *p && *p <= 0xfe) { /* JIS X 0208, - * 0213-1 */ - ub = *p; - state = EUC_MBYTE1; - } - else if (*p == EUC_SS2_CODE) /* SS2 + JIS X 0201-Kana */ - state = EUC_SS2; - else if (*p == EUC_SS3_CODE) /* SS3 + JIS X 0212, 0213-2 */ - state = EUC_SS3; - break; - case EUC_MBYTE1: - if (0xa1 <= *p && *p <= 0xfe) /* JIS X 0208, 0213-1 */ - put_sjis(os, ub & 0x7f, *p & 0x7f); - else if (!(*p & 0x80)) /* broken ? */ - Strcat_char(os, (char)(*p)); - state = EUC_NOSTATE; - break; - case EUC_SS2: - if (0xa0 <= *p && *p <= 0xdf) /* JIS X 0201-Kana */ - put_sjis(os, han2zen_tab[*p - 0xa0][0], - han2zen_tab[*p - 0xa0][1]); - state = EUC_NOSTATE; - break; - case EUC_SS3: - state = (EUC_SS3 | EUC_MBYTE1); - break; - case EUC_SS3 | EUC_MBYTE1: - state = EUC_NOSTATE; - break; - } - } - return os; -} - -/* - * static ushort sjis_shift[8] = { 0x7fff, 0xffff, 0x0, 0x0, 0x0, - * 0x0, 0xffff, 0x0 }; static ushort sjis_second[16] = { 0x0, 0x0, - * 0x0, 0x0, 0xffff, 0xffff, 0xffff, 0xfffe, 0xffff, 0xffff, 0xffff, - * 0xffff, 0xffff, 0xffff, 0xffff, 0xfff8 }; */ - -char -checkShiftCode(Str buf, uchar hint) -{ - uchar *p, si = '\0', so = '\0'; - int euc = (CODE_NORMAL | EUC_NOSTATE), - sjis = (CODE_NORMAL | SJIS_NOSTATE), sjis_kana = CODE_NORMAL, - iso = (CODE_NORMAL | ISO_NOSTATE), iso_kana = CODE_NORMAL; - uchar *endp = (uchar *) & buf->ptr[buf->length]; - - if (hint == CODE_INNER_EUC) - return '\0'; - p = (uchar *) buf->ptr; - while (1) { - if (iso != CODE_ERROR && (si == '\0' || so == '\0')) { - switch (ISO_STATE(iso)) { - case ISO_NOSTATE: - if (*p == ESC_CODE) /* ESC sequence */ - iso = (CODE_STATE(iso) | ISO_ESC); - break; - case ISO_ESC: - if (*p == '(') /* ESC ( F */ - iso = (CODE_STATE(iso) | ISO_CS94); - else if (*p == '$') /* ESC $ F, ESC $ ( F */ - iso = (CODE_STATE(iso) | ISO_MBCS); - else - iso = (CODE_STATE(iso) | ISO_NOSTATE); - break; - case ISO_CS94: - if (*p == 'B' || *p == 'J' || *p == 'H') - so = *p; - else if (*p == 'I') - iso_kana = CODE_OK; - iso = (CODE_STATE(iso) | ISO_NOSTATE); - break; - case ISO_MBCS: - if (*p == '(') { /* ESC $ ( F */ - iso = (CODE_STATE(iso) | ISO_MBCS | ISO_CS94); - break; - } - case ISO_MBCS | ISO_CS94: - if (*p == 'B' || *p == '@') - si = *p; - iso = (CODE_STATE(iso) | ISO_NOSTATE); - break; - } - if (*p & 0x80) - iso = CODE_ERROR; - } - if (euc != CODE_ERROR) { - switch (EUC_STATE(euc)) { - case EUC_NOSTATE: - if (!(*p & 0x80)) /* ASCII */ - ; - else if (0xa1 <= *p && *p <= 0xfe) /* JIS X 0208, 0213-1 */ - euc = (CODE_STATE(euc) | EUC_MBYTE1); - else if (*p == EUC_SS2_CODE) /* SS2 + JIS X 0201-Kana */ - euc = (CODE_STATE(euc) | EUC_SS2); - else if (*p == EUC_SS3_CODE) /* SS3 + JIS X 0212, 0213-2 */ - euc = (CODE_STATE(euc) | EUC_SS3); - else - euc = CODE_ERROR; - break; - case EUC_MBYTE1: - if (CODE_STATE(euc) == CODE_NORMAL) - euc = CODE_OK; - case EUC_SS3 | EUC_MBYTE1: - if (0xa1 <= *p && *p <= 0xfe) /* JIS X 0208, 0213-1 */ - euc = (CODE_STATE(euc) | EUC_NOSTATE); - else if (euc & CODE_BROKEN) - euc = CODE_ERROR; - else - euc = (CODE_BROKEN | EUC_NOSTATE); - break; - case EUC_SS2: - if (0xa0 <= *p && *p <= 0xdf) /* JIS X 0201-Kana */ - euc = (CODE_STATE(euc) | EUC_NOSTATE); - else - euc = CODE_ERROR; - break; - case EUC_SS3: - if (0xa1 <= *p && *p <= 0xfe) /* JIS X 0212, 0213-2 */ - euc = (CODE_STATE(euc) | EUC_SS3 | EUC_MBYTE1); - else - euc = CODE_ERROR; - break; - } - } - if (sjis != CODE_ERROR) { - switch (SJIS_STATE(sjis)) { - case SJIS_NOSTATE: - if (!(*p & 0x80)) /* ASCII */ - ; - else if (0x81 <= *p && *p <= 0x9f) - sjis = (CODE_STATE(sjis) | SJIS_SHIFT_L); - else if (0xe0 <= *p && *p <= 0xef) - - /* JIS X 0208 */ - /* else if (0xe0 <= *p && *p <= 0xfc) */ - /* JIS X 0213 */ - sjis = (CODE_STATE(sjis) | SJIS_SHIFT_H); - else if (0xa0 == *p) - sjis = (CODE_BROKEN | SJIS_NOSTATE); - else if (0xa1 <= *p && *p <= 0xdf) /* JIS X 0201-Kana - */ - sjis_kana = CODE_OK; - else - sjis = CODE_ERROR; - break; - case SJIS_SHIFT_L: - case SJIS_SHIFT_H: - if (CODE_STATE(sjis) == CODE_NORMAL) - sjis = CODE_OK; - if ((0x40 <= *p && *p <= 0x7e) || (0x80 <= *p && *p <= 0xfc)) /* JIS X 0208, 0213 */ - sjis = (CODE_STATE(sjis) | SJIS_NOSTATE); - else if (sjis & CODE_BROKEN) - sjis = CODE_ERROR; - else - sjis = (CODE_BROKEN | SJIS_NOSTATE); - break; - } - } - if (euc == CODE_ERROR || sjis == CODE_ERROR) - break; - if (p == endp) - break; - p++; - } - if (iso != CODE_ERROR) { - if (si == '\0' && so == '\0' && iso_kana != CODE_OK) - return '\0'; - switch (si) { - case '@': - switch (so) { - case 'H': - return CODE_JIS_J; - case 'J': - return CODE_JIS_j; - case 'B': - return CODE_JIS_m; - default: - return CODE_JIS_m; - } - case 'B': - switch (so) { - case 'J': - return CODE_JIS_N; - case 'B': - return CODE_JIS_n; - default: - return CODE_JIS_n; - } - default: - switch (so) { - case 'H': - return CODE_JIS_J; - case 'J': - return CODE_JIS_N; - case 'B': - return CODE_JIS_n; - default: - return CODE_JIS_n; - } - } - } - if (hint == CODE_EUC) { - if (euc != CODE_ERROR) - return CODE_EUC; - } - else if (hint == CODE_SJIS) { - if (sjis != CODE_ERROR) - return CODE_SJIS; - } - if (CODE_STATE(euc) == CODE_OK) - return CODE_EUC; - if (CODE_STATE(sjis) == CODE_OK) - return CODE_SJIS; - if (CODE_STATE(euc) == CODE_NORMAL) - return CODE_EUC; - if (CODE_STATE(sjis) == CODE_NORMAL) - return CODE_SJIS; - return CODE_EUC; -} -#endif /* JP_CHARSET */ |