/* $Id: conv.c,v 1.7 2002/09/24 16:35:02 ukai Exp $ */
#include <stdio.h>
#include <string.h>
#include "fm.h"

#ifdef JP_CHARSET
#include "terms.h"
#include "Str.h"

#ifdef DEBUG
#include <malloc.h>
#endif				/* DEBUG */

/* Short spellings for the unsigned integer types used throughout this file. */
#define	uchar		unsigned char
#define ushort		unsigned short
#define uint		unsigned int

/* Force TRUE/FALSE to 1/0 regardless of any earlier definition. */
#ifdef TRUE
#undef TRUE
#endif
#ifdef FALSE
#undef FALSE
#endif
#define	TRUE		1
#define	FALSE		0
/* Force ESC_CODE to the ESC control character (octal 033). */
#ifdef ESC_CODE
#undef ESC_CODE
#endif
#define ESC_CODE	'\033'

#define CODE_NORMAL	0x00
#define CODE_OK		0x01
#define CODE_BROKEN	0x02
#define CODE_ERROR	0x04
#define EUC_NOSTATE	0x00
#define EUC_MBYTE1	0x10
#define EUC_SS2		0x20
#define EUC_SS3		0x40
#define SJIS_NOSTATE	0x00
#define SJIS_SHIFT_L	0x10
#define SJIS_SHIFT_H	0x20
#define ISO_NOSTATE	0x00
#define ISO_ESC		0x10
#define ISO_CS94	0x20
#define ISO_MBCS	0x40
#define ISO_MBYTE1	0x80
#define CODE_STATE(c)	((c) & 0x0f)
#define EUC_STATE(c)	((c) & 0xf0)
#define SJIS_STATE(c)	((c) & 0xf0)
#define ISO_STATE(c)	((c) & 0xf0)

/* Character set currently designated while reading or writing ISO-2022-JP. */
#define CSET_ASCII	0	/* single-byte set, bytes copied as they are */
#define CSET_X0208	1	/* two-byte JIS X 0208 */
#define CSET_X0201K	2	/* single-byte JIS X 0201 kana */
#define CSET_UNKNOWN	3	/* not referenced by the code visible in this file */

/*
 * Escape-sequence pairs for the ISO-2022-JP variants listed in the FromEJ
 * and ToEJ tables.  The *SIcode string is written before two-byte text and
 * the *SOcode string before returning to single-byte text (see _cConvEE()).
 */
#define	JSIcode  "\033$@"	/* CODE_JIS_J */
#define	JSOcode  "\033(H"
#define	J2SIcode "\033$@"	/* CODE_JIS_j */
#define	J2SOcode "\033(J"
#define	NSIcode  "\033$B"	/* CODE_JIS_N */
#define	NSOcode  "\033(J"
#define	N2SIcode  "\033$B"	/* CODE_JIS_n */
#define	N2SOcode  "\033(B"
#define	N3SIcode "\033$@"	/* CODE_JIS_m */
#define	N3SOcode "\033(B"
/* NOTE(review): USIcode/USOcode are not referenced by the code visible here. */
#define	USIcode  "\033$"
#define	USOcode  "\033+"

/*
 * Escape sequences for the target code most recently selected by
 * conv_str(); read by _cConvEE() when it produces ISO-2022-JP output.
 */
static char *SIcode, *SOcode;

/*
 * Converters.  The two trailing letters name the source and the target:
 * E = EUC-JP, J = ISO-2022-JP, S = Shift-JIS.
 */
static Str cConvEE(Str is);
static Str cConvEJ(Str is);
static Str cConvES(Str is);
static Str cConvSE(Str is);
static Str cConvJE(Str is);
/* Encoding detector; the second argument is the caller's hint code. */
char checkShiftCode(Str buf, uchar);

/*
 * Half-width (JIS X 0201) kana to full-width (JIS X 0208) mapping.
 *
 * The table has 64 entries.  The converters index it with
 * (byte - 0xa0) for 8-bit kana bytes 0xa0..0xdf and with (byte - 0x20)
 * for 7-bit kana bytes.  Each entry is the two-byte replacement in
 * 7-bit form; callers OR 0x80 into both bytes when they need EUC-JP.
 *
 * The table is read-only, so both the pointers and the strings are const.
 */
static const char *const han2zen_tab[] = {
    "!!", "!#", "!V", "!W", "!\"", "!&", "%r", "%!",
    "%#", "%%", "%'", "%)", "%c", "%e", "%g", "%C",
    "!<", "%\"", "%$", "%&", "%(", "%*", "%+", "%-",
    "%/", "%1", "%3", "%5", "%7", "%9", "%;", "%=",
    "%?", "%A", "%D", "%F", "%H", "%J", "%K", "%L",
    "%M", "%N", "%O", "%R", "%U", "%X", "%[", "%^",
    "%_", "%`", "%a", "%b", "%d", "%f", "%h", "%i",
    "%j", "%k", "%l", "%m", "%o", "%s", "!+", "!,",
};

/*
 * One row of a conversion dispatch table (see FromEJ and ToEJ).
 * A row whose key is '\0' terminates the table.
 */
typedef struct _ConvRoutine {
    char key;			/* encoding code this row applies to */
    Str (*routine) (Str is);	/* converter; prototyped so table entries
				 * and calls are type-checked */
    char *ShiftIn, *ShiftOut;	/* escape sequences for this encoding, or ""
				 * when it uses none */
} ConvRoutine;

/*
 * Converters from EUC-JP to each external encoding, with the escape
 * sequences conv_str() installs in SIcode/SOcode for that target.
 * Also searched by GetSICode() and GetSOCode().
 */
static ConvRoutine FromEJ[] = {
    {CODE_JIS_J, cConvEJ, JSIcode, JSOcode},
    {CODE_JIS_N, cConvEJ, NSIcode, NSOcode},
    {CODE_JIS_n, cConvEJ, N2SIcode, N2SOcode},
    {CODE_JIS_m, cConvEJ, N3SIcode, N3SOcode},
    {CODE_JIS_j, cConvEJ, J2SIcode, J2SOcode},
    {CODE_SJIS, cConvES, "", ""},
    {CODE_EUC, cConvEE, "", ""},
    {'\0', NULL, NULL, NULL}	/* terminator */
};

/*
 * Converters from each external encoding to EUC-JP.  conv_str() uses only
 * the key and routine fields of this table.
 */
static ConvRoutine ToEJ[] = {
    {CODE_JIS_J, cConvJE, JSIcode, JSOcode},
    {CODE_JIS_N, cConvJE, NSIcode, NSOcode},
    {CODE_JIS_n, cConvJE, N2SIcode, N2SOcode},
    {CODE_JIS_m, cConvJE, N3SIcode, N3SOcode},
    {CODE_JIS_j, cConvJE, J2SIcode, J2SOcode},
    {CODE_SJIS, cConvSE, "", ""},
    {CODE_EUC, cConvEE, "", ""},
    {'\0', NULL, NULL, NULL}	/* terminator */
};

/*
 * Look up the shift-in escape sequence registered in FromEJ for `key`.
 * Returns "" when the key is not in the table.
 */
char *
GetSICode(char key)
{
    int idx = 0;

    while (FromEJ[idx].key != '\0') {
	if (FromEJ[idx].key == key)
	    return FromEJ[idx].ShiftIn;
	idx++;
    }
    return "";
}

/*
 * Look up the shift-out escape sequence registered in FromEJ for `key`.
 * Returns "" when the key is not in the table.
 */
char *
GetSOCode(char key)
{
    int idx = 0;

    while (FromEJ[idx].key != '\0') {
	if (FromEJ[idx].key == key)
	    return FromEJ[idx].ShiftOut;
	idx++;
    }
    return "";
}

/*
 * Report on stderr that conversion code `s` is not supported, then call
 * w3m_exit(1).
 */
static void
n_impr(char s)
{
    /* %x expects an unsigned int.  Going through unsigned char keeps a
     * code with the high bit set from being sign-extended (and printed as
     * 0xffffffXX) on platforms where plain char is signed. */
    fprintf(stderr,
	    "conv: option %c(0x%02x) is not implemented yet... sorry\n", s,
	    (unsigned int)(unsigned char)s);
    w3m_exit(1);
}

/*
 * Convert `is` from encoding `fc` to encoding `tc`.
 *
 * The input is returned unchanged when the two codes are equal or when
 * either is CODE_ASCII.  Otherwise the text is first brought to EUC-JP
 * with the ToEJ routine for `fc` (skipped when `fc` is CODE_INNER_EUC) and
 * then converted with the FromEJ routine for `tc` (skipped when `tc` is
 * CODE_INNER_EUC or CODE_EUC).
 *
 * The routines chosen for the last `fc` and `tc` are cached in static
 * variables, together with SIcode/SOcode for the target.  A code that is
 * in neither table is reported through n_impr(); NULL is returned if that
 * call comes back.
 */
Str
conv_str(Str is, char fc, char tc)
{
    static char from_code = '\0';
    static char to_code = '\0';
    static Str (*conv_from) (Str) = NULL;
    static Str (*conv_to) (Str) = NULL;
    int i;
    Str os;

    if (fc == tc || fc == CODE_ASCII || tc == CODE_ASCII)
	return is;

    if (fc == CODE_INNER_EUC)
	os = is;
    else {
	/* Also look up when nothing is cached yet: the initial from_code
	 * is '\0', and comparing codes alone would let a '\0' request call
	 * through a NULL pointer. */
	if (conv_from == NULL || from_code != fc) {
	    for (i = 0; ToEJ[i].key != '\0'; i++) {
		if (ToEJ[i].key == fc)
		    break;
	    }
	    if (ToEJ[i].key == '\0') {
		n_impr(fc);
		return NULL;
	    }
	    from_code = fc;
	    conv_from = ToEJ[i].routine;
	}
	os = conv_from(is);
    }

    if (tc == CODE_INNER_EUC || tc == CODE_EUC)
	return os;

    if (conv_to == NULL || to_code != tc) {
	for (i = 0; FromEJ[i].key != '\0'; i++) {
	    if (FromEJ[i].key == tc)
		break;
	}
	if (FromEJ[i].key == '\0') {
	    n_impr(tc);
	    return NULL;
	}
	SIcode = FromEJ[i].ShiftIn;
	SOcode = FromEJ[i].ShiftOut;
	to_code = tc;
	conv_to = FromEJ[i].routine;
    }
    return conv_to(os);
}

/*
 * Convenience wrapper around conv_str() for a C string: wraps `is` in a
 * new Str via Strnew_charp() and converts it from `fc` to `tc`.
 */
Str
conv(char *is, char fc, char tc)
{
    return conv_str(Strnew_charp(is), fc, tc);
}

/*
 * Decode the second byte of a Shift-JIS double-byte character.
 *
 * ptr: points at the second byte (it is only read).
 * ub:  in/out, reduced lead-byte value prepared by the caller.  It is
 *      doubled; when the second byte is below 0x9f it is additionally
 *      decremented by one.
 *
 * Returns the second byte mapped to a 1-based cell number: bytes below
 * 0x9f are offset by 0x3f (after closing the gap at 0x7f), the others by
 * 0x9e.
 */
static uchar
getSLb(uchar * ptr, uchar * ub)
{
    uchar trail = *ptr;

    *ub <<= 1;
    if (trail >= 0x9f)
	return trail - 0x9e;

    *ub -= 1;
    if (trail > 0x7e)
	trail--;		/* 0x7f is skipped in the first half */
    return trail - 0x3f;
}

/*
 * Convert Shift-JIS to EUC-JP.
 *
 * ASCII bytes are copied.  Lead bytes 0x81..0x9f and 0xe0..0xef start a
 * double-byte character that is emitted as two EUC-JP bytes.  Single-byte
 * kana (0xa0..0xdf) are replaced by their two-byte equivalent from
 * han2zen_tab.  Any other first byte is dropped.  After a lead byte, an
 * invalid second byte is copied if it is ASCII and dropped otherwise.
 */
static Str
cConvSE(Str is)
{
    Str os = Strnew_size(is->length);
    uchar *cur = (uchar *) is->ptr;
    uchar *endp = (uchar *) & is->ptr[is->length];
    uchar ub = 0, lb;
    int have_lead = 0;		/* non-zero while a lead byte is pending */
    const char *kana;

    for (; cur < endp; cur++) {
	uchar c = *cur;

	if (have_lead) {
	    have_lead = 0;
	    if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
		/* JIS X 0208, 0213 */
		lb = getSLb(cur, &ub);
		ub += 0x20;
		lb += 0x20;
		Strcat_char(os, (char)(ub | 0x80));
		Strcat_char(os, (char)(lb | 0x80));
	    }
	    else if (c < 0x80) {
		/* broken ? */
		Strcat_char(os, (char)(c));
	    }
	    continue;
	}

	if (c < 0x80) {
	    /* ASCII */
	    Strcat_char(os, (char)(c));
	}
	else if (0x81 <= c && c <= 0x9f) {
	    /* JIS X 0208, 0213 */
	    ub = c & 0x7f;
	    have_lead = 1;
	}
	else if (0xa0 <= c && c <= 0xdf) {
	    /* JIS X 0201-Kana */
	    kana = han2zen_tab[c - 0xa0];
	    Strcat_char(os, (char)(kana[0] | 0x80));
	    Strcat_char(os, (char)(kana[1] | 0x80));
	}
	else if (0xe0 <= c && c <= 0xef) {
	    /* JIS X 0208 (0xe0..0xfc would be needed for JIS X 0213) */
	    ub = (c & 0x7f) - 0x40;
	    have_lead = 1;
	}
    }
    return os;
}

/*
 * Convert ISO-2022-JP to EUC-JP.
 *
 * The input is scanned with a small state machine.  `cset` tracks the
 * character set selected by the last recognised escape sequence and
 * `state` tracks progress through an escape sequence or a two-byte
 * character.  Recognised escape sequences are consumed; unrecognised
 * ones are copied to the output unchanged.  Input that ends in the middle
 * of a sequence simply loses the pending bytes.
 */
static Str
cConvJE(Str is)
{				/* Convert ISO-2022-JP to EUC-JP */
    uchar *p, ub = 0;
    char cset = CSET_ASCII;
    int state = ISO_NOSTATE;
    Str os = Strnew_size(is->length);
    uchar *endp = (uchar *) & is->ptr[is->length];

    for (p = (uchar *) is->ptr; p < endp; p++) {
	switch (state) {
	case ISO_NOSTATE:
	    if (*p == ESC_CODE)	/* ESC sequence */
		state = ISO_ESC;
	    else if (cset == CSET_ASCII || *p < 0x21)
		/* ASCII mode, or a control/space byte in any mode: copy */
		Strcat_char(os, (char)(*p));
	    else if (cset == CSET_X0208 && *p <= 0x7e) {
		/* JIS X 0208 */
		ub = *p;
		state = ISO_MBYTE1;
	    }
	    else if (cset == CSET_X0201K && *p <= 0x5f) {
		/* JIS X 0201-Kana */
		Strcat_char(os, (char)(han2zen_tab[*p - 0x20][0] | 0x80));
		Strcat_char(os, (char)(han2zen_tab[*p - 0x20][1] | 0x80));
	    }
	    /* any other byte is dropped */
	    break;
	case ISO_MBYTE1:
	    /* ub holds the first byte of a two-byte character */
	    if (*p == ESC_CODE)	/* ESC sequence */
		state = ISO_ESC;	/* the pending first byte is discarded */
	    else if (0x21 <= *p && *p <= 0x7e) {	/* JIS X 0208 */
		Strcat_char(os, (char)(ub | 0x80));
		Strcat_char(os, (char)(*p | 0x80));
		state = ISO_NOSTATE;
	    }
	    else {
		/* invalid second byte: copy it, discard the first byte */
		Strcat_char(os, (char)(*p));
		state = ISO_NOSTATE;
	    }
	    break;
	case ISO_ESC:
	    if (*p == '(')	/* ESC ( F */
		state = ISO_CS94;
	    else if (*p == '$')	/* ESC $ F, ESC $ ( F */
		state = ISO_MBCS;
	    else {
		/* not a sequence handled here: pass it through */
		Strcat_char(os, ESC_CODE);
		Strcat_char(os, (char)(*p));
		state = ISO_NOSTATE;
	    }
	    break;
	case ISO_CS94:
	    /* final byte of "ESC ( F" */
	    if (*p == 'B' || *p == 'J' || *p == 'H')
		cset = CSET_ASCII;
	    else if (*p == 'I')
		cset = CSET_X0201K;
	    else {
		/* unknown final byte: pass the whole sequence through */
		Strcat_char(os, ESC_CODE);
		Strcat_char(os, '(');
		Strcat_char(os, (char)(*p));
	    }
	    state = ISO_NOSTATE;
	    break;
	case ISO_MBCS:
	    if (*p == '(') {	/* ESC $ ( F */
		state = ISO_MBCS | ISO_CS94;
		break;
	    }
	    /* fall through: *p is the final byte of "ESC $ F" */
	case ISO_MBCS | ISO_CS94:
	    if (*p == 'B' || *p == '@')
		cset = CSET_X0208;
	    else {
		/* unknown final byte: pass the sequence through; `state`
		 * still tells whether a '(' was part of it */
		Strcat_char(os, ESC_CODE);
		Strcat_char(os, '$');
		if (state == (ISO_MBCS | ISO_CS94))
		    Strcat_char(os, '(');
		Strcat_char(os, (char)(*p));
	    }
	    state = ISO_NOSTATE;
	    break;
	}
    }
    return os;
}

/*
 * Convert EUC-JP either to normalised EUC-JP (is_euc non-zero) or to
 * ISO-2022-JP (is_euc zero).
 *
 * Two-byte characters are copied, with the high bits kept for EUC output
 * and cleared for ISO-2022-JP output.  SS2 + single-byte kana is replaced
 * by its two-byte equivalent from han2zen_tab.  SS3 and the two bytes
 * that follow it are dropped (no JIS X 0212 / 0213-2 support), as is any
 * other unexpected 8-bit byte.  For ISO-2022-JP output, SIcode is written
 * before switching to two-byte text and SOcode before switching back to
 * ASCII, including at the end of the string.
 */
static Str
_cConvEE(Str is, char is_euc)
{
    uchar *cur = (uchar *) is->ptr;
    uchar *endp = (uchar *) & is->ptr[is->length];
    uchar lead = 0;
    uchar hibit = 0;		/* OR-ed into every output byte of a pair */
    char cset = CSET_ASCII;
    int state = EUC_NOSTATE;
    const char *kana;
    Str os;

    if (is_euc) {
	os = Strnew_size(is->length);
	hibit = 0x80;
    }
    else {
	/* leave some room for the escape sequences */
	os = Strnew_size(is->length * 3 / 2);
    }

    for (; cur < endp; cur++) {
	uchar c = *cur;

	if (state == EUC_SS3) {
	    /* first byte after SS3: skipped */
	    state = (EUC_SS3 | EUC_MBYTE1);
	    continue;
	}
	if (state == (EUC_SS3 | EUC_MBYTE1)) {
	    /* second byte after SS3: skipped */
	    state = EUC_NOSTATE;
	    continue;
	}
	if (state == EUC_SS2) {
	    state = EUC_NOSTATE;
	    if (c < 0xa0 || c > 0xdf)
		continue;
	    /* JIS X 0201-Kana */
	    if (!is_euc && cset != CSET_X0208) {
		Strcat_charp(os, SIcode);
		cset = CSET_X0208;
	    }
	    kana = han2zen_tab[c - 0xa0];
	    Strcat_char(os, (char)(kana[0] | hibit));
	    Strcat_char(os, (char)(kana[1] | hibit));
	    continue;
	}
	if (state == EUC_MBYTE1) {
	    state = EUC_NOSTATE;
	    if (0xa1 <= c && c <= 0xfe) {
		/* JIS X 0208, 0213-1 */
		if (!is_euc && cset != CSET_X0208) {
		    Strcat_charp(os, SIcode);
		    cset = CSET_X0208;
		}
		Strcat_char(os, (char)((lead & 0x7f) | hibit));
		Strcat_char(os, (char)((c & 0x7f) | hibit));
	    }
	    else if (c < 0x80) {
		/* broken ? */
		if (!is_euc && cset != CSET_ASCII) {
		    Strcat_charp(os, SOcode);
		    cset = CSET_ASCII;
		}
		Strcat_char(os, (char)(c));
	    }
	    continue;
	}

	/* EUC_NOSTATE */
	if (c < 0x80) {
	    /* ASCII */
	    if (!is_euc && cset != CSET_ASCII) {
		Strcat_charp(os, SOcode);
		cset = CSET_ASCII;
	    }
	    Strcat_char(os, (char)(c));
	}
	else if (0xa1 <= c && c <= 0xfe) {
	    /* JIS X 0208, 0213-1 */
	    lead = c;
	    state = EUC_MBYTE1;
	}
	else if (c == EUC_SS2_CODE) {
	    /* SS2 + JIS X 0201-Kana */
	    state = EUC_SS2;
	}
	else if (c == EUC_SS3_CODE) {
	    /* SS3 + JIS X 0212, 0213-2 */
	    state = EUC_SS3;
	}
    }

    if (!is_euc && cset != CSET_ASCII)
	Strcat_charp(os, SOcode);
    return os;
}

/* EUC-JP to EUC-JP: runs _cConvEE() with is_euc set to TRUE. */
static Str
cConvEE(Str is)
{
    return _cConvEE(is, TRUE);
}

/*
 * EUC-JP to ISO-2022-JP: runs _cConvEE() with is_euc set to FALSE, which
 * makes it emit the SIcode/SOcode escape sequences.
 */
static Str
cConvEJ(Str is)
{
    return _cConvEE(is, FALSE);
}

/*
 * Append one double-byte character to `os` in Shift-JIS form.
 *
 * ub, lb: the two bytes of the character in 7-bit form (callers pass
 *         values with the high bit cleared, or han2zen_tab entries).
 */
void
put_sjis(Str os, uchar ub, uchar lb)
{
    uchar row = ub - 0x20;
    uchar cell = lb - 0x20;
    uchar lead, trail;

    /* Two rows share one lead byte; an even row uses the upper half of
     * the trail-byte range. */
    if (row & 1)
	trail = cell + 0x3f;
    else
	trail = cell + 94 + 0x3f;
    if (trail > 0x7e)
	trail++;		/* 0x7f is never used as a trail byte */

    lead = ((row - 1) >> 1) + 0x81;
    if (lead > 0x9f)
	lead += 0x40;		/* continue in the second lead-byte range */

    Strcat_char(os, (char)(lead));
    Strcat_char(os, (char)(trail));
}

/*
 * Convert EUC-JP to Shift-JIS.
 *
 * ASCII bytes are copied and two-byte characters are re-encoded with
 * put_sjis().  SS2 + single-byte kana is replaced by its two-byte
 * equivalent from han2zen_tab.  SS3 and the two bytes after it are
 * dropped, as is any other unexpected 8-bit byte.
 */
static Str
cConvES(Str is)
{
    Str os = Strnew_size(is->length);
    uchar *cur = (uchar *) is->ptr;
    uchar *endp = (uchar *) & is->ptr[is->length];
    uchar lead = 0;
    int state = EUC_NOSTATE;
    const char *kana;

    for (; cur < endp; cur++) {
	uchar c = *cur;

	if (state == EUC_NOSTATE) {
	    if (c < 0x80) {
		/* ASCII */
		Strcat_char(os, (char)(c));
	    }
	    else if (0xa1 <= c && c <= 0xfe) {
		/* JIS X 0208, 0213-1 */
		lead = c;
		state = EUC_MBYTE1;
	    }
	    else if (c == EUC_SS2_CODE) {
		/* SS2 + JIS X 0201-Kana */
		state = EUC_SS2;
	    }
	    else if (c == EUC_SS3_CODE) {
		/* SS3 + JIS X 0212, 0213-2 */
		state = EUC_SS3;
	    }
	    continue;
	}

	if (state == EUC_MBYTE1) {
	    if (0xa1 <= c && c <= 0xfe) {
		/* JIS X 0208, 0213-1 */
		put_sjis(os, lead & 0x7f, c & 0x7f);
	    }
	    else if (c < 0x80) {
		/* broken ? */
		Strcat_char(os, (char)(c));
	    }
	}
	else if (state == EUC_SS2) {
	    if (0xa0 <= c && c <= 0xdf) {
		/* JIS X 0201-Kana */
		kana = han2zen_tab[c - 0xa0];
		put_sjis(os, kana[0], kana[1]);
	    }
	}

	/* SS3 swallows one more byte; everything else is finished. */
	if (state == EUC_SS3)
	    state = (EUC_SS3 | EUC_MBYTE1);
	else
	    state = EUC_NOSTATE;
    }
    return os;
}

/* 
 * static ushort sjis_shift[8] = { 0x7fff, 0xffff, 0x0, 0x0, 0x0,
 * 0x0, 0xffff, 0x0 }; static ushort sjis_second[16] = { 0x0, 0x0, 
 * 0x0, 0x0, 0xffff, 0xffff, 0xffff, 0xfffe, 0xffff, 0xffff, 0xffff,
 * 0xffff, 0xffff, 0xffff, 0xffff, 0xfff8 }; */

/*
 * Guess the encoding of `buf`.
 *
 * Three scanners (ISO-2022-JP, EUC-JP, Shift-JIS) examine every byte in
 * parallel.  Each keeps a status word combining a CODE_* verdict with its
 * own scanner state.  Scanning stops at the end of the buffer or as soon
 * as the EUC-JP or Shift-JIS scanner reaches CODE_ERROR.
 *
 * hint: code preferred by the caller.  CODE_INNER_EUC makes the function
 *       return '\0' at once.  CODE_EUC / CODE_SJIS win whenever the
 *       matching scanner did not reach CODE_ERROR.
 *
 * Returns:
 *   - '\0' when the ISO scanner saw no error and found no escape
 *     sequence of interest;
 *   - one of the CODE_JIS_* codes, chosen from the final bytes of the
 *     escape sequences found, when the ISO scanner saw no error;
 *   - otherwise CODE_EUC or CODE_SJIS.
 */
char
checkShiftCode(Str buf, uchar hint)
{
    /* si/so: final byte of the first two-byte / single-byte designation
     * escape sequence seen, or '\0' if none yet */
    uchar *p, si = '\0', so = '\0';
    /* NOTE: sjis_kana is recorded below but never read afterwards */
    int euc = (CODE_NORMAL | EUC_NOSTATE),
	sjis = (CODE_NORMAL | SJIS_NOSTATE), sjis_kana = CODE_NORMAL,
	iso = (CODE_NORMAL | ISO_NOSTATE), iso_kana = CODE_NORMAL;
    uchar *endp = (uchar *) & buf->ptr[buf->length];

    if (hint == CODE_INNER_EUC)
	return '\0';
    p = (uchar *) buf->ptr;
    /*
     * The end test is at the bottom of the loop, so the byte at endp
     * (buf->ptr[buf->length]) is examined too.
     * NOTE(review): presumably this is the Str terminator, which lets a
     * multibyte sequence cut off at the end of the buffer be noticed --
     * confirm that Str always stores a trailing NUL.
     */
    while (1) {
	/* ISO-2022-JP: only scanned until both si and so are known */
	if (iso != CODE_ERROR && (si == '\0' || so == '\0')) {
	    switch (ISO_STATE(iso)) {
	    case ISO_NOSTATE:
		if (*p == ESC_CODE)	/* ESC sequence */
		    iso = (CODE_STATE(iso) | ISO_ESC);
		break;
	    case ISO_ESC:
		if (*p == '(')	/* ESC ( F */
		    iso = (CODE_STATE(iso) | ISO_CS94);
		else if (*p == '$')	/* ESC $ F, ESC $ ( F */
		    iso = (CODE_STATE(iso) | ISO_MBCS);
		else
		    iso = (CODE_STATE(iso) | ISO_NOSTATE);
		break;
	    case ISO_CS94:
		if (*p == 'B' || *p == 'J' || *p == 'H')
		    so = *p;
		else if (*p == 'I')
		    iso_kana = CODE_OK;
		iso = (CODE_STATE(iso) | ISO_NOSTATE);
		break;
	    case ISO_MBCS:
		if (*p == '(') {	/* ESC $ ( F */
		    iso = (CODE_STATE(iso) | ISO_MBCS | ISO_CS94);
		    break;
		}
		/* fall through: *p is the final byte of "ESC $ F" */
	    case ISO_MBCS | ISO_CS94:
		if (*p == 'B' || *p == '@')
		    si = *p;
		iso = (CODE_STATE(iso) | ISO_NOSTATE);
		break;
	    }
	    /* any 8-bit byte rules out ISO-2022-JP */
	    if (*p & 0x80)
		iso = CODE_ERROR;
	}
	/* EUC-JP */
	if (euc != CODE_ERROR) {
	    switch (EUC_STATE(euc)) {
	    case EUC_NOSTATE:
		if (!(*p & 0x80))	/* ASCII */
		    ;
		else if (0xa1 <= *p && *p <= 0xfe)	/* JIS X 0208, 0213-1 */
		    euc = (CODE_STATE(euc) | EUC_MBYTE1);
		else if (*p == EUC_SS2_CODE)	/* SS2 + JIS X 0201-Kana */
		    euc = (CODE_STATE(euc) | EUC_SS2);
		else if (*p == EUC_SS3_CODE)	/* SS3 + JIS X 0212, 0213-2 */
		    euc = (CODE_STATE(euc) | EUC_SS3);
		else
		    euc = CODE_ERROR;
		break;
	    case EUC_MBYTE1:
		/* a two-byte lead was seen: upgrade NORMAL to OK */
		if (CODE_STATE(euc) == CODE_NORMAL)
		    euc = CODE_OK;
		/* fall through: check the trailing byte */
	    case EUC_SS3 | EUC_MBYTE1:
		if (0xa1 <= *p && *p <= 0xfe)	/* JIS X 0208, 0213-1 */
		    euc = (CODE_STATE(euc) | EUC_NOSTATE);
		else if (euc & CODE_BROKEN)
		    /* second malformed sequence: give up */
		    euc = CODE_ERROR;
		else
		    /* first malformed sequence: tolerate it */
		    euc = (CODE_BROKEN | EUC_NOSTATE);
		break;
	    case EUC_SS2:
		if (0xa0 <= *p && *p <= 0xdf)	/* JIS X 0201-Kana */
		    euc = (CODE_STATE(euc) | EUC_NOSTATE);
		else
		    euc = CODE_ERROR;
		break;
	    case EUC_SS3:
		if (0xa1 <= *p && *p <= 0xfe)	/* JIS X 0212, 0213-2 */
		    euc = (CODE_STATE(euc) | EUC_SS3 | EUC_MBYTE1);
		else
		    euc = CODE_ERROR;
		break;
	    }
	}
	/* Shift-JIS */
	if (sjis != CODE_ERROR) {
	    switch (SJIS_STATE(sjis)) {
	    case SJIS_NOSTATE:
		if (!(*p & 0x80))	/* ASCII */
		    ;
		else if (0x81 <= *p && *p <= 0x9f)
		    sjis = (CODE_STATE(sjis) | SJIS_SHIFT_L);
		else if (0xe0 <= *p && *p <= 0xef)

		    /* JIS X 0208 */
		    /* else if (0xe0 <= *p && *p <= 0xfc) */
		    /* JIS X 0213 */
		    sjis = (CODE_STATE(sjis) | SJIS_SHIFT_H);
		else if (0xa0 == *p)
		    /* 0xa0 alone is treated as a malformed byte */
		    sjis = (CODE_BROKEN | SJIS_NOSTATE);
		else if (0xa1 <= *p && *p <= 0xdf)	/* JIS X 0201-Kana 
							 */
		    sjis_kana = CODE_OK;
		else
		    sjis = CODE_ERROR;
		break;
	    case SJIS_SHIFT_L:
	    case SJIS_SHIFT_H:
		/* a lead byte was seen: upgrade NORMAL to OK */
		if (CODE_STATE(sjis) == CODE_NORMAL)
		    sjis = CODE_OK;
		if ((0x40 <= *p && *p <= 0x7e) || (0x80 <= *p && *p <= 0xfc))	/* JIS X 0208, 0213 */
		    sjis = (CODE_STATE(sjis) | SJIS_NOSTATE);
		else if (sjis & CODE_BROKEN)
		    /* second malformed sequence: give up */
		    sjis = CODE_ERROR;
		else
		    /* first malformed sequence: tolerate it */
		    sjis = (CODE_BROKEN | SJIS_NOSTATE);
		break;
	    }
	}
	if (euc == CODE_ERROR || sjis == CODE_ERROR)
	    break;
	if (p == endp)
	    break;
	p++;
    }
    /* No 8-bit byte was seen while the ISO scanner was active. */
    if (iso != CODE_ERROR) {
	if (si == '\0' && so == '\0' && iso_kana != CODE_OK)
	    return '\0';
	/* Pick the JIS variant from the escape-sequence final bytes. */
	switch (si) {
	case '@':
	    switch (so) {
	    case 'H':
		return CODE_JIS_J;
	    case 'J':
		return CODE_JIS_j;
	    case 'B':
		return CODE_JIS_m;
	    default:
		return CODE_JIS_m;
	    }
	case 'B':
	    switch (so) {
	    case 'J':
		return CODE_JIS_N;
	    case 'B':
		return CODE_JIS_n;
	    default:
		return CODE_JIS_n;
	    }
	default:
	    /* no two-byte designation seen: decide from so alone */
	    switch (so) {
	    case 'H':
		return CODE_JIS_J;
	    case 'J':
		return CODE_JIS_N;
	    case 'B':
		return CODE_JIS_n;
	    default:
		return CODE_JIS_n;
	    }
	}
    }
    /* The caller's hint wins if that scanner did not fail. */
    if (hint == CODE_EUC) {
	if (euc != CODE_ERROR)
	    return CODE_EUC;
    }
    else if (hint == CODE_SJIS) {
	if (sjis != CODE_ERROR)
	    return CODE_SJIS;
    }
    /* Otherwise prefer a scanner that is OK, then one that is still
     * NORMAL, trying EUC-JP before Shift-JIS; EUC-JP is the fallback. */
    if (CODE_STATE(euc) == CODE_OK)
	return CODE_EUC;
    if (CODE_STATE(sjis) == CODE_OK)
	return CODE_SJIS;
    if (CODE_STATE(euc) == CODE_NORMAL)
	return CODE_EUC;
    if (CODE_STATE(sjis) == CODE_NORMAL)
	return CODE_SJIS;
    return CODE_EUC;
}
#endif				/* JP_CHARSET */