diff options
| author | Dai Sato <satodai@w3m.jp> | 2007-06-07 10:59:51 +0000 | 
|---|---|---|
| committer | Dai Sato <satodai@w3m.jp> | 2007-06-07 10:59:51 +0000 | 
| commit | cb4a4e90d6bebb57752455dd23d05b0c18259d6a (patch) | |
| tree | c66eecba760a058dee00f3be595be2e5f05f4313 | |
| parent | add handling xterm-incompatible terminals without gpm. [w3m-dev 04278] (diff) | |
| download | w3m-cb4a4e90d6bebb57752455dd23d05b0c18259d6a.tar.gz w3m-cb4a4e90d6bebb57752455dd23d05b0c18259d6a.zip | |
fix charset handling. [w3m-dev 04279]
| -rw-r--r-- | ChangeLog | 8 | ||||
| -rw-r--r-- | libwc/ces.h | 1 | ||||
| -rw-r--r-- | libwc/charset.c | 120 | 
3 files changed, 68 insertions, 61 deletions
```diff
diff --git a/ChangeLog b/ChangeLog
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,11 @@
 2007-06-07  Hironori Sakamoto <hsaka@mth.biglobe.ne.jp>
 
+	* [w3m-dev 04279] charset
+	* libwc/ces.h, libwc/charset.c: set charset to Shift_JIS, when locale 
+	  is japanese, jp_JP.PCK, ja_JP.IBM-932, ja_JP.IBM-943, or windows-31j.
+
+2007-06-07  Hironori Sakamoto <hsaka@mth.biglobe.ne.jp>
+
 	* [w3m-dev 04278] Re: segfault on CentOS4.5/libgc6.8
 	* term.c: add handling xterm-incompatible terminals without gpm.
 
@@ -8927,4 +8933,4 @@ a	* [w3m-dev 03276] compile error on EWS4800
 	* release-0-2-1
 	* import w3m-0.2.1
 
-$Id: ChangeLog,v 1.1000 2007/06/07 10:54:10 inu Exp $
+$Id: ChangeLog,v 1.1001 2007/06/07 10:59:51 inu Exp $
diff --git a/libwc/ces.h b/libwc/ces.h
index 6b6720a..55e2806 100644
--- a/libwc/ces.h
+++ b/libwc/ces.h
@@ -176,6 +176,7 @@ enum {
 
 #define WC_CES_SHIFT_JIS	(WC_CES_E_PRIV2|WC_CES_N_SHIFT_JIS)
 #define WC_CES_CP932		WC_CES_SHIFT_JIS
+#define WC_CES_CP943		WC_CES_SHIFT_JIS
 #define WC_CES_SHIFT_JISX0213	(WC_CES_E_PRIV2|WC_CES_N_SHIFT_JISX0213)
 #define WC_CES_GBK		(WC_CES_E_PRIV2|WC_CES_N_GBK)
 #define WC_CES_CP936		WC_CES_GBK
diff --git a/libwc/charset.c b/libwc/charset.c
index d86a129..3f0b74d 100644
--- a/libwc/charset.c
+++ b/libwc/charset.c
@@ -32,6 +32,46 @@ static struct {
   { NULL, 0 }
 };
 
+static wc_ces
+wc_codepage(int n)
+{
+	switch (n) {
+	case 437: return WC_CES_CP437;
+	case 737: return WC_CES_CP737;
+	case 775: return WC_CES_CP775;
+	case 850: return WC_CES_CP850;
+	case 852: return WC_CES_CP852;
+	case 855: return WC_CES_CP855;
+	case 856: return WC_CES_CP856;
+	case 857: return WC_CES_CP857;
+	case 860: return WC_CES_CP860;
+	case 861: return WC_CES_CP861;
+	case 862: return WC_CES_CP862;
+	case 863: return WC_CES_CP863;
+	case 864: return WC_CES_CP864;
+	case 865: return WC_CES_CP865;
+	case 866: return WC_CES_CP866;
+	case 869: return WC_CES_CP869;
+	case 874: return WC_CES_CP874;
+	case 932: return WC_CES_CP932;		/* CP932 = Shift_JIS */
+	case 936: return WC_CES_CP936;		/* CP936 = GBK > EUC_CN */
+	case 943: return WC_CES_CP943;		/* CP943 = Shift_JIS */
+	case 949: return WC_CES_CP949;		/* CP949 = UHC > EUC_KR */
+	case 950: return WC_CES_CP950;		/* CP950 = Big5 */
+	case 1006: return WC_CES_CP1006;
+	case 1250: return WC_CES_CP1250;
+	case 1251: return WC_CES_CP1251;
+	case 1252: return WC_CES_CP1252;
+	case 1253: return WC_CES_CP1253;
+	case 1254: return WC_CES_CP1254;
+	case 1255: return WC_CES_CP1255;
+	case 1256: return WC_CES_CP1256;
+	case 1257: return WC_CES_CP1257;
+	case 1258: return WC_CES_CP1258;
+	}
+	return 0;
+}
+
 wc_ces
 wc_guess_charset(char *charset, wc_ces orig)
 {
@@ -119,6 +159,11 @@ wc_charset_to_ces(char *charset)
 	    if (n >= 1 && n <= 16 && n != 12)
 		return (WC_CES_E_ISO_8859 | n);
 	    return WC_CES_ISO_8859_1;
+	} else if (! strncmp(p, "ibm", 3)) {
+	    p += 3;
+	    if (*p >= '1' && *p <= '9')
+	    	return wc_codepage(atoi(p));
+	    return wc_charset_to_ces(p);
 	}
 	break;
     case 'j':
@@ -135,6 +180,10 @@ wc_charset_to_ces(char *charset)
 	    ! strncmp(p, "sjis", 4))
 	    return WC_CES_SHIFT_JIS;
 	break;
+    case 'p':
+	if (! strncmp(p, "pck", 3))
+	    return WC_CES_SHIFT_JIS;
+	break;
     case 'g':
 	if (! strncmp(p, "gb18030", 7) ||
 	    ! strncmp(p, "gbk2k", 5))
@@ -210,58 +259,18 @@ wc_charset_to_ces(char *charset)
 	    return WC_CES_EUC_CN;
 	if (*(p+1) != 'p')
 	    break;
-	n = atoi(p + 2);
-	switch (n) {
-	case 437: return WC_CES_CP437;
-	case 737: return WC_CES_CP737;
-	case 775: return WC_CES_CP775;
-	case 850: return WC_CES_CP850;
-	case 852: return WC_CES_CP852;
-	case 855: return WC_CES_CP855;
-	case 856: return WC_CES_CP856;
-	case 857: return WC_CES_CP857;
-	case 860: return WC_CES_CP860;
-	case 861: return WC_CES_CP861;
-	case 862: return WC_CES_CP862;
-	case 863: return WC_CES_CP863;
-	case 864: return WC_CES_CP864;
-	case 865: return WC_CES_CP865;
-	case 866: return WC_CES_CP866;
-	case 869: return WC_CES_CP869;
-	case 874: return WC_CES_CP874;
-	case 932: return WC_CES_CP932;		/* CP932 = Shift_JIS */
-	case 936: return WC_CES_CP936;		/* CP936 = GBK > EUC_CN */
-	case 949: return WC_CES_CP949;		/* CP949 = UHC > EUC_KR */
-	case 950: return WC_CES_CP950;		/* CP950 = Big5 */
-	case 1006: return WC_CES_CP1006;
-	case 1250: return WC_CES_CP1250;
-	case 1251: return WC_CES_CP1251;
-	case 1252: return WC_CES_CP1252;
-	case 1253: return WC_CES_CP1253;
-	case 1254: return WC_CES_CP1254;
-	case 1255: return WC_CES_CP1255;
-	case 1256: return WC_CES_CP1256;
-	case 1257: return WC_CES_CP1257;
-	case 1258: return WC_CES_CP1258;
-	}
+	p += 2;
+	if (*p >= '1' &&  *p <= '9')
+	    return wc_codepage(atoi(p));
 	break;
     case 'w':
 	if (strncmp(p, "windows", 7))
 	    break;
+	p += 7;
 	if (! strncmp(p, "31j", 3))
 	    return WC_CES_CP932;
-	n = atoi(p + 7);
-	switch (n) {
-	case 1250: return WC_CES_CP1250;
-	case 1251: return WC_CES_CP1251;
-	case 1252: return WC_CES_CP1252;
-	case 1253: return WC_CES_CP1253;
-	case 1254: return WC_CES_CP1254;
-	case 1255: return WC_CES_CP1255;
-	case 1256: return WC_CES_CP1256;
-	case 1257: return WC_CES_CP1257;
-	case 1258: return WC_CES_CP1258;
-	}
+	if (*p >= '1' &&  *p <= '9')
+	    return wc_codepage(atoi(p));
 	break;
     }
     return 0;
@@ -345,18 +354,9 @@ wc_charset_short_to_ces(char *charset)
     case 'c':
 	return WC_CES_ISO_2022_CN;
     case 'w':
-	n = atoi(p + 1);
-	switch (n) {
-	case 1250: return WC_CES_CP1250;
-	case 1251: return WC_CES_CP1251;
-	case 1252: return WC_CES_CP1252;
-	case 1253: return WC_CES_CP1253;
-	case 1254: return WC_CES_CP1254;
-	case 1255: return WC_CES_CP1255;
-	case 1256: return WC_CES_CP1256;
-	case 1257: return WC_CES_CP1257;
-	case 1258: return WC_CES_CP1258;
-	}
+	p++;
+	if (*p >= '1' &&  *p <= '9')
+	    return wc_codepage(atoi(p));
 	break;
     case 'r':
 	return WC_CES_RAW;
@@ -368,7 +368,7 @@ wc_ces
 wc_locale_to_ces(char *locale)
 {
     char *p = locale;
-    char buf[6];
+    char buf[8];
     int n;
 
     if (*p == 'C' && *(p+1) == '\0')
@@ -380,7 +380,7 @@ wc_locale_to_ces(char *locale)
 	    return wc_charset_to_ces(cs);
     }
 #endif
-    for (n = 0; *p && *p != '.' && n < 5; p++) {
+    for (n = 0; *p && *p != '.' && n < 7; p++) {
 	if ((unsigned char)*p > 0x20)
 	    buf[n++] = tolower(*p);
     }
```
