diff options
author | Dai Sato <satodai@w3m.jp> | 2007-06-07 10:59:51 +0000 |
---|---|---|
committer | Dai Sato <satodai@w3m.jp> | 2007-06-07 10:59:51 +0000 |
commit | cb4a4e90d6bebb57752455dd23d05b0c18259d6a (patch) | |
tree | c66eecba760a058dee00f3be595be2e5f05f4313 /libwc | |
parent | add handling xterm-incompatible terminals without gpm. [w3m-dev 04278] (diff) | |
download | w3m-cb4a4e90d6bebb57752455dd23d05b0c18259d6a.tar.gz w3m-cb4a4e90d6bebb57752455dd23d05b0c18259d6a.zip |
fix charset handling. [w3m-dev 04279]
Diffstat (limited to '')
-rw-r--r-- | libwc/ces.h | 1 | ||||
-rw-r--r-- | libwc/charset.c | 120 |
2 files changed, 61 insertions, 60 deletions
diff --git a/libwc/ces.h b/libwc/ces.h index 6b6720a..55e2806 100644 --- a/libwc/ces.h +++ b/libwc/ces.h @@ -176,6 +176,7 @@ enum { #define WC_CES_SHIFT_JIS (WC_CES_E_PRIV2|WC_CES_N_SHIFT_JIS) #define WC_CES_CP932 WC_CES_SHIFT_JIS +#define WC_CES_CP943 WC_CES_SHIFT_JIS #define WC_CES_SHIFT_JISX0213 (WC_CES_E_PRIV2|WC_CES_N_SHIFT_JISX0213) #define WC_CES_GBK (WC_CES_E_PRIV2|WC_CES_N_GBK) #define WC_CES_CP936 WC_CES_GBK diff --git a/libwc/charset.c b/libwc/charset.c index d86a129..3f0b74d 100644 --- a/libwc/charset.c +++ b/libwc/charset.c @@ -32,6 +32,46 @@ static struct { { NULL, 0 } }; +static wc_ces +wc_codepage(int n) +{ + switch (n) { + case 437: return WC_CES_CP437; + case 737: return WC_CES_CP737; + case 775: return WC_CES_CP775; + case 850: return WC_CES_CP850; + case 852: return WC_CES_CP852; + case 855: return WC_CES_CP855; + case 856: return WC_CES_CP856; + case 857: return WC_CES_CP857; + case 860: return WC_CES_CP860; + case 861: return WC_CES_CP861; + case 862: return WC_CES_CP862; + case 863: return WC_CES_CP863; + case 864: return WC_CES_CP864; + case 865: return WC_CES_CP865; + case 866: return WC_CES_CP866; + case 869: return WC_CES_CP869; + case 874: return WC_CES_CP874; + case 932: return WC_CES_CP932; /* CP932 = Shift_JIS */ + case 936: return WC_CES_CP936; /* CP936 = GBK > EUC_CN */ + case 943: return WC_CES_CP943; /* CP943 = Shift_JIS */ + case 949: return WC_CES_CP949; /* CP949 = UHC > EUC_KR */ + case 950: return WC_CES_CP950; /* CP950 = Big5 */ + case 1006: return WC_CES_CP1006; + case 1250: return WC_CES_CP1250; + case 1251: return WC_CES_CP1251; + case 1252: return WC_CES_CP1252; + case 1253: return WC_CES_CP1253; + case 1254: return WC_CES_CP1254; + case 1255: return WC_CES_CP1255; + case 1256: return WC_CES_CP1256; + case 1257: return WC_CES_CP1257; + case 1258: return WC_CES_CP1258; + } + return 0; +} + wc_ces wc_guess_charset(char *charset, wc_ces orig) { @@ -119,6 +159,11 @@ wc_charset_to_ces(char *charset) if (n >= 1 && n <= 16 && n != 12) return (WC_CES_E_ISO_8859 | n); return WC_CES_ISO_8859_1; + } else if (! strncmp(p, "ibm", 3)) { + p += 3; + if (*p >= '1' && *p <= '9') + return wc_codepage(atoi(p)); + return wc_charset_to_ces(p); } break; case 'j': @@ -135,6 +180,10 @@ wc_charset_to_ces(char *charset) ! strncmp(p, "sjis", 4)) return WC_CES_SHIFT_JIS; break; + case 'p': + if (! strncmp(p, "pck", 3)) + return WC_CES_SHIFT_JIS; + break; case 'g': if (! strncmp(p, "gb18030", 7) || ! strncmp(p, "gbk2k", 5)) @@ -210,58 +259,18 @@ wc_charset_to_ces(char *charset) return WC_CES_EUC_CN; if (*(p+1) != 'p') break; - n = atoi(p + 2); - switch (n) { - case 437: return WC_CES_CP437; - case 737: return WC_CES_CP737; - case 775: return WC_CES_CP775; - case 850: return WC_CES_CP850; - case 852: return WC_CES_CP852; - case 855: return WC_CES_CP855; - case 856: return WC_CES_CP856; - case 857: return WC_CES_CP857; - case 860: return WC_CES_CP860; - case 861: return WC_CES_CP861; - case 862: return WC_CES_CP862; - case 863: return WC_CES_CP863; - case 864: return WC_CES_CP864; - case 865: return WC_CES_CP865; - case 866: return WC_CES_CP866; - case 869: return WC_CES_CP869; - case 874: return WC_CES_CP874; - case 932: return WC_CES_CP932; /* CP932 = Shift_JIS */ - case 936: return WC_CES_CP936; /* CP936 = GBK > EUC_CN */ - case 949: return WC_CES_CP949; /* CP949 = UHC > EUC_KR */ - case 950: return WC_CES_CP950; /* CP950 = Big5 */ - case 1006: return WC_CES_CP1006; - case 1250: return WC_CES_CP1250; - case 1251: return WC_CES_CP1251; - case 1252: return WC_CES_CP1252; - case 1253: return WC_CES_CP1253; - case 1254: return WC_CES_CP1254; - case 1255: return WC_CES_CP1255; - case 1256: return WC_CES_CP1256; - case 1257: return WC_CES_CP1257; - case 1258: return WC_CES_CP1258; - } + p += 2; + if (*p >= '1' && *p <= '9') + return wc_codepage(atoi(p)); break; case 'w': if (strncmp(p, "windows", 7)) break; + p += 7; if (! strncmp(p, "31j", 3)) return WC_CES_CP932; - n = atoi(p + 7); - switch (n) { - case 1250: return WC_CES_CP1250; - case 1251: return WC_CES_CP1251; - case 1252: return WC_CES_CP1252; - case 1253: return WC_CES_CP1253; - case 1254: return WC_CES_CP1254; - case 1255: return WC_CES_CP1255; - case 1256: return WC_CES_CP1256; - case 1257: return WC_CES_CP1257; - case 1258: return WC_CES_CP1258; - } + if (*p >= '1' && *p <= '9') + return wc_codepage(atoi(p)); break; } return 0; @@ -345,18 +354,9 @@ wc_charset_short_to_ces(char *charset) case 'c': return WC_CES_ISO_2022_CN; case 'w': - n = atoi(p + 1); - switch (n) { - case 1250: return WC_CES_CP1250; - case 1251: return WC_CES_CP1251; - case 1252: return WC_CES_CP1252; - case 1253: return WC_CES_CP1253; - case 1254: return WC_CES_CP1254; - case 1255: return WC_CES_CP1255; - case 1256: return WC_CES_CP1256; - case 1257: return WC_CES_CP1257; - case 1258: return WC_CES_CP1258; - } + p++; + if (*p >= '1' && *p <= '9') + return wc_codepage(atoi(p)); break; case 'r': return WC_CES_RAW; @@ -368,7 +368,7 @@ wc_ces wc_locale_to_ces(char *locale) { char *p = locale; - char buf[6]; + char buf[8]; int n; if (*p == 'C' && *(p+1) == '\0') @@ -380,7 +380,7 @@ wc_locale_to_ces(char *locale) return wc_charset_to_ces(cs); } #endif - for (n = 0; *p && *p != '.' && n < 5; p++) { + for (n = 0; *p && *p != '.' && n < 7; p++) { if ((unsigned char)*p > 0x20) buf[n++] = tolower(*p); } |