diff options
author | Tatsuya Kinoshita <tats@vega.ocn.ne.jp> | 2011-05-04 07:18:09 +0000 |
---|---|---|
committer | Tatsuya Kinoshita <tats@vega.ocn.ne.jp> | 2011-05-04 07:18:09 +0000 |
commit | 5f8e0f8ef9a422691dd72e8a953a42a41478fcb4 (patch) | |
tree | 4b2df4796a534793648b3c4fc532fc36bd0cd525 /libwc/conv.c | |
parent | Releasing debian version 0.3-2.4 (diff) | |
download | w3m-5f8e0f8ef9a422691dd72e8a953a42a41478fcb4.tar.gz w3m-5f8e0f8ef9a422691dd72e8a953a42a41478fcb4.zip |
Releasing debian version 0.5.1-1 (ref: debian/0.5.1-1)
Diffstat (limited to 'libwc/conv.c')
-rw-r--r-- | libwc/conv.c | 152 |
1 files changed, 152 insertions, 0 deletions
diff --git a/libwc/conv.c b/libwc/conv.c new file mode 100644 index 0000000..be58ed4 --- /dev/null +++ b/libwc/conv.c @@ -0,0 +1,152 @@ + +#include "wc.h" +#include "wtf.h" +#include "iso2022.h" +#include "hz.h" +#ifdef USE_UNICODE +#include "ucs.h" +#include "utf8.h" +#include "utf7.h" +#endif + +char *WcReplace = "?"; +char *WcReplaceW = "??"; + +static Str wc_conv_to_ces(Str is, wc_ces ces); + +Str +wc_Str_conv(Str is, wc_ces f_ces, wc_ces t_ces) +{ + if (f_ces != WC_CES_WTF) + is = (*WcCesInfo[WC_CES_INDEX(f_ces)].conv_from)(is, f_ces); + if (t_ces != WC_CES_WTF) + return wc_conv_to_ces(is, t_ces); + else + return is; +} + +Str +wc_Str_conv_strict(Str is, wc_ces f_ces, wc_ces t_ces) +{ + Str os; + wc_option opt = WcOption; + + WcOption.strict_iso2022 = WC_TRUE; + WcOption.no_replace = WC_TRUE; + WcOption.fix_width_conv = WC_FALSE; + os = wc_Str_conv(is, f_ces, t_ces); + WcOption = opt; + return os; +} + +static Str +wc_conv_to_ces(Str is, wc_ces ces) +{ + Str os; + wc_uchar *sp = (wc_uchar *)is->ptr; + wc_uchar *ep = sp + is->length; + wc_uchar *p; + wc_status st; + + switch (ces) { + case WC_CES_HZ_GB_2312: + for (p = sp; p < ep && *p != '~' && *p < 0x80; p++) + ; + break; + case WC_CES_TCVN_5712: + case WC_CES_VISCII_11: + case WC_CES_VPS: + for (p = sp; p < ep && 0x20 <= *p && *p < 0x80; p++) + ; + break; + default: + for (p = sp; p < ep && *p < 0x80; p++) + ; + break; + } + if (p == ep) + return is; + + os = Strnew_size(is->length); + if (p > sp) + p--; /* for precompose */ + if (p > sp) + Strcat_charp_n(os, is->ptr, (int)(p - sp)); + + wc_output_init(ces, &st); + + switch (ces) { + case WC_CES_ISO_2022_JP: + case WC_CES_ISO_2022_JP_2: + case WC_CES_ISO_2022_JP_3: + case WC_CES_ISO_2022_CN: + case WC_CES_ISO_2022_KR: + case WC_CES_HZ_GB_2312: + case WC_CES_TCVN_5712: + case WC_CES_VISCII_11: + case WC_CES_VPS: +#ifdef USE_UNICODE + case WC_CES_UTF_8: + case WC_CES_UTF_7: +#endif + while (p < ep) + (*st.ces_info->push_to)(os, wtf_parse(&p), &st); + break; + 
default: + while (p < ep) { + if (*p < 0x80 && wtf_width(p + 1)) { + Strcat_char(os, (char)*p); + p++; + } else + (*st.ces_info->push_to)(os, wtf_parse(&p), &st); + } + break; + } + + wc_push_end(os, &st); + + return os; +} + +Str +wc_Str_conv_with_detect(Str is, wc_ces *f_ces, wc_ces hint, wc_ces t_ces) +{ + wc_ces detect; + + if (*f_ces == WC_CES_WTF || hint == WC_CES_WTF) { + *f_ces = WC_CES_WTF; + detect = WC_CES_WTF; + } else if (WcOption.auto_detect == WC_OPT_DETECT_OFF) { + *f_ces = hint; + detect = hint; + } else { + if (*f_ces & WC_CES_T_8BIT) + hint = *f_ces; + detect = wc_auto_detect(is->ptr, is->length, hint); + if (WcOption.auto_detect == WC_OPT_DETECT_ON) { + if ((detect & WC_CES_T_8BIT) || + ((detect & WC_CES_T_NASCII) && ! (*f_ces & WC_CES_T_8BIT))) + *f_ces = detect; + } else { + if ((detect & WC_CES_T_ISO_2022) && ! (*f_ces & WC_CES_T_8BIT)) + *f_ces = detect; + } + } + return wc_Str_conv(is, detect, t_ces); +} + +void +wc_push_end(Str os, wc_status *st) +{ + if (st->ces_info->id & WC_CES_T_ISO_2022) + wc_push_to_iso2022_end(os, st); + else if (st->ces_info->id == WC_CES_HZ_GB_2312) + wc_push_to_hz_end(os, st); +#ifdef USE_UNICODE + else if (st->ces_info->id == WC_CES_UTF_8) + wc_push_to_utf8_end(os, st); + else if (st->ces_info->id == WC_CES_UTF_7) + wc_push_to_utf7_end(os, st); +#endif +} + |