1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
|
#include "wc.h"
#include "wtf.h"
#include "iso2022.h"
#include "hz.h"
#ifdef USE_UNICODE
#include "ucs.h"
#include "utf8.h"
#include "utf7.h"
#endif
/*
 * Global replacement strings. NOTE(review): presumably these are what the
 * output converters emit in place of a character that cannot be represented
 * in the target encoding ("?" for a single-width character, "??" for a
 * double-width one) -- their use is not visible in this part of the file,
 * confirm against the push_to implementations.
 */
char *WcReplace = "?";
char *WcReplaceW = "??";
/* Forward declaration: defined further down, called by wc_Str_conv(). */
static Str wc_conv_to_ces(Str is, wc_ces ces);
/*
 * Convert `is` from character encoding scheme `f_ces` to `t_ces`.
 *
 * WC_CES_WTF is treated as the intermediate form: unless the input is
 * already tagged WC_CES_WTF it is first passed through the source scheme's
 * conv_from handler (looked up in WcCesInfo), and unless the requested
 * output is WC_CES_WTF the result is then handed to wc_conv_to_ces().
 *
 * Returns the converted Str. When both schemes are WC_CES_WTF no conversion
 * is performed and the input Str itself is returned.
 */
Str
wc_Str_conv(Str is, wc_ces f_ces, wc_ces t_ces)
{
    Str wtf = is;

    /* Step 1: source encoding -> intermediate form (skipped if already there). */
    if (f_ces != WC_CES_WTF) {
        wtf = (*WcCesInfo[WC_CES_INDEX(f_ces)].conv_from)(is, f_ces);
    }

    /* Step 2: intermediate form -> target encoding (skipped if that is the target). */
    if (t_ces == WC_CES_WTF) {
        return wtf;
    }
    return wc_conv_to_ces(wtf, t_ces);
}
/*
 * Run wc_Str_conv() with a temporarily overridden global WcOption:
 * strict_iso2022 and no_replace are forced to WC_TRUE and fix_width_conv to
 * WC_FALSE for the duration of the call. The complete previous option set is
 * put back before returning, so callers see WcOption unchanged.
 *
 * Returns whatever wc_Str_conv() returns for (is, f_ces, t_ces).
 */
Str
wc_Str_conv_strict(Str is, wc_ces f_ces, wc_ces t_ces)
{
    wc_option saved = WcOption;     /* snapshot of the caller's options */
    Str result;

    WcOption.fix_width_conv = WC_FALSE;
    WcOption.no_replace = WC_TRUE;
    WcOption.strict_iso2022 = WC_TRUE;

    result = wc_Str_conv(is, f_ces, t_ces);

    WcOption = saved;               /* undo the temporary overrides */
    return result;
}
/*
 * Convert `is` (in the intermediate WTF form) to the encoding scheme `ces`.
 *
 * The leading run of bytes that can be emitted unchanged is located first.
 * If that run covers the whole input, `is` itself is returned and nothing is
 * allocated. Otherwise a new Str is built: the unchanged prefix is copied
 * verbatim and the rest is fed, one wtf_parse()d character at a time, to the
 * target scheme's push_to handler, followed by wc_push_end().
 */
static Str
wc_conv_to_ces(Str is, wc_ces ces)
{
    wc_uchar *const begin = (wc_uchar *)is->ptr;
    wc_uchar *const end = begin + is->length;
    wc_uchar *cur;
    wc_status st;
    Str os;
    int stop_on_tilde = 0;      /* '~' also terminates the plain prefix */
    int stop_on_control = 0;    /* bytes below 0x20 also terminate it */
    int ascii_shortcut = 1;     /* may copy 7-bit bytes without push_to */

    /* Which 7-bit bytes may NOT be passed through untouched for this scheme. */
    switch (ces) {
    case WC_CES_HZ_GB_2312:
        /* presumably because '~' introduces HZ escapes -- see RFC 1843 */
        stop_on_tilde = 1;
        break;
    case WC_CES_TCVN_5712:
    case WC_CES_VISCII_11:
    case WC_CES_VPS:
        stop_on_control = 1;
        break;
    default:
        break;
    }

    /* Locate the first byte that needs real conversion. */
    for (cur = begin; cur < end; cur++) {
        const wc_uchar c = *cur;

        if (c >= 0x80)
            break;
        if (stop_on_tilde && c == '~')
            break;
        if (stop_on_control && c < 0x20)
            break;
    }
    if (cur == end)
        return is;              /* nothing to convert: hand back the input */

    os = Strnew_size(is->length);
    if (cur > begin) {
        /*
         * Back up one byte so the last plain byte goes through the converter
         * together with what follows it (original note: "for precompose").
         */
        cur--;
        if (cur > begin)
            Strcat_charp_n(os, is->ptr, (int)(cur - begin));
    }

    wc_output_init(ces, &st);

    /* These schemes route every character through push_to, even 7-bit ones. */
    switch (ces) {
    case WC_CES_ISO_2022_JP:
    case WC_CES_ISO_2022_JP_2:
    case WC_CES_ISO_2022_JP_3:
    case WC_CES_ISO_2022_CN:
    case WC_CES_ISO_2022_KR:
    case WC_CES_HZ_GB_2312:
    case WC_CES_TCVN_5712:
    case WC_CES_VISCII_11:
    case WC_CES_VPS:
#ifdef USE_UNICODE
    case WC_CES_UTF_8:
    case WC_CES_UTF_7:
#endif
        ascii_shortcut = 0;
        break;
    default:
        break;
    }

    while (cur < end) {
        /*
         * Shortcut: a 7-bit byte is copied as-is, but only when whatever
         * follows it reports a non-zero wtf_width().
         * NOTE(review): for the last byte this inspects the byte at `end`,
         * presumably the Str terminator -- confirm Str guarantees one.
         */
        if (ascii_shortcut && *cur < 0x80 && wtf_width(cur + 1)) {
            Strcat_char(os, (char)*cur);
            cur++;
        } else {
            (*st.ces_info->push_to)(os, wtf_parse(&cur), &st);
        }
    }

    wc_push_end(os, &st);
    return os;
}
/*
 * Convert `is` to `t_ces`, choosing the source encoding from the caller's
 * current guess (*f_ces), a `hint`, and -- depending on WcOption.auto_detect
 * -- the result of wc_auto_detect() on the data.
 *
 *  - If either *f_ces or hint is WC_CES_WTF, the input is taken to be
 *    WC_CES_WTF and *f_ces is set accordingly.
 *  - If auto-detection is WC_OPT_DETECT_OFF, hint is used and stored in
 *    *f_ces.
 *  - Otherwise wc_auto_detect() runs (hinted with *f_ces when that has the
 *    WC_CES_T_8BIT flag, else with `hint`). The detected scheme is always
 *    what the conversion uses; *f_ces is only updated to it under the flag
 *    conditions below.
 *
 * Returns the result of wc_Str_conv() from the chosen source scheme.
 */
Str
wc_Str_conv_with_detect(Str is, wc_ces *f_ces, wc_ces hint, wc_ces t_ces)
{
    wc_ces detect;

    if (*f_ces == WC_CES_WTF || hint == WC_CES_WTF) {
        *f_ces = WC_CES_WTF;
        return wc_Str_conv(is, WC_CES_WTF, t_ces);
    }

    if (WcOption.auto_detect == WC_OPT_DETECT_OFF) {
        *f_ces = hint;
        return wc_Str_conv(is, hint, t_ces);
    }

    /* An 8-bit current encoding takes precedence over the supplied hint. */
    if (*f_ces & WC_CES_T_8BIT) {
        hint = *f_ces;
    }
    detect = wc_auto_detect(is->ptr, is->length, hint);

    if (WcOption.auto_detect == WC_OPT_DETECT_ON) {
        /* Full detection: adopt 8-bit results always, other non-ASCII
         * results only when the current encoding is not 8-bit. */
        if ((detect & WC_CES_T_8BIT)
            || ((detect & WC_CES_T_NASCII) && !(*f_ces & WC_CES_T_8BIT))) {
            *f_ces = detect;
        }
    } else if ((detect & WC_CES_T_ISO_2022) && !(*f_ces & WC_CES_T_8BIT)) {
        /* Any other mode: only an ISO 2022 result replaces a non-8-bit
         * current encoding. */
        *f_ces = detect;
    }

    return wc_Str_conv(is, detect, t_ces);
}
/*
 * Finish an output conversion by calling the scheme-specific "end" routine
 * for the encoding recorded in st->ces_info->id: ISO 2022 family, HZ, and
 * (when built with USE_UNICODE) UTF-8 and UTF-7. For every other scheme this
 * function does nothing.
 */
void
wc_push_end(Str os, wc_status *st)
{
    /* The ISO 2022 family is identified by a type flag, so test it first. */
    if (st->ces_info->id & WC_CES_T_ISO_2022) {
        wc_push_to_iso2022_end(os, st);
        return;
    }

    switch (st->ces_info->id) {
    case WC_CES_HZ_GB_2312:
        wc_push_to_hz_end(os, st);
        break;
#ifdef USE_UNICODE
    case WC_CES_UTF_8:
        wc_push_to_utf8_end(os, st);
        break;
    case WC_CES_UTF_7:
        wc_push_to_utf7_end(os, st);
        break;
#endif
    default:
        /* no end-of-output handling is dispatched for other schemes */
        break;
    }
}
|