1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
|
#include "wc.h"
#include "iso2022.h"
#include "hz.h"
#include "wtf.h"
#ifdef USE_UNICODE
#include "ucs.h"
#endif
/*
 * Convert an HZ-encoded string into the internal WTF representation.
 *
 * is  - input string.  It is returned as-is (no copy is made) when it
 *       contains neither a byte >= 0x80 nor the escape byte WC_C_HZ_TILDA.
 * ces - not used by this function.
 *
 * Returns `is` itself in the pass-through case above; otherwise a new Str
 * holding the converted text.  Two-byte characters are emitted through
 * wtf_push() as WC_CCS_GB_2312; byte sequences that cannot be decoded are
 * handed to wtf_push_unknown().
 */
Str
wc_conv_from_hz(Str is, wc_ces ces)
{
    Str os;
    wc_uchar *sp = (wc_uchar *)is->ptr;
    wc_uchar *ep = sp + is->length;
    wc_uchar *p;
    int state = WC_HZ_NOSTATE;

    /* Skip the leading run of bytes that need no conversion at all. */
    for (p = sp; p < ep && *p < 0x80 && *p != WC_C_HZ_TILDA; p++)
        ;
    if (p == ep)
        return is;

    os = Strnew_size(is->length);
    if (p > sp)
        Strcat_charp_n(os, is->ptr, (int)(p - sp));

    for (; p < ep; p++) {
        switch (state) {
        case WC_HZ_NOSTATE:
            /* Single-byte mode. */
            if (*p == WC_C_HZ_TILDA)
                state = WC_HZ_TILDA;
            else if (WC_ISO_MAP[*p] == WC_ISO_MAP_GR)
                state = WC_HZ_MBYTE1_GR;        /* GB 2312 ? */
            else if (*p & 0x80)
                wtf_push_unknown(os, p, 1);
            else
                Strcat_char(os, (char)*p);
            break;
        case WC_HZ_TILDA:
            /* Previous byte was the escape byte in single-byte mode. */
            if (*p == WC_C_HZ_SI) {
                state = WC_HZ_MBYTE;
                break;
            }
            if (*p == WC_C_HZ_TILDA)
                Strcat_char(os, (char)*p);
            else if (*p != '\n')
                wtf_push_unknown(os, p-1, 2);
            /*
             * Escape byte followed by '\n' is a line continuation: both
             * bytes are consumed without output.  In every case reaching
             * this point the escape sequence is finished, so the decoder
             * must go back to single-byte mode; otherwise the byte after
             * the continuation would be misread as part of an escape.
             */
            state = WC_HZ_NOSTATE;
            break;
        case WC_HZ_TILDA_MB:
            /* Previous byte was the escape byte in two-byte mode. */
            if (*p == WC_C_HZ_SO || *p == '\n') {
                state = WC_HZ_NOSTATE;
                break;
            }
            else if (WC_ISO_MAP[*p & 0x7f] == WC_ISO_MAP_GL)
                /* The escape byte was actually the first byte of a pair. */
                wtf_push(os, WC_CCS_GB_2312, ((wc_uint32)*(p-1) << 8) | *p);
            else
                wtf_push_unknown(os, p-1, 2);
            state = WC_HZ_MBYTE;
            break;
        case WC_HZ_MBYTE:
            /* Two-byte mode, expecting the first byte of a pair. */
            if (*p == WC_C_HZ_TILDA)
                state = WC_HZ_TILDA_MB;
            else if (WC_ISO_MAP[*p & 0x7f] == WC_ISO_MAP_GL)
                state = WC_HZ_MBYTE1;
            else
                wtf_push_unknown(os, p, 1);
            break;
        case WC_HZ_MBYTE1:
            /* Two-byte mode, expecting the second byte of a pair. */
            if (WC_ISO_MAP[*p & 0x7f] == WC_ISO_MAP_GL)
                wtf_push(os, WC_CCS_GB_2312, ((wc_uint32)*(p-1) << 8) | *p);
            else
                wtf_push_unknown(os, p-1, 2);
            state = WC_HZ_MBYTE;
            break;
        case WC_HZ_MBYTE1_GR:
            /* Second byte of a pair that started with a GR byte in
             * single-byte mode. */
            if (WC_ISO_MAP[*p] == WC_ISO_MAP_GR)
                wtf_push(os, WC_CCS_GB_2312, ((wc_uint32)*(p-1) << 8) | *p);
            else
                wtf_push_unknown(os, p-1, 2);
            state = WC_HZ_NOSTATE;
            break;
        }
    }

    /* Input ended in the middle of a sequence: the last byte was consumed
     * but never emitted, so report it as undecodable. */
    switch (state) {
    case WC_HZ_TILDA:
    case WC_HZ_TILDA_MB:
    case WC_HZ_MBYTE1:
    case WC_HZ_MBYTE1_GR:
        wtf_push_unknown(os, p-1, 1);
        break;
    }
    return os;
}
/*
 * Append one character to `os` in HZ encoding.
 *
 * os - output string being built.
 * cc - character to encode (character set in cc.ccs, code in cc.code).
 * st - conversion status; st->gl is set to 1 after the
 *      WC_C_HZ_TILDA WC_C_HZ_SI sequence has been written and back to 0
 *      after WC_C_HZ_TILDA WC_C_HZ_SO has been written.
 *
 * Characters in a set other than US-ASCII / GB 2312 are first converted
 * (when Unicode conversion is compiled in and enabled) or downgraded to
 * the matching "unknown" set, and then processed again.
 */
void
wc_push_to_hz(Str os, wc_wchar_t cc, wc_status *st)
{
    for (;;) {
        if (cc.ccs == WC_CCS_US_ASCII) {
            if (st->gl) {
                Strcat_char(os, WC_C_HZ_TILDA);
                Strcat_char(os, WC_C_HZ_SO);
                st->gl = 0;
            }
            /* The escape byte itself is written doubled. */
            if ((char)cc.code == WC_C_HZ_TILDA)
                Strcat_char(os, WC_C_HZ_TILDA);
            Strcat_char(os, (char)cc.code);
            return;
        }

        if (cc.ccs == WC_CCS_GB_2312) {
            if (! st->gl) {
                Strcat_char(os, WC_C_HZ_TILDA);
                Strcat_char(os, WC_C_HZ_SI);
                st->gl = 1;
            }
            /* High byte first, each reduced to 7 bits. */
            Strcat_char(os, (char)((cc.code >> 8) & 0x7f));
            Strcat_char(os, (char)(cc.code & 0x7f));
            return;
        }

        if (cc.ccs == WC_CCS_UNKNOWN_W || cc.ccs == WC_CCS_UNKNOWN) {
            /* Unencodable character: drop it or write the replacement. */
            if (WcOption.no_replace)
                return;
            if (st->gl) {
                Strcat_char(os, WC_C_HZ_TILDA);
                Strcat_char(os, WC_C_HZ_SO);
                st->gl = 0;
            }
            Strcat_charp(os,
                         cc.ccs == WC_CCS_UNKNOWN_W ? WC_REPLACE_W : WC_REPLACE);
            return;
        }

        /* Any other character set: remap it and go around again. */
#ifdef USE_UNICODE
        if (WcOption.ucs_conv) {
            cc = wc_any_to_any_ces(cc, st);
            continue;
        }
#endif
        cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN;
    }
}
/*
 * Finish an HZ output stream: if st->gl is set, append
 * WC_C_HZ_TILDA WC_C_HZ_SO to `os` and clear st->gl.
 * Does nothing when st->gl is already 0.
 */
void
wc_push_to_hz_end(Str os, wc_status *st)
{
    if (! st->gl)
        return;

    Strcat_char(os, WC_C_HZ_TILDA);
    Strcat_char(os, WC_C_HZ_SO);
    st->gl = 0;
}
|