aboutsummaryrefslogtreecommitdiffstats
path: root/libwc/map/mk_ucs_map.pl
blob: 347ad3db287785824b222d2d441aba31d7210b8d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# Load the charset table stored after __END__.
#
# Each usable record has three whitespace-separated fields:
#   1. the name used to build the output file name and the C identifiers,
#   2. the path of the mapping file to read,
#   3. a free-form description (may contain spaces; copied into a C comment).
#
# Results:
#   @NAME  - names in table order (drives the generation loop below)
#   %MAP   - name => mapping-file path
#   %CODE  - name => description
@NAME = ();
while(<DATA>) {
  # Remove the line terminator together with any trailing blanks.  This
  # replaces an unconditional chop, which would delete the last real
  # character of a final line that has no newline.
  s/\s+$//;
  # Blank separator lines and records with fewer than three fields are ignored.
  (($n, $m, $c) = split(" ", $_, 3)) >= 3 || next;
  push(@NAME, $n);
  $MAP{$n} = $m;
  $CODE{$n} = $c;
}

# For every charset listed in @NAME, read its mapping file and write
# "<name>_ucs.map", a C fragment containing:
#   - <name>_ucs_map[0x80]: code points for bytes 0x80..0xFF (0 = no entry),
#   - N_ucs_<name>_map and ucs_<name>_map[]: the reverse table, sorted by
#     code point.
# Only mappings whose code point is >= 0xA0 are recorded.
foreach my $name (@NAME) {
  my $code = $CODE{$name};
  my $map = $MAP{$name};

  # Progress line on STDOUT.
  print "$name\t$map\t$code\n";

  # Open the input first: if it is missing we stop before the existing
  # output file is truncated, instead of silently emitting an all-zero table.
  open(MAP, "<", $map)
    || die "$0: cannot read $map: $!\n";
  my $outfile = "${name}_ucs.map";
  open(OUT, ">", $outfile)
    || die "$0: cannot write $outfile: $!\n";

  my @to_ucs = ();     # byte value  => code point
  my %from_ucs = ();   # code point  => byte value (bytes >= 0x80 only)
  while(<MAP>) {
    /^#/ && next;      # whole-line comment
    s/#.*//;           # trailing comment
    my ($i, $u);
    (($i, $u) = split(" ")) || next;   # skip lines with no fields
    $i = hex($i);
    $u = hex($u);
    next if $u < 0xa0;

    $to_ucs[$i] = $u;
    if ($i < 0x80) {
      # A byte in the lower half mapping to a code point >= 0xA0 is only
      # reported; it is deliberately kept out of the reverse table.
      print STDERR "$map $i $u\n";
    } else {
      $from_ucs{$u} = $i;
    }
  }
  close(MAP);

  # A longer generated header naming the data source
  # (ftp://ftp.unicode.org/Public/MAPPINGS/$map) was disabled in the
  # original script; only the short description comment is emitted.
  print OUT <<EOF;
/* $code */

static wc_uint16 ${name}_ucs_map[ 0x80 ] = {
EOF

  # 16 rows of 8 entries cover bytes 0x80 (0x10 * 8) through 0xFF.
  foreach my $row (0x10 .. 0x1F) {
    print OUT " ";
    foreach my $col (0 .. 7) {
      my $ucs = $to_ucs[$row * 8 + $col];
      if ($ucs) {
        printf OUT " 0x%.4X,", $ucs;
      } else {
        # Padded so empty slots line up with the 0xXXXX entries.
        print OUT " 0,     ";
      }
    }
    print OUT "\n";
  }

  # Reverse table, in ascending code-point order.
  my @ucs = sort { $a <=> $b } keys %from_ucs;
  my $nucs = @ucs + 0;

  print OUT <<EOF;
};

#define N_ucs_${name}_map $nucs

static wc_map ucs_${name}_map[ N_ucs_${name}_map ] = {
EOF
  for(@ucs) {
    printf OUT "  { 0x%.4X, 0x%.2X },\n", $_, $from_ucs{$_};
  }

  print OUT <<EOF;
};
EOF

  # Closing explicitly surfaces deferred write errors (e.g. a full disk).
  close(OUT)
    || die "$0: error writing $outfile: $!\n";
}

__END__
jisx0201k	EASTASIA/JIS/JIS0201.TXT	JIS X 0201 (Japanese Kana)
iso88591	ISO8859/8859-1.TXT	ISO-8859-1 (Latin 1)
iso88592	ISO8859/8859-2.TXT	ISO-8859-2 (Latin 2)
iso88593	ISO8859/8859-3.TXT	ISO-8859-3 (Latin 3)
iso88594	ISO8859/8859-4.TXT	ISO-8859-4 (Latin 4)
iso88595	ISO8859/8859-5.TXT	ISO-8859-5 (Cyrillic)
iso88596	ISO8859/8859-6.TXT	ISO-8859-6 (Arabic)
iso88597	ISO8859/8859-7.TXT	ISO-8859-7 (Greek)
iso88598	ISO8859/8859-8.TXT	ISO-8859-8 (Hebrew)
iso88599	ISO8859/8859-9.TXT	ISO-8859-9 (Latin 5)
iso885910	ISO8859/8859-10.TXT	ISO-8859-10 (Latin 6)
iso885913	ISO8859/8859-13.TXT	ISO-8859-13 (Latin 7)
iso885914	ISO8859/8859-14.TXT	ISO-8859-14 (Latin 8)
iso885915	ISO8859/8859-15.TXT	ISO-8859-15 (Latin 9)
iso885916	ISO8859/8859-16.TXT	ISO-8859-16 (Romanian)

cp856	VENDORS/MISC/CP856.TXT		CP856 (Hebrew)                  
cp1006	VENDORS/MISC/CP1006.TXT		IBM CP1006 (Arabic)
koi8r	VENDORS/MISC/KOI8-R.TXT		KOI8-R (Cyrillic)

nextstep	VENDORS/NEXT/NEXTSTEP.TXT	NeXTSTEP

cp437	VENDORS/MICSFT/PC/CP437.TXT	CP437 (Latin)
cp737	VENDORS/MICSFT/PC/CP737.TXT	CP737 (Greek)
cp775	VENDORS/MICSFT/PC/CP775.TXT	CP775 (Baltic Rim)
cp850	VENDORS/MICSFT/PC/CP850.TXT	CP850 (Latin 1)
cp852	VENDORS/MICSFT/PC/CP852.TXT	CP852 (Latin 2)
cp855	VENDORS/MICSFT/PC/CP855.TXT	CP855 (Cyrillic)
cp857	VENDORS/MICSFT/PC/CP857.TXT	CP857 (Turkish)
cp860	VENDORS/MICSFT/PC/CP860.TXT	CP860 (Portuguese)
cp861	VENDORS/MICSFT/PC/CP861.TXT	CP861 (Icelandic)
cp862	VENDORS/MICSFT/PC/CP862.TXT	CP862 (Hebrew)
cp863	VENDORS/MICSFT/PC/CP863.TXT	CP863 (Canada French)
cp864	VENDORS/MICSFT/PC/CP864.TXT	CP864 (Arabic)
cp865	VENDORS/MICSFT/PC/CP865.TXT	CP865 (Nordic)
cp866	VENDORS/MICSFT/PC/CP866.TXT	CP866 (Cyrillic Russian)
cp869	VENDORS/MICSFT/PC/CP869.TXT	CP869 (Greek 2)
cp874	VENDORS/MICSFT/PC/CP874.TXT	CP874 (Thai)

cp1250	VENDORS/MICSFT/WINDOWS/CP1250.TXT	CP1250 (Latin 2)
cp1251	VENDORS/MICSFT/WINDOWS/CP1251.TXT	CP1251 (Cyrillic)
cp1252	VENDORS/MICSFT/WINDOWS/CP1252.TXT	CP1252 (Latin 1)
cp1253	VENDORS/MICSFT/WINDOWS/CP1253.TXT	CP1253 (Greek)
cp1254	VENDORS/MICSFT/WINDOWS/CP1254.TXT	CP1254 (Turkish)
cp1255	VENDORS/MICSFT/WINDOWS/CP1255.TXT	CP1255 (Hebrew)
cp1256	VENDORS/MICSFT/WINDOWS/CP1256.TXT	CP1256 (Arabic)
cp1257	VENDORS/MICSFT/WINDOWS/CP1257.TXT	CP1257 (Baltic Rim)