about | summary | refs | log | tree | commit | diff | stats
path: root/scripts/w3mman/w3mman2html.cgi.in
diff options
context:
space:
mode:
author: Tatsuya Kinoshita <tats@debian.org> 2013-08-01 21:58:09 +0000
committer: Tatsuya Kinoshita <tats@debian.org> 2013-08-01 22:00:31 +0000
commit: a9e13f000adaeb9eb8c9c956f7ecaaf5f58a0512 (patch)
tree: 076f525c4766158bc675ae81b3344d0c20dc5054 /scripts/w3mman/w3mman2html.cgi.in
parent: Merge from upstream on 2012-05-22 (diff)
download: w3m-a9e13f000adaeb9eb8c9c956f7ecaaf5f58a0512.tar.gz
download: w3m-a9e13f000adaeb9eb8c9c956f7ecaaf5f58a0512.zip
Correct underline processing and more UTF-8 support for w3mman2html.cgi
Patch from <https://bugs.launchpad.net/ubuntu/+source/w3m/+bug/680202>, provided by Piotr P. Karwasz.
Diffstat (limited to 'scripts/w3mman/w3mman2html.cgi.in')
-rw-r--r-- scripts/w3mman/w3mman2html.cgi.in | 8
1 file changed, 5 insertions, 3 deletions
diff --git a/scripts/w3mman/w3mman2html.cgi.in b/scripts/w3mman/w3mman2html.cgi.in
index f430307..2e3576c 100644
--- a/scripts/w3mman/w3mman2html.cgi.in
+++ b/scripts/w3mman/w3mman2html.cgi.in
@@ -126,12 +126,14 @@ while(<F>) {
s/\&/\&amp;/g;
s/\</\&lt;/g;
s/\>/\&gt;/g;
+ # non ASCII UTF-8 codepoint
+ my $utf8="[\300-\337][\200-\277]|[\340-\357][\200-\277]{2}|[\360-\367][\200-\277]{3}|[\370-\373][\200-\277]{4}|[\374\375][\200-\277]{5}";
- s@([\200-\377].)(\010{1,2}\1)+@<b>$1</b>@g;
+ s@($utf8)(\010\1)+@<b>$1</b>@g;
s@(\&\w+;|.)(\010\1)+@<b>$1</b>@g;
- s@__\010{1,2}((\<b\>)?[\200-\377].(\</b\>)?)@<u>$1</u>@g;
+ s@_\010((\<b\>)?($utf8)(\</b\>)?)@<u>$1</u>@g;
s@_\010((\<b\>)?(\&\w+\;|.)(\</b\>)?)@<u>$1</u>@g;
- s@((\<b\>)?[\200-\377].(\</b\>)?)\010{1,2}__@<u>$1</u>@g;
+ s@((\<b\>)?($utf8)(\</b\>)?)\010_@<u>$1</u>@g;
s@((\<b\>)?(\&\w+\;|.)(\</b\>)?)\010_@<u>$1</u>@g;
s@.\010(.)@$1@g;