From a9e13f000adaeb9eb8c9c956f7ecaaf5f58a0512 Mon Sep 17 00:00:00 2001
From: Tatsuya Kinoshita
Date: Fri, 2 Aug 2013 06:58:09 +0900
Subject: Correct underline processing and more UTF-8 support for w3mman2html.cgi

Patch from , provided by Piotr P. Karwasz.
---
 scripts/w3mman/w3mman2html.cgi.in | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/scripts/w3mman/w3mman2html.cgi.in b/scripts/w3mman/w3mman2html.cgi.in
index f430307..2e3576c 100644
--- a/scripts/w3mman/w3mman2html.cgi.in
+++ b/scripts/w3mman/w3mman2html.cgi.in
@@ -126,12 +126,14 @@ while(<F>) {
   s/\&/\&amp;/g;
   s/\</\&lt;/g;
   s/\>/\&gt;/g;
+  # non ASCII UTF-8 codepoint
+  my $utf8="[\300-\337][\200-\277]|[\340-\357][\200-\277]{2}|[\360-\367][\200-\277]{3}|[\370-\373][\200-\277]{4}|[\374\375][\200-\277]{5}";
 
-  s@([\200-\377].)(\010{1,2}\1)+@<b>$1</b>@g;
+  s@($utf8)(\010\1)+@<b>$1</b>@g;
   s@(\&\w+;|.)(\010\1)+@<b>$1</b>@g;
-  s@__\010{1,2}((\<b\>)?[\200-\377].(\</b\>)?)@<u>$1</u>@g;
+  s@_\010((\<b\>)?($utf8)(\</b\>)?)@<u>$1</u>@g;
   s@_\010((\<b\>)?(\&\w+\;|.)(\</b\>)?)@<u>$1</u>@g;
-  s@((\<b\>)?[\200-\377].(\</b\>)?)\010{1,2}__@<u>$1</u>@g;
+  s@((\<b\>)?($utf8)(\</b\>)?)\010_@<u>$1</u>@g;
   s@((\<b\>)?(\&\w+\;|.)(\</b\>)?)\010_@<u>$1</u>@g;
   s@.\010(.)@$1@g;
 
-- 
cgit v1.2.3