about | summary | refs | log | blame | commit | diff | stats
path: root/debian/patches/170_w3mman2html-utf8.patch
blob: 94c5f09cce123eb3ccc06e7cc828f06d4dc5193a (plain) (tree)
1
2
3
4
5

                                                                  
                                                                 
                                                                
 












                                                                                  
















                                                                                                                                            

















                                                                                         
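Subject: More UTF-8 support and fixes for w3mman2html.cgi
 Recognise complete UTF-8 multi-byte sequences (two to six bytes) when
 converting backspace-overstruck text to <b>/<u> markup, instead of treating
 any high-bit byte plus the byte after it as one character.
 .
 Also link e-mail addresses that are not preceded by a non-word character
 (for example at the very start of a line), replace defined(%PATH), which
 newer Perl releases reject, with a plain %PATH truth test, and drop the
 "@line = ();" reset before the "$MAN -k" output is read.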
Author: Piotr P. Karwasz
Author: Justin B Rye <justin.byam.rye@gmail.com>
Origin: https://bugs.launchpad.net/ubuntu/+source/w3m/+bug/680202
Origin: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=771004

diff --git a/scripts/w3mman/w3mman2html.cgi.in b/scripts/w3mman/w3mman2html.cgi.in
index f430307..2cd00f9 100644
--- a/scripts/w3mman/w3mman2html.cgi.in
+++ b/scripts/w3mman/w3mman2html.cgi.in
@@ -34,7 +34,6 @@ Content-Type: text/html
 EOF
     $keyword =~ s:([^-\w\200-\377.,])::g;
     open(F, "$MAN -k $keyword 2> /dev/null |");
-    @line = ();
     while(<F>) {
       chop;
       $_ = &html_quote($_);
@@ -126,12 +125,14 @@ while(<F>) {
   s/\&/\&amp;/g;
   s/\</\&lt;/g;
   s/\>/\&gt;/g;
+  # non ASCII UTF-8 codepoint
+  my $utf8="[\300-\337][\200-\277]|[\340-\357][\200-\277]{2}|[\360-\367][\200-\277]{3}|[\370-\373][\200-\277]{4}|[\374\375][\200-\277]{5}";
 
-  s@([\200-\377].)(\010{1,2}\1)+@<b>$1</b>@g;
+  s@($utf8)(\010\1)+@<b>$1</b>@g;
   s@(\&\w+;|.)(\010\1)+@<b>$1</b>@g;
-  s@__\010{1,2}((\<b\>)?[\200-\377].(\</b\>)?)@<u>$1</u>@g;
+  s@_\010((\<b\>)?($utf8)(\</b\>)?)@<u>$1</u>@g;
   s@_\010((\<b\>)?(\&\w+\;|.)(\</b\>)?)@<u>$1</u>@g;
-  s@((\<b\>)?[\200-\377].(\</b\>)?)\010{1,2}__@<u>$1</u>@g;
+  s@((\<b\>)?($utf8)(\</b\>)?)\010_@<u>$1</u>@g;
   s@((\<b\>)?(\&\w+\;|.)(\</b\>)?)\010_@<u>$1</u>@g;
   s@.\010(.)@$1@g;
 
@@ -156,7 +157,7 @@ EOF
   }
 
   s@(http|ftp)://[\w.\-/~]+[\w/]@<a href="$&">$&</a>@g;
-  s@(\W)(mailto:)?(\w[\w.\-]*\@\w[\w.\-]*\.[\w.\-]*\w)@$1<a href="mailto:$3">$2$3</a>@g;
+  s@\b(mailto:|)(\w[\w.\-]*\@\w[\w.\-]*\.[\w.\-]*\w)@<a href="mailto:$2">$1$2</a>@g;
   s@(\W)(\~?/[\w.][\w.\-/~]*)@$1 . &file_ref($2)@ge;
   s@(include(<\/?[bu]\>|\s)*\&lt;)([\w.\-/]+)@$1 . &include_ref($3)@ge;
   if ($prev && m@^\s*(\<[bu]\>)*(\w[\w.\-]*)(\</[bu]\>)*(\([\dm]\w*\))@) {
@@ -220,7 +221,7 @@ sub is_command {
   local($p);
 
   (! -d && -x) || return 0;
-  if (! defined(%PATH)) {
+  if (! %PATH) {
     for $p (split(":", $ENV{'PATH'})) {
       $p =~ s@/+$@@;
       $PATH{$p} = 1;