diff options
Diffstat (limited to 'Bonus')
-rw-r--r-- | Bonus/README | 48 | ||||
-rw-r--r-- | Bonus/README.eng | 49 | ||||
-rwxr-xr-x | Bonus/html2latex | 517 | ||||
-rwxr-xr-x | Bonus/htmldump | 12 | ||||
-rwxr-xr-x | Bonus/makeref | 266 | ||||
-rw-r--r-- | Bonus/scanhist.rb | 88 | ||||
-rwxr-xr-x | Bonus/wrap3m | 33 |
7 files changed, 1013 insertions, 0 deletions
diff --git a/Bonus/README b/Bonus/README new file mode 100644 index 0000000..1e3a0de --- /dev/null +++ b/Bonus/README @@ -0,0 +1,48 @@ +html2latex + + HTMLの文書をLaTeX に変換します.Rubyスクリプトです.不完全です. + ある程度の役には立つかもしれません. + + 使用法 + + html2latex file.html > file.tex + + ここに置いてある理由 + + makeref のための部品取りです :-) + +makeref + + HTMLの文書を読み,アンカーに番号を振ります.番号を振った文書を + 標準出力に書き出し,最後にその一覧を出力します.Ruby スクリプトです. + + 使用法 + + makeref [-url base_url] [file] + + -url: 文書のURLを指定します.リンクの一覧を出すときに,そのURLを + 補完するために使います. + + バグ + + HTMLの記述ミス( < ではなく < を使う,&... の最後に ; を付けない + 等)があると,悲惨な結果になることがあります. + + 半角カナ(JIS X-0201カナ)に対応していません. + +htmldump + + URL からHTML文書を読み,アンカーに番号を振って整形し,標準出力に + 書き出します. + + 使用法 + + dumphtml [URL] + + URL を省略すると,$WWW_HOME の内容を読みます. + + バグ + + URL の指す文書がHTMLでなかった場合,かわいそうなことになります. + makeref を使っているので,makeref がうまく処理できない文書の表示 + は変になります. diff --git a/Bonus/README.eng b/Bonus/README.eng new file mode 100644 index 0000000..06613ee --- /dev/null +++ b/Bonus/README.eng @@ -0,0 +1,49 @@ +html2latex + + Convert HTML document into LaTeX. Ruby script. incomplete. + + Usage: + + html2latex file.html > file.tex + + Why this script is here? + + To exploit code for makeref. :-) + +makeref + + Read HTML document and number the anchors. Print numbered document + into standard output and append reference index. Ruby script. + + Usage: + + makeref [-u] [-url base_url] [file] + + -url: Specify URL of the document. It is used to complete link + in the document. + + -u: Append URL after each anchor, instead of reference number. + + Bugs + + If there are any error in HTML (unbalanced < , character entity + without ; , etc.), output will be miserable. + +htmldump + + Read HTML document from URL, number the anchors and format it, + and output it on standard output. + + Usage + + htmldump [-u] [URL] + + -u: Append URL after each anchor, instead of reference number. + + If URL is omitted, $WWW_HOME is used instead. + + Bugs + + It assumes that the document on URL is HTML. + As it uses makeref to number the anchor, it can't handle any document + makeref can't handle. diff --git a/Bonus/html2latex b/Bonus/html2latex new file mode 100755 index 0000000..7b894e7 --- /dev/null +++ b/Bonus/html2latex @@ -0,0 +1,517 @@ +#!/usr/local/bin/ruby + +# +# HTML to LaTeX converter +# by A. Ito, 16 June, 1997 +# + +require 'kconv' + +# configuration +def gif2eps(giffile,epsfile) + cmd = "convert #{giffile} #{epsfile}" + STDERR.print cmd,"\n" + system cmd +end + +########################################################################### +class Tag + def initialize(str) + if str =~ /<(.+)>/ then + str = $1 + end + tags = str.split + @tagname = tags.shift.downcase + @vals = {} + tags.each do |t| + if t =~ /=/ then + tn,tv = t.split(/\s*=\s*/,2) + tv.sub!(/^"/,"") + tv.sub!(/"$/,"") + @vals[tn.downcase] = tv + else + @vals[t.downcase] = TRUE + end + end + end + def tagname + return @tagname + end + def each + @vals.each do |k,v| + yield k,v + end + end + def switch(k) + return @vals[k] + end +end + +class TokenStream + TAG_START = ?< + TAG_END = ?> + AMP_START = ?& + AMP_END = ?; + + AMP_REPLACE_TABLE = { + '&' => '\\&', + '>' => '$>$', + '<' => '$<$', + ' ' => '~', + '"' => '"', + } + def initialize(file) + if file.kind_of?(File) then + @f = file + else + @f = File.new(file) + end + @buf = nil + @bpos = 0 + end + + def read_until(endsym) + complete = FALSE + tag = [] + begin + while @bpos < @buf.size + c = @buf[@bpos] + if c == endsym then + tag.push(c.chr) + complete = TRUE + @bpos += 1 + break + end + if c == 10 || c == 13 then + tag.push(' ') + else + tag.push(c.chr) + end + @bpos += 1 + end + unless complete + @buf = @f.gets + @bpos = 0 + break if @f.eof? + end + end until complete + return tag.join('') + end + + def get + while TRUE + if @buf.nil? then + @buf = Kconv.toeuc(@f.gets) + if @f.eof? then + return nil + end + @bpos = 0 + end + if @buf[@bpos] == TAG_START then + return Tag.new(read_until(TAG_END)) + elsif @buf[@bpos] == AMP_START then + return replace_amp(read_until(AMP_END)) + else + i = @bpos + while i < @buf.size && @buf[i] != TAG_START && @buf[i] != AMP_START + i += 1 + end + r = @buf[@bpos,i-@bpos] + if i == @buf.size then + @buf = nil + else + @bpos = i + end + redo if r =~ /^\s+$/ + return r + end + end + end + public :eof? + def eof? + @f.eof? + end + def replace_amp(s) + if AMP_REPLACE_TABLE.key?(s) then + return AMP_REPLACE_TABLE[s] + else + return s + end + end +end + + +def print_header + print ' +\documentstyle[epsf]{jarticle} +\def\hr{\par\hbox to \textwidth{\hrulefill}} +\def\pre{\begin{quote}\def\baselinestretch{0.8}\tt\obeylines} +\def\endpre{\end{quote}} +\makeatletter +\@ifundefined{gt}{\let\gt=\dg}{} +\makeatother +' +end + + +class Environ_stack + def initialize(*envs) + @stack = envs + end + def action(tag) + if tag =~ /^!/ then # comment + return ["",nil] + end + i = @stack.size-1 + while i >= 0 + a = @stack[i].action(tag) + unless a.nil? then + return a + end + i -= 1 + end + return nil + end + def pop + @stack.pop + end + def push(env) + @stack.push(env) + end + def top + @stack[@stack.size-1] + end + def dup + @stack.push(top.clone) + end +end + + +class Environment + def initialize(interp) + @silent = FALSE + @in_table = FALSE + @interp = interp; + @align = nil; + end + def action(tag) + return @interp[tag] + end + + def flush(tok) + if tok.kind_of?(String) then + tok = tok.gsub(/&/,"\\&"); + tok = tok.gsub(/%/,"\\%"); + tok = tok.gsub(/#/,"\\#"); + tok = tok.gsub(/\$/,"\\$"); + tok = tok.gsub(/_/,"\\verb+_+"); + tok = tok.gsub(/\^/,"\\verb+^+"); + tok = tok.gsub(/~/,"\\verb+~+"); + end + if @in_table then + @table[@table_rows][@table_cols] += tok + elsif !@silent then + if !@align.nil? && tok =~ /\n$/ then + print tok.chop,"\\\\\n" + else + print tok + end + end + end + + def set_interp(interp) + @interp = interp + end + + # tag processing methods + + # <TITLE> + def do_silent(tag) + @silent = TRUE + end + + # </TITLE> + def undo_silent(tag) + @silent = FALSE + end + + # <IMG> + def img_proc(tag) + src = tag.switch('src') + newfile = src.sub(/\.GIF/i,".eps") + gif2eps(src,newfile) + flush "\\epsfile{file=#{newfile}}\n" + end + + # <TABLE> + def starttable(tag) + @table = [] + @tablespan = [] + @table_rows = -1 + @table_cols_max = 0 + @in_table = TRUE + unless tag.switch('border').nil? then + @table_border = TRUE + else + @table_border = FALSE + end + end + + # <TR> + def start_row(tag) + @table_rows += 1 + @table[@table_rows] = [] + @tablespan[@table_rows] = [] + @table_cols = -1 + @colspan = 1 + end + + # <TD> + def start_col(tag) + @colspan = tag.switch('colspan') + if @colspan.nil? then + @colspan = 1 + else + @colspan = @colspan.to_i + end + @tablespan[@table_rows][@table_cols+1] = @colspan + @table_cols += @colspan + if @table_cols > @table_cols_max then + @table_cols_max = @table_cols + end + end + + # </TABLE> + def endtable(tag) + @in_table = FALSE + flush "\\begin{tabular}{*{" + flush @table_cols_max+1 + if @table_border then + flush "}{|l}|}\n\\hline\n" + else + flush "}{l}}\n" + end + for i in 0..@table_rows + j = 0 + while j <= @table_cols + span = @tablespan[i][j] + if span == 1 then + flush @table[i][j] + elsif @table_border then + form = "|l" + if j+span > @table_cols then + form = "|l|" + end + flush "\\multicolumn{"+span.to_s+"}{"+form+"}{" + flush @table[i][j+span-1] + flush "}" + else + flush "\\multicolumn{"+span.to_s+"}{l}{" + flush @table[i][j+span-1] + flush "}" + end + j += span + if j <= @table_cols then + flush "&" + end + end + flush "\\\\\n" + flush "\\hline\n" if @table_border + end + flush "\\end{tabular}\n" + end + + # <CENTER> + def startcenter(tag) + if @in_table then + flush "\\hfil" + else + flush "\\begin{center}\n" + end + end + + # </CENTER> + def endcenter(tag) + if @in_table then + flush "\\hfil" + else + flush "\\end{center}\n" + end + end + + # <P> + def paragraph(tag) + align = tag.switch('align') + if align.nil? then + flush "\\par\n" + @endparagraph = "" + else + align = align.downcase + case align + when "left" then + flush "\\begin{flushleft}\n" + @endparagraph = "\\end{flushleft}\n" + when "center" then + flush "\\begin{center}\n" + @endparagraph = "\\end{center}\n" + when "right" then + flush "\\begin{flushright}\n" + @endparagraph = "\\end{flushright}\n" + end + end + @align = align + end + + # </P> + def endparagraph(tag) + unless @align.nil? then + @align = nil + flush @endparagraph + end + end +end + + +enum_interp = { + 'li' => ["\\item ",nil] +} + +item_interp = { + 'li' => ["\\item ",nil] +} + +desc_interp = { + 'dt' => ["\\item[",nil], + 'dd' => ["]\n",nil] +} + +table_interp = { + 'tr' => [:start_row,nil], + 'td' => [:start_col,nil], + '/tr' => ["",nil], + '/td' => ["",nil], +} + +para_interp = { + '/p' => [:endparagraph ,"pop",TRUE], +} + +main_interp = { + 'body' => ["\\begin{document}\n",nil,FALSE], + '/body' => ["\\end{document}\n",nil,FALSE], + 'head' => ["",nil,FALSE], + '/head' => ["",nil,FALSE], + 'html' => ["",nil,FALSE], + '/html' => ["",nil,FALSE], + 'title' => [:do_silent,nil,FALSE], + '/title' => [:undo_silent,nil,FALSE], + '!' => ["",nil,FALSE], + 'h1' => ["\\section{",nil,TRUE], + 'h2' => ["\\subsection{",nil,TRUE], + 'h3' => ["\\subsubsection{",nil,TRUE], + 'h4' => ["\\paragraph{",nil,TRUE], + '/h1' => ["}\n",nil,TRUE], + '/h2' => ["}\n",nil,TRUE], + '/h3' => ["}\n",nil,TRUE], + '/h4' => ["}\n",nil,TRUE], + 'a' => ["",nil,TRUE], + '/a' => ["",nil,TRUE], + 'center' => [:startcenter,nil,TRUE], + '/center' => [:endcenter,nil,TRUE], + 'ol' => ["\\begin{enumerate}\n",enum_interp,TRUE], + '/ol' => ["\\end{enumerate}\n","pop",TRUE], + 'ul' => ["\\begin{itemize}\n",item_interp,TRUE], + '/ul' => ["\\end{itemize}\n","pop",TRUE], + 'dl' => ["\\begin{description}\n",desc_interp,TRUE], + '/dl' => ["\\end{description}\n","pop",TRUE], + 'pre' => ["\\begin{pre}\n",nil,TRUE], + '/pre' => ["\\end{pre}\n",nil,TRUE], + 'p' => [:paragraph ,para_interp,TRUE], + 'br' => ["\\par ",nil,TRUE], + 'img' => [:img_proc,nil,TRUE], + 'hr' => ["\\hr ",nil,TRUE], + 'b' => ["{\\bf\\gt ",nil,TRUE], + '/b' => ["}",nil,TRUE], + 'strong' => ["{\\bf\\gt ",nil,TRUE], + '/strong' => ["}",nil,TRUE], + 'dfn' => ["{\\bf\\gt ",nil,TRUE], + '/dfn' => ["}",nil,TRUE], + 'i' => ["{\\it",nil,TRUE], + '/i' => ["}",nil,TRUE], + 'address' => ["{\\it",nil,TRUE], + '/address'=> ["}",nil,TRUE], + 'cite' => ["{\\it",nil,TRUE], + '/cite' => ["}",nil,TRUE], + 'code' => ["{\\tt",nil,TRUE], + '/code' => ["}",nil,TRUE], + 'kbd' => ["{\\tt",nil,TRUE], + '/kbd' => ["}",nil,TRUE], + 'tt' => ["{\\tt",nil,TRUE], + '/tt' => ["}",nil,TRUE], + 'samp' => ["{\\tt",nil,TRUE], + '/samp' => ["}",nil,TRUE], + 'em' => ["{\\em",nil,TRUE], + '/em' => ["}",nil,TRUE], + 'u' => ["$\\underline{\\mbox{",nil,TRUE], + '/u' => ["}}$",nil,TRUE], + 'sub' => ["${}_\mbox{",nil,TRUE], + '/sub' => ["}$",nil,TRUE], + 'sup' => ["${}^\mbox{",nil,TRUE], + '/sup' => ["}$",nil,TRUE], + 'table' => [:starttable, table_interp,TRUE], + '/table' => [:endtable, "pop",TRUE], + 'font' => ["",nil,TRUE], + '/font' => ["",nil,TRUE], +} + + + + +################################ MAIN #################################### + +$in_document = FALSE +print_header +intp = Environ_stack.new(Environment.new(main_interp)) +f = TokenStream.new(ARGV[0]) +until f.eof? + tok = f.get + if tok.kind_of?(Tag) then + case tok.tagname + when "body" + $in_document = TRUE + when "/body" + $in_document = FALSE + end + act = intp.action(tok.tagname) + if act.nil? then + STDERR.print "tag ",tok.tagname," ignored\n" + else + if act[2] && !$in_document then + print "\\begin{document}\n" + $in_document = TRUE + end + # enviconment push + if act[1].kind_of?(Hash) && + (tok.tagname != "p" || tok.switch('align') != nil) then + intp.dup + intp.top.set_interp(act[1]) + end + + if act[0].kind_of?(String) then + intp.top.flush act[0] + elsif act[0].kind_of?(Fixnum) then # interned symbol + intp.top.send(act[0],tok) + end + + # environment pop + if act[1] == "pop" then + intp.pop + end + end + elsif !tok.nil? then + intp.top.flush tok + end +end +if $in_document then + print "\\end{document}\n" +end diff --git a/Bonus/htmldump b/Bonus/htmldump new file mode 100755 index 0000000..4be60bf --- /dev/null +++ b/Bonus/htmldump @@ -0,0 +1,12 @@ +#!/bin/sh +OPT= +if [ $# -gt 0 -a $1 = "-u" ]; then + OPT=-u + shift +fi +if [ $# = 0 ]; then + URL=$WWW_HOME +else + URL=$1 +fi +w3m -dump_source $URL | makeref $OPT -url $URL | w3m -dump -F -T text/html diff --git a/Bonus/makeref b/Bonus/makeref new file mode 100755 index 0000000..9cb1942 --- /dev/null +++ b/Bonus/makeref @@ -0,0 +1,266 @@ +#!/usr/local/bin/ruby + +# HTML reference generator +# by A.Ito 1999/3/30 + +require 'kconv' + +########################################################################### +class URL + attr 'scheme' + attr 'host' + attr 'port' + attr 'file' + attr 'label' + def initialize(str) + if /([a-zA-Z+\-]+):(.*)/ =~ str then + @scheme = $1 + str = $2 + else + @scheme = 'unknown' + end + hostpart = '' + if %r'//([^/]*)(/.*)' =~ str then + hostpart = $1 + str = $2 + elsif %r'//([^/]*)$' =~ str then + hostpart = str + str = '' + end + if hostpart != '' then + if /(.*):(\d+)/ =~ hostpart then + @host = $1 + @port = $2 + else + @host = hostpart + @port = '' + end + else + @host = @port = '' + end + if /(.*)#(.*)/ =~ str then + @file = $1 + @label = $2 + else + @file = str + @label = '' + end + end + def to_s + s = "#{@scheme}:" + if s == 'news' or s == 'mailto' then + return s+@file + end + s += "//"+@host + s += ":"+@port if @port.size > 0 + s += @file + s += "#"+@label if @label.size > 0 + s + end + def complete(current) + @scheme = current.scheme if @scheme == 'unknown' + @port = current.port if @host == '' and @port == '' + @host = current.host if @host == '' + unless @file =~ %r'^/' then + @file = File.expand_path(File.dirname(current.file)+'/'+@file) + end + self + end +end + +class Tag + def initialize(str) + if str =~ /<(.+)>/ then + str = $1 + end + tags = str.split + @tagname = tags.shift.downcase + @vals = {} + tags.each do |t| + if t =~ /=/ then + tn,tv = t.split(/\s*=\s*/,2) + tv.sub!(/^"/,"") + tv.sub!(/"$/,"") + @vals[tn.downcase] = tv + else + @vals[t.downcase] = TRUE + end + end + end + def tagname + return @tagname + end + def each + @vals.each do |k,v| + yield k,v + end + end + def switch(k) + return @vals[k] + end + def to_s + if tagname =~ /!--/ then + return '' + end + t = "<"+tagname + if @vals.size == 0 then + return t+">" + end + each do |a,v| + if v == true then + t += " #{a}" + else + t += " #{a}=\"#{v}\"" + end + end + t+">" + end +end + +class TokenStream + TAG_START = ?< + TAG_END = ?> + AMP_START = ?& + AMP_END = ?; + + def initialize(file) + if file.kind_of?(IO) then + @f = file + else + @f = File.new(file) + end + @buf = nil + @bpos = 0 + end + + def read_until(endsym) + complete = FALSE + tag = [] + begin + while @bpos < @buf.size + c = @buf[@bpos] + if c == endsym then + tag.push(c.chr) + complete = TRUE + @bpos += 1 + break + end + if c == 10 || c == 13 then + tag.push(' ') + else + tag.push(c.chr) + end + @bpos += 1 + end + unless complete + @buf = @f.gets + @bpos = 0 + break if @f.eof? + end + end until complete + return tag.join('') + end + + def get + while TRUE + if @buf.nil? then + @buf = @f.gets + if @f.eof? then + return nil + end + @buf = Kconv.toeuc(@buf) + @bpos = 0 + end + if @buf[@bpos] == TAG_START then + return Tag.new(read_until(TAG_END)) + elsif @buf[@bpos] == AMP_START then + return read_until(AMP_END) + else + i = @bpos + while i < @buf.size && @buf[i] != TAG_START && @buf[i] != AMP_START + i += 1 + end + r = @buf[@bpos,i-@bpos] + if i == @buf.size then + @buf = nil + else + @bpos = i + end + redo if r =~ /^\s+$/ + return r + end + end + end + public :eof? + def eof? + @f.eof? + end +end + +################################ MAIN #################################### + +refs = [] +refnum = 0 +body_finished = false +html_finished = false +currentURL = nil +immediate_ref = false + +while ARGV[0] =~ /^-/ + case ARGV.shift + when '-url' + currentURL = URL.new(ARGV.shift) + when '-u' + immediate_ref = true + end +end + +if ARGV.size > 0 then + f = TokenStream.new(ARGV[0]) +else + f = TokenStream.new(STDIN) +end + +until f.eof? + tok = f.get + if tok.kind_of?(Tag) then + if tok.tagname == 'a' and !tok.switch('href').nil? then + refs[refnum] = tok.switch('href') + refnum += 1 + elsif tok.tagname == '/a' then + if immediate_ref then + r = refs[refnum-1] + if !currentURL.nil? then + r = URL.new(r).complete(currentURL).to_s + end + print "[#{r}]" + else + print "[#{refnum}]" + end + elsif tok.tagname == '/body' then + body_finished = true + break + elsif tok.tagname == '/html' then + html_finished = true + break + end + print tok.to_s + elsif !tok.nil? then + print tok + end +end +if !immediate_ref and refs.size > 0 then + print "<hr><h2>References</h2>\n" + for i in 0..refs.size-1 + if currentURL.nil? then + r = refs[i] + else + r = URL.new(refs[i]) + r.complete(currentURL) + r = r.to_s + end + print "[#{i+1}] #{r}<br>\n" + end +end +print "</body>\n" unless body_finished +print "</html>\n" unless html_finished diff --git a/Bonus/scanhist.rb b/Bonus/scanhist.rb new file mode 100644 index 0000000..69dcc9d --- /dev/null +++ b/Bonus/scanhist.rb @@ -0,0 +1,88 @@ +#!/usr/local/bin/ruby + +# scan history + +def usage + STDERR.print "usage: scanhist -h HISTORY ML-archive1 ML-archive2 ...\n" + exit 1 +end + +def html_quote(s) + s.gsub!(/&/,"&") + s.gsub!(/</,"<") + s.gsub!(/>/,">") + s +end + +if ARGV.size == 0 then + usage +end + +histfile = nil + +while ARGV[0] =~ /^-/ + case ARGV.shift + when "-h" + histfile = ARGV.shift + else + usage + end +end + +if histfile.nil? then + usage +end + +patched = {} +histline = {} +f = open(histfile) +while f.gets + if /Subject: (\[w3m-dev.*\])/ then + patched[$1] = true + histline[$1] = $. + end +end +f.close + +archive = {} +subject = nil +for fn in ARGV + f = open(fn) + while f.gets + if /^From / then + # beginning of a mail + subject = nil + elsif subject.nil? and /^Subject: / then + $_ =~ /Subject: (\[w3m-dev.*\])/ + subject = $1 + archive[subject] = [$_.chop.sub(/^Subject:\s*/,""),false,fn+"#"+($.).to_s] + elsif /^\+\+\+/ or /\*\*\*/ or /filename=.*(patch|diff).*/ or /^begin \d\d\d/ + archive[subject][1] = true + end + end + f.close +end + +print "<html><head><title>w3m patch configuration\n</title></head><body>\n" +print "<pre>\n" +for sub in archive.keys.sort + a = archive[sub] + if a[1] then + if patched[sub] then + print "[<a href=\"#{histfile}\##{histline[sub]}\">+</a>]" + else + print "[-]" + end + print "<a href=\"#{a[2]}\">" + print "<b>",html_quote(a[0]),"</b></a>\n" + else + if patched[sub] then + print "[<a href=\"#{histfile}\##{histline[sub]}\">o</a>]" + else + print " " + end + print "<a href=\"#{a[2]}\">" + print "<b>",html_quote(a[0]),"</b></a>\n" + end +end +print "</pre></body></html>\n" diff --git a/Bonus/wrap3m b/Bonus/wrap3m new file mode 100755 index 0000000..9555c9a --- /dev/null +++ b/Bonus/wrap3m @@ -0,0 +1,33 @@ +#!/bin/sh + +HOMEPAGE=http://ei5nazha.yz.yamagata-u.ac.jp/~aito/w3m/ +OPT="" +URL="" +for i in $@ +do + case $i in + -*) + OPT="$OPT $i" + ;; + *) + URL="$URL $i" + ;; + esac +done + +if [ -z "$URL" ]; then + URL=$HOMEPAGE +fi +URLARG="" +for u in $URL +do + if [ `expr $u : '[a-z][a-z]*://'` -gt 0 ]; then + URLARG="$URLARG $u" + elif [ -f $u -o -d $u ]; then + URLARG="$URLARG $u" + else + URLARG="$URLARG http://$u" + fi +done + +w3m $OPTS $URLARG |