diff options
author | Tatsuya Kinoshita <tats@vega.ocn.ne.jp> | 2011-05-04 07:05:14 +0000 |
---|---|---|
committer | Tatsuya Kinoshita <tats@vega.ocn.ne.jp> | 2011-05-04 07:05:14 +0000 |
commit | 72f72d64a422d6628c4796f5c0bf2e508f134214 (patch) | |
tree | 0c9ea90cc53310832c977265521fb44db24a515e /Bonus/html2latex | |
parent | Adding upstream version 0.3 (diff) | |
download | w3m-upstream/0.5.1.tar.gz w3m-upstream/0.5.1.zip |
Adding upstream version 0.5.1upstream/0.5.1
Diffstat (limited to '')
-rwxr-xr-x | Bonus/html2latex | 517 |
1 files changed, 517 insertions, 0 deletions
#!/usr/local/bin/ruby
#
# HTML to LaTeX converter
# by A. Ito, 16 June, 1997
#
# (Bonus/html2latex as added by the w3m "upstream version 0.5.1" commit.)
#
# Usage: html2latex FILE.html > FILE.tex
#
# The script tokenizes an HTML file into tags, entities and text runs, looks
# every tag up in a stack of "interpreter" tables (tag name => action) and
# prints the corresponding LaTeX on standard output.  Tags without an entry
# are reported on standard error and skipped.

begin
  require 'kconv'
rescue LoadError
  # kconv (nkf) is not guaranteed to be installed with every Ruby.  Without
  # it the input is passed through unconverted (see TokenStream#to_internal).
end

# configuration

# Convert an image file to EPS by running ImageMagick's `convert`.
#
# giffile:: path of the source image (taken from the HTML `src` attribute)
# epsfile:: path of the EPS file to create
#
# Returns what Kernel#system returns (true, false, or nil when the command
# could not be run).  The arguments are passed as an argument vector, not as
# a shell command line, so file names coming from the HTML cannot inject
# shell syntax.
def gif2eps(giffile, epsfile)
  STDERR.print "convert #{giffile} #{epsfile}\n"
  system('convert', giffile.to_s, epsfile.to_s)
end

###########################################################################

# A String that already is LaTeX source.  Environment#flush writes such a
# value verbatim instead of escaping LaTeX special characters in it.
class LatexFragment < String
end

# One HTML tag: its lower-cased name plus its attributes.
class Tag
  # name, optionally followed by = and a "double", 'single' or unquoted value
  ATTRIBUTE = /([^\s=]+)(?:\s*=\s*("[^"]*"|'[^']*'|\S+))?/

  # Lower-cased tag name; closing tags keep their leading "/" ("/table").
  # Empty string when the tag has no name at all ("< >").
  attr_reader :tagname

  # str:: the tag text, normally including the surrounding "<" and ">"
  def initialize(str)
    inner = str[/<(.+)>/m, 1] || str
    # Accept XHTML style empty-element tags: <br/>, <hr />.  (An unquoted
    # attribute value ending in "/" loses that slash as well.)
    inner = inner.sub(%r{\s*/\s*\z}, '')
    name, rest = inner.strip.split(/\s+/, 2)
    @tagname = (name || '').downcase
    @vals = {}
    rest.to_s.scan(ATTRIBUTE) do |key, value|
      @vals[key.downcase] =
        value.nil? ? true : value.sub(/\A["']/, '').sub(/["']\z/, '')
    end
  end

  # Yield every attribute as (lower-cased name, value).
  def each
    @vals.each do |k, v|
      yield k, v
    end
  end

  # Value of attribute +k+ (lower case): a String, +true+ for an attribute
  # given without a value, or nil when the attribute is absent.
  def switch(k)
    @vals[k]
  end
end

# Splits an HTML input into Tag objects, entity replacements and text runs.
class TokenStream
  TAG_START = '<'
  TAG_END   = '>'
  AMP_START = '&'
  AMP_END   = ';'

  COMMENT_START = '<!--'
  COMMENT_END   = '-->'

  # Entity text exactly as read from the input ("&" through ";") => LaTeX.
  # The keys must be entity references because that is the only form in
  # which TokenStream#get hands an entity to #replace_amp.
  AMP_REPLACE_TABLE = {
    '&amp;'  => '\\&',
    '&gt;'   => '$>$',
    '&lt;'   => '$<$',
    '&nbsp;' => '~',
    '&quot;' => '"',
  }.freeze

  ENTITY    = /\A&\#?[0-9A-Za-z]+;/
  TEXT_STOP = /[<&]/

  # file:: an object that responds to +gets+ and +eof?+ (File, StringIO, ...)
  #        or the name of a file to open
  def initialize(file)
    @f = file.respond_to?(:gets) ? file : File.new(file)
    @buf = nil
    @bpos = 0
  end

  # Consume input from the current position up to and including the next
  # occurrence of +endsym+, reading further lines when necessary.  Line
  # breaks inside the consumed text are replaced by spaces.  At end of input
  # the incomplete text read so far is returned.
  def read_until(endsym)
    piece = String.new
    while fill_buffer
      hit = @buf.index(endsym, @bpos)
      if hit
        stop = hit + endsym.size
        piece << @buf[@bpos...stop]
        @bpos = stop
        break
      end
      piece << @buf[@bpos..-1]
      @bpos = @buf.size
    end
    piece.tr("\r\n", '  ')
  end

  # Return the next token, or nil at end of input:
  # * a Tag for "<...>" (comments are read up to "-->"),
  # * the result of #replace_amp for an entity reference,
  # * a String for a run of text (runs of pure white space are skipped).
  def get
    while fill_buffer
      head = @buf[@bpos]
      if head == TAG_START
        comment = @buf[@bpos, COMMENT_START.size] == COMMENT_START
        return Tag.new(read_until(comment ? COMMENT_END : TAG_END))
      end
      if head == AMP_START
        entity = @buf[@bpos..-1][ENTITY]
        if entity
          @bpos += entity.size
          return replace_amp(entity)
        end
        # A bare "&" that starts no entity is ordinary text.
      end
      stop = @buf.index(TEXT_STOP, @bpos + 1) || @buf.size
      text = @buf[@bpos...stop]
      @bpos = stop
      return text unless text =~ /\A\s+\z/
    end
    nil
  end

  # True when both the buffered line and the underlying input are used up.
  def eof?
    (@buf.nil? || @bpos >= @buf.size) && @f.eof?
  end

  # LaTeX replacement for entity +s+ as a LatexFragment, or +s+ itself (to be
  # escaped like ordinary text) when the entity is not in AMP_REPLACE_TABLE.
  def replace_amp(s)
    if AMP_REPLACE_TABLE.key?(s)
      LatexFragment.new(AMP_REPLACE_TABLE[s])
    else
      s
    end
  end

  private

  # Make sure unread characters are buffered, reading the next line when the
  # current one is used up.  Returns false at end of input.
  def fill_buffer
    while @buf.nil? || @bpos >= @buf.size
      line = @f.gets
      if line.nil?
        @buf = nil
        return false
      end
      @buf = to_internal(line)
      @bpos = 0
    end
    true
  end

  # Convert an input line to EUC-JP when Kconv is available.  Otherwise
  # return it unchanged, as raw bytes if it is not valid in its encoding so
  # that pattern matching cannot raise on it.
  def to_internal(line)
    return Kconv.toeuc(line) if defined?(Kconv)
    line.valid_encoding? ? line : line.dup.force_encoding(Encoding::ASCII_8BIT)
  end
end


# Print the LaTeX preamble (document style and helper macros).
def print_header
  print <<'HEADER'

\documentstyle[epsf]{jarticle}
\def\hr{\par\hbox to \textwidth{\hrulefill}}
\def\pre{\begin{quote}\def\baselinestretch{0.8}\tt\obeylines}
\def\endpre{\end{quote}}
\makeatletter
\@ifundefined{gt}{\let\gt=\dg}{}
\makeatother
HEADER
end


# Stack of Environment objects; the innermost environment is on top.
class Environ_stack
  def initialize(*envs)
    @stack = envs
  end

  # Look +tag+ up from the innermost environment outwards and return the
  # first action found, or nil.  Names starting with "!" (comments, DOCTYPE)
  # always map to a no-op action.
  def action(tag)
    return ['', nil] if tag.start_with?('!')
    @stack.reverse_each do |env|
      found = env.action(tag)
      return found unless found.nil?
    end
    nil
  end

  # Drop the innermost environment.  The outermost one is never removed, so
  # a stray closing tag cannot leave the stack empty.
  def pop
    @stack.pop if @stack.size > 1
  end

  def push(env)
    @stack.push(env)
  end

  def top
    @stack.last
  end

  # Push a shallow copy of the innermost environment.
  def dup
    @stack.push(top.clone)
  end
end


# Output state (silent mode, paragraph alignment, table being collected)
# together with the interpreter table that is active in that state.
class Environment
  # One table cell: the number of columns it spans and its LaTeX content.
  Cell = Struct.new(:span, :text)

  LATEX_ESCAPES = {
    '&' => '\\&',
    '%' => '\\%',
    '#' => '\\#',
    '$' => '\\$',
    '_' => '\\verb+_+',
    '^' => '\\verb+^+',
    '~' => '\\verb+~+',
  }.freeze
  ESCAPE_PATTERN = /[&%\#\$_\^~]/

  # HTML align value => LaTeX environment name
  ALIGN_ENVIRONMENTS = {
    'left'   => 'flushleft',
    'center' => 'center',
    'right'  => 'flushright',
  }.freeze

  # interp:: Hash mapping tag names to actions
  def initialize(interp)
    @silent = false
    @in_table = false
    @interp = interp
    @align = nil
    @endparagraph = ''
    @table_rows = nil       # rows (arrays of Cell) of the table being read
    @enclosing_rows = nil   # rows of the surrounding table, if any
    @table_border = false
  end

  # Action registered for +tag+ in this environment, or nil.
  def action(tag)
    @interp[tag]
  end

  # True while output is suppressed (inside <TITLE>).
  def silent?
    @silent
  end

  # Write a token.  Plain Strings are document text: LaTeX special characters
  # are escaped and, inside an aligned paragraph, a trailing line break
  # becomes an explicit LaTeX line break.  LatexFragments and non-String
  # values are written unchanged.
  def flush(tok)
    return emit(tok) unless tok.is_a?(String) && !tok.is_a?(LatexFragment)
    # A block is used because "\\&" as a replacement *string* means "the
    # matched text" to gsub and would leave "&" unescaped.
    text = tok.gsub(ESCAPE_PATTERN) { |ch| LATEX_ESCAPES[ch] }
    if @align && !@in_table && text.end_with?("\n")
      text = "#{text.chomp}\\\\\n"
    end
    emit(text)
  end

  # Write LaTeX source verbatim: into the current table cell while a table is
  # being collected, otherwise to standard output unless silent.  Inside a
  # table, output that arrives before the first cell is discarded.
  def emit(tok)
    text = tok.to_s
    if @in_table
      row = @table_rows && @table_rows.last
      cell = row && row.last
      cell.text << text if cell
    elsif !@silent
      print text
    end
  end

  def set_interp(interp)
    @interp = interp
  end

  # tag processing methods

  # <TITLE>
  def do_silent(tag)
    @silent = true
  end

  # </TITLE>
  def undo_silent(tag)
    @silent = false
  end

  # <IMG>: convert the image to EPS and include the EPS file.  Tags without
  # a usable src attribute are skipped.
  def img_proc(tag)
    src = tag.switch('src')
    return unless src.is_a?(String) && !src.empty?
    newfile = src.sub(/\.[^.\/]*\z/, '') + '.eps'
    gif2eps(src, newfile) unless newfile == src
    emit "\\epsfile{file=#{newfile}}\n"
  end

  # <TABLE>: start collecting cells.  The rows of a surrounding table are
  # remembered so that a nested table ends up inside the surrounding cell.
  def starttable(tag)
    @enclosing_rows = @in_table ? @table_rows : nil
    @table_rows = []
    @in_table = true
    @table_border = !tag.switch('border').nil?
  end

  # <TR>
  def start_row(tag)
    return unless @in_table
    @table_rows << []
  end

  # <TD>, <TH>: open a new cell; a row is started implicitly when <TR> is
  # missing.  COLSPAN values below 1 (or not numeric) count as 1.
  def start_col(tag)
    return unless @in_table
    start_row(tag) if @table_rows.empty?
    span = tag.switch('colspan').to_s.to_i
    span = 1 if span < 1
    @table_rows.last << Cell.new(span, String.new)
  end

  # </TABLE>: write the collected cells as a tabular environment.
  def endtable(tag)
    return unless @in_table
    rows = @table_rows
    border = @table_border
    # Switch back to the surrounding table (if any) before writing, so the
    # tabular goes into the surrounding cell or to standard output.
    @table_rows = @enclosing_rows
    @enclosing_rows = nil
    @in_table = !@table_rows.nil?

    columns = [rows.map { |row| row.sum(&:span) }.max || 0, 1].max
    if border
      emit "\\begin{tabular}{*{#{columns}}{|l}|}\n\\hline\n"
    else
      emit "\\begin{tabular}{*{#{columns}}{l}}\n"
    end
    rows.each do |row|
      used = 0
      cells = row.map do |cell|
        used += cell.span
        if cell.span == 1
          cell.text
        else
          # The spanning cell that reaches the last column also draws the
          # right-hand border.
          form = if !border then 'l'
                 elsif used >= columns then '|l|'
                 else '|l'
                 end
          "\\multicolumn{#{cell.span}}{#{form}}{#{cell.text}}"
        end
      end
      emit cells.join('&') + "\\\\\n"
      emit "\\hline\n" if border
    end
    emit "\\end{tabular}\n"
  end

  # <CENTER>
  def startcenter(tag)
    emit(@in_table ? '\\hfil' : "\\begin{center}\n")
  end

  # </CENTER>
  def endcenter(tag)
    emit(@in_table ? '\\hfil' : "\\end{center}\n")
  end

  # <P>: a plain paragraph break, or the start of an aligned paragraph for
  # ALIGN=left/center/right.  Other ALIGN values are treated as no alignment.
  def paragraph(tag)
    align = tag.switch('align')
    name = align.is_a?(String) ? ALIGN_ENVIRONMENTS[align.downcase] : nil
    if name.nil?
      emit "\\par\n"
      @endparagraph = ''
      @align = nil
    else
      emit "\\begin{#{name}}\n"
      @endparagraph = "\\end{#{name}}\n"
      @align = align.downcase
    end
  end

  # </P>: close the aligned paragraph, if one is open.
  def endparagraph(tag)
    return if @align.nil?
    @align = nil
    emit @endparagraph
  end
end


# Interpreter tables: tag name => [action, scope, needs_document]
#   action         - LaTeX source to write, or the Symbol of an Environment
#                    method that is called with the Tag
#   scope          - a Hash: push a new environment using that table;
#                    "pop": drop the innermost environment afterwards
#   needs_document - when true, \begin{document} is written first if the
#                    document body has not started yet

ENUM_INTERP = {
  'li' => ["\\item ", nil],
}.freeze

ITEM_INTERP = {
  'li' => ["\\item ", nil],
}.freeze

DESC_INTERP = {
  'dt' => ["\\item[", nil],
  'dd' => ["]\n", nil],
}.freeze

TABLE_INTERP = {
  'tr'  => [:start_row, nil],
  'td'  => [:start_col, nil],
  'th'  => [:start_col, nil],
  '/tr' => ["", nil],
  '/td' => ["", nil],
  '/th' => ["", nil],
}.freeze

PARA_INTERP = {
  '/p' => [:endparagraph, "pop", true],
}.freeze

MAIN_INTERP = {
  # \begin{document} / \end{document} for <BODY> are written by process_tag
  # so that they appear only once.
  'body'     => ["", nil, false],
  '/body'    => ["", nil, false],
  'head'     => ["", nil, false],
  '/head'    => ["", nil, false],
  'html'     => ["", nil, false],
  '/html'    => ["", nil, false],
  'title'    => [:do_silent, nil, false],
  '/title'   => [:undo_silent, nil, false],
  '!'        => ["", nil, false],
  'h1'       => ["\\section{", nil, true],
  'h2'       => ["\\subsection{", nil, true],
  'h3'       => ["\\subsubsection{", nil, true],
  'h4'       => ["\\paragraph{", nil, true],
  '/h1'      => ["}\n", nil, true],
  '/h2'      => ["}\n", nil, true],
  '/h3'      => ["}\n", nil, true],
  '/h4'      => ["}\n", nil, true],
  'a'        => ["", nil, true],
  '/a'       => ["", nil, true],
  'center'   => [:startcenter, nil, true],
  '/center'  => [:endcenter, nil, true],
  'ol'       => ["\\begin{enumerate}\n", ENUM_INTERP, true],
  '/ol'      => ["\\end{enumerate}\n", "pop", true],
  'ul'       => ["\\begin{itemize}\n", ITEM_INTERP, true],
  '/ul'      => ["\\end{itemize}\n", "pop", true],
  'dl'       => ["\\begin{description}\n", DESC_INTERP, true],
  '/dl'      => ["\\end{description}\n", "pop", true],
  'pre'      => ["\\begin{pre}\n", nil, true],
  '/pre'     => ["\\end{pre}\n", nil, true],
  'p'        => [:paragraph, PARA_INTERP, true],
  'br'       => ["\\par ", nil, true],
  'img'      => [:img_proc, nil, true],
  'hr'       => ["\\hr ", nil, true],
  'b'        => ["{\\bf\\gt ", nil, true],
  '/b'       => ["}", nil, true],
  'strong'   => ["{\\bf\\gt ", nil, true],
  '/strong'  => ["}", nil, true],
  'dfn'      => ["{\\bf\\gt ", nil, true],
  '/dfn'     => ["}", nil, true],
  # The font switches end in a space so that the following text cannot
  # become part of the control word ("\itfoo").
  'i'        => ["{\\it ", nil, true],
  '/i'       => ["}", nil, true],
  'address'  => ["{\\it ", nil, true],
  '/address' => ["}", nil, true],
  'cite'     => ["{\\it ", nil, true],
  '/cite'    => ["}", nil, true],
  'code'     => ["{\\tt ", nil, true],
  '/code'    => ["}", nil, true],
  'kbd'      => ["{\\tt ", nil, true],
  '/kbd'     => ["}", nil, true],
  'tt'       => ["{\\tt ", nil, true],
  '/tt'      => ["}", nil, true],
  'samp'     => ["{\\tt ", nil, true],
  '/samp'    => ["}", nil, true],
  'em'       => ["{\\em ", nil, true],
  '/em'      => ["}", nil, true],
  'u'        => ['$\\underline{\\mbox{', nil, true],
  '/u'       => ['}}$', nil, true],
  'sub'      => ['${}_\\mbox{', nil, true],
  '/sub'     => ['}$', nil, true],
  'sup'      => ['${}^\\mbox{', nil, true],
  '/sup'     => ['}$', nil, true],
  'table'    => [:starttable, TABLE_INTERP, true],
  '/table'   => [:endtable, "pop", true],
  'font'     => ["", nil, true],
  '/font'    => ["", nil, true],
}.freeze


################################ MAIN ####################################

# True between \begin{document} and \end{document}.
$in_document = false

# Write \begin{document} and remember that the body has started.
def open_document
  print "\\begin{document}\n"
  $in_document = true
end

# Write \end{document} and remember that the body has ended.
def close_document
  print "\\end{document}\n"
  $in_document = false
end

# Carry out the action registered for one tag.
#
# intp:: the Environ_stack
# tok::  the Tag
def process_tag(intp, tok)
  case tok.tagname
  when 'body'  then open_document unless $in_document
  when '/body' then close_document if $in_document
  end

  act = intp.action(tok.tagname)
  if act.nil?
    STDERR.print "tag ", tok.tagname, " ignored\n"
    return
  end
  command, scope, needs_document = act
  open_document if needs_document && !$in_document

  # environment push (a <P> without ALIGN has no matching pop)
  if scope.is_a?(Hash) &&
     (tok.tagname != 'p' || !tok.switch('align').nil?)
    intp.dup
    intp.top.set_interp(scope)
  end

  case command
  when String then intp.top.emit(command)
  when Symbol then intp.top.public_send(command, tok)
  end

  # environment pop
  intp.pop if scope == 'pop'
end

# Convert one HTML document to LaTeX on standard output.
#
# source:: file name, or an object responding to +gets+ and +eof?+
def convert(source)
  $in_document = false
  print_header
  intp = Environ_stack.new(Environment.new(MAIN_INTERP))
  stream = TokenStream.new(source)
  while (tok = stream.get)
    if tok.is_a?(Tag)
      process_tag(intp, tok)
    else
      env = intp.top
      # Visible text before any body-level tag starts the document as well.
      open_document unless $in_document || env.silent?
      env.flush tok
    end
  end
  close_document if $in_document
end

if __FILE__ == $PROGRAM_NAME
  if ARGV.empty?
    STDERR.print "usage: html2latex file.html\n"
  else
    convert(ARGV[0])
  end
end