#!/usr/local/bin/ruby
# frozen_string_literal: true

#
# HTML to LaTeX converter
#   by A. Ito, 16 June, 1997
#
# Reads the HTML file named by the first command line argument and prints a
# LaTeX (jarticle) rendition of it on standard output.
#

begin
  require 'kconv'
rescue LoadError
  # kconv is provided by the nkf library, which is not present in every Ruby
  # installation. Without it input lines are used exactly as they are read.
  nil
end

# configuration

# Converts the image +giffile+ into +epsfile+ by running the external
# `convert` command, echoing the command to STDERR first.
#
# The file names come from <IMG SRC=...> attributes of the document being
# converted, so the command is run as an argument vector instead of a shell
# string: shell metacharacters in a file name are never interpreted.
#
# Returns whatever Kernel#system returns.
def gif2eps(giffile, epsfile)
  cmd = ['convert', giffile, epsfile]
  STDERR.print cmd.join(' '), "\n"
  system(*cmd)
end

###########################################################################

# One HTML tag: a lower-cased tag name (closing tags keep their leading "/")
# plus a table of attributes.
class Tag
  # name, then optionally = "double quoted", 'single quoted' or bare value.
  ATTRIBUTE = /([^\s=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/.freeze

  # str is the tag text, normally including the surrounding "<" and ">".
  # Attribute names are lower-cased; an attribute without a value is stored
  # as +true+. Quoted values may contain whitespace.
  def initialize(str)
    body = str[/<(.+)>/, 1] || str.delete_prefix('<')
    name, rest = body.strip.split(/\s+/, 2)
    @tagname = name.to_s.downcase
    @vals = {}
    rest.to_s.scan(ATTRIBUTE) do |key, double_quoted, single_quoted, bare|
      @vals[key.downcase] = double_quoted || single_quoted || bare || true
    end
  end

  # Lower-cased tag name, e.g. "table" or "/table".
  def tagname
    @tagname
  end

  # Yields every attribute as (name, value).
  def each
    @vals.each { |k, v| yield k, v }
  end

  # Value of attribute +k+ (lower case), +true+ for a valueless attribute,
  # nil when the attribute is absent.
  def switch(k)
    @vals[k]
  end
end

# A String that already is LaTeX source. Environment#flush writes it without
# escaping LaTeX special characters.
class RawLatex < String
end

# Splits an HTML input into Tag objects and text Strings.
class TokenStream
  TAG_START = '<'
  TAG_END   = '>'
  AMP_START = '&'
  AMP_END   = ';'
  AMP_REPLACE_TABLE = {
    '&amp;'  => RawLatex.new('\\&').freeze,
    '&gt;'   => RawLatex.new('$>$').freeze,
    '&lt;'   => RawLatex.new('$<$').freeze,
    '&nbsp;' => RawLatex.new('~').freeze,
    '&quot;' => RawLatex.new('"').freeze
  }.freeze

  # Either character that ends a run of plain text.
  MARKUP_START = /[<&]/.freeze
  # A character reference such as "&amp;" or "&#38;" at the start of a string.
  ENTITY = /\A&#?[A-Za-z0-9]+;/.freeze

  # file is an object with a +gets+ method (File, StringIO, ...) or the path
  # of a file to open.
  def initialize(file)
    @f = file.respond_to?(:gets) ? file : File.new(file)
    @buf = nil
    @bpos = 0
  end

  # Returns the input from the current position up to and including the next
  # +endsym+, reading further lines as needed. Line ends inside the result
  # are replaced by spaces. At end of input the incomplete remainder is
  # returned.
  def read_until(endsym)
    pieces = []
    loop do
      stop = @buf.index(endsym, @bpos)
      if stop
        pieces << @buf[@bpos..stop]
        advance_to(stop + 1)
        break
      end
      pieces << @buf[@bpos..-1]
      break unless fill_buffer
    end
    pieces.join.tr("\r\n", '  ')
  end

  # Returns the next token: a Tag, a RawLatex (recognized character
  # reference), a plain String of document text, or nil when the input is
  # exhausted. Text consisting only of whitespace is skipped.
  def get
    loop do
      return nil if @buf.nil? && !fill_buffer

      case @buf[@bpos]
      when TAG_START then return Tag.new(read_until(TAG_END))
      when AMP_START then return read_entity
      else
        text = read_text
        return text unless text =~ /\A\s*\z/
      end
    end
  end

  # True once the current line is used up and the underlying input has no
  # further line. (Checking only the file would drop the last line, which is
  # still buffered when the file itself reports end of file.)
  def eof?
    @buf.nil? && @f.eof?
  end

  # Returns the LaTeX replacement of the character reference +s+
  # (e.g. "&amp;"), or +s+ itself when it is not in AMP_REPLACE_TABLE.
  def replace_amp(s)
    AMP_REPLACE_TABLE.fetch(s, s)
  end

  private

  # Loads the next input line into the buffer. Returns false at end of input.
  def fill_buffer
    line = @f.gets
    if line.nil?
      @buf = nil
      return false
    end
    @buf = defined?(Kconv) ? Kconv.toeuc(line) : line
    @bpos = 0
    true
  end

  # Moves the read position, releasing the buffer once it is used up.
  def advance_to(pos)
    @bpos = pos
    @buf = nil if @bpos >= @buf.size
  end

  # Consumes plain text up to the next "<" or "&" or the end of the line.
  def read_text
    stop = @buf.index(MARKUP_START, @bpos) || @buf.size
    text = @buf[@bpos...stop]
    advance_to(stop)
    text
  end

  # Consumes a character reference at the current "&". An "&" that does not
  # start a well-formed reference is returned as ordinary text, so a stray
  # ampersand cannot swallow input up to some distant ";".
  def read_entity
    entity = @buf[@bpos..-1][ENTITY]
    if entity
      advance_to(@bpos + entity.size)
      replace_amp(entity)
    else
      advance_to(@bpos + 1)
      AMP_START
    end
  end
end

# LaTeX preamble written before anything else.
LATEX_HEADER = <<'HEADER'
\documentstyle[epsf]{jarticle}
\def\hr{\par\hbox to \textwidth{\hrulefill}}
\def\pre{\begin{quote}\def\baselinestretch{0.8}\tt\obeylines}
\def\endpre{\end{quote}}
\makeatletter
\@ifundefined{gt}{\let\gt=\dg}{}
\makeatother
HEADER

# Prints the LaTeX preamble on standard output.
def print_header
  print LATEX_HEADER
end

# Stack of Environment objects. Tag lookup starts at the innermost (top)
# environment and falls back to the enclosing ones.
class Environ_stack
  def initialize(*envs)
    @stack = envs
  end

  # Returns the action registered for the tag name +tag+, or nil when no
  # environment knows it. Names starting with "!" (comments, doctype) map to
  # an action that outputs nothing.
  def action(tag)
    return ['', nil] if tag.start_with?('!') # comment

    @stack.reverse_each do |env|
      act = env.action(tag)
      return act unless act.nil?
    end
    nil
  end

  # Removes the innermost environment. The outermost one is kept, so an
  # unmatched closing tag cannot leave the stack empty.
  def pop
    @stack.pop if @stack.size > 1
  end

  def push(env)
    @stack.push(env)
  end

  # Innermost environment.
  def top
    @stack.last
  end

  # Pushes a shallow copy of the innermost environment.
  def dup
    @stack.push(top.clone)
  end
end

# Cells collected between <TABLE> and </TABLE>.
class TableBuffer
  Cell = Struct.new(:span, :parts)

  attr_reader :rows, :border

  # border tells whether rules are drawn around the cells.
  def initialize(border)
    @border = border
    @rows = []
    @cell = nil
  end

  # Begins a new row.
  def start_row
    @rows << []
    @cell = nil
  end

  # Begins a new cell spanning +span+ columns in the current row
  # (a row is opened first when <TD> appears before any <TR>).
  def start_cell(span)
    start_row if @rows.empty?
    @cell = Cell.new(span, [])
    @rows.last << @cell
  end

  # Adds text to the current cell; text outside any cell is dropped.
  def append(text)
    @cell.parts << text if @cell
  end

  # Number of columns of the widest row, at least 1.
  def columns
    widest = @rows.map { |row| row.sum(&:span) }.max.to_i
    [widest, 1].max
  end
end

# Output state plus the tag handlers named by Symbols in the interpreter
# tables. Handlers are called with the Tag being processed.
class Environment
  # LaTeX replacements for special characters found in document text.
  LATEX_ESCAPES = {
    '&' => '\\&',
    '%' => '\\%',
    '#' => '\\#',
    '$' => '\\$',
    '_' => '\\verb+_+',
    '^' => '\\verb+^+',
    '~' => '\\verb+~+'
  }.freeze
  LATEX_SPECIAL = Regexp.union(LATEX_ESCAPES.keys).freeze

  # ALIGN attribute value => LaTeX environment name.
  PARAGRAPH_ENVIRONMENTS = {
    'left'   => 'flushleft',
    'center' => 'center',
    'right'  => 'flushright'
  }.freeze

  # interp maps tag names to actions (see MAIN_INTERP).
  def initialize(interp)
    @silent = false
    @table = nil       # TableBuffer while inside <TABLE>
    @outer_table = nil # enclosing TableBuffer of a nested table
    @interp = interp
    @align = nil
    @endparagraph = ''
  end

  # Action registered for the tag name +tag+ in this environment, or nil.
  def action(tag)
    @interp[tag]
  end

  # Writes document text. LaTeX special characters in a plain String are
  # escaped; a RawLatex (or a non-String) is written as it is. Inside an
  # aligned paragraph a text line ending in a newline gets a forced LaTeX
  # line break.
  #
  # The escaping uses the Hash form of gsub on purpose: in a replacement
  # String "\\&" stands for "the matched text", which would leave "&" as is.
  def flush(tok)
    if tok.is_a?(String) && !tok.is_a?(RawLatex)
      write(tok.gsub(LATEX_SPECIAL, LATEX_ESCAPES), true)
    else
      emit(tok)
    end
  end

  # Writes LaTeX source produced by the converter itself, without escaping.
  def emit(latex)
    write(latex.to_s, false)
  end

  def set_interp(interp)
    @interp = interp
  end

  # tag processing methods

  # <TITLE>
  def do_silent(_tag)
    @silent = true
  end

  # </TITLE>
  def undo_silent(_tag)
    @silent = false
  end

  # <IMG>
  # Converts the image named by SRC to EPS and includes the EPS file. A
  # trailing ".gif" is replaced by ".eps"; any other name gets ".eps"
  # appended, so the source image is never used as the output file.
  def img_proc(tag)
    src = tag.switch('src')
    return unless src.is_a?(String) # no SRC, or SRC without a value

    newfile = src =~ /\.gif\z/i ? src.sub(/\.gif\z/i, '.eps') : "#{src}.eps"
    gif2eps(src, newfile)
    emit "\\epsfile{file=#{newfile}}\n"
  end

  # <TABLE>
  # Starts collecting cells. A BORDER attribute other than "0" asks for rules.
  def starttable(tag)
    border = tag.switch('border')
    @outer_table = @table
    @table = TableBuffer.new(!(border.nil? || border == '0'))
  end

  # <TR>
  def start_row(_tag)
    @table&.start_row
  end

  # <TD>
  # COLSPAN values that are missing or not positive count as 1.
  def start_col(tag)
    span = tag.switch('colspan').to_s.to_i
    @table&.start_cell(span < 1 ? 1 : span)
  end

  # </TABLE>
  # Writes the collected cells as a tabular environment. For a nested table
  # the result becomes part of the enclosing table's current cell.
  def endtable(_tag)
    return if @table.nil? # </TABLE> without <TABLE>

    table = @table
    @table = @outer_table
    emit render_table(table)
  end

  # <CENTER>
  def startcenter(_tag)
    emit(@table ? '\\hfil' : "\\begin{center}\n")
  end

  # </CENTER>
  def endcenter(_tag)
    emit(@table ? '\\hfil' : "\\end{center}\n")
  end

  # <P>
  # ALIGN=left/center/right opens the matching LaTeX environment; without
  # ALIGN (or with any other value) a plain \par is written.
  def paragraph(tag)
    envname = PARAGRAPH_ENVIRONMENTS[tag.switch('align').to_s.downcase]
    if envname
      emit "\\begin{#{envname}}\n"
      @endparagraph = "\\end{#{envname}}\n"
    else
      emit "\\par\n"
      @endparagraph = ''
    end
    @align = envname
  end

  # </P>
  def endparagraph(_tag)
    return if @align.nil?

    @align = nil
    emit @endparagraph
  end

  private

  # Sends text to the current table cell or to standard output (nothing is
  # printed while silenced by <TITLE>).
  def write(text, break_lines)
    if @table
      @table.append(text)
    elsif !@silent
      if break_lines && @align && text.end_with?("\n")
        print text.chomp, "\\\\\n"
      else
        print text
      end
    end
  end

  # LaTeX source of a whole table.
  def render_table(table)
    colspec = table.border ? '}{|l}|}' : '}{l}}'
    out = ["\\begin{tabular}{*{#{table.columns}#{colspec}\n"]
    out << "\\hline\n" if table.border
    table.rows.each do |row|
      last = row.size - 1
      cells = row.each_with_index.map do |cell, i|
        render_cell(cell, table.border, i == last)
      end
      out << cells.join('&') << "\\\\\n"
      out << "\\hline\n" if table.border
    end
    out << "\\end{tabular}\n"
    out.join
  end

  # LaTeX source of one cell; multi-column cells use \multicolumn.
  def render_cell(cell, border, last_in_row)
    text = cell.parts.join
    return text if cell.span == 1

    form = if border
             last_in_row ? '|l|' : '|l'
           else
             'l'
           end
    "\\multicolumn{#{cell.span}}{#{form}}{#{text}}"
  end
end

# Interpreter tables: tag name => [action, environment change, in body?]
#   action             String of LaTeX to write, or the Symbol of an
#                      Environment method called with the Tag
#   environment change a table to install in a pushed copy of the current
#                      environment, "pop" to drop the innermost one, or nil
#   in body?           true when the tag implies \begin{document}
ENUM_INTERP = {
  'li' => ['\\item ', nil]
}.freeze

ITEM_INTERP = {
  'li' => ['\\item ', nil]
}.freeze

DESC_INTERP = {
  'dt' => ['\\item[', nil],
  'dd' => ["]\n", nil]
}.freeze

TABLE_INTERP = {
  'tr'  => [:start_row, nil],
  'td'  => [:start_col, nil],
  'th'  => [:start_col, nil],
  '/tr' => ['', nil],
  '/td' => ['', nil],
  '/th' => ['', nil]
}.freeze

PARA_INTERP = {
  '/p' => [:endparagraph, 'pop', true]
}.freeze

MAIN_INTERP = {
  'body'     => ["\\begin{document}\n", nil, false],
  '/body'    => ["\\end{document}\n", nil, false],
  'head'     => ['', nil, false],
  '/head'    => ['', nil, false],
  'html'     => ['', nil, false],
  '/html'    => ['', nil, false],
  'title'    => [:do_silent, nil, false],
  '/title'   => [:undo_silent, nil, false],
  '!'        => ['', nil, false],
  'h1'       => ['\\section{', nil, true],
  'h2'       => ['\\subsection{', nil, true],
  'h3'       => ['\\subsubsection{', nil, true],
  'h4'       => ['\\paragraph{', nil, true],
  '/h1'      => ["}\n", nil, true],
  '/h2'      => ["}\n", nil, true],
  '/h3'      => ["}\n", nil, true],
  '/h4'      => ["}\n", nil, true],
  'a'        => ['', nil, true],
  '/a'       => ['', nil, true],
  'center'   => [:startcenter, nil, true],
  '/center'  => [:endcenter, nil, true],
  'ol'       => ["\\begin{enumerate}\n", ENUM_INTERP, true],
  '/ol'      => ["\\end{enumerate}\n", 'pop', true],
  'ul'       => ["\\begin{itemize}\n", ITEM_INTERP, true],
  '/ul'      => ["\\end{itemize}\n", 'pop', true],
  'dl'       => ["\\begin{description}\n", DESC_INTERP, true],
  '/dl'      => ["\\end{description}\n", 'pop', true],
  'pre'      => ["\\begin{pre}\n", nil, true],
  '/pre'     => ["\\end{pre}\n", nil, true],
  'p'        => [:paragraph, PARA_INTERP, true],
  'br'       => ['\\par ', nil, true],
  'img'      => [:img_proc, nil, true],
  'hr'       => ['\\hr ', nil, true],
  'b'        => ['{\\bf\\gt ', nil, true],
  '/b'       => ['}', nil, true],
  'strong'   => ['{\\bf\\gt ', nil, true],
  '/strong'  => ['}', nil, true],
  'dfn'      => ['{\\bf\\gt ', nil, true],
  '/dfn'     => ['}', nil, true],
  # The trailing space ends the control word; without it "{\it" would run
  # into the following text ("\itfoo").
  'i'        => ['{\\it ', nil, true],
  '/i'       => ['}', nil, true],
  'address'  => ['{\\it ', nil, true],
  '/address' => ['}', nil, true],
  'cite'     => ['{\\it ', nil, true],
  '/cite'    => ['}', nil, true],
  'code'     => ['{\\tt ', nil, true],
  '/code'    => ['}', nil, true],
  'kbd'      => ['{\\tt ', nil, true],
  '/kbd'     => ['}', nil, true],
  'tt'       => ['{\\tt ', nil, true],
  '/tt'      => ['}', nil, true],
  'samp'     => ['{\\tt ', nil, true],
  '/samp'    => ['}', nil, true],
  'em'       => ['{\\em ', nil, true],
  '/em'      => ['}', nil, true],
  'u'        => ['$\\underline{\\mbox{', nil, true],
  '/u'       => ['}}$', nil, true],
  # The extra braces make the whole \mbox the subscript/superscript.
  'sub'      => ['${}_{\\mbox{', nil, true],
  '/sub'     => ['}}$', nil, true],
  'sup'      => ['${}^{\\mbox{', nil, true],
  '/sup'     => ['}}$', nil, true],
  'table'    => [:starttable, TABLE_INTERP, true],
  '/table'   => [:endtable, 'pop', true],
  'font'     => ['', nil, true],
  '/font'    => ['', nil, true]
}.freeze

################################ MAIN ####################################

# Converts the HTML read from +input+ (a path or an object with +gets+) and
# prints the LaTeX document on standard output. Unknown tags are reported on
# STDERR and skipped.
def html2latex(input)
  in_document = false
  print_header
  intp = Environ_stack.new(Environment.new(MAIN_INTERP))
  f = TokenStream.new(input)
  until f.eof?
    tok = f.get
    if tok.is_a?(Tag)
      case tok.tagname
      when 'body'  then in_document = true
      when '/body' then in_document = false
      end
      act = intp.action(tok.tagname)
      if act.nil?
        STDERR.print 'tag ', tok.tagname, " ignored\n"
        next
      end
      # A body-level tag before <BODY> opens the document implicitly.
      if act[2] && !in_document
        print "\\begin{document}\n"
        in_document = true
      end
      # environment push (for <P> only when it carries ALIGN)
      if act[1].is_a?(Hash) && (tok.tagname != 'p' || !tok.switch('align').nil?)
        intp.dup
        intp.top.set_interp(act[1])
      end
      case act[0]
      when String then intp.top.emit(act[0])
      when Symbol then intp.top.send(act[0], tok)
      end
      # environment pop
      intp.pop if act[1] == 'pop'
    elsif !tok.nil?
      intp.top.flush(tok)
    end
  end
  # </BODY> was missing: close the document anyway.
  print "\\end{document}\n" if in_document
end

if ARGV.empty?
  STDERR.print "usage: #{File.basename($PROGRAM_NAME)} file.html\n"
else
  File.open(ARGV[0]) { |file| html2latex(file) }
end