diff options
Diffstat (limited to 'Bonus/html2latex')
| -rwxr-xr-x | Bonus/html2latex | 517 | 
1 files changed, 517 insertions, 0 deletions
| diff --git a/Bonus/html2latex b/Bonus/html2latex new file mode 100755 index 0000000..7b894e7 --- /dev/null +++ b/Bonus/html2latex @@ -0,0 +1,517 @@ +#!/usr/local/bin/ruby + +# +#       HTML to LaTeX converter +#         by A. Ito, 16 June, 1997 +# + +require 'kconv' + +# configuration +def gif2eps(giffile,epsfile) +  cmd = "convert #{giffile} #{epsfile}" +  STDERR.print cmd,"\n" +  system cmd +end + +########################################################################### +class Tag +  def initialize(str) +    if str =~ /<(.+)>/ then +      str = $1 +    end +    tags = str.split +    @tagname = tags.shift.downcase +    @vals = {} +    tags.each do |t| +      if t =~ /=/ then +	tn,tv = t.split(/\s*=\s*/,2) +	tv.sub!(/^"/,"") +	tv.sub!(/"$/,"") +	@vals[tn.downcase] = tv +      else +	@vals[t.downcase] = TRUE +      end +    end +  end +  def tagname +    return @tagname +  end +  def each +    @vals.each do |k,v| +      yield k,v +    end +  end +  def switch(k) +    return @vals[k] +  end +end + +class TokenStream +  TAG_START = ?< +  TAG_END = ?> +  AMP_START = ?& +  AMP_END = ?; +   +  AMP_REPLACE_TABLE = { +    '&'   => '\\&', +    '>'    => '$>$', +    '<'    => '$<$', +    ' '  => '~', +    '"'  => '"', +  } +  def initialize(file) +    if file.kind_of?(File) then +      @f = file +    else +      @f = File.new(file) +    end +    @buf = nil +    @bpos = 0 +  end +   +  def read_until(endsym) +    complete = FALSE +    tag = [] +    begin +      while @bpos < @buf.size +	c = @buf[@bpos] +	if c == endsym then +	  tag.push(c.chr) +	  complete = TRUE +	  @bpos += 1 +	  break +	end +	if c == 10 || c == 13 then +	  tag.push(' ') +	else +	  tag.push(c.chr) +	end +	@bpos += 1 +      end +      unless complete +	@buf = @f.gets +	@bpos = 0 +	break if @f.eof? +      end +    end until complete +    return tag.join('') +  end +     +  def get +    while TRUE +      if @buf.nil? then +	@buf = Kconv.toeuc(@f.gets) +	if @f.eof? then +	  return nil +	end +	@bpos = 0 +      end +      if @buf[@bpos] == TAG_START then +	return Tag.new(read_until(TAG_END)) +      elsif @buf[@bpos] == AMP_START then +	return replace_amp(read_until(AMP_END)) +      else +	i = @bpos +	while i < @buf.size && @buf[i] != TAG_START && @buf[i] != AMP_START +	  i += 1 +	end +	r = @buf[@bpos,i-@bpos] +	if i == @buf.size then +	  @buf = nil +	else +	  @bpos = i +	end +	redo if r =~ /^\s+$/ +	return r +      end +    end +  end +  public :eof? +  def eof? +    @f.eof? +  end +  def replace_amp(s) +    if AMP_REPLACE_TABLE.key?(s) then +      return AMP_REPLACE_TABLE[s] +    else +      return s +    end +  end +end + + +def print_header +  print ' +\documentstyle[epsf]{jarticle} +\def\hr{\par\hbox to \textwidth{\hrulefill}} +\def\pre{\begin{quote}\def\baselinestretch{0.8}\tt\obeylines} +\def\endpre{\end{quote}} +\makeatletter +\@ifundefined{gt}{\let\gt=\dg}{} +\makeatother +' +end + + +class Environ_stack +  def initialize(*envs) +    @stack = envs +  end +  def action(tag) +    if tag =~ /^!/ then # comment +      return ["",nil] +    end +    i = @stack.size-1 +    while i >= 0 +      a = @stack[i].action(tag) +      unless a.nil? then +	return a +      end +      i -= 1 +    end +    return nil +  end +  def pop +    @stack.pop +  end +  def push(env) +    @stack.push(env) +  end +  def top +    @stack[@stack.size-1] +  end +  def dup +    @stack.push(top.clone) +  end +end + + +class Environment +  def initialize(interp) +    @silent = FALSE +    @in_table = FALSE +    @interp = interp; +    @align = nil; +  end +  def action(tag) +    return @interp[tag] +  end +   +  def flush(tok) +    if tok.kind_of?(String) then +      tok = tok.gsub(/&/,"\\&"); +      tok = tok.gsub(/%/,"\\%"); +      tok = tok.gsub(/#/,"\\#"); +      tok = tok.gsub(/\$/,"\\$"); +      tok = tok.gsub(/_/,"\\verb+_+"); +      tok = tok.gsub(/\^/,"\\verb+^+"); +      tok = tok.gsub(/~/,"\\verb+~+"); +    end +    if @in_table then +      @table[@table_rows][@table_cols] += tok +    elsif !@silent then +      if !@align.nil? && tok =~ /\n$/ then +	print tok.chop,"\\\\\n" +      else +	print tok +      end +    end +  end +   +  def set_interp(interp) +    @interp = interp +  end +   +  # tag processing methods +   +  # <TITLE> +  def do_silent(tag) +    @silent = TRUE +  end +   +  # </TITLE> +  def undo_silent(tag) +    @silent = FALSE +  end +   +  # <IMG> +  def img_proc(tag) +    src = tag.switch('src') +    newfile = src.sub(/\.GIF/i,".eps") +    gif2eps(src,newfile) +    flush "\\epsfile{file=#{newfile}}\n" +  end +   +  # <TABLE> +  def starttable(tag) +    @table = [] +    @tablespan = [] +    @table_rows = -1 +    @table_cols_max = 0 +    @in_table = TRUE +    unless tag.switch('border').nil? then +      @table_border = TRUE +    else +      @table_border = FALSE +    end +  end +   +  # <TR> +  def start_row(tag) +    @table_rows += 1 +    @table[@table_rows] = [] +    @tablespan[@table_rows] = [] +    @table_cols = -1 +    @colspan = 1 +  end +   +  # <TD> +  def start_col(tag) +    @colspan = tag.switch('colspan') +    if @colspan.nil? then +      @colspan = 1 +    else +      @colspan = @colspan.to_i +    end +    @tablespan[@table_rows][@table_cols+1] = @colspan +    @table_cols += @colspan +    if @table_cols > @table_cols_max then +      @table_cols_max = @table_cols +    end +  end +   +  # </TABLE> +  def endtable(tag) +    @in_table = FALSE +    flush "\\begin{tabular}{*{" +    flush @table_cols_max+1 +    if @table_border then +      flush "}{|l}|}\n\\hline\n" +    else +      flush "}{l}}\n" +    end +    for i in 0..@table_rows +      j = 0 +      while j <= @table_cols +	span = @tablespan[i][j] +	if span == 1 then +	  flush @table[i][j] +	elsif @table_border then +	  form = "|l" +	  if j+span > @table_cols then +	    form = "|l|" +	  end +	  flush "\\multicolumn{"+span.to_s+"}{"+form+"}{" +	  flush @table[i][j+span-1] +	  flush "}" +	else +	  flush "\\multicolumn{"+span.to_s+"}{l}{" +	  flush @table[i][j+span-1] +	  flush "}" +	end +	j += span +	if j <= @table_cols then +	  flush "&" +	end +      end +      flush "\\\\\n" +      flush "\\hline\n" if @table_border +    end +    flush "\\end{tabular}\n" +  end   +   +  # <CENTER> +  def startcenter(tag) +    if @in_table then +      flush "\\hfil" +    else +      flush "\\begin{center}\n" +    end +  end +   +  # </CENTER> +  def endcenter(tag) +    if @in_table then +      flush "\\hfil" +    else +      flush "\\end{center}\n" +    end +  end +   +  # <P> +  def paragraph(tag) +    align = tag.switch('align') +    if align.nil? then +      flush "\\par\n" +      @endparagraph = "" +    else +      align = align.downcase +      case align +      when "left" then +	flush "\\begin{flushleft}\n" +	@endparagraph = "\\end{flushleft}\n" +      when "center" then +	flush "\\begin{center}\n" +	@endparagraph = "\\end{center}\n" +      when "right" then +	flush "\\begin{flushright}\n" +	@endparagraph = "\\end{flushright}\n" +      end +    end +    @align = align +  end +   +  # </P> +  def endparagraph(tag) +    unless @align.nil? then +      @align = nil +      flush @endparagraph +    end +  end +end + + +enum_interp = { +  'li' => ["\\item ",nil] +} + +item_interp = { +  'li' => ["\\item ",nil] +} + +desc_interp = { +  'dt' => ["\\item[",nil], +  'dd' => ["]\n",nil] +} + +table_interp = { +  'tr' => [:start_row,nil], +  'td' => [:start_col,nil], +  '/tr' => ["",nil], +  '/td' => ["",nil], +} + +para_interp = { +  '/p'      => [:endparagraph ,"pop",TRUE], +} + +main_interp = { +  'body'    => ["\\begin{document}\n",nil,FALSE], +  '/body'   => ["\\end{document}\n",nil,FALSE], +  'head'    => ["",nil,FALSE], +  '/head'   => ["",nil,FALSE], +  'html'    => ["",nil,FALSE], +  '/html'   => ["",nil,FALSE], +  'title'   => [:do_silent,nil,FALSE], +  '/title'  => [:undo_silent,nil,FALSE], +  '!'       => ["",nil,FALSE], +  'h1'      => ["\\section{",nil,TRUE], +  'h2'      => ["\\subsection{",nil,TRUE], +  'h3'      => ["\\subsubsection{",nil,TRUE], +  'h4'      => ["\\paragraph{",nil,TRUE], +  '/h1'     => ["}\n",nil,TRUE], +  '/h2'     => ["}\n",nil,TRUE], +  '/h3'     => ["}\n",nil,TRUE], +  '/h4'     => ["}\n",nil,TRUE], +  'a'       => ["",nil,TRUE], +  '/a'      => ["",nil,TRUE], +  'center'  => [:startcenter,nil,TRUE], +  '/center' => [:endcenter,nil,TRUE], +  'ol'      => ["\\begin{enumerate}\n",enum_interp,TRUE], +  '/ol'     => ["\\end{enumerate}\n","pop",TRUE], +  'ul'      => ["\\begin{itemize}\n",item_interp,TRUE], +  '/ul'     => ["\\end{itemize}\n","pop",TRUE], +  'dl'      => ["\\begin{description}\n",desc_interp,TRUE], +  '/dl'     => ["\\end{description}\n","pop",TRUE], +  'pre'     => ["\\begin{pre}\n",nil,TRUE], +  '/pre'    => ["\\end{pre}\n",nil,TRUE], +  'p'       => [:paragraph ,para_interp,TRUE], +  'br'      => ["\\par ",nil,TRUE], +  'img'     => [:img_proc,nil,TRUE], +  'hr'      => ["\\hr ",nil,TRUE], +  'b'       => ["{\\bf\\gt ",nil,TRUE], +  '/b'      => ["}",nil,TRUE], +  'strong'  => ["{\\bf\\gt ",nil,TRUE], +  '/strong' => ["}",nil,TRUE], +  'dfn'     => ["{\\bf\\gt ",nil,TRUE], +  '/dfn'    => ["}",nil,TRUE], +  'i'       => ["{\\it",nil,TRUE], +  '/i'      => ["}",nil,TRUE], +  'address' => ["{\\it",nil,TRUE], +  '/address'=> ["}",nil,TRUE], +  'cite'    => ["{\\it",nil,TRUE], +  '/cite'   => ["}",nil,TRUE], +  'code'    => ["{\\tt",nil,TRUE], +  '/code'   => ["}",nil,TRUE], +  'kbd'     => ["{\\tt",nil,TRUE], +  '/kbd'    => ["}",nil,TRUE], +  'tt'      => ["{\\tt",nil,TRUE], +  '/tt'     => ["}",nil,TRUE], +  'samp'    => ["{\\tt",nil,TRUE], +  '/samp'   => ["}",nil,TRUE], +  'em'      => ["{\\em",nil,TRUE], +  '/em'     => ["}",nil,TRUE], +  'u'       => ["$\\underline{\\mbox{",nil,TRUE], +  '/u'      => ["}}$",nil,TRUE], +  'sub'     => ["${}_\mbox{",nil,TRUE], +  '/sub'    => ["}$",nil,TRUE], +  'sup'     => ["${}^\mbox{",nil,TRUE], +  '/sup'    => ["}$",nil,TRUE], +  'table'   => [:starttable, table_interp,TRUE], +  '/table'  => [:endtable, "pop",TRUE], +  'font'    => ["",nil,TRUE], +  '/font'   => ["",nil,TRUE], +} + + + + +################################ MAIN #################################### + +$in_document = FALSE +print_header +intp = Environ_stack.new(Environment.new(main_interp)) +f = TokenStream.new(ARGV[0]) +until f.eof? +  tok = f.get +  if tok.kind_of?(Tag) then +    case tok.tagname +    when "body" +      $in_document = TRUE +    when "/body" +      $in_document = FALSE +    end +    act = intp.action(tok.tagname) +    if act.nil? then +      STDERR.print "tag ",tok.tagname," ignored\n" +    else +      if act[2] && !$in_document then +        print "\\begin{document}\n" +	$in_document = TRUE +      end +      # enviconment push +      if act[1].kind_of?(Hash) && +	  (tok.tagname != "p" || tok.switch('align') != nil) then +	  intp.dup +	  intp.top.set_interp(act[1]) +      end +       +      if act[0].kind_of?(String) then +	intp.top.flush act[0] +      elsif act[0].kind_of?(Fixnum) then # interned symbol +	intp.top.send(act[0],tok) +      end +       +      # environment pop +      if act[1] == "pop" then +	intp.pop +      end +    end +  elsif !tok.nil? then +    intp.top.flush tok +  end +end +if $in_document then +  print "\\end{document}\n" +end | 
