#!/usr/local/bin/ruby # HTML reference generator # by A.Ito 1999/3/30 require 'kconv' ########################################################################### class URL attr 'scheme' attr 'host' attr 'port' attr 'file' attr 'label' def initialize(str) if /([a-zA-Z+\-]+):(.*)/ =~ str then @scheme = $1 str = $2 else @scheme = 'unknown' end hostpart = '' if %r'//([^/]*)(/.*)' =~ str then hostpart = $1 str = $2 elsif %r'//([^/]*)$' =~ str then hostpart = str str = '' end if hostpart != '' then if /(.*):(\d+)/ =~ hostpart then @host = $1 @port = $2 else @host = hostpart @port = '' end else @host = @port = '' end if /(.*)#(.*)/ =~ str then @file = $1 @label = $2 else @file = str @label = '' end end def to_s s = "#{@scheme}:" if s == 'news' or s == 'mailto' then return s+@file end s += "//"+@host s += ":"+@port if @port.size > 0 s += @file s += "#"+@label if @label.size > 0 s end def complete(current) @scheme = current.scheme if @scheme == 'unknown' @port = current.port if @host == '' and @port == '' @host = current.host if @host == '' unless @file =~ %r'^/' then @file = File.expand_path(File.dirname(current.file)+'/'+@file) end self end end class Tag def initialize(str) if str =~ /<(.+)>/ then str = $1 end tags = str.split @tagname = tags.shift.downcase @vals = {} tags.each do |t| if t =~ /=/ then tn,tv = t.split(/\s*=\s*/,2) tv.sub!(/^"/,"") tv.sub!(/"$/,"") @vals[tn.downcase] = tv else @vals[t.downcase] = TRUE end end end def tagname return @tagname end def each @vals.each do |k,v| yield k,v end end def switch(k) return @vals[k] end def to_s if tagname =~ /!--/ then return '' end t = "<"+tagname if @vals.size == 0 then return t+">" end each do |a,v| if v == true then t += " #{a}" else t += " #{a}=\"#{v}\"" end end t+">" end end class TokenStream TAG_START = ?< TAG_END = ?> AMP_START = ?& AMP_END = ?; def initialize(file) if file.kind_of?(IO) then @f = file else @f = File.new(file) end @buf = nil @bpos = 0 end def read_until(endsym) complete = FALSE tag = [] begin while @bpos < @buf.size c = @buf[@bpos] if c == endsym then tag.push(c.chr) complete = TRUE @bpos += 1 break end if c == 10 || c == 13 then tag.push(' ') else tag.push(c.chr) end @bpos += 1 end unless complete @buf = @f.gets @bpos = 0 break if @f.eof? end end until complete return tag.join('') end def get while TRUE if @buf.nil? then @buf = @f.gets if @f.eof? then return nil end @buf = Kconv.toeuc(@buf) @bpos = 0 end if @buf[@bpos] == TAG_START then return Tag.new(read_until(TAG_END)) elsif @buf[@bpos] == AMP_START then return read_until(AMP_END) else i = @bpos while i < @buf.size && @buf[i] != TAG_START && @buf[i] != AMP_START i += 1 end r = @buf[@bpos,i-@bpos] if i == @buf.size then @buf = nil else @bpos = i end redo if r =~ /^\s+$/ return r end end end public :eof? def eof? @f.eof? end end ################################ MAIN #################################### refs = [] refnum = 0 body_finished = false html_finished = false currentURL = nil immediate_ref = false while ARGV[0] =~ /^-/ case ARGV.shift when '-url' currentURL = URL.new(ARGV.shift) when '-u' immediate_ref = true end end if ARGV.size > 0 then f = TokenStream.new(ARGV[0]) else f = TokenStream.new(STDIN) end until f.eof? tok = f.get if tok.kind_of?(Tag) then if tok.tagname == 'a' and !tok.switch('href').nil? then refs[refnum] = tok.switch('href') refnum += 1 elsif tok.tagname == '/a' then if immediate_ref then r = refs[refnum-1] if !currentURL.nil? then r = URL.new(r).complete(currentURL).to_s end print "[#{r}]" else print "[#{refnum}]" end elsif tok.tagname == '/body' then body_finished = true break elsif tok.tagname == '/html' then html_finished = true break end print tok.to_s elsif !tok.nil? then print tok end end if !immediate_ref and refs.size > 0 then print "