blob: 9cb1942e5cf3e19f6c339626f99e479784545a56 (
plain) (
tree)
|
|
#!/usr/local/bin/ruby
# HTML reference generator
# by A.Ito 1999/3/30
require 'kconv'
###########################################################################
# Small URL value object used to turn HREFs into absolute references.
#
# A URL string is split into:
#   scheme - text before the first ':' ('unknown' when the string has none)
#   host   - authority after '//' ('' when absent)
#   port   - decimal port as a String ('' when absent)
#   file   - everything after the authority up to the last '#'
#   label  - text after the last '#' ('' when absent)
class URL
  # Schemes whose remainder is not a //host/path hierarchy; they are printed
  # as "scheme:rest" and never resolved against a base URL.
  OPAQUE_SCHEMES = %w[news mailto].freeze

  attr_reader :scheme, :host, :port, :file, :label

  # Parses +str+ (nil is treated as an empty string; surrounding whitespace
  # is ignored).
  def initialize(str)
    rest = str.to_s.strip

    # The scheme must be at the very start; an unanchored match would take
    # the "s" of "page.html#s:1" or the host of "//host:80/x" for a scheme.
    if /\A([a-zA-Z+\-]+):(.*)/ =~ rest
      @scheme = Regexp.last_match(1)
      rest = Regexp.last_match(2)
    else
      @scheme = 'unknown'
    end

    # An authority is only recognised directly after the scheme. The second
    # capture is '' for a bare "//host" and otherwise starts at the first
    # '/', '?' or '#'.
    hostpart = ''
    if %r{\A//([^/?#]*)(.*)}m =~ rest
      hostpart = Regexp.last_match(1)
      rest = Regexp.last_match(2)
    end

    if /\A(.*):(\d+)\z/m =~ hostpart
      @host = Regexp.last_match(1)
      @port = Regexp.last_match(2)
    else
      @host = hostpart
      @port = ''
    end

    # Greedy match: the label is whatever follows the LAST '#'.
    if /(.*)#(.*)/ =~ rest
      @file = Regexp.last_match(1)
      @label = Regexp.last_match(2)
    else
      @file = rest
      @label = ''
    end
  end

  # Rebuilds the URL string. news:/mailto: URLs are emitted as "scheme:file"
  # (without a label); everything else as scheme://host[:port]file[#label].
  def to_s
    return "#{@scheme}:#{@file}" if OPAQUE_SCHEMES.include?(@scheme)

    s = "#{@scheme}://#{@host}"
    s += ":#{@port}" unless @port.empty?
    s += @file
    s += "##{@label}" unless @label.empty?
    s
  end

  # Fills in the components this URL lacks from +current+ (another URL, used
  # as the base) and returns self. The receiver is modified in place.
  #
  # * news:/mailto: URLs are left untouched.
  # * A missing scheme is taken from the base.
  # * A URL that names its own host keeps its path as written.
  # * Otherwise host and port come from the base; an empty path (for example
  #   a "#label"-only reference) becomes the base path, and a relative path
  #   is resolved against the directory part of the base path.
  def complete(current)
    return self if OPAQUE_SCHEMES.include?(@scheme)

    @scheme = current.scheme if @scheme == 'unknown'
    return self unless @host.empty?

    @host = current.host
    @port = current.port
    if @file.empty?
      @file = current.file
    elsif !@file.start_with?('/')
      # Directory of the base path = everything up to and including its last
      # '/'. Forcing a leading '/' keeps File.expand_path from consulting
      # the process working directory or expanding a leading '~'.
      base_dir = current.file[%r{\A.*/}m] || '/'
      base_dir = "/#{base_dir}" unless base_dir.start_with?('/')
      # File.expand_path collapses "." and ".." segments.
      # NOTE(review): assumes POSIX-style paths; on Windows expand_path
      # would prepend a drive letter - confirm if that platform matters.
      @file = File.expand_path(base_dir + @file)
    end
    self
  end
end
# One HTML tag: a lower-cased tag name plus its attributes.
#
# Attribute names are lower-cased. An attribute written without a value
# (e.g. "checked") is stored as true; otherwise the value is stored as a
# String with its surrounding quotes removed.
class Tag
  # name, optionally followed by = and a "double-quoted", 'single-quoted' or
  # bare value. Whitespace is allowed around '=' and inside quoted values.
  ATTRIBUTE = /([^\s=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S*)))?/

  attr_reader :tagname

  # Parses +str+, the tag text with or without the enclosing '<' and '>'.
  def initialize(str)
    str = Regexp.last_match(1) if str =~ /<(.+)>/
    name, rest = str.strip.split(/\s+/, 2)
    @tagname = name.to_s.downcase # name is nil for a blank tag body
    @vals = {}
    rest.to_s.scan(ATTRIBUTE) do |key, dquoted, squoted, bare|
      # A bare value may still carry a stray quote (e.g. an unterminated
      # "value); drop one from each end.
      bare = bare.sub(/\A"/, '').sub(/"\z/, '') if bare
      value = dquoted || squoted || bare
      @vals[key.downcase] = value.nil? ? true : value
    end
  end

  # Yields each attribute as (name, value).
  def each
    @vals.each do |k, v|
      yield k, v
    end
  end

  # Returns the value stored for attribute +k+ (lower-case name): a String,
  # true for a valueless attribute, or nil when the attribute is absent.
  def switch(k)
    @vals[k]
  end

  # Rebuilds the tag text. Tags whose name contains "!--" (comments) render
  # as ''. Values are double-quoted, or single-quoted when they themselves
  # contain a double quote.
  def to_s
    return '' if tagname =~ /!--/

    attrs = @vals.map do |name, value|
      if value == true
        " #{name}"
      else
        quote = value.include?('"') ? "'" : '"'
        " #{name}=#{quote}#{value}#{quote}"
      end
    end
    "<#{tagname}#{attrs.join}>"
  end
end
# Splits an HTML stream into tokens: Tag objects, character entities
# ("&...;") and runs of plain text. Input is read line by line and every
# line is passed through Kconv.toeuc before it is tokenised.
class TokenStream
  TAG_START = '<'
  TAG_END = '>'
  AMP_START = '&'
  AMP_END = ';'
  # Either character ends a run of plain text.
  TEXT_END = Regexp.union(TAG_START, AMP_START)

  # +file+ is either an object that responds to #gets and #eof? (an IO such
  # as STDIN, a StringIO, ...) or a file name to open for reading.
  def initialize(file)
    @f = file.respond_to?(:gets) ? file : File.new(file)
    @buf = nil # current line, or nil when a new line must be read
    @bpos = 0  # character index of the next unread character in @buf
  end

  # Consumes input from the current position up to and including the next
  # +endsym+ character, continuing onto following lines if necessary, and
  # returns the consumed text with CR and LF each replaced by a space.
  # If the input ends first, whatever was read is returned.
  def read_until(endsym)
    pieces = []
    while @buf || fill_buffer
      stop = @buf.index(endsym, @bpos)
      if stop
        pieces << @buf[@bpos..stop]
        @bpos = stop + 1
        break
      end
      # No terminator on this line: take the rest and move to the next one.
      pieces << @buf[@bpos..-1]
      @buf = nil
    end
    pieces.join.tr("\r\n", '  ')
  end

  # Returns the next token, or nil when the input is exhausted:
  # * a Tag for text starting with '<' (up to the next '>'),
  # * a String for text starting with '&' (up to the next ';'),
  # * otherwise a String of plain text up to the next '<', '&' or end of
  #   line. Text runs consisting only of whitespace are skipped.
  def get
    loop do
      return nil unless @buf || fill_buffer

      if @bpos >= @buf.size
        @buf = nil
        next
      end

      case @buf[@bpos]
      when TAG_START then return Tag.new(read_until(TAG_END))
      when AMP_START then return read_until(AMP_END)
      end

      stop = @buf.index(TEXT_END, @bpos) || @buf.size
      text = @buf[@bpos...stop]
      if stop == @buf.size
        @buf = nil
      else
        @bpos = stop
      end
      return text if text =~ /\S/
    end
  end

  # True once the buffered line is used up and the underlying stream has no
  # more data. Buffered text counts as pending input, so the final line is
  # not reported as end-of-file before it has been tokenised.
  def eof?
    (@buf.nil? || @bpos >= @buf.size) && @f.eof?
  end

  private

  # Reads and converts the next line into @buf and resets @bpos. Returns
  # false (leaving @buf nil) when the stream has no more lines.
  def fill_buffer
    line = @f.gets
    @buf = line && Kconv.toeuc(line)
    @bpos = 0
    !@buf.nil?
  end
end
################################ MAIN ####################################
# Copies the HTML input to standard output, marking every link and listing
# the link targets.
#
# Usage: [-url BASE] [-u] [FILE]
#   -url BASE  resolve each HREF against BASE before printing it
#   -u         print the target inline as "[target]" before each </a>
#              instead of a number plus a reference list at the end
#   FILE       input file; standard input when omitted
# Other arguments starting with '-' are consumed and ignored.
refs = []            # HREF values in order of appearance
currentURL = nil     # base URL given with -url, if any
immediate_ref = false

while ARGV[0] =~ /^-/
  case ARGV.shift
  when '-url'
    base = ARGV.shift
    abort "#{$0}: -url requires a URL argument" if base.nil?
    currentURL = URL.new(base)
  when '-u'
    immediate_ref = true
  end
end

# Text printed for an HREF: as written, or resolved against the base URL.
resolve = lambda do |href|
  currentURL.nil? ? href : URL.new(href).complete(currentURL).to_s
end

f = TokenStream.new(ARGV.empty? ? STDIN : ARGV[0])

# True while inside an <a> that carried an href, so that the </a> of an
# anchor without one (e.g. <a name="x">) does not get a reference marker.
anchor_has_ref = false

until f.eof?
  tok = f.get
  next if tok.nil?

  unless tok.kind_of?(Tag)
    print tok
    next
  end

  case tok.tagname
  when 'a'
    href = tok.switch('href')
    # A valueless "href" attribute is stored as true, not a String.
    if href.is_a?(String)
      refs << href
      anchor_has_ref = true
    end
  when '/a'
    if anchor_has_ref
      print(immediate_ref ? "[#{resolve.call(refs.last)}]" : "[#{refs.size}]")
      anchor_has_ref = false
    end
  when '/body', '/html'
    # Stop before echoing the closing tag: the reference list has to go
    # inside the body. Both closing tags are written after the list.
    break
  end
  print tok.to_s
end

if !immediate_ref && !refs.empty?
  print "<hr><h2>References</h2>\n"
  refs.each_with_index do |href, i|
    print "[#{i + 1}] #{resolve.call(href)}<br>\n"
  end
end

# The loop never echoes </body> or </html> (it stops at whichever comes
# first, or the input had neither), so both are always written here.
print "</body>\n"
print "</html>\n"
|