# The usual web-log report: top 10 popular URLs by hits and by bytes, top 10
#  404s, top 10 client IPs by hits, and the top 10 referrers.  The output is
#  plain text; the HTML formatting is skipped.

# Tally tables filled in while the log is read.  A missing key reads as 0,
# so entries can be bumped with += without initialising them first.
@u_hits  = Hash.new(0) # URI => hit count
@u_bytes = Hash.new(0) # URI => bytes served
@s404s   = Hash.new(0) # URI => number of 404 responses
@clients = Hash.new(0) # client address => hit count
@refs    = Hash.new(0) # referrer => hit count

# Tallies one served request into the global counters.
#
# Bytes are accumulated for every URI.  Hits, client addresses and referrers
# are only counted when the URI matches the
# /ongoing/When/NNNx/YYYY/MM/DD/<name> pattern, where <name> contains no
# space or dot.
#
# client - client address field of the log line
# u      - requested URI
# bytes  - Integer number of bytes to add for this URI
# ref    - referrer field, still wrapped in its double quotes, or nil when
#          the log line carries no referrer field
def record(client, u, bytes, ref)
  @u_bytes[u] += bytes
  return unless u =~ %r{^/ongoing/When/\d\d\dx/\d\d\d\d/\d\d/\d\d/[^ .]+$}

  @u_hits[u] += 1
  @clients[client] += 1

  # A line with too few fields yields a nil referrer; slicing nil would
  # raise, and there is nothing to tally anyway.  '"-"' means "no referrer".
  return if ref.nil? || ref == '"-"'
  # Skip referrals from the site's own pages.  The dots are escaped so that
  # only the literal host name matches.
  return if ref =~ %r{^"http://www\.tbray\.org/ongoing/}

  @refs[ref[1..-2]] += 1 # lose the quotes
end

# Prints the ten largest entries of a counter hash to standard output,
# largest first, under a "Top <label>:" heading and followed by a blank line.
#
# label  - text inserted into the heading
# hash   - Hash mapping String keys to numeric counts; it is left unmodified
# shrink - when true, counts are divided by 1024 * 1024 and printed with one
#          decimal place and an "M" suffix; otherwise printed as integers
#
# Keys longer than 60 characters are cut to 60 and suffixed with "...".
def report(label, hash, shrink = false)
  puts "Top #{label}:"
  fmt = shrink ? " %9.1fM: %s\n" : " %10d: %s\n"
  top = hash.sort_by { |_key, count| -count }.first(10)
  top.each do |key, count|
    pkey = key.length > 60 ? key[0..59] + '...' : key
    # Scale a local value only: storing the scaled number back into the hash
    # would leave it with mixed units and break any later use of it.
    value = shrink ? count / (1024.0 * 1024.0) : count
    printf fmt, value, pkey
  end
  puts
end

# Read every input line, split it on whitespace and tally GET requests by
# response status.
ARGF.each_line do |line|
  fields = line.split(/\s+/)
  next if fields[5] != '"GET'

  # Fields used: 0 client, 6 URI, 8 status, 9 byte count, 10 referrer.
  client, u, status, bytes, ref = fields.values_at(0, 6, 8, 9, 10)

  case status
  when '200' then record(client, u, bytes.to_i, ref)
  when '304' then record(client, u, 0, ref) # counted as a hit with no bytes
  when '404' then @s404s[u] += 1
  end
end

# Emit the five top-10 tables in a fixed order.  Each row holds the arguments
# for one report call; a trailing true asks for byte counts to be shown in
# megabytes.
[
  ['URIs by hit', @u_hits],
  ['URIs by bytes', @u_bytes, true],
  ['404s', @s404s],
  ['client addresses', @clients],
  ['referrers', @refs]
].each { |args| report(*args) }