# The usual web-log report: top 10 popular URLs by hits and by bytes, top 10
# 404s, top 10 client IPs by hits, and the top 10 referrers -- printed as
# plain text rather than HTML.
# Tally tables. Each one defaults to 0 so a counter can be bumped with +=
# without first checking whether the key exists.
@u_hits  = Hash.new(0) # hits per URI
@u_bytes = Hash.new(0) # bytes per URI
@s404s   = Hash.new(0) # 404 responses per URI
@clients = Hash.new(0) # hits per client
@refs    = Hash.new(0) # hits per referrer
# Tally one request into the module-level tables.
#
# Bytes are accumulated for every URI. The hit, client and referrer tallies
# are only updated when +u+ matches /ongoing/When/NNNx/YYYY/MM/DD/<name>,
# where <name> contains no space or dot.
#
# client - key for the per-client tally (callers pass the first log field).
# u      - the requested URI.
# bytes  - Integer number of bytes to add for this URI.
# ref    - referrer field still wrapped in double quotes; '"-"' or nil means
#          no referrer was recorded.
def record(client, u, bytes, ref)
  @u_bytes[u] += bytes
  return unless u =~ %r{^/ongoing/When/\d\d\dx/\d\d\d\d/\d\d/\d\d/[^ .]+$}

  @u_hits[u] += 1
  @clients[client] += 1

  # A log line without a referrer field yields nil; treat it like '"-"'
  # instead of failing when the quotes are stripped below.
  return if ref.nil? || ref == '"-"'
  # Skip self-referrals. A literal prefix test is used so the dots in the host
  # name match only dots, not any character.
  return if ref.start_with?('"http://www.tbray.org/ongoing/')

  @refs[ref[1..-2]] += 1 # lose the quotes
end
# Print the ten highest-valued entries of +hash+ to standard output, largest
# first, under a "Top <label>:" heading and followed by a blank line.
#
# label  - text inserted into the heading.
# hash   - table mapping String keys to numeric values. It is not modified.
# shrink - when true, values are divided by 1024 * 1024 and printed with one
#          decimal place and an "M" suffix; otherwise printed as integers.
#
# Keys longer than 60 characters are cut to their first 60 characters with
# "..." appended.
def report(label, hash, shrink = false)
  puts "Top #{label}:"
  top_keys = hash.keys.sort_by { |key| -hash[key] }[0..9]
  fmt = shrink ? " %9.1fM: %s\n" : " %10d: %s\n"
  top_keys.each do |key|
    shown = key.length > 60 ? "#{key[0..59]}..." : key
    # Scale into a local so the caller's table keeps its raw values and the
    # same hash can be reported more than once.
    value = shrink ? hash[key] / (1024.0 * 1024.0) : hash[key]
    printf fmt, value, shown
  end
  puts
end
# Read every input line, split it on whitespace, and tally GET requests by
# response status: 200 counts its bytes, 304 counts as a zero-byte request,
# and 404 goes to the not-found table. Anything else is ignored.
ARGF.each_line do |entry|
  fields = entry.split(/\s+/)
  next if fields[5] != '"GET'

  client = fields[0]
  u      = fields[6]
  status = fields[8]
  bytes  = fields[9]
  ref    = fields[10]

  case status
  when '200' then record(client, u, bytes.to_i, ref)
  when '304' then record(client, u, 0, ref)
  when '404' then @s404s[u] += 1
  end
end
# Emit the five summaries in a fixed order; only the byte totals are scaled
# to megabytes.
[
  ['URIs by hit',      @u_hits,  false],
  ['URIs by bytes',    @u_bytes, true],
  ['404s',             @s404s,   false],
  ['client addresses', @clients, false],
  ['referrers',        @refs,    false]
].each do |label, table, shrink|
  report(label, table, shrink)
end