# A normal HTML report: top 10 popular URLs by hits and bytes, top 10 404s,
# top 10 client IPs by hits, and the top 10 referrers.  Only, skip the HTML.
#
# Log lines are read from ARGF and split on whitespace; fields are picked by
# position: [0] client, [5] request method, [6] URI, [8] status, [9] bytes,
# [10] referrer.
# NOTE(review): this layout looks like Apache "combined" access-log format --
# confirm against the logs actually fed to this script.

# URIs that count as an article fetch (hits, clients and referrers are only
# tallied for these).
ARTICLE_URI = %r{^/ongoing/When/\d\d\dx/\d\d\d\d/\d\d/\d\d/[^ .]+$}

# Still-quoted referrer fields that point back into the site itself; these are
# left out of the referrer tally.
INTERNAL_REFERRER = %r{^\"http://www.tbray.org/ongoing/}

# Divisor used to print byte totals in units of 1024 * 1024 bytes.
BYTES_PER_MEGABYTE = 1024.0 * 1024.0

# Counters; every unseen key reads as 0 so `+=` works without initialisation.
@u_hits = Hash.new(0)   # article URI -> number of fetches
@u_bytes = Hash.new(0)  # any URI     -> bytes served
@s404s = Hash.new(0)    # URI         -> number of 404 responses
@clients = Hash.new(0)  # client      -> number of article fetches
@refs = Hash.new(0)     # referrer    -> number of article fetches

# Tallies one successful fetch into the counters.
#
# Bytes are counted for every URI; hits, clients and referrers only for URIs
# matching ARTICLE_URI.
#
# @param client [String] client field of the log line
# @param u [String] requested URI
# @param bytes [Integer] bytes served for this request
# @param ref [String, nil] referrer field, still wrapped in double quotes;
#   nil when the log line has no referrer field
def record(client, u, bytes, ref)
  @u_bytes[u] += bytes
  return unless u =~ ARTICLE_URI

  @u_hits[u] += 1
  @clients[client] += 1

  # A missing field, the '"-"' placeholder and links from the site itself are
  # not counted as referrers.  The nil check matters: without it a log line
  # lacking the referrer field would raise NoMethodError on the slice below.
  return if ref.nil? || ref == '"-"' || ref =~ INTERNAL_REFERRER

  @refs[ref[1..-2]] += 1 # lose the quotes
end

# Prints the ten largest entries of +hash+ to standard output, biggest first,
# followed by a blank line.
#
# @param label [String] text printed after "Top " in the heading
# @param hash [Hash{String => Numeric}] counter to report on; it is only read,
#   so the same hash can be reported more than once
# @param shrink [Boolean] when true, values are divided by 1024 * 1024 and
#   printed with one decimal and an "M" suffix
# @return [nil]
def report(label, hash, shrink = false)
  puts "Top #{label}:"
  fmt = shrink ? " %9.1fM: %s\n" : " %10d: %s\n"

  hash.sort_by { |_key, count| -count }.first(10).each do |key, count|
    # Keep long keys from blowing up the layout.
    pkey = key.length > 60 ? "#{key[0..59]}..." : key
    # Scale a local copy only; writing the scaled value back into +hash+
    # would corrupt the caller's counters.
    value = shrink ? count / BYTES_PER_MEGABYTE : count
    printf fmt, value, pkey
  end
  puts
end

ARGF.each_line do |line|
  f = line.split(/\s+/)
  next unless f[5] == '"GET'

  client, u, status, bytes, ref = f.values_at(0, 6, 8, 9, 10)
  # puts "u, #{u}, s, #{status}, b, #{bytes}, r, #{ref}"
  case status
  when '200' then record(client, u, bytes.to_i, ref)
  when '304' then record(client, u, 0, ref) # not modified: a hit, but no bytes sent
  when '404' then @s404s[u] += 1
  end
end

report('URIs by hit', @u_hits)
report('URIs by bytes', @u_bytes, true)
report('404s', @s404s)
report('client addresses', @clients)
report('referrers', @refs)