require 'rexml/document'

# Report the most-linked URLs found in recent feed cache entries.
#
# Every cache file modified within the last week is parsed as XML. Each
# distinct href it contains is credited with a weight that decays
# quadratically with the file's age, and the highest-scoring links are printed
# together with the entries that cite them.

# some useful constants
now = Time.now.to_i
week_ago = now - 7 * 86_400
week = 7 * 86_400.0 # Float, so the weight division below is real-valued
top_count = 10      # number of links to report

# Returns the href of every element in +doc+ that has one, in document order,
# skipping children of <source> elements and rel="self" links.
def entry_hrefs(doc)
  candidates = REXML::XPath.match(doc, '//*[@href]').reject do |element|
    element.parent.name == 'source' || element.attributes['rel'] == 'self'
  end
  candidates.map { |element| element.attributes['href'] }
end

# Returns the href of the first rel="alternate" <link> in +doc+, or nil when
# there is no such link or it has no href.
def entry_source(doc)
  link = REXML::XPath.first(doc, '//link[@rel="alternate"]')
  link && link.attributes['href']
end

# Returns the weight for an entry +age+ seconds old within a +window+ of
# seconds: 1.0 at age zero, falling quadratically to 0.0 at the window's edge.
def link_weight(age, window)
  1.0 - age**2 / window.to_f**2
end

# Reduces +all_links+ (link => [[weight, source], ...]) to at most +limit+
# [total_weight, link] pairs, highest total first (ties: link descending).
def top_links(all_links, limit)
  totals = all_links.map do |link, refs|
    [refs.inject(0) { |sum, ref| sum + ref[0] }, link]
  end
  totals.sort.reverse.first(limit)
end

# here's where we accumulate the results: link => [[weight, source], ...]
all_links = {}

Dir['/home/rubys/planet/cache/*'].each do |file|
  # ensure the entry isn't older than a week, or from Bob Sutor
  stat = File.stat(file)
  next if stat.directory?
  next if stat.mtime.to_i < week_ago
  next if file.include? 'sutor'

  # parse with the block form of File.open so the handle is always closed;
  # a malformed cache file is reported and skipped rather than aborting the run
  begin
    doc = File.open(file) { |io| REXML::Document.new(io) }
  rescue REXML::ParseException => e
    warn "skipping #{file}: #{e.message}"
    next
  end

  # grab the link to this entry; entries without one cannot be credited
  source = entry_source(doc)
  next unless source

  # add all unique links and weight to all_links
  weight = link_weight(now - stat.mtime.to_i, week)
  entry_hrefs(doc).sort.uniq.each do |link|
    (all_links[link] ||= []) << [weight, source]
  end
end

# output the top links, and who linked to each (heaviest citation first)
top_links(all_links, top_count).each do |_total, link|
  puts link
  all_links[link].sort.reverse.each do |_weight, source|
    puts "  #{source}"
  end
  puts
end
