#!/usr/bin/python

#
# Figure out how many messed-up records (where links
# have not been correctly detected) we have and
# generate a 'linkedness' table for the Blogging
# Ecosystem.
#
#	Copyright (C) 2002 Phillip Pearson
#
#	http://www.myelin.co.nz/ecosystem/
#

#       Permission is hereby granted, free of charge, to any person
#       obtaining a copy of this software and associated documentation
#       files (the "Software"), to deal in the Software without
#       restriction, including without limitation the rights to use,
#       copy, modify, merge, publish, distribute, sublicense, and/or
#       sell copies of the Software, and to permit persons to whom the
#       Software is furnished to do so, subject to the following
#       conditions:

#       The above copyright notice and this permission notice shall be
#       included in all copies or substantial portions of the
#       Software.

#       THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
#       KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
#       WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
#       PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
#       COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#       LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
#       OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
#       SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

from blog import *
import pickle

# Load the ecosystem data, bucket every entry by how many back links it
# has, and print a histogram of bucket sizes from most-linked to
# least-linked.  Buckets holding fewer than five pages also list the
# individual pages.
#
# Every print below passes a single parenthesised value, which produces
# the same output as the print statement form on Python 2.

print("Loading ecosystem data")

# SECURITY: unpickling can execute arbitrary code, so linkData.pickle
# must only ever come from a trusted source.
# NOTE(review): the file is opened in the default (text) mode, as before.
# Pickle files are normally opened in binary mode, but the mode has to
# match whatever the script that wrote the file used -- confirm against
# the writer before changing it.
dataFile = open( 'linkData.pickle' )
try:
	blogs = pickle.load( dataFile )
finally:
	# Close the handle explicitly instead of relying on garbage collection.
	dataFile.close()

# linkedness maps back-link count -> { url: blog } for every entry whose
# backLinks collection has that length.
linkedness = {}

for url,blog in blogs.items():
	linkedness.setdefault( len( blog.backLinks ), {} )[ url ] = blog

# Distinct back-link counts, highest first.
linkednesses = list( linkedness.keys() )
linkednesses.sort()
linkednesses.reverse()

for linkCount in linkednesses:
	pages = linkedness[ linkCount ]
	print("Pages with %d links: %d" % ( linkCount, len( pages ) ))
	if len( pages ) < 5:
		# Small bucket: iterating the dict yields its keys (the urls).
		for page in pages:
			print("\t%s" % ( page, ))


