#!/usr/bin/python

#
# Calculate the degree of separation in the Blogging Ecosystem
#
#	Copyright (C) 2002 Phillip Pearson
#
#	http://www.myelin.co.nz/ecosystem/
#

#       Permission is hereby granted, free of charge, to any person
#       obtaining a copy of this software and associated documentation
#       files (the "Software"), to deal in the Software without
#       restriction, including without limitation the rights to use,
#       copy, modify, merge, publish, distribute, sublicense, and/or
#       sell copies of the Software, and to permit persons to whom the
#       Software is furnished to do so, subject to the following
#       conditions:

#       The above copyright notice and this permission notice shall be
#       included in all copies or substantial portions of the
#       Software.

#       THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
#       KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
#       WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
#       PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
#       COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#       LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
#       OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
#       SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

from blog import *
import pickle

# Load the pickled link data into `blogs`.  The loops further down call
# blogs.values() and read .fwdLinks / .backLinks on each value.
print("reading")
# NOTE(review): unpickling can execute arbitrary code, so only point this at a
# linkData.pickle that was produced locally by a trusted tool.
# NOTE(review): the open mode is left at the default (text) exactly as before;
# pickles written with a binary protocol need 'rb' -- confirm how the file is
# written before changing it.
linkDataFile = open( 'linkData.pickle' )
try:
	blogs = pickle.load( linkDataFile )
finally:
	# Release the file handle even if unpickling fails.
	linkDataFile.close()

# Walk every loaded blog record once, accumulating how many forward
# (outbound) and back (inbound) links exist in total, then report the sums.
print("scanning")
records = list( blogs.values() )
totalBlogs = len( records )
totalOutLinks = 0
totalInLinks = 0
for record in records:
	totalOutLinks = totalOutLinks + len( record.fwdLinks )
	totalInLinks = totalInLinks + len( record.backLinks )

summary = "total links: %d outbound, %d inbound (out of %d blogs)" % (
	totalOutLinks, totalInLinks, totalBlogs )
print(summary)

import math

# Estimate the degree of separation as
#     log( number of blogs ) / log( average outbound links per blog )
# and print it.  The ratio is undefined when there are no blogs (division by
# zero in the average), when there are no outbound links (log of zero), and
# when the average is exactly one (log of one is zero, so the final division
# would fail); those cases are reported instead of raising.
if totalBlogs == 0:
	print("average links per page: undefined (no blogs loaded)")
	print("degrees of separation: undefined (no blogs loaded)")
else:
	avg = float( totalOutLinks ) / float( totalBlogs )
	print("average links per page: %g" % ( avg, ))

	if avg <= 0:
		# math.log() raises ValueError for non-positive arguments.
		print("degrees of separation: undefined (no outbound links)")
	elif avg == 1:
		# log( 1 ) == 0, which would make the divisor zero.
		print("degrees of separation: undefined (average of exactly one link per page)")
	else:
		logAvg = math.log( avg )
		logTotal = math.log( totalBlogs )

		separation = logTotal / logAvg
		print("degrees of separation: %g (= %g / %g)" % ( separation, logTotal, logAvg ))


