#!/usr/bin/python

#
# Figure out the shortest path between everybody in the
# ecosystem and you (forward or backward).
#
#	Copyright (C) 2002 Phillip Pearson
#
#	http://www.myelin.co.nz/ecosystem/
#

#	Permission is hereby granted, free of charge, to any person
#	obtaining a copy of this software and associated documentation
#	files (the "Software"), to deal in the Software without
#	restriction, including without limitation the rights to use,
#	copy, modify, merge, publish, distribute, sublicense, and/or
#	sell copies of the Software, and to permit persons to whom the
#	Software is furnished to do so, subject to the following
#	conditions:

#	The above copyright notice and this permission notice shall be
#	included in all copies or substantial portions of the
#	Software.

#	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
#	KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
#	WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
#	PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
#	COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
#	OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
#	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


from blog import *
import pickle

print "Loading ecosystem data"
blogs = pickle.load( open( 'linkData.pickle' ) )

# Are we going forward or backward?
direction = "fwdLinks"
#direction = "backLinks"

# Do this iteratively, otherwise it will take ages.
reached = {}

def walk( start, dist ):
	"""Record every not-yet-reached blog that `start` links to.

	The links are read from the attribute of `start` named by the
	module-level `direction`.  Each one is normalised with genericLink();
	if the result is not already a key of `reached`, the blog is looked up
	in `blogs`, stamped with `dist` and stored in `reached`.  Links that
	are already in `reached` are left untouched.  Returns nothing.
	"""
	for rawLink in getattr( start, direction ):
		key = genericLink( rawLink )
		if key in reached:
			continue
		target = blogs[key]
		target.dist = dist
		reached[key] = target
			

# Start from you!
#you = 'salon.com/0000002'	# me ;-)
#you = 'zopenx.net'		# robert barksdale
#you = 'pycs.net/workbench'	# rogers cadenhead
you = 'scripting.com'		# dave winer; an extremely well-connected blogger
#you = 'home.earthlink.net/~epcostello/wwwlogue'	# somebody who isn't connected very well

# see how long it takes to get here:
#dest = 'slashdot.org'
dest = 'foo.co.nz'

yourBlog = None

matches = []
for url,blog in blogs.items():
	if url.find( you ) != -1:
		matches.append( ( len( genericLink( blog.url ) ), blog ) )

if len( matches ):
	if len( matches ) > 1:
		print "ambiguous: picking the first of the following ..."
		matches.sort()
		import pprint
		pprint.pprint( [ m[1] for m in matches ] )
	yourBlog = matches[0][1]
else:
	raise "Couldn't find your blog - URL " + you
	
# Now go ...
print "Starting from:",yourBlog
iteration = 1
reached[ genericLink( yourBlog.url ) ] = yourBlog
yourBlog.dist = 0

while 1:
	print "iteration #%d; reached %d, %d to go" % ( iteration, len( reached ), len( blogs ) - len( reached ) )
	walkCount = 0
	for start in reached.values():
		if not hasattr( start, 'walked' ):
			if start.url.find( dest ) != -1:
				print "\t\treached",start
			start.walked = 1
			walk( start, iteration )
			walkCount += 1
	print "(walked %d this iter)" % ( walkCount, )
	if not walkCount: break
	iteration += 1

import sys
# List every blog the walk never got to in unreachable.txt.  The file is
# written to directly (rather than by temporarily replacing sys.stdout) and is
# closed explicitly once the report is complete, even if writing fails.
unreachableFile = open( 'unreachable.txt', 'wt' )
try:
	if len( reached ) != len( blogs ):
		for link,blog in blogs.items():
			if not reached.has_key( link ):
				print >>unreachableFile, "Couldn't reach",blog
finally:
	unreachableFile.close()

