#!/usr/bin/python
#
# Version: 0.3.3
#
# A plugin for the Yellowdog Updater Modified which sorts each repo's
# mirrorlist by connection speed prior to download.
#
# To install this plugin, just drop it into /usr/lib/yum-plugins, and
# make sure you have 'plugins=1' in your /etc/yum.conf. You also need to
# create the following configuration file, if not installed through an RPM:
#
# /etc/yum/pluginconf.d/fastestmirror.conf:
# [main]
# enabled=1
# verbose=1
# socket_timeout=3
# hostfilepath=timedhosts
# maxhostfileage=10
# maxthreads=15
# #exclude=.gov, facebook
# #include_only=.nl,.de,.uk,.ie
# #prefer=your.favourite.mirror
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# (C) Copyright 2005 Luke Macken <lmacken@redhat.com>
#
"""
B{FastestMirror} is a Yum plugin which sorts each repository's mirrorlist
according to connection speed prior to download.
"""
import os
import sys
import time
import socket
import string
import urlparse
import datetime
import threading
import re
from yum.plugins import TYPE_CORE
requires_api_version = '2.5'
plugin_type = (TYPE_CORE,)
verbose = False
always_print_best_host = True
socket_timeout = 3
timedhosts = {}
hostfilepath = ''
maxhostfileage = 10
loadcache = False
maxthreads = 15
exclude = None
include_only = None
prefer = None
downgrade_ftp = True
done_sock_timeout = False
done_repos = set()
def init_hook(conduit):
"""
This function initiliazes the variables required for running B{fastestmirror}
module. The variables are initiliazed from the main section of the plugin file.
There are no parameteres for this function. It uses global variables to
communicate with other functions.
This function refers:
- L{get_hostfile_age}
@param verbose : Verbosity of output.
@type verbose : Boolean
@param socket_timeout : The default timeout for a socket connection.
@type socket_timeout : Integer
@param hostfilepath : Absolute path to the plugin's cache file.
@type hostfilepath : String
@param maxhostfileage : Maximum age of the plugin's cache file.
@type maxhostfileage : Integer
@param loadcache : Fastest Mirrors to be loaded from plugin's cache or not.
@type loadcache : Boolean
"""
global verbose, socket_timeout, hostfilepath, maxhostfileage, loadcache
global maxthreads, exclude, include_only, prefer, downgrade_ftp, always_print_best_host
if hasattr(conduit, 'registerPackageName'):
conduit.registerPackageName("yum-plugin-fastestmirror")
verbose = conduit.confBool('main', 'verbose', default=False)
always_print_best_host = conduit.confBool('main', 'always_print_best_host',
default=True)
socket_timeout = conduit.confInt('main', 'socket_timeout', default=3)
hostfilepath = conduit.confString('main', 'hostfilepath',
default='timedhosts')
maxhostfileage = conduit.confInt('main', 'maxhostfileage', default=10)
maxthreads = conduit.confInt('main', 'maxthreads', default=10)
exclude = conduit.confString('main', 'exclude', default=None)
include_only = conduit.confString('main', 'include_only', default=None)
prefer = conduit.confString('main', 'prefer', default='no.prefer.mirror')
downgrade_ftp = conduit.confBool('main', 'downgrade_ftp', default=True)
def clean_hook(conduit):
"""
This function cleans the plugin cache file if exists. The function is called
when C{yum [options] clean [plugins | all ]} is executed.
"""
global hostfilepath
if hostfilepath and hostfilepath[0] != '/':
hostfilepath = conduit._base.conf.cachedir + '/' + hostfilepath
if os.path.exists(hostfilepath):
conduit.info(2, "Cleaning up list of fastest mirrors")
try:
os.unlink(hostfilepath)
except Exception, e:
conduit.info(2, "Cleanup failed: %s" % e)
# Get the hostname from a url, stripping away any usernames/passwords
host = lambda mirror: mirror.split('/')[2].split('@')[-1]
def _can_write_results(fname):
if not os.path.exists(fname):
try:
hostfile = file(hostfilepath, 'w')
return True
except:
return False
return os.access(fname, os.W_OK)
def _len_non_ftp(urls):
''' Count the number of urls, which aren't ftp. '''
num = 0
for url in urls:
if url.startswith("ftp:"):
continue
num += 1
return num
def prereposetup_hook(conduit):
"""
This function is called after Yum has initiliazed all the repository information.
If cache file exists, this function will load the mirror speeds from the file,
else it will determine the fastest mirrors afresh and write them back to the cache
file.
There are no parameteres for this function. It uses global variables to
communicate with other functions.
This function refers:
- L{read_timedhosts()}
- L{FastestMirror.get_mirrorlist()}
- L{write_timedhosts()}
@param loadcache : Fastest Mirrors to be loaded from plugin's cache file or not.
@type loadcache : Boolean
"""
global loadcache, exclude, include_only, prefer, hostfilepath
if hostfilepath and hostfilepath[0] != '/':
hostfilepath = conduit._base.conf.cachedir + '/' + hostfilepath
# If the file hostfilepath exists and is newer than the maxhostfileage,
# then load the cache.
if os.path.exists(hostfilepath) and get_hostfile_age() < maxhostfileage:
loadcache = True
opts, commands = conduit.getCmdLine()
if conduit._base.conf.cache or not _can_write_results(hostfilepath):
return
if done_repos:
conduit.info(2, "Checking for new repos for mirrors")
elif loadcache:
conduit.info(2, "Loading mirror speeds from cached hostfile")
read_timedhosts()
else:
conduit.info(2, "Determining fastest mirrors")
repomirrors = {}
repos = conduit.getRepos()
# First do all of the URLs as one big list, this way we get as much
# parallelism as possible (if we need to do the network tests).
all_urls = []
for repo in repos.listEnabled():
if repo.id in done_repos:
continue
if downgrade_ftp and _len_non_ftp(repo.urls) == 1:
continue
if len(repo.urls) == 1:
continue
all_urls.extend(repo.urls)
all_urls = FastestMirror(all_urls).get_mirrorlist()
# This should now just be looking up the cached times.
for repo in repos.listEnabled():
if repo.id in done_repos:
continue
if downgrade_ftp and _len_non_ftp(repo.urls) == 1:
repo.urls = sorted(repo.urls, reverse=True) # ftp comes before http
continue
if len(repo.urls) == 1:
continue
if str(repo) not in repomirrors:
repomirrors[str(repo)] = FastestMirror(repo.urls).get_mirrorlist()
if include_only:
def includeCheck(mirror):
if filter(lambda exp: re.search(exp, host(mirror)),
include_only.replace(',', ' ').split()):
conduit.info(2, "Including mirror: %s" % host(mirror))
return True
return False
repomirrors[str(repo)] = filter(includeCheck,repomirrors[str(repo)])
else:
if exclude:
def excludeCheck(mirror):
if filter(lambda exp: re.search(exp, host(mirror)),
exclude.replace(',', ' ').split()):
conduit.info(2, "Excluding mirror: %s" % host(mirror))
return False
return True
repomirrors[str(repo)] = filter(excludeCheck,repomirrors[str(repo)])
repo.urls = repomirrors[str(repo)]
if len(repo.urls):
lvl = 3
if always_print_best_host:
lvl = 2
conduit.info(lvl, " * %s: %s" % (str(repo), host(repo.urls[0])))
repo.failovermethod = 'priority'
repo.check()
repo.setupGrab()
done_repos.add(repo.id)
if done_sock_timeout:
socket.setdefaulttimeout(None)
if not loadcache:
write_timedhosts()
def read_timedhosts():
"""
This function reads the time and hostname from the plugin's cache file and
store them in C{timedhosts}.
There are no parameteres for this function. It uses global variables to
communicate with other functions.
This function is referred by:
- L{prereposetup_hook()}
@param timedhosts : A list of time intervals to reach different hosts
corresponding to the mirrors. The index of the list are hostnames.
C{timedhosts[host] = time}.
@type timedhosts : List
"""
global timedhosts
try:
hostfile = file(hostfilepath)
for line in hostfile.readlines():
host, time = line.split()
timedhosts[host] = float(time)
hostfile.close()
except IOError:
pass
def write_timedhosts():
"""
This function writes the plugin's cache file with the entries in the
C{timedhosts} list.
There are no parameteres for this function. It uses global variables to
communicate with other functions.
This function is referred by:
- L{prereposetup_hook()}
@param timedhosts : A list of time intervals to reach different hosts
corresponding to the mirrors. The index of the list are hostnames.
C{timedhosts[host] = time}.
@type timedhosts : List
"""
global timedhosts
try:
hostfile = file(hostfilepath, 'w')
for host in timedhosts:
hostfile.write('%s %s\n' % (host, timedhosts[host]))
hostfile.close()
except IOError:
pass
def get_hostfile_age():
"""
This function returns the current age of the plugin's cache file.
There are no parameteres for this function. It uses global variables to
communicate with other functions.
This function is referred by:
- L{init_hook()}
@param hostfilepath : Absolute path to the plugin's cache file.
@type hostfilepath : String
@rtype: Integer
@return: The age of the plugin's cache file.
"""
global hostfilepath
timestamp = datetime.datetime.fromtimestamp(os.path.getmtime(hostfilepath))
return (datetime.datetime.now() - timestamp).days
class FastestMirror:
"""
This is the helper class of B{fastestmirror} module. This class does
all the processing of the response time calculation for all the mirrors
of all the enabled Yum repositories.
"""
def __init__(self, mirrorlist):
"""
This is the initiliazer function of the B{L{FastestMirror}} class.
@param mirrorlist : A list of mirrors for an enabled repository.
@type mirrorlist : List
"""
self.mirrorlist = mirrorlist
self.results = {}
self.threads = []
# If we don't spawn any threads, we don't need locking...
def _init_lock(self):
if not hasattr(self, '_results_lock'):
self._results_lock = threading.Lock()
global done_sock_timeout
done_sock_timeout = True
socket.setdefaulttimeout(socket_timeout)
def _acquire_lock(self):
if hasattr(self, '_results_lock'):
self._results_lock.acquire()
def _release_lock(self):
if hasattr(self, '_results_lock'):
self._results_lock.release()
def get_mirrorlist(self):
"""
This function pings/polls all the mirrors in the list
C{FastestMirror.mirrorlist} and returns the sorted list of mirrors
according to the increasing response time of the mirrors.
This function refers:
- L{FastestMirror._poll_mirrors()}
This function is referred by:
- L{prereposetup_hook()}
- L{main()}
@rtype: List
@return: The list of mirrors sorted according to the increasing
response time.
"""
self._poll_mirrors()
if not downgrade_ftp:
mirrors = [(v, k) for k, v in self.results.items()]
else:
# False comes before True
mirrors = [(k.startswith("ftp"), v, k) for k, v in
self.results.items()]
mirrors.sort()
return [x[-1] for x in mirrors]
def _poll_mirrors(self):
"""
This function uses L{PollThread} class to ping/poll individual mirror
in parallel.
This function refers:
- L{PollThread.run()}
This function is referred by:
- L{FastestMirror.get_mirrorlist()}
"""
global maxthreads
for mirror in self.mirrorlist:
if len(self.threads) > maxthreads:
if self.threads[0].isAlive():
self.threads[0].join()
del self.threads[0]
if mirror.startswith("file:"):
mhost = "127.0.0.1"
else:
mhost = host(mirror)
if mhost in timedhosts:
result = timedhosts[mhost]
if verbose:
print "%s already timed: %s" % (mhost, result)
self._add_result(mirror, mhost, result)
elif mhost in ("127.0.0.1", "::1", "localhost", prefer):
self._add_result(mirror, mhost, 0)
else:
# No cached info. so spawn a thread and find the info. out
self._init_lock()
pollThread = PollThread(self, mirror)
pollThread.start()
self.threads.append(pollThread)
while len(self.threads) > 0:
if self.threads[0].isAlive():
self.threads[0].join()
del self.threads[0]
def _add_result(self, mirror, host, time):
"""
This function is called by L{PollThread.run()} to add details of a
mirror in C{FastestMirror.results} dictionary.
This function is referred by:
- L{PollThread.run()}
@param mirror : The mirror that was polled for response time.
@type mirror : String
@param host : The hostname of the mirror.
@type host : String
@param time : The response time of the mirror.
@type time : Integer
@param timedhosts : A list of time intervals to reach different hosts
corresponding to the mirrors. The index of the list are hostnames.
@type timedhosts : List
"""
global timedhosts
self._acquire_lock()
if verbose: print " * %s : %f secs" % (host, time)
self.results[mirror] = time
timedhosts[host] = time
self._release_lock()
class PollThread(threading.Thread):
"""
B{PollThread} class implements C{threading.Thread} class. This class
provides the functionalities to ping/poll the mirrors in parallel.
"""
def __init__(self, parent, mirror):
"""
It is initiliazer function for B{L{PollThread}} class. This function
initiliazes the service ports for different webservices.
@param parent : The parent class.
@type parent : Class
@param mirror : The mirror of a repository.
@type mirror : String
"""
threading.Thread.__init__(self)
self.parent = parent
self.mirror = mirror
self.host = host(mirror)
uService = urlparse.urlparse(mirror)[0]
if uService == "http":
self.port = 80
elif uService == "https":
self.port = 443
elif uService == "ftp":
self.port = 21
elif uService == "file":
self.host = "127.0.0.1"
else:
self.port = -2
def run(self):
"""
The C{threading.Thread.run()} function is being overridden here.
This function pings/polls a mirror and add the details of that
mirror to the C{FastestMirror.results} dictionary.
The response time of any mirror is '99999999999' if any exception
occurs during polling.
This function refers:
- L{FastestMirror._add_result()}
This function is referred by:
- L{FastestMirror._poll_mirrors()}
"""
try:
if self.host in timedhosts:
result = timedhosts[self.host]
if verbose:
print "%s already timed: %s" % (self.host, result)
else:
if self.host in ("127.0.0.1", "::1", "localhost", prefer):
result = 0
else:
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
uPort = string.find(self.host,":")
if uPort > 0:
self.port = int(self.host[uPort+1:])
self.host = self.host[:uPort]
time_before = time.time()
sock.connect((self.host, self.port))
result = time.time() - time_before
sock.close()
self.parent._add_result(self.mirror, self.host, result)
except:
if verbose:
print " * %s : dead" % self.host
self.parent._add_result(self.mirror, self.host, 99999999999)
def main():
"""
This is the main function for B{fastestmirror} module.
This function explains the usage of B{fastestmirror} module. Also parses
the command line arguments.
This function refers:
- L{FastestMirror.get_mirrorlist()}
"""
global verbose
verbose = True
if len(sys.argv) == 1:
print "Usage: %s <mirror1> [mirror2] ... [mirrorN]" % sys.argv[0]
sys.exit(-1)
mirrorlist = sys.argv[1:]
print "Result: " + str(FastestMirror(mirrorlist).get_mirrorlist())
if __name__ == '__main__':
main()