index : reflector32 | |
Archlinux32 fork of reflector | gitolite user |
summaryrefslogtreecommitdiff |
-rw-r--r-- | Reflector.py | 199 | ||||
-rw-r--r-- | setup.py | 2 |
diff --git a/Reflector.py b/Reflector.py index dcf85fd..6dc92b3 100644 --- a/Reflector.py +++ b/Reflector.py @@ -1,7 +1,8 @@ #!/usr/bin/env python3 -# -*- encoding: utf-8 -*- -# Ignore the invalid snake-case error for the module name. -# pylint: disable=invalid-name + +# Ignore the invalid snake-case error for the module name and the number of +# lines. +# pylint: disable=invalid-name,too-many-lines # Copyright (C) 2012-2020 Xyne # @@ -31,6 +32,7 @@ import http.client import itertools import json import logging +import multiprocessing import os import pipes import re @@ -128,7 +130,9 @@ def get_mirrorstatus( return obj, mtime except (IOError, urllib.error.URLError, socket.timeout) as err: - raise MirrorStatusError(f'failed to retrieve mirrorstatus data: {err.__class__.__name__}: {err}') from err + raise MirrorStatusError( + f'failed to retrieve mirrorstatus data: {err.__class__.__name__}: {err}' + ) from err # ------------------------------ Miscellaneous ------------------------------- # @@ -305,10 +309,11 @@ def sort(mirrors, by=None, key=None, **kwargs): # pylint: disable=invalid-name def key(mir): return mir[by] try: - print('sorting by', by, key) mirrors.sort(key=key) except KeyError as err: - raise MirrorStatusError('attempted to sort mirrors by unrecognized criterion: "{}"'.format(by)) from err + raise MirrorStatusError( + 'attempted to sort mirrors by unrecognized criterion: "{}"'.format(by) + ) from err return mirrors @@ -382,8 +387,60 @@ def rate_http( return 0, 0 +def _rate_unthreaded(mirrors, fmt, kwargs): + ''' + Rate mirrors without using threads. + ''' + logger = get_logger() + rates = dict() + for mir in mirrors: + url = mir['url'] + db_url = url + DB_SUBPATH + scheme = urllib.parse.urlparse(url).scheme + + if scheme == 'rsync': + time_delta, ratio = rate_rsync(db_url, **kwargs) + else: + time_delta, ratio = rate_http(db_url, **kwargs) + + kibps = ratio / 1024.0 + logger.info(fmt.format(url, kibps, time_delta)) + rates[url] = ratio + return rates + + +def _rate_wrapper(func, url, kwargs): + ''' + Wrapper function for multithreaded rating. + ''' + time_delta, ratio = func(url + DB_SUBPATH, **kwargs) + return url, time_delta, ratio + + +def _rate_threaded(mirrors, fmt, n_threads, kwargs): # pylint: disable=too-many-locals + ''' + Rate mirrors using threads. + ''' + args = list() + for mir in mirrors: + url = mir['url'] + scheme = urllib.parse.urlparse(url).scheme + rfunc = rate_rsync if scheme == 'rsync' else rate_http + args.append((rfunc, url, kwargs)) + + logger = get_logger() + rates = dict() + with multiprocessing.Pool(n_threads) as pool: + for url, time_delta, ratio in pool.starmap(_rate_wrapper, args): + kibps = ratio / 1024.0 + logger.info(fmt.format(url, kibps, time_delta)) + rates[url] = ratio + return rates + + def rate( mirrors, + n_threads=0, **kwargs ): ''' @@ -405,22 +462,9 @@ def rate( logger.info(header_fmt.format('Server', 'Rate', 'Time')) fmt = '{{:{:d}s}} {{:8.2f}} KiB/s {{:7.2f}} s'.format(url_len) - rates = dict() - for mir in mirrors: - url = mir['url'] - db_url = url + DB_SUBPATH - scheme = urllib.parse.urlparse(url).scheme - - if scheme == 'rsync': - time_delta, ratio = rate_rsync(db_url, **kwargs) - else: - time_delta, ratio = rate_http(db_url, **kwargs) - - kibps = ratio / 1024.0 - logger.info(fmt.format(url, kibps, time_delta)) - rates[url] = ratio - - return rates + if n_threads > 0: + return _rate_threaded(mirrors, fmt, n_threads, kwargs) + return _rate_unthreaded(mirrors, fmt, kwargs) # -------------------------------- Exceptions -------------------------------- # @@ -440,7 +484,7 @@ class MirrorStatusError(Exception): # ---------------------------- MirrorStatusFilter ---------------------------- # -class MirrorStatusFilter(): # pylint: disable=too-many-instance-attributes +class MirrorStatusFilter(): # pylint: disable=too-many-instance-attributes,too-few-public-methods ''' Filter mirrors by different criteria. ''' @@ -456,7 +500,7 @@ class MirrorStatusFilter(): # pylint: disable=too-many-instance-attributes isos=False, ipv4=False, ipv6=False - ): + ): # pylint: disable=too-many-arguments self.min_completion_pct = min_completion_pct self.countries = tuple(c.upper() for c in countries) if countries else tuple() self.protocols = protocols @@ -524,7 +568,13 @@ class MirrorStatusFilter(): # pylint: disable=too-many-instance-attributes # -------------------------------- Formatting -------------------------------- # -def format_mirrorlist(mirror_status, mtime, include_country=False, command=None, url=URL): +def format_mirrorlist( + mirror_status, + mtime, + include_country=False, + command=None, + url=URL +): # pylint: disable=too-many-locals ''' Format the mirrorlist. ''' @@ -592,8 +642,9 @@ class MirrorStatus(): importers of this module. ''' - # TODO: move these to another module or remove them completely - # Related: https://bugs.archlinux.org/task/32895 + # TODO: + # Move these to another module or remove them completely Related: + # https://bugs.archlinux.org/task/32895 REPOSITORIES = ( 'community', 'community-staging', @@ -619,8 +670,9 @@ class MirrorStatus(): download_timeout=DEFAULT_DOWNLOAD_TIMEOUT, cache_timeout=DEFAULT_CACHE_TIMEOUT, min_completion_pct=1.0, + n_threads=0, url=URL - ): + ): # pylint: disable=too-many-arguments self.connection_timeout = connection_timeout self.download_timeout = download_timeout self.cache_timeout = cache_timeout @@ -629,6 +681,7 @@ class MirrorStatus(): self.mirror_status = None self.ms_mtime = 0 + self.n_threads = n_threads def retrieve(self): ''' @@ -676,13 +729,13 @@ class MirrorStatus(): mirrors = self.get_mirrors() kwargs.setdefault('connection_timeout', self.connection_timeout) kwargs.setdefault('download_timeout', self.download_timeout) - yield from sort(mirrors, **kwargs) + yield from sort(mirrors, n_threads=self.n_threads, **kwargs) def rate(self, mirrors=None, **kwargs): ''' Sort mirrors by download speed. ''' - yield from self.sort(mirrors, by='rate', **kwargs) + yield from self.sort(mirrors, by='rate', n_threads=self.n_threads, **kwargs) def get_mirrorlist(self, mirrors=None, include_country=False, cmd=None): ''' @@ -693,7 +746,13 @@ class MirrorStatus(): if not isinstance(mirrors, list): mirrors = list(mirrors) obj['urls'] = mirrors - return format_mirrorlist(obj, self.ms_mtime, include_country=include_country, command=cmd, url=self.url) + return format_mirrorlist( + obj, + self.ms_mtime, + include_country=include_country, + command=cmd, + url=self.url + ) def list_countries(self): ''' @@ -768,12 +827,19 @@ def add_arguments(parser): parser.add_argument( '--cache-timeout', type=int, metavar='n', default=DEFAULT_CACHE_TIMEOUT, - help='The cache timeout in seconds for the data retrieved from the Arch Linux Mirror Status API. The default is %(default)s.' + help=( + '''The cache timeout in seconds for the data retrieved from the Arch + Linux Mirror Status API. The default is %(default)s. ''' + ) ) parser.add_argument( '--url', default=URL, - help='The URL from which to retrieve the mirror data in JSON format. If different from the default, it must follow the same format. Default: %(default)s' + help=( + '''The URL from which to retrieve the mirror data in JSON format. If + different from the default, it must follow the same format. Default: + %(default)s''' + ) ) parser.add_argument( @@ -784,7 +850,19 @@ def add_arguments(parser): sort_help = '; '.join('"{}": {}'.format(k, v) for k, v in SORT_TYPES.items()) parser.add_argument( '--sort', choices=SORT_TYPES, - help='Sort the mirrorlist. {}.'.format(sort_help) + help=f'Sort the mirrorlist. {sort_help}.' + ) + + parser.add_argument( + '--threads', metavar='n', type=int, default=0, + help=( + '''Use n threads for rating mirrors. This option will speed up the + rating step but the results will be inaccurate if the local + bandwidth is saturated at any point during the operation. If rating + takes too long without this option then you should probably apply + more filters to reduce the number of rated servers before using this + option.''' + ) ) parser.add_argument( @@ -799,22 +877,52 @@ def add_arguments(parser): filters = parser.add_argument_group( 'filters', - 'The following filters are inclusive, i.e. the returned list will only contain mirrors for which all of the given conditions are met.' + '''The following filters are inclusive, i.e. the returned list will only + contain mirrors for which all of the given conditions are met.''' ) filters.add_argument( '-a', '--age', type=float, metavar='n', - help='Only return mirrors that have synchronized in the last n hours. n may be an integer or a decimal number.' + help=( + '''Only return mirrors that have synchronized in the last n hours. n + may be an integer or a decimal number.''' + ) ) filters.add_argument( '-c', '--country', dest='countries', action='append', metavar='<country name or code>', - help='Restrict mirrors to selected countries. Countries may be given by name or country code, or a mix of both. The case is ignored. Multiple countries may be selected using commas (e.g. --country France,Germany) or by passing this option multiple times (e.g. -c fr -c de). Use "--list-countries" to display a table of available countries along with their country codes. When sorting by country, this option may also be used to sort by a preferred order instead of alphabetically. For example, to select mirrors from Sweden, Norway, Denmark and Finland, in that order, use the options "--country se,no,dk,fi --sort country". To set a preferred country sort order without filtering any countries. this option also recognizes the glob pattern "*", which will match any country. For example, to ensure that any mirrors from Sweden are at the top of the list and any mirrors from Denmark are at the bottom, with any other countries in between, use "--country \'se,*,dk\' --sort country". It is however important to note that when "*" is given along with other filter criteria, there is no guarantee that certain countries will be included in the results. For example, with the options "--country \'se,*,dk\' --sort country --latest 10", the latest 10 mirrors may all be from the United States. When the glob pattern is present, it only ensures that if certain countries are included in the results, they will be sorted in the requested order.' + help=( + '''Restrict mirrors to selected countries. Countries may be given by + name or country code, or a mix of both. The case is ignored. + Multiple countries may be selected using commas (e.g. --country + France,Germany) or by passing this option multiple times (e.g. -c + fr -c de). Use "--list-countries" to display a table of available + countries along with their country codes. When sorting by country, + this option may also be used to sort by a preferred order instead of + alphabetically. For example, to select mirrors from Sweden, Norway, + Denmark and Finland, in that order, use the options "--country + se,no,dk,fi --sort country". To set a preferred country sort order + without filtering any countries. this option also recognizes the + glob pattern "*", which will match any country. For example, to + ensure that any mirrors from Sweden are at the top of the list and + any mirrors from Denmark are at the bottom, with any other countries + in between, use "--country \'se,*,dk\' --sort country". It is + however important to note that when "*" is given along with other + filter criteria, there is no guarantee that certain countries will + be included in the results. For example, with the options "--country + \'se,*,dk\' --sort country --latest 10", the latest 10 mirrors may + all be from the United States. When the glob pattern is present, it + only ensures that if certain countries are included in the results, + they will be sorted in the requested order.''' + ) ) filters.add_argument( '-f', '--fastest', type=int, metavar='n', - help='Return the n fastest mirrors that meet the other criteria. Do not use this option without other filtering options.' + help=( + '''Return the n fastest mirrors that meet the other criteria. Do not + use this option without other filtering options.''' + ) ) filters.add_argument( @@ -844,12 +952,20 @@ def add_arguments(parser): filters.add_argument( '-p', '--protocol', dest='protocols', action='append', metavar='<protocol>', - help='Match one of the given protocols, e.g. "https" or "ftp". Multiple protocols may be selected using commas (e.g. "https,http") or by passing this option multiple times.' + help=( + '''Match one of the given protocols, e.g. "https" or "ftp". Multiple + protocols may be selected using commas (e.g. "https,http") or by + passing this option multiple times.''' + ) ) filters.add_argument( '--completion-percent', type=float, metavar='[0-100]', default=100., - help='Set the minimum completion percent for the returned mirrors. Check the mirrorstatus webpage for the meaning of this parameter. Default value: %(default)s.' + help=( + '''Set the minimum completion percent for the returned mirrors. + Check the mirrorstatus webpage for the meaning of this parameter. + Default value: %(default)s.''' + ) ) filters.add_argument( @@ -921,7 +1037,8 @@ def process_options(options, mirrorstatus=None, mirrors=None): download_timeout=options.download_timeout, cache_timeout=options.cache_timeout, min_completion_pct=(options.completion_percent / 100.), - url=options.url + url=options.url, + n_threads=options.threads ) if mirrors is None: @@ -5,7 +5,7 @@ import time setup( name='Reflector', - version=time.strftime('%Y.%m.%d.%H.%M.%S', time.gmtime( 1608505600)), + version=time.strftime('%Y.%m.%d.%H.%M.%S', time.gmtime( 1610237194)), description='''A Python 3 module and script to retrieve and filter the latest Pacman mirror list.''', author='Xyne', author_email='ac xunilhcra enyx, backwards', |