PyPI Download Stats
Every so often I find myself in need of a small ego boost (or reality check). One of the things I've done in the past to satisfy such a need is go to PyPI and see how many downloads my packages have. Depending on how much time I have or how much effort I want to put into my pride, I may or may not check the download stats for all releases of each package.
A couple of weeks ago, I was in the mood for an ego boost. It was actually an everyday thing for nearly a week! So, instead of wasting a lot of time checking download stats for each version of each package I have on PyPI, I wrote a script to do it for me. It uses the XML-RPC API that PyPI offers.
Here she is!
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Calculates the total number of downloads that a particular PyPI package has
received across all versions tracked by PyPI
"""

from datetime import datetime
import locale
import sys

try:
    import xmlrpclib  # Python 2
except ImportError:
    import xmlrpc.client as xmlrpclib  # Python 3

try:
    locale.setlocale(locale.LC_ALL, '')
except locale.Error:
    # The environment names a locale this system does not provide; keep the
    # default locale rather than failing before any work is done.
    pass


def _datetime_format():
    """Return the locale's date/time strftime format ('%c' if unavailable)."""
    try:
        return locale.nl_langinfo(locale.D_T_FMT)
    except AttributeError:
        # locale.nl_langinfo is not provided on every platform.
        return '%c'


def _group_digits(number):
    """Format an integer with the locale's thousands grouping."""
    return locale.format_string('%d', number, grouping=True)


class PyPIDownloadAggregator(object):
    """Collects per-release download counts for one PyPI package.

    Data is fetched lazily over PyPI's XML-RPC interface the first time
    ``downloads`` is read, and cached on the instance afterwards.
    """

    def __init__(self, package_name, include_hidden=True):
        """Prepare an aggregator; no network traffic happens here.

        package_name   -- name of the package to look up
        include_hidden -- passed through to ``package_releases``
        """
        self.package_name = package_name
        self.include_hidden = include_hidden
        self.proxy = xmlrpclib.Server('http://pypi.python.org/pypi')
        self._downloads = {}
        # True once a fetch has completed, so that an empty result is cached
        # instead of being re-requested on every access.
        self._downloads_loaded = False

        self.first_upload = None
        self.first_upload_rel = None
        self.last_upload = None
        self.last_upload_rel = None

    @property
    def releases(self):
        """Retrieves the release number for each uploaded release.

        When the package has no releases under the given name, PyPI is
        searched by name and description (first 15 hits).  If exactly one
        distinct package name comes back, ``self.package_name`` is switched
        to it and its releases are returned.  Otherwise the process exits
        with a message listing the candidates.
        """
        found = self.proxy.package_releases(self.package_name,
                                            self.include_hidden)
        if found:
            return found

        # no matching package--search for possibles, and limit to 15 results
        candidates = self.proxy.search({
            'name': self.package_name,
            'description': self.package_name,
        }, 'or')[:15]

        # make sure we only get unique package names (order preserved)
        matches = []
        for candidate in candidates:
            name = candidate['name']
            if name not in matches:
                matches.append(name)

        if len(matches) == 1:
            if matches[0] == self.package_name:
                # The package exists but has no releases.  Looking it up
                # again would give the same answer, so stop here.
                return found
            # One retry only, never recursion: this always terminates even
            # if the sole match has no releases either.
            self.package_name = matches[0]
            return self.proxy.package_releases(self.package_name,
                                               self.include_hidden)

        error = """No such package found: %s

Possible matches include:
%s
""" % (self.package_name, '\n'.join('\t- %s' % n for n in matches))

        sys.exit(error)

    @property
    def downloads(self, force=False):
        """Calculate the total number of downloads for the package.

        Returns a dict mapping release -> summed download count of all files
        of that release.  As a side effect, records the earliest and latest
        upload times and the releases they belong to.

        ``force`` is kept for backward compatibility only: because this is a
        property, normal attribute access can never supply it.
        """
        if force or not (self._downloads or self._downloads_loaded):
            for release in self.releases:
                urls = self.proxy.release_urls(self.package_name, release)
                self._downloads[release] = 0
                for url in urls:
                    # upload times
                    uptime = datetime.strptime(url['upload_time'].value,
                                               "%Y%m%dT%H:%M:%S")

                    if self.first_upload is None or uptime < self.first_upload:
                        self.first_upload = uptime
                        self.first_upload_rel = release

                    if self.last_upload is None or uptime > self.last_upload:
                        self.last_upload = uptime
                        self.last_upload_rel = release

                    self._downloads[release] += url['downloads']
            self._downloads_loaded = True

        return self._downloads

    def total(self):
        """Return the download count summed over all releases."""
        return sum(self.downloads.values())

    def average(self):
        """Return the mean downloads per release, rounded down (0 if none)."""
        downloads = self.downloads
        if not downloads:
            return 0
        # Floor division keeps an integer result on Python 2 and Python 3.
        return sum(downloads.values()) // len(downloads)

    def max(self):
        """Return the highest per-release download count (0 if none)."""
        downloads = self.downloads
        return max(downloads.values()) if downloads else 0

    def min(self):
        """Return the lowest per-release download count (0 if none)."""
        downloads = self.downloads
        return min(downloads.values()) if downloads else 0

    def stats(self):
        """Prints a nicely formatted list of statistics about the package"""
        # Read downloads first so the first/last upload data is populated.
        downloads = self.downloads

        fmt = _datetime_format()

        def val(dt):
            return dt.strftime(fmt) if dt else '--'

        params = (
            self.package_name,
            val(self.first_upload),
            self.first_upload_rel,
            val(self.last_upload),
            self.last_upload_rel,
            # One entry per release, so this equals the release count
            # without another round trip to the server.
            _group_digits(len(downloads)),
            _group_digits(self.max()),
            _group_digits(self.min()),
            _group_digits(self.average()),
            _group_digits(self.total()),
        )

        report = (
            "PyPI Package statistics for: %s\n"
            "\n"
            "    First Upload: %40s (%s)\n"
            "    Last Upload:  %40s (%s)\n"
            "    Number of releases: %34s\n"
            "    Most downloads:    %35s\n"
            "    Fewest downloads:  %35s\n"
            "    Average downloads: %35s\n"
            "    Total downloads:   %35s\n"
        ) % params

        sys.stdout.write(report + "\n")


def main():
    """Print download statistics for every package named on the command line."""
    if len(sys.argv) < 2:
        sys.exit('Please specify at least one package name')

    for pkg in sys.argv[1:]:
        PyPIDownloadAggregator(pkg).stats()


if __name__ == '__main__':
    main()
Usage is pretty simple. All you need to do is call the script (I called it pypi_downloads.py) with the name or names of the package(s) you want download stats for:
bash-4.0$ ./pypi_downloads.py clip2zeus PyPI Package statistics for: Clip2Zeus First Upload: Sun 10 Jan 2010 03:25:30 AM (0.1) Last Upload: Mon 18 Jan 2010 06:58:42 PM (0.9d) Number of releases: 12 Most downloads: 41 Fewest downloads: 21 Average downloads: 28 Total downloads: 342
And there you have it!
Comments
Comments powered by Disqus