PyPI Download Stats

Every so often I find myself in need of a small ego boost (or reality check). One of the things I've done in the past to satisfy such a need is go to the PyPI and see how many downloads my packages have. Depending on how much time I have or how much effort I want to put into my pride, I may or may not check the download stats for all releases of each package.

A couple of weeks ago, I was in the mood for an ego boost. It was actually an every day thing for nearly a week! So, instead of wasting a lot of time checking download stats for each version of each package I have on PyPI, I wrote a script to do it for me. It uses the XML-RPC API that PyPI offers.

Here she is!

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Calculates the total number of downloads that a particular PyPI package has
received across all versions tracked by PyPI
"""

from datetime import datetime
import locale
import sys
import xmlrpclib

locale.setlocale(locale.LC_ALL, '')

class PyPIDownloadAggregator(object):

    def __init__(self, package_name, include_hidden=True):
        self.package_name = package_name
        self.include_hidden = include_hidden
        self.proxy = xmlrpclib.Server('http://pypi.python.org/pypi')
        self._downloads = {}

        self.first_upload = None
        self.first_upload_rel = None
        self.last_upload = None
        self.last_upload_rel = None

    @property
    def releases(self):
        """Retrieves the release number for each uploaded release"""

        result = self.proxy.package_releases(self.package_name, self.include_hidden)

        if len(result) == 0:
            # no matching package--search for possibles, and limit to 15 results
            results = self.proxy.search({
                'name': self.package_name,
                'description': self.package_name
            }, 'or')[:15]

            # make sure we only get unique package names
            matches = []
            for match in results:
                name = match['name']
                if name not in matches:
                    matches.append(name)

            # if only one package was found, return it
            if len(matches) == 1:
                self.package_name = matches[0]
                return self.releases

            error = """No such package found: %s

Possible matches include:
%s
""" % (self.package_name, '\n'.join('\t- %s' % n for n in matches))

            sys.exit(error)

        return result

    @property
    def downloads(self, force=False):
        """Calculate the total number of downloads for the package"""

        if len(self._downloads) == 0 or force:
            for release in self.releases:
                urls = self.proxy.release_urls(self.package_name, release)
                self._downloads[release] = 0
                for url in urls:
                    # upload times
                    uptime = datetime.strptime(url['upload_time'].value, "%Y%m%dT%H:%M:%S")
                    if self.first_upload is None or uptime < self.first_upload:
                        self.first_upload = uptime
                        self.first_upload_rel = release

                    if self.last_upload is None or uptime > self.last_upload:
                        self.last_upload = uptime
                        self.last_upload_rel = release

                    self._downloads[release] += url['downloads']

        return self._downloads

    def total(self):
        return sum(self.downloads.values())

    def average(self):
        return self.total() / len(self.downloads)

    def max(self):
        return max(self.downloads.values())

    def min(self):
        return min(self.downloads.values())

    def stats(self):
        """Prints a nicely formatted list of statistics about the package"""

        self.downloads # explicitly call, so we have first/last upload data
        fmt = locale.nl_langinfo(locale.D_T_FMT)
        sep = lambda s: locale.format('%d', s, 3)
        val = lambda dt: dt and dt.strftime(fmt) or '--'

        params = (
            self.package_name,
            val(self.first_upload),
            self.first_upload_rel,
            val(self.last_upload),
            self.last_upload_rel,
            sep(len(self.releases)),
            sep(self.max()),
            sep(self.min()),
            sep(self.average()),
            sep(self.total()),
        )

        print """PyPI Package statistics for: %s

    First Upload: %40s (%s)
    Last Upload:  %40s (%s)
    Number of releases: %34s
    Most downloads:    %35s
    Fewest downloads:  %35s
    Average downloads: %35s
    Total downloads:   %35s
""" % params

def main():
    if len(sys.argv) < 2:
        sys.exit('Please specify at least one package name')

    for pkg in sys.argv[1:]:
        PyPIDownloadAggregator(pkg).stats()

if __name__ == '__main__':
    main()

Usage is pretty simple. All you need to do is call the script (I called it pypi_downloads.py with the name or names of the package(s) you want download stats for:

bash-4.0$ ./pypi_downloads.py clip2zeus
PyPI Package statistics for: Clip2Zeus

    First Upload:             Sun 10 Jan 2010 03:25:30 AM  (0.1)
    Last Upload:              Mon 18 Jan 2010 06:58:42 PM  (0.9d)
    Number of releases:                                 12
    Most downloads:                                     41
    Fewest downloads:                                   21
    Average downloads:                                  28
    Total downloads:                                   342

And there you have it!

Comments

Comments powered by Disqus