Switch to the version 1.1.21

Add a small work around patch for the --skip and --only options in
illuminapairedend
2015-07-13 15:31:20 +02:00 · 2015-07-11 23:38:47 +02:00 · 2015-07-03 14:57:24 +02:00 · 2015-07-03 14:55:44 +02:00 · 2015-07-03 10:39:59 +02:00 · 2015-07-03 09:10:49 +02:00
386 changed files with 60493 additions and 152 deletions
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 <?eclipse-pydev version="1.0"?><pydev_project>
 <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
-<path>/OBITools-1.1/src</path>
+<path>/obitools/src</path>
 <path>/${PROJECT_DIR_NAME}/distutils.ext</path>
 </pydev_pathproperty>
 <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
@@ -7,6 +7,7 @@ Created on 20 oct. 2012
 from distutils.command.build import build as ori_build
 from obidistutils.serenity.checksystem import is_mac_system

+from distutils import log

 class build(ori_build):
    
@@ -39,4 +40,8 @@ class build(ori_build):
                    ('build_cexe', has_executables)] \
                   + ori_build.sub_commands + \
                   [('build_sphinx',has_doc)]
+                   
+    def run(self):
+        log.info('\n\nRunning obidistutils build process\n\n')
+        ori_build.run(self)
    
@@ -96,7 +96,7 @@ class build_ext(ori_build_ext):
        

    
-#     from obidistutils.serenity.cython import get_a_cython_module
+#     from obidistutils.serenity.getcython import get_a_cython_module
 #     import imp
 #     import os.path 
 #     
@@ -18,21 +18,21 @@ except ImportError:
        log.info("Restarting installation with all dependencies ok")
        rerun_with_anothe_python(os.path.realpath(sys.executable))
    
-    class build_sphinx(ori_build_sphinx):
-        '''
-        Build Sphinx documentation in html, epub and man formats 
-        '''
-    
-        description = __doc__
-    
-        def run(self):
-            self.builder='html'
-            self.finalize_options()
-            ori_build_sphinx.run(self)
-            self.builder='epub'
-            self.finalize_options()
-            ori_build_sphinx.run(self)
-            self.builder='man'
-            self.finalize_options()
-            ori_build_sphinx.run(self)
+class build_sphinx(ori_build_sphinx):
+    '''
+    Build Sphinx documentation in html, epub and man formats 
+    '''
+
+    description = __doc__
+
+    def run(self):
+        self.builder='html'
+        self.finalize_options()
+        ori_build_sphinx.run(self)
+        self.builder='epub'
+        self.finalize_options()
+        ori_build_sphinx.run(self)
+        self.builder='man'
+        self.finalize_options()
+        ori_build_sphinx.run(self)
    
@@ -11,9 +11,15 @@ except ImportError:
    from distutils.command.install import install as install_ori
    has_setuptools=False

+from distutils import log
+
 class install(install_ori):
    
    def __init__(self,dist):
        install_ori.__init__(self, dist)
        self.sub_commands.insert(0, ('build',lambda self: True))
        self.sub_commands.append(('install_sphinx',lambda self: self.distribution.serenity))
+
+    def run(self):
+        log.info('\n\nRunning obidistutils install process\n\n')
+        install_ori.run(self)
@@ -8,7 +8,7 @@ import os

 from obidistutils.command.build_exe import build_exe
 from obidistutils.serenity.checksystem import is_mac_system
-
+from distutils import log

 class pidname(build_exe):
    
@@ -44,7 +44,9 @@ class pidname(build_exe):

    def run(self):
        if is_mac_system():
+            log.info("Building pidname...")
            build_exe.run(self)
+            log.info("Done")
            self.pidname=True
        else:
            self.pidname=False
@@ -201,5 +201,7 @@ def setup(**attrs):
    
    if 'ext_modules' not in attrs:
        attrs['ext_modules'] = EXTENTION
+        
+    print "$$$$$$$$$$$$$$$$$$$$$$$$$$$",COMMANDS
    
    ori_setup(**attrs)
@@ -5,10 +5,12 @@ Created on 2 oct. 2014
 '''

 from distutils import util
+from distutils import log

 def is_mac_system():
    platform = util.get_platform().split('-')[0]
-    
+    if platform=='macosx':
+        log.info('You are running on a Mac platform')
    return platform=='macosx'

 def is_windows_system():
@@ -0,0 +1,277 @@
+#!/usr/bin/env python
+import os
+import optparse
+
+import sys
+import re
+
+from pip.exceptions import InstallationError, CommandError, PipError
+from pip.log import logger
+from pip.util import get_installed_distributions, get_prog
+from pip.vcs import git, mercurial, subversion, bazaar  # noqa
+from pip.baseparser import ConfigOptionParser, UpdatingDefaultsHelpFormatter
+from pip.commands import commands, get_summaries, get_similar_commands
+
+# This fixes a peculiarity when importing via __import__ - as we are
+# initialising the pip module, "from pip import cmdoptions" is recursive
+# and appears not to work properly in that situation.
+import pip.cmdoptions
+cmdoptions = pip.cmdoptions
+
+# The version as used in the setup.py and the docs conf.py
+__version__ = "1.5.6"
+
+
+def autocomplete():
+    """Command and option completion for the main option parser (and options)
+    and its subcommands (and options).
+
+    Enable by sourcing one of the completion shell scripts (bash or zsh).
+    """
+    # Don't complete if user hasn't sourced bash_completion file.
+    if 'PIP_AUTO_COMPLETE' not in os.environ:
+        return
+    cwords = os.environ['COMP_WORDS'].split()[1:]
+    cword = int(os.environ['COMP_CWORD'])
+    try:
+        current = cwords[cword - 1]
+    except IndexError:
+        current = ''
+
+    subcommands = [cmd for cmd, summary in get_summaries()]
+    options = []
+    # subcommand
+    try:
+        subcommand_name = [w for w in cwords if w in subcommands][0]
+    except IndexError:
+        subcommand_name = None
+
+    parser = create_main_parser()
+    # subcommand options
+    if subcommand_name:
+        # special case: 'help' subcommand has no options
+        if subcommand_name == 'help':
+            sys.exit(1)
+        # special case: list locally installed dists for uninstall command
+        if subcommand_name == 'uninstall' and not current.startswith('-'):
+            installed = []
+            lc = current.lower()
+            for dist in get_installed_distributions(local_only=True):
+                if dist.key.startswith(lc) and dist.key not in cwords[1:]:
+                    installed.append(dist.key)
+            # if there are no dists installed, fall back to option completion
+            if installed:
+                for dist in installed:
+                    print(dist)
+                sys.exit(1)
+
+        subcommand = commands[subcommand_name]()
+        options += [(opt.get_opt_string(), opt.nargs)
+                    for opt in subcommand.parser.option_list_all
+                    if opt.help != optparse.SUPPRESS_HELP]
+
+        # filter out previously specified options from available options
+        prev_opts = [x.split('=')[0] for x in cwords[1:cword - 1]]
+        options = [(x, v) for (x, v) in options if x not in prev_opts]
+        # filter options by current input
+        options = [(k, v) for k, v in options if k.startswith(current)]
+        for option in options:
+            opt_label = option[0]
+            # append '=' to options which require args
+            if option[1]:
+                opt_label += '='
+            print(opt_label)
+    else:
+        # show main parser options only when necessary
+        if current.startswith('-') or current.startswith('--'):
+            opts = [i.option_list for i in parser.option_groups]
+            opts.append(parser.option_list)
+            opts = (o for it in opts for o in it)
+
+            subcommands += [i.get_opt_string() for i in opts
+                            if i.help != optparse.SUPPRESS_HELP]
+
+        print(' '.join([x for x in subcommands if x.startswith(current)]))
+    sys.exit(1)
+
+
+def create_main_parser():
+    parser_kw = {
+        'usage': '\n%prog <command> [options]',
+        'add_help_option': False,
+        'formatter': UpdatingDefaultsHelpFormatter(),
+        'name': 'global',
+        'prog': get_prog(),
+    }
+
+    parser = ConfigOptionParser(**parser_kw)
+    parser.disable_interspersed_args()
+    
+    pip_pkg_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    parser.version = 'pip %s from %s (python %s)' % (
+        __version__,  pip_pkg_dir, sys.version[:3])
+
+    # add the general options
+    gen_opts = cmdoptions.make_option_group(cmdoptions.general_group, parser)
+    parser.add_option_group(gen_opts)
+
+    parser.main = True # so the help formatter knows
+
+    # create command listing for description
+    command_summaries = get_summaries()
+    description = [''] + ['%-27s %s' % (i, j) for i, j in command_summaries]
+    parser.description = '\n'.join(description)
+
+    return parser
+
+
+def parseopts(args):
+    parser = create_main_parser()
+
+    # Note: parser calls disable_interspersed_args(), so the result of this call
+    # is to split the initial args into the general options before the
+    # subcommand and everything else.
+    # For example:
+    #  args: ['--timeout=5', 'install', '--user', 'INITools']
+    #  general_options: ['--timeout==5']
+    #  args_else: ['install', '--user', 'INITools']
+    general_options, args_else = parser.parse_args(args)
+
+    # --version
+    if general_options.version:
+        sys.stdout.write(parser.version)
+        sys.stdout.write(os.linesep)
+        sys.exit()
+
+    # pip || pip help -> print_help()
+    if not args_else or (args_else[0] == 'help' and len(args_else) == 1):
+        parser.print_help()
+        sys.exit()
+
+    # the subcommand name
+    cmd_name = args_else[0].lower()
+
+    #all the args without the subcommand
+    cmd_args = args[:]
+    cmd_args.remove(args_else[0].lower())
+
+    if cmd_name not in commands:
+        guess = get_similar_commands(cmd_name)
+
+        msg = ['unknown command "%s"' % cmd_name]
+        if guess:
+            msg.append('maybe you meant "%s"' % guess)
+
+        raise CommandError(' - '.join(msg))
+
+    return cmd_name, cmd_args
+
+
+def main(initial_args=None):
+    if initial_args is None:
+        initial_args = sys.argv[1:]
+
+    autocomplete()
+
+    try:
+        cmd_name, cmd_args = parseopts(initial_args)
+    except PipError:
+        e = sys.exc_info()[1]
+        sys.stderr.write("ERROR: %s" % e)
+        sys.stderr.write(os.linesep)
+        sys.exit(1)
+
+    command = commands[cmd_name]()
+    return command.main(cmd_args)
+
+
+def bootstrap():
+    """
+    Bootstrapping function to be called from install-pip.py script.
+    """
+    pkgs = ['pip']
+    try:
+        import setuptools
+    except ImportError:
+        pkgs.append('setuptools')
+    return main(['install', '--upgrade'] + pkgs + sys.argv[1:])
+
+############################################################
+## Writing freeze files
+
+
+class FrozenRequirement(object):
+
+    def __init__(self, name, req, editable, comments=()):
+        self.name = name
+        self.req = req
+        self.editable = editable
+        self.comments = comments
+
+    _rev_re = re.compile(r'-r(\d+)$')
+    _date_re = re.compile(r'-(20\d\d\d\d\d\d)$')
+
+    @classmethod
+    def from_dist(cls, dist, dependency_links, find_tags=False):
+        location = os.path.normcase(os.path.abspath(dist.location))
+        comments = []
+        from pip.vcs import vcs, get_src_requirement
+        if vcs.get_backend_name(location):
+            editable = True
+            try:
+                req = get_src_requirement(dist, location, find_tags)
+            except InstallationError:
+                ex = sys.exc_info()[1]
+                logger.warn("Error when trying to get requirement for VCS system %s, falling back to uneditable format" % ex)
+                req = None
+            if req is None:
+                logger.warn('Could not determine repository location of %s' % location)
+                comments.append('## !! Could not determine repository location')
+                req = dist.as_requirement()
+                editable = False
+        else:
+            editable = False
+            req = dist.as_requirement()
+            specs = req.specs
+            assert len(specs) == 1 and specs[0][0] == '=='
+            version = specs[0][1]
+            ver_match = cls._rev_re.search(version)
+            date_match = cls._date_re.search(version)
+            if ver_match or date_match:
+                svn_backend = vcs.get_backend('svn')
+                if svn_backend:
+                    svn_location = svn_backend(
+                        ).get_location(dist, dependency_links)
+                if not svn_location:
+                    logger.warn(
+                        'Warning: cannot find svn location for %s' % req)
+                    comments.append('## FIXME: could not find svn URL in dependency_links for this package:')
+                else:
+                    comments.append('# Installing as editable to satisfy requirement %s:' % req)
+                    if ver_match:
+                        rev = ver_match.group(1)
+                    else:
+                        rev = '{%s}' % date_match.group(1)
+                    editable = True
+                    req = '%s@%s#egg=%s' % (svn_location, rev, cls.egg_name(dist))
+        return cls(dist.project_name, req, editable, comments)
+
+    @staticmethod
+    def egg_name(dist):
+        name = dist.egg_name()
+        match = re.search(r'-py\d\.\d$', name)
+        if match:
+            name = name[:match.start()]
+        return name
+
+    def __str__(self):
+        req = self.req
+        if self.editable:
+            req = '-e %s' % req
+        return '\n'.join(list(self.comments) + [str(req)]) + '\n'
+
+
+if __name__ == '__main__':
+    exit = main()
+    if exit:
+        sys.exit(exit)
@@ -0,0 +1,7 @@
+import sys
+from .runner import run
+
+if __name__ == '__main__':
+    exit = run()
+    if exit:
+        sys.exit(exit)
@@ -0,0 +1,8 @@
+"""
+pip._vendor is for vendoring dependencies of pip to prevent needing pip to
+depend on something external.
+
+Files inside of pip._vendor should be considered immutable and should only be
+updated to versions from upstream.
+"""
+from __future__ import absolute_import
@@ -0,0 +1,16 @@
+try:
+    import ast
+    from pip._vendor._markerlib.markers import default_environment, compile, interpret
+except ImportError:
+    if 'ast' in globals():
+        raise
+    def default_environment():
+        return {}
+    def compile(marker):
+        def marker_fn(environment=None, override=None):
+            # 'empty markers are True' heuristic won't install extra deps.
+            return not marker.strip()
+        marker_fn.__doc__ = marker
+        return marker_fn
+    def interpret(marker, environment=None, override=None):
+        return compile(marker)()
@@ -0,0 +1,119 @@
+# -*- coding: utf-8 -*-
+"""Interpret PEP 345 environment markers.
+
+EXPR [in|==|!=|not in] EXPR [or|and] ...
+
+where EXPR belongs to any of those:
+
+    python_version = '%s.%s' % (sys.version_info[0], sys.version_info[1])
+    python_full_version = sys.version.split()[0]
+    os.name = os.name
+    sys.platform = sys.platform
+    platform.version = platform.version()
+    platform.machine = platform.machine()
+    platform.python_implementation = platform.python_implementation()
+    a free string, like '2.6', or 'win32'
+"""
+
+__all__ = ['default_environment', 'compile', 'interpret']
+
+import ast
+import os
+import platform
+import sys
+import weakref
+
+_builtin_compile = compile
+
+try:
+    from platform import python_implementation
+except ImportError:
+    if os.name == "java":
+        # Jython 2.5 has ast module, but not platform.python_implementation() function.
+        def python_implementation():
+            return "Jython"
+    else:
+        raise
+
+
+# restricted set of variables
+_VARS = {'sys.platform': sys.platform,
+         'python_version': '%s.%s' % sys.version_info[:2],
+         # FIXME parsing sys.platform is not reliable, but there is no other
+         # way to get e.g. 2.7.2+, and the PEP is defined with sys.version
+         'python_full_version': sys.version.split(' ', 1)[0],
+         'os.name': os.name,
+         'platform.version': platform.version(),
+         'platform.machine': platform.machine(),
+         'platform.python_implementation': python_implementation(),
+         'extra': None # wheel extension
+        }
+
+for var in list(_VARS.keys()):
+    if '.' in var:
+        _VARS[var.replace('.', '_')] = _VARS[var]
+
+def default_environment():
+    """Return copy of default PEP 385 globals dictionary."""
+    return dict(_VARS)
+
+class ASTWhitelist(ast.NodeTransformer):
+    def __init__(self, statement):
+        self.statement = statement # for error messages
+
+    ALLOWED = (ast.Compare, ast.BoolOp, ast.Attribute, ast.Name, ast.Load, ast.Str)
+    # Bool operations
+    ALLOWED += (ast.And, ast.Or)
+    # Comparison operations
+    ALLOWED += (ast.Eq, ast.Gt, ast.GtE, ast.In, ast.Is, ast.IsNot, ast.Lt, ast.LtE, ast.NotEq, ast.NotIn)
+
+    def visit(self, node):
+        """Ensure statement only contains allowed nodes."""
+        if not isinstance(node, self.ALLOWED):
+            raise SyntaxError('Not allowed in environment markers.\n%s\n%s' %
+                               (self.statement,
+                               (' ' * node.col_offset) + '^'))
+        return ast.NodeTransformer.visit(self, node)
+
+    def visit_Attribute(self, node):
+        """Flatten one level of attribute access."""
+        new_node = ast.Name("%s.%s" % (node.value.id, node.attr), node.ctx)
+        return ast.copy_location(new_node, node)
+
+def parse_marker(marker):
+    tree = ast.parse(marker, mode='eval')
+    new_tree = ASTWhitelist(marker).generic_visit(tree)
+    return new_tree
+
+def compile_marker(parsed_marker):
+    return _builtin_compile(parsed_marker, '<environment marker>', 'eval',
+                   dont_inherit=True)
+
+_cache = weakref.WeakValueDictionary()
+
+def compile(marker):
+    """Return compiled marker as a function accepting an environment dict."""
+    try:
+        return _cache[marker]
+    except KeyError:
+        pass
+    if not marker.strip():
+        def marker_fn(environment=None, override=None):
+            """"""
+            return True
+    else:
+        compiled_marker = compile_marker(parse_marker(marker))
+        def marker_fn(environment=None, override=None):
+            """override updates environment"""
+            if override is None:
+                override = {}
+            if environment is None:
+                environment = default_environment()
+            environment.update(override)
+            return eval(compiled_marker, environment)
+    marker_fn.__doc__ = marker
+    _cache[marker] = marker_fn
+    return _cache[marker]
+
+def interpret(marker, environment=None):
+    return compile(marker)(environment)
@@ -0,0 +1,7 @@
+# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
+from .initialise import init, deinit, reinit
+from .ansi import Fore, Back, Style
+from .ansitowin32 import AnsiToWin32
+
+__version__ = '0.3.1'
+
@@ -0,0 +1,50 @@
+# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
+'''
+This module generates ANSI character codes to printing colors to terminals.
+See: http://en.wikipedia.org/wiki/ANSI_escape_code
+'''
+
+CSI = '\033['
+
+def code_to_chars(code):
+    return CSI + str(code) + 'm'
+
+class AnsiCodes(object):
+    def __init__(self, codes):
+        for name in dir(codes):
+            if not name.startswith('_'):
+                value = getattr(codes, name)
+                setattr(self, name, code_to_chars(value))
+
+class AnsiFore:
+    BLACK   = 30
+    RED     = 31
+    GREEN   = 32
+    YELLOW  = 33
+    BLUE    = 34
+    MAGENTA = 35
+    CYAN    = 36
+    WHITE   = 37
+    RESET   = 39
+
+class AnsiBack:
+    BLACK   = 40
+    RED     = 41
+    GREEN   = 42
+    YELLOW  = 43
+    BLUE    = 44
+    MAGENTA = 45
+    CYAN    = 46
+    WHITE   = 47
+    RESET   = 49
+
+class AnsiStyle:
+    BRIGHT    = 1
+    DIM       = 2
+    NORMAL    = 22
+    RESET_ALL = 0
+
+Fore = AnsiCodes( AnsiFore )
+Back = AnsiCodes( AnsiBack )
+Style = AnsiCodes( AnsiStyle )
+
@@ -0,0 +1,190 @@
+# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
+import re
+import sys
+
+from .ansi import AnsiFore, AnsiBack, AnsiStyle, Style
+from .winterm import WinTerm, WinColor, WinStyle
+from .win32 import windll
+
+
+winterm = None
+if windll is not None:
+    winterm = WinTerm()
+
+
+def is_a_tty(stream):
+    return hasattr(stream, 'isatty') and stream.isatty()
+
+
+class StreamWrapper(object):
+    '''
+    Wraps a stream (such as stdout), acting as a transparent proxy for all
+    attribute access apart from method 'write()', which is delegated to our
+    Converter instance.
+    '''
+    def __init__(self, wrapped, converter):
+        # double-underscore everything to prevent clashes with names of
+        # attributes on the wrapped stream object.
+        self.__wrapped = wrapped
+        self.__convertor = converter
+
+    def __getattr__(self, name):
+        return getattr(self.__wrapped, name)
+
+    def write(self, text):
+        self.__convertor.write(text)
+
+
+class AnsiToWin32(object):
+    '''
+    Implements a 'write()' method which, on Windows, will strip ANSI character
+    sequences from the text, and if outputting to a tty, will convert them into
+    win32 function calls.
+    '''
+    ANSI_RE = re.compile('\033\[((?:\d|;)*)([a-zA-Z])')
+
+    def __init__(self, wrapped, convert=None, strip=None, autoreset=False):
+        # The wrapped stream (normally sys.stdout or sys.stderr)
+        self.wrapped = wrapped
+
+        # should we reset colors to defaults after every .write()
+        self.autoreset = autoreset
+
+        # create the proxy wrapping our output stream
+        self.stream = StreamWrapper(wrapped, self)
+
+        on_windows = sys.platform.startswith('win')
+
+        # should we strip ANSI sequences from our output?
+        if strip is None:
+            strip = on_windows
+        self.strip = strip
+
+        # should we should convert ANSI sequences into win32 calls?
+        if convert is None:
+            convert = on_windows and is_a_tty(wrapped)
+        self.convert = convert
+
+        # dict of ansi codes to win32 functions and parameters
+        self.win32_calls = self.get_win32_calls()
+
+        # are we wrapping stderr?
+        self.on_stderr = self.wrapped is sys.stderr
+
+
+    def should_wrap(self):
+        '''
+        True if this class is actually needed. If false, then the output
+        stream will not be affected, nor will win32 calls be issued, so
+        wrapping stdout is not actually required. This will generally be
+        False on non-Windows platforms, unless optional functionality like
+        autoreset has been requested using kwargs to init()
+        '''
+        return self.convert or self.strip or self.autoreset
+
+
+    def get_win32_calls(self):
+        if self.convert and winterm:
+            return {
+                AnsiStyle.RESET_ALL: (winterm.reset_all, ),
+                AnsiStyle.BRIGHT: (winterm.style, WinStyle.BRIGHT),
+                AnsiStyle.DIM: (winterm.style, WinStyle.NORMAL),
+                AnsiStyle.NORMAL: (winterm.style, WinStyle.NORMAL),
+                AnsiFore.BLACK: (winterm.fore, WinColor.BLACK),
+                AnsiFore.RED: (winterm.fore, WinColor.RED),
+                AnsiFore.GREEN: (winterm.fore, WinColor.GREEN),
+                AnsiFore.YELLOW: (winterm.fore, WinColor.YELLOW),
+                AnsiFore.BLUE: (winterm.fore, WinColor.BLUE),
+                AnsiFore.MAGENTA: (winterm.fore, WinColor.MAGENTA),
+                AnsiFore.CYAN: (winterm.fore, WinColor.CYAN),
+                AnsiFore.WHITE: (winterm.fore, WinColor.GREY),
+                AnsiFore.RESET: (winterm.fore, ),
+                AnsiBack.BLACK: (winterm.back, WinColor.BLACK),
+                AnsiBack.RED: (winterm.back, WinColor.RED),
+                AnsiBack.GREEN: (winterm.back, WinColor.GREEN),
+                AnsiBack.YELLOW: (winterm.back, WinColor.YELLOW),
+                AnsiBack.BLUE: (winterm.back, WinColor.BLUE),
+                AnsiBack.MAGENTA: (winterm.back, WinColor.MAGENTA),
+                AnsiBack.CYAN: (winterm.back, WinColor.CYAN),
+                AnsiBack.WHITE: (winterm.back, WinColor.GREY),
+                AnsiBack.RESET: (winterm.back, ),
+            }
+
+
+    def write(self, text):
+        if self.strip or self.convert:
+            self.write_and_convert(text)
+        else:
+            self.wrapped.write(text)
+            self.wrapped.flush()
+        if self.autoreset:
+            self.reset_all()
+
+
+    def reset_all(self):
+        if self.convert:
+            self.call_win32('m', (0,))
+        elif not self.wrapped.closed and is_a_tty(self.wrapped):
+            self.wrapped.write(Style.RESET_ALL)
+
+
+    def write_and_convert(self, text):
+        '''
+        Write the given text to our wrapped stream, stripping any ANSI
+        sequences from the text, and optionally converting them into win32
+        calls.
+        '''
+        cursor = 0
+        for match in self.ANSI_RE.finditer(text):
+            start, end = match.span()
+            self.write_plain_text(text, cursor, start)
+            self.convert_ansi(*match.groups())
+            cursor = end
+        self.write_plain_text(text, cursor, len(text))
+
+
+    def write_plain_text(self, text, start, end):
+        if start < end:
+            self.wrapped.write(text[start:end])
+            self.wrapped.flush()
+
+
+    def convert_ansi(self, paramstring, command):
+        if self.convert:
+            params = self.extract_params(paramstring)
+            self.call_win32(command, params)
+
+
+    def extract_params(self, paramstring):
+        def split(paramstring):
+            for p in paramstring.split(';'):
+                if p != '':
+                    yield int(p)
+        return tuple(split(paramstring))
+
+
+    def call_win32(self, command, params):
+        if params == []:
+            params = [0]
+        if command == 'm':
+            for param in params:
+                if param in self.win32_calls:
+                    func_args = self.win32_calls[param]
+                    func = func_args[0]
+                    args = func_args[1:]
+                    kwargs = dict(on_stderr=self.on_stderr)
+                    func(*args, **kwargs)
+        elif command in ('H', 'f'): # set cursor position
+            func = winterm.set_cursor_position
+            func(params, on_stderr=self.on_stderr)
+        elif command in ('J'):
+            func = winterm.erase_data
+            func(params, on_stderr=self.on_stderr)
+        elif command == 'A':
+            if params == () or params == None:
+                num_rows = 1
+            else:
+                num_rows = params[0]
+            func = winterm.cursor_up
+            func(num_rows, on_stderr=self.on_stderr)
+
@@ -0,0 +1,56 @@
+# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
+import atexit
+import sys
+
+from .ansitowin32 import AnsiToWin32
+
+
+orig_stdout = sys.stdout
+orig_stderr = sys.stderr
+
+wrapped_stdout = sys.stdout
+wrapped_stderr = sys.stderr
+
+atexit_done = False
+
+
+def reset_all():
+    AnsiToWin32(orig_stdout).reset_all()
+
+
+def init(autoreset=False, convert=None, strip=None, wrap=True):
+
+    if not wrap and any([autoreset, convert, strip]):
+        raise ValueError('wrap=False conflicts with any other arg=True')
+
+    global wrapped_stdout, wrapped_stderr
+    sys.stdout = wrapped_stdout = \
+        wrap_stream(orig_stdout, convert, strip, autoreset, wrap)
+    sys.stderr = wrapped_stderr = \
+        wrap_stream(orig_stderr, convert, strip, autoreset, wrap)
+
+    global atexit_done
+    if not atexit_done:
+        atexit.register(reset_all)
+        atexit_done = True
+
+
+def deinit():
+    sys.stdout = orig_stdout
+    sys.stderr = orig_stderr
+
+
+def reinit():
+    sys.stdout = wrapped_stdout
+    sys.stderr = wrapped_stdout
+
+
+def wrap_stream(stream, convert, strip, autoreset, wrap):
+    if wrap:
+        wrapper = AnsiToWin32(stream,
+            convert=convert, strip=strip, autoreset=autoreset)
+        if wrapper.should_wrap():
+            stream = wrapper.stream
+    return stream
+
+
@@ -0,0 +1,137 @@
+# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
+
+# from winbase.h
+STDOUT = -11
+STDERR = -12
+
+import ctypes
+from ctypes import LibraryLoader
+
+try:
+    windll = LibraryLoader(ctypes.WinDLL)
+    from ctypes import wintypes
+except (AttributeError, ImportError):
+    windll = None
+    SetConsoleTextAttribute = lambda *_: None
+else:
+    from ctypes import (
+        byref, Structure, c_char, c_short, c_uint32, c_ushort, POINTER
+    )
+
+    class CONSOLE_SCREEN_BUFFER_INFO(Structure):
+        """struct in wincon.h."""
+        _fields_ = [
+            ("dwSize", wintypes._COORD),
+            ("dwCursorPosition", wintypes._COORD),
+            ("wAttributes", wintypes.WORD),
+            ("srWindow", wintypes.SMALL_RECT),
+            ("dwMaximumWindowSize", wintypes._COORD),
+        ]
+        def __str__(self):
+            return '(%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d)' % (
+                self.dwSize.Y, self.dwSize.X
+                , self.dwCursorPosition.Y, self.dwCursorPosition.X
+                , self.wAttributes
+                , self.srWindow.Top, self.srWindow.Left, self.srWindow.Bottom, self.srWindow.Right
+                , self.dwMaximumWindowSize.Y, self.dwMaximumWindowSize.X
+            )
+
+    _GetStdHandle = windll.kernel32.GetStdHandle
+    _GetStdHandle.argtypes = [
+        wintypes.DWORD,
+    ]
+    _GetStdHandle.restype = wintypes.HANDLE
+
+    _GetConsoleScreenBufferInfo = windll.kernel32.GetConsoleScreenBufferInfo
+    _GetConsoleScreenBufferInfo.argtypes = [
+        wintypes.HANDLE,
+        POINTER(CONSOLE_SCREEN_BUFFER_INFO),
+    ]
+    _GetConsoleScreenBufferInfo.restype = wintypes.BOOL
+
+    _SetConsoleTextAttribute = windll.kernel32.SetConsoleTextAttribute
+    _SetConsoleTextAttribute.argtypes = [
+        wintypes.HANDLE,
+        wintypes.WORD,
+    ]
+    _SetConsoleTextAttribute.restype = wintypes.BOOL
+
+    _SetConsoleCursorPosition = windll.kernel32.SetConsoleCursorPosition
+    _SetConsoleCursorPosition.argtypes = [
+        wintypes.HANDLE,
+        wintypes._COORD,
+    ]
+    _SetConsoleCursorPosition.restype = wintypes.BOOL
+
+    _FillConsoleOutputCharacterA = windll.kernel32.FillConsoleOutputCharacterA
+    _FillConsoleOutputCharacterA.argtypes = [
+        wintypes.HANDLE,
+        c_char,
+        wintypes.DWORD,
+        wintypes._COORD,
+        POINTER(wintypes.DWORD),
+    ]
+    _FillConsoleOutputCharacterA.restype = wintypes.BOOL
+
+    _FillConsoleOutputAttribute = windll.kernel32.FillConsoleOutputAttribute
+    _FillConsoleOutputAttribute.argtypes = [
+        wintypes.HANDLE,
+        wintypes.WORD,
+        wintypes.DWORD,
+        wintypes._COORD,
+        POINTER(wintypes.DWORD),
+    ]
+    _FillConsoleOutputAttribute.restype = wintypes.BOOL
+
+    handles = {
+        STDOUT: _GetStdHandle(STDOUT),
+        STDERR: _GetStdHandle(STDERR),
+    }
+
+    def GetConsoleScreenBufferInfo(stream_id=STDOUT):
+        handle = handles[stream_id]
+        csbi = CONSOLE_SCREEN_BUFFER_INFO()
+        success = _GetConsoleScreenBufferInfo(
+            handle, byref(csbi))
+        return csbi
+
+    def SetConsoleTextAttribute(stream_id, attrs):
+        handle = handles[stream_id]
+        return _SetConsoleTextAttribute(handle, attrs)
+
+    def SetConsoleCursorPosition(stream_id, position):
+        position = wintypes._COORD(*position)
+        # If the position is out of range, do nothing.
+        if position.Y <= 0 or position.X <= 0:
+            return
+        # Adjust for Windows' SetConsoleCursorPosition:
+        #    1. being 0-based, while ANSI is 1-based.
+        #    2. expecting (x,y), while ANSI uses (y,x).
+        adjusted_position = wintypes._COORD(position.Y - 1, position.X - 1)
+        # Adjust for viewport's scroll position
+        sr = GetConsoleScreenBufferInfo(STDOUT).srWindow
+        adjusted_position.Y += sr.Top
+        adjusted_position.X += sr.Left
+        # Resume normal processing
+        handle = handles[stream_id]
+        return _SetConsoleCursorPosition(handle, adjusted_position)
+
+    def FillConsoleOutputCharacter(stream_id, char, length, start):
+        handle = handles[stream_id]
+        char = c_char(char)
+        length = wintypes.DWORD(length)
+        num_written = wintypes.DWORD(0)
+        # Note that this is hard-coded for ANSI (vs wide) bytes.
+        success = _FillConsoleOutputCharacterA(
+            handle, char, length, start, byref(num_written))
+        return num_written.value
+
+    def FillConsoleOutputAttribute(stream_id, attr, length, start):
+        ''' FillConsoleOutputAttribute( hConsole, csbi.wAttributes, dwConSize, coordScreen, &cCharsWritten )'''
+        handle = handles[stream_id]
+        attribute = wintypes.WORD(attr)
+        length = wintypes.DWORD(length)
+        num_written = wintypes.DWORD(0)
+        # Note that this is hard-coded for ANSI (vs wide) bytes.
+        return _FillConsoleOutputAttribute(
+            handle, attribute, length, start, byref(num_written))
@@ -0,0 +1,120 @@
+# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
+from . import win32
+
+
+# from wincon.h
+class WinColor(object):
+    BLACK   = 0
+    BLUE    = 1
+    GREEN   = 2
+    CYAN    = 3
+    RED     = 4
+    MAGENTA = 5
+    YELLOW  = 6
+    GREY    = 7
+
+# from wincon.h
+class WinStyle(object):
+    NORMAL = 0x00 # dim text, dim background
+    BRIGHT = 0x08 # bright text, dim background
+
+
+class WinTerm(object):
+
+    def __init__(self):
+        self._default = win32.GetConsoleScreenBufferInfo(win32.STDOUT).wAttributes
+        self.set_attrs(self._default)
+        self._default_fore = self._fore
+        self._default_back = self._back
+        self._default_style = self._style
+
+    def get_attrs(self):
+        return self._fore + self._back * 16 + self._style
+
+    def set_attrs(self, value):
+        self._fore = value & 7
+        self._back = (value >> 4) & 7
+        self._style = value & WinStyle.BRIGHT
+
+    def reset_all(self, on_stderr=None):
+        self.set_attrs(self._default)
+        self.set_console(attrs=self._default)
+
+    def fore(self, fore=None, on_stderr=False):
+        if fore is None:
+            fore = self._default_fore
+        self._fore = fore
+        self.set_console(on_stderr=on_stderr)
+
+    def back(self, back=None, on_stderr=False):
+        if back is None:
+            back = self._default_back
+        self._back = back
+        self.set_console(on_stderr=on_stderr)
+
+    def style(self, style=None, on_stderr=False):
+        if style is None:
+            style = self._default_style
+        self._style = style
+        self.set_console(on_stderr=on_stderr)
+
+    def set_console(self, attrs=None, on_stderr=False):
+        if attrs is None:
+            attrs = self.get_attrs()
+        handle = win32.STDOUT
+        if on_stderr:
+            handle = win32.STDERR
+        win32.SetConsoleTextAttribute(handle, attrs)
+
+    def get_position(self, handle):
+        position = win32.GetConsoleScreenBufferInfo(handle).dwCursorPosition
+        # Because Windows coordinates are 0-based,
+        # and win32.SetConsoleCursorPosition expects 1-based.
+        position.X += 1
+        position.Y += 1
+        return position
+    
+    def set_cursor_position(self, position=None, on_stderr=False):
+        if position is None:
+            #I'm not currently tracking the position, so there is no default.
+            #position = self.get_position()
+            return
+        handle = win32.STDOUT
+        if on_stderr:
+            handle = win32.STDERR
+        win32.SetConsoleCursorPosition(handle, position)
+
+    def cursor_up(self, num_rows=0, on_stderr=False):
+        if num_rows == 0:
+            return
+        handle = win32.STDOUT
+        if on_stderr:
+            handle = win32.STDERR
+        position = self.get_position(handle)
+        adjusted_position = (position.Y - num_rows, position.X)
+        self.set_cursor_position(adjusted_position, on_stderr)
+
+    def erase_data(self, mode=0, on_stderr=False):
+        # 0 (or None) should clear from the cursor to the end of the screen.
+        # 1 should clear from the cursor to the beginning of the screen.
+        # 2 should clear the entire screen. (And maybe move cursor to (1,1)?)
+        #
+        # At the moment, I only support mode 2. From looking at the API, it
+        #    should be possible to calculate a different number of bytes to clear,
+        #    and to do so relative to the cursor position.
+        if mode[0] not in (2,):
+            return
+        handle = win32.STDOUT
+        if on_stderr:
+            handle = win32.STDERR
+        # here's where we'll home the cursor
+        coord_screen = win32.COORD(0,0)
+        csbi = win32.GetConsoleScreenBufferInfo(handle)
+        # get the number of character cells in the current buffer
+        dw_con_size = csbi.dwSize.X * csbi.dwSize.Y
+        # fill the entire screen with blanks
+        win32.FillConsoleOutputCharacter(handle, ' ', dw_con_size, coord_screen)
+        # now set the buffer's attributes accordingly
+        win32.FillConsoleOutputAttribute(handle, self.get_attrs(), dw_con_size, coord_screen );
+        # put the cursor at (0, 0)
+        win32.SetConsoleCursorPosition(handle, (coord_screen.X, coord_screen.Y))
@@ -0,0 +1,23 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2012-2014 Vinay Sajip.
+# Licensed to the Python Software Foundation under a contributor agreement.
+# See LICENSE.txt and CONTRIBUTORS.txt.
+#
+import logging
+
+__version__ = '0.1.8'
+
+class DistlibException(Exception):
+    pass
+
+try:
+    from logging import NullHandler
+except ImportError: # pragma: no cover
+    class NullHandler(logging.Handler):
+        def handle(self, record): pass
+        def emit(self, record): pass
+        def createLock(self): self.lock = None
+
+logger = logging.getLogger(__name__)
+logger.addHandler(NullHandler())
@@ -0,0 +1,6 @@
+"""Modules copied from Python 3 standard libraries, for internal use only.
+
+Individual classes and functions are found in d2._backport.misc.  Intended
+usage is to always import things missing from 3.1 from that module: the
+built-in/stdlib objects will be used if found.
+"""
@@ -0,0 +1,41 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2012 The Python Software Foundation.
+# See LICENSE.txt and CONTRIBUTORS.txt.
+#
+"""Backports for individual classes and functions."""
+
+import os
+import sys
+
+__all__ = ['cache_from_source', 'callable', 'fsencode']
+
+
+try:
+    from imp import cache_from_source
+except ImportError:
+    def cache_from_source(py_file, debug=__debug__):
+        ext = debug and 'c' or 'o'
+        return py_file + ext
+
+
+try:
+    callable = callable
+except NameError:
+    from collections import Callable
+
+    def callable(obj):
+        return isinstance(obj, Callable)
+
+
+try:
+    fsencode = os.fsencode
+except AttributeError:
+    def fsencode(filename):
+        if isinstance(filename, bytes):
+            return filename
+        elif isinstance(filename, str):
+            return filename.encode(sys.getfilesystemencoding())
+        else:
+            raise TypeError("expect bytes or str, not %s" %
+                            type(filename).__name__)
@@ -0,0 +1,761 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2012 The Python Software Foundation.
+# See LICENSE.txt and CONTRIBUTORS.txt.
+#
+"""Utility functions for copying and archiving files and directory trees.
+
+XXX The functions here don't copy the resource fork or other metadata on Mac.
+
+"""
+
+import os
+import sys
+import stat
+from os.path import abspath
+import fnmatch
+import collections
+import errno
+from . import tarfile
+
+try:
+    import bz2
+    _BZ2_SUPPORTED = True
+except ImportError:
+    _BZ2_SUPPORTED = False
+
+try:
+    from pwd import getpwnam
+except ImportError:
+    getpwnam = None
+
+try:
+    from grp import getgrnam
+except ImportError:
+    getgrnam = None
+
+__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
+           "copytree", "move", "rmtree", "Error", "SpecialFileError",
+           "ExecError", "make_archive", "get_archive_formats",
+           "register_archive_format", "unregister_archive_format",
+           "get_unpack_formats", "register_unpack_format",
+           "unregister_unpack_format", "unpack_archive", "ignore_patterns"]
+
+class Error(EnvironmentError):
+    pass
+
+class SpecialFileError(EnvironmentError):
+    """Raised when trying to do a kind of operation (e.g. copying) which is
+    not supported on a special file (e.g. a named pipe)"""
+
+class ExecError(EnvironmentError):
+    """Raised when a command could not be executed"""
+
+class ReadError(EnvironmentError):
+    """Raised when an archive cannot be read"""
+
+class RegistryError(Exception):
+    """Raised when a registery operation with the archiving
+    and unpacking registeries fails"""
+
+
+try:
+    WindowsError
+except NameError:
+    WindowsError = None
+
+def copyfileobj(fsrc, fdst, length=16*1024):
+    """copy data from file-like object fsrc to file-like object fdst"""
+    while 1:
+        buf = fsrc.read(length)
+        if not buf:
+            break
+        fdst.write(buf)
+
+def _samefile(src, dst):
+    # Macintosh, Unix.
+    if hasattr(os.path, 'samefile'):
+        try:
+            return os.path.samefile(src, dst)
+        except OSError:
+            return False
+
+    # All other platforms: check for same pathname.
+    return (os.path.normcase(os.path.abspath(src)) ==
+            os.path.normcase(os.path.abspath(dst)))
+
+def copyfile(src, dst):
+    """Copy data from src to dst"""
+    if _samefile(src, dst):
+        raise Error("`%s` and `%s` are the same file" % (src, dst))
+
+    for fn in [src, dst]:
+        try:
+            st = os.stat(fn)
+        except OSError:
+            # File most likely does not exist
+            pass
+        else:
+            # XXX What about other special files? (sockets, devices...)
+            if stat.S_ISFIFO(st.st_mode):
+                raise SpecialFileError("`%s` is a named pipe" % fn)
+
+    with open(src, 'rb') as fsrc:
+        with open(dst, 'wb') as fdst:
+            copyfileobj(fsrc, fdst)
+
+def copymode(src, dst):
+    """Copy mode bits from src to dst"""
+    if hasattr(os, 'chmod'):
+        st = os.stat(src)
+        mode = stat.S_IMODE(st.st_mode)
+        os.chmod(dst, mode)
+
+def copystat(src, dst):
+    """Copy all stat info (mode bits, atime, mtime, flags) from src to dst"""
+    st = os.stat(src)
+    mode = stat.S_IMODE(st.st_mode)
+    if hasattr(os, 'utime'):
+        os.utime(dst, (st.st_atime, st.st_mtime))
+    if hasattr(os, 'chmod'):
+        os.chmod(dst, mode)
+    if hasattr(os, 'chflags') and hasattr(st, 'st_flags'):
+        try:
+            os.chflags(dst, st.st_flags)
+        except OSError as why:
+            if (not hasattr(errno, 'EOPNOTSUPP') or
+                why.errno != errno.EOPNOTSUPP):
+                raise
+
+def copy(src, dst):
+    """Copy data and mode bits ("cp src dst").
+
+    The destination may be a directory.
+
+    """
+    if os.path.isdir(dst):
+        dst = os.path.join(dst, os.path.basename(src))
+    copyfile(src, dst)
+    copymode(src, dst)
+
+def copy2(src, dst):
+    """Copy data and all stat info ("cp -p src dst").
+
+    The destination may be a directory.
+
+    """
+    if os.path.isdir(dst):
+        dst = os.path.join(dst, os.path.basename(src))
+    copyfile(src, dst)
+    copystat(src, dst)
+
+def ignore_patterns(*patterns):
+    """Function that can be used as copytree() ignore parameter.
+
+    Patterns is a sequence of glob-style patterns
+    that are used to exclude files"""
+    def _ignore_patterns(path, names):
+        ignored_names = []
+        for pattern in patterns:
+            ignored_names.extend(fnmatch.filter(names, pattern))
+        return set(ignored_names)
+    return _ignore_patterns
+
+def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
+             ignore_dangling_symlinks=False):
+    """Recursively copy a directory tree.
+
+    The destination directory must not already exist.
+    If exception(s) occur, an Error is raised with a list of reasons.
+
+    If the optional symlinks flag is true, symbolic links in the
+    source tree result in symbolic links in the destination tree; if
+    it is false, the contents of the files pointed to by symbolic
+    links are copied. If the file pointed by the symlink doesn't
+    exist, an exception will be added in the list of errors raised in
+    an Error exception at the end of the copy process.
+
+    You can set the optional ignore_dangling_symlinks flag to true if you
+    want to silence this exception. Notice that this has no effect on
+    platforms that don't support os.symlink.
+
+    The optional ignore argument is a callable. If given, it
+    is called with the `src` parameter, which is the directory
+    being visited by copytree(), and `names` which is the list of
+    `src` contents, as returned by os.listdir():
+
+        callable(src, names) -> ignored_names
+
+    Since copytree() is called recursively, the callable will be
+    called once for each directory that is copied. It returns a
+    list of names relative to the `src` directory that should
+    not be copied.
+
+    The optional copy_function argument is a callable that will be used
+    to copy each file. It will be called with the source path and the
+    destination path as arguments. By default, copy2() is used, but any
+    function that supports the same signature (like copy()) can be used.
+
+    """
+    names = os.listdir(src)
+    if ignore is not None:
+        ignored_names = ignore(src, names)
+    else:
+        ignored_names = set()
+
+    os.makedirs(dst)
+    errors = []
+    for name in names:
+        if name in ignored_names:
+            continue
+        srcname = os.path.join(src, name)
+        dstname = os.path.join(dst, name)
+        try:
+            if os.path.islink(srcname):
+                linkto = os.readlink(srcname)
+                if symlinks:
+                    os.symlink(linkto, dstname)
+                else:
+                    # ignore dangling symlink if the flag is on
+                    if not os.path.exists(linkto) and ignore_dangling_symlinks:
+                        continue
+                    # otherwise let the copy occurs. copy2 will raise an error
+                    copy_function(srcname, dstname)
+            elif os.path.isdir(srcname):
+                copytree(srcname, dstname, symlinks, ignore, copy_function)
+            else:
+                # Will raise a SpecialFileError for unsupported file types
+                copy_function(srcname, dstname)
+        # catch the Error from the recursive copytree so that we can
+        # continue with other files
+        except Error as err:
+            errors.extend(err.args[0])
+        except EnvironmentError as why:
+            errors.append((srcname, dstname, str(why)))
+    try:
+        copystat(src, dst)
+    except OSError as why:
+        if WindowsError is not None and isinstance(why, WindowsError):
+            # Copying file access times may fail on Windows
+            pass
+        else:
+            errors.extend((src, dst, str(why)))
+    if errors:
+        raise Error(errors)
+
+def rmtree(path, ignore_errors=False, onerror=None):
+    """Recursively delete a directory tree.
+
+    If ignore_errors is set, errors are ignored; otherwise, if onerror
+    is set, it is called to handle the error with arguments (func,
+    path, exc_info) where func is os.listdir, os.remove, or os.rmdir;
+    path is the argument to that function that caused it to fail; and
+    exc_info is a tuple returned by sys.exc_info().  If ignore_errors
+    is false and onerror is None, an exception is raised.
+
+    """
+    if ignore_errors:
+        def onerror(*args):
+            pass
+    elif onerror is None:
+        def onerror(*args):
+            raise
+    try:
+        if os.path.islink(path):
+            # symlinks to directories are forbidden, see bug #1669
+            raise OSError("Cannot call rmtree on a symbolic link")
+    except OSError:
+        onerror(os.path.islink, path, sys.exc_info())
+        # can't continue even if onerror hook returns
+        return
+    names = []
+    try:
+        names = os.listdir(path)
+    except os.error:
+        onerror(os.listdir, path, sys.exc_info())
+    for name in names:
+        fullname = os.path.join(path, name)
+        try:
+            mode = os.lstat(fullname).st_mode
+        except os.error:
+            mode = 0
+        if stat.S_ISDIR(mode):
+            rmtree(fullname, ignore_errors, onerror)
+        else:
+            try:
+                os.remove(fullname)
+            except os.error:
+                onerror(os.remove, fullname, sys.exc_info())
+    try:
+        os.rmdir(path)
+    except os.error:
+        onerror(os.rmdir, path, sys.exc_info())
+
+
+def _basename(path):
+    # A basename() variant which first strips the trailing slash, if present.
+    # Thus we always get the last component of the path, even for directories.
+    return os.path.basename(path.rstrip(os.path.sep))
+
+def move(src, dst):
+    """Recursively move a file or directory to another location. This is
+    similar to the Unix "mv" command.
+
+    If the destination is a directory or a symlink to a directory, the source
+    is moved inside the directory. The destination path must not already
+    exist.
+
+    If the destination already exists but is not a directory, it may be
+    overwritten depending on os.rename() semantics.
+
+    If the destination is on our current filesystem, then rename() is used.
+    Otherwise, src is copied to the destination and then removed.
+    A lot more could be done here...  A look at a mv.c shows a lot of
+    the issues this implementation glosses over.
+
+    """
+    real_dst = dst
+    if os.path.isdir(dst):
+        if _samefile(src, dst):
+            # We might be on a case insensitive filesystem,
+            # perform the rename anyway.
+            os.rename(src, dst)
+            return
+
+        real_dst = os.path.join(dst, _basename(src))
+        if os.path.exists(real_dst):
+            raise Error("Destination path '%s' already exists" % real_dst)
+    try:
+        os.rename(src, real_dst)
+    except OSError:
+        if os.path.isdir(src):
+            if _destinsrc(src, dst):
+                raise Error("Cannot move a directory '%s' into itself '%s'." % (src, dst))
+            copytree(src, real_dst, symlinks=True)
+            rmtree(src)
+        else:
+            copy2(src, real_dst)
+            os.unlink(src)
+
+def _destinsrc(src, dst):
+    src = abspath(src)
+    dst = abspath(dst)
+    if not src.endswith(os.path.sep):
+        src += os.path.sep
+    if not dst.endswith(os.path.sep):
+        dst += os.path.sep
+    return dst.startswith(src)
+
+def _get_gid(name):
+    """Returns a gid, given a group name."""
+    if getgrnam is None or name is None:
+        return None
+    try:
+        result = getgrnam(name)
+    except KeyError:
+        result = None
+    if result is not None:
+        return result[2]
+    return None
+
+def _get_uid(name):
+    """Returns an uid, given a user name."""
+    if getpwnam is None or name is None:
+        return None
+    try:
+        result = getpwnam(name)
+    except KeyError:
+        result = None
+    if result is not None:
+        return result[2]
+    return None
+
+def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
+                  owner=None, group=None, logger=None):
+    """Create a (possibly compressed) tar file from all the files under
+    'base_dir'.
+
+    'compress' must be "gzip" (the default), "bzip2", or None.
+
+    'owner' and 'group' can be used to define an owner and a group for the
+    archive that is being built. If not provided, the current owner and group
+    will be used.
+
+    The output tar file will be named 'base_name' +  ".tar", possibly plus
+    the appropriate compression extension (".gz", or ".bz2").
+
+    Returns the output filename.
+    """
+    tar_compression = {'gzip': 'gz', None: ''}
+    compress_ext = {'gzip': '.gz'}
+
+    if _BZ2_SUPPORTED:
+        tar_compression['bzip2'] = 'bz2'
+        compress_ext['bzip2'] = '.bz2'
+
+    # flags for compression program, each element of list will be an argument
+    if compress is not None and compress not in compress_ext:
+        raise ValueError("bad value for 'compress', or compression format not "
+                         "supported : {0}".format(compress))
+
+    archive_name = base_name + '.tar' + compress_ext.get(compress, '')
+    archive_dir = os.path.dirname(archive_name)
+
+    if not os.path.exists(archive_dir):
+        if logger is not None:
+            logger.info("creating %s", archive_dir)
+        if not dry_run:
+            os.makedirs(archive_dir)
+
+    # creating the tarball
+    if logger is not None:
+        logger.info('Creating tar archive')
+
+    uid = _get_uid(owner)
+    gid = _get_gid(group)
+
+    def _set_uid_gid(tarinfo):
+        if gid is not None:
+            tarinfo.gid = gid
+            tarinfo.gname = group
+        if uid is not None:
+            tarinfo.uid = uid
+            tarinfo.uname = owner
+        return tarinfo
+
+    if not dry_run:
+        tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
+        try:
+            tar.add(base_dir, filter=_set_uid_gid)
+        finally:
+            tar.close()
+
+    return archive_name
+
+def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
+    # XXX see if we want to keep an external call here
+    if verbose:
+        zipoptions = "-r"
+    else:
+        zipoptions = "-rq"
+    from distutils.errors import DistutilsExecError
+    from distutils.spawn import spawn
+    try:
+        spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
+    except DistutilsExecError:
+        # XXX really should distinguish between "couldn't find
+        # external 'zip' command" and "zip failed".
+        raise ExecError("unable to create zip file '%s': "
+            "could neither import the 'zipfile' module nor "
+            "find a standalone zip utility") % zip_filename
+
+def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
+    """Create a zip file from all the files under 'base_dir'.
+
+    The output zip file will be named 'base_name' + ".zip".  Uses either the
+    "zipfile" Python module (if available) or the InfoZIP "zip" utility
+    (if installed and found on the default search path).  If neither tool is
+    available, raises ExecError.  Returns the name of the output zip
+    file.
+    """
+    zip_filename = base_name + ".zip"
+    archive_dir = os.path.dirname(base_name)
+
+    if not os.path.exists(archive_dir):
+        if logger is not None:
+            logger.info("creating %s", archive_dir)
+        if not dry_run:
+            os.makedirs(archive_dir)
+
+    # If zipfile module is not available, try spawning an external 'zip'
+    # command.
+    try:
+        import zipfile
+    except ImportError:
+        zipfile = None
+
+    if zipfile is None:
+        _call_external_zip(base_dir, zip_filename, verbose, dry_run)
+    else:
+        if logger is not None:
+            logger.info("creating '%s' and adding '%s' to it",
+                        zip_filename, base_dir)
+
+        if not dry_run:
+            zip = zipfile.ZipFile(zip_filename, "w",
+                                  compression=zipfile.ZIP_DEFLATED)
+
+            for dirpath, dirnames, filenames in os.walk(base_dir):
+                for name in filenames:
+                    path = os.path.normpath(os.path.join(dirpath, name))
+                    if os.path.isfile(path):
+                        zip.write(path, path)
+                        if logger is not None:
+                            logger.info("adding '%s'", path)
+            zip.close()
+
+    return zip_filename
+
+_ARCHIVE_FORMATS = {
+    'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
+    'bztar': (_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"),
+    'tar':   (_make_tarball, [('compress', None)], "uncompressed tar file"),
+    'zip':   (_make_zipfile, [], "ZIP file"),
+    }
+
+if _BZ2_SUPPORTED:
+    _ARCHIVE_FORMATS['bztar'] = (_make_tarball, [('compress', 'bzip2')],
+                                "bzip2'ed tar-file")
+
+def get_archive_formats():
+    """Returns a list of supported formats for archiving and unarchiving.
+
+    Each element of the returned sequence is a tuple (name, description)
+    """
+    formats = [(name, registry[2]) for name, registry in
+               _ARCHIVE_FORMATS.items()]
+    formats.sort()
+    return formats
+
+def register_archive_format(name, function, extra_args=None, description=''):
+    """Registers an archive format.
+
+    name is the name of the format. function is the callable that will be
+    used to create archives. If provided, extra_args is a sequence of
+    (name, value) tuples that will be passed as arguments to the callable.
+    description can be provided to describe the format, and will be returned
+    by the get_archive_formats() function.
+    """
+    if extra_args is None:
+        extra_args = []
+    if not isinstance(function, collections.Callable):
+        raise TypeError('The %s object is not callable' % function)
+    if not isinstance(extra_args, (tuple, list)):
+        raise TypeError('extra_args needs to be a sequence')
+    for element in extra_args:
+        if not isinstance(element, (tuple, list)) or len(element) !=2:
+            raise TypeError('extra_args elements are : (arg_name, value)')
+
+    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
+
+def unregister_archive_format(name):
+    del _ARCHIVE_FORMATS[name]
+
+def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
+                 dry_run=0, owner=None, group=None, logger=None):
+    """Create an archive file (eg. zip or tar).
+
+    'base_name' is the name of the file to create, minus any format-specific
+    extension; 'format' is the archive format: one of "zip", "tar", "bztar"
+    or "gztar".
+
+    'root_dir' is a directory that will be the root directory of the
+    archive; ie. we typically chdir into 'root_dir' before creating the
+    archive.  'base_dir' is the directory where we start archiving from;
+    ie. 'base_dir' will be the common prefix of all files and
+    directories in the archive.  'root_dir' and 'base_dir' both default
+    to the current directory.  Returns the name of the archive file.
+
+    'owner' and 'group' are used when creating a tar archive. By default,
+    uses the current owner and group.
+    """
+    save_cwd = os.getcwd()
+    if root_dir is not None:
+        if logger is not None:
+            logger.debug("changing into '%s'", root_dir)
+        base_name = os.path.abspath(base_name)
+        if not dry_run:
+            os.chdir(root_dir)
+
+    if base_dir is None:
+        base_dir = os.curdir
+
+    kwargs = {'dry_run': dry_run, 'logger': logger}
+
+    try:
+        format_info = _ARCHIVE_FORMATS[format]
+    except KeyError:
+        raise ValueError("unknown archive format '%s'" % format)
+
+    func = format_info[0]
+    for arg, val in format_info[1]:
+        kwargs[arg] = val
+
+    if format != 'zip':
+        kwargs['owner'] = owner
+        kwargs['group'] = group
+
+    try:
+        filename = func(base_name, base_dir, **kwargs)
+    finally:
+        if root_dir is not None:
+            if logger is not None:
+                logger.debug("changing back to '%s'", save_cwd)
+            os.chdir(save_cwd)
+
+    return filename
+
+
+def get_unpack_formats():
+    """Returns a list of supported formats for unpacking.
+
+    Each element of the returned sequence is a tuple
+    (name, extensions, description)
+    """
+    formats = [(name, info[0], info[3]) for name, info in
+               _UNPACK_FORMATS.items()]
+    formats.sort()
+    return formats
+
+def _check_unpack_options(extensions, function, extra_args):
+    """Checks what gets registered as an unpacker."""
+    # first make sure no other unpacker is registered for this extension
+    existing_extensions = {}
+    for name, info in _UNPACK_FORMATS.items():
+        for ext in info[0]:
+            existing_extensions[ext] = name
+
+    for extension in extensions:
+        if extension in existing_extensions:
+            msg = '%s is already registered for "%s"'
+            raise RegistryError(msg % (extension,
+                                       existing_extensions[extension]))
+
+    if not isinstance(function, collections.Callable):
+        raise TypeError('The registered function must be a callable')
+
+
+def register_unpack_format(name, extensions, function, extra_args=None,
+                           description=''):
+    """Registers an unpack format.
+
+    `name` is the name of the format. `extensions` is a list of extensions
+    corresponding to the format.
+
+    `function` is the callable that will be
+    used to unpack archives. The callable will receive archives to unpack.
+    If it's unable to handle an archive, it needs to raise a ReadError
+    exception.
+
+    If provided, `extra_args` is a sequence of
+    (name, value) tuples that will be passed as arguments to the callable.
+    description can be provided to describe the format, and will be returned
+    by the get_unpack_formats() function.
+    """
+    if extra_args is None:
+        extra_args = []
+    _check_unpack_options(extensions, function, extra_args)
+    _UNPACK_FORMATS[name] = extensions, function, extra_args, description
+
+def unregister_unpack_format(name):
+    """Removes the pack format from the registery."""
+    del _UNPACK_FORMATS[name]
+
+def _ensure_directory(path):
+    """Ensure that the parent directory of `path` exists"""
+    dirname = os.path.dirname(path)
+    if not os.path.isdir(dirname):
+        os.makedirs(dirname)
+
+def _unpack_zipfile(filename, extract_dir):
+    """Unpack zip `filename` to `extract_dir`
+    """
+    try:
+        import zipfile
+    except ImportError:
+        raise ReadError('zlib not supported, cannot unpack this archive.')
+
+    if not zipfile.is_zipfile(filename):
+        raise ReadError("%s is not a zip file" % filename)
+
+    zip = zipfile.ZipFile(filename)
+    try:
+        for info in zip.infolist():
+            name = info.filename
+
+            # don't extract absolute paths or ones with .. in them
+            if name.startswith('/') or '..' in name:
+                continue
+
+            target = os.path.join(extract_dir, *name.split('/'))
+            if not target:
+                continue
+
+            _ensure_directory(target)
+            if not name.endswith('/'):
+                # file
+                data = zip.read(info.filename)
+                f = open(target, 'wb')
+                try:
+                    f.write(data)
+                finally:
+                    f.close()
+                    del data
+    finally:
+        zip.close()
+
+def _unpack_tarfile(filename, extract_dir):
+    """Unpack tar/tar.gz/tar.bz2 `filename` to `extract_dir`
+    """
+    try:
+        tarobj = tarfile.open(filename)
+    except tarfile.TarError:
+        raise ReadError(
+            "%s is not a compressed or uncompressed tar file" % filename)
+    try:
+        tarobj.extractall(extract_dir)
+    finally:
+        tarobj.close()
+
+_UNPACK_FORMATS = {
+    'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
+    'tar':   (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
+    'zip':   (['.zip'], _unpack_zipfile, [], "ZIP file")
+    }
+
+if _BZ2_SUPPORTED:
+    _UNPACK_FORMATS['bztar'] = (['.bz2'], _unpack_tarfile, [],
+                                "bzip2'ed tar-file")
+
+def _find_unpack_format(filename):
+    for name, info in _UNPACK_FORMATS.items():
+        for extension in info[0]:
+            if filename.endswith(extension):
+                return name
+    return None
+
+def unpack_archive(filename, extract_dir=None, format=None):
+    """Unpack an archive.
+
+    `filename` is the name of the archive.
+
+    `extract_dir` is the name of the target directory, where the archive
+    is unpacked. If not provided, the current working directory is used.
+
+    `format` is the archive format: one of "zip", "tar", or "gztar". Or any
+    other registered format. If not provided, unpack_archive will use the
+    filename extension and see if an unpacker was registered for that
+    extension.
+
+    In case none is found, a ValueError is raised.
+    """
+    if extract_dir is None:
+        extract_dir = os.getcwd()
+
+    if format is not None:
+        try:
+            format_info = _UNPACK_FORMATS[format]
+        except KeyError:
+            raise ValueError("Unknown unpack format '{0}'".format(format))
+
+        func = format_info[1]
+        func(filename, extract_dir, **dict(format_info[2]))
+    else:
+        # we need to look at the registered unpackers supported extensions
+        format = _find_unpack_format(filename)
+        if format is None:
+            raise ReadError("Unknown archive format '{0}'".format(filename))
+
+        func = _UNPACK_FORMATS[format][1]
+        kwargs = dict(_UNPACK_FORMATS[format][2])
+        func(filename, extract_dir, **kwargs)
@@ -0,0 +1,84 @@
+[posix_prefix]
+# Configuration directories.  Some of these come straight out of the
+# configure script.  They are for implementing the other variables, not to
+# be used directly in [resource_locations].
+confdir = /etc
+datadir = /usr/share
+libdir = /usr/lib
+statedir = /var
+# User resource directory
+local = ~/.local/{distribution.name}
+
+stdlib = {base}/lib/python{py_version_short}
+platstdlib = {platbase}/lib/python{py_version_short}
+purelib = {base}/lib/python{py_version_short}/site-packages
+platlib = {platbase}/lib/python{py_version_short}/site-packages
+include = {base}/include/python{py_version_short}{abiflags}
+platinclude = {platbase}/include/python{py_version_short}{abiflags}
+data = {base}
+
+[posix_home]
+stdlib = {base}/lib/python
+platstdlib = {base}/lib/python
+purelib = {base}/lib/python
+platlib = {base}/lib/python
+include = {base}/include/python
+platinclude = {base}/include/python
+scripts = {base}/bin
+data = {base}
+
+[nt]
+stdlib = {base}/Lib
+platstdlib = {base}/Lib
+purelib = {base}/Lib/site-packages
+platlib = {base}/Lib/site-packages
+include = {base}/Include
+platinclude = {base}/Include
+scripts = {base}/Scripts
+data = {base}
+
+[os2]
+stdlib = {base}/Lib
+platstdlib = {base}/Lib
+purelib = {base}/Lib/site-packages
+platlib = {base}/Lib/site-packages
+include = {base}/Include
+platinclude = {base}/Include
+scripts = {base}/Scripts
+data = {base}
+
+[os2_home]
+stdlib = {userbase}/lib/python{py_version_short}
+platstdlib = {userbase}/lib/python{py_version_short}
+purelib = {userbase}/lib/python{py_version_short}/site-packages
+platlib = {userbase}/lib/python{py_version_short}/site-packages
+include = {userbase}/include/python{py_version_short}
+scripts = {userbase}/bin
+data = {userbase}
+
+[nt_user]
+stdlib = {userbase}/Python{py_version_nodot}
+platstdlib = {userbase}/Python{py_version_nodot}
+purelib = {userbase}/Python{py_version_nodot}/site-packages
+platlib = {userbase}/Python{py_version_nodot}/site-packages
+include = {userbase}/Python{py_version_nodot}/Include
+scripts = {userbase}/Scripts
+data = {userbase}
+
+[posix_user]
+stdlib = {userbase}/lib/python{py_version_short}
+platstdlib = {userbase}/lib/python{py_version_short}
+purelib = {userbase}/lib/python{py_version_short}/site-packages
+platlib = {userbase}/lib/python{py_version_short}/site-packages
+include = {userbase}/include/python{py_version_short}
+scripts = {userbase}/bin
+data = {userbase}
+
+[osx_framework_user]
+stdlib = {userbase}/lib/python
+platstdlib = {userbase}/lib/python
+purelib = {userbase}/lib/python/site-packages
+platlib = {userbase}/lib/python/site-packages
+include = {userbase}/include
+scripts = {userbase}/bin
+data = {userbase}
@@ -0,0 +1,788 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2012 The Python Software Foundation.
+# See LICENSE.txt and CONTRIBUTORS.txt.
+#
+"""Access to Python's configuration information."""
+
+import codecs
+import os
+import re
+import sys
+from os.path import pardir, realpath
+try:
+    import configparser
+except ImportError:
+    import ConfigParser as configparser
+
+
+__all__ = [
+    'get_config_h_filename',
+    'get_config_var',
+    'get_config_vars',
+    'get_makefile_filename',
+    'get_path',
+    'get_path_names',
+    'get_paths',
+    'get_platform',
+    'get_python_version',
+    'get_scheme_names',
+    'parse_config_h',
+]
+
+
+def _safe_realpath(path):
+    try:
+        return realpath(path)
+    except OSError:
+        return path
+
+
+if sys.executable:
+    _PROJECT_BASE = os.path.dirname(_safe_realpath(sys.executable))
+else:
+    # sys.executable can be empty if argv[0] has been changed and Python is
+    # unable to retrieve the real program name
+    _PROJECT_BASE = _safe_realpath(os.getcwd())
+
+if os.name == "nt" and "pcbuild" in _PROJECT_BASE[-8:].lower():
+    _PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir))
+# PC/VS7.1
+if os.name == "nt" and "\\pc\\v" in _PROJECT_BASE[-10:].lower():
+    _PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir, pardir))
+# PC/AMD64
+if os.name == "nt" and "\\pcbuild\\amd64" in _PROJECT_BASE[-14:].lower():
+    _PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir, pardir))
+
+
+def is_python_build():
+    for fn in ("Setup.dist", "Setup.local"):
+        if os.path.isfile(os.path.join(_PROJECT_BASE, "Modules", fn)):
+            return True
+    return False
+
+_PYTHON_BUILD = is_python_build()
+
+_cfg_read = False
+
+def _ensure_cfg_read():
+    global _cfg_read
+    if not _cfg_read:
+        from ..resources import finder
+        backport_package = __name__.rsplit('.', 1)[0]
+        _finder = finder(backport_package)
+        _cfgfile = _finder.find('sysconfig.cfg')
+        assert _cfgfile, 'sysconfig.cfg exists'
+        with _cfgfile.as_stream() as s:
+            _SCHEMES.readfp(s)
+        if _PYTHON_BUILD:
+            for scheme in ('posix_prefix', 'posix_home'):
+                _SCHEMES.set(scheme, 'include', '{srcdir}/Include')
+                _SCHEMES.set(scheme, 'platinclude', '{projectbase}/.')
+
+        _cfg_read = True
+
+
+_SCHEMES = configparser.RawConfigParser()
+_VAR_REPL = re.compile(r'\{([^{]*?)\}')
+
+def _expand_globals(config):
+    _ensure_cfg_read()
+    if config.has_section('globals'):
+        globals = config.items('globals')
+    else:
+        globals = tuple()
+
+    sections = config.sections()
+    for section in sections:
+        if section == 'globals':
+            continue
+        for option, value in globals:
+            if config.has_option(section, option):
+                continue
+            config.set(section, option, value)
+    config.remove_section('globals')
+
+    # now expanding local variables defined in the cfg file
+    #
+    for section in config.sections():
+        variables = dict(config.items(section))
+
+        def _replacer(matchobj):
+            name = matchobj.group(1)
+            if name in variables:
+                return variables[name]
+            return matchobj.group(0)
+
+        for option, value in config.items(section):
+            config.set(section, option, _VAR_REPL.sub(_replacer, value))
+
+#_expand_globals(_SCHEMES)
+
+ # FIXME don't rely on sys.version here, its format is an implementation detail
+ # of CPython, use sys.version_info or sys.hexversion
+_PY_VERSION = sys.version.split()[0]
+_PY_VERSION_SHORT = sys.version[:3]
+_PY_VERSION_SHORT_NO_DOT = _PY_VERSION[0] + _PY_VERSION[2]
+_PREFIX = os.path.normpath(sys.prefix)
+_EXEC_PREFIX = os.path.normpath(sys.exec_prefix)
+_CONFIG_VARS = None
+_USER_BASE = None
+
+
+def _subst_vars(path, local_vars):
+    """In the string `path`, replace tokens like {some.thing} with the
+    corresponding value from the map `local_vars`.
+
+    If there is no corresponding value, leave the token unchanged.
+    """
+    def _replacer(matchobj):
+        name = matchobj.group(1)
+        if name in local_vars:
+            return local_vars[name]
+        elif name in os.environ:
+            return os.environ[name]
+        return matchobj.group(0)
+    return _VAR_REPL.sub(_replacer, path)
+
+
+def _extend_dict(target_dict, other_dict):
+    target_keys = target_dict.keys()
+    for key, value in other_dict.items():
+        if key in target_keys:
+            continue
+        target_dict[key] = value
+
+
+def _expand_vars(scheme, vars):
+    res = {}
+    if vars is None:
+        vars = {}
+    _extend_dict(vars, get_config_vars())
+
+    for key, value in _SCHEMES.items(scheme):
+        if os.name in ('posix', 'nt'):
+            value = os.path.expanduser(value)
+        res[key] = os.path.normpath(_subst_vars(value, vars))
+    return res
+
+
+def format_value(value, vars):
+    def _replacer(matchobj):
+        name = matchobj.group(1)
+        if name in vars:
+            return vars[name]
+        return matchobj.group(0)
+    return _VAR_REPL.sub(_replacer, value)
+
+
+def _get_default_scheme():
+    if os.name == 'posix':
+        # the default scheme for posix is posix_prefix
+        return 'posix_prefix'
+    return os.name
+
+
+def _getuserbase():
+    env_base = os.environ.get("PYTHONUSERBASE", None)
+
+    def joinuser(*args):
+        return os.path.expanduser(os.path.join(*args))
+
+    # what about 'os2emx', 'riscos' ?
+    if os.name == "nt":
+        base = os.environ.get("APPDATA") or "~"
+        if env_base:
+            return env_base
+        else:
+            return joinuser(base, "Python")
+
+    if sys.platform == "darwin":
+        framework = get_config_var("PYTHONFRAMEWORK")
+        if framework:
+            if env_base:
+                return env_base
+            else:
+                return joinuser("~", "Library", framework, "%d.%d" %
+                                sys.version_info[:2])
+
+    if env_base:
+        return env_base
+    else:
+        return joinuser("~", ".local")
+
+
+def _parse_makefile(filename, vars=None):
+    """Parse a Makefile-style file.
+
+    A dictionary containing name/value pairs is returned.  If an
+    optional dictionary is passed in as the second argument, it is
+    used instead of a new dictionary.
+    """
+    # Regexes needed for parsing Makefile (and similar syntaxes,
+    # like old-style Setup files).
+    _variable_rx = re.compile("([a-zA-Z][a-zA-Z0-9_]+)\s*=\s*(.*)")
+    _findvar1_rx = re.compile(r"\$\(([A-Za-z][A-Za-z0-9_]*)\)")
+    _findvar2_rx = re.compile(r"\${([A-Za-z][A-Za-z0-9_]*)}")
+
+    if vars is None:
+        vars = {}
+    done = {}
+    notdone = {}
+
+    with codecs.open(filename, encoding='utf-8', errors="surrogateescape") as f:
+        lines = f.readlines()
+
+    for line in lines:
+        if line.startswith('#') or line.strip() == '':
+            continue
+        m = _variable_rx.match(line)
+        if m:
+            n, v = m.group(1, 2)
+            v = v.strip()
+            # `$$' is a literal `$' in make
+            tmpv = v.replace('$$', '')
+
+            if "$" in tmpv:
+                notdone[n] = v
+            else:
+                try:
+                    v = int(v)
+                except ValueError:
+                    # insert literal `$'
+                    done[n] = v.replace('$$', '$')
+                else:
+                    done[n] = v
+
+    # do variable interpolation here
+    variables = list(notdone.keys())
+
+    # Variables with a 'PY_' prefix in the makefile. These need to
+    # be made available without that prefix through sysconfig.
+    # Special care is needed to ensure that variable expansion works, even
+    # if the expansion uses the name without a prefix.
+    renamed_variables = ('CFLAGS', 'LDFLAGS', 'CPPFLAGS')
+
+    while len(variables) > 0:
+        for name in tuple(variables):
+            value = notdone[name]
+            m = _findvar1_rx.search(value) or _findvar2_rx.search(value)
+            if m is not None:
+                n = m.group(1)
+                found = True
+                if n in done:
+                    item = str(done[n])
+                elif n in notdone:
+                    # get it on a subsequent round
+                    found = False
+                elif n in os.environ:
+                    # do it like make: fall back to environment
+                    item = os.environ[n]
+
+                elif n in renamed_variables:
+                    if (name.startswith('PY_') and
+                        name[3:] in renamed_variables):
+                        item = ""
+
+                    elif 'PY_' + n in notdone:
+                        found = False
+
+                    else:
+                        item = str(done['PY_' + n])
+
+                else:
+                    done[n] = item = ""
+
+                if found:
+                    after = value[m.end():]
+                    value = value[:m.start()] + item + after
+                    if "$" in after:
+                        notdone[name] = value
+                    else:
+                        try:
+                            value = int(value)
+                        except ValueError:
+                            done[name] = value.strip()
+                        else:
+                            done[name] = value
+                        variables.remove(name)
+
+                        if (name.startswith('PY_') and
+                            name[3:] in renamed_variables):
+
+                            name = name[3:]
+                            if name not in done:
+                                done[name] = value
+
+            else:
+                # bogus variable reference (e.g. "prefix=$/opt/python");
+                # just drop it since we can't deal
+                done[name] = value
+                variables.remove(name)
+
+    # strip spurious spaces
+    for k, v in done.items():
+        if isinstance(v, str):
+            done[k] = v.strip()
+
+    # save the results in the global dictionary
+    vars.update(done)
+    return vars
+
+
+def get_makefile_filename():
+    """Return the path of the Makefile."""
+    if _PYTHON_BUILD:
+        return os.path.join(_PROJECT_BASE, "Makefile")
+    if hasattr(sys, 'abiflags'):
+        config_dir_name = 'config-%s%s' % (_PY_VERSION_SHORT, sys.abiflags)
+    else:
+        config_dir_name = 'config'
+    return os.path.join(get_path('stdlib'), config_dir_name, 'Makefile')
+
+
+def _init_posix(vars):
+    """Initialize the module as appropriate for POSIX systems."""
+    # load the installed Makefile:
+    makefile = get_makefile_filename()
+    try:
+        _parse_makefile(makefile, vars)
+    except IOError as e:
+        msg = "invalid Python installation: unable to open %s" % makefile
+        if hasattr(e, "strerror"):
+            msg = msg + " (%s)" % e.strerror
+        raise IOError(msg)
+    # load the installed pyconfig.h:
+    config_h = get_config_h_filename()
+    try:
+        with open(config_h) as f:
+            parse_config_h(f, vars)
+    except IOError as e:
+        msg = "invalid Python installation: unable to open %s" % config_h
+        if hasattr(e, "strerror"):
+            msg = msg + " (%s)" % e.strerror
+        raise IOError(msg)
+    # On AIX, there are wrong paths to the linker scripts in the Makefile
+    # -- these paths are relative to the Python source, but when installed
+    # the scripts are in another directory.
+    if _PYTHON_BUILD:
+        vars['LDSHARED'] = vars['BLDSHARED']
+
+
+def _init_non_posix(vars):
+    """Initialize the module as appropriate for NT"""
+    # set basic install directories
+    vars['LIBDEST'] = get_path('stdlib')
+    vars['BINLIBDEST'] = get_path('platstdlib')
+    vars['INCLUDEPY'] = get_path('include')
+    vars['SO'] = '.pyd'
+    vars['EXE'] = '.exe'
+    vars['VERSION'] = _PY_VERSION_SHORT_NO_DOT
+    vars['BINDIR'] = os.path.dirname(_safe_realpath(sys.executable))
+
+#
+# public APIs
+#
+
+
+def parse_config_h(fp, vars=None):
+    """Parse a config.h-style file.
+
+    A dictionary containing name/value pairs is returned.  If an
+    optional dictionary is passed in as the second argument, it is
+    used instead of a new dictionary.
+    """
+    if vars is None:
+        vars = {}
+    define_rx = re.compile("#define ([A-Z][A-Za-z0-9_]+) (.*)\n")
+    undef_rx = re.compile("/[*] #undef ([A-Z][A-Za-z0-9_]+) [*]/\n")
+
+    while True:
+        line = fp.readline()
+        if not line:
+            break
+        m = define_rx.match(line)
+        if m:
+            n, v = m.group(1, 2)
+            try:
+                v = int(v)
+            except ValueError:
+                pass
+            vars[n] = v
+        else:
+            m = undef_rx.match(line)
+            if m:
+                vars[m.group(1)] = 0
+    return vars
+
+
+def get_config_h_filename():
+    """Return the path of pyconfig.h."""
+    if _PYTHON_BUILD:
+        if os.name == "nt":
+            inc_dir = os.path.join(_PROJECT_BASE, "PC")
+        else:
+            inc_dir = _PROJECT_BASE
+    else:
+        inc_dir = get_path('platinclude')
+    return os.path.join(inc_dir, 'pyconfig.h')
+
+
+def get_scheme_names():
+    """Return a tuple containing the schemes names."""
+    return tuple(sorted(_SCHEMES.sections()))
+
+
+def get_path_names():
+    """Return a tuple containing the paths names."""
+    # xxx see if we want a static list
+    return _SCHEMES.options('posix_prefix')
+
+
+def get_paths(scheme=_get_default_scheme(), vars=None, expand=True):
+    """Return a mapping containing an install scheme.
+
+    ``scheme`` is the install scheme name. If not provided, it will
+    return the default scheme for the current platform.
+    """
+    _ensure_cfg_read()
+    if expand:
+        return _expand_vars(scheme, vars)
+    else:
+        return dict(_SCHEMES.items(scheme))
+
+
+def get_path(name, scheme=_get_default_scheme(), vars=None, expand=True):
+    """Return a path corresponding to the scheme.
+
+    ``scheme`` is the install scheme name.
+    """
+    return get_paths(scheme, vars, expand)[name]
+
+
+def get_config_vars(*args):
+    """With no arguments, return a dictionary of all configuration
+    variables relevant for the current platform.
+
+    On Unix, this means every variable defined in Python's installed Makefile;
+    On Windows and Mac OS it's a much smaller set.
+
+    With arguments, return a list of values that result from looking up
+    each argument in the configuration variable dictionary.
+    """
+    global _CONFIG_VARS
+    if _CONFIG_VARS is None:
+        _CONFIG_VARS = {}
+        # Normalized versions of prefix and exec_prefix are handy to have;
+        # in fact, these are the standard versions used most places in the
+        # distutils2 module.
+        _CONFIG_VARS['prefix'] = _PREFIX
+        _CONFIG_VARS['exec_prefix'] = _EXEC_PREFIX
+        _CONFIG_VARS['py_version'] = _PY_VERSION
+        _CONFIG_VARS['py_version_short'] = _PY_VERSION_SHORT
+        _CONFIG_VARS['py_version_nodot'] = _PY_VERSION[0] + _PY_VERSION[2]
+        _CONFIG_VARS['base'] = _PREFIX
+        _CONFIG_VARS['platbase'] = _EXEC_PREFIX
+        _CONFIG_VARS['projectbase'] = _PROJECT_BASE
+        try:
+            _CONFIG_VARS['abiflags'] = sys.abiflags
+        except AttributeError:
+            # sys.abiflags may not be defined on all platforms.
+            _CONFIG_VARS['abiflags'] = ''
+
+        if os.name in ('nt', 'os2'):
+            _init_non_posix(_CONFIG_VARS)
+        if os.name == 'posix':
+            _init_posix(_CONFIG_VARS)
+        # Setting 'userbase' is done below the call to the
+        # init function to enable using 'get_config_var' in
+        # the init-function.
+        if sys.version >= '2.6':
+            _CONFIG_VARS['userbase'] = _getuserbase()
+
+        if 'srcdir' not in _CONFIG_VARS:
+            _CONFIG_VARS['srcdir'] = _PROJECT_BASE
+        else:
+            _CONFIG_VARS['srcdir'] = _safe_realpath(_CONFIG_VARS['srcdir'])
+
+        # Convert srcdir into an absolute path if it appears necessary.
+        # Normally it is relative to the build directory.  However, during
+        # testing, for example, we might be running a non-installed python
+        # from a different directory.
+        if _PYTHON_BUILD and os.name == "posix":
+            base = _PROJECT_BASE
+            try:
+                cwd = os.getcwd()
+            except OSError:
+                cwd = None
+            if (not os.path.isabs(_CONFIG_VARS['srcdir']) and
+                base != cwd):
+                # srcdir is relative and we are not in the same directory
+                # as the executable. Assume executable is in the build
+                # directory and make srcdir absolute.
+                srcdir = os.path.join(base, _CONFIG_VARS['srcdir'])
+                _CONFIG_VARS['srcdir'] = os.path.normpath(srcdir)
+
+        if sys.platform == 'darwin':
+            kernel_version = os.uname()[2]  # Kernel version (8.4.3)
+            major_version = int(kernel_version.split('.')[0])
+
+            if major_version < 8:
+                # On Mac OS X before 10.4, check if -arch and -isysroot
+                # are in CFLAGS or LDFLAGS and remove them if they are.
+                # This is needed when building extensions on a 10.3 system
+                # using a universal build of python.
+                for key in ('LDFLAGS', 'BASECFLAGS',
+                        # a number of derived variables. These need to be
+                        # patched up as well.
+                        'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'):
+                    flags = _CONFIG_VARS[key]
+                    flags = re.sub('-arch\s+\w+\s', ' ', flags)
+                    flags = re.sub('-isysroot [^ \t]*', ' ', flags)
+                    _CONFIG_VARS[key] = flags
+            else:
+                # Allow the user to override the architecture flags using
+                # an environment variable.
+                # NOTE: This name was introduced by Apple in OSX 10.5 and
+                # is used by several scripting languages distributed with
+                # that OS release.
+                if 'ARCHFLAGS' in os.environ:
+                    arch = os.environ['ARCHFLAGS']
+                    for key in ('LDFLAGS', 'BASECFLAGS',
+                        # a number of derived variables. These need to be
+                        # patched up as well.
+                        'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'):
+
+                        flags = _CONFIG_VARS[key]
+                        flags = re.sub('-arch\s+\w+\s', ' ', flags)
+                        flags = flags + ' ' + arch
+                        _CONFIG_VARS[key] = flags
+
+                # If we're on OSX 10.5 or later and the user tries to
+                # compiles an extension using an SDK that is not present
+                # on the current machine it is better to not use an SDK
+                # than to fail.
+                #
+                # The major usecase for this is users using a Python.org
+                # binary installer  on OSX 10.6: that installer uses
+                # the 10.4u SDK, but that SDK is not installed by default
+                # when you install Xcode.
+                #
+                CFLAGS = _CONFIG_VARS.get('CFLAGS', '')
+                m = re.search('-isysroot\s+(\S+)', CFLAGS)
+                if m is not None:
+                    sdk = m.group(1)
+                    if not os.path.exists(sdk):
+                        for key in ('LDFLAGS', 'BASECFLAGS',
+                             # a number of derived variables. These need to be
+                             # patched up as well.
+                            'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'):
+
+                            flags = _CONFIG_VARS[key]
+                            flags = re.sub('-isysroot\s+\S+(\s|$)', ' ', flags)
+                            _CONFIG_VARS[key] = flags
+
+    if args:
+        vals = []
+        for name in args:
+            vals.append(_CONFIG_VARS.get(name))
+        return vals
+    else:
+        return _CONFIG_VARS
+
+
+def get_config_var(name):
+    """Return the value of a single variable using the dictionary returned by
+    'get_config_vars()'.
+
+    Equivalent to get_config_vars().get(name)
+    """
+    return get_config_vars().get(name)
+
+
+def get_platform():
+    """Return a string that identifies the current platform.
+
+    This is used mainly to distinguish platform-specific build directories and
+    platform-specific built distributions.  Typically includes the OS name
+    and version and the architecture (as supplied by 'os.uname()'),
+    although the exact information included depends on the OS; eg. for IRIX
+    the architecture isn't particularly important (IRIX only runs on SGI
+    hardware), but for Linux the kernel version isn't particularly
+    important.
+
+    Examples of returned values:
+       linux-i586
+       linux-alpha (?)
+       solaris-2.6-sun4u
+       irix-5.3
+       irix64-6.2
+
+    Windows will return one of:
+       win-amd64 (64bit Windows on AMD64 (aka x86_64, Intel64, EM64T, etc)
+       win-ia64 (64bit Windows on Itanium)
+       win32 (all others - specifically, sys.platform is returned)
+
+    For other non-POSIX platforms, currently just returns 'sys.platform'.
+    """
+    if os.name == 'nt':
+        # sniff sys.version for architecture.
+        prefix = " bit ("
+        i = sys.version.find(prefix)
+        if i == -1:
+            return sys.platform
+        j = sys.version.find(")", i)
+        look = sys.version[i+len(prefix):j].lower()
+        if look == 'amd64':
+            return 'win-amd64'
+        if look == 'itanium':
+            return 'win-ia64'
+        return sys.platform
+
+    if os.name != "posix" or not hasattr(os, 'uname'):
+        # XXX what about the architecture? NT is Intel or Alpha,
+        # Mac OS is M68k or PPC, etc.
+        return sys.platform
+
+    # Try to distinguish various flavours of Unix
+    osname, host, release, version, machine = os.uname()
+
+    # Convert the OS name to lowercase, remove '/' characters
+    # (to accommodate BSD/OS), and translate spaces (for "Power Macintosh")
+    osname = osname.lower().replace('/', '')
+    machine = machine.replace(' ', '_')
+    machine = machine.replace('/', '-')
+
+    if osname[:5] == "linux":
+        # At least on Linux/Intel, 'machine' is the processor --
+        # i386, etc.
+        # XXX what about Alpha, SPARC, etc?
+        return  "%s-%s" % (osname, machine)
+    elif osname[:5] == "sunos":
+        if release[0] >= "5":           # SunOS 5 == Solaris 2
+            osname = "solaris"
+            release = "%d.%s" % (int(release[0]) - 3, release[2:])
+        # fall through to standard osname-release-machine representation
+    elif osname[:4] == "irix":              # could be "irix64"!
+        return "%s-%s" % (osname, release)
+    elif osname[:3] == "aix":
+        return "%s-%s.%s" % (osname, version, release)
+    elif osname[:6] == "cygwin":
+        osname = "cygwin"
+        rel_re = re.compile(r'[\d.]+')
+        m = rel_re.match(release)
+        if m:
+            release = m.group()
+    elif osname[:6] == "darwin":
+        #
+        # For our purposes, we'll assume that the system version from
+        # distutils' perspective is what MACOSX_DEPLOYMENT_TARGET is set
+        # to. This makes the compatibility story a bit more sane because the
+        # machine is going to compile and link as if it were
+        # MACOSX_DEPLOYMENT_TARGET.
+        cfgvars = get_config_vars()
+        macver = cfgvars.get('MACOSX_DEPLOYMENT_TARGET')
+
+        if True:
+            # Always calculate the release of the running machine,
+            # needed to determine if we can build fat binaries or not.
+
+            macrelease = macver
+            # Get the system version. Reading this plist is a documented
+            # way to get the system version (see the documentation for
+            # the Gestalt Manager)
+            try:
+                f = open('/System/Library/CoreServices/SystemVersion.plist')
+            except IOError:
+                # We're on a plain darwin box, fall back to the default
+                # behaviour.
+                pass
+            else:
+                try:
+                    m = re.search(r'<key>ProductUserVisibleVersion</key>\s*'
+                                  r'<string>(.*?)</string>', f.read())
+                finally:
+                    f.close()
+                if m is not None:
+                    macrelease = '.'.join(m.group(1).split('.')[:2])
+                # else: fall back to the default behaviour
+
+        if not macver:
+            macver = macrelease
+
+        if macver:
+            release = macver
+            osname = "macosx"
+
+            if ((macrelease + '.') >= '10.4.' and
+                '-arch' in get_config_vars().get('CFLAGS', '').strip()):
+                # The universal build will build fat binaries, but not on
+                # systems before 10.4
+                #
+                # Try to detect 4-way universal builds, those have machine-type
+                # 'universal' instead of 'fat'.
+
+                machine = 'fat'
+                cflags = get_config_vars().get('CFLAGS')
+
+                archs = re.findall('-arch\s+(\S+)', cflags)
+                archs = tuple(sorted(set(archs)))
+
+                if len(archs) == 1:
+                    machine = archs[0]
+                elif archs == ('i386', 'ppc'):
+                    machine = 'fat'
+                elif archs == ('i386', 'x86_64'):
+                    machine = 'intel'
+                elif archs == ('i386', 'ppc', 'x86_64'):
+                    machine = 'fat3'
+                elif archs == ('ppc64', 'x86_64'):
+                    machine = 'fat64'
+                elif archs == ('i386', 'ppc', 'ppc64', 'x86_64'):
+                    machine = 'universal'
+                else:
+                    raise ValueError(
+                       "Don't know machine value for archs=%r" % (archs,))
+
+            elif machine == 'i386':
+                # On OSX the machine type returned by uname is always the
+                # 32-bit variant, even if the executable architecture is
+                # the 64-bit variant
+                if sys.maxsize >= 2**32:
+                    machine = 'x86_64'
+
+            elif machine in ('PowerPC', 'Power_Macintosh'):
+                # Pick a sane name for the PPC architecture.
+                # See 'i386' case
+                if sys.maxsize >= 2**32:
+                    machine = 'ppc64'
+                else:
+                    machine = 'ppc'
+
+    return "%s-%s-%s" % (osname, release, machine)
+
+
+def get_python_version():
+    return _PY_VERSION_SHORT
+
+
+def _print_dict(title, data):
+    for index, (key, value) in enumerate(sorted(data.items())):
+        if index == 0:
+            print('%s: ' % (title))
+        print('\t%s = "%s"' % (key, value))
+
+
+def _main():
+    """Display all information sysconfig detains."""
+    print('Platform: "%s"' % get_platform())
+    print('Python version: "%s"' % get_python_version())
+    print('Current installation scheme: "%s"' % _get_default_scheme())
+    print()
+    _print_dict('Paths', get_paths())
+    print()
+    _print_dict('Variables', get_config_vars())
+
+
+if __name__ == '__main__':
+    _main()
@@ -0,0 +1,488 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2013 Vinay Sajip.
+# Licensed to the Python Software Foundation under a contributor agreement.
+# See LICENSE.txt and CONTRIBUTORS.txt.
+#
+import hashlib
+import logging
+import os
+import shutil
+import subprocess
+import tempfile
+try:
+    from threading import Thread
+except ImportError:
+    from dummy_threading import Thread
+
+from . import DistlibException
+from .compat import (HTTPBasicAuthHandler, Request, HTTPPasswordMgr,
+                     urlparse, build_opener, string_types)
+from .util import cached_property, zip_dir, ServerProxy
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_INDEX = 'https://pypi.python.org/pypi'
+DEFAULT_REALM = 'pypi'
+
+class PackageIndex(object):
+    """
+    This class represents a package index compatible with PyPI, the Python
+    Package Index.
+    """
+
+    boundary = b'----------ThIs_Is_tHe_distlib_index_bouNdaRY_$'
+
+    def __init__(self, url=None):
+        """
+        Initialise an instance.
+
+        :param url: The URL of the index. If not specified, the URL for PyPI is
+                    used.
+        """
+        self.url = url or DEFAULT_INDEX
+        self.read_configuration()
+        scheme, netloc, path, params, query, frag = urlparse(self.url)
+        if params or query or frag or scheme not in ('http', 'https'):
+            raise DistlibException('invalid repository: %s' % self.url)
+        self.password_handler = None
+        self.ssl_verifier = None
+        self.gpg = None
+        self.gpg_home = None
+        self.rpc_proxy = None
+        with open(os.devnull, 'w') as sink:
+            for s in ('gpg2', 'gpg'):
+                try:
+                    rc = subprocess.check_call([s, '--version'], stdout=sink,
+                                               stderr=sink)
+                    if rc == 0:
+                        self.gpg = s
+                        break
+                except OSError:
+                    pass
+
+    def _get_pypirc_command(self):
+        """
+        Get the distutils command for interacting with PyPI configurations.
+        :return: the command.
+        """
+        from distutils.core import Distribution
+        from distutils.config import PyPIRCCommand
+        d = Distribution()
+        return PyPIRCCommand(d)
+
+    def read_configuration(self):
+        """
+        Read the PyPI access configuration as supported by distutils, getting
+        PyPI to do the acutal work. This populates ``username``, ``password``,
+        ``realm`` and ``url`` attributes from the configuration.
+        """
+        # get distutils to do the work
+        c = self._get_pypirc_command()
+        c.repository = self.url
+        cfg = c._read_pypirc()
+        self.username = cfg.get('username')
+        self.password = cfg.get('password')
+        self.realm = cfg.get('realm', 'pypi')
+        self.url = cfg.get('repository', self.url)
+
+    def save_configuration(self):
+        """
+        Save the PyPI access configuration. You must have set ``username`` and
+        ``password`` attributes before calling this method.
+
+        Again, distutils is used to do the actual work.
+        """
+        self.check_credentials()
+        # get distutils to do the work
+        c = self._get_pypirc_command()
+        c._store_pypirc(self.username, self.password)
+
+    def check_credentials(self):
+        """
+        Check that ``username`` and ``password`` have been set, and raise an
+        exception if not.
+        """
+        if self.username is None or self.password is None:
+            raise DistlibException('username and password must be set')
+        pm = HTTPPasswordMgr()
+        _, netloc, _, _, _, _ = urlparse(self.url)
+        pm.add_password(self.realm, netloc, self.username, self.password)
+        self.password_handler = HTTPBasicAuthHandler(pm)
+
+    def register(self, metadata):
+        """
+        Register a distribution on PyPI, using the provided metadata.
+
+        :param metadata: A :class:`Metadata` instance defining at least a name
+                         and version number for the distribution to be
+                         registered.
+        :return: The HTTP response received from PyPI upon submission of the
+                request.
+        """
+        self.check_credentials()
+        metadata.validate()
+        d = metadata.todict()
+        d[':action'] = 'verify'
+        request = self.encode_request(d.items(), [])
+        response = self.send_request(request)
+        d[':action'] = 'submit'
+        request = self.encode_request(d.items(), [])
+        return self.send_request(request)
+
+    def _reader(self, name, stream, outbuf):
+        """
+        Thread runner for reading lines of from a subprocess into a buffer.
+
+        :param name: The logical name of the stream (used for logging only).
+        :param stream: The stream to read from. This will typically a pipe
+                       connected to the output stream of a subprocess.
+        :param outbuf: The list to append the read lines to.
+        """
+        while True:
+            s = stream.readline()
+            if not s:
+                break
+            s = s.decode('utf-8').rstrip()
+            outbuf.append(s)
+            logger.debug('%s: %s' % (name, s))
+        stream.close()
+
+    def get_sign_command(self, filename, signer, sign_password):
+        """
+        Return a suitable command for signing a file.
+
+        :param filename: The pathname to the file to be signed.
+        :param signer: The identifier of the signer of the file.
+        :param sign_password: The passphrase for the signer's
+                              private key used for signing.
+        :return: The signing command as a list suitable to be
+                 passed to :class:`subprocess.Popen`.
+        """
+        cmd = [self.gpg, '--status-fd', '2', '--no-tty']
+        if self.gpg_home:
+            cmd.extend(['--homedir', self.gpg_home])
+        if sign_password is not None:
+            cmd.extend(['--batch', '--passphrase-fd', '0'])
+        td = tempfile.mkdtemp()
+        sf = os.path.join(td, os.path.basename(filename) + '.asc')
+        cmd.extend(['--detach-sign', '--armor', '--local-user',
+                    signer, '--output', sf, filename])
+        logger.debug('invoking: %s', ' '.join(cmd))
+        return cmd, sf
+
+    def run_command(self, cmd, input_data=None):
+        """
+        Run a command in a child process , passing it any input data specified.
+
+        :param cmd: The command to run.
+        :param input_data: If specified, this must be a byte string containing
+                           data to be sent to the child process.
+        :return: A tuple consisting of the subprocess' exit code, a list of
+                 lines read from the subprocess' ``stdout``, and a list of
+                 lines read from the subprocess' ``stderr``.
+        """
+        kwargs = {
+            'stdout': subprocess.PIPE,
+            'stderr': subprocess.PIPE,
+        }
+        if input_data is not None:
+            kwargs['stdin'] = subprocess.PIPE
+        stdout = []
+        stderr = []
+        p = subprocess.Popen(cmd, **kwargs)
+        # We don't use communicate() here because we may need to
+        # get clever with interacting with the command
+        t1 = Thread(target=self._reader, args=('stdout', p.stdout, stdout))
+        t1.start()
+        t2 = Thread(target=self._reader, args=('stderr', p.stderr, stderr))
+        t2.start()
+        if input_data is not None:
+            p.stdin.write(input_data)
+            p.stdin.close()
+
+        p.wait()
+        t1.join()
+        t2.join()
+        return p.returncode, stdout, stderr
+
+    def sign_file(self, filename, signer, sign_password):
+        """
+        Sign a file.
+
+        :param filename: The pathname to the file to be signed.
+        :param signer: The identifier of the signer of the file.
+        :param sign_password: The passphrase for the signer's
+                              private key used for signing.
+        :return: The absolute pathname of the file where the signature is
+                 stored.
+        """
+        cmd, sig_file = self.get_sign_command(filename, signer, sign_password)
+        rc, stdout, stderr = self.run_command(cmd,
+                                              sign_password.encode('utf-8'))
+        if rc != 0:
+            raise DistlibException('sign command failed with error '
+                                   'code %s' % rc)
+        return sig_file
+
+    def upload_file(self, metadata, filename, signer=None, sign_password=None,
+                    filetype='sdist', pyversion='source'):
+        """
+        Upload a release file to the index.
+
+        :param metadata: A :class:`Metadata` instance defining at least a name
+                         and version number for the file to be uploaded.
+        :param filename: The pathname of the file to be uploaded.
+        :param signer: The identifier of the signer of the file.
+        :param sign_password: The passphrase for the signer's
+                              private key used for signing.
+        :param filetype: The type of the file being uploaded. This is the
+                        distutils command which produced that file, e.g.
+                        ``sdist`` or ``bdist_wheel``.
+        :param pyversion: The version of Python which the release relates
+                          to. For code compatible with any Python, this would
+                          be ``source``, otherwise it would be e.g. ``3.2``.
+        :return: The HTTP response received from PyPI upon submission of the
+                request.
+        """
+        self.check_credentials()
+        if not os.path.exists(filename):
+            raise DistlibException('not found: %s' % filename)
+        metadata.validate()
+        d = metadata.todict()
+        sig_file = None
+        if signer:
+            if not self.gpg:
+                logger.warning('no signing program available - not signed')
+            else:
+                sig_file = self.sign_file(filename, signer, sign_password)
+        with open(filename, 'rb') as f:
+            file_data = f.read()
+        md5_digest = hashlib.md5(file_data).hexdigest()
+        sha256_digest = hashlib.sha256(file_data).hexdigest()
+        d.update({
+            ':action': 'file_upload',
+            'protcol_version': '1',
+            'filetype': filetype,
+            'pyversion': pyversion,
+            'md5_digest': md5_digest,
+            'sha256_digest': sha256_digest,
+        })
+        files = [('content', os.path.basename(filename), file_data)]
+        if sig_file:
+            with open(sig_file, 'rb') as f:
+                sig_data = f.read()
+            files.append(('gpg_signature', os.path.basename(sig_file),
+                         sig_data))
+            shutil.rmtree(os.path.dirname(sig_file))
+        request = self.encode_request(d.items(), files)
+        return self.send_request(request)
+
+    def upload_documentation(self, metadata, doc_dir):
+        """
+        Upload documentation to the index.
+
+        :param metadata: A :class:`Metadata` instance defining at least a name
+                         and version number for the documentation to be
+                         uploaded.
+        :param doc_dir: The pathname of the directory which contains the
+                        documentation. This should be the directory that
+                        contains the ``index.html`` for the documentation.
+        :return: The HTTP response received from PyPI upon submission of the
+                request.
+        """
+        self.check_credentials()
+        if not os.path.isdir(doc_dir):
+            raise DistlibException('not a directory: %r' % doc_dir)
+        fn = os.path.join(doc_dir, 'index.html')
+        if not os.path.exists(fn):
+            raise DistlibException('not found: %r' % fn)
+        metadata.validate()
+        name, version = metadata.name, metadata.version
+        zip_data = zip_dir(doc_dir).getvalue()
+        fields = [(':action', 'doc_upload'),
+                  ('name', name), ('version', version)]
+        files = [('content', name, zip_data)]
+        request = self.encode_request(fields, files)
+        return self.send_request(request)
+
+    def get_verify_command(self, signature_filename, data_filename):
+        """
+        Return a suitable command for verifying a file.
+
+        :param signature_filename: The pathname to the file containing the
+                                   signature.
+        :param data_filename: The pathname to the file containing the
+                              signed data.
+        :return: The verifying command as a list suitable to be
+                 passed to :class:`subprocess.Popen`.
+        """
+        cmd = [self.gpg, '--status-fd', '2', '--no-tty']
+        if self.gpg_home:
+            cmd.extend(['--homedir', self.gpg_home])
+        cmd.extend(['--verify', signature_filename, data_filename])
+        logger.debug('invoking: %s', ' '.join(cmd))
+        return cmd
+
+    def verify_signature(self, signature_filename, data_filename):
+        """
+        Verify a signature for a file.
+
+        :param signature_filename: The pathname to the file containing the
+                                   signature.
+        :param data_filename: The pathname to the file containing the
+                              signed data.
+        :return: True if the signature was verified, else False.
+        """
+        if not self.gpg:
+            raise DistlibException('verification unavailable because gpg '
+                                   'unavailable')
+        cmd = self.get_verify_command(signature_filename, data_filename)
+        rc, stdout, stderr = self.run_command(cmd)
+        if rc not in (0, 1):
+            raise DistlibException('verify command failed with error '
+                             'code %s' % rc)
+        return rc == 0
+
+    def download_file(self, url, destfile, digest=None, reporthook=None):
+        """
+        This is a convenience method for downloading a file from an URL.
+        Normally, this will be a file from the index, though currently
+        no check is made for this (i.e. a file can be downloaded from
+        anywhere).
+
+        The method is just like the :func:`urlretrieve` function in the
+        standard library, except that it allows digest computation to be
+        done during download and checking that the downloaded data
+        matched any expected value.
+
+        :param url: The URL of the file to be downloaded (assumed to be
+                    available via an HTTP GET request).
+        :param destfile: The pathname where the downloaded file is to be
+                         saved.
+        :param digest: If specified, this must be a (hasher, value)
+                       tuple, where hasher is the algorithm used (e.g.
+                       ``'md5'``) and ``value`` is the expected value.
+        :param reporthook: The same as for :func:`urlretrieve` in the
+                           standard library.
+        """
+        if digest is None:
+            digester = None
+            logger.debug('No digest specified')
+        else:
+            if isinstance(digest, (list, tuple)):
+                hasher, digest = digest
+            else:
+                hasher = 'md5'
+            digester = getattr(hashlib, hasher)()
+            logger.debug('Digest specified: %s' % digest)
+        # The following code is equivalent to urlretrieve.
+        # We need to do it this way so that we can compute the
+        # digest of the file as we go.
+        with open(destfile, 'wb') as dfp:
+            # addinfourl is not a context manager on 2.x
+            # so we have to use try/finally
+            sfp = self.send_request(Request(url))
+            try:
+                headers = sfp.info()
+                blocksize = 8192
+                size = -1
+                read = 0
+                blocknum = 0
+                if "content-length" in headers:
+                    size = int(headers["Content-Length"])
+                if reporthook:
+                    reporthook(blocknum, blocksize, size)
+                while True:
+                    block = sfp.read(blocksize)
+                    if not block:
+                        break
+                    read += len(block)
+                    dfp.write(block)
+                    if digester:
+                        digester.update(block)
+                    blocknum += 1
+                    if reporthook:
+                        reporthook(blocknum, blocksize, size)
+            finally:
+                sfp.close()
+
+        # check that we got the whole file, if we can
+        if size >= 0 and read < size:
+            raise DistlibException(
+                'retrieval incomplete: got only %d out of %d bytes'
+                % (read, size))
+        # if we have a digest, it must match.
+        if digester:
+            actual = digester.hexdigest()
+            if digest != actual:
+                raise DistlibException('%s digest mismatch for %s: expected '
+                                       '%s, got %s' % (hasher, destfile,
+                                                       digest, actual))
+            logger.debug('Digest verified: %s', digest)
+
+    def send_request(self, req):
+        """
+        Send a standard library :class:`Request` to PyPI and return its
+        response.
+
+        :param req: The request to send.
+        :return: The HTTP response from PyPI (a standard library HTTPResponse).
+        """
+        handlers = []
+        if self.password_handler:
+            handlers.append(self.password_handler)
+        if self.ssl_verifier:
+            handlers.append(self.ssl_verifier)
+        opener = build_opener(*handlers)
+        return opener.open(req)
+
+    def encode_request(self, fields, files):
+        """
+        Encode fields and files for posting to an HTTP server.
+
+        :param fields: The fields to send as a list of (fieldname, value)
+                       tuples.
+        :param files: The files to send as a list of (fieldname, filename,
+                      file_bytes) tuple.
+        """
+        # Adapted from packaging, which in turn was adapted from
+        # http://code.activestate.com/recipes/146306
+
+        parts = []
+        boundary = self.boundary
+        for k, values in fields:
+            if not isinstance(values, (list, tuple)):
+                values = [values]
+
+            for v in values:
+                parts.extend((
+                    b'--' + boundary,
+                    ('Content-Disposition: form-data; name="%s"' %
+                     k).encode('utf-8'),
+                    b'',
+                    v.encode('utf-8')))
+        for key, filename, value in files:
+            parts.extend((
+                b'--' + boundary,
+                ('Content-Disposition: form-data; name="%s"; filename="%s"' %
+                 (key, filename)).encode('utf-8'),
+                b'',
+                value))
+
+        parts.extend((b'--' + boundary + b'--', b''))
+
+        body = b'\r\n'.join(parts)
+        ct = b'multipart/form-data; boundary=' + boundary
+        headers = {
+            'Content-type': ct,
+            'Content-length': str(len(body))
+        }
+        return Request(self.url, body, headers)
+
+    def search(self, terms, operator=None):
+        if isinstance(terms, string_types):
+            terms = {'name': terms}
+        if self.rpc_proxy is None:
+            self.rpc_proxy = ServerProxy(self.url, timeout=3.0)
+        return self.rpc_proxy.search(terms, operator or 'and')
@@ -0,0 +1,364 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2012-2013 Python Software Foundation.
+# See LICENSE.txt and CONTRIBUTORS.txt.
+#
+"""
+Class representing the list of files in a distribution.
+
+Equivalent to distutils.filelist, but fixes some problems.
+"""
+import fnmatch
+import logging
+import os
+import re
+
+from . import DistlibException
+from .compat import fsdecode
+from .util import convert_path
+
+
+__all__ = ['Manifest']
+
+logger = logging.getLogger(__name__)
+
+# a \ followed by some spaces + EOL
+_COLLAPSE_PATTERN = re.compile('\\\w*\n', re.M)
+_COMMENTED_LINE = re.compile('#.*?(?=\n)|\n(?=$)', re.M | re.S)
+
+
+class Manifest(object):
+    """A list of files built by on exploring the filesystem and filtered by
+    applying various patterns to what we find there.
+    """
+
+    def __init__(self, base=None):
+        """
+        Initialise an instance.
+
+        :param base: The base directory to explore under.
+        """
+        self.base = os.path.abspath(os.path.normpath(base or os.getcwd()))
+        self.prefix = self.base + os.sep
+        self.allfiles = None
+        self.files = set()
+
+    #
+    # Public API
+    #
+
+    def findall(self):
+        """Find all files under the base and set ``allfiles`` to the absolute
+        pathnames of files found.
+        """
+        from stat import S_ISREG, S_ISDIR, S_ISLNK
+
+        self.allfiles = allfiles = []
+        root = self.base
+        stack = [root]
+        pop = stack.pop
+        push = stack.append
+
+        while stack:
+            root = pop()
+            names = os.listdir(root)
+
+            for name in names:
+                fullname = os.path.join(root, name)
+
+                # Avoid excess stat calls -- just one will do, thank you!
+                stat = os.stat(fullname)
+                mode = stat.st_mode
+                if S_ISREG(mode):
+                    allfiles.append(fsdecode(fullname))
+                elif S_ISDIR(mode) and not S_ISLNK(mode):
+                    push(fullname)
+
+    def add(self, item):
+        """
+        Add a file to the manifest.
+
+        :param item: The pathname to add. This can be relative to the base.
+        """
+        if not item.startswith(self.prefix):
+            item = os.path.join(self.base, item)
+        self.files.add(os.path.normpath(item))
+
+    def add_many(self, items):
+        """
+        Add a list of files to the manifest.
+
+        :param items: The pathnames to add. These can be relative to the base.
+        """
+        for item in items:
+            self.add(item)
+
+    def sorted(self, wantdirs=False):
+        """
+        Return sorted files in directory order
+        """
+
+        def add_dir(dirs, d):
+            dirs.add(d)
+            logger.debug('add_dir added %s', d)
+            if d != self.base:
+                parent, _ = os.path.split(d)
+                assert parent not in ('', '/')
+                add_dir(dirs, parent)
+
+        result = set(self.files)    # make a copy!
+        if wantdirs:
+            dirs = set()
+            for f in result:
+                add_dir(dirs, os.path.dirname(f))
+            result |= dirs
+        return [os.path.join(*path_tuple) for path_tuple in
+                sorted(os.path.split(path) for path in result)]
+
+    def clear(self):
+        """Clear all collected files."""
+        self.files = set()
+        self.allfiles = []
+
+    def process_directive(self, directive):
+        """
+        Process a directive which either adds some files from ``allfiles`` to
+        ``files``, or removes some files from ``files``.
+
+        :param directive: The directive to process. This should be in a format
+                     compatible with distutils ``MANIFEST.in`` files:
+
+                     http://docs.python.org/distutils/sourcedist.html#commands
+        """
+        # Parse the line: split it up, make sure the right number of words
+        # is there, and return the relevant words.  'action' is always
+        # defined: it's the first word of the line.  Which of the other
+        # three are defined depends on the action; it'll be either
+        # patterns, (dir and patterns), or (dirpattern).
+        action, patterns, thedir, dirpattern = self._parse_directive(directive)
+
+        # OK, now we know that the action is valid and we have the
+        # right number of words on the line for that action -- so we
+        # can proceed with minimal error-checking.
+        if action == 'include':
+            for pattern in patterns:
+                if not self._include_pattern(pattern, anchor=True):
+                    logger.warning('no files found matching %r', pattern)
+
+        elif action == 'exclude':
+            for pattern in patterns:
+                if not self._exclude_pattern(pattern, anchor=True):
+                    logger.warning('no previously-included files '
+                                   'found matching %r', pattern)
+
+        elif action == 'global-include':
+            for pattern in patterns:
+                if not self._include_pattern(pattern, anchor=False):
+                    logger.warning('no files found matching %r '
+                                   'anywhere in distribution', pattern)
+
+        elif action == 'global-exclude':
+            for pattern in patterns:
+                if not self._exclude_pattern(pattern, anchor=False):
+                    logger.warning('no previously-included files '
+                                   'matching %r found anywhere in '
+                                   'distribution', pattern)
+
+        elif action == 'recursive-include':
+            for pattern in patterns:
+                if not self._include_pattern(pattern, prefix=thedir):
+                    logger.warning('no files found matching %r '
+                                   'under directory %r', pattern, thedir)
+
+        elif action == 'recursive-exclude':
+            for pattern in patterns:
+                if not self._exclude_pattern(pattern, prefix=thedir):
+                    logger.warning('no previously-included files '
+                                   'matching %r found under directory %r',
+                                   pattern, thedir)
+
+        elif action == 'graft':
+            if not self._include_pattern(None, prefix=dirpattern):
+                logger.warning('no directories found matching %r',
+                               dirpattern)
+
+        elif action == 'prune':
+            if not self._exclude_pattern(None, prefix=dirpattern):
+                logger.warning('no previously-included directories found '
+                               'matching %r', dirpattern)
+        else:   # pragma: no cover
+            # This should never happen, as it should be caught in
+            # _parse_template_line
+            raise DistlibException(
+                'invalid action %r' % action)
+
+    #
+    # Private API
+    #
+
+    def _parse_directive(self, directive):
+        """
+        Validate a directive.
+        :param directive: The directive to validate.
+        :return: A tuple of action, patterns, thedir, dir_patterns
+        """
+        words = directive.split()
+        if len(words) == 1 and words[0] not in ('include', 'exclude',
+                                                'global-include',
+                                                'global-exclude',
+                                                'recursive-include',
+                                                'recursive-exclude',
+                                                'graft', 'prune'):
+            # no action given, let's use the default 'include'
+            words.insert(0, 'include')
+
+        action = words[0]
+        patterns = thedir = dir_pattern = None
+
+        if action in ('include', 'exclude',
+                      'global-include', 'global-exclude'):
+            if len(words) < 2:
+                raise DistlibException(
+                    '%r expects <pattern1> <pattern2> ...' % action)
+
+            patterns = [convert_path(word) for word in words[1:]]
+
+        elif action in ('recursive-include', 'recursive-exclude'):
+            if len(words) < 3:
+                raise DistlibException(
+                    '%r expects <dir> <pattern1> <pattern2> ...' % action)
+
+            thedir = convert_path(words[1])
+            patterns = [convert_path(word) for word in words[2:]]
+
+        elif action in ('graft', 'prune'):
+            if len(words) != 2:
+                raise DistlibException(
+                    '%r expects a single <dir_pattern>' % action)
+
+            dir_pattern = convert_path(words[1])
+
+        else:
+            raise DistlibException('unknown action %r' % action)
+
+        return action, patterns, thedir, dir_pattern
+
+    def _include_pattern(self, pattern, anchor=True, prefix=None,
+                         is_regex=False):
+        """Select strings (presumably filenames) from 'self.files' that
+        match 'pattern', a Unix-style wildcard (glob) pattern.
+
+        Patterns are not quite the same as implemented by the 'fnmatch'
+        module: '*' and '?'  match non-special characters, where "special"
+        is platform-dependent: slash on Unix; colon, slash, and backslash on
+        DOS/Windows; and colon on Mac OS.
+
+        If 'anchor' is true (the default), then the pattern match is more
+        stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
+        'anchor' is false, both of these will match.
+
+        If 'prefix' is supplied, then only filenames starting with 'prefix'
+        (itself a pattern) and ending with 'pattern', with anything in between
+        them, will match.  'anchor' is ignored in this case.
+
+        If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
+        'pattern' is assumed to be either a string containing a regex or a
+        regex object -- no translation is done, the regex is just compiled
+        and used as-is.
+
+        Selected strings will be added to self.files.
+
+        Return True if files are found.
+        """
+        # XXX docstring lying about what the special chars are?
+        found = False
+        pattern_re = self._translate_pattern(pattern, anchor, prefix, is_regex)
+
+        # delayed loading of allfiles list
+        if self.allfiles is None:
+            self.findall()
+
+        for name in self.allfiles:
+            if pattern_re.search(name):
+                self.files.add(name)
+                found = True
+        return found
+
+    def _exclude_pattern(self, pattern, anchor=True, prefix=None,
+                         is_regex=False):
+        """Remove strings (presumably filenames) from 'files' that match
+        'pattern'.
+
+        Other parameters are the same as for 'include_pattern()', above.
+        The list 'self.files' is modified in place. Return True if files are
+        found.
+
+        This API is public to allow e.g. exclusion of SCM subdirs, e.g. when
+        packaging source distributions
+        """
+        found = False
+        pattern_re = self._translate_pattern(pattern, anchor, prefix, is_regex)
+        for f in list(self.files):
+            if pattern_re.search(f):
+                self.files.remove(f)
+                found = True
+        return found
+
+    def _translate_pattern(self, pattern, anchor=True, prefix=None,
+                           is_regex=False):
+        """Translate a shell-like wildcard pattern to a compiled regular
+        expression.
+
+        Return the compiled regex.  If 'is_regex' true,
+        then 'pattern' is directly compiled to a regex (if it's a string)
+        or just returned as-is (assumes it's a regex object).
+        """
+        if is_regex:
+            if isinstance(pattern, str):
+                return re.compile(pattern)
+            else:
+                return pattern
+
+        if pattern:
+            pattern_re = self._glob_to_re(pattern)
+        else:
+            pattern_re = ''
+
+        base = re.escape(os.path.join(self.base, ''))
+        if prefix is not None:
+            # ditch end of pattern character
+            empty_pattern = self._glob_to_re('')
+            prefix_re = self._glob_to_re(prefix)[:-len(empty_pattern)]
+            sep = os.sep
+            if os.sep == '\\':
+                sep = r'\\'
+            pattern_re = '^' + base + sep.join((prefix_re,
+                                                '.*' + pattern_re))
+        else:                               # no prefix -- respect anchor flag
+            if anchor:
+                pattern_re = '^' + base + pattern_re
+
+        return re.compile(pattern_re)
+
+    def _glob_to_re(self, pattern):
+        """Translate a shell-like glob pattern to a regular expression.
+
+        Return a string containing the regex.  Differs from
+        'fnmatch.translate()' in that '*' does not match "special characters"
+        (which are platform-specific).
+        """
+        pattern_re = fnmatch.translate(pattern)
+
+        # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
+        # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
+        # and by extension they shouldn't match such "special characters" under
+        # any OS.  So change all non-escaped dots in the RE to match any
+        # character except the special characters (currently: just os.sep).
+        sep = os.sep
+        if os.sep == '\\':
+            # we're using a regex to manipulate a regex, so we need
+            # to escape the backslash twice
+            sep = r'\\\\'
+        escaped = r'\1[^%s]' % sep
+        pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', escaped, pattern_re)
+        return pattern_re
@@ -0,0 +1,190 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2012-2013 Vinay Sajip.
+# Licensed to the Python Software Foundation under a contributor agreement.
+# See LICENSE.txt and CONTRIBUTORS.txt.
+#
+"""Parser for the environment markers micro-language defined in PEP 345."""
+
+import ast
+import os
+import sys
+import platform
+
+from .compat import python_implementation, string_types
+from .util import in_venv
+
+__all__ = ['interpret']
+
+
+class Evaluator(object):
+    """
+    A limited evaluator for Python expressions.
+    """
+
+    operators = {
+        'eq': lambda x, y: x == y,
+        'gt': lambda x, y: x > y,
+        'gte': lambda x, y: x >= y,
+        'in': lambda x, y: x in y,
+        'lt': lambda x, y: x < y,
+        'lte': lambda x, y: x <= y,
+        'not': lambda x: not x,
+        'noteq': lambda x, y: x != y,
+        'notin': lambda x, y: x not in y,
+    }
+
+    allowed_values = {
+        'sys_platform': sys.platform,
+        'python_version': '%s.%s' % sys.version_info[:2],
+        # parsing sys.platform is not reliable, but there is no other
+        # way to get e.g. 2.7.2+, and the PEP is defined with sys.version
+        'python_full_version': sys.version.split(' ', 1)[0],
+        'os_name': os.name,
+        'platform_in_venv': str(in_venv()),
+        'platform_release': platform.release(),
+        'platform_version': platform.version(),
+        'platform_machine': platform.machine(),
+        'platform_python_implementation': python_implementation(),
+    }
+
+    def __init__(self, context=None):
+        """
+        Initialise an instance.
+
+        :param context: If specified, names are looked up in this mapping.
+        """
+        self.context = context or {}
+        self.source = None
+
+    def get_fragment(self, offset):
+        """
+        Get the part of the source which is causing a problem.
+        """
+        fragment_len = 10
+        s = '%r' % (self.source[offset:offset + fragment_len])
+        if offset + fragment_len < len(self.source):
+            s += '...'
+        return s
+
+    def get_handler(self, node_type):
+        """
+        Get a handler for the specified AST node type.
+        """
+        return getattr(self, 'do_%s' % node_type, None)
+
+    def evaluate(self, node, filename=None):
+        """
+        Evaluate a source string or node, using ``filename`` when
+        displaying errors.
+        """
+        if isinstance(node, string_types):
+            self.source = node
+            kwargs = {'mode': 'eval'}
+            if filename:
+                kwargs['filename'] = filename
+            try:
+                node = ast.parse(node, **kwargs)
+            except SyntaxError as e:
+                s = self.get_fragment(e.offset)
+                raise SyntaxError('syntax error %s' % s)
+        node_type = node.__class__.__name__.lower()
+        handler = self.get_handler(node_type)
+        if handler is None:
+            if self.source is None:
+                s = '(source not available)'
+            else:
+                s = self.get_fragment(node.col_offset)
+            raise SyntaxError("don't know how to evaluate %r %s" % (
+                node_type, s))
+        return handler(node)
+
+    def get_attr_key(self, node):
+        assert isinstance(node, ast.Attribute), 'attribute node expected'
+        return '%s.%s' % (node.value.id, node.attr)
+
+    def do_attribute(self, node):
+        if not isinstance(node.value, ast.Name):
+            valid = False
+        else:
+            key = self.get_attr_key(node)
+            valid = key in self.context or key in self.allowed_values
+        if not valid:
+            raise SyntaxError('invalid expression: %s' % key)
+        if key in self.context:
+            result = self.context[key]
+        else:
+            result = self.allowed_values[key]
+        return result
+
+    def do_boolop(self, node):
+        result = self.evaluate(node.values[0])
+        is_or = node.op.__class__ is ast.Or
+        is_and = node.op.__class__ is ast.And
+        assert is_or or is_and
+        if (is_and and result) or (is_or and not result):
+            for n in node.values[1:]:
+                result = self.evaluate(n)
+                if (is_or and result) or (is_and and not result):
+                    break
+        return result
+
+    def do_compare(self, node):
+        def sanity_check(lhsnode, rhsnode):
+            valid = True
+            if isinstance(lhsnode, ast.Str) and isinstance(rhsnode, ast.Str):
+                valid = False
+            #elif (isinstance(lhsnode, ast.Attribute)
+            #      and isinstance(rhsnode, ast.Attribute)):
+            #    klhs = self.get_attr_key(lhsnode)
+            #    krhs = self.get_attr_key(rhsnode)
+            #    valid = klhs != krhs
+            if not valid:
+                s = self.get_fragment(node.col_offset)
+                raise SyntaxError('Invalid comparison: %s' % s)
+
+        lhsnode = node.left
+        lhs = self.evaluate(lhsnode)
+        result = True
+        for op, rhsnode in zip(node.ops, node.comparators):
+            sanity_check(lhsnode, rhsnode)
+            op = op.__class__.__name__.lower()
+            if op not in self.operators:
+                raise SyntaxError('unsupported operation: %r' % op)
+            rhs = self.evaluate(rhsnode)
+            result = self.operators[op](lhs, rhs)
+            if not result:
+                break
+            lhs = rhs
+            lhsnode = rhsnode
+        return result
+
+    def do_expression(self, node):
+        return self.evaluate(node.body)
+
+    def do_name(self, node):
+        valid = False
+        if node.id in self.context:
+            valid = True
+            result = self.context[node.id]
+        elif node.id in self.allowed_values:
+            valid = True
+            result = self.allowed_values[node.id]
+        if not valid:
+            raise SyntaxError('invalid expression: %s' % node.id)
+        return result
+
+    def do_str(self, node):
+        return node.s
+
+
+def interpret(marker, execution_context=None):
+    """
+    Interpret a marker and return a result depending on environment.
+
+    :param marker: The marker to interpret.
+    :type marker: str
+    :param execution_context: The context used for name lookup.
+    :type execution_context: mapping
+    """
+    return Evaluator(execution_context).evaluate(marker.strip())
@@ -0,0 +1,317 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2013 Vinay Sajip.
+# Licensed to the Python Software Foundation under a contributor agreement.
+# See LICENSE.txt and CONTRIBUTORS.txt.
+#
+from __future__ import unicode_literals
+
+import bisect
+import io
+import logging
+import os
+import pkgutil
+import shutil
+import sys
+import types
+import zipimport
+
+from . import DistlibException
+from .util import cached_property, get_cache_base, path_to_cache_dir, Cache
+
+logger = logging.getLogger(__name__)
+
+
+cache = None    # created when needed
+
+
+class ResourceCache(Cache):
+    def __init__(self, base=None):
+        if base is None:
+            # Use native string to avoid issues on 2.x: see Python #20140.
+            base = os.path.join(get_cache_base(), str('resource-cache'))
+        super(ResourceCache, self).__init__(base)
+
+    def is_stale(self, resource, path):
+        """
+        Is the cache stale for the given resource?
+
+        :param resource: The :class:`Resource` being cached.
+        :param path: The path of the resource in the cache.
+        :return: True if the cache is stale.
+        """
+        # Cache invalidation is a hard problem :-)
+        return True
+
+    def get(self, resource):
+        """
+        Get a resource into the cache,
+
+        :param resource: A :class:`Resource` instance.
+        :return: The pathname of the resource in the cache.
+        """
+        prefix, path = resource.finder.get_cache_info(resource)
+        if prefix is None:
+            result = path
+        else:
+            result = os.path.join(self.base, self.prefix_to_dir(prefix), path)
+            dirname = os.path.dirname(result)
+            if not os.path.isdir(dirname):
+                os.makedirs(dirname)
+            if not os.path.exists(result):
+                stale = True
+            else:
+                stale = self.is_stale(resource, path)
+            if stale:
+                # write the bytes of the resource to the cache location
+                with open(result, 'wb') as f:
+                    f.write(resource.bytes)
+        return result
+
+
+class ResourceBase(object):
+    def __init__(self, finder, name):
+        self.finder = finder
+        self.name = name
+
+
+class Resource(ResourceBase):
+    """
+    A class representing an in-package resource, such as a data file. This is
+    not normally instantiated by user code, but rather by a
+    :class:`ResourceFinder` which manages the resource.
+    """
+    is_container = False        # Backwards compatibility
+
+    def as_stream(self):
+        """
+        Get the resource as a stream.
+
+        This is not a property to make it obvious that it returns a new stream
+        each time.
+        """
+        return self.finder.get_stream(self)
+
+    @cached_property
+    def file_path(self):
+        global cache
+        if cache is None:
+            cache = ResourceCache()
+        return cache.get(self)
+
+    @cached_property
+    def bytes(self):
+        return self.finder.get_bytes(self)
+
+    @cached_property
+    def size(self):
+        return self.finder.get_size(self)
+
+
+class ResourceContainer(ResourceBase):
+    is_container = True     # Backwards compatibility
+
+    @cached_property
+    def resources(self):
+        return self.finder.get_resources(self)
+
+
+class ResourceFinder(object):
+    """
+    Resource finder for file system resources.
+    """
+    def __init__(self, module):
+        self.module = module
+        self.loader = getattr(module, '__loader__', None)
+        self.base = os.path.dirname(getattr(module, '__file__', ''))
+
+    def _adjust_path(self, path):
+        return os.path.realpath(path)
+
+    def _make_path(self, resource_name):
+        parts = resource_name.split('/')
+        parts.insert(0, self.base)
+        result = os.path.join(*parts)
+        return self._adjust_path(result)
+
+    def _find(self, path):
+        return os.path.exists(path)
+
+    def get_cache_info(self, resource):
+        return None, resource.path
+
+    def find(self, resource_name):
+        path = self._make_path(resource_name)
+        if not self._find(path):
+            result = None
+        else:
+            if self._is_directory(path):
+                result = ResourceContainer(self, resource_name)
+            else:
+                result = Resource(self, resource_name)
+            result.path = path
+        return result
+
+    def get_stream(self, resource):
+        return open(resource.path, 'rb')
+
+    def get_bytes(self, resource):
+        with open(resource.path, 'rb') as f:
+            return f.read()
+
+    def get_size(self, resource):
+        return os.path.getsize(resource.path)
+
+    def get_resources(self, resource):
+        def allowed(f):
+            return f != '__pycache__' and not f.endswith(('.pyc', '.pyo'))
+        return set([f for f in os.listdir(resource.path) if allowed(f)])
+
+    def is_container(self, resource):
+        return self._is_directory(resource.path)
+
+    _is_directory = staticmethod(os.path.isdir)
+
+
+class ZipResourceFinder(ResourceFinder):
+    """
+    Resource finder for resources in .zip files.
+    """
+    def __init__(self, module):
+        super(ZipResourceFinder, self).__init__(module)
+        archive = self.loader.archive
+        self.prefix_len = 1 + len(archive)
+        # PyPy doesn't have a _files attr on zipimporter, and you can't set one
+        if hasattr(self.loader, '_files'):
+            self._files = self.loader._files
+        else:
+            self._files = zipimport._zip_directory_cache[archive]
+        self.index = sorted(self._files)
+
+    def _adjust_path(self, path):
+        return path
+
+    def _find(self, path):
+        path = path[self.prefix_len:]
+        if path in self._files:
+            result = True
+        else:
+            if path and path[-1] != os.sep:
+                path = path + os.sep
+            i = bisect.bisect(self.index, path)
+            try:
+                result = self.index[i].startswith(path)
+            except IndexError:
+                result = False
+        if not result:
+            logger.debug('_find failed: %r %r', path, self.loader.prefix)
+        else:
+            logger.debug('_find worked: %r %r', path, self.loader.prefix)
+        return result
+
+    def get_cache_info(self, resource):
+        prefix = self.loader.archive
+        path = resource.path[1 + len(prefix):]
+        return prefix, path
+
+    def get_bytes(self, resource):
+        return self.loader.get_data(resource.path)
+
+    def get_stream(self, resource):
+        return io.BytesIO(self.get_bytes(resource))
+
+    def get_size(self, resource):
+        path = resource.path[self.prefix_len:]
+        return self._files[path][3]
+
+    def get_resources(self, resource):
+        path = resource.path[self.prefix_len:]
+        if path and path[-1] != os.sep:
+            path += os.sep
+        plen = len(path)
+        result = set()
+        i = bisect.bisect(self.index, path)
+        while i < len(self.index):
+            if not self.index[i].startswith(path):
+                break
+            s = self.index[i][plen:]
+            result.add(s.split(os.sep, 1)[0])   # only immediate children
+            i += 1
+        return result
+
+    def _is_directory(self, path):
+        path = path[self.prefix_len:]
+        if path and path[-1] != os.sep:
+            path += os.sep
+        i = bisect.bisect(self.index, path)
+        try:
+            result = self.index[i].startswith(path)
+        except IndexError:
+            result = False
+        return result
+
+_finder_registry = {
+    type(None): ResourceFinder,
+    zipimport.zipimporter: ZipResourceFinder
+}
+
+try:
+    import _frozen_importlib
+    _finder_registry[_frozen_importlib.SourceFileLoader] = ResourceFinder
+    _finder_registry[_frozen_importlib.FileFinder] = ResourceFinder
+except (ImportError, AttributeError):
+    pass
+
+
+def register_finder(loader, finder_maker):
+    _finder_registry[type(loader)] = finder_maker
+
+_finder_cache = {}
+
+
+def finder(package):
+    """
+    Return a resource finder for a package.
+    :param package: The name of the package.
+    :return: A :class:`ResourceFinder` instance for the package.
+    """
+    if package in _finder_cache:
+        result = _finder_cache[package]
+    else:
+        if package not in sys.modules:
+            __import__(package)
+        module = sys.modules[package]
+        path = getattr(module, '__path__', None)
+        if path is None:
+            raise DistlibException('You cannot get a finder for a module, '
+                                   'only for a package')
+        loader = getattr(module, '__loader__', None)
+        finder_maker = _finder_registry.get(type(loader))
+        if finder_maker is None:
+            raise DistlibException('Unable to locate finder for %r' % package)
+        result = finder_maker(module)
+        _finder_cache[package] = result
+    return result
+
+
+_dummy_module = types.ModuleType(str('__dummy__'))
+
+
+def finder_for_path(path):
+    """
+    Return a resource finder for a path, which should represent a container.
+
+    :param path: The path.
+    :return: A :class:`ResourceFinder` instance for the path.
+    """
+    result = None
+    # calls any path hooks, gets importer into cache
+    pkgutil.get_importer(path)
+    loader = sys.path_importer_cache.get(path)
+    finder = _finder_registry.get(type(loader))
+    if finder:
+        module = _dummy_module
+        module.__file__ = os.path.join(path, '')
+        module.__loader__ = loader
+        result = finder(module)
+    return result
@@ -0,0 +1,323 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2013 Vinay Sajip.
+# Licensed to the Python Software Foundation under a contributor agreement.
+# See LICENSE.txt and CONTRIBUTORS.txt.
+#
+from io import BytesIO
+import logging
+import os
+import re
+import struct
+import sys
+
+from .compat import sysconfig, fsencode, detect_encoding, ZipFile
+from .resources import finder
+from .util import (FileOperator, get_export_entry, convert_path,
+                   get_executable, in_venv)
+
+logger = logging.getLogger(__name__)
+
+_DEFAULT_MANIFEST = '''
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
+ <assemblyIdentity version="1.0.0.0"
+ processorArchitecture="X86"
+ name="%s"
+ type="win32"/>
+
+ <!-- Identify the application security requirements. -->
+ <trustInfo xmlns="urn:schemas-microsoft-com:asm.v3">
+ <security>
+ <requestedPrivileges>
+ <requestedExecutionLevel level="asInvoker" uiAccess="false"/>
+ </requestedPrivileges>
+ </security>
+ </trustInfo>
+</assembly>'''.strip()
+
+# check if Python is called on the first line with this expression
+FIRST_LINE_RE = re.compile(b'^#!.*pythonw?[0-9.]*([ \t].*)?$')
+SCRIPT_TEMPLATE = '''# -*- coding: utf-8 -*-
+if __name__ == '__main__':
+    import sys, re
+
+    def _resolve(module, func):
+        __import__(module)
+        mod = sys.modules[module]
+        parts = func.split('.')
+        result = getattr(mod, parts.pop(0))
+        for p in parts:
+            result = getattr(result, p)
+        return result
+
+    try:
+        sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
+
+        func = _resolve('%(module)s', '%(func)s')
+        rc = func() # None interpreted as 0
+    except Exception as e:  # only supporting Python >= 2.6
+        sys.stderr.write('%%s\\n' %% e)
+        rc = 1
+    sys.exit(rc)
+'''
+
+
+class ScriptMaker(object):
+    """
+    A class to copy or create scripts from source scripts or callable
+    specifications.
+    """
+    script_template = SCRIPT_TEMPLATE
+
+    executable = None  # for shebangs
+
+    def __init__(self, source_dir, target_dir, add_launchers=True,
+                 dry_run=False, fileop=None):
+        self.source_dir = source_dir
+        self.target_dir = target_dir
+        self.add_launchers = add_launchers
+        self.force = False
+        self.clobber = False
+        # It only makes sense to set mode bits on POSIX.
+        self.set_mode = (os.name == 'posix')
+        self.variants = set(('', 'X.Y'))
+        self._fileop = fileop or FileOperator(dry_run)
+
+    def _get_alternate_executable(self, executable, options):
+        if options.get('gui', False) and os.name == 'nt':
+            dn, fn = os.path.split(executable)
+            fn = fn.replace('python', 'pythonw')
+            executable = os.path.join(dn, fn)
+        return executable
+
+    def _get_shebang(self, encoding, post_interp=b'', options=None):
+        enquote = True
+        if self.executable:
+            executable = self.executable
+            enquote = False     # assume this will be taken care of
+        elif not sysconfig.is_python_build():
+            executable = get_executable()
+        elif in_venv():
+            executable = os.path.join(sysconfig.get_path('scripts'),
+                            'python%s' % sysconfig.get_config_var('EXE'))
+        else:
+            executable = os.path.join(
+                sysconfig.get_config_var('BINDIR'),
+               'python%s%s' % (sysconfig.get_config_var('VERSION'),
+                               sysconfig.get_config_var('EXE')))
+        if options:
+            executable = self._get_alternate_executable(executable, options)
+
+        # If the user didn't specify an executable, it may be necessary to
+        # cater for executable paths with spaces (not uncommon on Windows)
+        if enquote and ' ' in executable:
+            executable = '"%s"' % executable
+        executable = fsencode(executable)
+        shebang = b'#!' + executable + post_interp + b'\n'
+        # Python parser starts to read a script using UTF-8 until
+        # it gets a #coding:xxx cookie. The shebang has to be the
+        # first line of a file, the #coding:xxx cookie cannot be
+        # written before. So the shebang has to be decodable from
+        # UTF-8.
+        try:
+            shebang.decode('utf-8')
+        except UnicodeDecodeError:
+            raise ValueError(
+                'The shebang (%r) is not decodable from utf-8' % shebang)
+        # If the script is encoded to a custom encoding (use a
+        # #coding:xxx cookie), the shebang has to be decodable from
+        # the script encoding too.
+        if encoding != 'utf-8':
+            try:
+                shebang.decode(encoding)
+            except UnicodeDecodeError:
+                raise ValueError(
+                    'The shebang (%r) is not decodable '
+                    'from the script encoding (%r)' % (shebang, encoding))
+        return shebang
+
+    def _get_script_text(self, entry):
+        return self.script_template % dict(module=entry.prefix,
+                                           func=entry.suffix)
+
+    manifest = _DEFAULT_MANIFEST
+
+    def get_manifest(self, exename):
+        base = os.path.basename(exename)
+        return self.manifest % base
+
+    def _write_script(self, names, shebang, script_bytes, filenames, ext):
+        use_launcher = self.add_launchers and os.name == 'nt'
+        linesep = os.linesep.encode('utf-8')
+        if not use_launcher:
+            script_bytes = shebang + linesep + script_bytes
+        else:
+            if ext == 'py':
+                launcher = self._get_launcher('t')
+            else:
+                launcher = self._get_launcher('w')
+            stream = BytesIO()
+            with ZipFile(stream, 'w') as zf:
+                zf.writestr('__main__.py', script_bytes)
+            zip_data = stream.getvalue()
+            script_bytes = launcher + shebang + linesep + zip_data
+        for name in names:
+            outname = os.path.join(self.target_dir, name)
+            if use_launcher:
+                n, e = os.path.splitext(outname)
+                if e.startswith('.py'):
+                    outname = n
+                outname = '%s.exe' % outname
+                try:
+                    self._fileop.write_binary_file(outname, script_bytes)
+                except Exception:
+                    # Failed writing an executable - it might be in use.
+                    logger.warning('Failed to write executable - trying to '
+                                   'use .deleteme logic')
+                    dfname = '%s.deleteme' % outname
+                    if os.path.exists(dfname):
+                        os.remove(dfname)       # Not allowed to fail here
+                    os.rename(outname, dfname)  # nor here
+                    self._fileop.write_binary_file(outname, script_bytes)
+                    logger.debug('Able to replace executable using '
+                                 '.deleteme logic')
+                    try:
+                        os.remove(dfname)
+                    except Exception:
+                        pass    # still in use - ignore error
+            else:
+                if os.name == 'nt' and not outname.endswith('.' + ext):
+                    outname = '%s.%s' % (outname, ext)
+                if os.path.exists(outname) and not self.clobber:
+                    logger.warning('Skipping existing file %s', outname)
+                    continue
+                self._fileop.write_binary_file(outname, script_bytes)
+                if self.set_mode:
+                    self._fileop.set_executable_mode([outname])
+            filenames.append(outname)
+
+    def _make_script(self, entry, filenames, options=None):
+        shebang = self._get_shebang('utf-8', options=options)
+        script = self._get_script_text(entry).encode('utf-8')
+        name = entry.name
+        scriptnames = set()
+        if '' in self.variants:
+            scriptnames.add(name)
+        if 'X' in self.variants:
+            scriptnames.add('%s%s' % (name, sys.version[0]))
+        if 'X.Y' in self.variants:
+            scriptnames.add('%s-%s' % (name, sys.version[:3]))
+        if options and options.get('gui', False):
+            ext = 'pyw'
+        else:
+            ext = 'py'
+        self._write_script(scriptnames, shebang, script, filenames, ext)
+
+    def _copy_script(self, script, filenames):
+        adjust = False
+        script = os.path.join(self.source_dir, convert_path(script))
+        outname = os.path.join(self.target_dir, os.path.basename(script))
+        if not self.force and not self._fileop.newer(script, outname):
+            logger.debug('not copying %s (up-to-date)', script)
+            return
+
+        # Always open the file, but ignore failures in dry-run mode --
+        # that way, we'll get accurate feedback if we can read the
+        # script.
+        try:
+            f = open(script, 'rb')
+        except IOError:
+            if not self.dry_run:
+                raise
+            f = None
+        else:
+            encoding, lines = detect_encoding(f.readline)
+            f.seek(0)
+            first_line = f.readline()
+            if not first_line:
+                logger.warning('%s: %s is an empty file (skipping)',
+                               self.get_command_name(),  script)
+                return
+
+            match = FIRST_LINE_RE.match(first_line.replace(b'\r\n', b'\n'))
+            if match:
+                adjust = True
+                post_interp = match.group(1) or b''
+
+        if not adjust:
+            if f:
+                f.close()
+            self._fileop.copy_file(script, outname)
+            if self.set_mode:
+                self._fileop.set_executable_mode([outname])
+            filenames.append(outname)
+        else:
+            logger.info('copying and adjusting %s -> %s', script,
+                        self.target_dir)
+            if not self._fileop.dry_run:
+                shebang = self._get_shebang(encoding, post_interp)
+                if b'pythonw' in first_line:
+                    ext = 'pyw'
+                else:
+                    ext = 'py'
+                n = os.path.basename(outname)
+                self._write_script([n], shebang, f.read(), filenames, ext)
+            if f:
+                f.close()
+
+    @property
+    def dry_run(self):
+        return self._fileop.dry_run
+
+    @dry_run.setter
+    def dry_run(self, value):
+        self._fileop.dry_run = value
+
+    if os.name == 'nt':
+        # Executable launcher support.
+        # Launchers are from https://bitbucket.org/vinay.sajip/simple_launcher/
+
+        def _get_launcher(self, kind):
+            if struct.calcsize('P') == 8:   # 64-bit
+                bits = '64'
+            else:
+                bits = '32'
+            name = '%s%s.exe' % (kind, bits)
+            # Issue 31: don't hardcode an absolute package name, but
+            # determine it relative to the current package
+            distlib_package = __name__.rsplit('.', 1)[0]
+            result = finder(distlib_package).find(name).bytes
+            return result
+
+    # Public API follows
+
+    def make(self, specification, options=None):
+        """
+        Make a script.
+
+        :param specification: The specification, which is either a valid export
+                              entry specification (to make a script from a
+                              callable) or a filename (to make a script by
+                              copying from a source location).
+        :param options: A dictionary of options controlling script generation.
+        :return: A list of all absolute pathnames written to.
+        """
+        filenames = []
+        entry = get_export_entry(specification)
+        if entry is None:
+            self._copy_script(specification, filenames)
+        else:
+            self._make_script(entry, filenames, options=options)
+        return filenames
+
+    def make_multiple(self, specifications, options=None):
+        """
+        Take a list of specifications and make scripts from them,
+        :param specifications: A list of specifications.
+        :return: A list of all absolute pathnames written to,
+        """
+        filenames = []
+        for specification in specifications:
+            filenames.extend(self.make(specification, options))
+        return filenames
@@ -0,0 +1,721 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2012-2013 The Python Software Foundation.
+# See LICENSE.txt and CONTRIBUTORS.txt.
+#
+"""
+Implementation of a flexible versioning scheme providing support for PEP-386,
+distribute-compatible and semantic versioning.
+"""
+
+import logging
+import re
+
+from .compat import string_types
+
+__all__ = ['NormalizedVersion', 'NormalizedMatcher',
+           'LegacyVersion', 'LegacyMatcher',
+           'SemanticVersion', 'SemanticMatcher',
+           'UnsupportedVersionError', 'get_scheme']
+
+logger = logging.getLogger(__name__)
+
+
+class UnsupportedVersionError(ValueError):
+    """This is an unsupported version."""
+    pass
+
+
+class Version(object):
+    def __init__(self, s):
+        self._string = s = s.strip()
+        self._parts = parts = self.parse(s)
+        assert isinstance(parts, tuple)
+        assert len(parts) > 0
+
+    def parse(self, s):
+        raise NotImplementedError('please implement in a subclass')
+
+    def _check_compatible(self, other):
+        if type(self) != type(other):
+            raise TypeError('cannot compare %r and %r' % (self, other))
+
+    def __eq__(self, other):
+        self._check_compatible(other)
+        return self._parts == other._parts
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def __lt__(self, other):
+        self._check_compatible(other)
+        return self._parts < other._parts
+
+    def __gt__(self, other):
+        return not (self.__lt__(other) or self.__eq__(other))
+
+    def __le__(self, other):
+        return self.__lt__(other) or self.__eq__(other)
+
+    def __ge__(self, other):
+        return self.__gt__(other) or self.__eq__(other)
+
+    # See http://docs.python.org/reference/datamodel#object.__hash__
+    def __hash__(self):
+        return hash(self._parts)
+
+    def __repr__(self):
+        return "%s('%s')" % (self.__class__.__name__, self._string)
+
+    def __str__(self):
+        return self._string
+
+    @property
+    def is_prerelease(self):
+        raise NotImplementedError('Please implement in subclasses.')
+
+
+class Matcher(object):
+    version_class = None
+
+    dist_re = re.compile(r"^(\w[\s\w'.-]*)(\((.*)\))?")
+    comp_re = re.compile(r'^(<=|>=|<|>|!=|==|~=)?\s*([^\s,]+)$')
+    num_re = re.compile(r'^\d+(\.\d+)*$')
+
+    # value is either a callable or the name of a method
+    _operators = {
+        '<': lambda v, c, p: v < c,
+        '>': lambda v, c, p: v > c,
+        '<=': lambda v, c, p: v == c or v < c,
+        '>=': lambda v, c, p: v == c or v > c,
+        '==': lambda v, c, p: v == c,
+        # by default, compatible => >=.
+        '~=': lambda v, c, p: v == c or v > c,
+        '!=': lambda v, c, p: v != c,
+    }
+
+    def __init__(self, s):
+        if self.version_class is None:
+            raise ValueError('Please specify a version class')
+        self._string = s = s.strip()
+        m = self.dist_re.match(s)
+        if not m:
+            raise ValueError('Not valid: %r' % s)
+        groups = m.groups('')
+        self.name = groups[0].strip()
+        self.key = self.name.lower()    # for case-insensitive comparisons
+        clist = []
+        if groups[2]:
+            constraints = [c.strip() for c in groups[2].split(',')]
+            for c in constraints:
+                m = self.comp_re.match(c)
+                if not m:
+                    raise ValueError('Invalid %r in %r' % (c, s))
+                groups = m.groups()
+                op = groups[0] or '~='
+                s = groups[1]
+                if s.endswith('.*'):
+                    if op not in ('==', '!='):
+                        raise ValueError('\'.*\' not allowed for '
+                                         '%r constraints' % op)
+                    # Could be a partial version (e.g. for '2.*') which
+                    # won't parse as a version, so keep it as a string
+                    vn, prefix = s[:-2], True
+                    if not self.num_re.match(vn):
+                        # Just to check that vn is a valid version
+                        self.version_class(vn)
+                else:
+                    # Should parse as a version, so we can create an
+                    # instance for the comparison
+                    vn, prefix = self.version_class(s), False
+                clist.append((op, vn, prefix))
+        self._parts = tuple(clist)
+
+    def match(self, version):
+        """
+        Check if the provided version matches the constraints.
+
+        :param version: The version to match against this instance.
+        :type version: Strring or :class:`Version` instance.
+        """
+        if isinstance(version, string_types):
+            version = self.version_class(version)
+        for operator, constraint, prefix in self._parts:
+            f = self._operators.get(operator)
+            if isinstance(f, string_types):
+                f = getattr(self, f)
+            if not f:
+                msg = ('%r not implemented '
+                       'for %s' % (operator, self.__class__.__name__))
+                raise NotImplementedError(msg)
+            if not f(version, constraint, prefix):
+                return False
+        return True
+
+    @property
+    def exact_version(self):
+        result = None
+        if len(self._parts) == 1 and self._parts[0][0] == '==':
+            result = self._parts[0][1]
+        return result
+
+    def _check_compatible(self, other):
+        if type(self) != type(other) or self.name != other.name:
+            raise TypeError('cannot compare %s and %s' % (self, other))
+
+    def __eq__(self, other):
+        self._check_compatible(other)
+        return self.key == other.key and self._parts == other._parts
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    # See http://docs.python.org/reference/datamodel#object.__hash__
+    def __hash__(self):
+        return hash(self.key) + hash(self._parts)
+
+    def __repr__(self):
+        return "%s(%r)" % (self.__class__.__name__, self._string)
+
+    def __str__(self):
+        return self._string
+
+
+PEP426_VERSION_RE = re.compile(r'^(\d+(\.\d+)*)((a|b|c|rc)(\d+))?'
+                               r'(\.(post)(\d+))?(\.(dev)(\d+))?'
+                               r'(-(\d+(\.\d+)?))?$')
+
+
+def _pep426_key(s):
+    s = s.strip()
+    m = PEP426_VERSION_RE.match(s)
+    if not m:
+        raise UnsupportedVersionError('Not a valid version: %s' % s)
+    groups = m.groups()
+    nums = tuple(int(v) for v in groups[0].split('.'))
+    while len(nums) > 1 and nums[-1] == 0:
+        nums = nums[:-1]
+
+    pre = groups[3:5]
+    post = groups[6:8]
+    dev = groups[9:11]
+    local = groups[12]
+    if pre == (None, None):
+        pre = ()
+    else:
+        pre = pre[0], int(pre[1])
+    if post == (None, None):
+        post = ()
+    else:
+        post = post[0], int(post[1])
+    if dev == (None, None):
+        dev = ()
+    else:
+        dev = dev[0], int(dev[1])
+    if local is None:
+        local = ()
+    else:
+        local = tuple([int(s) for s in local.split('.')])
+    if not pre:
+        # either before pre-release, or final release and after
+        if not post and dev:
+            # before pre-release
+            pre = ('a', -1)     # to sort before a0
+        else:
+            pre = ('z',)        # to sort after all pre-releases
+    # now look at the state of post and dev.
+    if not post:
+        post = ('_',)   # sort before 'a'
+    if not dev:
+        dev = ('final',)
+
+    #print('%s -> %s' % (s, m.groups()))
+    return nums, pre, post, dev, local
+
+
+_normalized_key = _pep426_key
+
+
+class NormalizedVersion(Version):
+    """A rational version.
+
+    Good:
+        1.2         # equivalent to "1.2.0"
+        1.2.0
+        1.2a1
+        1.2.3a2
+        1.2.3b1
+        1.2.3c1
+        1.2.3.4
+        TODO: fill this out
+
+    Bad:
+        1           # mininum two numbers
+        1.2a        # release level must have a release serial
+        1.2.3b
+    """
+    def parse(self, s):
+        result = _normalized_key(s)
+        # _normalized_key loses trailing zeroes in the release
+        # clause, since that's needed to ensure that X.Y == X.Y.0 == X.Y.0.0
+        # However, PEP 440 prefix matching needs it: for example,
+        # (~= 1.4.5.0) matches differently to (~= 1.4.5.0.0).
+        m = PEP426_VERSION_RE.match(s)      # must succeed
+        groups = m.groups()
+        self._release_clause = tuple(int(v) for v in groups[0].split('.'))
+        return result
+
+    PREREL_TAGS = set(['a', 'b', 'c', 'rc', 'dev'])
+
+    @property
+    def is_prerelease(self):
+        return any(t[0] in self.PREREL_TAGS for t in self._parts if t)
+
+
+def _match_prefix(x, y):
+    x = str(x)
+    y = str(y)
+    if x == y:
+        return True
+    if not x.startswith(y):
+        return False
+    n = len(y)
+    return x[n] == '.'
+
+
+class NormalizedMatcher(Matcher):
+    version_class = NormalizedVersion
+
+    # value is either a callable or the name of a method
+    _operators = {
+        '~=': '_match_compatible',
+        '<': '_match_lt',
+        '>': '_match_gt',
+        '<=': '_match_le',
+        '>=': '_match_ge',
+        '==': '_match_eq',
+        '!=': '_match_ne',
+    }
+
+    def _adjust_local(self, version, constraint, prefix):
+        if prefix:
+            strip_local = '-' not in constraint and version._parts[-1]
+        else:
+            # both constraint and version are
+            # NormalizedVersion instances.
+            # If constraint does not have a local component,
+            # ensure the version doesn't, either.
+            strip_local = not constraint._parts[-1] and version._parts[-1]
+        if strip_local:
+            s = version._string.split('-', 1)[0]
+            version = self.version_class(s)
+        return version, constraint
+
+    def _match_lt(self, version, constraint, prefix):
+        version, constraint = self._adjust_local(version, constraint, prefix)
+        if version >= constraint:
+            return False
+        release_clause = constraint._release_clause
+        pfx = '.'.join([str(i) for i in release_clause])
+        return not _match_prefix(version, pfx)
+
+    def _match_gt(self, version, constraint, prefix):
+        version, constraint = self._adjust_local(version, constraint, prefix)
+        if version <= constraint:
+            return False
+        release_clause = constraint._release_clause
+        pfx = '.'.join([str(i) for i in release_clause])
+        return not _match_prefix(version, pfx)
+
+    def _match_le(self, version, constraint, prefix):
+        version, constraint = self._adjust_local(version, constraint, prefix)
+        return version <= constraint
+
+    def _match_ge(self, version, constraint, prefix):
+        version, constraint = self._adjust_local(version, constraint, prefix)
+        return version >= constraint
+
+    def _match_eq(self, version, constraint, prefix):
+        version, constraint = self._adjust_local(version, constraint, prefix)
+        if not prefix:
+            result = (version == constraint)
+        else:
+            result = _match_prefix(version, constraint)
+        return result
+
+    def _match_ne(self, version, constraint, prefix):
+        version, constraint = self._adjust_local(version, constraint, prefix)
+        if not prefix:
+            result = (version != constraint)
+        else:
+            result = not _match_prefix(version, constraint)
+        return result
+
+    def _match_compatible(self, version, constraint, prefix):
+        version, constraint = self._adjust_local(version, constraint, prefix)
+        if version == constraint:
+            return True
+        if version < constraint:
+            return False
+        release_clause = constraint._release_clause
+        if len(release_clause) > 1:
+            release_clause = release_clause[:-1]
+        pfx = '.'.join([str(i) for i in release_clause])
+        return _match_prefix(version, pfx)
+
+_REPLACEMENTS = (
+    (re.compile('[.+-]$'), ''),                     # remove trailing puncts
+    (re.compile(r'^[.](\d)'), r'0.\1'),             # .N -> 0.N at start
+    (re.compile('^[.-]'), ''),                      # remove leading puncts
+    (re.compile(r'^\((.*)\)$'), r'\1'),             # remove parentheses
+    (re.compile(r'^v(ersion)?\s*(\d+)'), r'\2'),    # remove leading v(ersion)
+    (re.compile(r'^r(ev)?\s*(\d+)'), r'\2'),        # remove leading v(ersion)
+    (re.compile('[.]{2,}'), '.'),                   # multiple runs of '.'
+    (re.compile(r'\b(alfa|apha)\b'), 'alpha'),      # misspelt alpha
+    (re.compile(r'\b(pre-alpha|prealpha)\b'),
+                'pre.alpha'),                       # standardise
+    (re.compile(r'\(beta\)$'), 'beta'),             # remove parentheses
+)
+
+_SUFFIX_REPLACEMENTS = (
+    (re.compile('^[:~._+-]+'), ''),                   # remove leading puncts
+    (re.compile('[,*")([\]]'), ''),                   # remove unwanted chars
+    (re.compile('[~:+_ -]'), '.'),                    # replace illegal chars
+    (re.compile('[.]{2,}'), '.'),                   # multiple runs of '.'
+    (re.compile(r'\.$'), ''),                       # trailing '.'
+)
+
+_NUMERIC_PREFIX = re.compile(r'(\d+(\.\d+)*)')
+
+
+def _suggest_semantic_version(s):
+    """
+    Try to suggest a semantic form for a version for which
+    _suggest_normalized_version couldn't come up with anything.
+    """
+    result = s.strip().lower()
+    for pat, repl in _REPLACEMENTS:
+        result = pat.sub(repl, result)
+    if not result:
+        result = '0.0.0'
+
+    # Now look for numeric prefix, and separate it out from
+    # the rest.
+    #import pdb; pdb.set_trace()
+    m = _NUMERIC_PREFIX.match(result)
+    if not m:
+        prefix = '0.0.0'
+        suffix = result
+    else:
+        prefix = m.groups()[0].split('.')
+        prefix = [int(i) for i in prefix]
+        while len(prefix) < 3:
+            prefix.append(0)
+        if len(prefix) == 3:
+            suffix = result[m.end():]
+        else:
+            suffix = '.'.join([str(i) for i in prefix[3:]]) + result[m.end():]
+            prefix = prefix[:3]
+        prefix = '.'.join([str(i) for i in prefix])
+        suffix = suffix.strip()
+    if suffix:
+        #import pdb; pdb.set_trace()
+        # massage the suffix.
+        for pat, repl in _SUFFIX_REPLACEMENTS:
+            suffix = pat.sub(repl, suffix)
+
+    if not suffix:
+        result = prefix
+    else:
+        sep = '-' if 'dev' in suffix else '+'
+        result = prefix + sep + suffix
+    if not is_semver(result):
+        result = None
+    return result
+
+
+def _suggest_normalized_version(s):
+    """Suggest a normalized version close to the given version string.
+
+    If you have a version string that isn't rational (i.e. NormalizedVersion
+    doesn't like it) then you might be able to get an equivalent (or close)
+    rational version from this function.
+
+    This does a number of simple normalizations to the given string, based
+    on observation of versions currently in use on PyPI. Given a dump of
+    those version during PyCon 2009, 4287 of them:
+    - 2312 (53.93%) match NormalizedVersion without change
+      with the automatic suggestion
+    - 3474 (81.04%) match when using this suggestion method
+
+    @param s {str} An irrational version string.
+    @returns A rational version string, or None, if couldn't determine one.
+    """
+    try:
+        _normalized_key(s)
+        return s   # already rational
+    except UnsupportedVersionError:
+        pass
+
+    rs = s.lower()
+
+    # part of this could use maketrans
+    for orig, repl in (('-alpha', 'a'), ('-beta', 'b'), ('alpha', 'a'),
+                       ('beta', 'b'), ('rc', 'c'), ('-final', ''),
+                       ('-pre', 'c'),
+                       ('-release', ''), ('.release', ''), ('-stable', ''),
+                       ('+', '.'), ('_', '.'), (' ', ''), ('.final', ''),
+                       ('final', '')):
+        rs = rs.replace(orig, repl)
+
+    # if something ends with dev or pre, we add a 0
+    rs = re.sub(r"pre$", r"pre0", rs)
+    rs = re.sub(r"dev$", r"dev0", rs)
+
+    # if we have something like "b-2" or "a.2" at the end of the
+    # version, that is pobably beta, alpha, etc
+    # let's remove the dash or dot
+    rs = re.sub(r"([abc]|rc)[\-\.](\d+)$", r"\1\2", rs)
+
+    # 1.0-dev-r371 -> 1.0.dev371
+    # 0.1-dev-r79 -> 0.1.dev79
+    rs = re.sub(r"[\-\.](dev)[\-\.]?r?(\d+)$", r".\1\2", rs)
+
+    # Clean: 2.0.a.3, 2.0.b1, 0.9.0~c1
+    rs = re.sub(r"[.~]?([abc])\.?", r"\1", rs)
+
+    # Clean: v0.3, v1.0
+    if rs.startswith('v'):
+        rs = rs[1:]
+
+    # Clean leading '0's on numbers.
+    #TODO: unintended side-effect on, e.g., "2003.05.09"
+    # PyPI stats: 77 (~2%) better
+    rs = re.sub(r"\b0+(\d+)(?!\d)", r"\1", rs)
+
+    # Clean a/b/c with no version. E.g. "1.0a" -> "1.0a0". Setuptools infers
+    # zero.
+    # PyPI stats: 245 (7.56%) better
+    rs = re.sub(r"(\d+[abc])$", r"\g<1>0", rs)
+
+    # the 'dev-rNNN' tag is a dev tag
+    rs = re.sub(r"\.?(dev-r|dev\.r)\.?(\d+)$", r".dev\2", rs)
+
+    # clean the - when used as a pre delimiter
+    rs = re.sub(r"-(a|b|c)(\d+)$", r"\1\2", rs)
+
+    # a terminal "dev" or "devel" can be changed into ".dev0"
+    rs = re.sub(r"[\.\-](dev|devel)$", r".dev0", rs)
+
+    # a terminal "dev" can be changed into ".dev0"
+    rs = re.sub(r"(?![\.\-])dev$", r".dev0", rs)
+
+    # a terminal "final" or "stable" can be removed
+    rs = re.sub(r"(final|stable)$", "", rs)
+
+    # The 'r' and the '-' tags are post release tags
+    #   0.4a1.r10       ->  0.4a1.post10
+    #   0.9.33-17222    ->  0.9.33.post17222
+    #   0.9.33-r17222   ->  0.9.33.post17222
+    rs = re.sub(r"\.?(r|-|-r)\.?(\d+)$", r".post\2", rs)
+
+    # Clean 'r' instead of 'dev' usage:
+    #   0.9.33+r17222   ->  0.9.33.dev17222
+    #   1.0dev123       ->  1.0.dev123
+    #   1.0.git123      ->  1.0.dev123
+    #   1.0.bzr123      ->  1.0.dev123
+    #   0.1a0dev.123    ->  0.1a0.dev123
+    # PyPI stats:  ~150 (~4%) better
+    rs = re.sub(r"\.?(dev|git|bzr)\.?(\d+)$", r".dev\2", rs)
+
+    # Clean '.pre' (normalized from '-pre' above) instead of 'c' usage:
+    #   0.2.pre1        ->  0.2c1
+    #   0.2-c1         ->  0.2c1
+    #   1.0preview123   ->  1.0c123
+    # PyPI stats: ~21 (0.62%) better
+    rs = re.sub(r"\.?(pre|preview|-c)(\d+)$", r"c\g<2>", rs)
+
+    # Tcl/Tk uses "px" for their post release markers
+    rs = re.sub(r"p(\d+)$", r".post\1", rs)
+
+    try:
+        _normalized_key(rs)
+    except UnsupportedVersionError:
+        rs = None
+    return rs
+
+#
+#   Legacy version processing (distribute-compatible)
+#
+
+_VERSION_PART = re.compile(r'([a-z]+|\d+|[\.-])', re.I)
+_VERSION_REPLACE = {
+    'pre': 'c',
+    'preview': 'c',
+    '-': 'final-',
+    'rc': 'c',
+    'dev': '@',
+    '': None,
+    '.': None,
+}
+
+
+def _legacy_key(s):
+    def get_parts(s):
+        result = []
+        for p in _VERSION_PART.split(s.lower()):
+            p = _VERSION_REPLACE.get(p, p)
+            if p:
+                if '0' <= p[:1] <= '9':
+                    p = p.zfill(8)
+                else:
+                    p = '*' + p
+                result.append(p)
+        result.append('*final')
+        return result
+
+    result = []
+    for p in get_parts(s):
+        if p.startswith('*'):
+            if p < '*final':
+                while result and result[-1] == '*final-':
+                    result.pop()
+            while result and result[-1] == '00000000':
+                result.pop()
+        result.append(p)
+    return tuple(result)
+
+
+class LegacyVersion(Version):
+    def parse(self, s):
+        return _legacy_key(s)
+
+    @property
+    def is_prerelease(self):
+        result = False
+        for x in self._parts:
+            if (isinstance(x, string_types) and x.startswith('*') and
+                x < '*final'):
+                result = True
+                break
+        return result
+
+
+class LegacyMatcher(Matcher):
+    version_class = LegacyVersion
+
+    _operators = dict(Matcher._operators)
+    _operators['~='] = '_match_compatible'
+
+    numeric_re = re.compile('^(\d+(\.\d+)*)')
+
+    def _match_compatible(self, version, constraint, prefix):
+        if version < constraint:
+            return False
+        m = self.numeric_re.match(str(constraint))
+        if not m:
+            logger.warning('Cannot compute compatible match for version %s '
+                           ' and constraint %s', version, constraint)
+            return True
+        s = m.groups()[0]
+        if '.' in s:
+            s = s.rsplit('.', 1)[0]
+        return _match_prefix(version, s)
+
+#
+#   Semantic versioning
+#
+
+_SEMVER_RE = re.compile(r'^(\d+)\.(\d+)\.(\d+)'
+                        r'(-[a-z0-9]+(\.[a-z0-9-]+)*)?'
+                        r'(\+[a-z0-9]+(\.[a-z0-9-]+)*)?$', re.I)
+
+
+def is_semver(s):
+    return _SEMVER_RE.match(s)
+
+
+def _semantic_key(s):
+    def make_tuple(s, absent):
+        if s is None:
+            result = (absent,)
+        else:
+            parts = s[1:].split('.')
+            # We can't compare ints and strings on Python 3, so fudge it
+            # by zero-filling numeric values so simulate a numeric comparison
+            result = tuple([p.zfill(8) if p.isdigit() else p for p in parts])
+        return result
+
+    m = is_semver(s)
+    if not m:
+        raise UnsupportedVersionError(s)
+    groups = m.groups()
+    major, minor, patch = [int(i) for i in groups[:3]]
+    # choose the '|' and '*' so that versions sort correctly
+    pre, build = make_tuple(groups[3], '|'), make_tuple(groups[5], '*')
+    return (major, minor, patch), pre, build
+
+
+class SemanticVersion(Version):
+    def parse(self, s):
+        return _semantic_key(s)
+
+    @property
+    def is_prerelease(self):
+        return self._parts[1][0] != '|'
+
+
+class SemanticMatcher(Matcher):
+    version_class = SemanticVersion
+
+
+class VersionScheme(object):
+    def __init__(self, key, matcher, suggester=None):
+        self.key = key
+        self.matcher = matcher
+        self.suggester = suggester
+
+    def is_valid_version(self, s):
+        try:
+            self.matcher.version_class(s)
+            result = True
+        except UnsupportedVersionError:
+            result = False
+        return result
+
+    def is_valid_matcher(self, s):
+        try:
+            self.matcher(s)
+            result = True
+        except UnsupportedVersionError:
+            result = False
+        return result
+
+    def is_valid_constraint_list(self, s):
+        """
+        Used for processing some metadata fields
+        """
+        return self.is_valid_matcher('dummy_name (%s)' % s)
+
+    def suggest(self, s):
+        if self.suggester is None:
+            result = None
+        else:
+            result = self.suggester(s)
+        return result
+
+_SCHEMES = {
+    'normalized': VersionScheme(_normalized_key, NormalizedMatcher,
+                                _suggest_normalized_version),
+    'legacy': VersionScheme(_legacy_key, LegacyMatcher, lambda self, s: s),
+    'semantic': VersionScheme(_semantic_key, SemanticMatcher,
+                              _suggest_semantic_version),
+}
+
+_SCHEMES['default'] = _SCHEMES['normalized']
+
+
+def get_scheme(name):
+    if name not in _SCHEMES:
+        raise ValueError('unknown scheme name: %r' % name)
+    return _SCHEMES[name]
@@ -0,0 +1,958 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2013-2014 Vinay Sajip.
+# Licensed to the Python Software Foundation under a contributor agreement.
+# See LICENSE.txt and CONTRIBUTORS.txt.
+#
+from __future__ import unicode_literals
+
+import base64
+import codecs
+import datetime
+import distutils.util
+from email import message_from_file
+import hashlib
+import imp
+import json
+import logging
+import os
+import posixpath
+import re
+import shutil
+import sys
+import tempfile
+import zipfile
+
+from . import __version__, DistlibException
+from .compat import sysconfig, ZipFile, fsdecode, text_type, filter
+from .database import InstalledDistribution
+from .metadata import Metadata, METADATA_FILENAME
+from .util import (FileOperator, convert_path, CSVReader, CSVWriter, Cache,
+                   cached_property, get_cache_base, read_exports, tempdir)
+from .version import NormalizedVersion, UnsupportedVersionError
+
+logger = logging.getLogger(__name__)
+
+cache = None    # created when needed
+
+if hasattr(sys, 'pypy_version_info'):
+    IMP_PREFIX = 'pp'
+elif sys.platform.startswith('java'):
+    IMP_PREFIX = 'jy'
+elif sys.platform == 'cli':
+    IMP_PREFIX = 'ip'
+else:
+    IMP_PREFIX = 'cp'
+
+VER_SUFFIX = sysconfig.get_config_var('py_version_nodot')
+if not VER_SUFFIX:   # pragma: no cover
+    VER_SUFFIX = '%s%s' % sys.version_info[:2]
+PYVER = 'py' + VER_SUFFIX
+IMPVER = IMP_PREFIX + VER_SUFFIX
+
+ARCH = distutils.util.get_platform().replace('-', '_').replace('.', '_')
+
+ABI = sysconfig.get_config_var('SOABI')
+if ABI and ABI.startswith('cpython-'):
+    ABI = ABI.replace('cpython-', 'cp')
+else:
+    def _derive_abi():
+        parts = ['cp', VER_SUFFIX]
+        if sysconfig.get_config_var('Py_DEBUG'):
+            parts.append('d')
+        if sysconfig.get_config_var('WITH_PYMALLOC'):
+            parts.append('m')
+        if sysconfig.get_config_var('Py_UNICODE_SIZE') == 4:
+            parts.append('u')
+        return ''.join(parts)
+    ABI = _derive_abi()
+    del _derive_abi
+
+FILENAME_RE = re.compile(r'''
+(?P<nm>[^-]+)
+-(?P<vn>\d+[^-]*)
+(-(?P<bn>\d+[^-]*))?
+-(?P<py>\w+\d+(\.\w+\d+)*)
+-(?P<bi>\w+)
+-(?P<ar>\w+)
+\.whl$
+''', re.IGNORECASE | re.VERBOSE)
+
+NAME_VERSION_RE = re.compile(r'''
+(?P<nm>[^-]+)
+-(?P<vn>\d+[^-]*)
+(-(?P<bn>\d+[^-]*))?$
+''', re.IGNORECASE | re.VERBOSE)
+
+SHEBANG_RE = re.compile(br'\s*#![^\r\n]*')
+
+if os.sep == '/':
+    to_posix = lambda o: o
+else:
+    to_posix = lambda o: o.replace(os.sep, '/')
+
+
+class Mounter(object):
+    def __init__(self):
+        self.impure_wheels = {}
+        self.libs = {}
+
+    def add(self, pathname, extensions):
+        self.impure_wheels[pathname] = extensions
+        self.libs.update(extensions)
+
+    def remove(self, pathname):
+        extensions = self.impure_wheels.pop(pathname)
+        for k, v in extensions:
+            if k in self.libs:
+                del self.libs[k]
+
+    def find_module(self, fullname, path=None):
+        if fullname in self.libs:
+            result = self
+        else:
+            result = None
+        return result
+
+    def load_module(self, fullname):
+        if fullname in sys.modules:
+            result = sys.modules[fullname]
+        else:
+            if fullname not in self.libs:
+                raise ImportError('unable to find extension for %s' % fullname)
+            result = imp.load_dynamic(fullname, self.libs[fullname])
+            result.__loader__ = self
+            parts = fullname.rsplit('.', 1)
+            if len(parts) > 1:
+                result.__package__ = parts[0]
+        return result
+
+_hook = Mounter()
+
+
+class Wheel(object):
+    """
+    Class to build and install from Wheel files (PEP 427).
+    """
+
+    wheel_version = (1, 1)
+    hash_kind = 'sha256'
+
+    def __init__(self, filename=None, sign=False, verify=False):
+        """
+        Initialise an instance using a (valid) filename.
+        """
+        self.sign = sign
+        self.should_verify = verify
+        self.buildver = ''
+        self.pyver = [PYVER]
+        self.abi = ['none']
+        self.arch = ['any']
+        self.dirname = os.getcwd()
+        if filename is None:
+            self.name = 'dummy'
+            self.version = '0.1'
+            self._filename = self.filename
+        else:
+            m = NAME_VERSION_RE.match(filename)
+            if m:
+                info = m.groupdict('')
+                self.name = info['nm']
+                # Reinstate the local version separator
+                self.version = info['vn'].replace('_', '-')
+                self.buildver = info['bn']
+                self._filename = self.filename
+            else:
+                dirname, filename = os.path.split(filename)
+                m = FILENAME_RE.match(filename)
+                if not m:
+                    raise DistlibException('Invalid name or '
+                                           'filename: %r' % filename)
+                if dirname:
+                    self.dirname = os.path.abspath(dirname)
+                self._filename = filename
+                info = m.groupdict('')
+                self.name = info['nm']
+                self.version = info['vn']
+                self.buildver = info['bn']
+                self.pyver = info['py'].split('.')
+                self.abi = info['bi'].split('.')
+                self.arch = info['ar'].split('.')
+
+    @property
+    def filename(self):
+        """
+        Build and return a filename from the various components.
+        """
+        if self.buildver:
+            buildver = '-' + self.buildver
+        else:
+            buildver = ''
+        pyver = '.'.join(self.pyver)
+        abi = '.'.join(self.abi)
+        arch = '.'.join(self.arch)
+        # replace - with _ as a local version separator
+        version = self.version.replace('-', '_')
+        return '%s-%s%s-%s-%s-%s.whl' % (self.name, version, buildver,
+                                         pyver, abi, arch)
+
+    @property
+    def exists(self):
+        path = os.path.join(self.dirname, self.filename)
+        return os.path.isfile(path)
+
+    @property
+    def tags(self):
+        for pyver in self.pyver:
+            for abi in self.abi:
+                for arch in self.arch:
+                    yield pyver, abi, arch
+
+    @cached_property
+    def metadata(self):
+        pathname = os.path.join(self.dirname, self.filename)
+        name_ver = '%s-%s' % (self.name, self.version)
+        info_dir = '%s.dist-info' % name_ver
+        wrapper = codecs.getreader('utf-8')
+        with ZipFile(pathname, 'r') as zf:
+            wheel_metadata = self.get_wheel_metadata(zf)
+            wv = wheel_metadata['Wheel-Version'].split('.', 1)
+            file_version = tuple([int(i) for i in wv])
+            if file_version < (1, 1):
+                fn = 'METADATA'
+            else:
+                fn = METADATA_FILENAME
+            try:
+                metadata_filename = posixpath.join(info_dir, fn)
+                with zf.open(metadata_filename) as bf:
+                    wf = wrapper(bf)
+                    result = Metadata(fileobj=wf)
+            except KeyError:
+                raise ValueError('Invalid wheel, because %s is '
+                                 'missing' % fn)
+        return result
+
+    def get_wheel_metadata(self, zf):
+        name_ver = '%s-%s' % (self.name, self.version)
+        info_dir = '%s.dist-info' % name_ver
+        metadata_filename = posixpath.join(info_dir, 'WHEEL')
+        with zf.open(metadata_filename) as bf:
+            wf = codecs.getreader('utf-8')(bf)
+            message = message_from_file(wf)
+        return dict(message)
+
+    @cached_property
+    def info(self):
+        pathname = os.path.join(self.dirname, self.filename)
+        with ZipFile(pathname, 'r') as zf:
+            result = self.get_wheel_metadata(zf)
+        return result
+
+    def process_shebang(self, data):
+        m = SHEBANG_RE.match(data)
+        if m:
+            data = b'#!python' + data[m.end():]
+        else:
+            cr = data.find(b'\r')
+            lf = data.find(b'\n')
+            if cr < 0 or cr > lf:
+                term = b'\n'
+            else:
+                if data[cr:cr + 2] == b'\r\n':
+                    term = b'\r\n'
+                else:
+                    term = b'\r'
+            data = b'#!python' + term + data
+        return data
+
+    def get_hash(self, data, hash_kind=None):
+        if hash_kind is None:
+            hash_kind = self.hash_kind
+        try:
+            hasher = getattr(hashlib, hash_kind)
+        except AttributeError:
+            raise DistlibException('Unsupported hash algorithm: %r' % hash_kind)
+        result = hasher(data).digest()
+        result = base64.urlsafe_b64encode(result).rstrip(b'=').decode('ascii')
+        return hash_kind, result
+
+    def write_record(self, records, record_path, base):
+        with CSVWriter(record_path) as writer:
+            for row in records:
+                writer.writerow(row)
+            p = to_posix(os.path.relpath(record_path, base))
+            writer.writerow((p, '', ''))
+
+    def write_records(self, info, libdir, archive_paths):
+        records = []
+        distinfo, info_dir = info
+        hasher = getattr(hashlib, self.hash_kind)
+        for ap, p in archive_paths:
+            with open(p, 'rb') as f:
+                data = f.read()
+            digest = '%s=%s' % self.get_hash(data)
+            size = os.path.getsize(p)
+            records.append((ap, digest, size))
+
+        p = os.path.join(distinfo, 'RECORD')
+        self.write_record(records, p, libdir)
+        ap = to_posix(os.path.join(info_dir, 'RECORD'))
+        archive_paths.append((ap, p))
+
+    def build_zip(self, pathname, archive_paths):
+        with ZipFile(pathname, 'w', zipfile.ZIP_DEFLATED) as zf:
+            for ap, p in archive_paths:
+                logger.debug('Wrote %s to %s in wheel', p, ap)
+                zf.write(p, ap)
+
+    def build(self, paths, tags=None, wheel_version=None):
+        """
+        Build a wheel from files in specified paths, and use any specified tags
+        when determining the name of the wheel.
+        """
+        if tags is None:
+            tags = {}
+
+        libkey = list(filter(lambda o: o in paths, ('purelib', 'platlib')))[0]
+        if libkey == 'platlib':
+            is_pure = 'false'
+            default_pyver = [IMPVER]
+            default_abi = [ABI]
+            default_arch = [ARCH]
+        else:
+            is_pure = 'true'
+            default_pyver = [PYVER]
+            default_abi = ['none']
+            default_arch = ['any']
+
+        self.pyver = tags.get('pyver', default_pyver)
+        self.abi = tags.get('abi', default_abi)
+        self.arch = tags.get('arch', default_arch)
+
+        libdir = paths[libkey]
+
+        name_ver = '%s-%s' % (self.name, self.version)
+        data_dir = '%s.data' % name_ver
+        info_dir = '%s.dist-info' % name_ver
+
+        archive_paths = []
+
+        # First, stuff which is not in site-packages
+        for key in ('data', 'headers', 'scripts'):
+            if key not in paths:
+                continue
+            path = paths[key]
+            if os.path.isdir(path):
+                for root, dirs, files in os.walk(path):
+                    for fn in files:
+                        p = fsdecode(os.path.join(root, fn))
+                        rp = os.path.relpath(p, path)
+                        ap = to_posix(os.path.join(data_dir, key, rp))
+                        archive_paths.append((ap, p))
+                        if key == 'scripts' and not p.endswith('.exe'):
+                            with open(p, 'rb') as f:
+                                data = f.read()
+                            data = self.process_shebang(data)
+                            with open(p, 'wb') as f:
+                                f.write(data)
+
+        # Now, stuff which is in site-packages, other than the
+        # distinfo stuff.
+        path = libdir
+        distinfo = None
+        for root, dirs, files in os.walk(path):
+            if root == path:
+                # At the top level only, save distinfo for later
+                # and skip it for now
+                for i, dn in enumerate(dirs):
+                    dn = fsdecode(dn)
+                    if dn.endswith('.dist-info'):
+                        distinfo = os.path.join(root, dn)
+                        del dirs[i]
+                        break
+                assert distinfo, '.dist-info directory expected, not found'
+
+            for fn in files:
+                # comment out next suite to leave .pyc files in
+                if fsdecode(fn).endswith(('.pyc', '.pyo')):
+                    continue
+                p = os.path.join(root, fn)
+                rp = to_posix(os.path.relpath(p, path))
+                archive_paths.append((rp, p))
+
+        # Now distinfo. Assumed to be flat, i.e. os.listdir is enough.
+        files = os.listdir(distinfo)
+        for fn in files:
+            if fn not in ('RECORD', 'INSTALLER', 'SHARED'):
+                p = fsdecode(os.path.join(distinfo, fn))
+                ap = to_posix(os.path.join(info_dir, fn))
+                archive_paths.append((ap, p))
+
+        wheel_metadata = [
+            'Wheel-Version: %d.%d' % (wheel_version or self.wheel_version),
+            'Generator: distlib %s' % __version__,
+            'Root-Is-Purelib: %s' % is_pure,
+        ]
+        for pyver, abi, arch in self.tags:
+            wheel_metadata.append('Tag: %s-%s-%s' % (pyver, abi, arch))
+        p = os.path.join(distinfo, 'WHEEL')
+        with open(p, 'w') as f:
+            f.write('\n'.join(wheel_metadata))
+        ap = to_posix(os.path.join(info_dir, 'WHEEL'))
+        archive_paths.append((ap, p))
+
+        # Now, at last, RECORD.
+        # Paths in here are archive paths - nothing else makes sense.
+        self.write_records((distinfo, info_dir), libdir, archive_paths)
+        # Now, ready to build the zip file
+        pathname = os.path.join(self.dirname, self.filename)
+        self.build_zip(pathname, archive_paths)
+        return pathname
+
+    def install(self, paths, maker, **kwargs):
+        """
+        Install a wheel to the specified paths. If kwarg ``warner`` is
+        specified, it should be a callable, which will be called with two
+        tuples indicating the wheel version of this software and the wheel
+        version in the file, if there is a discrepancy in the versions.
+        This can be used to issue any warnings to raise any exceptions.
+        If kwarg ``lib_only`` is True, only the purelib/platlib files are
+        installed, and the headers, scripts, data and dist-info metadata are
+        not written.
+
+        The return value is a :class:`InstalledDistribution` instance unless
+        ``options.lib_only`` is True, in which case the return value is ``None``.
+        """
+
+        dry_run = maker.dry_run
+        warner = kwargs.get('warner')
+        lib_only = kwargs.get('lib_only', False)
+
+        pathname = os.path.join(self.dirname, self.filename)
+        name_ver = '%s-%s' % (self.name, self.version)
+        data_dir = '%s.data' % name_ver
+        info_dir = '%s.dist-info' % name_ver
+
+        metadata_name = posixpath.join(info_dir, METADATA_FILENAME)
+        wheel_metadata_name = posixpath.join(info_dir, 'WHEEL')
+        record_name = posixpath.join(info_dir, 'RECORD')
+
+        wrapper = codecs.getreader('utf-8')
+
+        with ZipFile(pathname, 'r') as zf:
+            with zf.open(wheel_metadata_name) as bwf:
+                wf = wrapper(bwf)
+                message = message_from_file(wf)
+            wv = message['Wheel-Version'].split('.', 1)
+            file_version = tuple([int(i) for i in wv])
+            if (file_version != self.wheel_version) and warner:
+                warner(self.wheel_version, file_version)
+
+            if message['Root-Is-Purelib'] == 'true':
+                libdir = paths['purelib']
+            else:
+                libdir = paths['platlib']
+
+            records = {}
+            with zf.open(record_name) as bf:
+                with CSVReader(stream=bf) as reader:
+                    for row in reader:
+                        p = row[0]
+                        records[p] = row
+
+            data_pfx = posixpath.join(data_dir, '')
+            info_pfx = posixpath.join(info_dir, '')
+            script_pfx = posixpath.join(data_dir, 'scripts', '')
+
+            # make a new instance rather than a copy of maker's,
+            # as we mutate it
+            fileop = FileOperator(dry_run=dry_run)
+            fileop.record = True    # so we can rollback if needed
+
+            bc = not sys.dont_write_bytecode    # Double negatives. Lovely!
+
+            outfiles = []   # for RECORD writing
+
+            # for script copying/shebang processing
+            workdir = tempfile.mkdtemp()
+            # set target dir later
+            # we default add_launchers to False, as the
+            # Python Launcher should be used instead
+            maker.source_dir = workdir
+            maker.target_dir = None
+            try:
+                for zinfo in zf.infolist():
+                    arcname = zinfo.filename
+                    if isinstance(arcname, text_type):
+                        u_arcname = arcname
+                    else:
+                        u_arcname = arcname.decode('utf-8')
+                    # The signature file won't be in RECORD,
+                    # and we  don't currently don't do anything with it
+                    if u_arcname.endswith('/RECORD.jws'):
+                        continue
+                    row = records[u_arcname]
+                    if row[2] and str(zinfo.file_size) != row[2]:
+                        raise DistlibException('size mismatch for '
+                                               '%s' % u_arcname)
+                    if row[1]:
+                        kind, value = row[1].split('=', 1)
+                        with zf.open(arcname) as bf:
+                            data = bf.read()
+                        _, digest = self.get_hash(data, kind)
+                        if digest != value:
+                            raise DistlibException('digest mismatch for '
+                                                   '%s' % arcname)
+
+                    if lib_only and u_arcname.startswith((info_pfx, data_pfx)):
+                        logger.debug('lib_only: skipping %s', u_arcname)
+                        continue
+                    is_script = (u_arcname.startswith(script_pfx)
+                                 and not u_arcname.endswith('.exe'))
+
+                    if u_arcname.startswith(data_pfx):
+                        _, where, rp = u_arcname.split('/', 2)
+                        outfile = os.path.join(paths[where], convert_path(rp))
+                    else:
+                        # meant for site-packages.
+                        if u_arcname in (wheel_metadata_name, record_name):
+                            continue
+                        outfile = os.path.join(libdir, convert_path(u_arcname))
+                    if not is_script:
+                        with zf.open(arcname) as bf:
+                            fileop.copy_stream(bf, outfile)
+                        outfiles.append(outfile)
+                        # Double check the digest of the written file
+                        if not dry_run and row[1]:
+                            with open(outfile, 'rb') as bf:
+                                data = bf.read()
+                                _, newdigest = self.get_hash(data, kind)
+                                if newdigest != digest:
+                                    raise DistlibException('digest mismatch '
+                                                           'on write for '
+                                                           '%s' % outfile)
+                        if bc and outfile.endswith('.py'):
+                            try:
+                                pyc = fileop.byte_compile(outfile)
+                                outfiles.append(pyc)
+                            except Exception:
+                                # Don't give up if byte-compilation fails,
+                                # but log it and perhaps warn the user
+                                logger.warning('Byte-compilation failed',
+                                               exc_info=True)
+                    else:
+                        fn = os.path.basename(convert_path(arcname))
+                        workname = os.path.join(workdir, fn)
+                        with zf.open(arcname) as bf:
+                            fileop.copy_stream(bf, workname)
+
+                        dn, fn = os.path.split(outfile)
+                        maker.target_dir = dn
+                        filenames = maker.make(fn)
+                        fileop.set_executable_mode(filenames)
+                        outfiles.extend(filenames)
+
+                if lib_only:
+                    logger.debug('lib_only: returning None')
+                    dist = None
+                else:
+                    # Generate scripts
+
+                    # Try to get pydist.json so we can see if there are
+                    # any commands to generate. If this fails (e.g. because
+                    # of a legacy wheel), log a warning but don't give up.
+                    commands = None
+                    file_version = self.info['Wheel-Version']
+                    if file_version == '1.0':
+                        # Use legacy info
+                        ep = posixpath.join(info_dir, 'entry_points.txt')
+                        try:
+                            with zf.open(ep) as bwf:
+                                epdata = read_exports(bwf)
+                            commands = {}
+                            for key in ('console', 'gui'):
+                                k = '%s_scripts' % key
+                                if k in epdata:
+                                    commands['wrap_%s' % key] = d = {}
+                                    for v in epdata[k].values():
+                                        s = '%s:%s' % (v.prefix, v.suffix)
+                                        if v.flags:
+                                            s += ' %s' % v.flags
+                                        d[v.name] = s
+                        except Exception:
+                            logger.warning('Unable to read legacy script '
+                                           'metadata, so cannot generate '
+                                           'scripts')
+                    else:
+                        try:
+                            with zf.open(metadata_name) as bwf:
+                                wf = wrapper(bwf)
+                                commands = json.load(wf).get('commands')
+                        except Exception:
+                            logger.warning('Unable to read JSON metadata, so '
+                                           'cannot generate scripts')
+                    if commands:
+                        console_scripts = commands.get('wrap_console', {})
+                        gui_scripts = commands.get('wrap_gui', {})
+                        if console_scripts or gui_scripts:
+                            script_dir = paths.get('scripts', '')
+                            if not os.path.isdir(script_dir):
+                                raise ValueError('Valid script path not '
+                                                 'specified')
+                            maker.target_dir = script_dir
+                            for k, v in console_scripts.items():
+                                script = '%s = %s' % (k, v)
+                                filenames = maker.make(script)
+                                fileop.set_executable_mode(filenames)
+
+                            if gui_scripts:
+                                options = {'gui': True }
+                                for k, v in gui_scripts.items():
+                                    script = '%s = %s' % (k, v)
+                                    filenames = maker.make(script, options)
+                                    fileop.set_executable_mode(filenames)
+
+                    p = os.path.join(libdir, info_dir)
+                    dist = InstalledDistribution(p)
+
+                    # Write SHARED
+                    paths = dict(paths)     # don't change passed in dict
+                    del paths['purelib']
+                    del paths['platlib']
+                    paths['lib'] = libdir
+                    p = dist.write_shared_locations(paths, dry_run)
+                    if p:
+                        outfiles.append(p)
+
+                    # Write RECORD
+                    dist.write_installed_files(outfiles, paths['prefix'],
+                                               dry_run)
+                return dist
+            except Exception:  # pragma: no cover
+                logger.exception('installation failed.')
+                fileop.rollback()
+                raise
+            finally:
+                shutil.rmtree(workdir)
+
+    def _get_dylib_cache(self):
+        global cache
+        if cache is None:
+            # Use native string to avoid issues on 2.x: see Python #20140.
+            base = os.path.join(get_cache_base(), str('dylib-cache'),
+                                sys.version[:3])
+            cache = Cache(base)
+        return cache
+
+    def _get_extensions(self):
+        pathname = os.path.join(self.dirname, self.filename)
+        name_ver = '%s-%s' % (self.name, self.version)
+        info_dir = '%s.dist-info' % name_ver
+        arcname = posixpath.join(info_dir, 'EXTENSIONS')
+        wrapper = codecs.getreader('utf-8')
+        result = []
+        with ZipFile(pathname, 'r') as zf:
+            try:
+                with zf.open(arcname) as bf:
+                    wf = wrapper(bf)
+                    extensions = json.load(wf)
+                    cache = self._get_dylib_cache()
+                    prefix = cache.prefix_to_dir(pathname)
+                    cache_base = os.path.join(cache.base, prefix)
+                    if not os.path.isdir(cache_base):
+                        os.makedirs(cache_base)
+                    for name, relpath in extensions.items():
+                        dest = os.path.join(cache_base, convert_path(relpath))
+                        if not os.path.exists(dest):
+                            extract = True
+                        else:
+                            file_time = os.stat(dest).st_mtime
+                            file_time = datetime.datetime.fromtimestamp(file_time)
+                            info = zf.getinfo(relpath)
+                            wheel_time = datetime.datetime(*info.date_time)
+                            extract = wheel_time > file_time
+                        if extract:
+                            zf.extract(relpath, cache_base)
+                        result.append((name, dest))
+            except KeyError:
+                pass
+        return result
+
+    def is_compatible(self):
+        """
+        Determine if a wheel is compatible with the running system.
+        """
+        return is_compatible(self)
+
+    def is_mountable(self):
+        """
+        Determine if a wheel is asserted as mountable by its metadata.
+        """
+        return True # for now - metadata details TBD
+
+    def mount(self, append=False):
+        pathname = os.path.abspath(os.path.join(self.dirname, self.filename))
+        if not self.is_compatible():
+            msg = 'Wheel %s not compatible with this Python.' % pathname
+            raise DistlibException(msg)
+        if not self.is_mountable():
+            msg = 'Wheel %s is marked as not mountable.' % pathname
+            raise DistlibException(msg)
+        if pathname in sys.path:
+            logger.debug('%s already in path', pathname)
+        else:
+            if append:
+                sys.path.append(pathname)
+            else:
+                sys.path.insert(0, pathname)
+            extensions = self._get_extensions()
+            if extensions:
+                if _hook not in sys.meta_path:
+                    sys.meta_path.append(_hook)
+                _hook.add(pathname, extensions)
+
+    def unmount(self):
+        pathname = os.path.abspath(os.path.join(self.dirname, self.filename))
+        if pathname not in sys.path:
+            logger.debug('%s not in path', pathname)
+        else:
+            sys.path.remove(pathname)
+            if pathname in _hook.impure_wheels:
+                _hook.remove(pathname)
+            if not _hook.impure_wheels:
+                if _hook in sys.meta_path:
+                    sys.meta_path.remove(_hook)
+
+    def verify(self):
+        pathname = os.path.join(self.dirname, self.filename)
+        name_ver = '%s-%s' % (self.name, self.version)
+        data_dir = '%s.data' % name_ver
+        info_dir = '%s.dist-info' % name_ver
+
+        metadata_name = posixpath.join(info_dir, METADATA_FILENAME)
+        wheel_metadata_name = posixpath.join(info_dir, 'WHEEL')
+        record_name = posixpath.join(info_dir, 'RECORD')
+
+        wrapper = codecs.getreader('utf-8')
+
+        with ZipFile(pathname, 'r') as zf:
+            with zf.open(wheel_metadata_name) as bwf:
+                wf = wrapper(bwf)
+                message = message_from_file(wf)
+            wv = message['Wheel-Version'].split('.', 1)
+            file_version = tuple([int(i) for i in wv])
+            # TODO version verification
+
+            records = {}
+            with zf.open(record_name) as bf:
+                with CSVReader(stream=bf) as reader:
+                    for row in reader:
+                        p = row[0]
+                        records[p] = row
+
+            for zinfo in zf.infolist():
+                arcname = zinfo.filename
+                if isinstance(arcname, text_type):
+                    u_arcname = arcname
+                else:
+                    u_arcname = arcname.decode('utf-8')
+                if '..' in u_arcname:
+                    raise DistlibException('invalid entry in '
+                                           'wheel: %r' % u_arcname)
+
+                # The signature file won't be in RECORD,
+                # and we  don't currently don't do anything with it
+                if u_arcname.endswith('/RECORD.jws'):
+                    continue
+                row = records[u_arcname]
+                if row[2] and str(zinfo.file_size) != row[2]:
+                    raise DistlibException('size mismatch for '
+                                           '%s' % u_arcname)
+                if row[1]:
+                    kind, value = row[1].split('=', 1)
+                    with zf.open(arcname) as bf:
+                        data = bf.read()
+                    _, digest = self.get_hash(data, kind)
+                    if digest != value:
+                        raise DistlibException('digest mismatch for '
+                                               '%s' % arcname)
+
+    def update(self, modifier, dest_dir=None, **kwargs):
+        """
+        Update the contents of a wheel in a generic way. The modifier should
+        be a callable which expects a dictionary argument: its keys are
+        archive-entry paths, and its values are absolute filesystem paths
+        where the contents the corresponding archive entries can be found. The
+        modifier is free to change the contents of the files pointed to, add
+        new entries and remove entries, before returning. This method will
+        extract the entire contents of the wheel to a temporary location, call
+        the modifier, and then use the passed (and possibly updated)
+        dictionary to write a new wheel. If ``dest_dir`` is specified, the new
+        wheel is written there -- otherwise, the original wheel is overwritten.
+
+        The modifier should return True if it updated the wheel, else False.
+        This method returns the same value the modifier returns.
+        """
+
+        def get_version(path_map, info_dir):
+            version = path = None
+            key = '%s/%s' % (info_dir, METADATA_FILENAME)
+            if key not in path_map:
+                key = '%s/PKG-INFO' % info_dir
+            if key in path_map:
+                path = path_map[key]
+                version = Metadata(path=path).version
+            return version, path
+
+        def update_version(version, path):
+            updated = None
+            try:
+                v = NormalizedVersion(version)
+                i = version.find('-')
+                if i < 0:
+                    updated = '%s-1' % version
+                else:
+                    parts = [int(s) for s in version[i + 1:].split('.')]
+                    parts[-1] += 1
+                    updated = '%s-%s' % (version[:i],
+                                         '.'.join(str(i) for i in parts))
+            except UnsupportedVersionError:
+                logger.debug('Cannot update non-compliant (PEP-440) '
+                             'version %r', version)
+            if updated:
+                md = Metadata(path=path)
+                md.version = updated
+                legacy = not path.endswith(METADATA_FILENAME)
+                md.write(path=path, legacy=legacy)
+                logger.debug('Version updated from %r to %r', version,
+                             updated)
+
+        pathname = os.path.join(self.dirname, self.filename)
+        name_ver = '%s-%s' % (self.name, self.version)
+        info_dir = '%s.dist-info' % name_ver
+        record_name = posixpath.join(info_dir, 'RECORD')
+        with tempdir() as workdir:
+            with ZipFile(pathname, 'r') as zf:
+                path_map = {}
+                for zinfo in zf.infolist():
+                    arcname = zinfo.filename
+                    if isinstance(arcname, text_type):
+                        u_arcname = arcname
+                    else:
+                        u_arcname = arcname.decode('utf-8')
+                    if u_arcname == record_name:
+                        continue
+                    if '..' in u_arcname:
+                        raise DistlibException('invalid entry in '
+                                               'wheel: %r' % u_arcname)
+                    zf.extract(zinfo, workdir)
+                    path = os.path.join(workdir, convert_path(u_arcname))
+                    path_map[u_arcname] = path
+
+            # Remember the version.
+            original_version, _ = get_version(path_map, info_dir)
+            # Files extracted. Call the modifier.
+            modified = modifier(path_map, **kwargs)
+            if modified:
+                # Something changed - need to build a new wheel.
+                current_version, path = get_version(path_map, info_dir)
+                if current_version and (current_version == original_version):
+                    # Add or update local version to signify changes.
+                    update_version(current_version, path)
+                # Decide where the new wheel goes.
+                if dest_dir is None:
+                    fd, newpath = tempfile.mkstemp(suffix='.whl',
+                                                   prefix='wheel-update-',
+                                                   dir=workdir)
+                    os.close(fd)
+                else:
+                    if not os.path.isdir(dest_dir):
+                        raise DistlibException('Not a directory: %r' % dest_dir)
+                    newpath = os.path.join(dest_dir, self.filename)
+                archive_paths = list(path_map.items())
+                distinfo = os.path.join(workdir, info_dir)
+                info = distinfo, info_dir
+                self.write_records(info, workdir, archive_paths)
+                self.build_zip(newpath, archive_paths)
+                if dest_dir is None:
+                    shutil.copyfile(newpath, pathname)
+        return modified
+
+def compatible_tags():
+    """
+    Return (pyver, abi, arch) tuples compatible with this Python.
+    """
+    versions = [VER_SUFFIX]
+    major = VER_SUFFIX[0]
+    for minor in range(sys.version_info[1] - 1, - 1, -1):
+        versions.append(''.join([major, str(minor)]))
+
+    abis = []
+    for suffix, _, _ in imp.get_suffixes():
+        if suffix.startswith('.abi'):
+            abis.append(suffix.split('.', 2)[1])
+    abis.sort()
+    if ABI != 'none':
+        abis.insert(0, ABI)
+    abis.append('none')
+    result = []
+
+    arches = [ARCH]
+    if sys.platform == 'darwin':
+        m = re.match('(\w+)_(\d+)_(\d+)_(\w+)$', ARCH)
+        if m:
+            name, major, minor, arch = m.groups()
+            minor = int(minor)
+            matches = [arch]
+            if arch in ('i386', 'ppc'):
+                matches.append('fat')
+            if arch in ('i386', 'ppc', 'x86_64'):
+                matches.append('fat3')
+            if arch in ('ppc64', 'x86_64'):
+                matches.append('fat64')
+            if arch in ('i386', 'x86_64'):
+                matches.append('intel')
+            if arch in ('i386', 'x86_64', 'intel', 'ppc', 'ppc64'):
+                matches.append('universal')
+            while minor >= 0:
+                for match in matches:
+                    s = '%s_%s_%s_%s' % (name, major, minor, match)
+                    if s != ARCH:   # already there
+                        arches.append(s)
+                minor -= 1
+
+    # Most specific - our Python version, ABI and arch
+    for abi in abis:
+        for arch in arches:
+            result.append((''.join((IMP_PREFIX, versions[0])), abi, arch))
+
+    # where no ABI / arch dependency, but IMP_PREFIX dependency
+    for i, version in enumerate(versions):
+        result.append((''.join((IMP_PREFIX, version)), 'none', 'any'))
+        if i == 0:
+            result.append((''.join((IMP_PREFIX, version[0])), 'none', 'any'))
+
+    # no IMP_PREFIX, ABI or arch dependency
+    for i, version in enumerate(versions):
+        result.append((''.join(('py', version)), 'none', 'any'))
+        if i == 0:
+            result.append((''.join(('py', version[0])), 'none', 'any'))
+    return set(result)
+
+
+COMPATIBLE_TAGS = compatible_tags()
+
+del compatible_tags
+
+
+def is_compatible(wheel, tags=None):
+    if not isinstance(wheel, Wheel):
+        wheel = Wheel(wheel)    # assume it's a filename
+    result = False
+    if tags is None:
+        tags = COMPATIBLE_TAGS
+    for ver, abi, arch in tags:
+        if ver in wheel.pyver and abi in wheel.abi and arch in wheel.arch:
+            result = True
+            break
+    return result
@@ -0,0 +1,23 @@
+"""
+HTML parsing library based on the WHATWG "HTML5"
+specification. The parser is designed to be compatible with existing
+HTML found in the wild and implements well-defined error recovery that
+is largely compatible with modern desktop web browsers.
+
+Example usage:
+
+import html5lib
+f = open("my_document.html")
+tree = html5lib.parse(f)
+"""
+
+from __future__ import absolute_import, division, unicode_literals
+
+from .html5parser import HTMLParser, parse, parseFragment
+from .treebuilders import getTreeBuilder
+from .treewalkers import getTreeWalker
+from .serializer import serialize
+
+__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
+           "getTreeWalker", "serialize"]
+__version__ = "1.0b3"
@@ -0,0 +1,12 @@
+from __future__ import absolute_import, division, unicode_literals
+
+
+class Filter(object):
+    def __init__(self, source):
+        self.source = source
+
+    def __iter__(self):
+        return iter(self.source)
+
+    def __getattr__(self, name):
+        return getattr(self.source, name)
@@ -0,0 +1,20 @@
+from __future__ import absolute_import, division, unicode_literals
+
+from . import _base
+
+try:
+    from collections import OrderedDict
+except ImportError:
+    from ordereddict import OrderedDict
+
+
+class Filter(_base.Filter):
+    def __iter__(self):
+        for token in _base.Filter.__iter__(self):
+            if token["type"] in ("StartTag", "EmptyTag"):
+                attrs = OrderedDict()
+                for name, value in sorted(token["data"].items(),
+                                          key=lambda x: x[0]):
+                    attrs[name] = value
+                token["data"] = attrs
+            yield token
@@ -0,0 +1,65 @@
+from __future__ import absolute_import, division, unicode_literals
+
+from . import _base
+
+
+class Filter(_base.Filter):
+    def __init__(self, source, encoding):
+        _base.Filter.__init__(self, source)
+        self.encoding = encoding
+
+    def __iter__(self):
+        state = "pre_head"
+        meta_found = (self.encoding is None)
+        pending = []
+
+        for token in _base.Filter.__iter__(self):
+            type = token["type"]
+            if type == "StartTag":
+                if token["name"].lower() == "head":
+                    state = "in_head"
+
+            elif type == "EmptyTag":
+                if token["name"].lower() == "meta":
+                    # replace charset with actual encoding
+                    has_http_equiv_content_type = False
+                    for (namespace, name), value in token["data"].items():
+                        if namespace is not None:
+                            continue
+                        elif name.lower() == 'charset':
+                            token["data"][(namespace, name)] = self.encoding
+                            meta_found = True
+                            break
+                        elif name == 'http-equiv' and value.lower() == 'content-type':
+                            has_http_equiv_content_type = True
+                    else:
+                        if has_http_equiv_content_type and (None, "content") in token["data"]:
+                            token["data"][(None, "content")] = 'text/html; charset=%s' % self.encoding
+                            meta_found = True
+
+                elif token["name"].lower() == "head" and not meta_found:
+                    # insert meta into empty head
+                    yield {"type": "StartTag", "name": "head",
+                           "data": token["data"]}
+                    yield {"type": "EmptyTag", "name": "meta",
+                           "data": {(None, "charset"): self.encoding}}
+                    yield {"type": "EndTag", "name": "head"}
+                    meta_found = True
+                    continue
+
+            elif type == "EndTag":
+                if token["name"].lower() == "head" and pending:
+                    # insert meta into head (if necessary) and flush pending queue
+                    yield pending.pop(0)
+                    if not meta_found:
+                        yield {"type": "EmptyTag", "name": "meta",
+                               "data": {(None, "charset"): self.encoding}}
+                    while pending:
+                        yield pending.pop(0)
+                    meta_found = True
+                    state = "post_head"
+
+            if state == "in_head":
+                pending.append(token)
+            else:
+                yield token
@@ -0,0 +1,93 @@
+from __future__ import absolute_import, division, unicode_literals
+
+from gettext import gettext
+_ = gettext
+
+from . import _base
+from ..constants import cdataElements, rcdataElements, voidElements
+
+from ..constants import spaceCharacters
+spaceCharacters = "".join(spaceCharacters)
+
+
+class LintError(Exception):
+    pass
+
+
+class Filter(_base.Filter):
+    def __iter__(self):
+        open_elements = []
+        contentModelFlag = "PCDATA"
+        for token in _base.Filter.__iter__(self):
+            type = token["type"]
+            if type in ("StartTag", "EmptyTag"):
+                name = token["name"]
+                if contentModelFlag != "PCDATA":
+                    raise LintError(_("StartTag not in PCDATA content model flag: %(tag)s") % {"tag": name})
+                if not isinstance(name, str):
+                    raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
+                if not name:
+                    raise LintError(_("Empty tag name"))
+                if type == "StartTag" and name in voidElements:
+                    raise LintError(_("Void element reported as StartTag token: %(tag)s") % {"tag": name})
+                elif type == "EmptyTag" and name not in voidElements:
+                    raise LintError(_("Non-void element reported as EmptyTag token: %(tag)s") % {"tag": token["name"]})
+                if type == "StartTag":
+                    open_elements.append(name)
+                for name, value in token["data"]:
+                    if not isinstance(name, str):
+                        raise LintError(_("Attribute name is not a string: %(name)r") % {"name": name})
+                    if not name:
+                        raise LintError(_("Empty attribute name"))
+                    if not isinstance(value, str):
+                        raise LintError(_("Attribute value is not a string: %(value)r") % {"value": value})
+                if name in cdataElements:
+                    contentModelFlag = "CDATA"
+                elif name in rcdataElements:
+                    contentModelFlag = "RCDATA"
+                elif name == "plaintext":
+                    contentModelFlag = "PLAINTEXT"
+
+            elif type == "EndTag":
+                name = token["name"]
+                if not isinstance(name, str):
+                    raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
+                if not name:
+                    raise LintError(_("Empty tag name"))
+                if name in voidElements:
+                    raise LintError(_("Void element reported as EndTag token: %(tag)s") % {"tag": name})
+                start_name = open_elements.pop()
+                if start_name != name:
+                    raise LintError(_("EndTag (%(end)s) does not match StartTag (%(start)s)") % {"end": name, "start": start_name})
+                contentModelFlag = "PCDATA"
+
+            elif type == "Comment":
+                if contentModelFlag != "PCDATA":
+                    raise LintError(_("Comment not in PCDATA content model flag"))
+
+            elif type in ("Characters", "SpaceCharacters"):
+                data = token["data"]
+                if not isinstance(data, str):
+                    raise LintError(_("Attribute name is not a string: %(name)r") % {"name": data})
+                if not data:
+                    raise LintError(_("%(type)s token with empty data") % {"type": type})
+                if type == "SpaceCharacters":
+                    data = data.strip(spaceCharacters)
+                    if data:
+                        raise LintError(_("Non-space character(s) found in SpaceCharacters token: %(token)r") % {"token": data})
+
+            elif type == "Doctype":
+                name = token["name"]
+                if contentModelFlag != "PCDATA":
+                    raise LintError(_("Doctype not in PCDATA content model flag: %(name)s") % {"name": name})
+                if not isinstance(name, str):
+                    raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
+                # XXX: what to do with token["data"] ?
+
+            elif type in ("ParseError", "SerializeError"):
+                pass
+
+            else:
+                raise LintError(_("Unknown token type: %(type)s") % {"type": type})
+
+            yield token
@@ -0,0 +1,205 @@
+from __future__ import absolute_import, division, unicode_literals
+
+from . import _base
+
+
+class Filter(_base.Filter):
+    def slider(self):
+        previous1 = previous2 = None
+        for token in self.source:
+            if previous1 is not None:
+                yield previous2, previous1, token
+            previous2 = previous1
+            previous1 = token
+        yield previous2, previous1, None
+
+    def __iter__(self):
+        for previous, token, next in self.slider():
+            type = token["type"]
+            if type == "StartTag":
+                if (token["data"] or
+                        not self.is_optional_start(token["name"], previous, next)):
+                    yield token
+            elif type == "EndTag":
+                if not self.is_optional_end(token["name"], next):
+                    yield token
+            else:
+                yield token
+
+    def is_optional_start(self, tagname, previous, next):
+        type = next and next["type"] or None
+        if tagname in 'html':
+            # An html element's start tag may be omitted if the first thing
+            # inside the html element is not a space character or a comment.
+            return type not in ("Comment", "SpaceCharacters")
+        elif tagname == 'head':
+            # A head element's start tag may be omitted if the first thing
+            # inside the head element is an element.
+            # XXX: we also omit the start tag if the head element is empty
+            if type in ("StartTag", "EmptyTag"):
+                return True
+            elif type == "EndTag":
+                return next["name"] == "head"
+        elif tagname == 'body':
+            # A body element's start tag may be omitted if the first thing
+            # inside the body element is not a space character or a comment,
+            # except if the first thing inside the body element is a script
+            # or style element and the node immediately preceding the body
+            # element is a head element whose end tag has been omitted.
+            if type in ("Comment", "SpaceCharacters"):
+                return False
+            elif type == "StartTag":
+                # XXX: we do not look at the preceding event, so we never omit
+                # the body element's start tag if it's followed by a script or
+                # a style element.
+                return next["name"] not in ('script', 'style')
+            else:
+                return True
+        elif tagname == 'colgroup':
+            # A colgroup element's start tag may be omitted if the first thing
+            # inside the colgroup element is a col element, and if the element
+            # is not immediately preceeded by another colgroup element whose
+            # end tag has been omitted.
+            if type in ("StartTag", "EmptyTag"):
+                # XXX: we do not look at the preceding event, so instead we never
+                # omit the colgroup element's end tag when it is immediately
+                # followed by another colgroup element. See is_optional_end.
+                return next["name"] == "col"
+            else:
+                return False
+        elif tagname == 'tbody':
+            # A tbody element's start tag may be omitted if the first thing
+            # inside the tbody element is a tr element, and if the element is
+            # not immediately preceeded by a tbody, thead, or tfoot element
+            # whose end tag has been omitted.
+            if type == "StartTag":
+                # omit the thead and tfoot elements' end tag when they are
+                # immediately followed by a tbody element. See is_optional_end.
+                if previous and previous['type'] == 'EndTag' and \
+                        previous['name'] in ('tbody', 'thead', 'tfoot'):
+                    return False
+                return next["name"] == 'tr'
+            else:
+                return False
+        return False
+
+    def is_optional_end(self, tagname, next):
+        type = next and next["type"] or None
+        if tagname in ('html', 'head', 'body'):
+            # An html element's end tag may be omitted if the html element
+            # is not immediately followed by a space character or a comment.
+            return type not in ("Comment", "SpaceCharacters")
+        elif tagname in ('li', 'optgroup', 'tr'):
+            # A li element's end tag may be omitted if the li element is
+            # immediately followed by another li element or if there is
+            # no more content in the parent element.
+            # An optgroup element's end tag may be omitted if the optgroup
+            # element is immediately followed by another optgroup element,
+            # or if there is no more content in the parent element.
+            # A tr element's end tag may be omitted if the tr element is
+            # immediately followed by another tr element, or if there is
+            # no more content in the parent element.
+            if type == "StartTag":
+                return next["name"] == tagname
+            else:
+                return type == "EndTag" or type is None
+        elif tagname in ('dt', 'dd'):
+            # A dt element's end tag may be omitted if the dt element is
+            # immediately followed by another dt element or a dd element.
+            # A dd element's end tag may be omitted if the dd element is
+            # immediately followed by another dd element or a dt element,
+            # or if there is no more content in the parent element.
+            if type == "StartTag":
+                return next["name"] in ('dt', 'dd')
+            elif tagname == 'dd':
+                return type == "EndTag" or type is None
+            else:
+                return False
+        elif tagname == 'p':
+            # A p element's end tag may be omitted if the p element is
+            # immediately followed by an address, article, aside,
+            # blockquote, datagrid, dialog, dir, div, dl, fieldset,
+            # footer, form, h1, h2, h3, h4, h5, h6, header, hr, menu,
+            # nav, ol, p, pre, section, table, or ul, element, or if
+            # there is no more content in the parent element.
+            if type in ("StartTag", "EmptyTag"):
+                return next["name"] in ('address', 'article', 'aside',
+                                        'blockquote', 'datagrid', 'dialog',
+                                        'dir', 'div', 'dl', 'fieldset', 'footer',
+                                        'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
+                                        'header', 'hr', 'menu', 'nav', 'ol',
+                                        'p', 'pre', 'section', 'table', 'ul')
+            else:
+                return type == "EndTag" or type is None
+        elif tagname == 'option':
+            # An option element's end tag may be omitted if the option
+            # element is immediately followed by another option element,
+            # or if it is immediately followed by an <code>optgroup</code>
+            # element, or if there is no more content in the parent
+            # element.
+            if type == "StartTag":
+                return next["name"] in ('option', 'optgroup')
+            else:
+                return type == "EndTag" or type is None
+        elif tagname in ('rt', 'rp'):
+            # An rt element's end tag may be omitted if the rt element is
+            # immediately followed by an rt or rp element, or if there is
+            # no more content in the parent element.
+            # An rp element's end tag may be omitted if the rp element is
+            # immediately followed by an rt or rp element, or if there is
+            # no more content in the parent element.
+            if type == "StartTag":
+                return next["name"] in ('rt', 'rp')
+            else:
+                return type == "EndTag" or type is None
+        elif tagname == 'colgroup':
+            # A colgroup element's end tag may be omitted if the colgroup
+            # element is not immediately followed by a space character or
+            # a comment.
+            if type in ("Comment", "SpaceCharacters"):
+                return False
+            elif type == "StartTag":
+                # XXX: we also look for an immediately following colgroup
+                # element. See is_optional_start.
+                return next["name"] != 'colgroup'
+            else:
+                return True
+        elif tagname in ('thead', 'tbody'):
+            # A thead element's end tag may be omitted if the thead element
+            # is immediately followed by a tbody or tfoot element.
+            # A tbody element's end tag may be omitted if the tbody element
+            # is immediately followed by a tbody or tfoot element, or if
+            # there is no more content in the parent element.
+            # A tfoot element's end tag may be omitted if the tfoot element
+            # is immediately followed by a tbody element, or if there is no
+            # more content in the parent element.
+            # XXX: we never omit the end tag when the following element is
+            # a tbody. See is_optional_start.
+            if type == "StartTag":
+                return next["name"] in ['tbody', 'tfoot']
+            elif tagname == 'tbody':
+                return type == "EndTag" or type is None
+            else:
+                return False
+        elif tagname == 'tfoot':
+            # A tfoot element's end tag may be omitted if the tfoot element
+            # is immediately followed by a tbody element, or if there is no
+            # more content in the parent element.
+            # XXX: we never omit the end tag when the following element is
+            # a tbody. See is_optional_start.
+            if type == "StartTag":
+                return next["name"] == 'tbody'
+            else:
+                return type == "EndTag" or type is None
+        elif tagname in ('td', 'th'):
+            # A td element's end tag may be omitted if the td element is
+            # immediately followed by a td or th element, or if there is
+            # no more content in the parent element.
+            # A th element's end tag may be omitted if the th element is
+            # immediately followed by a td or th element, or if there is
+            # no more content in the parent element.
+            if type == "StartTag":
+                return next["name"] in ('td', 'th')
+            else:
+                return type == "EndTag" or type is None
+        return False
@@ -0,0 +1,12 @@
+from __future__ import absolute_import, division, unicode_literals
+
+from . import _base
+from ..sanitizer import HTMLSanitizerMixin
+
+
+class Filter(_base.Filter, HTMLSanitizerMixin):
+    def __iter__(self):
+        for token in _base.Filter.__iter__(self):
+            token = self.sanitize_token(token)
+            if token:
+                yield token
@@ -0,0 +1,38 @@
+from __future__ import absolute_import, division, unicode_literals
+
+import re
+
+from . import _base
+from ..constants import rcdataElements, spaceCharacters
+spaceCharacters = "".join(spaceCharacters)
+
+SPACES_REGEX = re.compile("[%s]+" % spaceCharacters)
+
+
+class Filter(_base.Filter):
+
+    spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements))
+
+    def __iter__(self):
+        preserve = 0
+        for token in _base.Filter.__iter__(self):
+            type = token["type"]
+            if type == "StartTag" \
+                    and (preserve or token["name"] in self.spacePreserveElements):
+                preserve += 1
+
+            elif type == "EndTag" and preserve:
+                preserve -= 1
+
+            elif not preserve and type == "SpaceCharacters" and token["data"]:
+                # Test on token["data"] above to not introduce spaces where there were not
+                token["data"] = " "
+
+            elif not preserve and type == "Characters":
+                token["data"] = collapse_spaces(token["data"])
+
+            yield token
+
+
+def collapse_spaces(text):
+    return SPACES_REGEX.sub(' ', text)
@@ -0,0 +1,285 @@
+from __future__ import absolute_import, division, unicode_literals
+
+import re
+import warnings
+
+from .constants import DataLossWarning
+
+baseChar = """
+[#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] |
+[#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] |
+[#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] |
+[#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 |
+[#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] |
+[#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] |
+[#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] |
+[#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] |
+[#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 |
+[#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] |
+[#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] |
+[#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D |
+[#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] |
+[#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] |
+[#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] |
+[#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] |
+[#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] |
+[#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] |
+[#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 |
+[#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] |
+[#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] |
+[#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] |
+[#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] |
+[#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] |
+[#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] |
+[#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] |
+[#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] |
+[#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] |
+[#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] |
+[#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A |
+#x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 |
+#x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] |
+#x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] |
+[#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] |
+[#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C |
+#x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 |
+[#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] |
+[#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] |
+[#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 |
+[#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] |
+[#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B |
+#x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE |
+[#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] |
+[#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 |
+[#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] |
+[#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]"""
+
+ideographic = """[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]"""
+
+combiningCharacter = """
+[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] |
+[#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 |
+[#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] |
+[#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] |
+#x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] |
+[#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] |
+[#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 |
+#x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] |
+[#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC |
+[#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] |
+#x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] |
+[#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] |
+[#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] |
+[#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] |
+[#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] |
+[#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] |
+#x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 |
+[#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] |
+#x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] |
+[#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] |
+[#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] |
+#x3099 | #x309A"""
+
+digit = """
+[#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] |
+[#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] |
+[#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] |
+[#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]"""
+
+extender = """
+#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 |
+#[#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]"""
+
+letter = " | ".join([baseChar, ideographic])
+
+# Without the
+name = " | ".join([letter, digit, ".", "-", "_", combiningCharacter,
+                   extender])
+nameFirst = " | ".join([letter, "_"])
+
+reChar = re.compile(r"#x([\d|A-F]{4,4})")
+reCharRange = re.compile(r"\[#x([\d|A-F]{4,4})-#x([\d|A-F]{4,4})\]")
+
+
+def charStringToList(chars):
+    charRanges = [item.strip() for item in chars.split(" | ")]
+    rv = []
+    for item in charRanges:
+        foundMatch = False
+        for regexp in (reChar, reCharRange):
+            match = regexp.match(item)
+            if match is not None:
+                rv.append([hexToInt(item) for item in match.groups()])
+                if len(rv[-1]) == 1:
+                    rv[-1] = rv[-1] * 2
+                foundMatch = True
+                break
+        if not foundMatch:
+            assert len(item) == 1
+
+            rv.append([ord(item)] * 2)
+    rv = normaliseCharList(rv)
+    return rv
+
+
+def normaliseCharList(charList):
+    charList = sorted(charList)
+    for item in charList:
+        assert item[1] >= item[0]
+    rv = []
+    i = 0
+    while i < len(charList):
+        j = 1
+        rv.append(charList[i])
+        while i + j < len(charList) and charList[i + j][0] <= rv[-1][1] + 1:
+            rv[-1][1] = charList[i + j][1]
+            j += 1
+        i += j
+    return rv
+
+# We don't really support characters above the BMP :(
+max_unicode = int("FFFF", 16)
+
+
+def missingRanges(charList):
+    rv = []
+    if charList[0] != 0:
+        rv.append([0, charList[0][0] - 1])
+    for i, item in enumerate(charList[:-1]):
+        rv.append([item[1] + 1, charList[i + 1][0] - 1])
+    if charList[-1][1] != max_unicode:
+        rv.append([charList[-1][1] + 1, max_unicode])
+    return rv
+
+
+def listToRegexpStr(charList):
+    rv = []
+    for item in charList:
+        if item[0] == item[1]:
+            rv.append(escapeRegexp(chr(item[0])))
+        else:
+            rv.append(escapeRegexp(chr(item[0])) + "-" +
+                      escapeRegexp(chr(item[1])))
+    return "[%s]" % "".join(rv)
+
+
+def hexToInt(hex_str):
+    return int(hex_str, 16)
+
+
+def escapeRegexp(string):
+    specialCharacters = (".", "^", "$", "*", "+", "?", "{", "}",
+                         "[", "]", "|", "(", ")", "-")
+    for char in specialCharacters:
+        string = string.replace(char, "\\" + char)
+
+    return string
+
+# output from the above
+nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
+
+nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
+
+# Simpler things
+nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\-\'()+,./:=?;!*#@$_%]")
+
+
+class InfosetFilter(object):
+    replacementRegexp = re.compile(r"U[\dA-F]{5,5}")
+
+    def __init__(self, replaceChars=None,
+                 dropXmlnsLocalName=False,
+                 dropXmlnsAttrNs=False,
+                 preventDoubleDashComments=False,
+                 preventDashAtCommentEnd=False,
+                 replaceFormFeedCharacters=True,
+                 preventSingleQuotePubid=False):
+
+        self.dropXmlnsLocalName = dropXmlnsLocalName
+        self.dropXmlnsAttrNs = dropXmlnsAttrNs
+
+        self.preventDoubleDashComments = preventDoubleDashComments
+        self.preventDashAtCommentEnd = preventDashAtCommentEnd
+
+        self.replaceFormFeedCharacters = replaceFormFeedCharacters
+
+        self.preventSingleQuotePubid = preventSingleQuotePubid
+
+        self.replaceCache = {}
+
+    def coerceAttribute(self, name, namespace=None):
+        if self.dropXmlnsLocalName and name.startswith("xmlns:"):
+            warnings.warn("Attributes cannot begin with xmlns", DataLossWarning)
+            return None
+        elif (self.dropXmlnsAttrNs and
+              namespace == "http://www.w3.org/2000/xmlns/"):
+            warnings.warn("Attributes cannot be in the xml namespace", DataLossWarning)
+            return None
+        else:
+            return self.toXmlName(name)
+
+    def coerceElement(self, name, namespace=None):
+        return self.toXmlName(name)
+
+    def coerceComment(self, data):
+        if self.preventDoubleDashComments:
+            while "--" in data:
+                warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
+                data = data.replace("--", "- -")
+        return data
+
+    def coerceCharacters(self, data):
+        if self.replaceFormFeedCharacters:
+            for i in range(data.count("\x0C")):
+                warnings.warn("Text cannot contain U+000C", DataLossWarning)
+            data = data.replace("\x0C", " ")
+        # Other non-xml characters
+        return data
+
+    def coercePubid(self, data):
+        dataOutput = data
+        for char in nonPubidCharRegexp.findall(data):
+            warnings.warn("Coercing non-XML pubid", DataLossWarning)
+            replacement = self.getReplacementCharacter(char)
+            dataOutput = dataOutput.replace(char, replacement)
+        if self.preventSingleQuotePubid and dataOutput.find("'") >= 0:
+            warnings.warn("Pubid cannot contain single quote", DataLossWarning)
+            dataOutput = dataOutput.replace("'", self.getReplacementCharacter("'"))
+        return dataOutput
+
+    def toXmlName(self, name):
+        nameFirst = name[0]
+        nameRest = name[1:]
+        m = nonXmlNameFirstBMPRegexp.match(nameFirst)
+        if m:
+            warnings.warn("Coercing non-XML name", DataLossWarning)
+            nameFirstOutput = self.getReplacementCharacter(nameFirst)
+        else:
+            nameFirstOutput = nameFirst
+
+        nameRestOutput = nameRest
+        replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest))
+        for char in replaceChars:
+            warnings.warn("Coercing non-XML name", DataLossWarning)
+            replacement = self.getReplacementCharacter(char)
+            nameRestOutput = nameRestOutput.replace(char, replacement)
+        return nameFirstOutput + nameRestOutput
+
+    def getReplacementCharacter(self, char):
+        if char in self.replaceCache:
+            replacement = self.replaceCache[char]
+        else:
+            replacement = self.escapeChar(char)
+        return replacement
+
+    def fromXmlName(self, name):
+        for item in set(self.replacementRegexp.findall(name)):
+            name = name.replace(item, self.unescapeChar(item))
+        return name
+
+    def escapeChar(self, char):
+        replacement = "U%05X" % ord(char)
+        self.replaceCache[char] = replacement
+        return replacement
+
+    def unescapeChar(self, charcode):
+        return chr(int(charcode[1:], 16))
@@ -0,0 +1,881 @@
+from __future__ import absolute_import, division, unicode_literals
+from pip._vendor.six import text_type
+
+import codecs
+import re
+
+from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
+from .constants import encodings, ReparseException
+from . import utils
+
+from io import StringIO
+
+try:
+    from io import BytesIO
+except ImportError:
+    BytesIO = StringIO
+
+try:
+    from io import BufferedIOBase
+except ImportError:
+    class BufferedIOBase(object):
+        pass
+
+# Non-unicode versions of constants for use in the pre-parser
+spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
+asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
+asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase])
+spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"])
+
+invalid_unicode_re = re.compile("[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uD800-\uDFFF\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]")
+
+non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
+                                  0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
+                                  0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE,
+                                  0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
+                                  0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
+                                  0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
+                                  0x10FFFE, 0x10FFFF])
+
+ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]")
+
+# Cache for charsUntil()
+charsUntilRegEx = {}
+
+
+class BufferedStream(object):
+    """Buffering for streams that do not have buffering of their own
+
+    The buffer is implemented as a list of chunks on the assumption that
+    joining many strings will be slow since it is O(n**2)
+    """
+
+    def __init__(self, stream):
+        self.stream = stream
+        self.buffer = []
+        self.position = [-1, 0]  # chunk number, offset
+
+    def tell(self):
+        pos = 0
+        for chunk in self.buffer[:self.position[0]]:
+            pos += len(chunk)
+        pos += self.position[1]
+        return pos
+
+    def seek(self, pos):
+        assert pos <= self._bufferedBytes()
+        offset = pos
+        i = 0
+        while len(self.buffer[i]) < offset:
+            offset -= len(self.buffer[i])
+            i += 1
+        self.position = [i, offset]
+
+    def read(self, bytes):
+        if not self.buffer:
+            return self._readStream(bytes)
+        elif (self.position[0] == len(self.buffer) and
+              self.position[1] == len(self.buffer[-1])):
+            return self._readStream(bytes)
+        else:
+            return self._readFromBuffer(bytes)
+
+    def _bufferedBytes(self):
+        return sum([len(item) for item in self.buffer])
+
+    def _readStream(self, bytes):
+        data = self.stream.read(bytes)
+        self.buffer.append(data)
+        self.position[0] += 1
+        self.position[1] = len(data)
+        return data
+
+    def _readFromBuffer(self, bytes):
+        remainingBytes = bytes
+        rv = []
+        bufferIndex = self.position[0]
+        bufferOffset = self.position[1]
+        while bufferIndex < len(self.buffer) and remainingBytes != 0:
+            assert remainingBytes > 0
+            bufferedData = self.buffer[bufferIndex]
+
+            if remainingBytes <= len(bufferedData) - bufferOffset:
+                bytesToRead = remainingBytes
+                self.position = [bufferIndex, bufferOffset + bytesToRead]
+            else:
+                bytesToRead = len(bufferedData) - bufferOffset
+                self.position = [bufferIndex, len(bufferedData)]
+                bufferIndex += 1
+            rv.append(bufferedData[bufferOffset:bufferOffset + bytesToRead])
+            remainingBytes -= bytesToRead
+
+            bufferOffset = 0
+
+        if remainingBytes:
+            rv.append(self._readStream(remainingBytes))
+
+        return b"".join(rv)
+
+
+def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True):
+    if hasattr(source, "read"):
+        isUnicode = isinstance(source.read(0), text_type)
+    else:
+        isUnicode = isinstance(source, text_type)
+
+    if isUnicode:
+        if encoding is not None:
+            raise TypeError("Cannot explicitly set an encoding with a unicode string")
+
+        return HTMLUnicodeInputStream(source)
+    else:
+        return HTMLBinaryInputStream(source, encoding, parseMeta, chardet)
+
+
+class HTMLUnicodeInputStream(object):
+    """Provides a unicode stream of characters to the HTMLTokenizer.
+
+    This class takes care of character encoding and removing or replacing
+    incorrect byte-sequences and also provides column and line tracking.
+
+    """
+
+    _defaultChunkSize = 10240
+
+    def __init__(self, source):
+        """Initialises the HTMLInputStream.
+
+        HTMLInputStream(source, [encoding]) -> Normalized stream from source
+        for use by html5lib.
+
+        source can be either a file-object, local filename or a string.
+
+        The optional encoding parameter must be a string that indicates
+        the encoding.  If specified, that encoding will be used,
+        regardless of any BOM or later declaration (such as in a meta
+        element)
+
+        parseMeta - Look for a <meta> element containing encoding information
+
+        """
+
+        # Craziness
+        if len("\U0010FFFF") == 1:
+            self.reportCharacterErrors = self.characterErrorsUCS4
+            self.replaceCharactersRegexp = re.compile("[\uD800-\uDFFF]")
+        else:
+            self.reportCharacterErrors = self.characterErrorsUCS2
+            self.replaceCharactersRegexp = re.compile("([\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF])")
+
+        # List of where new lines occur
+        self.newLines = [0]
+
+        self.charEncoding = ("utf-8", "certain")
+        self.dataStream = self.openStream(source)
+
+        self.reset()
+
+    def reset(self):
+        self.chunk = ""
+        self.chunkSize = 0
+        self.chunkOffset = 0
+        self.errors = []
+
+        # number of (complete) lines in previous chunks
+        self.prevNumLines = 0
+        # number of columns in the last line of the previous chunk
+        self.prevNumCols = 0
+
+        # Deal with CR LF and surrogates split over chunk boundaries
+        self._bufferedCharacter = None
+
+    def openStream(self, source):
+        """Produces a file object from source.
+
+        source can be either a file object, local filename or a string.
+
+        """
+        # Already a file object
+        if hasattr(source, 'read'):
+            stream = source
+        else:
+            stream = StringIO(source)
+
+        return stream
+
+    def _position(self, offset):
+        chunk = self.chunk
+        nLines = chunk.count('\n', 0, offset)
+        positionLine = self.prevNumLines + nLines
+        lastLinePos = chunk.rfind('\n', 0, offset)
+        if lastLinePos == -1:
+            positionColumn = self.prevNumCols + offset
+        else:
+            positionColumn = offset - (lastLinePos + 1)
+        return (positionLine, positionColumn)
+
+    def position(self):
+        """Returns (line, col) of the current position in the stream."""
+        line, col = self._position(self.chunkOffset)
+        return (line + 1, col)
+
+    def char(self):
+        """ Read one character from the stream or queue if available. Return
+            EOF when EOF is reached.
+        """
+        # Read a new chunk from the input stream if necessary
+        if self.chunkOffset >= self.chunkSize:
+            if not self.readChunk():
+                return EOF
+
+        chunkOffset = self.chunkOffset
+        char = self.chunk[chunkOffset]
+        self.chunkOffset = chunkOffset + 1
+
+        return char
+
+    def readChunk(self, chunkSize=None):
+        if chunkSize is None:
+            chunkSize = self._defaultChunkSize
+
+        self.prevNumLines, self.prevNumCols = self._position(self.chunkSize)
+
+        self.chunk = ""
+        self.chunkSize = 0
+        self.chunkOffset = 0
+
+        data = self.dataStream.read(chunkSize)
+
+        # Deal with CR LF and surrogates broken across chunks
+        if self._bufferedCharacter:
+            data = self._bufferedCharacter + data
+            self._bufferedCharacter = None
+        elif not data:
+            # We have no more data, bye-bye stream
+            return False
+
+        if len(data) > 1:
+            lastv = ord(data[-1])
+            if lastv == 0x0D or 0xD800 <= lastv <= 0xDBFF:
+                self._bufferedCharacter = data[-1]
+                data = data[:-1]
+
+        self.reportCharacterErrors(data)
+
+        # Replace invalid characters
+        # Note U+0000 is dealt with in the tokenizer
+        data = self.replaceCharactersRegexp.sub("\ufffd", data)
+
+        data = data.replace("\r\n", "\n")
+        data = data.replace("\r", "\n")
+
+        self.chunk = data
+        self.chunkSize = len(data)
+
+        return True
+
+    def characterErrorsUCS4(self, data):
+        for i in range(len(invalid_unicode_re.findall(data))):
+            self.errors.append("invalid-codepoint")
+
+    def characterErrorsUCS2(self, data):
+        # Someone picked the wrong compile option
+        # You lose
+        skip = False
+        for match in invalid_unicode_re.finditer(data):
+            if skip:
+                continue
+            codepoint = ord(match.group())
+            pos = match.start()
+            # Pretty sure there should be endianness issues here
+            if utils.isSurrogatePair(data[pos:pos + 2]):
+                # We have a surrogate pair!
+                char_val = utils.surrogatePairToCodepoint(data[pos:pos + 2])
+                if char_val in non_bmp_invalid_codepoints:
+                    self.errors.append("invalid-codepoint")
+                skip = True
+            elif (codepoint >= 0xD800 and codepoint <= 0xDFFF and
+                  pos == len(data) - 1):
+                self.errors.append("invalid-codepoint")
+            else:
+                skip = False
+                self.errors.append("invalid-codepoint")
+
+    def charsUntil(self, characters, opposite=False):
+        """ Returns a string of characters from the stream up to but not
+        including any character in 'characters' or EOF. 'characters' must be
+        a container that supports the 'in' method and iteration over its
+        characters.
+        """
+
+        # Use a cache of regexps to find the required characters
+        try:
+            chars = charsUntilRegEx[(characters, opposite)]
+        except KeyError:
+            if __debug__:
+                for c in characters:
+                    assert(ord(c) < 128)
+            regex = "".join(["\\x%02x" % ord(c) for c in characters])
+            if not opposite:
+                regex = "^%s" % regex
+            chars = charsUntilRegEx[(characters, opposite)] = re.compile("[%s]+" % regex)
+
+        rv = []
+
+        while True:
+            # Find the longest matching prefix
+            m = chars.match(self.chunk, self.chunkOffset)
+            if m is None:
+                # If nothing matched, and it wasn't because we ran out of chunk,
+                # then stop
+                if self.chunkOffset != self.chunkSize:
+                    break
+            else:
+                end = m.end()
+                # If not the whole chunk matched, return everything
+                # up to the part that didn't match
+                if end != self.chunkSize:
+                    rv.append(self.chunk[self.chunkOffset:end])
+                    self.chunkOffset = end
+                    break
+            # If the whole remainder of the chunk matched,
+            # use it all and read the next chunk
+            rv.append(self.chunk[self.chunkOffset:])
+            if not self.readChunk():
+                # Reached EOF
+                break
+
+        r = "".join(rv)
+        return r
+
+    def unget(self, char):
+        # Only one character is allowed to be ungotten at once - it must
+        # be consumed again before any further call to unget
+        if char is not None:
+            if self.chunkOffset == 0:
+                # unget is called quite rarely, so it's a good idea to do
+                # more work here if it saves a bit of work in the frequently
+                # called char and charsUntil.
+                # So, just prepend the ungotten character onto the current
+                # chunk:
+                self.chunk = char + self.chunk
+                self.chunkSize += 1
+            else:
+                self.chunkOffset -= 1
+                assert self.chunk[self.chunkOffset] == char
+
+
+class HTMLBinaryInputStream(HTMLUnicodeInputStream):
+    """Provides a unicode stream of characters to the HTMLTokenizer.
+
+    This class takes care of character encoding and removing or replacing
+    incorrect byte-sequences and also provides column and line tracking.
+
+    """
+
+    def __init__(self, source, encoding=None, parseMeta=True, chardet=True):
+        """Initialises the HTMLInputStream.
+
+        HTMLInputStream(source, [encoding]) -> Normalized stream from source
+        for use by html5lib.
+
+        source can be either a file-object, local filename or a string.
+
+        The optional encoding parameter must be a string that indicates
+        the encoding.  If specified, that encoding will be used,
+        regardless of any BOM or later declaration (such as in a meta
+        element)
+
+        parseMeta - Look for a <meta> element containing encoding information
+
+        """
+        # Raw Stream - for unicode objects this will encode to utf-8 and set
+        #              self.charEncoding as appropriate
+        self.rawStream = self.openStream(source)
+
+        HTMLUnicodeInputStream.__init__(self, self.rawStream)
+
+        self.charEncoding = (codecName(encoding), "certain")
+
+        # Encoding Information
+        # Number of bytes to use when looking for a meta element with
+        # encoding information
+        self.numBytesMeta = 512
+        # Number of bytes to use when using detecting encoding using chardet
+        self.numBytesChardet = 100
+        # Encoding to use if no other information can be found
+        self.defaultEncoding = "windows-1252"
+
+        # Detect encoding iff no explicit "transport level" encoding is supplied
+        if (self.charEncoding[0] is None):
+            self.charEncoding = self.detectEncoding(parseMeta, chardet)
+
+        # Call superclass
+        self.reset()
+
+    def reset(self):
+        self.dataStream = codecs.getreader(self.charEncoding[0])(self.rawStream,
+                                                                 'replace')
+        HTMLUnicodeInputStream.reset(self)
+
+    def openStream(self, source):
+        """Produces a file object from source.
+
+        source can be either a file object, local filename or a string.
+
+        """
+        # Already a file object
+        if hasattr(source, 'read'):
+            stream = source
+        else:
+            stream = BytesIO(source)
+
+        try:
+            stream.seek(stream.tell())
+        except:
+            stream = BufferedStream(stream)
+
+        return stream
+
+    def detectEncoding(self, parseMeta=True, chardet=True):
+        # First look for a BOM
+        # This will also read past the BOM if present
+        encoding = self.detectBOM()
+        confidence = "certain"
+        # If there is no BOM need to look for meta elements with encoding
+        # information
+        if encoding is None and parseMeta:
+            encoding = self.detectEncodingMeta()
+            confidence = "tentative"
+        # Guess with chardet, if avaliable
+        if encoding is None and chardet:
+            confidence = "tentative"
+            try:
+                try:
+                    from charade.universaldetector import UniversalDetector
+                except ImportError:
+                    from chardet.universaldetector import UniversalDetector
+                buffers = []
+                detector = UniversalDetector()
+                while not detector.done:
+                    buffer = self.rawStream.read(self.numBytesChardet)
+                    assert isinstance(buffer, bytes)
+                    if not buffer:
+                        break
+                    buffers.append(buffer)
+                    detector.feed(buffer)
+                detector.close()
+                encoding = detector.result['encoding']
+                self.rawStream.seek(0)
+            except ImportError:
+                pass
+        # If all else fails use the default encoding
+        if encoding is None:
+            confidence = "tentative"
+            encoding = self.defaultEncoding
+
+        # Substitute for equivalent encodings:
+        encodingSub = {"iso-8859-1": "windows-1252"}
+
+        if encoding.lower() in encodingSub:
+            encoding = encodingSub[encoding.lower()]
+
+        return encoding, confidence
+
+    def changeEncoding(self, newEncoding):
+        assert self.charEncoding[1] != "certain"
+        newEncoding = codecName(newEncoding)
+        if newEncoding in ("utf-16", "utf-16-be", "utf-16-le"):
+            newEncoding = "utf-8"
+        if newEncoding is None:
+            return
+        elif newEncoding == self.charEncoding[0]:
+            self.charEncoding = (self.charEncoding[0], "certain")
+        else:
+            self.rawStream.seek(0)
+            self.reset()
+            self.charEncoding = (newEncoding, "certain")
+            raise ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding))
+
+    def detectBOM(self):
+        """Attempts to detect at BOM at the start of the stream. If
+        an encoding can be determined from the BOM return the name of the
+        encoding otherwise return None"""
+        bomDict = {
+            codecs.BOM_UTF8: 'utf-8',
+            codecs.BOM_UTF16_LE: 'utf-16-le', codecs.BOM_UTF16_BE: 'utf-16-be',
+            codecs.BOM_UTF32_LE: 'utf-32-le', codecs.BOM_UTF32_BE: 'utf-32-be'
+        }
+
+        # Go to beginning of file and read in 4 bytes
+        string = self.rawStream.read(4)
+        assert isinstance(string, bytes)
+
+        # Try detecting the BOM using bytes from the string
+        encoding = bomDict.get(string[:3])         # UTF-8
+        seek = 3
+        if not encoding:
+            # Need to detect UTF-32 before UTF-16
+            encoding = bomDict.get(string)         # UTF-32
+            seek = 4
+            if not encoding:
+                encoding = bomDict.get(string[:2])  # UTF-16
+                seek = 2
+
+        # Set the read position past the BOM if one was found, otherwise
+        # set it to the start of the stream
+        self.rawStream.seek(encoding and seek or 0)
+
+        return encoding
+
+    def detectEncodingMeta(self):
+        """Report the encoding declared by the meta element
+        """
+        buffer = self.rawStream.read(self.numBytesMeta)
+        assert isinstance(buffer, bytes)
+        parser = EncodingParser(buffer)
+        self.rawStream.seek(0)
+        encoding = parser.getEncoding()
+
+        if encoding in ("utf-16", "utf-16-be", "utf-16-le"):
+            encoding = "utf-8"
+
+        return encoding
+
+
+class EncodingBytes(bytes):
+    """String-like object with an associated position and various extra methods
+    If the position is ever greater than the string length then an exception is
+    raised"""
+    def __new__(self, value):
+        assert isinstance(value, bytes)
+        return bytes.__new__(self, value.lower())
+
+    def __init__(self, value):
+        self._position = -1
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        p = self._position = self._position + 1
+        if p >= len(self):
+            raise StopIteration
+        elif p < 0:
+            raise TypeError
+        return self[p:p + 1]
+
+    def next(self):
+        # Py2 compat
+        return self.__next__()
+
+    def previous(self):
+        p = self._position
+        if p >= len(self):
+            raise StopIteration
+        elif p < 0:
+            raise TypeError
+        self._position = p = p - 1
+        return self[p:p + 1]
+
+    def setPosition(self, position):
+        if self._position >= len(self):
+            raise StopIteration
+        self._position = position
+
+    def getPosition(self):
+        if self._position >= len(self):
+            raise StopIteration
+        if self._position >= 0:
+            return self._position
+        else:
+            return None
+
+    position = property(getPosition, setPosition)
+
+    def getCurrentByte(self):
+        return self[self.position:self.position + 1]
+
+    currentByte = property(getCurrentByte)
+
+    def skip(self, chars=spaceCharactersBytes):
+        """Skip past a list of characters"""
+        p = self.position               # use property for the error-checking
+        while p < len(self):
+            c = self[p:p + 1]
+            if c not in chars:
+                self._position = p
+                return c
+            p += 1
+        self._position = p
+        return None
+
+    def skipUntil(self, chars):
+        p = self.position
+        while p < len(self):
+            c = self[p:p + 1]
+            if c in chars:
+                self._position = p
+                return c
+            p += 1
+        self._position = p
+        return None
+
+    def matchBytes(self, bytes):
+        """Look for a sequence of bytes at the start of a string. If the bytes
+        are found return True and advance the position to the byte after the
+        match. Otherwise return False and leave the position alone"""
+        p = self.position
+        data = self[p:p + len(bytes)]
+        rv = data.startswith(bytes)
+        if rv:
+            self.position += len(bytes)
+        return rv
+
+    def jumpTo(self, bytes):
+        """Look for the next sequence of bytes matching a given sequence. If
+        a match is found advance the position to the last byte of the match"""
+        newPosition = self[self.position:].find(bytes)
+        if newPosition > -1:
+            # XXX: This is ugly, but I can't see a nicer way to fix this.
+            if self._position == -1:
+                self._position = 0
+            self._position += (newPosition + len(bytes) - 1)
+            return True
+        else:
+            raise StopIteration
+
+
+class EncodingParser(object):
+    """Mini parser for detecting character encoding from meta elements"""
+
+    def __init__(self, data):
+        """string - the data to work on for encoding detection"""
+        self.data = EncodingBytes(data)
+        self.encoding = None
+
+    def getEncoding(self):
+        methodDispatch = (
+            (b"<!--", self.handleComment),
+            (b"<meta", self.handleMeta),
+            (b"</", self.handlePossibleEndTag),
+            (b"<!", self.handleOther),
+            (b"<?", self.handleOther),
+            (b"<", self.handlePossibleStartTag))
+        for byte in self.data:
+            keepParsing = True
+            for key, method in methodDispatch:
+                if self.data.matchBytes(key):
+                    try:
+                        keepParsing = method()
+                        break
+                    except StopIteration:
+                        keepParsing = False
+                        break
+            if not keepParsing:
+                break
+
+        return self.encoding
+
+    def handleComment(self):
+        """Skip over comments"""
+        return self.data.jumpTo(b"-->")
+
+    def handleMeta(self):
+        if self.data.currentByte not in spaceCharactersBytes:
+            # if we have <meta not followed by a space so just keep going
+            return True
+        # We have a valid meta element we want to search for attributes
+        hasPragma = False
+        pendingEncoding = None
+        while True:
+            # Try to find the next attribute after the current position
+            attr = self.getAttribute()
+            if attr is None:
+                return True
+            else:
+                if attr[0] == b"http-equiv":
+                    hasPragma = attr[1] == b"content-type"
+                    if hasPragma and pendingEncoding is not None:
+                        self.encoding = pendingEncoding
+                        return False
+                elif attr[0] == b"charset":
+                    tentativeEncoding = attr[1]
+                    codec = codecName(tentativeEncoding)
+                    if codec is not None:
+                        self.encoding = codec
+                        return False
+                elif attr[0] == b"content":
+                    contentParser = ContentAttrParser(EncodingBytes(attr[1]))
+                    tentativeEncoding = contentParser.parse()
+                    if tentativeEncoding is not None:
+                        codec = codecName(tentativeEncoding)
+                        if codec is not None:
+                            if hasPragma:
+                                self.encoding = codec
+                                return False
+                            else:
+                                pendingEncoding = codec
+
+    def handlePossibleStartTag(self):
+        return self.handlePossibleTag(False)
+
+    def handlePossibleEndTag(self):
+        next(self.data)
+        return self.handlePossibleTag(True)
+
+    def handlePossibleTag(self, endTag):
+        data = self.data
+        if data.currentByte not in asciiLettersBytes:
+            # If the next byte is not an ascii letter either ignore this
+            # fragment (possible start tag case) or treat it according to
+            # handleOther
+            if endTag:
+                data.previous()
+                self.handleOther()
+            return True
+
+        c = data.skipUntil(spacesAngleBrackets)
+        if c == b"<":
+            # return to the first step in the overall "two step" algorithm
+            # reprocessing the < byte
+            data.previous()
+        else:
+            # Read all attributes
+            attr = self.getAttribute()
+            while attr is not None:
+                attr = self.getAttribute()
+        return True
+
+    def handleOther(self):
+        return self.data.jumpTo(b">")
+
+    def getAttribute(self):
+        """Return a name,value pair for the next attribute in the stream,
+        if one is found, or None"""
+        data = self.data
+        # Step 1 (skip chars)
+        c = data.skip(spaceCharactersBytes | frozenset([b"/"]))
+        assert c is None or len(c) == 1
+        # Step 2
+        if c in (b">", None):
+            return None
+        # Step 3
+        attrName = []
+        attrValue = []
+        # Step 4 attribute name
+        while True:
+            if c == b"=" and attrName:
+                break
+            elif c in spaceCharactersBytes:
+                # Step 6!
+                c = data.skip()
+                break
+            elif c in (b"/", b">"):
+                return b"".join(attrName), b""
+            elif c in asciiUppercaseBytes:
+                attrName.append(c.lower())
+            elif c is None:
+                return None
+            else:
+                attrName.append(c)
+            # Step 5
+            c = next(data)
+        # Step 7
+        if c != b"=":
+            data.previous()
+            return b"".join(attrName), b""
+        # Step 8
+        next(data)
+        # Step 9
+        c = data.skip()
+        # Step 10
+        if c in (b"'", b'"'):
+            # 10.1
+            quoteChar = c
+            while True:
+                # 10.2
+                c = next(data)
+                # 10.3
+                if c == quoteChar:
+                    next(data)
+                    return b"".join(attrName), b"".join(attrValue)
+                # 10.4
+                elif c in asciiUppercaseBytes:
+                    attrValue.append(c.lower())
+                # 10.5
+                else:
+                    attrValue.append(c)
+        elif c == b">":
+            return b"".join(attrName), b""
+        elif c in asciiUppercaseBytes:
+            attrValue.append(c.lower())
+        elif c is None:
+            return None
+        else:
+            attrValue.append(c)
+        # Step 11
+        while True:
+            c = next(data)
+            if c in spacesAngleBrackets:
+                return b"".join(attrName), b"".join(attrValue)
+            elif c in asciiUppercaseBytes:
+                attrValue.append(c.lower())
+            elif c is None:
+                return None
+            else:
+                attrValue.append(c)
+
+
+class ContentAttrParser(object):
+    def __init__(self, data):
+        assert isinstance(data, bytes)
+        self.data = data
+
+    def parse(self):
+        try:
+            # Check if the attr name is charset
+            # otherwise return
+            self.data.jumpTo(b"charset")
+            self.data.position += 1
+            self.data.skip()
+            if not self.data.currentByte == b"=":
+                # If there is no = sign keep looking for attrs
+                return None
+            self.data.position += 1
+            self.data.skip()
+            # Look for an encoding between matching quote marks
+            if self.data.currentByte in (b'"', b"'"):
+                quoteMark = self.data.currentByte
+                self.data.position += 1
+                oldPosition = self.data.position
+                if self.data.jumpTo(quoteMark):
+                    return self.data[oldPosition:self.data.position]
+                else:
+                    return None
+            else:
+                # Unquoted value
+                oldPosition = self.data.position
+                try:
+                    self.data.skipUntil(spaceCharactersBytes)
+                    return self.data[oldPosition:self.data.position]
+                except StopIteration:
+                    # Return the whole remaining value
+                    return self.data[oldPosition:]
+        except StopIteration:
+            return None
+
+
+def codecName(encoding):
+    """Return the python codec name corresponding to an encoding or None if the
+    string doesn't correspond to a valid encoding."""
+    if isinstance(encoding, bytes):
+        try:
+            encoding = encoding.decode("ascii")
+        except UnicodeDecodeError:
+            return None
+    if encoding:
+        canonicalName = ascii_punctuation_re.sub("", encoding).lower()
+        return encodings.get(canonicalName, None)
+    else:
+        return None
@@ -0,0 +1,271 @@
+from __future__ import absolute_import, division, unicode_literals
+
+import re
+from xml.sax.saxutils import escape, unescape
+
+from .tokenizer import HTMLTokenizer
+from .constants import tokenTypes
+
+
+class HTMLSanitizerMixin(object):
+    """ sanitization of XHTML+MathML+SVG and of inline style attributes."""
+
+    acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area',
+                           'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button',
+                           'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
+                           'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn',
+                           'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
+                           'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1',
+                           'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins',
+                           'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter',
+                           'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option',
+                           'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select',
+                           'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong',
+                           'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot',
+                           'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video']
+
+    mathml_elements = ['maction', 'math', 'merror', 'mfrac', 'mi',
+                       'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom',
+                       'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub',
+                       'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder',
+                       'munderover', 'none']
+
+    svg_elements = ['a', 'animate', 'animateColor', 'animateMotion',
+                    'animateTransform', 'clipPath', 'circle', 'defs', 'desc', 'ellipse',
+                    'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern',
+                    'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph',
+                    'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect',
+                    'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use']
+
+    acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
+                             'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis',
+                             'background', 'balance', 'bgcolor', 'bgproperties', 'border',
+                             'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding',
+                             'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff',
+                             'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color',
+                             'cols', 'colspan', 'compact', 'contenteditable', 'controls', 'coords',
+                             'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default',
+                             'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end',
+                             'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers',
+                             'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace',
+                             'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing',
+                             'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend',
+                             'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method',
+                             'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open',
+                             'optimum', 'pattern', 'ping', 'point-size', 'poster', 'pqg', 'preload',
+                             'prompt', 'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
+                             'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan',
+                             'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
+                             'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
+                             'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap',
+                             'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml',
+                             'width', 'wrap', 'xml:lang']
+
+    mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign',
+                         'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth',
+                         'display', 'displaystyle', 'equalcolumns', 'equalrows', 'fence',
+                         'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace',
+                         'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize',
+                         'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines',
+                         'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection',
+                         'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show',
+                         'xlink:type', 'xmlns', 'xmlns:xlink']
+
+    svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic',
+                      'arabic-form', 'ascent', 'attributeName', 'attributeType',
+                      'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
+                      'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx',
+                      'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill',
+                      'fill-opacity', 'fill-rule', 'font-family', 'font-size',
+                      'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from',
+                      'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging',
+                      'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k',
+                      'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end',
+                      'marker-mid', 'marker-start', 'markerHeight', 'markerUnits',
+                      'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset',
+                      'opacity', 'orient', 'origin', 'overline-position',
+                      'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
+                      'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount',
+                      'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart',
+                      'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color',
+                      'stop-opacity', 'strikethrough-position', 'strikethrough-thickness',
+                      'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
+                      'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
+                      'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
+                      'transform', 'type', 'u1', 'u2', 'underline-position',
+                      'underline-thickness', 'unicode', 'unicode-range', 'units-per-em',
+                      'values', 'version', 'viewBox', 'visibility', 'width', 'widths', 'x',
+                      'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole',
+                      'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type',
+                      'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y',
+                      'y1', 'y2', 'zoomAndPan']
+
+    attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc', 'poster',
+                       'xlink:href', 'xml:base']
+
+    svg_attr_val_allows_ref = ['clip-path', 'color-profile', 'cursor', 'fill',
+                               'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end',
+                               'mask', 'stroke']
+
+    svg_allow_local_href = ['altGlyph', 'animate', 'animateColor',
+                            'animateMotion', 'animateTransform', 'cursor', 'feImage', 'filter',
+                            'linearGradient', 'pattern', 'radialGradient', 'textpath', 'tref',
+                            'set', 'use']
+
+    acceptable_css_properties = ['azimuth', 'background-color',
+                                 'border-bottom-color', 'border-collapse', 'border-color',
+                                 'border-left-color', 'border-right-color', 'border-top-color', 'clear',
+                                 'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font',
+                                 'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight',
+                                 'height', 'letter-spacing', 'line-height', 'overflow', 'pause',
+                                 'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness',
+                                 'speak', 'speak-header', 'speak-numeral', 'speak-punctuation',
+                                 'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent',
+                                 'unicode-bidi', 'vertical-align', 'voice-family', 'volume',
+                                 'white-space', 'width']
+
+    acceptable_css_keywords = ['auto', 'aqua', 'black', 'block', 'blue',
+                               'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed',
+                               'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left',
+                               'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive',
+                               'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top',
+                               'transparent', 'underline', 'white', 'yellow']
+
+    acceptable_svg_properties = ['fill', 'fill-opacity', 'fill-rule',
+                                 'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
+                                 'stroke-opacity']
+
+    acceptable_protocols = ['ed2k', 'ftp', 'http', 'https', 'irc',
+                            'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal',
+                            'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag',
+                            'ssh', 'sftp', 'rtsp', 'afs']
+
+    # subclasses may define their own versions of these constants
+    allowed_elements = acceptable_elements + mathml_elements + svg_elements
+    allowed_attributes = acceptable_attributes + mathml_attributes + svg_attributes
+    allowed_css_properties = acceptable_css_properties
+    allowed_css_keywords = acceptable_css_keywords
+    allowed_svg_properties = acceptable_svg_properties
+    allowed_protocols = acceptable_protocols
+
+    # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
+    # stripping out all # attributes not in ALLOWED_ATTRIBUTES. Style
+    # attributes are parsed, and a restricted set, # specified by
+    # ALLOWED_CSS_PROPERTIES and ALLOWED_CSS_KEYWORDS, are allowed through.
+    # attributes in ATTR_VAL_IS_URI are scanned, and only URI schemes specified
+    # in ALLOWED_PROTOCOLS are allowed.
+    #
+    #   sanitize_html('<script> do_nasty_stuff() </script>')
+    #    => &lt;script> do_nasty_stuff() &lt;/script>
+    #   sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
+    #    => <a>Click here for $100</a>
+    def sanitize_token(self, token):
+
+        # accommodate filters which use token_type differently
+        token_type = token["type"]
+        if token_type in list(tokenTypes.keys()):
+            token_type = tokenTypes[token_type]
+
+        if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"],
+                          tokenTypes["EmptyTag"]):
+            if token["name"] in self.allowed_elements:
+                return self.allowed_token(token, token_type)
+            else:
+                return self.disallowed_token(token, token_type)
+        elif token_type == tokenTypes["Comment"]:
+            pass
+        else:
+            return token
+
+    def allowed_token(self, token, token_type):
+        if "data" in token:
+            attrs = dict([(name, val) for name, val in
+                          token["data"][::-1]
+                          if name in self.allowed_attributes])
+            for attr in self.attr_val_is_uri:
+                if attr not in attrs:
+                    continue
+                val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
+                                       unescape(attrs[attr])).lower()
+                # remove replacement characters from unescaped characters
+                val_unescaped = val_unescaped.replace("\ufffd", "")
+                if (re.match("^[a-z0-9][-+.a-z0-9]*:", val_unescaped) and
+                    (val_unescaped.split(':')[0] not in
+                     self.allowed_protocols)):
+                    del attrs[attr]
+            for attr in self.svg_attr_val_allows_ref:
+                if attr in attrs:
+                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
+                                         ' ',
+                                         unescape(attrs[attr]))
+            if (token["name"] in self.svg_allow_local_href and
+                'xlink:href' in attrs and re.search('^\s*[^#\s].*',
+                                                    attrs['xlink:href'])):
+                del attrs['xlink:href']
+            if 'style' in attrs:
+                attrs['style'] = self.sanitize_css(attrs['style'])
+            token["data"] = [[name, val] for name, val in list(attrs.items())]
+        return token
+
+    def disallowed_token(self, token, token_type):
+        if token_type == tokenTypes["EndTag"]:
+            token["data"] = "</%s>" % token["name"]
+        elif token["data"]:
+            attrs = ''.join([' %s="%s"' % (k, escape(v)) for k, v in token["data"]])
+            token["data"] = "<%s%s>" % (token["name"], attrs)
+        else:
+            token["data"] = "<%s>" % token["name"]
+        if token.get("selfClosing"):
+            token["data"] = token["data"][:-1] + "/>"
+
+        if token["type"] in list(tokenTypes.keys()):
+            token["type"] = "Characters"
+        else:
+            token["type"] = tokenTypes["Characters"]
+
+        del token["name"]
+        return token
+
+    def sanitize_css(self, style):
+        # disallow urls
+        style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
+
+        # gauntlet
+        if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
+            return ''
+        if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
+            return ''
+
+        clean = []
+        for prop, value in re.findall("([-\w]+)\s*:\s*([^:;]*)", style):
+            if not value:
+                continue
+            if prop.lower() in self.allowed_css_properties:
+                clean.append(prop + ': ' + value + ';')
+            elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
+                                                'padding']:
+                for keyword in value.split():
+                    if not keyword in self.acceptable_css_keywords and \
+                            not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):
+                        break
+                else:
+                    clean.append(prop + ': ' + value + ';')
+            elif prop.lower() in self.allowed_svg_properties:
+                clean.append(prop + ': ' + value + ';')
+
+        return ' '.join(clean)
+
+
+class HTMLSanitizer(HTMLTokenizer, HTMLSanitizerMixin):
+    def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
+                 lowercaseElementName=False, lowercaseAttrName=False, parser=None):
+        # Change case matching defaults as we only output lowercase html anyway
+        # This solution doesn't seem ideal...
+        HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet,
+                               lowercaseElementName, lowercaseAttrName, parser=parser)
+
+    def __iter__(self):
+        for token in HTMLTokenizer.__iter__(self):
+            token = self.sanitize_token(token)
+            if token:
+                yield token
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
coissac	764ee27c0d	Switch to the version 1.1.21	2015-07-13 15:31:20 +02:00
coissac	18d87aa1b1	Add a small work around patch for the --skip and --only options in illuminapairedend	2015-07-11 23:38:47 +02:00
coissac	1c91c27ee3	Tag the version 1.1.20	2015-07-03 14:57:24 +02:00
coissac	22d343b46a	Patch two small print bug in the new ecotag	2015-07-03 14:55:44 +02:00
coissac	2af94b9da7	Add docuentation for the new options and an option to manage the ecotag cache size	2015-07-03 10:39:59 +02:00
coissac	aa064dda57	Add two general options for limiting the analysis of a file to only a sub-part of the data: - --skip <N> - --only <N> They respectively allow to first skip the N first sequences, and to analysize only the <N> next sequences.	2015-07-03 09:10:49 +02:00
coissac	f38ccae698	Add a --minimum-circle option to ecotag and a cache on the self alignment scores of 1000000 of pairwise scores	2015-07-02 16:14:22 +02:00
Celine Mercier	b99881817a	Closes #9 : adds a parser in obiaddtaxids for the UNITE 'general FASTA release' format	2015-06-12 15:08:58 +02:00
Celine Mercier	df093b3fa9	updated the UNITE parser of obiaddtaxids	2015-06-11 17:47:00 +02:00
coissac	9ba67985e9	Switch to the 1.1.18 version	2015-06-02 16:01:16 +02:00
coissac	f74aeff002	Merge branch 'master' of git@git.metabarcoding.org:obitools/obitools.git	2015-06-02 15:59:57 +02:00
coissac	b9fa718a2c	tag the 1.1.17 version	2015-06-02 15:53:41 +02:00
coissac	51a7ecde21	Patch the installer	2015-06-02 15:52:18 +02:00
Celine Mercier	6bf6cc6612	fixing a documentation problem (issue #3 )	2015-06-01 14:11:11 +02:00
Celine Mercier	191af9669a	modified the obisilva script to deal with version names containing dots, e.g. "119.1"	2015-06-01 14:07:19 +02:00
Celine Mercier	429fbbb61b	switching branch OBITools-1.01 to master	2015-06-01 10:41:26 +02:00
Celine Mercier	18c3a7dc08	Fixes #3	2015-05-22 18:00:13 +02:00
Céline Mercier	f40c2f5245	in obitaxonomy, fixed a problem when adding taxa from file with an associated path	2015-02-23 14:02:31 +00:00
Frédéric Boyer	f596d3a4fb	[ADD] order in location	2014-10-21 10:32:44 +00:00
Frédéric Boyer	4de712702d	Bug sur merged_KEY lorsque la merged_KEY existe deja dans les sequences : la somme n'etait pas effectuee correctement	2014-09-30 09:21:32 +00:00
Frédéric Boyer	1dad3bf086	-r option fix	2014-05-15 14:41:16 +00:00
Frédéric Boyer	b25091e8b0	Bug à l'affichage au chargement de la taxonomie	2014-04-30 15:04:23 +00:00
Frédéric Boyer	a664ea5d77	assertion failed with progress bar when empty file	2014-04-25 08:51:17 +00:00
coissac	a9a86d1067	remove memory leak	2014-04-23 14:00:48 +00:00
coissac	5a6c4d231c		2014-04-23 12:35:54 +00:00
Frédéric Boyer	62923e1cb8	[MOD] Erreur de lecture des fichiers (standard, pas les options)	2014-04-22 22:39:23 +00:00
Frédéric Boyer	d5ce87f7f8	[MOD] Corrected a bug in initialization of 'word4table' that 'coredump' on linux	2014-03-10 18:19:55 +00:00
coissac	7c55ad6d32	Patch to fit the version 0.20 of cython	2014-02-11 16:25:06 +00:00
coissac	eac5bc958d	Patch to fit the version 0.20 of cython	2014-02-11 16:22:28 +00:00
Frédéric Boyer	847284267e		2014-01-31 16:51:20 +00:00
Frédéric Boyer	105cb4c8be		2014-01-31 16:45:18 +00:00
Frédéric Boyer	acad67c67f		2014-01-31 16:26:44 +00:00
Frédéric Boyer	4daec682ff		2014-01-31 16:18:31 +00:00
Frédéric Boyer	b09f7b4ef4		2014-01-31 10:39:48 +00:00
Frédéric Boyer	8b00463558		2014-01-31 10:32:21 +00:00
coissac	bd75660e3d		2014-01-31 09:29:36 +00:00
Frédéric Boyer	2d432e1d51	doc	2014-01-30 16:20:36 +00:00
coissac	2f6c3c05db	Change the name of the tags forward_primer and reverse_primer used in ecoPCR file conversion to forward_match and reverse_match to conform with names used in ngsfilter	2014-01-30 10:39:01 +00:00
Céline Mercier	4e22703ddb	fixed spelling mistake	2013-12-10 13:59:51 +00:00
coissac	0675ea6770	Patch a bug related to the -n option of obitab	2013-12-10 08:11:07 +00:00
Frédéric Boyer	f3d52f1007	[DOC]	2013-12-04 14:54:09 +00:00
Frédéric Boyer	9ae4a1b4c4	[MOD] direct_tag replaced by forward_tag	2013-12-04 13:41:31 +00:00
Frédéric Boyer	a938c9bdc2		2013-12-04 11:02:02 +00:00
Frédéric Boyer	514cd02210	[MOD] nqual and junction removed	2013-12-04 11:00:27 +00:00
Frédéric Boyer	f2f8d8805c		2013-12-04 10:18:00 +00:00
Céline Mercier	f355cc7a9f		2013-11-20 15:42:07 +00:00
Frédéric Boyer	608db8b1c0	[MOD] removed a print	2013-11-15 21:53:40 +00:00
coissac	47e6b4108c	New version of obiclean with clustering options	2013-11-14 15:03:11 +00:00
Frédéric Boyer	5917031d61	[MOD] now handle dictionaries, in such case, sequences can then be found in several files named after the keys of the dictionary	2013-11-05 14:14:24 +00:00
coissac	411c5134ae	Path 2 error in the option helps	2013-10-29 14:02:24 +00:00
Céline Mercier	22217920a5	fixed obiaddtaxids bug	2013-10-24 14:32:27 +00:00
Céline Mercier	621b499cb8	fixed obitaxonomy bug	2013-10-24 13:32:44 +00:00
Céline Mercier	2e18284dc0	fixed obitaxonomy bug	2013-10-24 13:29:20 +00:00
Céline Mercier	fc5d914093	obiaddtaxids now handle homonymes	2013-10-24 12:55:46 +00:00
Céline Mercier	7a7ee58a51	taxonomy databases now handle homonymes	2013-10-24 12:15:00 +00:00
Céline Mercier	82d92f8497	commented )D option in obitaxonomy because it doesn't work	2013-10-18 12:03:49 +00:00
Céline Mercier	47920a4687	obitaxonomy now checks if taxon already exists under the specific ancestor + changed -A option: if genus already existing it MUST be under restricting ancestor + fixed minor bug	2013-10-16 13:53:16 +00:00
coissac	12c080d7f4	rename --rank option to --seq-rank option	2013-09-19 01:58:11 +00:00
Céline Mercier	7b3886974d	better handling of options in obiaddtaxids	2013-09-10 09:57:34 +00:00
Céline Mercier	0686029075	-w option in obitaxonomy	2013-09-04 12:07:16 +00:00
Céline Mercier	ca36a4fbfe	fixed bug	2013-09-04 10:28:56 +00:00
Céline Mercier	cbf007b5a9	-w option in obitaxonomy	2013-09-04 10:27:02 +00:00
Céline Mercier	7e8835295a	figuring out a bug	2013-09-04 10:11:10 +00:00
Céline Mercier	e3aaa53be1	removed a print	2013-09-04 10:00:20 +00:00
Céline Mercier	fbdf0db3c5	changed the UNITE parser in obiaddtaxids to remove empty fields ('-')	2013-09-04 09:46:26 +00:00
Frédéric Boyer	3b10f97587	[MOD] updated doc	2013-08-01 15:11:38 +00:00
coissac	ff287fc31d	Patch variance estimator for returning 0 for singleton	2013-06-22 08:31:52 +00:00
coissac	3cea0f40fe	patch a bug in obisample on the sampling with replacement method	2013-06-18 08:42:48 +00:00
coissac	7bf04063f8	add doc to the manifest	2013-05-20 10:12:31 +00:00