Skip to content

Commit

Permalink
Fix unicode encoding errors inside pip
Browse files Browse the repository at this point in the history
On Python 2, if a distribution package contained non-ASCII file names,
the Resolver failed with a UnicodeDecodeError (traceback below), because
RequirementSet was initialized with unicode paths rather than str.

Fix this by making sure that the path components used in PyPIRepository
are all filesystem encoded str.

Drop the unicode_literals import from `_compat/tempfile.py`, because the
strings used in there should also be str, not unicode.

Also make sure that the InstallRequirement is always constructed from
str, not unicode.

The traceback of the error looked like this:

    Traceback (most recent call last):
      File ".../piptools/scripts/compile.py", line 184, in cli
        results = resolver.resolve(max_rounds=max_rounds)
      File ".../piptools/resolver.py", line 107, in resolve
        has_changed, best_matches = self._resolve_one_round()
      File ".../piptools/resolver.py", line 195, in _resolve_one_round
        for dep in self._iter_dependencies(best_match):
      File ".../piptools/resolver.py", line 274, in _iter_dependencies
        dependencies = self.repository.get_dependencies(ireq)
      File ".../piptools/repositories/pypi.py", line 145, in get_dependencies
        self._dependencies_cache[ireq] = reqset._prepare_file(self.finder, ireq)
      File ".../pip/req/req_set.py", line 620, in _prepare_file
        session=self.session, hashes=hashes)
      File ".../pip/download.py", line 821, in unpack_url
        hashes=hashes
      File ".../pip/download.py", line 663, in unpack_http_url
        unpack_file(from_path, location, content_type, link)
      File ".../pip/utils/__init__.py", line 605, in unpack_file
        untar_file(filename, location)
      File ".../pip/utils/__init__.py", line 551, in untar_file
        path = os.path.join(location, fn)
      File "/usr/local/lib/python2.7/posixpath.py", line 73, in join
        path += '/' + b
    UnicodeDecodeError: 'ascii' codec can't decode byte 0xe3 in position 66:
      ordinal not in range(128)

Fixes jazzband#564
  • Loading branch information
suutari authored and vphilippon committed Sep 26, 2017
1 parent b3c6a11 commit 3cb15eb
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 8 deletions.
3 changes: 1 addition & 2 deletions piptools/_compat/tempfile.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# coding: utf-8
from __future__ import (absolute_import, division, print_function,
unicode_literals)
from __future__ import absolute_import, division, print_function

import os as _os
import sys as _sys
Expand Down
10 changes: 5 additions & 5 deletions piptools/repositories/pypi.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

from ..cache import CACHE_DIR
from ..exceptions import NoCandidateFound
from ..utils import (is_pinned_requirement, lookup_table,
from ..utils import (fs_str, is_pinned_requirement, lookup_table,
make_install_requirement, pip_version_info)
from .base import BaseRepository

Expand Down Expand Up @@ -83,16 +83,16 @@ def __init__(self, pip_options, session):

# Setup file paths
self.freshen_build_caches()
self._download_dir = os.path.join(CACHE_DIR, 'pkgs')
self._wheel_download_dir = os.path.join(CACHE_DIR, 'wheels')
self._download_dir = fs_str(os.path.join(CACHE_DIR, 'pkgs'))
self._wheel_download_dir = fs_str(os.path.join(CACHE_DIR, 'wheels'))

def freshen_build_caches(self):
"""
Start with fresh build/source caches. Will remove any old build
caches from disk automatically.
"""
self._build_dir = TemporaryDirectory('build')
self._source_dir = TemporaryDirectory('source')
self._build_dir = TemporaryDirectory(fs_str('build'))
self._source_dir = TemporaryDirectory(fs_str('source'))

@property
def build_dir(self):
Expand Down
31 changes: 30 additions & 1 deletion piptools/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,9 @@ def make_install_requirement(name, version, extras, constraint=False):
# Sort extras for stability
extras_string = "[{}]".format(",".join(sorted(extras)))

return InstallRequirement.from_line('{}{}=={}'.format(name, extras_string, str(version)), constraint=constraint)
return InstallRequirement.from_line(
str('{}{}=={}'.format(name, extras_string, version)),
constraint=constraint)


def format_requirement(ireq, marker=None):
Expand Down Expand Up @@ -208,3 +210,30 @@ def dedup(iterable):
order-reserved.
"""
return iter(OrderedDict.fromkeys(iterable))


def fs_str(string):
    """
    Convert given string to a correctly encoded filesystem string.

    On Python 2, if the input string is unicode, converts it to bytes
    encoded with the filesystem encoding.

    On Python 3 returns the string as is, since Python 3 uses unicode
    paths and the input string shouldn't be bytes.

    >>> fs_str(u'some path component/Something')
    'some path component/Something'
    >>> assert isinstance(fs_str('whatever'), str)
    >>> assert isinstance(fs_str(u'whatever'), str)

    :type string: str|unicode
    :rtype: str
    :raises TypeError: if bytes are given on Python 3
    """
    if isinstance(string, str):
        return string
    # Only reachable with bytes on Python 3: on Python 2 ``bytes`` is
    # ``str`` and was already returned above.  Raise explicitly instead of
    # asserting, because asserts are stripped under ``python -O`` and the
    # bytes object would then fail on ``.encode`` with an unrelated
    # AttributeError.
    if isinstance(string, bytes):
        raise TypeError(
            'fs_str() expects str or unicode, got bytes: {!r}'.format(string))
    return string.encode(_fs_encoding)


# Encoding that fs_str() uses to encode unicode input.  Falls back to the
# interpreter's default encoding when sys.getfilesystemencoding() returns
# a falsy value.
_fs_encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
62 changes: 62 additions & 0 deletions tests/test_repositories.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from mock import MagicMock, patch
from pip.index import PackageFinder
from pip.req import InstallRequirement

from piptools.repositories.pypi import PyPIRepository
from piptools.scripts.compile import get_pip_command


def test_pypirepo_build_dir_is_str():
    """The repository's ``build_dir`` must be a native ``str``."""
    repository = get_pypi_repository()
    build_dir = repository.build_dir
    assert isinstance(build_dir, str)


def test_pypirepo_source_dir_is_str():
    """The repository's ``source_dir`` must be a native ``str``."""
    repository = get_pypi_repository()
    source_dir = repository.source_dir
    assert isinstance(source_dir, str)


def test_pypirepo_calls_reqset_with_str_paths():
    """
    Make sure that paths passed to RequirementSet init are str.

    Passing unicode paths on Python 2 could make pip fail later on
    unpack, if the package contains non-ASCII file names, because
    non-ASCII str and unicode paths cannot be combined.
    """
    with patch('piptools.repositories.pypi.RequirementSet') as reqset_class:
        repository = get_pypi_repository()
        requirement = InstallRequirement.from_line('ansible==2.4.0.0')

        # The patched class hands back this mock instead of a real
        # RequirementSet instance
        reqset_instance = MagicMock()
        reqset_class.return_value = reqset_instance

        # Trigger the RequirementSet construction
        repository.get_dependencies(requirement)

    # RequirementSet must be constructed exactly once, with str-only paths
    assert reqset_class.call_count == 1
    positional, keywords = reqset_class.call_args
    for index in (0, 1):
        assert isinstance(positional[index], str)
    for option in ('download_dir', 'wheel_download_dir'):
        assert isinstance(keywords.get(option), str)

    # _prepare_file must be called once with exactly (finder, ireq) and
    # no keyword arguments
    prepare_file = reqset_instance._prepare_file
    assert prepare_file.call_count == 1
    prepare_args, prepare_kwargs = prepare_file.call_args
    finder_arg, ireq_arg = prepare_args
    assert isinstance(finder_arg, PackageFinder)
    assert ireq_arg == requirement
    assert not prepare_kwargs


def get_pypi_repository():
    """
    Build a PyPIRepository object for the tests.

    The options come from parsing an empty argument list with the pip
    command, and the session is built from those same options.

    :rtype: PyPIRepository
    """
    command = get_pip_command()
    parsed = command.parse_args([])
    options = parsed[0]
    return PyPIRepository(options, command._build_session(options))

0 comments on commit 3cb15eb

Please sign in to comment.