Skip to content

Commit

Permalink
add shallow download network utilities
Browse files Browse the repository at this point in the history
make types pass

add --shallow-wheels cli arg

add news

rename news

make the metadata test pass on windows

use --shallow-wheels unconditionally and remove the cli arg

download all wheels at the end of the run

add a hack to avoid signal() erroring in a background thread

avoid using shallow wheels for non-remote file paths

add --unstable-feature=shallow_wheels!
  • Loading branch information
cosmicexplorer committed Jun 23, 2020
1 parent acab2ee commit e9c6ec3
Show file tree
Hide file tree
Showing 22 changed files with 868 additions and 4 deletions.
1 change: 1 addition & 0 deletions news/8448.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add a set of utilities in ``pip._internal.network.shallow`` for fetching metadata from remote wheel files without downloading the entire file. Link these utilities into the v2 resolver by adding a new ShallowWheelDistribution AbstractDistribution subclass. Expose this behavior via a --unstable-feature=shallow_wheels command-line option to ``pip download``. This produces a marked performance improvement.
2 changes: 1 addition & 1 deletion src/pip/_internal/cli/cmdoptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -919,7 +919,7 @@ def check_list_path_option(options):
metavar='feature',
action='append',
default=[],
choices=['resolver'],
choices=['resolver', 'shallow_wheels'],
help=SUPPRESS_HELP, # TODO: Enable this when the resolver actually works.
# help='Enable unstable feature(s) that may be backward incompatible.',
) # type: Callable[..., Option]
Expand Down
19 changes: 16 additions & 3 deletions src/pip/_internal/cli/progress_bars.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from pip._internal.utils.typing import MYPY_CHECK_RUNNING

if MYPY_CHECK_RUNNING:
from typing import Any, Dict, List
from typing import Any, Dict, List, Optional

try:
from pip._vendor import colorama
Expand All @@ -24,6 +24,18 @@
colorama = None


def _signal_unless_backgrounded(signum, handler):
# type: (int, Any) -> Optional[Any]
try:
return signal(signum, handler)
except ValueError:
# FIXME: this otherwise doesn't work when called from a non-main
# thread. This therefore fails if we try to download more than one
# wheel at once via threading, which calls back to Downloader, which
# uses this progress bar.
return None


def _select_progress_class(preferred, fallback):
# type: (Bar, Bar) -> Bar
encoding = getattr(preferred.file, "encoding", None)
Expand Down Expand Up @@ -84,7 +96,8 @@ def __init__(self, *args, **kwargs):
**kwargs
)

self.original_handler = signal(SIGINT, self.handle_sigint)
self.original_handler = _signal_unless_backgrounded(
SIGINT, self.handle_sigint)

# If signal() returns None, the previous handler was not installed from
# Python, and we cannot restore it. This probably should not happen,
Expand All @@ -103,7 +116,7 @@ def finish(self):
normally, or gets interrupted.
"""
super(InterruptibleMixin, self).finish() # type: ignore
signal(SIGINT, self.original_handler)
_signal_unless_backgrounded(SIGINT, self.original_handler)

def handle_sigint(self, signum, frame): # type: ignore
"""
Expand Down
1 change: 1 addition & 0 deletions src/pip/_internal/cli/req_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ def make_requirement_preparer(
finder=finder,
require_hashes=options.require_hashes,
use_user_site=use_user_site,
use_shallow_wheels=('shallow_wheels' in options.unstable_features),
)

@staticmethod
Expand Down
1 change: 1 addition & 0 deletions src/pip/_internal/commands/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ def run(self, options, args):
requirement_set = resolver.resolve(
reqs, check_supported_wheels=True
)
requirement_set.perform_all_final_hydration()

downloaded = ' '.join([req.name # type: ignore
for req in requirement_set.requirements.values()
Expand Down
1 change: 1 addition & 0 deletions src/pip/_internal/commands/install.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,7 @@ def run(self, options, args):
requirement_set = resolver.resolve(
reqs, check_supported_wheels=not options.target_dir
)
requirement_set.perform_all_final_hydration()

try:
pip_req = requirement_set.get_requirement("pip")
Expand Down
1 change: 1 addition & 0 deletions src/pip/_internal/commands/wheel.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ def run(self, options, args):
requirement_set = resolver.resolve(
reqs, check_supported_wheels=True
)
requirement_set.perform_all_final_hydration()

reqs_to_build = [
r for r in requirement_set.requirements.values()
Expand Down
100 changes: 100 additions & 0 deletions src/pip/_internal/distributions/shallow_wheel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import os

from pip._vendor.pkg_resources import DistInfoDistribution

from pip._internal.distributions.base import AbstractDistribution
from pip._internal.network.shallow.httpfile import Context as HttpContext
from pip._internal.network.shallow.httpfile import Url
from pip._internal.network.shallow.wheel import Context as WheelContext
from pip._internal.network.shallow.wheel import (
ProjectName,
WheelMetadataRequest,
)
from pip._internal.network.shallow.zipfile import Context as ZipContext
from pip._internal.utils.typing import MYPY_CHECK_RUNNING
from pip._internal.utils.wheel import WheelMetadata

if MYPY_CHECK_RUNNING:
from typing import Any
from pip._vendor.pkg_resources import Distribution
from pip._internal.index.package_finder import PackageFinder
from pip._internal.models.link import Link
from pip._internal.network.download import Downloader
from pip._internal.req import InstallRequirement


class DistributionNeedingFinalHydration(DistInfoDistribution):
    """A ``DistInfoDistribution`` that remembers how to fetch its own file.

    Besides the usual distribution arguments, it stores a link, a downloader
    callable and a target directory so that ``finally_hydrate()`` can write
    the linked file to disk at a later point.
    """

    def __init__(self, link, downloader, download_dir, *args, **kwargs):
        # type: (Link, Downloader, str, Any, Any) -> None
        """Store the download inputs; forward everything else to the base.

        :param link: link handed to ``downloader`` by ``finally_hydrate()``.
        :param downloader: callable invoked with ``link``.
        :param download_dir: directory the downloaded file is written into.
        """
        super(DistributionNeedingFinalHydration, self).__init__(
            *args, **kwargs)
        self.final_link = link
        self.downloader = downloader
        self.download_dir = download_dir

    def finally_hydrate(self):
        # type: () -> None
        """Download ``final_link`` and write its chunks into ``download_dir``.

        The output file is named after the ``filename`` attribute of the
        object returned by the downloader.
        """
        fetched = self.downloader(self.final_link)
        destination = os.path.join(self.download_dir, fetched.filename)
        with open(destination, 'wb') as out:
            for piece in fetched.chunks:
                out.write(piece)


class ShallowWheelDistribution(AbstractDistribution):
    """Wheel distribution whose metadata is read through the shallow contexts.

    The metadata is extracted via the wheel/zip/http ``Context`` chain built
    on the downloader's session; no preparation step is performed.
    """

    def __init__(self, req, downloader, download_dir):
        # type: (InstallRequirement, Downloader, str) -> None
        """Keep the downloader and download directory for later use.

        :param req: requirement forwarded to ``AbstractDistribution``.
        :param downloader: supplies the session and is passed on to the
            distribution returned by ``get_pkg_resources_distribution()``.
        :param download_dir: passed on to that distribution as well.
        """
        super(ShallowWheelDistribution, self).__init__(req)
        self._downloader = downloader
        self._download_dir = download_dir

    @property
    def _wheel_context(self):
        # type: () -> WheelContext
        """Build a fresh wheel context layered over zip and http contexts."""
        session = self._downloader.get_session()
        return WheelContext(ZipContext(HttpContext(session)))

    def get_pkg_resources_distribution(self):
        # type: () -> Distribution
        """Extract the wheel's metadata and wrap it in a Distribution.

        The returned ``DistributionNeedingFinalHydration`` is built from the
        in-memory ``METADATA`` contents together with the link, downloader
        and download directory it needs for its later download.
        """
        # Wheels are never unnamed.
        assert self.req.name
        assert self.req.link

        project_name = ProjectName(self.req.name)
        wheel_req = WheelMetadataRequest(
            url=Url(self.req.link.url),
            project_name=project_name,
        )
        extracted = self._wheel_context.extract_wheel_metadata(wheel_req)
        metadata = extracted.contents

        wheel_filename = self.req.link.filename
        wheel_metadata = WheelMetadata({'METADATA': metadata}, wheel_filename)

        return DistributionNeedingFinalHydration(
            link=self.req.link,
            downloader=self._downloader,
            download_dir=self._download_dir,
            location=wheel_filename,
            metadata=wheel_metadata,
            project_name=project_name.name,
        )

    def prepare_distribution_metadata(self, finder, build_isolation):
        # type: (PackageFinder, bool) -> None
        """Do nothing; both arguments are ignored."""
4 changes: 4 additions & 0 deletions src/pip/_internal/network/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,10 @@ def __init__(
self._session = session
self._progress_bar = progress_bar

def get_session(self):
    # type: () -> PipSession
    """Return the object stored on this instance as ``_session``."""
    session = self._session
    return session

def __call__(self, link):
# type: (Link) -> Download
try:
Expand Down
Empty file.
156 changes: 156 additions & 0 deletions src/pip/_internal/network/shallow/httpfile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
"""
Download ranges of files over remote http.
"""

from collections import namedtuple

from pip._vendor import requests

from pip._internal.utils.typing import MYPY_CHECK_RUNNING
from pip._internal.utils.urls import get_url_scheme

if MYPY_CHECK_RUNNING:
from typing import Any, Optional


def url_is_remote(url):
    # type: (str) -> bool
    """Return True when the scheme of ``url`` is ``http`` or ``https``."""
    scheme = get_url_scheme(url)
    return scheme == 'http' or scheme == 'https'


class Url(namedtuple('Url', ['url'])):
    """One-field tuple holding a URL string accepted by ``url_is_remote``."""

    def __new__(cls, url):
        # type: (str) -> Url
        """Create the tuple, asserting that ``url`` is a remote URL."""
        is_remote = url_is_remote(url)
        assert is_remote
        return super(Url, cls).__new__(cls, url)


class HttpFileRequest(namedtuple('HttpFileRequest', ['url'])):
    """One-field tuple naming the ``url`` a request refers to."""


class Size(namedtuple('Size', ['size'])):
    """Non-negative quantity wrapped in a one-field tuple.

    Addition, subtraction and the four ordering comparisons are defined
    between ``Size`` instances only; any other operand fails an assertion.
    """

    def __new__(cls, size=0):
        # type: (int) -> Size
        """Create a ``Size``; ``size`` defaults to 0 and must be >= 0."""
        assert size >= 0
        return super(Size, cls).__new__(cls, size)

    def _operand(self, other):
        # type: (Any) -> int
        """Assert ``other`` is an instance of this type; return its size."""
        assert isinstance(other, type(self))
        return other.size

    def __add__(self, other):
        # type: (Any) -> Size
        rhs = self._operand(other)
        return Size(self.size + rhs)

    def __sub__(self, other):
        # type: (Any) -> Size
        rhs = self._operand(other)
        return Size(self.size - rhs)

    def __lt__(self, other):
        # type: (Any) -> bool
        rhs = self._operand(other)
        return self.size < rhs

    def __le__(self, other):
        # type: (Any) -> bool
        rhs = self._operand(other)
        return self.size <= rhs

    def __gt__(self, other):
        # type: (Any) -> bool
        rhs = self._operand(other)
        return self.size > rhs

    def __ge__(self, other):
        # type: (Any) -> bool
        rhs = self._operand(other)
        return self.size >= rhs


class ByteRange(namedtuple('ByteRange', ['start', 'end'])):
    """Pair of ``start`` and ``end`` positions with ``end >= start``.

    ``end`` is treated as exclusive: the header rendering subtracts one from
    it, and ``size_diff()`` is ``end - start``.
    """

    def __new__(cls, start, end):
        # type: (Size, Size) -> ByteRange
        """Create the range, asserting that ``end >= start``."""
        assert end >= start
        return super(ByteRange, cls).__new__(cls, start, end)

    def as_bytes_range_header(self):
        # type: () -> str
        """Render the range as a ``bytes=<first>-<last>`` string."""
        first = self.start.size
        # NB: The byte ranges accepted here are inclusive, so remove one
        # from the end.
        last_inclusive = self.end.size - 1
        return "bytes={start}-{end}".format(start=first, end=last_inclusive)

    def size_diff(self):
        # type: () -> Size
        """Return ``end - start``."""
        span = self.end - self.start
        return span


class BytesRangeRequest(namedtuple('BytesRangeRequest', ['start', 'end'])):
    """Requested ``start``/``end`` pair in which either side may be ``None``.

    ``get_byte_range()`` resolves the ``None`` sides against a known total
    size to produce a concrete ``ByteRange``.
    """

    def __new__(cls, start, end):
        # type: (Optional[Size], Optional[Size]) -> BytesRangeRequest
        """Create the request; when both bounds are given, ``end >= start``."""
        assert start is None or end is None or end >= start
        return super(BytesRangeRequest, cls).__new__(cls, start, end)

    def get_byte_range(self, size):
        # type: (Size) -> ByteRange
        """Resolve this request against ``size``.

        A missing ``start`` becomes 0 and a missing ``end`` becomes
        ``size.size``; bounds that are present must not exceed ``size``.
        """
        if self.start is not None:
            assert self.start <= size, "???/start={start},size={size}".format(
                start=self.start, size=size)
            first = self.start.size
        else:
            first = 0

        if self.end is not None:
            assert self.end <= size
            last = self.end.size
        else:
            last = size.size

        return ByteRange(start=Size(first), end=Size(last))


class HttpFile(namedtuple('HttpFile', ['url', 'size'])):
    """Two-field tuple pairing a ``url`` with the ``size`` known for it."""


class Context(object):
    """Performs HEAD and ranged GET requests through a session object."""

    def __init__(self, session=None):
        # type: (Optional[requests.Session]) -> None
        """Use ``session`` if given (and truthy), else a new requests session.

        :param session: object providing ``head()`` and ``get()``.
        """
        self.session = session or requests.Session()

    def head(self, request):
        # type: (HttpFileRequest) -> HttpFile
        """Issue a HEAD request and return the file's URL and total size.

        :param request: carries the ``Url`` to query as ``request.url``.
        :return: an ``HttpFile`` built from the request URL and the
            ``Content-Length`` response header.
        :raises AssertionError: if the ``Accept-Ranges`` response header is
            missing or does not contain ``bytes``.
        """
        resp = self.session.head(request.url.url)
        resp.raise_for_status()
        # Look the header up with a default so that a response without any
        # Accept-Ranges header reaches the assertion below (and its message)
        # instead of failing with a bare KeyError.
        accept_ranges = resp.headers.get("Accept-Ranges", "")
        assert (
            "bytes" in accept_ranges
        ), "???/bytes was not found in range headers"
        content_length = int(resp.headers["Content-Length"])
        return HttpFile(url=request.url, size=Size(content_length))

    def range_request(self, http_file, request):
        # type: (HttpFile, BytesRangeRequest) -> bytes
        """Fetch the bytes of ``http_file`` selected by ``request``.

        :param http_file: URL and total size, as returned by ``head()``.
        :param request: requested range, resolved against ``http_file.size``.
        :return: exactly ``end - start`` bytes of content.
        :raises AssertionError: if the number of bytes obtained differs from
            the size of the resolved range.
        """
        byte_range = request.get_byte_range(http_file.size)
        resp = self.session.get(
            http_file.url.url,
            headers={"Range": byte_range.as_bytes_range_header()})
        resp.raise_for_status()

        if Size(len(resp.content)) == http_file.size:
            # This request for the full URL contents is cached, and we should
            # return just the requested byte range.
            start = byte_range.start.size
            end = byte_range.end.size
            response_bytes = resp.content[start:end]
        else:
            response_bytes = resp.content

        size_diff = byte_range.size_diff()
        assert (
            Size(len(response_bytes)) == size_diff
        ), ("???/response should have been length {}, but got (size {}):\n{!r}"
            .format(size_diff, len(response_bytes), response_bytes))
        return response_bytes
Loading

0 comments on commit e9c6ec3

Please sign in to comment.