Skip to content

Commit

Permalink
Experimental support for pip dependencies
Browse files Browse the repository at this point in the history
In the absence of an external interface to pip's resolver (see e.g.
pypa/pip#7819), this uses Poetry's
resolution logic to convert pip requirements from environment.yaml
to either transitive dependencies (in the case of env output) or
direct references (in the case of explicit output).  In explicit
mode these are emitted as comment lines that `conda-lock install`
can unpack and pass to `pip install` inside of the target environment.
  • Loading branch information
jvansanten committed Oct 20, 2021
1 parent a9724ae commit b5b13aa
Show file tree
Hide file tree
Showing 6 changed files with 480 additions and 17 deletions.
110 changes: 104 additions & 6 deletions conda_lock/conda_lock.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@

from conda_lock.common import read_file, read_json, write_file
from conda_lock.errors import PlatformValidationError
from conda_lock.pypi_solver import PipRequirement, solve_pypi
from conda_lock.src_parser import LockSpecification
from conda_lock.src_parser.environment_yaml import parse_environment_file
from conda_lock.src_parser.meta_yaml import parse_meta_yaml_file
Expand Down Expand Up @@ -264,15 +265,20 @@ def do_conda_install(conda: PathLike, prefix: str, name: str, file: str) -> None
*([] if kind == "env" else ["--yes"]),
]

common_args = []
if prefix:
args.append("--prefix")
args.append(prefix)
common_args.append("--prefix")
common_args.append(prefix)
if name:
args.append("--name")
args.append(name)
common_args.append("--name")
common_args.append(name)
conda_flags = os.environ.get("CONDA_FLAGS")
if conda_flags:
args.extend(shlex.split(conda_flags))
common_args.extend(shlex.split(conda_flags))

args.extend(common_args)

# NOTE: the length of common_args must not be asserted. It holds "--prefix"
# and/or "--name" with their values, plus however many extra arguments
# $CONDA_FLAGS contributes, so any fixed size would reject valid invocations.

logging.debug("$MAMBA_ROOT_PREFIX: %s", os.environ.get("MAMBA_ROOT_PREFIX"))

Expand All @@ -297,6 +303,47 @@ def do_conda_install(conda: PathLike, prefix: str, name: str, file: str) -> None
)
sys.exit(1)

if kind == "explicit":
with open(file) as explicit_env:
pip_requirements = [
line.split("# pip ")[1]
for line in explicit_env
if line.startswith("# pip ")
]
if not pip_requirements:
return

with tempfile.NamedTemporaryFile() as tf:
write_file("\n".join(pip_requirements), tf.name)
pip_proc = subprocess.run(
[
str(conda),
"run",
]
+ common_args
+ [
"pip",
"install",
"--no-deps",
"-r",
tf.name,
]
)

if pip_proc.stdout:
for line in pip_proc.stdout.decode().split("\n"):
logging.info(line)

if pip_proc.stderr:
for line in pip_proc.stderr.decode().split("\n"):
logging.error(line.rstrip())

if pip_proc.returncode != 0:
print(
f"Could not perform pip install using {file} lock file into {name or prefix}"
)
sys.exit(1)


def search_for_md5s(
conda: PathLike, package_specs: List[dict], platform: str, channels: Sequence[str]
Expand Down Expand Up @@ -539,12 +586,39 @@ def create_lockfile_from_spec(
)
logging.debug("dry_run_install:\n%s", dry_run_install)

if spec.pip_specs:
python_version: Optional[str] = None
locked_packages = []
for package in (
dry_run_install["actions"]["FETCH"] + dry_run_install["actions"]["LINK"]
):
if package["name"] == "python":
python_version = package["version"]
elif not package["name"].startswith("__"):
locked_packages.append((package["name"], package["version"]))
if python_version is None:
raise ValueError("Got pip specs without Python")
pip = solve_pypi(
spec.pip_specs,
conda_installed=locked_packages,
python_version=python_version,
platform=spec.platform,
)
else:
pip = []

lockfile_contents = [
"# Generated by conda-lock.",
f"# platform: {spec.platform}",
f"# input_hash: {spec.input_hash()}\n",
]

def format_pip_requirement(spec: PipRequirement) -> str:
    """
    Render a resolved pip requirement as a single requirement string.

    A requirement carrying a "url" key becomes a direct reference
    (``name @ url``); otherwise it is pinned with the ``===`` operator.
    """
    if "url" not in spec:
        return f'{spec["name"]} === {spec["version"]}'
    return f'{spec["name"]} @ {spec["url"]}'

if kind == "env":
link_actions = dry_run_install["actions"]["LINK"]
lockfile_contents.extend(
Expand All @@ -560,6 +634,10 @@ def create_lockfile_from_spec(
),
]
)
if pip:
lockfile_contents.extend(
[" - pip:", *(f" - {format_pip_requirement(pkg)}" for pkg in pip)]
)
elif kind == "explicit":
lockfile_contents.append("@EXPLICIT\n")

Expand Down Expand Up @@ -611,6 +689,18 @@ def sanitize_lockfile_line(line):
return line

lockfile_contents = [sanitize_lockfile_line(line) for line in lockfile_contents]

# emit an explicit requirements.txt, prefixed with '# pip '
for pkg in pip:
lines = [format_pip_requirement(pkg)] + [
f" --hash={hash}" for hash in pkg["hashes"]
]
lockfile_contents.extend(
[
f"# pip {line}"
for line in [line + " \\" for line in lines[:-1]] + [lines[-1]]
]
)
else:
raise ValueError(f"Unrecognised lock kind {kind}.")

Expand Down Expand Up @@ -670,6 +760,12 @@ def aggregate_lock_specs(lock_specs: List[LockSpecification]) -> LockSpecificati
set(chain.from_iterable([lock_spec.specs for lock_spec in lock_specs]))
)

pip_specs = list(
set(
chain.from_iterable([lock_spec.pip_specs or [] for lock_spec in lock_specs])
)
)

# pick the first non-empty channel
channels: List[str] = next(
(lock_spec.channels for lock_spec in lock_specs if lock_spec.channels), []
Expand All @@ -680,7 +776,9 @@ def aggregate_lock_specs(lock_specs: List[LockSpecification]) -> LockSpecificati
(lock_spec.platform for lock_spec in lock_specs if lock_spec.platform), ""
)

return LockSpecification(specs=specs, channels=channels, platform=platform)
return LockSpecification(
specs=specs, channels=channels, platform=platform, pip_specs=pip_specs
)


def _ensureconda(
Expand Down
197 changes: 197 additions & 0 deletions conda_lock/pypi_solver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
import re
import sys

from pathlib import Path
from typing import Optional, TypedDict
from urllib.parse import urldefrag

from clikit.api.io.flags import VERY_VERBOSE
from clikit.io import ConsoleIO
from packaging.tags import compatible_tags, cpython_tags
from poetry.core.packages import Dependency, Package, ProjectPackage, URLDependency
from poetry.installation.chooser import Chooser
from poetry.installation.operations import Install
from poetry.installation.operations.uninstall import Uninstall
from poetry.puzzle import Solver
from poetry.repositories.pool import Pool
from poetry.repositories.pypi_repository import PyPiRepository
from poetry.repositories.repository import Repository
from poetry.utils.env import Env

from conda_lock.src_parser.pyproject_toml import get_lookup as get_forward_lookup


class PlatformEnv(Env):
    """
    Env whose supported wheel tags are computed from an explicitly given
    Python version and target platform.
    """

    def __init__(self, python_version, platform):
        super().__init__(path=Path(sys.prefix))
        # Only linux-64 has a known set of wheel platform tags so far
        if platform != "linux-64":
            raise ValueError(f"Unsupported platform '{platform}'")
        # FIXME: in principle these depend on the glibc in the conda env
        self._platforms = ["manylinux_2_17_x86_64", "manylinux2014_x86_64"]
        # "3.9.7" -> (3, 9, 7)
        self._python_version = tuple(
            int(component) for component in python_version.split(".")
        )

    def get_supported_tags(self):
        """
        Mimic the output of packaging.tags.sys_tags() on the given platform
        """
        target = {
            "python_version": self._python_version,
            "platforms": self._platforms,
        }
        # CPython-specific tags first, then the generic compatible tags
        return [*cpython_tags(**target), *compatible_tags(**target)]


class PipRequirement(TypedDict):
    """A resolved pip package, in the form returned by solve_pypi()."""

    # Name of the package
    name: str
    # Resolved version; solve_pypi() sets this to None for direct URL references
    version: Optional[str]
    # Location of the distribution, without any URL fragment
    url: str
    # Hash strings; solve_pypi() builds them as "<hash name>:<digest>"
    hashes: list[str]


# Matches one pip requirement: a package name, optional extras in square
# brackets, and then either a direct reference ("@ <url>") or one or more
# comma-separated PEP 440 version specifiers. Named groups: name, extras, url,
# constraint (groups that did not participate are None).
#
# In the version character class the hyphen is placed last so that it is a
# literal "-". Written between "." and "_" it forms a character range that
# admits "/", ":", ";", "<", "=", ">", "?", "@" and more inside a version
# while excluding "-" itself.
REQUIREMENT_PATTERN = re.compile(
    r"""
    ^
    (?P<name>[a-zA-Z0-9_-]+) # package name
    (?:\[(?P<extras>(?:\s?[a-zA-Z0-9_-]+(?:\s?\,\s?)?)+)\])? # extras
    (?:
        (?: # a direct reference
            \s?@\s?(?P<url>.*)
        )
        |
        (?: # one or more PEP440 version specifiers
            \s?(?P<constraint>
                (?:\s?
                    (?:
                        (?:=|[><~=!])?=
                        |
                        [<>]
                    )
                    \s?
                    (?:
                        [A-Za-z0-9._*-]+ # a version tuple, e.g. x.y.z
                        (?:-[A-Za-z]+(?:\.[0-9]+)?)? # a post-release tag, e.g. -alpha.2
                        (?:\s?\,\s?)?
                    )
                )+
            )
        )
    )?
    $
    """,
    re.VERBOSE,
)


def parse_pip_requirement(requirement: str) -> Optional[dict[str, str]]:
    """
    Split a pip requirement string into the named groups of
    REQUIREMENT_PATTERN, or return None if the string does not match.
    """
    found = REQUIREMENT_PATTERN.match(requirement)
    return found.groupdict() if found else None


def get_dependency(requirement: str) -> Dependency:
    """
    Convert a pip requirement string into a Poetry dependency.

    A requirement with a direct reference becomes a URLDependency; anything
    else becomes a Dependency constrained by the given version specifiers,
    or by "*" when none are given.

    :param requirement: a pip requirement string, e.g. ``"foo[bar] >=1.0"``
    :raises ValueError: if the string is not matched by REQUIREMENT_PATTERN
    """
    parsed = parse_pip_requirement(requirement)
    if parsed is None:
        raise ValueError(f"Unknown pip requirement '{requirement}'")

    extras = None
    if parsed["extras"]:
        # The pattern tolerates whitespace around each extra and a trailing
        # comma, so trim every name and drop the empty leftovers.
        extras = [
            extra.strip() for extra in parsed["extras"].split(",") if extra.strip()
        ]

    if parsed["url"]:
        return URLDependency(name=parsed["name"], url=parsed["url"], extras=extras)
    return Dependency(
        name=parsed["name"], constraint=parsed["constraint"] or "*", extras=extras
    )


# Cache for get_lookup(); filled in on first use
PYPI_LOOKUP: Optional[dict] = None


def get_lookup() -> dict:
    """
    Return the forward lookup's records re-keyed by their "conda_name".

    The mapping is built on the first call and stored in PYPI_LOOKUP, so
    later calls return the same dict.
    """
    global PYPI_LOOKUP
    if PYPI_LOOKUP is not None:
        return PYPI_LOOKUP

    by_conda_name = {}
    for entry in get_forward_lookup().values():
        by_conda_name[entry["conda_name"]] = entry
    PYPI_LOOKUP = by_conda_name
    return by_conda_name


def normalize_conda_name(name: str):
    """
    Return the "pypi_name" recorded for a conda package name, or the name
    itself when the lookup has no entry for it.
    """
    lookup = get_lookup()
    if name in lookup:
        return lookup[name]["pypi_name"]
    return name


def solve_pypi(
    dependencies: list[str],
    conda_installed: list[tuple[str, str]],
    python_version: str,
    platform: str,
    verbose: bool = False,
) -> list[PipRequirement]:
    """
    Resolve pip requirements on top of a set of conda packages using Poetry.

    The requirements are attached to a dummy project package and solved
    against PyPI. The conda packages are registered, under their PyPI names,
    in both the "installed" and "locked" repositories handed to the solver.

    :param dependencies: pip requirement strings
    :param conda_installed: (name, version) pairs of the conda packages
    :param python_version: Python version the dummy project is pinned to;
        also used to compute the supported wheel tags
    :param platform: target platform, passed on to PlatformEnv
    :param verbose: if true, set the solver's console to VERY_VERBOSE
    :return: one PipRequirement for every operation in the solver result that
        is neither an uninstall nor skipped
    :raises ValueError: if a requirement string cannot be parsed, or if
        PlatformEnv does not support the platform
    """
    dummy_package = ProjectPackage("_dummy_package_", "0.0.0")
    dummy_package.python_versions = f"=={python_version}"
    for spec in dependencies:
        dummy_package.add_dependency(get_dependency(spec))

    pypi = PyPiRepository()
    pool = Pool(repositories=[pypi])

    installed = Repository()
    locked = Repository()

    python_packages = {}
    for name, version in conda_installed:
        pypi_name = normalize_conda_name(name)
        # Prefer the Python package when its name collides with the Conda package
        # for the underlying library, e.g. python-xxhash (pypi: xxhash) over xxhash
        # (pypi: no equivalent)
        if pypi_name not in python_packages or pypi_name != name:
            python_packages[pypi_name] = version
    for name, version in python_packages.items():
        for repo in (locked, installed):
            repo.add_package(Package(name=name, version=version))

    io = ConsoleIO()
    if verbose:
        io.set_verbosity(VERY_VERBOSE)
    s = Solver(
        dummy_package,
        pool=pool,
        installed=installed,
        locked=locked,
        io=io,
    )
    # NOTE(review): these are full requirement strings (e.g. "foo >=1.0"), not
    # bare package names -- confirm that this is what use_latest expects.
    result = s.solve(use_latest=dependencies)

    chooser = Chooser(pool, env=PlatformEnv(python_version, platform))

    # Extract distributions from Poetry package plan, ignoring uninstalls
    # (usually: conda package with no pypi equivalent) and skipped ops
    # (already installed)
    requirements: list[PipRequirement] = []
    for op in result:
        if not isinstance(op, Uninstall) and not op.skipped:
            # Take direct references verbatim
            if op.package.source_type == "url":
                url, fragment = urldefrag(op.package.source_url)
                # The fragment is empty when the URL carries no "#algo=digest"
                # part; record no hash then, rather than an empty string that
                # would later be written out as a malformed "--hash=" option.
                hashes = [fragment.replace("=", ":")] if fragment else []
                requirements.append(
                    {
                        "name": op.package.name,
                        "version": None,
                        "url": url,
                        "hashes": hashes,
                    }
                )
            # Choose the most specific distribution for the target
            else:
                link = chooser.choose_for(op.package)
                requirements.append(
                    {
                        "name": op.package.name,
                        "version": str(op.package.version),
                        "url": link.url_without_fragment,
                        "hashes": [f"{link.hash_name}:{link.hash}"],
                    }
                )

    return requirements
3 changes: 3 additions & 0 deletions conda_lock/src_parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,21 @@ def __init__(
specs: List[str],
channels: List[str],
platform: str,
pip_specs: Optional[List[str]] = None,
virtual_package_repo: Optional[FakeRepoData] = None,
):
self.specs = specs
self.channels = channels
self.platform = platform
self.pip_specs = pip_specs
self.virtual_package_repo = virtual_package_repo

def input_hash(self) -> str:
data: dict = {
"channels": self.channels,
"platform": self.platform,
"specs": sorted(self.specs),
"pip_specs": sorted(self.pip_specs or []),
}
if self.virtual_package_repo is not None:
vpr_data = self.virtual_package_repo.all_repodata
Expand Down
Loading

0 comments on commit b5b13aa

Please sign in to comment.