Skip to content

Commit

Permalink
Test for a repository deposit before establishing mirrorrepo
Browse files Browse the repository at this point in the history
The `mirrorrepo` property of the datalad-annex Git remote helper
implementation creates an empty repository, if the remote has no
refs (or does not exist). This is sensible behavior for enabling
initial pushes.

However, it leads to successful but empty clones/fetches for any
misspecified URL or connectivity failure. This is undesirable.

This change adds an explicit test for an available/accessible
repository deposit whenever a clone/fetch is performed. Wrong URL
specifications or connectivity issues will now lead to an explicit
error exit. The failure is communicated via a non-verbose message,
and the full error is available at verbosity level 2, i.e., with
`git clone -v`.

Closes: gh-373
Closes: gh-636
  • Loading branch information
mih committed Feb 19, 2024
1 parent b8f5c5d commit e415687
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 5 deletions.
24 changes: 21 additions & 3 deletions datalad_next/gitremotes/datalad_annex.py
Original file line number Diff line number Diff line change
Expand Up @@ -691,6 +691,9 @@ class instance.
'\n'
)
elif line == 'connect git-receive-pack\n':
# "receive", because we are receiving at the local mirror repo
# from a `send-pack` process that is connected to the main local
# repo
self.log('Connecting git-receive-pack\n')
self.send('\n')
# we assume the mirror repo is in-sync with the remote at
Expand Down Expand Up @@ -755,6 +758,16 @@ class instance.
self._store_credential()
return
elif line == 'connect git-upload-pack\n':
# "upload", because we are uploading from the local mirror repo
# to a `fetch-pack` process that is connected to the main local
# repo
try:
self.get_remote_refs(raise_on_error=True)
except Exception as e:
self.log("fatal: couldn't find remote refs (repository deposit does not exist, or is inaccessible", level=1)
self.log(f"query error: {e!r}", level=2)
return

self.log('Connecting git-upload-pack\n')
self.send('\n')
# must not capture -- git is talking to it directly from here.
Expand Down Expand Up @@ -957,7 +970,7 @@ def replace_mirrorrepo_from_remote_deposit(self) -> None:
for p in legacy_basedir.iterdir():
p.rename(self._mirrorrepodir / p.relative_to(legacy_basedir))

def get_remote_refs(self) -> str | None:
def get_remote_refs(self, raise_on_error: bool = False) -> str | None:
"""Report remote refs
The underlying special remote is asked whether it has the key
Expand All @@ -967,8 +980,9 @@ def get_remote_refs(self) -> str | None:
Returns
-------
str or None
If the remote has refs, they are returned as a string, formatted like
a refs file in a Git directory. Otherwise, `None` is returned.
If the remote has a refs record, it is returned as a string,
formatted like a refs file in a Git directory.
Otherwise, `None` is returned.
"""
if self._cached_remote_refs:
# this process already queried them once, return cache
Expand All @@ -994,9 +1008,13 @@ def get_remote_refs(self) -> str | None:
ra.call_annex([
'transferkey', self.refs_key, f'--from={sremote_id}'])
except CommandError as e:
if raise_on_error:
raise
CapturedException(e)
self.log("Remote appears to have no refs")
# download failed, we have no refs
# this can happen for legit reasons (prior first push),
# but also with broken URLs or lack of permissions
return None

refskeyloc = ra.call_annex_oneline([
Expand Down
12 changes: 10 additions & 2 deletions datalad_next/patches/push_optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,9 +444,17 @@ def _sync_remote_annex_branch(repo, target, is_annex_repo):
except mod_push.CommandError as e:
# it is OK if the remote doesn't have a git-annex branch yet
# (e.g. fresh repo)
# TODO is this possible? we just copied? Maybe check if anything
# Is this even possible? we just copied? Maybe check if anything
# was actually copied?
if "fatal: couldn't find remote ref git-annex" not in e.stderr.lower():
# Yes, this is possible. The current implementation of the datalad-annex
# special remote would run into this situation. It would copy annex objects
# to a new location just fine, but until a repository deposit was made
# (and this implementation of push only does this as a second step), it
# could not retrieve any refs from the remote.
# The following conditional tests for the common prefix of the respective
# error message from Git and the Git-channeled error message from the
# datalad-annex remote helper.
if "fatal: couldn't find remote ref" not in e.stderr.lower():
raise
lgr.debug('Remote does not have a git-annex branch: %s', e)

Expand Down

0 comments on commit e415687

Please sign in to comment.