Skip to content

Commit

Permalink
Merge pull request #687 from arXiv/develop
Browse files Browse the repository at this point in the history
deploy doi guessing if not available
  • Loading branch information
kyokukou authored Aug 7, 2024
2 parents db30d0e + f25234a commit 825096c
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 5 deletions.
14 changes: 10 additions & 4 deletions browse/templates/abs/abs.html
Original file line number Diff line number Diff line change
Expand Up @@ -324,11 +324,14 @@ <h1 class="title mathjax"><span class="descriptor">Title:</span>{{ title|tex2utf
{{ base_macros.version_atag(arxiv_id, version, primary_category) }}</span> for this version)
</td>
</tr>
{%- if datacite_doi %}
<tr>
<td class="tablecell label">&nbsp;</td>
<td class="tablecell arxivdoi">
<a href="https://doi.org/{{ datacite_doi }}">https://doi.org/{{ datacite_doi }}</a>
{%- if datacite_doi %}
<a href="https://doi.org/{{ datacite_doi }}" id="arxiv-doi-link">https://doi.org/{{ datacite_doi }}</a>
{%- else %}
<a href="https://doi.org/10.48550/arXiv.{{ arxiv_id }}" id="arxiv-doi-link">https://doi.org/10.48550/arXiv.{{ arxiv_id }}</a>
{%- endif -%}
<div class="button-and-tooltip">
<button class="more-info" aria-describedby="more-info-desc-1">
<svg height="15" role="presentation" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><path fill="currentColor" d="M256 8C119.043 8 8 119.083 8 256c0 136.997 111.043 248 248 248s248-111.003 248-248C504 119.083 392.957 8 256 8zm0 110c23.196 0 42 18.804 42 42s-18.804 42-42 42-42-18.804-42-42 18.804-42 42-42zm56 254c0 6.627-5.373 12-12 12h-88c-6.627 0-12-5.373-12-12v-24c0-6.627 5.373-12 12-12h12v-64h-12c-6.627 0-12-5.373-12-12v-24c0-6.627 5.373-12 12-12h64c6.627 0 12 5.373 12 12v100h12c6.627 0 12 5.373 12 12v24z" class=""></path></svg>
Expand All @@ -337,12 +340,15 @@ <h1 class="title mathjax"><span class="descriptor">Title:</span>{{ title|tex2utf
<!-- tooltip description -->
<div role="tooltip" id="more-info-desc-1">
<span class="left-corner"></span>
arXiv-issued DOI via DataCite
{%- if datacite_doi %}
arXiv-issued DOI via DataCite
{%- else %}
arXiv-issued DOI via DataCite (pending registration)
{%- endif -%}
</div>
</div>
</td>
</tr>
{%- endif -%}
{%- if journal_ref %}
<tr>
<td class="tablecell label">Journal&nbsp;reference:</td>
Expand Down
25 changes: 25 additions & 0 deletions script/sync_prod_to_gcp/webnode_pdf_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,12 @@
The request (pub/sub entry) is subsumed when the pdf exists, so this is a pretty safe operation.
"""
import argparse
import shlex
import signal
import subprocess
import threading
import typing
from datetime import datetime, timedelta
from pathlib import Path
from time import gmtime, sleep

Expand Down Expand Up @@ -125,6 +128,27 @@ def ping_callback(message: Message) -> None:
message.ack()
return

message_age: timedelta = datetime.utcnow() - message.publish_time
compilation_timeout = int(os.environ.get("TEX_COMPILATION_TIMEOUT_MINUTES", "30"))
if message_age > timedelta(minutes=compilation_timeout):
help_needed = os.environ.get("TEX_COMPILATION_RECIPIENT", "help@arxiv.org")
subject = f"TeX compilation for {paper_id}v{version} failed"
mail_body = f"Hello EUST,\nTex compilation for {paper_id}v{version} has failed. Please resolve the issue.\n\nThis message is generated by a bot on arxiv-sync.serverfarm.cornell.edu.\n"
cmd = ["/usr/bin/mail", "-r", "developers@arxiv.org", "-s", subject, help_needed]
mail = subprocess.Popen(cmd,
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
try:
mail.communicate(mail_body, timeout=60)
if mail.returncode == 0:
message.ack()
logger.warning(f"Alart mail sent: {subject}", extra=log_extra)
else:
logger.error("Failed to send mail: %s", shlex.join(cmd), extra=log_extra)
except Exception as exc:
logger.error(f"Failed: %s", shlex.join(cmd), extra=log_extra, exc_info=True)
pass
pass

arxiv_id = Identifier(arxiv_id_str)
archive = ('arxiv' if not arxiv_id.is_old_id else arxiv_id.archive)
pdf_source = Path(f"{FTP_PREFIX}/{archive}/papers/{arxiv_id.yymm}/{arxiv_id.filename}.pdf")
Expand Down Expand Up @@ -168,6 +192,7 @@ def ping_callback(message: Message) -> None:
exc_info=True, stack_info=False)

host, n_para = CONCURRENCY_PER_WEBNODE[min(len(CONCURRENCY_PER_WEBNODE)-1, max(0, my_tag))]
log_extra['web_node'] = host
try:
pdf_file, url, _1, duration_ms = ensure_pdf(thread_data.session, host, arxiv_id, timeout=30)
if pdf_file.exists():
Expand Down
38 changes: 37 additions & 1 deletion tests/test_db_abs.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,4 +126,40 @@ def test_abs_surrogate_keys(dbclient):

rv=dbclient.get('/abs/0704.0046v1')
assert "abs-0704.0046" in rv.headers['Surrogate-Key']
assert "paper-id-0704.0046" in rv.headers['Surrogate-Key']
assert "paper-id-0704.0046" in rv.headers['Surrogate-Key']


def _assert_guessed_doi(dbclient, abs_path, expected_doi_url):
    """Assert that the abs page at *abs_path* shows the guessed arXiv DOI.

    Fetches the page through the test client, parses it, and checks that the
    ``.metatable`` element contains the "pending registration" tooltip text
    and an ``<a id="arxiv-doi-link">`` whose text and ``href`` both equal
    *expected_doi_url*.

    Args:
        dbclient: test client fixture exposing ``get(path)``.
        abs_path: URL path of the abs page, e.g. ``'/abs/0906.2112'``.
        expected_doi_url: full ``https://doi.org/...`` URL expected in the link.
    """
    rt = dbclient.get(abs_path)
    assert rt.status_code == 200
    assert rt.headers.get('Surrogate-Control')
    html = BeautifulSoup(rt.data.decode('utf-8'), 'html.parser')

    metatable = html.select_one('.metatable')
    assert metatable
    text = metatable.get_text()
    assert 'https://doi.org/10.48550/arXiv.' in text
    # The "(pending registration)" wording is what distinguishes a guessed DOI
    # from a registered one; it also implies the plain DataCite label is shown.
    assert 'arXiv-issued DOI via DataCite (pending registration)' in text

    atag = metatable.find('a', {'id': 'arxiv-doi-link'})
    assert atag
    assert atag.text == expected_doi_url
    assert atag.get('href') == expected_doi_url


def test_guess_DOI(dbclient):
    """The abs page should still display a guessed DOI when none is in the table."""
    # New-style identifier.
    # NOTE(review): presumably neither paper has a DataCite DOI row in the
    # test database -- confirm against the fixtures.
    _assert_guessed_doi(
        dbclient,
        '/abs/0906.2112',
        'https://doi.org/10.48550/arXiv.0906.2112',
    )

    # Old-style identifier: the archive prefix stays part of the DOI.
    _assert_guessed_doi(
        dbclient,
        '/abs/math/0510544',
        'https://doi.org/10.48550/arXiv.math/0510544',
    )

0 comments on commit 825096c

Please sign in to comment.