Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

deploy doi guessing if not available #687

Merged
merged 8 commits into from
Aug 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions browse/templates/abs/abs.html
Original file line number Diff line number Diff line change
Expand Up @@ -324,11 +324,14 @@ <h1 class="title mathjax"><span class="descriptor">Title:</span>{{ title|tex2utf
{{ base_macros.version_atag(arxiv_id, version, primary_category) }}</span> for this version)
</td>
</tr>
{%- if datacite_doi %}
<tr>
<td class="tablecell label">&nbsp;</td>
<td class="tablecell arxivdoi">
<a href="https://doi.org/{{ datacite_doi }}">https://doi.org/{{ datacite_doi }}</a>
{%- if datacite_doi %}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

<a href="https://doi.org/{{ datacite_doi }}" id="arxiv-doi-link">https://doi.org/{{ datacite_doi }}</a>
{%- else %}
<a href="https://doi.org/10.48550/arXiv.{{ arxiv_id }}" id="arxiv-doi-link">https://doi.org/10.48550/arXiv.{{ arxiv_id }}</a>
{%- endif -%}
<div class="button-and-tooltip">
<button class="more-info" aria-describedby="more-info-desc-1">
<svg height="15" role="presentation" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><path fill="currentColor" d="M256 8C119.043 8 8 119.083 8 256c0 136.997 111.043 248 248 248s248-111.003 248-248C504 119.083 392.957 8 256 8zm0 110c23.196 0 42 18.804 42 42s-18.804 42-42 42-42-18.804-42-42 18.804-42 42-42zm56 254c0 6.627-5.373 12-12 12h-88c-6.627 0-12-5.373-12-12v-24c0-6.627 5.373-12 12-12h12v-64h-12c-6.627 0-12-5.373-12-12v-24c0-6.627 5.373-12 12-12h64c6.627 0 12 5.373 12 12v100h12c6.627 0 12 5.373 12 12v24z" class=""></path></svg>
Expand All @@ -337,12 +340,15 @@ <h1 class="title mathjax"><span class="descriptor">Title:</span>{{ title|tex2utf
<!-- tooltip description -->
<div role="tooltip" id="more-info-desc-1">
<span class="left-corner"></span>
arXiv-issued DOI via DataCite
{%- if datacite_doi %}
arXiv-issued DOI via DataCite
{%- else %}
arXiv-issued DOI via DataCite (pending registration)
{%- endif -%}
</div>
</div>
</td>
</tr>
{%- endif -%}
{%- if journal_ref %}
<tr>
<td class="tablecell label">Journal&nbsp;reference:</td>
Expand Down
25 changes: 25 additions & 0 deletions script/sync_prod_to_gcp/webnode_pdf_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,12 @@
The request (pub/sub entry) is subsumed when the pdf exists, so this is a pretty safe operation.
"""
import argparse
import shlex
import signal
import subprocess
import threading
import typing
from datetime import datetime, timedelta
from pathlib import Path
from time import gmtime, sleep

Expand Down Expand Up @@ -125,6 +128,27 @@ def ping_callback(message: Message) -> None:
message.ack()
return

message_age: timedelta = datetime.utcnow() - message.publish_time
compilation_timeout = int(os.environ.get("TEX_COMPILATION_TIMEOUT_MINUTES", "30"))
if message_age > timedelta(minutes=compilation_timeout):
help_needed = os.environ.get("TEX_COMPILATION_RECIPIENT", "help@arxiv.org")
subject = f"TeX compilation for {paper_id}v{version} failed"
mail_body = f"Hello EUST,\nTex compilation for {paper_id}v{version} has failed. Please resolve the issue.\n\nThis message is generated by a bot on arxiv-sync.serverfarm.cornell.edu.\n"
cmd = ["/usr/bin/mail", "-r", "developers@arxiv.org", "-s", subject, help_needed]
mail = subprocess.Popen(cmd,
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
try:
mail.communicate(mail_body, timeout=60)
if mail.returncode == 0:
message.ack()
logger.warning(f"Alart mail sent: {subject}", extra=log_extra)
else:
logger.error("Failed to send mail: %s", shlex.join(cmd), extra=log_extra)
except Exception as exc:
logger.error(f"Failed: %s", shlex.join(cmd), extra=log_extra, exc_info=True)
pass
pass

arxiv_id = Identifier(arxiv_id_str)
archive = ('arxiv' if not arxiv_id.is_old_id else arxiv_id.archive)
pdf_source = Path(f"{FTP_PREFIX}/{archive}/papers/{arxiv_id.yymm}/{arxiv_id.filename}.pdf")
Expand Down Expand Up @@ -168,6 +192,7 @@ def ping_callback(message: Message) -> None:
exc_info=True, stack_info=False)

host, n_para = CONCURRENCY_PER_WEBNODE[min(len(CONCURRENCY_PER_WEBNODE)-1, max(0, my_tag))]
log_extra['web_node'] = host
try:
pdf_file, url, _1, duration_ms = ensure_pdf(thread_data.session, host, arxiv_id, timeout=30)
if pdf_file.exists():
Expand Down
38 changes: 37 additions & 1 deletion tests/test_db_abs.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,4 +126,40 @@ def test_abs_surrogate_keys(dbclient):

rv=dbclient.get('/abs/0704.0046v1')
assert "abs-0704.0046" in rv.headers['Surrogate-Key']
assert "paper-id-0704.0046" in rv.headers['Surrogate-Key']
assert "paper-id-0704.0046" in rv.headers['Surrogate-Key']


def test_guess_DOI(dbclient):
    """Check that the abs page shows a guessed arXiv DOI when none is stored.

    Two pages are requested through ``dbclient``: one with a new-style
    identifier (``0906.2112``) and one with an old-style identifier
    (``math/0510544``). For each, the metadata table must carry the
    "pending registration" tooltip text and an ``arxiv-doi-link`` anchor
    whose text and ``href`` are the guessed ``10.48550/arXiv.<id>`` DOI URL.
    """

    def load_metatable(abs_path):
        # Fetch the page, run the basic response checks, and return the
        # parsed ``.metatable`` element.
        response = dbclient.get(abs_path)
        assert response.status_code == 200
        assert response.headers.get('Surrogate-Control')
        soup = BeautifulSoup(response.data.decode('utf-8'), 'html.parser')
        table = soup.select_one('.metatable')
        assert table
        return table

    def check_doi_link(table, expected_url):
        # The DOI anchor must exist and show/link to exactly the expected URL.
        link = table.find('a', {'id': 'arxiv-doi-link'})
        assert link
        assert link.text == expected_url
        assert link.get('href') == expected_url

    pending_note = 'arXiv-issued DOI via DataCite (pending registration)'

    # No DOI in the table: the page should still guess and display a DOI.
    new_style_table = load_metatable('/abs/0906.2112')
    new_style_text = new_style_table.get_text()
    assert 'https://doi.org/10.48550/arXiv.' in new_style_text
    assert 'arXiv-issued DOI via DataCite' in new_style_text
    assert pending_note in new_style_text
    check_doi_link(new_style_table, 'https://doi.org/10.48550/arXiv.0906.2112')

    # Old-style identifiers keep their archive prefix in the guessed DOI.
    old_style_table = load_metatable('/abs/math/0510544')
    assert pending_note in old_style_table.get_text()
    check_doi_link(old_style_table, 'https://doi.org/10.48550/arXiv.math/0510544')
Loading