Skip to content

Commit

Permalink
Make solr-updater take into account book providers for ebook_access
Browse files Browse the repository at this point in the history
  • Loading branch information
cdrini committed Oct 17, 2022
1 parent 45be8ed commit ded8118
Show file tree
Hide file tree
Showing 2 changed files with 113 additions and 28 deletions.
132 changes: 109 additions & 23 deletions openlibrary/book_providers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from dataclasses import dataclass
import logging
from typing import Optional, TypedDict, Union, Literal, cast, TypeVar, Generic
from collections.abc import Iterator
import urllib.parse
Expand All @@ -11,6 +13,11 @@
from openlibrary.utils import OrderedEnum, multisort_best


# Module-level logger; used below to report OPDS mimetypes that are not recognized.
logger = logging.getLogger("openlibrary.book_providers")

# The access values a provider record may declare. Each one is mapped onto an
# EbookAccess member by EbookAccess.from_provider_literal.
ProviderAccessLiteral = Literal['sample', 'buy', 'open-access', 'borrow', 'subscribe']


class EbookAccess(OrderedEnum):
# Keep in sync with solr/conf/enumsConfig.xml !
NO_EBOOK = 0
Expand All @@ -22,6 +29,85 @@ class EbookAccess(OrderedEnum):
def to_solr_str(self):
    """Return this member's name converted to lower case."""
    member_name = self.name
    return member_name.lower()

@staticmethod
def from_provider_literal(literal: ProviderAccessLiteral) -> 'EbookAccess':
    """
    Translate a provider access literal into the matching EbookAccess member.

    :param literal: one of the ProviderAccessLiteral values
    :raises ValueError: if the literal is not one of the known values
    """
    if literal == 'open-access':
        return EbookAccess.PUBLIC
    if literal == 'borrow':
        return EbookAccess.BORROWABLE
    if literal == 'sample':
        # We need to update solr to handle these! Requires full reindex
        return EbookAccess.PRINTDISABLED
    if literal in ('buy', 'subscribe'):
        # Neither of these is treated as an available ebook.
        return EbookAccess.NO_EBOOK
    raise ValueError(f'Unknown access literal: {literal}')


@dataclass
class EbookProvider:
    """
    One place an ebook can be obtained, normalized from a provider JSON record.

    Records come in two shapes, distinguished by `from_json`: OPDS-style
    acquisition links (they have an ``href`` key) and Pressbooks/OL-style
    records (they have a ``url`` key).
    """

    # Access level declared by the provider record.
    access: ProviderAccessLiteral
    # Delivery format; OPDS mimetypes are folded into one of these values.
    format: Literal['web', 'pdf', 'epub', 'audio']
    # "<value> <currency>" for OPDS records with a price, otherwise whatever
    # the record supplied (None when absent).
    price: str | None
    # Link to the book at the provider.
    url: str
    # Name of the provider, when the record supplies one.
    provider_name: str | None = None

    @property
    def ebook_access(self) -> EbookAccess:
        """The EbookAccess member corresponding to `access`."""
        return EbookAccess.from_provider_literal(self.access)

    @staticmethod
    def from_json(json: dict) -> 'EbookProvider':
        """
        Build an EbookProvider from either supported record shape.

        :param json: a provider record; ``href`` marks an OPDS-style link and
            is checked first, ``url`` marks a Pressbooks/OL-style record
        :raises ValueError: if the record has neither ``href`` nor ``url``
        """
        if 'href' in json:
            # OPDS-style provider
            return EbookProvider.from_opds_json(json)
        if 'url' in json:
            # Pressbooks/OL-style; every field except the url is optional.
            return EbookProvider(
                access=json.get('access', 'open-access'),
                format=json.get('format', 'web'),
                price=json.get('price', None),
                url=json['url'],
                provider_name=json.get('provider_name', None),
            )
        raise ValueError(f'Unknown ebook provider format: {json}')

    @staticmethod
    def from_opds_json(json: dict) -> 'EbookProvider':
        """
        Build an EbookProvider from an OPDS-style acquisition link.

        The access value is the last path segment of ``rel`` and is not
        validated here; an unexpected value only surfaces when `ebook_access`
        is read.

        :param json: link with ``href`` and ``rel``, plus ``type`` unless
            ``properties.indirectAcquisition`` is present
        :raises KeyError: if a required key is missing
        """
        properties = json.get('properties', {})

        # With an indirect acquisition, the first entry describes the format
        # that is ultimately delivered, so it takes precedence over the
        # link's own type.
        if properties.get('indirectAcquisition', None):
            mimetype = properties['indirectAcquisition'][0]['type']
        else:
            mimetype = json['type']

        fmt: Literal['web', 'pdf', 'epub', 'audio']
        if mimetype.startswith('audio/'):
            fmt = 'audio'
        elif mimetype == 'application/pdf':
            fmt = 'pdf'
        elif mimetype == 'application/epub+zip':
            fmt = 'epub'
        else:
            if mimetype != 'text/html':
                # Unrecognized types are still treated as a web link.
                logger.warning('Unknown mimetype: %s', mimetype)
            fmt = 'web'

        if properties.get('price', None):
            price_info = properties['price']
            price = f"{price_info['value']} {price_info['currency']}"
        else:
            price = None

        return EbookProvider(
            access=json['rel'].split('/')[-1],
            format=fmt,
            price=price,
            url=json['href'],
            provider_name=json.get('name'),
        )


class IALiteMetadata(TypedDict):
boxid: set[str]
Expand Down Expand Up @@ -257,44 +343,44 @@ def solr_key(self):
return None

def get_identifiers(self, ed_or_solr: Union[Edition, dict]) -> list[str]:
    """
    Return the provider URLs on this record that offer at least
    print-disabled access.

    Each entry of ``providers`` is parsed with EbookProvider.from_json, so a
    record in neither supported shape raises ValueError.

    :param ed_or_solr: an edition, or a solr document (the latter is not
        supported yet and yields an empty list)
    """
    # It's an edition
    if ed_or_solr.get('providers'):
        return [
            provider.url
            for provider in map(EbookProvider.from_json, ed_or_solr['providers'])
            if provider.ebook_access >= EbookAccess.PRINTDISABLED
        ]
    else:
        # TODO: Not implemented for search/solr yet
        return []

def render_read_button(self, ed_or_solr: Union[Edition, dict]):
    """
    Render the read button for the most accessible provider on this record.

    Providers below print-disabled access are ignored. Returns an empty
    string when no provider qualifies.

    :param ed_or_solr: an edition or solr document; a missing ``providers``
        key is treated as an empty list
    """
    acq_sorted = sorted(
        (
            p
            for p in map(EbookProvider.from_json, ed_or_solr.get('providers', []))
            if p.ebook_access >= EbookAccess.PRINTDISABLED
        ),
        # Highest access level first; the sort is stable, so providers with
        # equal access keep their original relative order.
        key=lambda p: p.ebook_access,
        reverse=True,
    )
    if not acq_sorted:
        return ''
    return render_template(self.get_template_path('read_button'), acq_sorted[0])

def get_access(
    self,
    edition: dict,
    metadata: TProviderMetadata = None,
) -> EbookAccess:
    """
    Report the ebook access level of the given edition.

    Only the first entry of ``edition['providers']`` is consulted;
    ``metadata`` is not used here.
    """
    # For now assume 0 is best
    first_provider = EbookProvider.from_json(edition['providers'][0])
    return first_provider.ebook_access


PROVIDER_ORDER: list[AbstractBookProvider] = [
# These providers act essentially as their own publishers, so link to the first when
Expand Down
9 changes: 4 additions & 5 deletions openlibrary/templates/book_providers/direct_read_button.html
Original file line number Diff line number Diff line change
@@ -1,21 +1,20 @@
$def with(provider)
$# Call-to-action button for a direct provider. `provider` exposes `.access` and `.url`.

$if provider.access == 'open-access':
  <div class="cta-button-group">
    <a
        href="$(provider.url)"
        title="$_('Read free online')"
        class="cta-btn cta-btn--available cta-btn--read cta-btn--external cta-btn--direct"
        target="_blank"
    >$_('Read')</a>
  </div>

$elif provider.access == 'sample':
  <div class="cta-button-group">
    <a class="cta-btn cta-btn--shell cta-btn--external"
       data-ol-link-track="CTAClick|Preview"
       target="_blank"
       href="$(provider.url)"
    >$_('Preview')</a>
  </div>

0 comments on commit ded8118

Please sign in to comment.