internetarchive · davidscotson · Sep 29, 2023
diff --git a/openlibrary/plugins/openlibrary/js/edit.js b/openlibrary/plugins/openlibrary/js/edit.js
@@ -9,6 +9,8 @@ import {
     isValidLccn,
     isIdDupe
 } from './idValidation';
+import { extractWorkIdFromUrl, } from './idExtraction';
+import { detectTypeFromWorkId, } from './idDetection';
 /* global render_seed_field, render_language_field, render_lazy_work_preview, render_language_autocomplete_item, render_work_field, render_work_autocomplete_item */
 /* Globals are provided by the edit edition template */
 
@@ -230,6 +232,32 @@ export function initIdentifierValidation() {
     });
 }
 
+export function initWorkIdentifierValidation() { // wires up the work identifier add form: repeat widget, validation and type auto-detection
+    $('#workidentifiers').repeat({
+        vars: {prefix: 'work--'}, // exposed to the row template as {{prefix}}
+        validate: function(data) {return validateWorkIdentifiers(data)}, // every new entry is checked by validateWorkIdentifiers
+    });
+    $('#workidentifiers').on('repeat-add', function () {
+        $('#workselect-id option').first().prop('selected',true); // on 'repeat-add', put the type dropdown back on its first option
+    });
+    $('#workid-value').on('input', function () {
+        const input = $('#workid-value').val().trim();
+        $('#workid-value').val(input); // write the trimmed text back into the field
+        if (/^https?:/.test(input)) { // looks like a URL: try to pull the identifier and its type out of it
+            const [id, type] = extractWorkIdFromUrl(input);
+            if (id && type) {
+                $('#workselect-id').val(type);
+                $('#workid-value').val(id); // replace the URL with the bare identifier
+            }
+        } else { // not a URL: guess the type from the shape of the identifier itself
+            const type = detectTypeFromWorkId(input);
+            if (type) {
+                $('#workselect-id').val(type);
+            }
+        }
+    });
+}
+
 export function initClassificationValidation() {
     const dataConfig = JSON.parse(document.querySelector('#classifications').dataset.config);
     $('#classifications').repeat({
@@ -248,6 +276,42 @@ export function initClassificationValidation() {
     });
 }
 
+/**
+ * Called by initWorkIdentifierValidation() to validate a new identifier
+ * before it is added to a work: a type must be selected, a value must be
+ * given, LCCNs go through validateLccn(), and duplicate values are rejected.
+ * @param {Object} data  data from the input form (reads data.name and data.value)
+ * @returns {boolean}  true if identifier passes validation
+ */
+export function validateWorkIdentifiers(data) {
+    const dataConfig = JSON.parse(document.querySelector('#workidentifiers').dataset.config); // message lookup table from the fieldset's data-config attribute
+
+    if (data.name === '' || data.name === '---') {
+        return error('#workid-errors', 'workselect-id', dataConfig['Please select an identifier.'])
+    }
+    const label = $('#workselect-id').find(`option[value='${data.name}']`).html(); // display label of the selected type, substituted for "ID" in messages
+    if (data.value === '') {
+        return error('#workid-errors', 'workid-value', dataConfig['You need to give a value to ID.'].replace(/ID/, label));
+    }
+
+    let validId = true;
+    if (data.name === 'lccn') {
+        validId = validateLccn(data, dataConfig, label);
+    }
+
+    // checking for duplicate identifier entry on all identifier types
+    // expects parsed ids so placed after validate
+    const entries = document.querySelectorAll(`.${data.name}`); // looks up elements whose class is the identifier type name
+    if (isIdDupe(entries, data.value) === true) {
+        return error('#workid-errors', 'workid-value', dataConfig['That ID already exists for this work.'].replace(/ID/, label));
+    }
+
+    if (validId === false) return false; // a failed LCCN check is only acted on after the duplicate check
+
+    $('#workid-errors').hide();
+    return true;
+}
+
 export function initLanguageMultiInputAutocomplete() {
     $(function() {
         getJqueryElements('.multi-input-autocomplete--language').forEach(jqueryElement => {

diff --git a/openlibrary/plugins/openlibrary/js/idDetection.js b/openlibrary/plugins/openlibrary/js/idDetection.js
@@ -0,0 +1,35 @@
+const commonRegex  = { // whole-string patterns for bare identifiers (no URLs)
+    wikidata: /^Q[1-9]\d+$/, // ignore single digit matches to reduce false positives
+    storygraph: /^[0-9a-f]{8}(-[0-9a-f]{4}){3}-[0-9a-f]{12}$/i, // 8-4-4-4-12 hex groups, case-insensitive
+    amazon: /^B[0-9A-Za-z]{9}$/, // "B" followed by nine alphanumerics
+
+}
+const workIdentifierPatterns  = { // patterns offered for works; detectTypeFromId returns the first one that matches
+    wikidata: commonRegex.wikidata,
+    amazon: commonRegex.amazon,
+    storygraph: commonRegex.storygraph,
+}
+
+/**
+ * Compares an identifier string against the work regex patterns to detect its type.
+ * @param {String} id string to test
+ * @returns {String} identifier type name e.g. 'wikidata' or null
+ */
+export function detectTypeFromWorkId(id) {
+    return detectTypeFromId(id, workIdentifierPatterns);
+}
+/**
+ * Compares an identifier string against regex patterns to detect its type.
+ * @param {String} id string to test
+ * @param {Object} patterns named regexes to match against
+ * @returns {String} name of the first pattern that matches e.g. 'wikidata', or null
+ */
+function detectTypeFromId(id, patterns) {
+    for (const idtype in patterns) {
+        const detectPattern = patterns[idtype];
+        if (detectPattern.test(id) === true) {
+            return idtype;
+        }
+    }
+    return null;
+}
diff --git a/openlibrary/plugins/openlibrary/js/idExtraction.js b/openlibrary/plugins/openlibrary/js/idExtraction.js
@@ -0,0 +1,43 @@
+const commonRegex = { // URL patterns; capture group 1 is the identifier
+    wikidata: /^https?:\/\/www\.wikidata\.org\/wiki\/(Q[1-9]\d*)$/,
+    // viaf regex from https://www.wikidata.org/wiki/Property:P214#P8966
+    viaf: /^https?:\/\/(?:www\.)?viaf\.org\/viaf\/([1-9]\d(?:\d{0,7}|\d{17,20}))($|\/|\?|#)/,
+    // note: storygraph seems to use the same format for works and editions
+    storygraph: /^https?:\/\/app\.thestorygraph\.com\/books\/([0-9a-f]{8}(-[0-9a-f]{4}){3}-[0-9a-f]{12})$/, // lowercase hex only (no i flag)
+
+}
+const workIdentifierExtractionPatterns = { // patterns tried for work URLs; extractIdFromUrl returns the first match
+    wikidata: commonRegex.wikidata,
+    viaf: commonRegex.viaf,
+    storygraph: commonRegex.storygraph,
+    // librarything regex from https://www.wikidata.org/wiki/Property:P1085#P8966
+    librarything: /^https?:\/\/www\.librarything\.(?:com|nl)\/work\/(\d+)/,
+    // goodreads regex from https://www.wikidata.org/wiki/Property:P8383#P8966
+    goodreads: /^https?:\/\/www\.goodreads\.com\/work\/editions\/(\d+)/,
+
+}
+
+/**
+ * Compares url string against the work regex patterns to extract a work identifier.
+ * @param {String} url
+ * @returns {Array} [work identifier, identifier type] or [null, null]
+ */
+export function extractWorkIdFromUrl(url) {
+    return extractIdFromUrl(url, workIdentifierExtractionPatterns);
+}
+/**
+ * Compares url string against regex patterns to extract identifier.
+ * @param {String} url
+ * @param {Object} patterns - object of regex patterns, keyed by identifier type
+ * @returns {Array} [identifier, identifier type] for the first match, or [null, null]
+ */
+function extractIdFromUrl(url, patterns) {
+    for (const idtype in patterns) {
+        const extractPattern = patterns[idtype];
+        const id = extractPattern.exec(url);
+        if (id && id[1]) { // id[1] is the first capture group of the pattern
+            return [id[1], idtype];
+        }
+    }
+    return [null, null];
+}
diff --git a/openlibrary/plugins/openlibrary/js/index.js b/openlibrary/plugins/openlibrary/js/index.js
@@ -127,6 +127,7 @@ jQuery(function () {
     const addRowButton = document.getElementById('add_row_button');
     const roles = document.querySelector('#roles');
     const identifiers = document.querySelector('#identifiers');
+    const workIdentifiers = document.querySelector('#workidentifiers');
     const classifications = document.querySelector('#classifications');
     const excerpts = document.getElementById('excerpts');
     const links = document.getElementById('links');
@@ -162,6 +163,9 @@ jQuery(function () {
                 if (identifiers) {
                     module.initIdentifierValidation();
                 }
+                if (workIdentifiers) {
+                    module.initWorkIdentifierValidation();
+                }
                 if (classifications) {
                     module.initClassificationValidation();
                 }

diff --git a/openlibrary/plugins/upstream/addbook.py b/openlibrary/plugins/upstream/addbook.py
@@ -606,6 +606,9 @@ def save(self, formdata: web.Storage) -> None:
                     edition_data.works = [{'key': self.work.key}]
 
             if self.work is not None:
+                identifiers = work_data.pop('identifiers', [])
+                self.work.set_identifiers(identifiers)
+
                 self.work.update(work_data)
                 saveutil.save(self.work)
 

diff --git a/openlibrary/plugins/upstream/models.py b/openlibrary/plugins/upstream/models.py
@@ -17,7 +17,12 @@
 from openlibrary.core.models import Image
 from openlibrary.core import lending
 
-from openlibrary.plugins.upstream.utils import MultiDict, parse_toc, get_edition_config
+from openlibrary.plugins.upstream.utils import (
+    MultiDict,
+    parse_toc,
+    get_edition_config,
+    get_work_config,
+)
 from openlibrary.plugins.upstream import account
 from openlibrary.plugins.upstream import borrow
 from openlibrary.plugins.worksearch.code import works_by_author
@@ -561,6 +566,62 @@ def get_covers(self, use_solr=True):
         else:
             return []
 
+    def get_identifiers(self):
+        """Returns this work's identifiers as built by _process_identifiers from the work config."""
+        return self._process_identifiers(
+            get_work_config().identifiers, self.identifiers
+        )
+
+    def set_identifiers(self, identifiers):
+        """Replaces the work's identifiers from items carrying 'name' and 'value' keys."""
+
+        d = {}  # name -> list of values, in input order
+        for id in identifiers:
+            # ignore bad values
+            if 'name' not in id or 'value' not in id:
+                continue
+            name, value = id['name'], id['value']
+            if name == 'lccn':
+                value = normalize_lccn(value)
+            # `None` in this field causes errors. See #7999.
+            if value is not None:
+                d.setdefault(name, []).append(value)
+
+        self.identifiers = {}  # start empty, so anything not in `identifiers` is dropped
+
+        for name, value in d.items():
+            self.identifiers[name] = value
+
+    def _process_identifiers(self, config_, values):  # builds a MultiDict of storage entries (name, label, value, url)
+        id_map = {}  # config entries keyed by identifier name
+        for id in config_:
+            id_map[id.name] = id
+            id.setdefault("label", id.name)  # note: fills defaults on the passed-in config entries themselves
+            id.setdefault("url_format", None)
+
+        d = MultiDict()
+
+        def process(name, value):
+            if value:  # falsy values (None, empty list/string) are skipped
+                if not isinstance(value, list):
+                    value = [value]
+
+                id = id_map.get(name) or web.storage(  # names missing from the config fall back to a bare entry
+                    name=name, label=name, url_format=None
+                )
+                for v in value:
+                    d[id.name] = web.storage(
+                        name=id.name,
+                        label=id.label,
+                        value=v,
+                        url=id.get('url') and id.url.replace('@@@', v.replace(' ', '')),  # '@@@' is replaced by the value with spaces removed
+                    )
+
+        for name in values:
+            process(name, values[name])
+
+        return d
+
     def get_covers_from_solr(self):
         try:
             w = self._solr_data

diff --git a/openlibrary/plugins/upstream/utils.py b/openlibrary/plugins/upstream/utils.py
@@ -846,6 +846,27 @@ def _get_author_config():
     return Storage(identifiers=identifiers)
 
 
+@public
+def get_work_config() -> Storage:  # thin wrapper around the memoized _get_work_config()
+    return _get_work_config()
+
+
+@web.memoize
+def _get_work_config():
+    """Returns the work config.
+
+    The results are cached on the first invocation. Any changes to /config/work page require restarting the app.
+
+    This is is cached because fetching and creating the Thing object was taking about 20ms of time for each book request.
+    """
+    thing = web.ctx.site.get('/config/work')
+    if hasattr(thing, "identifiers"):
+        identifiers = [Storage(t.dict()) for t in thing.identifiers if 'name' in t]
+    else:
+        identifiers = {}
+    return Storage(identifiers=identifiers)
+
+
 @public
 def get_edition_config() -> Storage:
     return _get_edition_config()

diff --git a/openlibrary/templates/books/edit.html b/openlibrary/templates/books/edit.html
@@ -2,6 +2,8 @@
 
 $ this_title = work.title + ': ' + work.subtitle if work.get('subtitle', None) else work.title
 
+$ work_config = get_work_config()
+
 $var title: $this_title
 $putctx("robots", "noindex,nofollow")
 
@@ -105,6 +107,73 @@ <h3 class="editFormBookAuthors">
                         </div>
                     </div>
                 </fieldset>
+                $ config = ({
+                    $ 'Please select an identifier.': _('Please select an identifier.'),
+                    $ 'You need to give a value to ID.': _('You need to give a value to ID.'),
+                    $ 'ID ids cannot contain whitespace.': _('ID ids cannot contain whitespace.'),
+                    $ 'That ID already exists for this work.': _('That ID already exists for this work.'),
+                    $ 'Invalid ID format': _('Invalid ID format')
+                $ })
+                <fieldset class="major" id="workidentifiers" data-config="$dumps(config)">
+                    <legend>$_("ID Numbers")</legend>
+                    <div class="formBack">
+
+                        <div id="workid-errors" class="note" style="display: none"></div>
+                        <div class="formElement">
+                            <div class="label">
+                                <label for="workselect-id">$_("Do you know any identifiers for this work?")</label>
+                                <span class="tip">$_("Like, VIAF?")</span>
+                            </div>
+                            <div class="input">
+                                <table class="identifiers">
+                                    <tr id="workidentifiers-form">
+                                        <td align="right">
+                                            <select name="name" id="workselect-id">
+                                            $ id_labels = dict((d.name, d.label) for d in work_config.identifiers)
+                                            $ id_dict = dict((id.name, id) for id in work_config.identifiers)
+
+                                            <option value="">$_('Select one of many...')</option>
+                                            $for id in work_config.identifiers:
+                                                <option value="$id.name">$id.label</option>
+
+                                            </select>
+                                        </td>
+                                        <td>
+                                            <input type="text" name="value" id="workid-value"/>
+                                        </td>
+                                        <td>
+                                            <button type="button" name="add" class="repeat-add larger">$_("Add")</button>
+                                        </td>
+                                    </tr>
+                                    <tbody id="workidentifiers-display">
+                                        <tr id="workidentifiers-template" style="display: none;" class="repeat-item">
+                                            <td align="right"><strong>{{\$("#workselect-id").find("option[value='" + name + "']").html()}}</strong></td>
+                                            <td>{{value}}
+                                                <input type="hidden" name="{{prefix}}identifiers--{{index}}--name" value="{{name}}"/>
+                                                <input type="hidden" name="{{prefix}}identifiers--{{index}}--value" value="{{value}}" class="{{name}}"/>
+                                            </td>
+                                            <td><a href="javascript:;" class="repeat-remove red plain" title="Remove this identifier">[x]</a></td>
+                                        </tr>
+                                        <tr>
+                                            <td align="right">Open Library</td>
+                                            <td>$work.key.split("/")[-1]</td>
+                                            <td></td>
+                                        </tr>
+                                        $for i, id in enumerate(work.get_identifiers().values()):
+                                        <tr id="workidentifiers--$i" class="repeat-item">
+                                            <td align="right"><strong>$id_labels.get(id.name, id.name)</strong></td>
+                                            <td>$id.value
+                                                <input type="hidden" name="work--identifiers--${i}--name" value="$id.name"/>
+                                                <input type="hidden" name="work--identifiers--${i}--value" value="$id.value" class="$id.name"/>
+                                            </td>
+                                            <td><a href="javascript:;" class="repeat-remove red plain" title="Remove this identifier">[x]</a></td>
+                                        </tr>
+                                    </tbody>
+                                </table>
+                            </div>
+                        </div>
+                    </div>
+                </fieldset>
                 <fieldset class="major">
                     <legend>$_("Add Excerpts")</legend>
                     <div class="formBack" id="excerpts">