add audio for more languages from LanguagePod101

* get audio for more languages from LanguagePod101 * docs * simpler docs * no duplicate urls * fix test <rikaitan.link>NGNhZTMxN2U1NGE4NmZkOGNmMjI0MmEyMmMxOTUwNWQ5MTQ2NTllOAo=</rikaitan.link>
Ajatt-Tools · Aug 11, 2024 · f8f6aa9 · f8f6aa9
1 parent fec44a0
commit f8f6aa9
Show file tree

Hide file tree

Showing 10 changed files with 140 additions and 40 deletions.
diff --git a/docs/audio.md b/docs/audio.md
@@ -4,21 +4,14 @@
 
 After looking up a term, you can click on the <img src="../ext/images/play-audio.svg" alt="" width="16" height="16"> _speaker_ button to hear the term's pronunciation. When searching for audio, multiple audio sources are checked until the first source with audio for the term is found. If no audio is found, you will hear a short click instead. Right-clicking the <img src="../ext/images/play-audio.svg" alt="" width="16" height="16"> button allows choosing the source manually.
 
-#### Japanese
-
-The default audio sources for Japanese are:
-
-- JapanesePod101
-- Jisho.org
-
-#### Other languages
-
-The default audio sources for other languages are from Wikimedia Commons:
+The default audio sources are:
 
+- LanguagePod101
+- Jisho.org (Japanese only)
 - Lingua Libre
 - Wiktionary
 
-While Commons has audio for many languages, some of them have limited coverage, and you may want to add additional sources, as described below.
+Depending on the language, you may want to increase coverage by adding more sources, as described below.
 
 ## Configuration
 

diff --git a/ext/data/schemas/options-schema.json b/ext/data/schemas/options-schema.json
@@ -423,7 +423,7 @@
                                                     "type": "string",
                                                     "enum": [
                                                         "jpod101",
-                                                        "jpod101-alternate",
+                                                        "language-pod-101",
                                                         "jisho",
                                                         "lingua-libre",
                                                         "wiktionary",

diff --git a/ext/js/data/options-util.js b/ext/js/data/options-util.js
@@ -561,6 +561,7 @@ export class OptionsUtil {
             this._updateVersion47,
             this._updateVersion48,
             this._updateVersion49,
+            this._updateVersion50,
         ];
         /* eslint-enable @typescript-eslint/unbound-method */
         if (typeof targetVersion === 'number' && targetVersion < result.length) {
@@ -1447,6 +1448,20 @@ export class OptionsUtil {
         }
     }
 
+    /**
+     * - Generalized jpod101-alternate to language-pod-101: in every profile, each audio source whose type is 'jpod101-alternate' is retyped in place to 'language-pod-101'.
+     * @type {import('options-util').UpdateFunction}
+     */
+    async _updateVersion50(options) {
+        for (const profile of options.profiles) {
+            for (const source of profile.options.audio.sources) {
+                if (source.type === 'jpod101-alternate') {
+                    source.type = 'language-pod-101'; // only the type is rewritten; the rest of the source entry is left as it is
+                }
+            }
+        }
+    }
+
     /**
      * @param {string} url
      * @returns {Promise<chrome.tabs.Tab>}

diff --git a/ext/js/display/display-audio.js b/ext/js/display/display-audio.js
@@ -55,7 +55,7 @@ export class DisplayAudio {
         /** @type {Map<import('settings').AudioSourceType, string>} */
         this._audioSourceTypeNames = new Map([
             ['jpod101', 'JapanesePod101'],
-            ['jpod101-alternate', 'JapanesePod101 (Alternate)'],
+            ['language-pod-101', 'LanguagePod101'],
             ['jisho', 'Jisho.org'],
             ['lingua-libre', 'Lingua Libre'],
             ['wiktionary', 'Wiktionary'],
@@ -200,11 +200,12 @@ export class DisplayAudio {
         return language === 'ja' ?
             new Set([
                 'jpod101',
-                'jpod101-alternate',
+                'language-pod-101',
                 'jisho',
             ]) :
             new Set([
                 'lingua-libre',
+                'language-pod-101',
                 'wiktionary',
             ]);
     }

diff --git a/ext/js/media/audio-downloader.js b/ext/js/media/audio-downloader.js
@@ -47,7 +47,7 @@ export class AudioDownloader {
         /** @type {Map<import('settings').AudioSourceType, import('audio-downloader').GetInfoHandler>} */
         this._getInfoHandlers = new Map(/** @type {[name: import('settings').AudioSourceType, handler: import('audio-downloader').GetInfoHandler][]} */ ([
             ['jpod101', this._getInfoJpod101.bind(this)],
-            ['jpod101-alternate', this._getInfoJpod101Alternate.bind(this)],
+            ['language-pod-101', this._getInfoLanguagePod101.bind(this)],
             ['jisho', this._getInfoJisho.bind(this)],
             ['lingua-libre', this._getInfoLinguaLibre.bind(this)],
             ['wiktionary', this._getInfoWiktionary.bind(this)],
@@ -126,11 +126,12 @@ export class AudioDownloader {
         const requiredSources = language === 'ja' ?
             new Set([
                 'jpod101',
-                'jpod101-alternate',
+                'language-pod-101',
                 'jisho',
             ]) :
             new Set([
                 'lingua-libre',
+                'language-pod-101',
                 'wiktionary',
             ]);
 
@@ -170,8 +171,10 @@ export class AudioDownloader {
     }
 
     /** @type {import('audio-downloader').GetInfoHandler} */
-    async _getInfoJpod101Alternate(term, reading) {
-        const fetchUrl = 'https://www.japanesepod101.com/learningcenter/reference/dictionary_post';
+    async _getInfoLanguagePod101(term, reading, _details, languageSummary) {
+        const {name: language} = languageSummary;
+
+        const fetchUrl = this._getLanguagePod101FetchUrl(language);
         const data = new URLSearchParams({
             post: 'dictionary_reference',
             match_type: 'exact',
@@ -189,6 +192,8 @@ export class AudioDownloader {
         const responseText = await response.text();
 
         const dom = this._createSimpleDOMParser(responseText);
+        /** @type {Set<string>} */
+        const urls = new Set();
         for (const row of dom.getElementsByClassName('dc-result-row')) {
             try {
                 const audio = dom.getElementByTagName('audio', row);
@@ -200,20 +205,101 @@ export class AudioDownloader {
                 let url = dom.getAttribute(source, 'src');
                 if (url === null) { continue; }
 
-                const htmlReadings = dom.getElementsByClassName('dc-vocab_kana');
-                if (htmlReadings.length === 0) { continue; }
-
-                const htmlReading = dom.getTextContent(htmlReadings[0]);
-                if (htmlReading && (reading === term || reading === htmlReading)) {
-                    url = this._normalizeUrl(url, response.url);
-                    return [{type: 'url', url}];
-                }
+                if (!this._validateLanguagePod101Row(language, dom, row, term, reading)) { continue; }
+                url = this._normalizeUrl(url, response.url);
+                urls.add(url);
             } catch (e) {
                 // NOP
             }
         }
+        return [...urls].map((url) => ({type: 'url', url}));
+    }
 
-        throw new Error('Failed to find audio URL');
+    /** Decides whether one result row of a LanguagePod101 dictionary response corresponds to the looked-up term.
+     * @param {string} language Language name; 'Japanese' rows are checked by reading, every other language by term text.
+     * @param {import('simple-dom-parser').ISimpleDomParser} dom Parser used to query the row's elements and read their text.
+     * @param {import('simple-dom-parser').Element} row Result row; passed as second argument to the class-name lookups (presumably the search root - confirm against ISimpleDomParser).
+     * @param {string} term Term that was looked up.
+     * @param {string} reading Reading that was looked up; only consulted in the 'Japanese' branch.
+     * @returns {boolean} `false` when the expected element is missing or its text does not match; `true` otherwise.
+     */
+    _validateLanguagePod101Row(language, dom, row, term, reading) {
+        switch (language) {
+            case 'Japanese': {
+                const htmlReadings = dom.getElementsByClassName('dc-vocab_kana', row);
+                if (htmlReadings.length === 0) { return false; }
+
+                const htmlReading = dom.getTextContent(htmlReadings[0]);
+                if (!htmlReading) { return false; }
+                if (reading !== term && reading !== htmlReading) { return false; } // accepted when reading equals term, or equals the row's kana text
+            } break;
+            default: {
+                const vocab = dom.getElementsByClassName('dc-vocab', row);
+                if (vocab.length === 0) { return false; }
+
+                if (term !== dom.getTextContent(vocab[0])) { return false; } // exact comparison against the first 'dc-vocab' element's text
+            }
+        }
+        return true;
+    }
+
+    /** Builds the dictionary lookup endpoint URL on the LanguagePod101 site for the given language.
+     * @param {string} language Language name; it is lower-cased to form the first part of the host name.
+     * @returns {string} `https://www.<language><pod|class>101.com/learningcenter/reference/dictionary_post`; an Error thrown by `_getLanguagePod101PodOrClass` for an unlisted language propagates.
+     */
+    _getLanguagePod101FetchUrl(language) {
+        const podOrClass = this._getLanguagePod101PodOrClass(language); // called first, so an unlisted language throws before any URL is built
+        const lowerCaseLanguage = language.toLowerCase();
+        return `https://www.${lowerCaseLanguage}${podOrClass}101.com/learningcenter/reference/dictionary_post`;
+    }
+
+    /**
+     * - https://languagepod101.com/ - selects the word placed between the language name and "101" in the site's host name.
+     * @param {string} language Language name, compared case-sensitively against the capitalised names listed in the switch.
+     * @returns {'pod'|'class'} 'pod' for languages in the first list, 'class' for languages in the second list.
+     * @throws {Error} When the language appears in neither list.
+     */
+    _getLanguagePod101PodOrClass(language) {
+        switch (language) {
+            case 'Afrikaans':
+            case 'Arabic':
+            case 'Bulgarian':
+            case 'Dutch':
+            case 'Filipino':
+            case 'Finnish':
+            case 'French':
+            case 'German':
+            case 'Greek':
+            case 'Hebrew':
+            case 'Hindi':
+            case 'Hungarian':
+            case 'Indonesian':
+            case 'Italian':
+            case 'Japanese':
+            case 'Persian':
+            case 'Polish':
+            case 'Portuguese':
+            case 'Romanian':
+            case 'Russian':
+            case 'Spanish':
+            case 'Swahili':
+            case 'Swedish':
+            case 'Thai':
+            case 'Urdu':
+            case 'Vietnamese':
+                return 'pod';
+            case 'Cantonese':
+            case 'Chinese':
+            case 'Czech':
+            case 'Danish':
+            case 'English':
+            case 'Korean':
+            case 'Norwegian':
+            case 'Turkish':
+                return 'class';
+            default:
+                throw new Error('Invalid language for LanguagePod101'); // any name not listed above, including differently-cased spellings
+        }
     }
 
     /** @type {import('audio-downloader').GetInfoHandler} */
@@ -262,7 +348,7 @@ export class AudioDownloader {
             return validFilenameTest.test(filename);
         };
 
-        return await this.getInfoWikimediaCommons(fetchUrl, validateFilename);
+        return await this._getInfoWikimediaCommons(fetchUrl, validateFilename);
     }
 
     /** @type {import('audio-downloader').GetInfoHandler} */
@@ -298,7 +384,7 @@ export class AudioDownloader {
             return `(${regionName}) ${fileUser}`;
         };
 
-        return await this.getInfoWikimediaCommons(fetchUrl, validateFilename, displayName);
+        return await this._getInfoWikimediaCommons(fetchUrl, validateFilename, displayName);
     }
 
     /**
@@ -307,7 +393,7 @@ export class AudioDownloader {
      * @param {(filename: string, fileUser: string) => string} [displayName]
      * @returns {Promise<import('audio-downloader').Info1[]>}
      */
-    async getInfoWikimediaCommons(fetchUrl, validateFilename, displayName = (_filename, fileUser) => fileUser) {
+    async _getInfoWikimediaCommons(fetchUrl, validateFilename, displayName = (_filename, fileUser) => fileUser) {
         const response = await this._requestBuilder.fetchAnonymous(fetchUrl, DEFAULT_REQUEST_INIT_PARAMS);
 
         /** @type {import('audio-downloader').WikimediaCommonsLookupResponse} */

diff --git a/ext/js/pages/settings/audio-controller.js b/ext/js/pages/settings/audio-controller.js
@@ -238,10 +238,10 @@ export class AudioController extends EventDispatcher {
      */
     _getAvailableAudioSourceTypes() {
         /** @type {import('settings').AudioSourceType[]} */
-        const generalAudioSources = ['lingua-libre', 'wiktionary', 'text-to-speech', 'custom'];
+        const generalAudioSources = ['language-pod-101', 'lingua-libre', 'wiktionary', 'text-to-speech', 'custom'];
         if (this._language === 'ja') {
             /** @type {import('settings').AudioSourceType[]} */
-            const japaneseAudioSources = ['jpod101', 'jpod101-alternate', 'jisho'];
+            const japaneseAudioSources = ['jpod101', 'jisho'];
             return [...japaneseAudioSources, ...generalAudioSources];
         }
         return generalAudioSources;
@@ -502,7 +502,7 @@ class AudioSourceEntry {
     _normalizeAudioSourceType(value) {
         switch (value) {
             case 'jpod101':
-            case 'jpod101-alternate':
+            case 'language-pod-101':
             case 'jisho':
             case 'lingua-libre':
             case 'wiktionary':

diff --git a/ext/templates-settings.html b/ext/templates-settings.html
@@ -138,7 +138,7 @@
     <div class="audio-source-inner">
         <select class="audio-source-type-select">
             <option value="jpod101">JapanesePod101</option>
-            <option value="jpod101-alternate">JapanesePod101 (Alternate)</option>
+            <option value="language-pod-101">LanguagePod101</option>
             <option value="jisho">Jisho.org</option>
             <option value="lingua-libre">(Commons) Lingua Libre</option>
             <option value="wiktionary">(Commons) Wiktionary</option>
@@ -453,7 +453,7 @@
     <span class="hotkey-argument-label">Source:</span>
     <select class="audio-source-select hotkey-argument-input horizontal-flex-fill">
         <option value="jpod101">JapanesePod101</option>
-        <option value="jpod101-alternate">JapanesePod101 (Alternate)</option>
+        <option value="language-pod-101">LanguagePod101</option>
         <option value="jisho">Jisho.org</option>
         <option value="lingua-libre">Lingua Libre</option>
         <option value="wiktionary">Wiktionary</option>

diff --git a/test/options-util.test.js b/test/options-util.test.js
@@ -73,7 +73,7 @@ function createProfileOptionsTestData1() {
         },
         audio: {
             enabled: true,
-            sources: ['jpod101', 'text-to-speech', 'custom'],
+            sources: ['jpod101', 'text-to-speech', 'custom', 'jpod101-alternate'],
             volume: 100,
             autoPlay: false,
             customSourceUrl: 'http://localhost/audio.mp3?term={expression}&reading={reading}',
@@ -319,6 +319,11 @@ function createProfileOptionsUpdatedTestData1() {
                     url: 'http://localhost/audio.mp3?term={term}&reading={reading}',
                     voice: '',
                 },
+                {
+                    type: 'language-pod-101',
+                    url: '',
+                    voice: '',
+                },
             ],
             volume: 100,
             autoPlay: false,
@@ -639,7 +644,7 @@ function createOptionsUpdatedTestData1() {
             },
         ],
         profileCurrent: 0,
-        version: 49,
+        version: 50,
         global: {
             database: {
                 prefixWildcardsSupported: false,

diff --git a/types/ext/audio-downloader.d.ts b/types/ext/audio-downloader.d.ts
@@ -21,8 +21,8 @@ import type * as Language from './language';
 export type GetInfoHandler = (
     term: string,
     reading: string,
-    details?: Audio.AudioSourceInfo,
-    languageSummary?: Language.LanguageSummary,
+    details: Audio.AudioSourceInfo,
+    languageSummary: Language.LanguageSummary,
 ) => Promise<Info[]>;
 
 export type Info = Info1 | Info2;

diff --git a/types/ext/settings.d.ts b/types/ext/settings.d.ts
@@ -390,7 +390,7 @@ export type PopupWindowType = 'normal' | 'popup';
 
 export type PopupWindowState = 'normal' | 'maximized' | 'fullscreen';
 
-export type AudioSourceType = 'jpod101' | 'jpod101-alternate' | 'jisho' | 'lingua-libre' | 'wiktionary' | 'text-to-speech' | 'text-to-speech-reading' | 'custom' | 'custom-json';
+export type AudioSourceType = 'jpod101' | 'language-pod-101' | 'jisho' | 'lingua-libre' | 'wiktionary' | 'text-to-speech' | 'text-to-speech-reading' | 'custom' | 'custom-json';
 
 export type TranslationConvertType = 'false' | 'true' | 'variant';