Skip to content

Commit

Permalink
Add experimental Google Scraper Implementation, close #43, close #66
Browse files Browse the repository at this point in the history
  • Loading branch information
phibr0 committed Feb 25, 2022
1 parent 304833e commit 3c8e8ad
Show file tree
Hide file tree
Showing 5 changed files with 186 additions and 11 deletions.
85 changes: 85 additions & 0 deletions api/main.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import { Application, Context, createHttpExceptionBody } from "https://deno.land/x/abc@v1.3.3/mod.ts";
import { DOMParser } from "https://deno.land/x/deno_dom/deno-dom-wasm.ts";
import { NodeList } from "https://deno.land/x/deno_dom@v0.1.21-alpha/src/dom/node-list.ts";
import { Definition, DictionaryWord } from "./types.ts";

const app = new Application();

/** Maps the `lang` query parameter to the language name appended to the Google search query. */
const LANGUAGES = {
  'en': 'english',
  'de': 'deutsch',
}

/**
 * Escapes `value` so it can be embedded in a double-quoted CSS attribute selector.
 * Backslashes and double quotes are backslash-escaped; line breaks become hex escapes.
 */
const escapeSelectorValue = (value: string): string =>
  value
    .replace(/["\\]/g, '\\$&')
    .replace(/[\n\r\f]/g, (ch) => `\\${ch.charCodeAt(0).toString(16)} `);

console.log("http://localhost:5000/");

/**
 * GET /v1/definition?lang=<code>&word=<word>
 *
 * Fetches the Google search page for "define <word> <language>", parses the returned HTML
 * and extracts the headword, phonetics, part of speech, definitions and synonyms from it.
 * Returns the plain strings "Unsupported Language" / "Supply a word" for bad input, and the
 * result of `createHttpExceptionBody('No definition found')` when anything goes wrong while
 * fetching or parsing.
 */
app
  .get("/v1/definition", async (ctx: Context) => {
    const { lang: langCode, word } = ctx.queryParams;

    // Only accept own keys so inherited names such as "constructor" are rejected.
    const lang = Object.prototype.hasOwnProperty.call(LANGUAGES, langCode)
      ? LANGUAGES[langCode as keyof typeof LANGUAGES]
      : undefined;
    if (!lang) {
      return "Unsupported Language";
    }

    if (!word) {
      return "Supply a word";
    }

    try {
      // URLSearchParams percent-encodes the word (and turns spaces into '+'), so characters
      // like '&' or '#' can no longer break out of the `q` parameter.
      const search = new URLSearchParams({ q: `define ${word} ${lang}` });
      const result = await fetch(`https://www.google.co.in/search?${search}`, {
        headers: { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36' }
      });
      if (!result.ok) {
        throw new Error(`Google responded with HTTP ${result.status}`);
      }

      const doc = new DOMParser().parseFromString(await result.text(), 'text/html');
      if (!doc) {
        throw new Error('Response could not be parsed as HTML');
      }

      // Quote and escape the word: an unquoted value is an invalid selector as soon as the
      // word contains a space, quote, bracket or starts with a digit.
      const data = doc.querySelector(`div[data-query-term="${escapeSelectorValue(word)}"]`);
      if (!data) {
        throw new Error('No dictionary box found in the response');
      }

      const def: DictionaryWord = {
        phonetics: [],
        meanings: [],
        word: data.querySelector('span[data-dobid="hdw"]')?.textContent ?? word
      };

      // Something like eɪpr(ɪ)l (April)
      const phoneticText = data.querySelector('.LTKOO > span')?.textContent;
      if (phoneticText) {
        def.phonetics.push({
          text: phoneticText,
          audio: data.querySelector('audio > source')?.getAttribute('src') ?? undefined
        });
      }

      // Something like noun
      const type = data.querySelector('.vmod i')?.textContent;
      if (type) {
        // Builds the definition list; the collected synonyms are attached to the first
        // definition only.
        const defGenerator = (defs: NodeList) => {
          const out: Definition[] = [];
          const syns: string[] = [];
          const tmp = data.querySelectorAll('.lr_container div[role="button"] span');
          tmp.forEach((el) => {
            // Spans whose parent carries a data-topic attribute are skipped.
            if (!el.parentElement?.getAttribute('data-topic') && el.textContent) {
              syns.push(el.textContent.trim());
            }
          });
          defs.forEach((el, idx) => {
            out.push({
              definition: el.textContent,
              // The node following the definition is taken as its example.
              example: el.nextSibling?.textContent,
              synonyms: !idx ? syns : undefined
            });
          });
          return out;
        };

        def.meanings.push({
          partOfSpeech: type,
          definitions: defGenerator(data.querySelectorAll('div[data-dobid="dfn"]'))
        });
      }

      return def;
    } catch (err) {
      // Keep the original response body, but leave a trace of what actually failed.
      console.error(err);
      return createHttpExceptionBody('No definition found');
    }
  })
  .start({ port: 5000 });
2 changes: 1 addition & 1 deletion manifest.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"id": "obsidian-dictionary-plugin",
"name": "Dictionary",
"version": "2.19.2",
"version": "2.20.0",
"minAppVersion": "0.12.11",
"description": "This is a simple dictionary for the Obsidian Note-Taking Tool.",
"author": "phibr0",
Expand Down
14 changes: 5 additions & 9 deletions src/apiManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import { OpenThesaurusSynonymAPI as OpenThesaurusSynonymProvider } from "src/int
// import { SynonymoSynonymAPI as SynonymoSynonymProvider } from "src/integrations/synonymoAPI";
import { AltervistaSynonymProvider } from "src/integrations/altervistaAPI";
import type DictionaryPlugin from "src/main";
import { GoogleScraperDefinitionProvider, GoogleScraperSynonymProvider } from 'src/integrations/googleScraperAPI';

/*
HOW TO ADD A NEW API:
Expand All @@ -35,13 +36,15 @@ export default class APIManager {
definitionProvider: DefinitionProvider[] = [
new FreeDictionaryDefinitionProvider(),
new OfflineDictionary(this),
new GoogleScraperDefinitionProvider(),
];
// Adds new APIs to the Synonym Providers
synonymProvider: SynonymProvider[] = [
new FreeDictionarySynonymProvider(),
new OpenThesaurusSynonymProvider(),
// new SynonymoSynonymProvider(), see #44
new AltervistaSynonymProvider(),
new GoogleScraperSynonymProvider(),
];
// Adds new APIs to the Part Of Speech Providers
partOfSpeechProvider: PartOfSpeechProvider[] = [
Expand All @@ -61,13 +64,9 @@ export default class APIManager {
public async requestDefinitions(query: string): Promise<DictionaryWord> {
//Get the currently enabled API
const api = this.getDefinitionAPI();
const { cache, settings, loadCache } = this.plugin;
const { cache, settings } = this.plugin;

if (settings.useCaching && !api.name.toLowerCase().contains("offline")) {
//Get any cached Definitions
if(!cache) {
await loadCache();
}
const cachedDefinition = cache.cachedDefinitions.find((c) => { return c.content.word.toLowerCase() == query.toLowerCase() && c.lang == settings.defaultLanguage && c.api == api.name });
//If cachedDefinition exists return it as a Promise
if (cachedDefinition) {
Expand Down Expand Up @@ -103,11 +102,8 @@ export default class APIManager {
if (!api) {
throw ("No Synonym API selected/available");
}
const { cache, settings, loadCache } = this.plugin;
const { cache, settings } = this.plugin;
if (settings.useCaching && !api.name.toLowerCase().contains("offline")) {
if(!cache) {
await loadCache();
}
const cachedSynonymCollection = cache.cachedSynonyms.find((s) => { return s.word.toLowerCase() == query.toLowerCase() && s.lang == settings.defaultLanguage && s.api == api.name });
if (cachedSynonymCollection) {
return new Promise((resolve) => resolve(cachedSynonymCollection.content));
Expand Down
94 changes: 94 additions & 0 deletions src/integrations/googleScraperAPI.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import type { Definition, DefinitionProvider, DictionaryWord, PartOfSpeech, Synonym, SynonymProvider } from "src/integrations/types";
import { requestUrl } from "obsidian";

/**
 * Metadata shared by the Google scraper providers: display name, info URL,
 * offline flag and the language codes they accept.
 */
class Base {
  name = "Google";
  // Must be an initializer: written as `url?: "..."` the string is only a literal *type*
  // and the property stays undefined at runtime.
  url = "https://support.google.com/websearch/answer/10106608";
  offline = false;
  supportedLanguages = [
    "en_US",
    "de",
    "es",
    "fr"
  ];

  /**
   * Maps each supported language code to the language name that is appended to the
   * Google search query. Typed as a string-keyed record so it can be indexed with an
   * arbitrary `lang` string; unknown codes yield `undefined`.
   */
  static LANGUAGES: Record<string, string> = {
    'en_US': 'english',
    'de': 'deutsch',
    "es": "Español",
    "fr": "Français"
  }
}

/**
 * Escapes `value` so it can be embedded in a double-quoted CSS attribute selector.
 * Backslashes and double quotes are backslash-escaped; line breaks become hex escapes.
 */
function escapeSelectorValue(value: string): string {
  return value
    .replace(/["\\]/g, '\\$&')
    .replace(/[\n\r\f]/g, (ch) => `\\${ch.charCodeAt(0).toString(16)} `);
}

/**
 * Definition provider that requests the Google search page for "define <query> <language>"
 * and extracts the dictionary data from the returned HTML.
 */
export class GoogleScraperDefinitionProvider extends Base implements DefinitionProvider {
  /**
   * Looks up `query` on Google and parses headword, phonetics, part of speech,
   * definitions and synonyms out of the result page.
   *
   * @param query The word or phrase to define.
   * @param lang A key of `LANGUAGES` (e.g. "en_US", "de").
   * @returns The parsed entry; `meanings` is empty when no part of speech was found.
   * @throws Error when `lang` is not a known language or the page contains no
   *         dictionary element for `query`.
   */
  async requestDefinitions(query: string, lang: string): Promise<DictionaryWord> {
    const languages = GoogleScraperDefinitionProvider.LANGUAGES as Record<string, string>;
    // Own-key check: an unknown code previously put the literal text "undefined" in the search.
    const languageName = Object.prototype.hasOwnProperty.call(languages, lang) ? languages[lang] : undefined;
    if (!languageName) {
      throw new Error(`Unsupported language: ${lang}`);
    }

    // URLSearchParams percent-encodes the query (and turns spaces into '+'), so characters
    // like '&' or '#' can no longer break out of the `q` parameter.
    const search = new URLSearchParams({ q: `define ${query} ${languageName}` });
    const result = await requestUrl({
      url: `https://www.google.com/search?${search}`,
      headers: { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36' }
    });

    const doc = new DOMParser().parseFromString(result.text, 'text/html');
    // Quote and escape the query: an unquoted value is an invalid selector as soon as the
    // query contains a space, quote, bracket or starts with a digit.
    const data = doc.querySelector(`div[data-query-term="${escapeSelectorValue(query)}"]`);

    if (!data) {
      throw new Error(`No definition found for "${query}"`);
    }

    const def: DictionaryWord = {
      phonetics: [],
      meanings: [],
      word: data.querySelector('span[data-dobid="hdw"]')?.textContent ?? query
    };

    // Something like eɪpr(ɪ)l (April)
    const phoneticText = data.querySelector('.LTKOO > span')?.textContent;
    if (phoneticText) {
      def.phonetics.push({
        text: phoneticText,
        audio: data.querySelector('audio > source')?.getAttribute('src') ?? undefined
      });
    }

    // Something like noun
    const type = data.querySelector('.vmod i')?.textContent;
    if (type) {
      // Builds the definition list; the collected synonyms are attached to the first
      // definition only.
      const defGenerator = (defs: NodeList) => {
        const out: Definition[] = [];
        const syns: string[] = [];
        const tmp = data.querySelectorAll('.lr_container div[role="button"] span');
        tmp.forEach((el) => {
          // Spans whose parent carries a data-topic attribute are skipped.
          if (!el.parentElement?.getAttribute('data-topic') && el.textContent) {
            syns.push(el.textContent.trim());
          }
        });
        defs.forEach((el, idx) => {
          out.push({
            definition: el.textContent ?? "",
            // The node following the definition is taken as its example.
            example: el.nextSibling?.textContent ?? undefined,
            synonyms: !idx ? syns : undefined
          });
        });
        return out;
      };

      def.meanings.push({
        partOfSpeech: type,
        definitions: defGenerator(data.querySelectorAll('div[data-dobid="dfn"]'))
      });
    }

    return def;
  }
}

/**
 * Synonym provider that reuses the Google definition scraper and exposes the synonyms
 * attached to the first definition of the first meaning.
 */
export class GoogleScraperSynonymProvider extends Base implements SynonymProvider {
  provider: GoogleScraperDefinitionProvider;

  constructor() {
    super();
    this.provider = new GoogleScraperDefinitionProvider();
  }

  /**
   * Requests the definition of `query` and converts its synonyms into `Synonym` objects.
   *
   * @param query The word to find synonyms for.
   * @param lang Language code forwarded to the definition provider.
   * @param _ Part of speech; ignored by this provider.
   * @returns The synonyms, or an empty array when the entry has no meanings, no
   *          definitions or no synonyms (previously this case threw a TypeError).
   */
  async requestSynonyms(query: string, lang: string, _?: PartOfSpeech): Promise<Synonym[]> {
    const entry = await this.provider.requestDefinitions(query, lang);
    // The definition provider leaves `meanings` empty when no part of speech is found and
    // sets `synonyms` only on the first definition, so every step may be missing.
    const synonyms = entry.meanings[0]?.definitions[0]?.synonyms ?? [];
    return synonyms.map<Synonym>((synonym) => ({ word: synonym }));
  }
}
2 changes: 1 addition & 1 deletion src/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ export default class DictionaryPlugin extends Plugin {
async onload(): Promise<void> {
console.log('loading dictionary');

await Promise.all([this.loadSettings()]);
await Promise.all([this.loadSettings(), this.loadCache()]);

addIcons();

Expand Down

0 comments on commit 3c8e8ad

Please sign in to comment.