diff --git a/example/materials.jv b/example/materials.jv
new file mode 100644
index 000000000..813f02783
--- /dev/null
+++ b/example/materials.jv
@@ -0,0 +1,56 @@
+// SPDX-FileCopyrightText: 2023 Friedrich-Alexander-Universitat Erlangen-Nurnberg
+//
+// SPDX-License-Identifier: AGPL-3.0-only
+
+pipeline MaterialsDatabasePipeline {
+
+  block MaterialsDatabaseExtractor oftype HttpExtractor {
+    url: "https://figshare.com/ndownloader/files/31626647";
+    followRedirects: true;
+  }
+
+  block ZipArchiveInterpreter oftype ArchiveInterpreter {
+    archiveType: "zip";
+  }
+
+  block MaterialsDatabaseCSVPicker oftype FilePicker {
+    path: "/Databases/Combined/Combined_YieldStrength_GrainSize_Database.csv";
+  }
+
+  block MaterialsDatabaseTextFileInterpreter oftype TextFileInterpreter {
+
+  }
+
+  block MaterialsDatabaseCSVInterpreter oftype CSVInterpreter {
+    delimiter: ",";
+    enclosing: '"';
+    enclosingEscape: '"';
+  }
+
+  block MaterialsDatabaseTableInterpreter oftype TableInterpreter {
+    header: true;
+    columns: [
+      "Compound" oftype text,
+      "Blacklisted Compound?" oftype text,
+      "Yield Strength Value" oftype text,
+      "Yield Strength Unit" oftype text,
+      "Grain Size Value" oftype text,
+      "Grain Size Unit" oftype text,
+      "DOI" oftype text,
+      "Open Access" oftype text,
+    ];
+  }
+
+  block MaterialsDatabaseLoader oftype SQLiteLoader {
+    table: "MaterialsDatabase";
+    file: "./MaterialsDatabase.sqlite";
+  }
+
+  MaterialsDatabaseExtractor
+    -> ZipArchiveInterpreter
+    -> MaterialsDatabaseCSVPicker
+    -> MaterialsDatabaseTextFileInterpreter
+    -> MaterialsDatabaseCSVInterpreter
+    -> MaterialsDatabaseTableInterpreter
+    -> MaterialsDatabaseLoader;
+}
diff --git a/example/thermoelectricMaterials.jv b/example/thermoelectricMaterials.jv
new file mode 100644
index 000000000..d2b1e160c
--- /dev/null
+++ b/example/thermoelectricMaterials.jv
@@ -0,0 +1,48 @@
+// SPDX-FileCopyrightText: 2023 Friedrich-Alexander-Universitat Erlangen-Nurnberg
+//
+// SPDX-License-Identifier: AGPL-3.0-only
+
+pipeline ThermoelectricMaterialsPipeline {
+
+  block ThermoelectricMaterialsExtractor oftype HttpExtractor {
+    url: "https://figshare.com/ndownloader/files/28333845";
+    followRedirects: true;
+  }
+
+  block ThermoelectricMaterialsArchiveInterpreter oftype ArchiveInterpreter {
+    archiveType: "gz";
+  }
+
+  block ThermoelectricMaterialsFilePicker oftype FilePicker {
+    path: "/ucsb_thermoelectrics.json";
+  }
+
+  block ThermoelectricMaterialsTextFileInterpreter oftype TextFileInterpreter {
+
+  }
+
+  // Not working from here on forward, as json file is received and json interpreter is not yet implemented
+  block ThermoelectricMaterialsJSONInterpreter oftype CSVInterpreter {
+    enclosing: '"';
+  }
+
+  block ThermoelectricMaterialsTableInterpreter oftype TableInterpreter {
+    header: true;
+    columns: [
+
+    ];
+  }
+
+  block ThermoelectricMaterialsLoader oftype SQLiteLoader {
+    table: "ThermoelectricMaterials";
+    file: "./ThermoelectricMaterials.sqlite";
+  }
+
+  ThermoelectricMaterialsExtractor
+    -> ThermoelectricMaterialsArchiveInterpreter
+    -> ThermoelectricMaterialsFilePicker
+    -> ThermoelectricMaterialsTextFileInterpreter
+    -> ThermoelectricMaterialsJSONInterpreter
+    -> ThermoelectricMaterialsTableInterpreter
+    -> ThermoelectricMaterialsLoader;
+}
diff --git a/libs/extensions/std/exec/src/archive-interpreter-executor.ts b/libs/extensions/std/exec/src/archive-interpreter-executor.ts
index 79f833618..7ab32cc95 100644
--- a/libs/extensions/std/exec/src/archive-interpreter-executor.ts
+++ b/libs/extensions/std/exec/src/archive-interpreter-executor.ts
@@ -20,6 +20,7 @@ import {
 } from '@jvalue/jayvee-execution';
 import { IOType, PrimitiveValuetypes } from '@jvalue/jayvee-language-server';
 import * as JSZip from 'jszip';
+import * as zlib from 'node:zlib';
 
 import {
   inferFileExtensionFromFileExtensionString,
@@ -52,12 +53,79 @@ export class ArchiveInterpreterExecutor
       }
       return R.ok(fs.right);
     }
+    if (archiveType === 'gz') {
+      const fs = await this.loadGZipFileToInMemoryFileSystem(
+        archiveFile,
+        context,
+      );
+      if (R.isErr(fs)) {
+        return fs;
+      }
+      return R.ok(fs.right);
+    }
+
     return R.err({
-      message: `Archive is not a zip-archive`,
+      message: `Archive is neither a zip-archive nor a gz-archive`,
       diagnostic: { node: context.getCurrentNode(), property: 'name' },
     });
   }
 
+  /**
+   * Decompresses a gzip-compressed file and stores the result as the only
+   * file of a new in-memory file system.
+   *
+   * The decompressed file is named after the archive file without its last
+   * extension and is stored under the path separator followed by that name.
+   *
+   * @param archiveFile the gzip-compressed binary file
+   * @param context the execution context used for logging and diagnostics
+   * @returns the file system containing the decompressed file, or an error
+   * result if anything thrown during decompression or storing was caught
+   */
+  private async loadGZipFileToInMemoryFileSystem(
+    archiveFile: BinaryFile,
+    context: ExecutionContext,
+  ): Promise<R.Result<InMemoryFileSystem>> {
+    context.logger.logDebug(`Loading gzip file from binary content`);
+    try {
+      const fs = new InMemoryFileSystem();
+      const archivedObject = zlib.gunzipSync(archiveFile.content);
+
+      const extNameArchive = path.extname(archiveFile.name);
+      const fileName = path.basename(archiveFile.name, extNameArchive);
+      const extName = path.extname(fileName);
+
+      const mimeType =
+        inferMimeTypeFromContentTypeString(extName) ||
+        MimeType.APPLICATION_OCTET_STREAM;
+      const fileExtension =
+        inferFileExtensionFromFileExtensionString(extName) ||
+        FileExtension.NONE;
+      const file = new BinaryFile(
+        fileName,
+        fileExtension,
+        mimeType,
+        archivedObject,
+      );
+
+      const addedFile = fs.putFile(
+        InMemoryFileSystem.getPathSeparator() + fileName,
+        file,
+      );
+
+      assert(addedFile != null);
+
+      return R.ok(fs);
+    } catch (error: unknown) {
+      return R.err({
+        message: `Unexpected Error ${
+          error instanceof Error ? error.message : JSON.stringify(error)
+        } occurred during processing`,
+        diagnostic: { node: context.getCurrentNode(), property: 'name' },
+      });
+    }
+  }
+
   private async loadZipFileToInMemoryFileSystem(
     archiveFile: BinaryFile,
     context: ExecutionContext,
diff --git a/libs/extensions/std/exec/src/http-extractor-executor.ts b/libs/extensions/std/exec/src/http-extractor-executor.ts
index ba4c96b15..ac375cef0 100644
--- a/libs/extensions/std/exec/src/http-extractor-executor.ts
+++ b/libs/extensions/std/exec/src/http-extractor-executor.ts
@@ -2,9 +2,8 @@
 //
 // SPDX-License-Identifier: AGPL-3.0-only
 
-import * as http from 'http';
-import * as https from 'https';
 import * as path from 'path';
+import { http, https } from 'follow-redirects';
 
 import * as R from '@jvalue/jayvee-execution';
 import {
@@ -61,17 +60,38 @@ export class HttpExtractorExecutor
     } else {
       httpGetFunction = http.get;
     }
+
+    // Value of the block's `followRedirects` property, passed on to follow-redirects
+    const followRedirects = context.getPropertyValue(
+      'followRedirects',
+      PrimitiveValuetypes.Boolean,
+    );
+
     return new Promise((resolve) => {
-      httpGetFunction(url, (response) => {
+      httpGetFunction(url, { followRedirects }, (response) => {
         const responseCode = response.statusCode;
 
         // Catch errors
         if (responseCode === undefined || responseCode >= 400) {
           resolve(
             R.err({
               message: `HTTP fetch failed with code ${
                 responseCode ?? 'undefined'
               }. Please check your connection.`,
+              diagnostic: { node: context.getOrFailProperty('url') },
+            }),
+          );
+        }
+
+        // Any 3xx response reaching this point is a redirect that was not followed
+        if (
+          responseCode !== undefined &&
+          responseCode >= 300 &&
+          responseCode < 400
+        ) {
+          resolve(
+            R.err({
+              message: `HTTP fetch was redirected with code ${responseCode}. Redirects are either disabled or maximum number of redirects was exceeded.`,
               diagnostic: { node: context.getOrFailProperty('url') },
             }),
           );
@@ -103,7 +123,7 @@ export class HttpExtractorExecutor
             'url',
             PrimitiveValuetypes.Text,
           );
-          const url = new URL(urlString);
+          const url = new URL(response.responseUrl);
           let fileName = url.pathname.split('/').pop();
           if (fileName === undefined) {
             fileName = url.pathname.replace('/', '-');
diff --git a/libs/extensions/std/lang/src/archive-interpreter-meta-inf.ts b/libs/extensions/std/lang/src/archive-interpreter-meta-inf.ts
index adf197402..d48290bf1 100644
--- a/libs/extensions/std/lang/src/archive-interpreter-meta-inf.ts
+++ b/libs/extensions/std/lang/src/archive-interpreter-meta-inf.ts
@@ -19,7 +19,7 @@ export class ArchiveInterpreterMetaInformation extends BlockMetaInformation {
         archiveType: {
           type: PrimitiveValuetypes.Text,
           docs: {
-            description: 'The archive type to be interpreted, e.g., `"zip"`.',
+            description: 'The archive type to be interpreted, e.g., `"zip"` or `"gz"`.',
           },
         },
       },
diff --git a/libs/extensions/std/lang/src/example-validation.spec.ts b/libs/extensions/std/lang/src/example-validation.spec.ts
index 7a60fbd66..bef2626cf 100644
--- a/libs/extensions/std/lang/src/example-validation.spec.ts
+++ b/libs/extensions/std/lang/src/example-validation.spec.ts
@@ -37,6 +37,9 @@ describe('jv example tests', () => {
 
   it.each([
     'cars.jv',
+    'materials.jv',
+    // TODO: implement JSON-Interpreter
+    'thermoelectricMaterials.jv',
     'electric-vehicles.jv',
     'gtfs-rt-simple.jv',
     'gtfs-static-and-rt.jv',
diff --git a/libs/extensions/std/lang/src/http-extractor-meta-inf.ts b/libs/extensions/std/lang/src/http-extractor-meta-inf.ts
index 8b5b8bf89..aa5ba779c 100644
--- a/libs/extensions/std/lang/src/http-extractor-meta-inf.ts
+++ b/libs/extensions/std/lang/src/http-extractor-meta-inf.ts
@@ -28,7 +28,21 @@ export class HttpExtractorMetaInformation extends BlockMetaInformation {
             ],
           },
         },
+        followRedirects: {
+          type: PrimitiveValuetypes.Boolean,
+          defaultValue: true,
+          docs: {
+            description: 'Indicates whether to follow redirects on GET requests. If `false`, redirects are disabled. Default `true`.',
+            examples: [
+              {
+                code: 'url: "tinyurl.com/4ub9spwz" \n followRedirects: true',
+                description: 'Specifies the URL to fetch the data from and allows redirects.',
+              },
+            ],
+          },
+        },
       },
+
 
       // Input type:
       IOType.NONE,
diff --git a/package-lock.json b/package-lock.json
index 8e124a044..0dbfeda59 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -12,11 +12,13 @@
         "@docusaurus/preset-classic": "2.4.1",
         "@docusaurus/theme-mermaid": "2.4.1",
         "@mdx-js/react": "^1.6.22",
+        "@types/follow-redirects": "^1.14.1",
         "assert": "^2.0.0",
         "chalk": "^4.1.2",
         "clsx": "^1.2.1",
         "commander": "^8.0.0",
         "fast-csv": "^4.3.6",
+        "follow-redirects": "^1.15.2",
         "fp-ts": "^2.13.1",
         "gtfs-realtime-bindings": "^1.1.1",
         "jszip": "^3.10.1",
@@ -5755,6 +5757,14 @@
         "@types/range-parser": "*"
       }
     },
+    "node_modules/@types/follow-redirects": {
+      "version": "1.14.1",
+      "resolved": "https://registry.npmjs.org/@types/follow-redirects/-/follow-redirects-1.14.1.tgz",
+      "integrity": "sha512-THBEFwqsLuU/K62B5JRwab9NW97cFmL4Iy34NTMX0bMycQVzq2q7PKOkhfivIwxdpa/J72RppgC42vCHfwKJ0Q==",
+      "dependencies": {
+        "@types/node": "*"
+      }
+    },
     "node_modules/@types/fs-extra": {
       "version": "8.1.2",
       "resolved": "https://registry.npmjs.org/@types/fs-extra/-/fs-extra-8.1.2.tgz",
@@ -28706,6 +28716,14 @@
         "@types/range-parser": "*"
       }
     },
+    "@types/follow-redirects": {
+      "version": "1.14.1",
+      "resolved": "https://registry.npmjs.org/@types/follow-redirects/-/follow-redirects-1.14.1.tgz",
+      "integrity": "sha512-THBEFwqsLuU/K62B5JRwab9NW97cFmL4Iy34NTMX0bMycQVzq2q7PKOkhfivIwxdpa/J72RppgC42vCHfwKJ0Q==",
+      "requires": {
+        "@types/node": "*"
+      }
+    },
     "@types/fs-extra": {
       "version": "8.1.2",
       "resolved": "https://registry.npmjs.org/@types/fs-extra/-/fs-extra-8.1.2.tgz",
diff --git a/package.json b/package.json
index 848f765fe..355891ff9 100644
--- a/package.json
+++ b/package.json
@@ -10,6 +10,8 @@
     "test": "nx run-many --target test",
     "generate": "nx run language-server:generate",
     "example:cars": "nx run interpreter:run -d example/cars.jv",
+    "example:materials": "nx run interpreter:run -d example/materials.jv",
+    "example:thermoelectricMaterials": "nx run interpreter:run -d example/thermoelectricMaterials.jv",
     "example:gtfs": "nx run interpreter:run -d example/gtfs-static-and-rt.jv",
     "example:vehicles": "nx run interpreter:run -d -e DB_HOST=localhost -e DB_PORT=5432 -e DB_USERNAME=postgres -e DB_PASSWORD=postgres -e DB_DATABASE=postgres example/electric-vehicles.jv"
   },
@@ -19,11 +21,13 @@
     "@docusaurus/preset-classic": "2.4.1",
     "@docusaurus/theme-mermaid": "2.4.1",
     "@mdx-js/react": "^1.6.22",
+    "@types/follow-redirects": "^1.14.1",
     "assert": "^2.0.0",
     "chalk": "^4.1.2",
     "clsx": "^1.2.1",
     "commander": "^8.0.0",
     "fast-csv": "^4.3.6",
+    "follow-redirects": "^1.15.2",
     "fp-ts": "^2.13.1",
     "gtfs-realtime-bindings": "^1.1.1",
     "jszip": "^3.10.1",