diff --git a/libs/extensions/std/exec/src/archive-interpreter-executor.ts b/libs/extensions/std/exec/src/archive-interpreter-executor.ts index 7ac06f076..bf78fe65d 100644 --- a/libs/extensions/std/exec/src/archive-interpreter-executor.ts +++ b/libs/extensions/std/exec/src/archive-interpreter-executor.ts @@ -3,6 +3,7 @@ // SPDX-License-Identifier: AGPL-3.0-only import { strict as assert } from 'assert'; +import * as zlib from 'node:zlib'; import * as path from 'path'; import * as R from '@jvalue/jayvee-execution'; @@ -45,20 +46,52 @@ export class ArchiveInterpreterExecutor extends AbstractBlockExecutor< 'archiveType', PrimitiveValuetypes.Text, ); + let fs: R.Result; + if (archiveType === 'zip') { - const fs = await this.loadZipFileToInMemoryFileSystem( - archiveFile, - context, + fs = await this.loadZipFileToInMemoryFileSystem(archiveFile, context); + } else if (archiveType === 'gz') { + fs = this.loadGzFileToInMemoryFileSystem(archiveFile, context); + } else { + return R.err({ + message: `Archive type is not supported`, + diagnostic: { node: context.getCurrentNode(), property: 'name' }, + }); + } + + if (R.isErr(fs)) { + return fs; + } + return R.ok(fs.right); + } + + private loadGzFileToInMemoryFileSystem( + archiveFile: BinaryFile, + context: ExecutionContext, + ): R.Result { + context.logger.logDebug(`Loading gz file from binary content`); + try { + const fs = new InMemoryFileSystem(); + const archivedObject = zlib.gunzipSync(archiveFile.content); + + const extNameArchive = path.extname(archiveFile.name); + + const file = this.createFileFromArchive( + archiveFile.name, + archivedObject, + extNameArchive, ); - if (R.isErr(fs)) { - return fs; - } - return R.ok(fs.right); + + const addedFile = fs.putFile( + InMemoryFileSystem.getPathSeparator() + file.name, + file, + ); + assert(addedFile != null); + + return R.ok(fs); + } catch (error: unknown) { + return R.err(this.generateErrorObject(context, error)); } - return R.err({ - message: `Archive is not a zip-archive`, 
- diagnostic: { node: context.getCurrentNode(), property: 'name' }, - }); } private async loadZipFileToInMemoryFileSystem( @@ -75,21 +108,9 @@ export class ArchiveInterpreterExecutor extends AbstractBlockExecutor< )) { if (!archivedObject.dir) { const content = await archivedObject.async('arraybuffer'); - // Ext incl. leading dot - const extName = path.extname(archivedObject.name); - const fileName = path.basename(archivedObject.name); - const mimeType = - inferMimeTypeFromContentTypeString(extName) || - MimeType.APPLICATION_OCTET_STREAM; - const fileExtension = - inferFileExtensionFromFileExtensionString(extName) || - FileExtension.NONE; - const file = new BinaryFile( - fileName, - fileExtension, - mimeType, - content, - ); + + const file = this.createFileFromArchive(archivedObject.name, content); + const addedFile = fs.putFile( InMemoryFileSystem.getPathSeparator() + relPath, file, @@ -99,12 +120,33 @@ export class ArchiveInterpreterExecutor extends AbstractBlockExecutor< } return R.ok(fs); } catch (error: unknown) { - return R.err({ - message: `Unexpected Error ${ - error instanceof Error ? error.message : JSON.stringify(err) - } occured during processing`, - diagnostic: { node: context.getCurrentNode(), property: 'name' }, - }); + return R.err(this.generateErrorObject(context, error)); } } + + private createFileFromArchive( + archiveFileName: string, + content: ArrayBuffer, + extNameArchive?: string, + ) { + const fileName = path.basename(archiveFileName, extNameArchive); + const extName = path.extname(fileName); + + const mimeType = + inferMimeTypeFromContentTypeString(extName) || + MimeType.APPLICATION_OCTET_STREAM; + const fileExtension = + inferFileExtensionFromFileExtensionString(extName) || FileExtension.NONE; + + return new BinaryFile(fileName, fileExtension, mimeType, content); + } + + private generateErrorObject(context: ExecutionContext, error: unknown) { + return { + message: `Unexpected Error ${ + error instanceof Error ? 
error.message : JSON.stringify(error) + } occured during processing`, + diagnostic: { node: context.getCurrentNode(), property: 'name' }, + }; + } } diff --git a/libs/extensions/std/exec/src/http-extractor-executor.ts b/libs/extensions/std/exec/src/http-extractor-executor.ts index b7d1fcc7e..03534cfcb 100644 --- a/libs/extensions/std/exec/src/http-extractor-executor.ts +++ b/libs/extensions/std/exec/src/http-extractor-executor.ts @@ -3,8 +3,6 @@ // SPDX-License-Identifier: AGPL-3.0-only import { strict as assert } from 'assert'; -import * as http from 'http'; -import * as https from 'https'; import * as path from 'path'; import * as R from '@jvalue/jayvee-execution'; @@ -20,6 +18,7 @@ import { implementsStatic, } from '@jvalue/jayvee-execution'; import { IOType, PrimitiveValuetypes } from '@jvalue/jayvee-language-server'; +import { http, https } from 'follow-redirects'; import { AstNode } from 'langium'; import { @@ -106,12 +105,16 @@ export class HttpExtractorExecutor extends AbstractBlockExecutor< context.logger.logDebug(`Fetching raw data from ${url}`); let httpGetFunction: HttpGetFunction; if (url.startsWith('https')) { - httpGetFunction = https.get; + httpGetFunction = https.get.bind(https); } else { - httpGetFunction = http.get; + httpGetFunction = http.get.bind(http); } + const followRedirects = context.getPropertyValue( + 'followRedirects', + PrimitiveValuetypes.Boolean, + ); return new Promise((resolve) => { - httpGetFunction(url, (response) => { + httpGetFunction(url, { followRedirects: followRedirects }, (response) => { const responseCode = response.statusCode; // Catch errors @@ -124,6 +127,13 @@ export class HttpExtractorExecutor extends AbstractBlockExecutor< diagnostic: { node: context.getOrFailProperty('url') }, }), ); + } else if (responseCode >= 301 && responseCode < 400) { + resolve( + R.err({ + message: `HTTP fetch was redirected with code ${responseCode}. 
Redirects are either disabled or maximum number of redirects was exceeded.`, + diagnostic: { node: context.getOrFailProperty('url') }, + }), + ); } // Get chunked data and store to ArrayBuffer @@ -147,11 +157,7 @@ export class HttpExtractorExecutor extends AbstractBlockExecutor< // Infer FileName and FileExtension from url, if not inferrable, then default to None // Get last element of URL assuming this is a filename - const urlString = context.getPropertyValue( - 'url', - PrimitiveValuetypes.Text, - ); - const url = new URL(urlString); + const url = new URL(response.responseUrl); let fileName = url.pathname.split('/').pop(); if (fileName === undefined) { fileName = url.pathname.replace('/', '-'); diff --git a/libs/extensions/std/lang/src/archive-interpreter-meta-inf.ts b/libs/extensions/std/lang/src/archive-interpreter-meta-inf.ts index adf197402..7339fbd2a 100644 --- a/libs/extensions/std/lang/src/archive-interpreter-meta-inf.ts +++ b/libs/extensions/std/lang/src/archive-interpreter-meta-inf.ts @@ -19,7 +19,8 @@ export class ArchiveInterpreterMetaInformation extends BlockMetaInformation { archiveType: { type: PrimitiveValuetypes.Text, docs: { - description: 'The archive type to be interpreted, e.g., `"zip"`.', + description: + 'The archive type to be interpreted, e.g., "zip" or "gz".', }, }, }, diff --git a/libs/extensions/std/lang/src/http-extractor-meta-inf.ts b/libs/extensions/std/lang/src/http-extractor-meta-inf.ts index 26dcc7321..1ac27dc34 100644 --- a/libs/extensions/std/lang/src/http-extractor-meta-inf.ts +++ b/libs/extensions/std/lang/src/http-extractor-meta-inf.ts @@ -147,6 +147,21 @@ export class HttpExtractorMetaInformation extends BlockMetaInformation { } }, }, + followRedirects: { + type: PrimitiveValuetypes.Boolean, + defaultValue: true, + docs: { + description: + 'Indicates, whether to follow redirects on get requests. If `false`, redirects are not followed. 
Default `true`', + examples: [ + { + code: 'url: "tinyurl.com/4ub9spwz" \n followRedirects: true', + description: + 'Specifies the URL to fetch the data from and allows redirects.', + }, + ], + }, + }, }, // Input type: diff --git a/package-lock.json b/package-lock.json index 8e124a044..0dbfeda59 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12,11 +12,13 @@ "@docusaurus/preset-classic": "2.4.1", "@docusaurus/theme-mermaid": "2.4.1", "@mdx-js/react": "^1.6.22", + "@types/follow-redirects": "^1.14.1", "assert": "^2.0.0", "chalk": "^4.1.2", "clsx": "^1.2.1", "commander": "^8.0.0", "fast-csv": "^4.3.6", + "follow-redirects": "^1.15.2", "fp-ts": "^2.13.1", "gtfs-realtime-bindings": "^1.1.1", "jszip": "^3.10.1", @@ -5755,6 +5757,14 @@ "@types/range-parser": "*" } }, + "node_modules/@types/follow-redirects": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/@types/follow-redirects/-/follow-redirects-1.14.1.tgz", + "integrity": "sha512-THBEFwqsLuU/K62B5JRwab9NW97cFmL4Iy34NTMX0bMycQVzq2q7PKOkhfivIwxdpa/J72RppgC42vCHfwKJ0Q==", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@types/fs-extra": { "version": "8.1.2", "resolved": "https://registry.npmjs.org/@types/fs-extra/-/fs-extra-8.1.2.tgz", @@ -28706,6 +28716,14 @@ "@types/range-parser": "*" } }, + "@types/follow-redirects": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/@types/follow-redirects/-/follow-redirects-1.14.1.tgz", + "integrity": "sha512-THBEFwqsLuU/K62B5JRwab9NW97cFmL4Iy34NTMX0bMycQVzq2q7PKOkhfivIwxdpa/J72RppgC42vCHfwKJ0Q==", + "requires": { + "@types/node": "*" + } + }, "@types/fs-extra": { "version": "8.1.2", "resolved": "https://registry.npmjs.org/@types/fs-extra/-/fs-extra-8.1.2.tgz", diff --git a/package.json b/package.json index 0b5ba1eb2..1edf42734 100644 --- a/package.json +++ b/package.json @@ -19,11 +19,13 @@ "@docusaurus/preset-classic": "2.4.1", "@docusaurus/theme-mermaid": "2.4.1", "@mdx-js/react": "^1.6.22", + 
"@types/follow-redirects": "^1.14.1", "assert": "^2.0.0", "chalk": "^4.1.2", "clsx": "^1.2.1", "commander": "^8.0.0", "fast-csv": "^4.3.6", + "follow-redirects": "^1.15.2", "fp-ts": "^2.13.1", "gtfs-realtime-bindings": "^1.1.1", "jszip": "^3.10.1", @@ -94,4 +96,4 @@ "got": "^11.8.5" } } -} +} \ No newline at end of file