Skip to content

Commit

Permalink
Merge pull request #405 from A-M-A-X/main
Browse files Browse the repository at this point in the history
feature/#392_Unpack_gz_files_and_#391follow_redirects
  • Loading branch information
rhazn committed Jul 28, 2023
2 parents c4b17b7 + d705a2b commit a9abd0b
Show file tree
Hide file tree
Showing 6 changed files with 128 additions and 44 deletions.
106 changes: 74 additions & 32 deletions libs/extensions/std/exec/src/archive-interpreter-executor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// SPDX-License-Identifier: AGPL-3.0-only

import { strict as assert } from 'assert';
import * as zlib from 'node:zlib';
import * as path from 'path';

import * as R from '@jvalue/jayvee-execution';
Expand Down Expand Up @@ -45,20 +46,52 @@ export class ArchiveInterpreterExecutor extends AbstractBlockExecutor<
'archiveType',
PrimitiveValuetypes.Text,
);
let fs: R.Result<R.FileSystem>;

if (archiveType === 'zip') {
const fs = await this.loadZipFileToInMemoryFileSystem(
archiveFile,
context,
fs = await this.loadZipFileToInMemoryFileSystem(archiveFile, context);
} else if (archiveType === 'gz') {
fs = this.loadGzFileToInMemoryFileSystem(archiveFile, context);
} else {
return R.err({
message: `Archive type is not supported`,
diagnostic: { node: context.getCurrentNode(), property: 'name' },
});
}

if (R.isErr(fs)) {
return fs;
}
return R.ok(fs.right);
}

/**
 * Decompresses a gzip-compressed file and stores the result as the single
 * entry of a freshly created in-memory file system.
 *
 * The unpacked file is named after the archive with the archive's own
 * extension removed, and is placed directly below the file system root.
 *
 * @param archiveFile the gzip-compressed file to unpack
 * @param context execution context, used for debug logging and for attaching diagnostics to errors
 * @returns the populated file system on success; an error result if decompression or any later step throws
 */
private loadGzFileToInMemoryFileSystem(
  archiveFile: BinaryFile,
  context: ExecutionContext,
): R.Result<FileSystem> {
  context.logger.logDebug(`Loading gz file from binary content`);
  try {
    const fs = new InMemoryFileSystem();
    const archivedObject = zlib.gunzipSync(archiveFile.content);

    // Extension of the archive itself (incl. leading dot); it is stripped
    // from the archive name to obtain the name of the unpacked file.
    const extNameArchive = path.extname(archiveFile.name);

    const file = this.createFileFromArchive(
      archiveFile.name,
      archivedObject,
      extNameArchive,
    );

    // `fs` is a plain file system here, not a Result, so it must not be
    // unwrapped before the file is stored.
    const addedFile = fs.putFile(
      InMemoryFileSystem.getPathSeparator() + file.name,
      file,
    );
    // NOTE(review): presumably putFile yields null when the file could not be stored — confirm.
    assert(addedFile != null);

    return R.ok(fs);
  } catch (error: unknown) {
    return R.err(this.generateErrorObject(context, error));
  }
}

private async loadZipFileToInMemoryFileSystem(
Expand All @@ -75,21 +108,9 @@ export class ArchiveInterpreterExecutor extends AbstractBlockExecutor<
)) {
if (!archivedObject.dir) {
const content = await archivedObject.async('arraybuffer');
// Ext incl. leading dot
const extName = path.extname(archivedObject.name);
const fileName = path.basename(archivedObject.name);
const mimeType =
inferMimeTypeFromContentTypeString(extName) ||
MimeType.APPLICATION_OCTET_STREAM;
const fileExtension =
inferFileExtensionFromFileExtensionString(extName) ||
FileExtension.NONE;
const file = new BinaryFile(
fileName,
fileExtension,
mimeType,
content,
);

const file = this.createFileFromArchive(archivedObject.name, content);

const addedFile = fs.putFile(
InMemoryFileSystem.getPathSeparator() + relPath,
file,
Expand All @@ -99,12 +120,33 @@ export class ArchiveInterpreterExecutor extends AbstractBlockExecutor<
}
return R.ok(fs);
} catch (error: unknown) {
return R.err({
message: `Unexpected Error ${
error instanceof Error ? error.message : JSON.stringify(err)
} occured during processing`,
diagnostic: { node: context.getCurrentNode(), property: 'name' },
});
return R.err(this.generateErrorObject(context, error));
}
}

/**
 * Wraps content extracted from an archive in a `BinaryFile`.
 *
 * The file name is the last path segment of `archiveFileName`; when
 * `extNameArchive` is given, that suffix is removed from the name first.
 * Mime type and file extension are inferred from the extension of the
 * resulting name, with `MimeType.APPLICATION_OCTET_STREAM` and
 * `FileExtension.NONE` as fallbacks when nothing can be inferred.
 *
 * @param archiveFileName name (or path) of the entry or archive the content came from
 * @param content the extracted content
 * @param extNameArchive optional archive extension (incl. leading dot) to strip from the name
 * @returns the newly created `BinaryFile`
 */
private createFileFromArchive(
  archiveFileName: string,
  content: ArrayBuffer,
  extNameArchive?: string,
) {
  const unpackedName = path.basename(archiveFileName, extNameArchive);
  // Extension incl. leading dot, taken after the archive suffix was removed
  const unpackedExt = path.extname(unpackedName);

  const inferredMimeType = inferMimeTypeFromContentTypeString(unpackedExt);
  const inferredExtension =
    inferFileExtensionFromFileExtensionString(unpackedExt);

  return new BinaryFile(
    unpackedName,
    inferredExtension ? inferredExtension : FileExtension.NONE,
    inferredMimeType ? inferredMimeType : MimeType.APPLICATION_OCTET_STREAM,
    content,
  );
}

/**
 * Builds the error payload reported when processing an archive throws.
 *
 * @param context execution context; its current node is attached to the diagnostic
 * @param error the caught value; typed `unknown` because anything can be thrown
 * @returns an object holding a human-readable `message` and a `diagnostic`
 *   that points at the `name` property of the current node
 */
private generateErrorObject(context: ExecutionContext, error: unknown) {
  // Non-Error throwables carry no `message`, so serialize the caught value
  // itself (previously an unrelated `err` identifier was serialized).
  const details =
    error instanceof Error ? error.message : JSON.stringify(error);
  return {
    message: `Unexpected Error ${details} occured during processing`,
    diagnostic: { node: context.getCurrentNode(), property: 'name' },
  };
}
}
26 changes: 16 additions & 10 deletions libs/extensions/std/exec/src/http-extractor-executor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
// SPDX-License-Identifier: AGPL-3.0-only

import { strict as assert } from 'assert';
import * as http from 'http';
import * as https from 'https';
import * as path from 'path';

import * as R from '@jvalue/jayvee-execution';
Expand All @@ -20,6 +18,7 @@ import {
implementsStatic,
} from '@jvalue/jayvee-execution';
import { IOType, PrimitiveValuetypes } from '@jvalue/jayvee-language-server';
import { http, https } from 'follow-redirects';
import { AstNode } from 'langium';

import {
Expand Down Expand Up @@ -106,12 +105,16 @@ export class HttpExtractorExecutor extends AbstractBlockExecutor<
context.logger.logDebug(`Fetching raw data from ${url}`);
let httpGetFunction: HttpGetFunction;
if (url.startsWith('https')) {
httpGetFunction = https.get;
httpGetFunction = https.get.bind(https);
} else {
httpGetFunction = http.get;
httpGetFunction = http.get.bind(http);
}
const followRedirects = context.getPropertyValue(
'followRedirects',
PrimitiveValuetypes.Boolean,
);
return new Promise((resolve) => {
httpGetFunction(url, (response) => {
httpGetFunction(url, { followRedirects: followRedirects }, (response) => {
const responseCode = response.statusCode;

// Catch errors
Expand All @@ -124,6 +127,13 @@ export class HttpExtractorExecutor extends AbstractBlockExecutor<
diagnostic: { node: context.getOrFailProperty('url') },
}),
);
} else if (responseCode >= 301 && responseCode < 400) {
resolve(
R.err({
message: `HTTP fetch was redirected with code ${responseCode}. Redirects are either disabled or maximum number of redirects was exeeded.`,
diagnostic: { node: context.getOrFailProperty('url') },
}),
);
}

// Get chunked data and store to ArrayBuffer
Expand All @@ -147,11 +157,7 @@ export class HttpExtractorExecutor extends AbstractBlockExecutor<

// Infer FileName and FileExtension from url, if not inferrable, then default to None
// Get last element of URL assuming this is a filename
const urlString = context.getPropertyValue(
'url',
PrimitiveValuetypes.Text,
);
const url = new URL(urlString);
const url = new URL(response.responseUrl);
let fileName = url.pathname.split('/').pop();
if (fileName === undefined) {
fileName = url.pathname.replace('/', '-');
Expand Down
3 changes: 2 additions & 1 deletion libs/extensions/std/lang/src/archive-interpreter-meta-inf.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ export class ArchiveInterpreterMetaInformation extends BlockMetaInformation {
archiveType: {
type: PrimitiveValuetypes.Text,
docs: {
description: 'The archive type to be interpreted, e.g., `"zip"`.',
description:
'The archive type to be interpreted, e.g., "zip" or "gz".',
},
},
},
Expand Down
15 changes: 15 additions & 0 deletions libs/extensions/std/lang/src/http-extractor-meta-inf.ts
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,21 @@ export class HttpExtractorMetaInformation extends BlockMetaInformation {
}
},
},
followRedirects: {
type: PrimitiveValuetypes.Boolean,
defaultValue: true,
docs: {
description:
'Indicates, whether to follow redirects on get requests. If `false`, redirects are not followed. Default `true`',
examples: [
{
code: 'url: "tinyurl.com/4ub9spwz" \n followRedirects: true',
description:
'Specifies the URL to fetch the data from and allows redirects.',
},
],
},
},
},

// Input type:
Expand Down
18 changes: 18 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,13 @@
"@docusaurus/preset-classic": "2.4.1",
"@docusaurus/theme-mermaid": "2.4.1",
"@mdx-js/react": "^1.6.22",
"@types/follow-redirects": "^1.14.1",
"assert": "^2.0.0",
"chalk": "^4.1.2",
"clsx": "^1.2.1",
"commander": "^8.0.0",
"fast-csv": "^4.3.6",
"follow-redirects": "^1.15.2",
"fp-ts": "^2.13.1",
"gtfs-realtime-bindings": "^1.1.1",
"jszip": "^3.10.1",
Expand Down Expand Up @@ -94,4 +96,4 @@
"got": "^11.8.5"
}
}
}
}

0 comments on commit a9abd0b

Please sign in to comment.