-
Notifications
You must be signed in to change notification settings - Fork 11
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feature/#392_Unpack_gz_files_and_#391follow_redirects #405
Changes from 13 commits
71f22c7
1124884
3a13303
e97b0a6
a83e6f6
561c088
6149013
7b963ad
a23642a
1eb6e1b
b119c5e
5f55fb9
8d0d124
9fe5645
88d480a
35aef3f
b1f1b0d
5194dbe
be91eda
729788b
9499a52
d705a2b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
// SPDX-FileCopyrightText: 2023 Friedrich-Alexander-Universitat Erlangen-Nurnberg | ||
// | ||
// SPDX-License-Identifier: AGPL-3.0-only | ||
|
||
pipeline MaterialsDatabasePipeline { | ||
|
||
block MaterialsDatabaseExtractor oftype HttpExtractor { | ||
url: "https://figshare.com/ndownloader/files/31626647"; | ||
followRedirects: true; | ||
} | ||
|
||
block ZipArchiveInterpreter oftype ArchiveInterpreter { | ||
archiveType: "zip"; | ||
} | ||
|
||
block MaterialsDatabaseCSVPicker oftype FilePicker { | ||
path: "/Databases/Combined/Combined_YieldStrength_GrainSize_Database.csv"; | ||
} | ||
|
||
block MaterialsDatabaseTextFileInterpreter oftype TextFileInterpreter { | ||
|
||
} | ||
|
||
block MaterialsDatabaseCSVInterpreter oftype CSVInterpreter { | ||
delimiter: ","; | ||
enclosing: '"'; | ||
enclosingEscape: '"'; | ||
} | ||
|
||
block MaterialsDatabaseTableInterpreter oftype TableInterpreter { | ||
header: true; | ||
columns: [ | ||
"Compound" oftype text, | ||
"Blacklisted Compound?" oftype text, | ||
"Yield Strength Value" oftype text, | ||
"Yield Strength Unit" oftype text, | ||
"Grain Size Value" oftype text, | ||
"Grain Size Unit" oftype text, | ||
"DOI" oftype text, | ||
"Open Access" oftype text, | ||
]; | ||
} | ||
|
||
block MaterialsDatabaseLoader oftype SQLiteLoader { | ||
table: "MaterialsDatabase"; | ||
file: "./MaterialsDatabase.sqlite"; | ||
} | ||
|
||
MaterialsDatabaseExtractor | ||
-> ZipArchiveInterpreter | ||
-> MaterialsDatabaseCSVPicker | ||
-> MaterialsDatabaseTextFileInterpreter | ||
-> MaterialsDatabaseCSVInterpreter | ||
-> MaterialsDatabaseTableInterpreter | ||
-> MaterialsDatabaseLoader; | ||
} |
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think we need new examples in the Jayvee repository right now, it would be better to move this to the SWC repo. Also please don't provide examples that do not work. These files are meant as documentation for users of Jayvee, not to test implementation ;). |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
// SPDX-FileCopyrightText: 2023 Friedrich-Alexander-Universitat Erlangen-Nurnberg | ||
// | ||
// SPDX-License-Identifier: AGPL-3.0-only | ||
|
||
pipeline ThermoelectricMaterialsPipeline { | ||
|
||
block ThermoelectricMaterialsExtractor oftype HttpExtractor { | ||
url: "https://figshare.com/ndownloader/files/28333845"; | ||
followRedirects: true; | ||
} | ||
|
||
block ThermoelectricMaterialsArchiveInterpreter oftype ArchiveInterpreter { | ||
archiveType: "gz"; | ||
} | ||
|
||
block ThermoelectricMaterialsFilePicker oftype FilePicker { | ||
path: "/ucsb_thermoelectrics.json"; | ||
} | ||
|
||
block ThermoelectricMaterialsTextFileInterpreter oftype TextFileInterpreter { | ||
|
||
} | ||
|
||
// Not working from here on forward, as json file is received and json interpreter is not yet implemented | ||
block ThermoelectricMaterialsJSONInterpreter oftype CSVInterpreter { | ||
enclosing: '"'; | ||
} | ||
|
||
block ThermoelectricMaterialsTableInterpreter oftype TableInterpreter { | ||
header: true; | ||
columns: [ | ||
|
||
]; | ||
} | ||
|
||
block ThermoelectricMaterialsLoader oftype SQLiteLoader { | ||
table: "ThermoelectricMaterials"; | ||
file: "./ThermoelectricMaterials.sqlite"; | ||
} | ||
|
||
ThermoelectricMaterialsExtractor | ||
-> ThermoelectricMaterialsArchiveInterpreter | ||
-> ThermoelectricMaterialsFilePicker | ||
-> ThermoelectricMaterialsTextFileInterpreter | ||
-> ThermoelectricMaterialsJSONInterpreter | ||
-> ThermoelectricMaterialsTableInterpreter | ||
-> ThermoelectricMaterialsLoader; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,7 +19,8 @@ | |
implementsStatic, | ||
} from '@jvalue/jayvee-execution'; | ||
import { IOType, PrimitiveValuetypes } from '@jvalue/jayvee-language-server'; | ||
import * as JSZip from 'jszip'; | ||
import * as zlib from 'node:zlib'; | ||
|
||
import { | ||
inferFileExtensionFromFileExtensionString, | ||
|
@@ -55,12 +56,69 @@ | |
} | ||
return R.ok(fs.right); | ||
} | ||
if (archiveType === 'gz') { | ||
const fs = await this.loadGzFileToInMemoryFileSystem( | ||
archiveFile, | ||
context, | ||
); | ||
if (R.isErr(fs)) { | ||
return fs; | ||
} | ||
return R.ok(fs.right); | ||
} | ||
|
||
return R.err({ | ||
message: `Archive is not a zip-archive`, | ||
diagnostic: { node: context.getCurrentNode(), property: 'name' }, | ||
}); | ||
} | ||
|
||
private async loadGzFileToInMemoryFileSystem( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This function should duplicate a lot of the It would be better to implement it that way, e.g., by extracting the shared logic of putting files into our in-memory file system into one function and reduce these two functions to just extracting the relevant archives and then using the new function to put files into a file system. |
||
archiveFile: BinaryFile, | ||
context: ExecutionContext, | ||
): Promise<R.Result<FileSystem>> { | ||
context.logger.logDebug(`Loading gz file from binary content`); | ||
try { | ||
|
||
const fs = new InMemoryFileSystem(); | ||
const archivedObject = zlib.gunzipSync(archiveFile.content); | ||
|
||
const extNameArchive = path.extname(archiveFile.name); | ||
const fileName = path.basename(archiveFile.name, extNameArchive); | ||
const extName = path.extname(fileName); | ||
|
||
const mimeType = | ||
inferMimeTypeFromContentTypeString(extName) || | ||
MimeType.APPLICATION_OCTET_STREAM; | ||
const fileExtension = | ||
inferFileExtensionFromFileExtensionString(extName) || | ||
FileExtension.NONE; | ||
const file = new BinaryFile( | ||
fileName, | ||
fileExtension, | ||
mimeType, | ||
archivedObject, | ||
); | ||
|
||
const addedFile = fs.putFile( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am very confused how this works because I can only see one There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To my understanding gzip is used to compress individual files. To compress multiple files with gzip, they have to be concated first, e.g to a .tar file. This I would implement in a different feature. Happy to further look into this, if you disagree. |
||
InMemoryFileSystem.getPathSeparator() + fileName, | ||
file, | ||
); | ||
|
||
assert(addedFile != null); | ||
|
||
return R.ok(fs); | ||
} | ||
catch (error: unknown) { | ||
return R.err({ | ||
message: `Unexpected Error ${ | ||
error instanceof Error ? error.message : JSON.stringify(err) | ||
} occured during processing`, | ||
diagnostic: { node: context.getCurrentNode(), property: 'name' }, | ||
}); | ||
} | ||
} | ||
|
||
private async loadZipFileToInMemoryFileSystem( | ||
archiveFile: BinaryFile, | ||
context: ExecutionContext, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,9 +3,8 @@ | |
// SPDX-License-Identifier: AGPL-3.0-only | ||
|
||
import { strict as assert } from 'assert'; | ||
import * as http from 'http'; | ||
import * as https from 'https'; | ||
import * as path from 'path'; | ||
import { http, https } from 'follow-redirects'; | ||
|
||
import * as R from '@jvalue/jayvee-execution'; | ||
import { | ||
|
@@ -110,8 +109,9 @@ | |
} else { | ||
httpGetFunction = http.get; | ||
} | ||
const followRedirects = context.getPropertyValue('followRedirects', PrimitiveValuetypes.Boolean); | ||
return new Promise((resolve) => { | ||
httpGetFunction(url, (response) => { | ||
httpGetFunction(url, { followRedirects: followRedirects }, (response) => { | ||
const responseCode = response.statusCode; | ||
|
||
// Catch errors | ||
|
@@ -126,6 +126,17 @@ | |
); | ||
} | ||
|
||
if (responseCode === 302) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 301 is also a redirect code I assume. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Changed to >= 301, as 300 is used for 'multiple choices'. The remaining There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please do not change it to >= 301, that makes it dependent on other code to ensure this does not handle 400+. You can change it to >=301 and <400 though. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok, done |
||
resolve( | ||
R.err({ | ||
message: `HTTP fetch was redirected with code ${ | ||
responseCode ?? 'undefined' | ||
}. Redirects are either disabled or maximum number of redirects was exeeded.`, | ||
diagnostic: { node: context.getOrFailProperty('url') }, | ||
}), | ||
); | ||
} | ||
|
||
// Get chunked data and store to ArrayBuffer | ||
let rawData = new Uint8Array(0); | ||
response.on('data', (chunk: Buffer) => { | ||
|
@@ -151,7 +162,7 @@ | |
'url', | ||
PrimitiveValuetypes.Text, | ||
); | ||
const url = new URL(urlString); | ||
const url = new URL(response.responseUrl); | ||
let fileName = url.pathname.split('/').pop(); | ||
if (fileName === undefined) { | ||
fileName = url.pathname.replace('/', '-'); | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -19,7 +19,7 @@ | |||||
archiveType: { | ||||||
type: PrimitiveValuetypes.Text, | ||||||
docs: { | ||||||
description: 'The archive type to be interpreted, e.g., `"zip"`.', | ||||||
description: 'The archive type to be interpreted, e.g., `"zip" or "gz`.', | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Changed to 'The archive type to be interpreted, e.g., "zip" or "gz".', |
||||||
}, | ||||||
}, | ||||||
}, | ||||||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -147,6 +147,19 @@ | |||||
} | ||||||
}, | ||||||
}, | ||||||
followRedirects: { | ||||||
type: PrimitiveValuetypes.Boolean, | ||||||
defaultValue: true, | ||||||
docs: { | ||||||
description: 'Indicates, whether to follow redirects on get requests. If `false`, Redirects are disabled. Default `true`', | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Redirects are not disabled, just not followed ;).
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Changed as suggested |
||||||
examples: [ | ||||||
{ | ||||||
code: 'url: "tinyurl.com/4ub9spwz" \n followRedirects: true', | ||||||
description: 'Specifies the URL to fetch the data from and allows redirects.', | ||||||
}, | ||||||
], | ||||||
}, | ||||||
}, | ||||||
}, | ||||||
|
||||||
// Input type: | ||||||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think we need new examples in the Jayvee repository right now, it would be better to move this to the SWC repo.