diff --git a/packages/rdf/docs/metadata.md b/packages/rdf/docs/metadata.md
new file mode 100644
index 00000000..c7cca607
--- /dev/null
+++ b/packages/rdf/docs/metadata.md
@@ -0,0 +1,107 @@
+
+# Metadata operation
+
+## Append
+
+Say you have a `dataset_description.ttl` file containing:
+
+```turtle
+<http://example.org/dataset> a <http://schema.org/Dataset> .
+```
+
+Then a step:
+
+```turtle
+@prefix p: <https://pipeline.described.at/> .
+@prefix code: <https://code.described.at/> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+
+<#attachMetadata> a p:Step ;
+  rdfs:label "Attach metadata" ;
+  code:implementedBy [ a code:EcmaScriptModule ;
+    code:link <node:barnard59-rdf/metadata.js#append> ] ;
+  code:arguments [
+    code:name "input"; code:value "../../metadata/dataset_description.ttl"
+  ],
+  [
+    code:name "dateCreated";
+    code:value "2020-05-30";
+  ],
+  [
+    code:name "dateModified";
+    code:value "TIME_NOW";
+  ] .
+
+```
+
+will append the contents of `dataset_description.ttl` to the stream, with new or updated `schema:dateModified` and `schema:dateCreated` properties:
+
+```turtle
+<http://example.org/dataset> <http://schema.org/dateModified> "2022-04-13T08:55:21.363Z"^^<http://www.w3.org/2001/XMLSchema#dateTime> .
+<http://example.org/dataset> <http://schema.org/dateCreated> "2020-05-30"^^<http://www.w3.org/2001/XMLSchema#dateTime> .
+```
+
+### Parameters
+
+#### input
+
+The quads to append. Can be a file path, a quad stream or a URL pointing to the resource.
+
+### Optional parameters
+
+#### basepath
+
+Sets the base path used to fetch the file.
+
+#### graph
+
+The named graph used for the incoming metadata quads.
+
+### Dataset Classes
+
+The operation updates subjects whose type is a well-known dataset class. Currently:
+
+* http://rdfs.org/ns/void#Dataset
+* http://www.w3.org/ns/dcat#Dataset
+
+receive new or updated `dcterms:created` and `dcterms:modified` properties, and:
+
+* http://schema.org/Dataset
+* https://cube.link/Cube
+
+receive new or updated `schema:dateCreated` and `schema:dateModified` properties.
+
+### Named Date Literals
+
+#### TIME_NOW
+
+The current time.
+
+#### TIME_FILE_CREATION
+
+The file creation time. Applies only to file inputs.
+
+#### TIME_FILE_MODIFICATION
+
+The file modification time. Applies only to file inputs.
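+
+### Programmatic use
+
+The step is normally wired into a pipeline definition, but the underlying
+operation can also be called directly. A minimal sketch, mirroring the package
+tests (file and variable names here are placeholders):
+
+```js
+import getStream from 'get-stream'
+import { Readable } from 'readable-stream'
+import append from 'barnard59-rdf/lib/append.js'
+
+const step = await append({
+  input: 'dataset_description.ttl',
+  basepath: '/path/to/metadata',
+  dateModified: 'TIME_NOW'
+})
+
+// data quads pass through unchanged; metadata quads are appended at the end
+const quads = await getStream.array(Readable.from(dataQuads).pipe(step))
+```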
diff --git a/packages/rdf/lib/append.js b/packages/rdf/lib/append.js
new file mode 100644
index 00000000..bf8dba27
--- /dev/null
+++ b/packages/rdf/lib/append.js
@@ -0,0 +1,49 @@
+import { Transform } from 'readable-stream'
+import { localFetch } from './localFetch/localFetch.js'
+import { applyOptions } from './metadata/applyOptions.js'
+
+class MetadataAppend extends Transform {
+  constructor (context, basePath, input, options) {
+    super({ objectMode: true })
+    this.context = context
+    this.basePath = basePath
+    this.input = input
+    this.options = options
+  }
+
+  _transform (chunk, encoding, callback) {
+    callback(null, chunk)
+  }
+
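+  // Data chunks are forwarded untouched; the metadata is fetched and appended
+  // only once the upstream ends, in _flush below.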
+  async _flush (callback) {
+    try {
+      const { quadStream, metadata } = await localFetch(this.input, this.basePath)
+      for (const quad of await applyOptions(quadStream, metadata, this.options)) {
+        this.push(quad)
+      }
+    } catch (err) {
+      this.destroy(err)
+    } finally {
+      callback()
+    }
+  }
+}
+
+async function append ({
+  input,
+  basepath,
+  dateModified = undefined,
+  dateCreated = undefined,
+  graph = undefined
+} = {}) {
+  if (!input) {
+    throw new Error('Needs input as parameter (url or filename)')
+  }
+  const basePath = this?.basePath ? this.basePath : basepath
+
+  return new MetadataAppend(this, basePath, input, { graph, dateModified, dateCreated })
+}
+
+export default append
diff --git a/packages/rdf/lib/cube/buildCubeShape/Cube.js b/packages/rdf/lib/cube/buildCubeShape/Cube.js
index 981ad345..28a9a63a 100644
--- a/packages/rdf/lib/cube/buildCubeShape/Cube.js
+++ b/packages/rdf/lib/cube/buildCubeShape/Cube.js
@@ -3,8 +3,8 @@ import TermSet from '@rdfjs/term-set'
 import clownface from 'clownface'
 import rdf from 'rdf-ext'
 import cbdCopy from '../../cbdCopy.js'
+import * as ns from '../../namespaces.js'
 import Dimension from './Dimension.js'
-import * as ns from './namespaces.js'
 
 class Cube {
   constructor ({ metadata, observationSet, shape, term }) {
diff --git a/packages/rdf/lib/cube/buildCubeShape/Dimension.js b/packages/rdf/lib/cube/buildCubeShape/Dimension.js
index 0191dbd0..f3ea221f 100644
--- a/packages/rdf/lib/cube/buildCubeShape/Dimension.js
+++ b/packages/rdf/lib/cube/buildCubeShape/Dimension.js
@@ -4,7 +4,7 @@ import clownface from 'clownface'
 import rdf from 'rdf-ext'
 import { fromRdf } from 'rdf-literal'
 import cbdCopy from '../../cbdCopy.js'
-import * as ns from './namespaces.js'
+import * as ns from '../../namespaces.js'
 
 const datatypeParsers = new TermMap([
   [ns.xsd.byte, fromRdf],
diff --git a/packages/rdf/lib/cube/buildCubeShape/index.js b/packages/rdf/lib/cube/buildCubeShape/index.js
index 634aa41f..d39f0817 100644
--- a/packages/rdf/lib/cube/buildCubeShape/index.js
+++ b/packages/rdf/lib/cube/buildCubeShape/index.js
@@ -2,11 +2,11 @@ import TermMap from '@rdfjs/term-map'
 import TermSet from '@rdfjs/term-set'
 import clownface from 'clownface'
 import once from 'lodash/once.js'
-import rdf from 'rdf-ext'
+import $rdf from 'rdf-ext'
 import { Transform } from 'readable-stream'
+import * as ns from '../../namespaces.js'
 import urlJoin from '../../urlJoin.js'
 import Cube from './Cube.js'
-import * as ns from './namespaces.js'
 
 function defaultCube ({ observationSet }) {
   const observationSetIri = observationSet && observationSet.value
@@ -15,7 +15,7 @@ function defaultCube ({ observationSet }) {
     return null
   }
 
-  return rdf.namedNode(urlJoin(observationSetIri, '..'))
+  return $rdf.namedNode(urlJoin(observationSetIri, '..'))
 }
 
 function defaultShape ({ term }) {
@@ -25,7 +25,7 @@ function defaultShape ({ term }) {
     return null
   }
 
-  return rdf.namedNode(urlJoin(cubeIri, 'shape'))
+  return $rdf.namedNode(urlJoin(cubeIri, 'shape'))
 }
 
 class CubeShapeBuilder extends Transform {
@@ -35,7 +35,7 @@ class CubeShapeBuilder extends Transform {
     this.options = {
       cubes: new TermMap(),
       cube: defaultCube,
-      excludeValuesOf: new TermSet(excludeValuesOf ? excludeValuesOf.map(v => rdf.namedNode(v)) : []),
+      excludeValuesOf: new TermSet(excludeValuesOf ? excludeValuesOf.map(v => $rdf.namedNode(v)) : []),
       metadataStream: metadata,
       shape: defaultShape
     }
@@ -45,9 +45,9 @@ class CubeShapeBuilder extends Transform {
 
   async _init () {
     if (this.options.metadataStream) {
-      this.options.metadata = await rdf.dataset().import(this.options.metadataStream)
+      this.options.metadata = await $rdf.dataset().import(this.options.metadataStream)
     } else {
-      this.options.metadata = rdf.dataset()
+      this.options.metadata = $rdf.dataset()
     }
   }
 
@@ -58,7 +58,7 @@ class CubeShapeBuilder extends Transform {
       return callback(err)
     }
 
-    const dataset = rdf.dataset([...chunk])
+    const dataset = $rdf.dataset([...chunk])
 
     const context = {
       dataset,
diff --git a/packages/rdf/lib/localFetch/localFetch.js b/packages/rdf/lib/localFetch/localFetch.js
new file mode 100644
index 00000000..ed5f9b67
--- /dev/null
+++ b/packages/rdf/lib/localFetch/localFetch.js
@@ -0,0 +1,94 @@
+import fsp from 'fs/promises'
+
+import { resolve } from 'path'
+import rdfFetch from '@rdfjs/fetch'
+import fileFetch from 'file-fetch'
+import isStream, { isReadable } from 'isstream'
+import protoFetch from 'proto-fetch'
+import { getParserByExtension } from './lookupParser.js'
+
+function isReadableStream (arg) {
+  return isStream(arg) && isReadable(arg)
+}
+
+function isAbsolute (str) {
+  return str.startsWith('https:') || str.startsWith('http:') || str.startsWith('file:')
+}
+
+async function streamWithMetadata (input) {
+  return {
+    quadStream: input,
+    metadata: {
+      type: input.constructor.name
+    }
+  }
+}
+
+async function fetchHTTPWithMeta (input) {
+  const url = new URL(input, import.meta.url)
+  const res = await rdfFetch(url)
+  return {
+    quadStream: await res.quadStream(),
+    metadata: {
+      type: url.constructor.name,
+      value: url
+    }
+  }
+}
+
+function guessParserForFile (filePath) {
+  const parser = getParserByExtension(filePath)
+  if (!parser) {
+    throw new Error(`No parser could be guessed for ${filePath}`)
+  }
+  return parser
+}
+
+async function fetchFileWithMeta (input) {
+  const filePathURL = new URL(input, import.meta.url)
+  const res = await fileFetch(filePathURL.toString())
+  const stream = res.body
+  const quadStream = await guessParserForFile(input).import(stream)
+  return {
+    quadStream: quadStream,
+    metadata: {
+      type: filePathURL.constructor.name,
+      value: filePathURL.toString(),
+      stats: await fsp.lstat(filePathURL)
+    }
+  }
+}
+
+// Tries to fetch or read locally one file
+async function localFetch (
+  input,
+  basePath
+) {
+  if (!(input)) {
+    throw new Error('needs input filename or URL')
+  }
+  if (isReadableStream(input)) {
+    return streamWithMetadata(input, basePath)
+  }
+  if (typeof input !== 'string') {
+    throw new Error(`needs input filename or URL, got [${typeof input}]`)
+  }
+  const fetch = protoFetch({
+    file: fetchFileWithMeta,
+    http: fetchHTTPWithMeta,
+    https: fetchHTTPWithMeta
+  })
+
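+  // Absolute http(s)/file URLs are dispatched to the matching fetcher as-is;
+  // a relative path is turned into a file:// URL against basePath when given,
+  // otherwise passed through unchanged.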
+  const url = isAbsolute(input)
+    ? input
+    : basePath
+      ? `file://${resolve(basePath, input)}`
+      : input
+
+  return fetch(url)
+}
+
+export { localFetch }
diff --git a/packages/rdf/lib/localFetch/lookupParser.js b/packages/rdf/lib/localFetch/lookupParser.js
new file mode 100644
index 00000000..00e5aa3c
--- /dev/null
+++ b/packages/rdf/lib/localFetch/lookupParser.js
@@ -0,0 +1,11 @@
+import defaultFormats from '@rdfjs/formats-common'
+import mime from 'mime-types'
+
+function getParserByExtension (fileUrl) {
+  const mimeType = mime.lookup(fileUrl.toString())
+  return defaultFormats.parsers.get(mimeType)
+}
+
+export {
+  getParserByExtension
+}
diff --git a/packages/rdf/lib/metadata/applyOptions.js b/packages/rdf/lib/metadata/applyOptions.js
new file mode 100644
index 00000000..c88ac8de
--- /dev/null
+++ b/packages/rdf/lib/metadata/applyOptions.js
@@ -0,0 +1,75 @@
+import TermSet from '@rdfjs/term-set'
+import rdf from 'rdf-ext'
+import * as ns from '../namespaces.js'
+import { xsd } from '../namespaces.js'
+import { wellKnownDatasetClasses, wellKnownDatasetClassesWithDcterms } from './datasetClasses.js'
+import { namedDateLiterals } from './namedDateLiterals.js'
+
+function subjectsWithDatasetType (dataset, classes) {
+  const result = new TermSet()
+  dataset
+    .filter(quad => (quad.predicate.equals(ns.rdf.type) && classes.has(quad.object)))
+    .forEach(quad => {
+      result.add(quad.subject)
+    })
+  return result
+}
+
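+// updateOrInsert implements the "new or updated" semantics from the docs:
+// existing values of the predicate are removed from the matched subjects
+// before a single fresh value is appended.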
+function updateOrInsert (dataset, datasetClasses, predicate, object) {
+  const targetSubjects = subjectsWithDatasetType(dataset, datasetClasses)
+
+  // Remove existent
+  dataset = dataset.filter(quad => {
+    return !(quad.predicate.equals(predicate) && targetSubjects.has(quad.subject))
+  })
+
+  // Append
+  for (const subject of targetSubjects) {
+    dataset.add(rdf.quad(subject, predicate, object))
+  }
+
+  return dataset
+}
+
+function toDateLiteral (item) {
+  return typeof item === 'string' ? rdf.literal(item, xsd.dateTime) : item
+}
+
+function toNamedNode (item) {
+  return typeof item === 'string' ? rdf.namedNode(item) : item
+}
+
+function resolveNamedDate (value, metadata) {
+  return namedDateLiterals.has(value) ? namedDateLiterals.get(value)(metadata) : toDateLiteral(value)
+}
+
+async function applyOptions (quadStream, metadata = {}, options = {}) {
+  let dataset = await rdf.dataset().import(quadStream)
+
+  // dateModified
+  if (options.dateModified) {
+    const dateModifiedLiteral = resolveNamedDate(options.dateModified, metadata)
+
+    dataset = updateOrInsert(dataset, wellKnownDatasetClassesWithDcterms, ns.dcterms.modified, dateModifiedLiteral)
+    dataset = updateOrInsert(dataset, wellKnownDatasetClasses, ns.schema.dateModified, dateModifiedLiteral)
+  }
+
+  // dateCreated
+  if (options.dateCreated) {
+    const dateCreatedLiteral = resolveNamedDate(options.dateCreated, metadata)
+    dataset = updateOrInsert(dataset, wellKnownDatasetClassesWithDcterms, ns.dcterms.created, dateCreatedLiteral)
+    dataset = updateOrInsert(dataset, wellKnownDatasetClasses, ns.schema.dateCreated, dateCreatedLiteral)
+  }
+
+  // Sets graph
+  if (options.graph) {
+    dataset = dataset.map(quad => rdf.quad(quad.subject, quad.predicate, quad.object, toNamedNode(options.graph)))
+  }
+
+  return dataset
+}
+
+export { applyOptions }
diff --git a/packages/rdf/lib/metadata/datasetClasses.js b/packages/rdf/lib/metadata/datasetClasses.js
new file mode 100644
index 00000000..41ad0e6a
--- /dev/null
+++ b/packages/rdf/lib/metadata/datasetClasses.js
@@ -0,0 +1,16 @@
+import TermSet from '@rdfjs/term-set'
+import * as ns from '../namespaces.js'
+
+const wellKnownDatasetClasses = new TermSet([
+  ns.schema.Dataset,
+  ns.cube.Cube
+]
+)
+
+const wellKnownDatasetClassesWithDcterms = new TermSet([
+  ns._void.Dataset,
+  ns.dcat.Dataset
+]
+)
+
+export { wellKnownDatasetClasses, wellKnownDatasetClassesWithDcterms }
diff --git a/packages/rdf/lib/metadata/namedDateLiterals.js b/packages/rdf/lib/metadata/namedDateLiterals.js
new file mode 100644
index 00000000..b203e011
--- /dev/null
+++ b/packages/rdf/lib/metadata/namedDateLiterals.js
@@ -0,0 +1,27 @@
+import rdf from 'rdf-ext'
+import { xsd } from '../namespaces.js'
+
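+// Maps the documented literal names (TIME_NOW, TIME_FILE_CREATION,
+// TIME_FILE_MODIFICATION) to factories producing xsd:dateTime literals;
+// the file-based ones rely on the stats gathered by localFetch.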
+const namedDateLiterals = new Map()
+
+namedDateLiterals.set('TIME_NOW', metadata => {
+  return rdf.literal((new Date()).toISOString(), xsd.dateTime)
+})
+
+namedDateLiterals.set('TIME_FILE_CREATION', metadata => {
+  if (!metadata?.stats?.birthtimeMs) {
+    throw new Error('No metadata.stats.birthtimeMs')
+  }
+  return rdf.literal((new Date(metadata.stats.birthtimeMs)).toISOString(), xsd.dateTime)
+})
+
+namedDateLiterals.set('TIME_FILE_MODIFICATION', metadata => {
+  if (!metadata?.stats?.mtimeMs) {
+    throw new Error('No metadata.stats.mtimeMs')
+  }
+  return rdf.literal((new Date(metadata.stats.mtimeMs)).toISOString(), xsd.dateTime)
+})
+
+export { namedDateLiterals }
diff --git a/packages/rdf/lib/cube/buildCubeShape/namespaces.js b/packages/rdf/lib/namespaces.js
similarity index 54%
rename from packages/rdf/lib/cube/buildCubeShape/namespaces.js
rename to packages/rdf/lib/namespaces.js
index d39b3ee4..0885b187 100644
--- a/packages/rdf/lib/cube/buildCubeShape/namespaces.js
+++ b/packages/rdf/lib/namespaces.js
@@ -5,5 +5,9 @@ const rdf = namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')
 const rdfs = namespace('http://www.w3.org/2000/01/rdf-schema#')
 const sh = namespace('http://www.w3.org/ns/shacl#')
 const xsd = namespace('http://www.w3.org/2001/XMLSchema#')
+const _void = namespace('http://rdfs.org/ns/void#')
+const dcat = namespace('http://www.w3.org/ns/dcat#')
+const schema = namespace('http://schema.org/')
+const dcterms = namespace('http://purl.org/dc/terms/')
 
-export { cube, rdf, rdfs, sh, xsd }
+export { cube, rdf, rdfs, sh, xsd, _void, dcat, schema, dcterms }
diff --git a/packages/rdf/manifest.ttl b/packages/rdf/manifest.ttl
index 57e40827..a36f44ca 100644
--- a/packages/rdf/manifest.ttl
+++ b/packages/rdf/manifest.ttl
@@ -30,3 +30,10 @@
   code:implementedBy [ a code:EcmaScript;
     code:link
   ].
+
+<metadata/append> a p:Operation, p:WritableObjectMode, p:ReadableObjectMode;
+  rdfs:label "Append metadata";
+  rdfs:comment "Fetches, updates and appends a metadata resource";
+  code:implementedBy [ a code:EcmaScript;
+    code:link <node:barnard59-rdf/metadata.js#append>
+  ].
diff --git a/packages/rdf/metadata.js b/packages/rdf/metadata.js
new file mode 100644
index 00000000..fcb4b7a0
--- /dev/null
+++ b/packages/rdf/metadata.js
@@ -0,0 +1,3 @@
+import append from './lib/append.js'
+
+export { append }
diff --git a/packages/rdf/package.json b/packages/rdf/package.json
index 76680ae5..6ee088be 100644
--- a/packages/rdf/package.json
+++ b/packages/rdf/package.json
@@ -1,6 +1,6 @@
 {
   "name": "barnard59-rdf",
-  "version": "1.2.0",
+  "version": "1.2.2",
   "description": "RDF support for Linked Data pipelines",
   "main": "index.js",
   "type": "module",
@@ -20,11 +20,16 @@
   },
   "homepage": "https://github.com/zazuko/barnard59-rdf",
   "dependencies": {
+    "@rdfjs/fetch": "^3.0.0",
+    "@rdfjs/formats-common": "^2.2.0",
     "@rdfjs/namespace": "^1.1.0",
     "@rdfjs/term-map": "^1.0.0",
     "@rdfjs/term-set": "^1.0.1",
     "clownface": "^1.3.0",
+    "file-fetch": "^1.7.0",
     "lodash": "^4.17.21",
+    "mime-types": "^2.1.35",
+    "proto-fetch": "^1.0.0",
     "rdf-ext": "^1.3.2",
     "rdf-literal": "^1.3.0",
     "rdf-transform-triple-to-quad": "^1.0.2",
@@ -32,11 +37,13 @@
   "devDependencies": {
     "@rdfjs/to-ntriples": "^1.0.2",
+    "assert-throws-async": "^3.0.0",
     "c8": "^7.7.3",
     "codecov": "^3.8.2",
     "get-stream": "^6.0.1",
     "isstream": "^0.1.2",
     "mocha": "^9.0.1",
+    "nock": "^13.2.4",
     "stricter-standard": "^0.2.0"
   },
   "engines": {
diff --git a/packages/rdf/test/append.test.js b/packages/rdf/test/append.test.js
new file mode 100644
index 00000000..c6408abe
--- /dev/null
+++ b/packages/rdf/test/append.test.js
@@ -0,0 +1,402 @@
+import { equal, strictEqual } from 'assert'
+import fs from 'fs'
+import fsp from 'fs/promises'
+import { fileURLToPath } from 'url'
+import defaultFormats from '@rdfjs/formats-common'
+import namespace from '@rdfjs/namespace'
+import assertThrows from 'assert-throws-async'
+import getStream from 'get-stream'
+import { isDuplex } from 'isstream'
+import { describe, it } from 'mocha'
+import nock from 'nock'
+import rdf from 'rdf-ext'
+import { Readable } from 'readable-stream'
+import append from '../lib/append.js'
+import { schema, xsd, dcterms } from '../lib/namespaces.js'
+
+const dataPath = './support/dataset.ttl'
+new URL(dataPath, import.meta.url).toString()
+
+const metadataPath = './support/dataset_description.ttl'
+new URL(metadataPath, import.meta.url).toString()
+
+const __dirname = fileURLToPath(new URL('.', import.meta.url))
+
+const ex = namespace('http://example.org/')
+
+async function getRDFDataset (filePath) {
+  return rdf.dataset().import(getRDFStream(filePath))
+}
+
+function getRDFStream (filePath) {
+  const stream = fs.createReadStream(new URL(filePath, import.meta.url))
+  const parser = defaultFormats.parsers.get('text/turtle')
+  return parser.import(stream)
+}
+
+async function applyStep (transform) {
+  const initial = await getRDFDataset(dataPath)
+  const stream = getRDFStream(dataPath).pipe(transform)
+  const final = rdf.dataset(await getStream.array(stream))
+  return { initial, final }
+}
+
+describe('metadata.append', () => {
+  it('should be a factory', () => {
+    strictEqual(typeof append, 'function')
+  })
+
+  it('should throw an error if no argument is given', async () => {
+    await assertThrows(async () => {
+      await append()
+    }, Error, /Needs input as parameter/)
+  })
+
+  it('should return a duplex stream with a stream metadata parameter', async () => {
+    const step = await append({
+      input: getRDFStream(metadataPath)
+    })
+    strictEqual(isDuplex(step), true)
+  })
+
+  it('should return a duplex stream with a path (string) metadata parameter', async () => {
+    const step = await append({
+      input: metadataPath
+    })
+    strictEqual(isDuplex(step), true)
+  })
+
+  it('should return a duplex stream with a URL pointing to a public resource', async () => {
+    // Mocking a remote file.
+    const fileStr = fs.readFileSync(new URL(metadataPath, import.meta.url), 'utf8')
+    nock('https://example.com')
+      .get('/metadata.ttl')
+      .reply(200, fileStr, { 'content-type': 'text/turtle' })
+
+    const stream = await append({
+      input: 'https://example.com/metadata.ttl'
+    })
+    strictEqual(isDuplex(stream), true)
+  })
+
+  it('should append data and metadata with default values', async () => {
+    const all = rdf.dataset()
+    all.addAll(await getRDFDataset(dataPath))
+    all.addAll(await getRDFDataset(metadataPath))
+
+    const step = await append({
+      input: getRDFStream(metadataPath)
+    })
+    const { final } = await applyStep(step)
+
+    equal(
+      final.toCanonical(),
+      all.toCanonical(), 'appended quads not as expected'
+    )
+  })
+
+  it('should append data and metadata with default values, and path as string', async () => {
+    const all = rdf.dataset()
+    all.addAll(await getRDFDataset(dataPath))
+    all.addAll(await getRDFDataset(metadataPath))
+
+    const step = await append({
+      input: metadataPath,
+      basepath: __dirname
+    })
+    const { final } = await applyStep(step)
+
+    equal(
+      final.toCanonical(),
+      all.toCanonical(), 'appended quads not as expected'
+    )
+  })
+
+  it('should append data with the specified graph', async () => {
+    const data = [
+      rdf.quad(ex.subject0, ex.predicate0, ex.object0, ex.graph0)
+    ]
+
+    const metadata = [
+      rdf.quad(ex.subject1, ex.predicate1, ex.object1, ex.graph1),
+      rdf.quad(ex.subject2, ex.predicate2, ex.object2, ex.graph2)
+    ]
+
+    const graphString = 'http://example.org/metadata'
+    const graph = rdf.namedNode(graphString)
+
+    const step = await append({
+      input: Readable.from(metadata),
+      graph: graphString
+    })
+
+    const result = await getStream.array(Readable.from(data).pipe(step))
+
+    strictEqual(result.length, 3)
+    strictEqual(result[0].equals(data[0]), true)
+    strictEqual(result[1].equals(metadata[0]), false)
+    strictEqual(result[2].equals(metadata[1]), false)
+
+    strictEqual(result[1].graph.value, graph.value)
+    strictEqual(result[2].graph.value, graph.value)
+  })
+
+  it('fails at unknown protocol', async () => {
+    await assertThrows(async () => {
+      const step = await append({
+        input: 'unknown::protocol'
+      })
+      await applyStep(step)
+    }, Error, /unknown protocol/)
+  })
+
+  it('fails at file not found', async () => {
+    await assertThrows(async () => {
+      const step = await append({
+        input: 'file:///not/found.ttl'
+      })
+      await applyStep(step)
+    }, Error, /ENOENT: no such file or directory/)
+  })
+})
+
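+// The index-based assertions below follow the implementation's output order:
+// pass-through data quads first, then the fetched metadata, with rewritten
+// date properties re-appended at the end.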
+describe('File System: metadata.append', () => {
+  it('should use resolved literal TIME_FILE_CREATION with dateCreated', async () => {
+    const data = [
+      rdf.quad(ex.subject0, ex.predicate0, ex.object0, ex.graph0)
+    ]
+
+    const step = await append({
+      input: metadataPath,
+      basepath: __dirname,
+      dateCreated: 'TIME_FILE_CREATION'
+    })
+
+    const stats = await fsp.lstat(new URL(metadataPath, import.meta.url))
+    const result = await getStream.array(Readable.from(data).pipe(step))
+    strictEqual(result.length, 7)
+
+    strictEqual(result[4].predicate.value, schema.dateModified.value)
+    strictEqual(result[4].object.value, rdf.literal('2020-05-30').value)
+
+    strictEqual(result[5].predicate.value, dcterms.created.value)
+    strictEqual(result[5].object.value, rdf.literal((new Date(stats.birthtimeMs)).toISOString(), xsd.dateTime).value)
+
+    strictEqual(result[6].predicate.value, schema.dateCreated.value)
+    strictEqual(result[6].object.value, rdf.literal((new Date(stats.birthtimeMs)).toISOString(), xsd.dateTime).value)
+  })
+
+  it('should use resolved literal TIME_FILE_CREATION with dateModified', async () => {
+    const data = [
+      rdf.quad(ex.subject0, ex.predicate0, ex.object0, ex.graph0)
+    ]
+
+    const step = await append({
+      input: metadataPath,
+      basepath: __dirname,
+      dateModified: 'TIME_FILE_CREATION'
+    })
+
+    const stats = await fsp.lstat(new URL(metadataPath, import.meta.url))
+    const result = await getStream.array(Readable.from(data).pipe(step))
+
+    strictEqual(result.length, 7)
+
+    strictEqual(result[4].predicate.value, schema.dateCreated.value)
+    strictEqual(result[4].object.value, rdf.literal('2020-05-30').value)
+
+    strictEqual(result[5].predicate.value, dcterms.modified.value)
+    strictEqual(result[5].object.value, rdf.literal((new Date(stats.birthtimeMs)).toISOString(), xsd.dateTime).value)
+
+    strictEqual(result[6].predicate.value, schema.dateModified.value)
+    strictEqual(result[6].object.value, rdf.literal((new Date(stats.birthtimeMs)).toISOString(), xsd.dateTime).value)
+  })
+
+  it('should use resolved literal TIME_FILE_MODIFICATION with dateCreated', async () => {
+    const data = [
+      rdf.quad(ex.subject0, ex.predicate0, ex.object0, ex.graph0)
+    ]
+
+    const step = await append({
+      input: metadataPath,
+      basepath: __dirname,
+      dateCreated: 'TIME_FILE_MODIFICATION'
+    })
+
+    const stats = await fsp.lstat(new URL(metadataPath, import.meta.url))
+    const result = await getStream.array(Readable.from(data).pipe(step))
+    strictEqual(result.length, 7)
+
+    strictEqual(result[4].predicate.value, schema.dateModified.value)
+    strictEqual(result[4].object.value, rdf.literal('2020-05-30').value)
+
+    strictEqual(result[5].predicate.value, dcterms.created.value)
+    strictEqual(result[5].object.value, rdf.literal((new Date(stats.mtimeMs)).toISOString(), xsd.dateTime).value)
+
+    strictEqual(result[6].predicate.value, schema.dateCreated.value)
+    strictEqual(result[6].object.value, rdf.literal((new Date(stats.mtimeMs)).toISOString(), xsd.dateTime).value)
+  })
+
+  it('should use resolved literal TIME_FILE_MODIFICATION with dateModified', async () => {
+    const data = [
+      rdf.quad(ex.subject0, ex.predicate0, ex.object0, ex.graph0)
+    ]
+
+    const step = await append({
+      input: metadataPath,
+      basepath: __dirname,
+      dateModified: 'TIME_FILE_MODIFICATION'
+    })
+
+    const stats = await fsp.lstat(new URL(metadataPath, import.meta.url))
+    const result = await getStream.array(Readable.from(data).pipe(step))
+
+    strictEqual(result.length, 7)
+
+    strictEqual(result[4].predicate.value, schema.dateCreated.value)
+    strictEqual(result[4].object.value, rdf.literal('2020-05-30').value)
+
+    strictEqual(result[5].predicate.value, dcterms.modified.value)
+    strictEqual(result[5].object.value, rdf.literal((new Date(stats.mtimeMs)).toISOString(), xsd.dateTime).value)
+
+    strictEqual(result[6].predicate.value, schema.dateModified.value)
+    strictEqual(result[6].object.value, rdf.literal((new Date(stats.mtimeMs)).toISOString(), xsd.dateTime).value)
+  })
+
+  it('should use resolved literal TIME_NOW with dateModified', async () => {
+    const data = [
+      rdf.quad(ex.subject0, ex.predicate0, ex.object0, ex.graph0)
+    ]
+    const metadata = [
+      rdf.quad(ex.subject1, rdf.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), schema.Dataset),
+      rdf.quad(ex.subject1, schema.dateCreated, rdf.literal('2020-05-30')),
+      rdf.quad(ex.subject1, schema.dateModified, rdf.literal('2020-05-30'))
+    ]
+    const step = await append({
+      input: Readable.from(metadata),
+      dateModified: 'TIME_NOW'
+    })
+
+    const result = await getStream.array(Readable.from(data).pipe(step))
+
+    strictEqual(result.length, 4)
+
+    strictEqual(result[2].predicate.value, schema.dateCreated.value)
+    strictEqual(result[2].object.value, rdf.literal('2020-05-30').value)
+
+    strictEqual(result[3].predicate.value, schema.dateModified.value)
+    strictEqual(result[3].object.value === rdf.literal('2020-05-30').value, false)
+  })
+
+  it('should use resolved literal TIME_NOW with dateCreated', async () => {
+    const data = [
+      rdf.quad(ex.subject0, ex.predicate0, ex.object0, ex.graph0)
+    ]
+    const metadata = [
+      rdf.quad(ex.subject1, rdf.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), schema.Dataset),
+      rdf.quad(ex.subject1, schema.dateCreated, rdf.literal('2020-05-30')),
+      rdf.quad(ex.subject1, schema.dateModified, rdf.literal('2020-05-30'))
+    ]
+    const step = await append({
+      input: Readable.from(metadata),
+      dateCreated: 'TIME_NOW'
+    })
+
+    const result = await getStream.array(Readable.from(data).pipe(step))
+
+    strictEqual(result.length, 4)
+
+    strictEqual(result[2].predicate.value, schema.dateModified.value)
+    strictEqual(result[2].object.value, rdf.literal('2020-05-30').value)
+
+    strictEqual(result[3].predicate.value, schema.dateCreated.value)
+    strictEqual(result[3].object.value === rdf.literal('2020-05-30').value, false)
+  })
+
+  it('should use specified literal with dateModified (string)', async () => {
+    const data = [
+      rdf.quad(ex.subject0, ex.predicate0, ex.object0, ex.graph0)
+    ]
+    const metadata = [
+      rdf.quad(ex.subject1, rdf.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), schema.Dataset),
+      rdf.quad(ex.subject1, schema.dateModified, rdf.literal('2020-05-30'))
+    ]
+    const step = await append({
+      input: Readable.from(metadata),
+      dateModified: '1999-12-31'
+    })
+
+    const result = await getStream.array(Readable.from(data).pipe(step))
+
+    strictEqual(result.length, 3)
+
+    strictEqual(result[2].predicate.value, schema.dateModified.value)
+    strictEqual(result[2].object.value, rdf.literal('1999-12-31').value)
+  })
+
+  it('should use specified literal with dateCreated (string)', async () => {
+    const data = [
+      rdf.quad(ex.subject0, ex.predicate0, ex.object0, ex.graph0)
+    ]
+    const metadata = [
+      rdf.quad(ex.subject1, rdf.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), schema.Dataset),
+      rdf.quad(ex.subject1, schema.dateCreated, rdf.literal('2020-05-30'))
+    ]
+    const step = await append({
+      input: Readable.from(metadata),
+      dateCreated: '1999-12-31'
+    })
+
+    const result = await getStream.array(Readable.from(data).pipe(step))
+
+    strictEqual(result.length, 3)
+
+    strictEqual(result[2].predicate.value, schema.dateCreated.value)
+    strictEqual(result[2].object.value, rdf.literal('1999-12-31').value)
+  })
+
+  it('should use specified literal with dateModified', async () => {
+    const data = [
+      rdf.quad(ex.subject0, ex.predicate0, ex.object0, ex.graph0)
+    ]
+    const metadata = [
+      rdf.quad(ex.subject1, rdf.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), schema.Dataset),
+      rdf.quad(ex.subject1, schema.dateModified, rdf.literal('2020-05-30'))
+    ]
+    const step = await append({
+      input: Readable.from(metadata),
+      dateModified: rdf.literal('1999-12-31', xsd.dateTime)
+    })
+
+    const result = await getStream.array(Readable.from(data).pipe(step))
+
+    strictEqual(result.length, 3)
+
+    strictEqual(result[2].predicate.value, schema.dateModified.value)
+    strictEqual(result[2].object.value, rdf.literal('1999-12-31').value)
+  })
+
+  it('should use specified literal with dateCreated', async () => {
+    const data = [
+      rdf.quad(ex.subject0, ex.predicate0, ex.object0, ex.graph0)
+    ]
+    const metadata = [
+      rdf.quad(ex.subject1, rdf.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), schema.Dataset),
+      rdf.quad(ex.subject1, schema.dateCreated, rdf.literal('2020-05-30'))
+    ]
+    const step = await append({
+      input: Readable.from(metadata),
+      dateCreated: rdf.literal('1999-12-31', xsd.dateTime)
+    })
+
+    const result = await getStream.array(Readable.from(data).pipe(step))
+
+    strictEqual(result.length, 3)
+
+    strictEqual(result[2].predicate.value, schema.dateCreated.value)
+    strictEqual(result[2].object.value, rdf.literal('1999-12-31').value)
+  })
+})
diff --git a/packages/rdf/test/localFetch/localFetch.test.js b/packages/rdf/test/localFetch/localFetch.test.js
new file mode 100644
index 00000000..e8a53a3e
--- /dev/null
+++ b/packages/rdf/test/localFetch/localFetch.test.js
@@ -0,0 +1,109 @@
+import { equal, strictEqual } from 'assert'
+import fs from 'fs'
+import { resolve } from 'path'
+import defaultFormats from '@rdfjs/formats-common'
+import assertThrows from 'assert-throws-async'
+import { describe, it } from 'mocha'
+import nock from 'nock'
+import rdf from 'rdf-ext'
+import { localFetch } from '../../lib/localFetch/localFetch.js'
+
+const datasetPath = '../support/dataset.ttl'
+const datasetAbsolutePath = new URL(datasetPath, import.meta.url).toString()
+
+async function getRDFDataset (filePath) {
+  return rdf.dataset().import(getRDFStream(filePath))
+}
+
+function getRDFStream (filePath) {
+  const stream = fs.createReadStream(new URL(filePath, import.meta.url))
+  const parser = defaultFormats.parsers.get('text/turtle')
+  return parser.import(stream)
+}
+
+describe('metadata.lfetch', () => {
+  it('should be a function', () => {
+    strictEqual(typeof localFetch, 'function')
+  })
+
+  it('should throw an error if no input is given', async () => {
+    await assertThrows(async () => {
+      await localFetch()
+    }, Error, /needs input filename or URL/)
+  })
+
+  it('should throw an error if no valid input is given', async () => {
+    await assertThrows(async () => {
+      await localFetch({ not: 'this' })
+    }, Error, /needs input filename or URL, got /)
+  })
+
+  it('with defaults, should get the same dataset', async () => {
+    const expected = await getRDFDataset(datasetPath)
+    const { quadStream } = await localFetch(getRDFStream(datasetPath))
+    const actual = await rdf.dataset().import(quadStream)
+    equal(expected.equals(actual), true)
+  })
+
+  it('with filename and base, should get the same dataset', async () => {
+    const expected = await getRDFDataset(datasetPath)
+    const { quadStream } = await localFetch(datasetPath, resolve('./test/ldfetch'))
+    const actual = await rdf.dataset().import(quadStream)
+
+    equal(expected.equals(actual), true)
+  })
+
+  it('with absolute filename, should get the same dataset', async () => {
+    const expected = await getRDFDataset(datasetPath)
+    const { quadStream } = await localFetch(datasetAbsolutePath)
+    const actual = await rdf.dataset().import(quadStream)
+
+    equal(expected.equals(actual), true)
+  })
+
+  it('with absolute filename, should ignore basePath and get the same dataset', async () => {
+    const expected = await getRDFDataset(datasetPath)
+    const { quadStream } = await localFetch(datasetAbsolutePath, '/unknown/')
+    const actual = await rdf.dataset().import(quadStream)
+
+    equal(expected.equals(actual), true)
+  })
+
+  it('fails at file not found', async () => {
+    await assertThrows(async () => {
+      await localFetch('file:///not/found.ttl')
+    }, Error, /ENOENT: no such file or directory/)
+  })
+
+  it('fails if not string', async () => {
+    await assertThrows(async () => {
+      await localFetch(['a', 'b'])
+    }, Error, /needs input filename or URL, got \[object\]/)
+  })
+
+  it('should get a dataset from a URL pointing to a public resource', async () => {
+    // Mocking a remote file.
+    const fileStr = fs.readFileSync(new URL(datasetPath, import.meta.url), 'utf8')
+    nock('https://example.com')
+      .get('/metadata.ttl')
+      .reply(200, fileStr, { 'content-type': 'text/turtle' })
+
+    const expected = await getRDFDataset(datasetPath)
+    const { quadStream } = await localFetch('https://example.com/metadata.ttl')
+    const actual = await rdf.dataset().import(quadStream)
+
+    equal(expected.equals(actual), true)
+  })
+
+  it('fails at unknown file extension', async () => {
+    await assertThrows(async () => {
+      await localFetch(new URL('../support/file.unknown.extension', import.meta.url).toString())
+    }, Error, /No parser could be guessed for/)
+  })
+
+  it('fails at unknown protocol', async () => {
+    await assertThrows(async () => {
+      await localFetch('unknown::protocol')
+    }, Error, /unknown protocol/)
+  })
+})
diff --git a/packages/rdf/test/localFetch/lookupParser.test.js b/packages/rdf/test/localFetch/lookupParser.test.js
new file mode 100644
index 00000000..5679314c
--- /dev/null
+++ b/packages/rdf/test/localFetch/lookupParser.test.js
@@ -0,0 +1,38 @@
+import { strictEqual } from 'assert'
+import { describe, it } from 'mocha'
+import { getParserByExtension } from '../../lib/localFetch/lookupParser.js'
+
+describe('metadata.lfetch.lookupParser', () => {
+  it('should be a function', () => {
+    strictEqual(typeof getParserByExtension, 'function')
+  })
+
+  it('should return a parser for well-known RDF extensions', async () => {
+    const rdfExtensions = [
+      'jsonld',
+      'trig',
+      'nq',
+      'nt',
+      'n3',
+      'ttl',
+      'rdf'
+    ]
+
+    rdfExtensions.forEach(extension => {
+      const parser = getParserByExtension(`/file.${extension}`)
+      strictEqual(parser !== undefined, true, `Should get a parser for extension ${extension}`)
+    })
+  })
+
+  it('should return undefined for non standard RDF extensions', async () => {
+    const nonRdfExtensions = [
+      'xml',
+      'turtle'
+    ]
+
+    nonRdfExtensions.forEach(extension => {
+      const parser = getParserByExtension(`/file.${extension}`)
+      strictEqual(parser === undefined, true, `Should not get a parser for extension ${extension}`)
+    })
+  })
+})
diff --git a/packages/rdf/test/metadata/applyOptions.test.js b/packages/rdf/test/metadata/applyOptions.test.js
new file mode 100644
index 00000000..750a4834
--- /dev/null
+++ b/packages/rdf/test/metadata/applyOptions.test.js
@@ -0,0 +1,206 @@
+import { strictEqual } from 'assert'
+import namespace from '@rdfjs/namespace'
+import { describe, it } from 'mocha'
+import rdf from 'rdf-ext'
+import { Readable } from 'readable-stream'
+import { applyOptions } from '../../lib/metadata/applyOptions.js'
+import * as ns from '../../lib/namespaces.js'
+import { xsd } from '../../lib/namespaces.js'
+
+const ex = namespace('http://example.org/')
+
+describe('applyOptions', () => {
+  it('should be a function', () => {
+    strictEqual(typeof applyOptions, 'function')
+  })
+
+  it('should return the same data if no options given', async () => {
+    const data = [
+      rdf.quad(ex.subject0, ns.rdf.type, ex.type0, ex.graph1)
+    ]
+
+    const options = {}
+    const quadStream = Readable.from(data, options)
+    const result = [...await applyOptions(quadStream)]
+
+    strictEqual(result.length, 1)
+    strictEqual(result[0].equals(data[0]), true)
+  })
+
+  it('should update or append schema:dateCreated for known classes', async () => {
+    const data = [
+      rdf.quad(ex.subject0, ns.rdf.type, ns.schema.Dataset, ex.graph1),
+      rdf.quad(ex.subject0, ns.schema.dateCreated, rdf.literal('Not me'), ex.graph0),
+      rdf.quad(ex.subject1, ns.rdf.type, ex.type1, ex.graph0),
+      rdf.quad(ex.subject3, ns.rdf.type, ns.schema.Dataset, ex.graph0)
+    ]
+
+    const options = {
+      dateCreated: rdf.literal('1999-12-31', xsd.dateTime)
+    }
+    const quadStream = Readable.from(data)
+    const result = [...await applyOptions(quadStream, {}, options)]
+
+    strictEqual(result.length, 5)
+    strictEqual(result[0].equals(data[0]), true)
+    strictEqual(result[1].equals(data[2]), true)
+    strictEqual(result[2].equals(data[3]), true)
+    strictEqual(result[3].equals(rdf.quad(ex.subject0, ns.schema.dateCreated, rdf.literal('1999-12-31', xsd.dateTime))), true)
+    strictEqual(result[4].equals(rdf.quad(ex.subject3, ns.schema.dateCreated, rdf.literal('1999-12-31', xsd.dateTime))), true)
+  })
+
+  it('should update or append schema:dateCreated for known classes (string)', async () => {
+    const data = [
+      rdf.quad(ex.subject0, ns.rdf.type, ns.schema.Dataset, ex.graph1)
+    ]
+
+    const options = {
+      dateCreated: rdf.literal('1999-12-31', xsd.dateTime).toString()
+    }
+    const quadStream = Readable.from(data)
+    const result = [...await applyOptions(quadStream, {}, options)]
+
+    strictEqual(result.length, 2)
+    strictEqual(result[0].equals(data[0]), true)
+    strictEqual(result[1].equals(rdf.quad(ex.subject0, ns.schema.dateCreated, rdf.literal('1999-12-31', xsd.dateTime))), true)
+  })
+
+  it('should update or append dcterms:created for known classes', async () => {
+    const data = [
+      rdf.quad(ex.subject0, ns.rdf.type, ns.dcat.Dataset, ex.graph1),
+      rdf.quad(ex.subject0, ns.dcterms.created, rdf.literal('Not me'), ex.graph0),
+      rdf.quad(ex.subject1, ns.rdf.type, ex.type1, ex.graph0),
+      rdf.quad(ex.subject3, ns.rdf.type, ns.dcat.Dataset, ex.graph0)
+    ]
+
+    const options = {
+      dateCreated: rdf.literal('1999-12-31', xsd.dateTime)
+    }
+    const quadStream = Readable.from(data)
+    const result = [...await applyOptions(quadStream, {}, options)]
+
+    strictEqual(result.length, 5)
+    strictEqual(result[0].equals(data[0]), true)
+    strictEqual(result[1].equals(data[2]), true)
+    strictEqual(result[2].equals(data[3]), true)
+    strictEqual(result[3].equals(rdf.quad(ex.subject0, ns.dcterms.created, rdf.literal('1999-12-31', xsd.dateTime))), true)
+    strictEqual(result[4].equals(rdf.quad(ex.subject3, ns.dcterms.created, rdf.literal('1999-12-31', xsd.dateTime))), true)
+  })
+
+  it('should update or append schema:dateModified for known classes', async () => {
+    const data = [
+      rdf.quad(ex.subject0, ns.rdf.type, ns.schema.Dataset, ex.graph1),
+      rdf.quad(ex.subject0, ns.schema.dateModified, rdf.literal('Not me'), ex.graph0),
+      rdf.quad(ex.subject1, ns.rdf.type, ex.type1, ex.graph0),
+      rdf.quad(ex.subject3, ns.rdf.type, ns.schema.Dataset, ex.graph0)
+    ]
+
+    const options = {
+      dateModified: rdf.literal('1999-12-31', xsd.dateTime)
+    }
+    const quadStream = Readable.from(data)
+    const result = [...await applyOptions(quadStream, {}, options)]
+
+    strictEqual(result.length, 5)
+    strictEqual(result[0].equals(data[0]), true)
+    strictEqual(result[1].equals(data[2]), true)
+    strictEqual(result[2].equals(data[3]), true)
+    strictEqual(result[3].equals(rdf.quad(ex.subject0, ns.schema.dateModified, rdf.literal('1999-12-31', xsd.dateTime))), true)
+    strictEqual(result[4].equals(rdf.quad(ex.subject3, ns.schema.dateModified, rdf.literal('1999-12-31', xsd.dateTime))), true)
+  })
+
+  it('should update or append dcterms:modified for known classes', async () => {
+    const data = [
+      rdf.quad(ex.subject0, ns.rdf.type, ns.dcat.Dataset, ex.graph1),
+      rdf.quad(ex.subject0, ns.dcterms.modified, rdf.literal('Not me'), ex.graph0),
+      rdf.quad(ex.subject1, ns.rdf.type, ex.type1, ex.graph0),
+      rdf.quad(ex.subject3, ns.rdf.type, ns.dcat.Dataset, ex.graph0)
+    ]
+
+    const options = {
+      dateModified: rdf.literal('1999-12-31', xsd.dateTime)
+    }
+    const quadStream = Readable.from(data)
+    const result = [...await applyOptions(quadStream, {}, options)]
+
+    strictEqual(result.length, 5)
+    strictEqual(result[0].equals(data[0]), true)
+    strictEqual(result[1].equals(data[2]), true)
+    strictEqual(result[2].equals(data[3]), true)
+    strictEqual(result[3].equals(rdf.quad(ex.subject0, ns.dcterms.modified, rdf.literal('1999-12-31', xsd.dateTime))), true)
+    strictEqual(result[4].equals(rdf.quad(ex.subject3, ns.dcterms.modified, rdf.literal('1999-12-31', xsd.dateTime))), true)
+  })
+
+  it('should update or append both dcterms:modified and schema:dateModified for known classes', async () => {
+    const data = [
+      rdf.quad(ex.subject0, ns.rdf.type, ns.dcat.Dataset, ex.graph1),
+      rdf.quad(ex.subject0, ns.rdf.type, ns.schema.Dataset, ex.graph1)
+    ]
+
+    const options = {
+      dateModified: rdf.literal('1999-12-31', xsd.dateTime)
+    }
+    const quadStream = Readable.from(data)
+    const result = [...await applyOptions(quadStream, {}, options)]
+
+    strictEqual(result.length, 4)
+    strictEqual(result[0].equals(data[0]), true)
+    strictEqual(result[1].equals(data[1]), true)
+    strictEqual(result[2].equals(rdf.quad(ex.subject0, ns.dcterms.modified, rdf.literal('1999-12-31', xsd.dateTime))), true)
+    strictEqual(result[3].equals(rdf.quad(ex.subject0, ns.schema.dateModified, rdf.literal('1999-12-31', xsd.dateTime))), true)
+  })
+
+  it('should update or append schema:dateModified for known classes (string)', async () => {
+    const data = [
+      rdf.quad(ex.subject0, ns.rdf.type, ns.schema.Dataset, ex.graph1)
+    ]
+
+    const options = {
+      dateModified: rdf.literal('1999-12-31', xsd.dateTime).toString()
+    }
+    const quadStream = Readable.from(data)
+    const result = [...await applyOptions(quadStream, {}, options)]
+
+    strictEqual(result.length, 2)
+    strictEqual(result[0].equals(data[0]), true)
+    strictEqual(result[1].equals(rdf.quad(ex.subject0, ns.schema.dateModified, rdf.literal('1999-12-31', xsd.dateTime))), true)
+  })
+
+  it('should set the corresponding graph', async () => {
+    const data = [
+      rdf.quad(ex.subject0, ns.rdf.type, ns.schema.Dataset, ex.graph1),
+      rdf.quad(ex.subject0, ns.schema.dateModified, rdf.literal('Not me'), ex.graph0),
+      rdf.quad(ex.subject1, ns.rdf.type, ex.type1, ex.graph0),
+      rdf.quad(ex.subject3, ns.rdf.type, ns.schema.Dataset, ex.graph0)
+    ]
+
+    const options = {
+      graph: ex.graph2,
+      dateModified: rdf.literal('1999-12-31', xsd.dateTime)
+    }
+    const quadStream = Readable.from(data)
+    const result = [...await applyOptions(quadStream, {}, options)]
+
+    strictEqual(result.length, 5)
+    strictEqual(result[0].graph.equals(ex.graph2), true)
+    strictEqual(result[1].graph.equals(ex.graph2), true)
+    strictEqual(result[2].graph.equals(ex.graph2), true)
+    strictEqual(result[3].graph.equals(ex.graph2), true)
+    strictEqual(result[4].graph.equals(ex.graph2), true)
+  })
+
+  it('should set the corresponding graph (string)', async () => {
+    const data = [
+      rdf.quad(ex.subject0, ns.rdf.type, ns.schema.Dataset, ex.graph1)
+    ]
+
+    const options = {
+      graph: ex.graph2.value
+    }
+    const quadStream = Readable.from(data)
+    const result = [...await applyOptions(quadStream, {}, options)]
+
+    strictEqual(result.length, 1)
+    strictEqual(result[0].graph.equals(ex.graph2), true)
+  })
+})
diff --git a/packages/rdf/test/support/dataset.ttl b/packages/rdf/test/support/dataset.ttl
new file mode 100644
index 00000000..109a1285
--- /dev/null
+++ b/packages/rdf/test/support/dataset.ttl
@@ -0,0 +1,8 @@
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix schema: <http://schema.org/> .
+
+<http://example.org/dataset>
+  a <http://example.org/Dataset> ;
+  schema:dateModified "2020-05-30"^^xsd:date .
diff --git a/packages/rdf/test/support/dataset_description.ttl b/packages/rdf/test/support/dataset_description.ttl
new file mode 100644
index 00000000..18b8f107
--- /dev/null
+++ b/packages/rdf/test/support/dataset_description.ttl
@@ -0,0 +1,11 @@
+BASE <http://example.org/>
+PREFIX void: <http://rdfs.org/ns/void#>
+PREFIX dcat: <http://www.w3.org/ns/dcat#>
+PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
+PREFIX schema: <http://schema.org/>
+
+<dataset>
+  a void:Dataset, dcat:Dataset, schema:Dataset ;
+  schema:dateCreated "2020-05-30"^^xsd:date ; # The date on which the CreativeWork was created or the item was added to a DataFeed.
+  schema:dateModified "2020-05-30"^^xsd:date ; # The date on which the CreativeWork was most recently modified or when the item's entry was modified within a DataFeed.
+.
\ No newline at end of file
diff --git a/packages/rdf/test/support/file.unknown.extension b/packages/rdf/test/support/file.unknown.extension
new file mode 100644
index 00000000..9ae9ea0a
--- /dev/null
+++ b/packages/rdf/test/support/file.unknown.extension
@@ -0,0 +1 @@
+Contents
\ No newline at end of file