diff --git a/README.md b/README.md index 9dbc33fc4..2e04b4e7b 100644 --- a/README.md +++ b/README.md @@ -185,7 +185,7 @@ The `types` property is an array of string values that describes the type of the - `has-<type>`: for each type of content that occurs at least once in the section, e.g. has-heading - `is-<type>-only`: for sections that only have content of a single type, e.g. is-image-only -- `is-<type1>-<type2>-<type3>`, `is-<type1>-<type2>`, and `is-<type1>` for the top 3 most frequent types of children in the section. For instance a gallery with a heading and description would be `is-image-paragraph-heading`. +- `is-<type1>-<type2>-<type3>`, `is-<type1>-<type2>`, and `is-<type1>` for the top 3 most frequent types of children in the section. For instance a gallery with a heading and description would be `is-image-paragraph-heading`. You can infer additional types using [`utils.types`](#infer-content-types-with-utilstypes). Each section has additional content-derived metadata properties, in particular: @@ -364,4 +364,29 @@ const sizes = [ content.document = new VDOM(content.mdast, {widths, sizes}).getDocument(); ``` -This gives you fine-grained control over the image widths that are made available and will get loaded by browsers based on the width of the browser window. With `util.vdom` you can have different settings per page- or section-type. \ No newline at end of file +This gives you fine-grained control over the image widths that are made available and will get loaded by browsers based on the width of the browser window. With `utils.vdom` you can have different settings per page- or section-type. + +### Infer Content Types with `utils.types` + +In addition to the automatically inferred content types for each section, `utils.types` provides a `TypeMatcher` utility class that allows matching section content against a simple expression language, thus enriching the `section[].types` values. 
+ + ```javascript +const TypeMatcher = require('@adobe/hypermedia-pipeline').utils.types; + +const matcher = new TypeMatcher(content.sections); +matcher.match('^heading', 'starts-with-heading'); +content.sections = matcher.process(); +``` + +In the example above, all sections that have a `heading` as the first child will get the value `starts-with-heading` appended to the `types` array. `^heading` is an example of the content expression language, which allows matching content against a simple regular expression-like syntax. + +#### Content Expression Language + +* `^heading` – the first element is a `heading` +* `paragraph$` – the last element is a `paragraph` +* `heading image+` – a `heading` followed by one or more `image`s +* `heading? image` – an optional `heading` followed by one `image` +* `heading paragraph* image` – a `heading` followed by any number of `paragraph`s (also no paragraphs at all), followed by an `image` +* `(paragraph|list)` – a `paragraph` or a `list` +* `^heading (image paragraph)+$` – one `heading`, followed by one or more pairs of `image` and `paragraph`, and nothing else diff --git a/package-lock.json b/package-lock.json index 3e838e9d9..5ff86baee 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,6 +1,6 @@ { "name": "@adobe/hypermedia-pipeline", - "version": "0.5.1-pre.9", + "version": "0.5.1-pre.10", "lockfileVersion": 1, "requires": true, "dependencies": { diff --git a/package.json b/package.json index 018f51c9f..3a7486ec7 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@adobe/hypermedia-pipeline", - "version": "0.5.1-pre.9", + "version": "0.5.1-pre.10", "description": "", "repository": "https://github.com/adobe/hypermedia-pipeline", "main": "index.js", diff --git a/src/utils/index.js b/src/utils/index.js index c4ed43cf1..92d6618f9 100644 --- a/src/utils/index.js +++ b/src/utils/index.js @@ -11,7 +11,9 @@ */ const vdom = require('./mdast-to-vdom'); +const types = require('./match-section-types'); module.exports 
/**
 * Registers type matchers and applies them to sections in order to enrich
 * each section's `types` list. A matcher is either a content expression such
 * as `heading? (image|paragraph)+` or a predicate function that receives the
 * list of child node types of a section.
 */
class TypeMatcher {
  /**
   * Creates a new type matcher for a section node or a list of section nodes.
   * @param {(Node|Node[])} section the section node or list of section nodes to
   * evaluate the registered matchers against. A value that is neither an array
   * nor an object with `children` is treated as "no sections".
   */
  constructor(section = []) {
    // Remember the input shape so that process() can hand back the same shape:
    // a single node in gives a single node out, an array in gives an array out
    // (even when that array holds exactly one section).
    this._single = false;
    if (Array.isArray(section)) {
      this._sections = section;
    } else if (section && section.children) {
      this._sections = [section];
      this._single = true;
    } else {
      this._sections = [];
    }
    this._matchers = [];
  }

  /**
   * A predicate function that tests a list of child types.
   * @typedef {function(string[]): boolean} matcherFunction
   * @param {string[]} types a list of child types
   * @returns {boolean} true for matching string arrays
   */

  /**
   * Registers a matcher together with the type name it stands for. During
   * `process()` the matcher is evaluated against the child types of every
   * section; where it matches (returns true), `type` is added to that
   * section's `types`.
   * @param {(string|matcherFunction)} matcher either a content expression
   * or a predicate function
   * @param {string} type the type name to add to matching sections
   * @returns {TypeMatcher} this, enabling chaining
   */
  match(matcher, type) {
    const matchfn = typeof matcher === 'function' ? matcher : TypeMatcher.matchfn(matcher);

    this._matchers.push([matchfn, type]);

    return this;
  }

  /**
   * Finds all matching types for a given sequence of content types.
   * @private
   * @param {string[]} types an array of content types
   * @returns {string[]} the type names of all matchers that accept `types`,
   * in registration order
   */
  matches(types) {
    return this._matchers
      .filter(([matchfn]) => matchfn(types))
      .map(([, type]) => type);
  }

  /**
   * Turns a content expression into a matcher predicate function.
   * @private
   * @param {string} pattern a regex-like content expression
   * @returns {matcherFunction} a corresponding matcher function that returns true
   * for sequences matching the pattern
   */
  static matchfn(pattern) {
    // `match` is the content-expression matcher imported from ./pattern-compiler.
    return function matchtypes(types) {
      return match(types, pattern);
    };
  }

  /**
   * Processes the registered matchers and returns copies of the sections
   * provided in the constructor, with the matched types appended to each
   * section's `types` property. The input sections are not modified.
   * @returns {(Node|Node[])} a single processed section if a single node was
   * passed to the constructor, otherwise an array of processed sections
   */
  process() {
    const processed = this._sections.map((section) => {
      // Collect the type of each child; skip everything that is not a node
      // or does not have a type.
      const children = Array.isArray(section.children) ? section.children : [];
      const childtypes = children
        .filter(node => node && node.type)
        .map(node => node.type);
      const oldtypes = Array.isArray(section.types) ? section.types : [];

      // Later sources win in Object.assign, so the merged list has to come
      // last; otherwise a pre-existing `section.types` would overwrite it and
      // the matched types would be lost.
      return Object.assign({}, section, {
        types: [...oldtypes, ...this.matches(childtypes)],
      });
    });

    return this._single ? processed[0] : processed;
  }
}
/**
 * Turns a content-expression like "heading? (paragraph|image)+" into
 * a proper regular expression. The expression is meant to be tested against
 * a sequence of type names in which every name is followed by a `·`
 * delimiter, e.g. `heading·paragraph·`.
 * @param {string} pattern the content expression
 * @returns {RegExp} a regular expression that matches strings following
 * the high-level pattern.
 */
function compile(pattern) {
  const source = pattern
    // Wrap every type name in a group that ends with the delimiter and must
    // start on a token boundary (start of input or right after a delimiter).
    // Without the left boundary `heading` would also match the tail of a
    // longer name such as `subheading`.
    .replace(/(\w+)/g, '((?<=^|·)$1·)')
    // Spaces only separate tokens in the expression; the delimiter inside
    // each group already enforces adjacency.
    .replace(/ /g, '');
  return new RegExp(source);
}

/**
 * Determines if the provided list of child nodes matches the
 * type expression
 * @param {string[]} list a list of node types
 * @param {string} pattern a content-expression like "heading? (paragraph|image)+"
 * @returns {boolean} true if the list matches the pattern
 */
function match(list, pattern) {
  // Terminate every type with the delimiter. Built per element so that an
  // empty list serializes to '' instead of a lone delimiter, which lets
  // patterns that allow zero elements (e.g. `^heading?$`) match it.
  const serialized = list.map(type => `${type}·`).join('');
  return compile(pattern).test(serialized);
}
/* eslint-env mocha */

const assert = require('assert');
const pattern = require('../src/utils/pattern-compiler');

/**
 * Asserts that every list of node types satisfies the content expression.
 * @param {string} expression the content expression under test
 * @param {string[][]} typeLists the type sequences that are expected to match
 */
const assertEachMatches = (expression, typeLists) => {
  typeLists.forEach((typeList) => {
    assert.ok(pattern.match(typeList, expression));
  });
};

describe('Test compiled patterns', () => {
  it('Basic pattern matches', () => {
    const twoParagraphs = ['heading', 'paragraph', 'paragraph'];
    const oneParagraph = ['heading', 'paragraph'];

    assert.ok(pattern.match(twoParagraphs, 'heading? paragraph+'));
    // a second paragraph is mandatory here, so a single one must not match
    assert.ok(!pattern.match(oneParagraph, 'heading? paragraph paragraph+'));
  });

  it('Or expressions work', () => {
    // the first expression is anchored at the start, the second is not
    assert.ok(pattern.match(['heading', 'image', 'paragraph'], '^heading? (paragraph|image)+'));
    assert.ok(pattern.match(['heading', 'paragraph', 'image'], 'heading? (paragraph|image)+'));
  });

  it('Matches a gallery', () => {
    assertEachMatches('^heading? image image image+$', [
      ['heading', 'image', 'image', 'image', 'image'],
      ['heading', 'image', 'image', 'image', 'image', 'image'],
      ['image', 'image', 'image', 'image'],
    ]);
  });

  it('Matches a section with text or lists', () => {
    assertEachMatches('^heading? (paragraph|list)+$', [
      ['heading', 'list', 'list', 'list', 'list'],
      ['heading', 'paragraph', 'paragraph', 'list', 'paragraph', 'paragraph'],
      ['paragraph', 'list', 'paragraph', 'list'],
    ]);
  });
});
/* eslint-env mocha */
const fs = require('fs-extra');
const path = require('path');
const assert = require('assert');
const TypeMatcher = require('../src/utils/match-section-types');


describe('Test Type Matcher Util', () => {
  // Section fixtures shared by all tests; the tests below index into them
  // and expect four entries.
  const sections = fs.readJSONSync(path.resolve(__dirname, 'fixtures', 'sections.json'));

  it('TypeMatcher works with empty input', () => {
    assert.deepStrictEqual(new TypeMatcher(null).process(), []);
    assert.deepStrictEqual(new TypeMatcher().process(), []);
    assert.deepStrictEqual(new TypeMatcher([]).process(), []);
  });

  it('TypeMatcher returns empty array if no matchers are registered', () => {
    assert.deepStrictEqual(new TypeMatcher(sections[0])
      .process().types, []);
  });

  it('TypeMatcher matches simple expressions', () => {
    assert.deepStrictEqual(new TypeMatcher(sections[0])
      .match('heading', 'has-heading')
      .process().types, ['has-heading']);
  });

  it('TypeMatcher matches multiple expressions', () => {
    assert.deepStrictEqual(new TypeMatcher(sections[0])
      .match('heading', 'has-heading')
      .match('paragraph', 'has-paragraph')
      .match('impossible', 'has-impossible')
      .process().types, ['has-heading', 'has-paragraph']);
  });

  it('TypeMatcher can match with functions', () => {
    assert.deepStrictEqual(new TypeMatcher(sections[0])
      .match('heading', 'has-heading')
      .match('paragraph', 'has-paragraph')
      .match(types => types.length >= 3, 'long')
      .process().types, ['has-heading', 'has-paragraph', 'long']);
  });

  // Previously carried the same title as the test above, which made the two
  // indistinguishable in the mocha report.
  it('TypeMatcher processes a list of sections', () => {
    const matchedsections = new TypeMatcher(sections)
      .match('heading', 'has-heading')
      .match('paragraph', 'has-paragraph')
      .match(types => types.length >= 3, 'long')
      .process();
    assert.strictEqual(matchedsections.length, 4);
    assert.ok(matchedsections[0].types);
    assert.ok(matchedsections[1].types);
    assert.deepStrictEqual(matchedsections[2].types, ['has-heading', 'has-paragraph', 'long']);
    assert.deepStrictEqual(matchedsections[3].types, ['has-heading', 'has-paragraph', 'long']);
  });
});