Skip to content

Commit

Permalink
migration2clarin7/fix different separator in crosswalks and added met…
Browse files Browse the repository at this point in the history
…asharev2 crosswalk. (#223)

* Added missing metasharev2 crosswalk into xoai.xml - without it, this crosswalk would never be used.

* Added a new metadata format for the `metasharev2.xsl` crosswalk

* Working on separator and trying to fix metashare crosswalk

* Created scripts for fast updating the crosswalks without rebuilding and installing the dspace project.

* Added fast building script for oai-pmh package

* Added fast building script for dspace-api package

* str:split function replaced by the tokenize function.

* Fixed creating NodeList

* Refactoring

* Added script for updating the configuration files

* Refactoring and fixed checkstyle violations

* The `registry.metadata.load = metashare-schema.xml` property cannot be added to `clarin-dspace.cfg` because some unit tests then fail.

---------

Co-authored-by: MilanMajchrák <milan.majchak@dataquest.sk>
  • Loading branch information
milanmajchrak and MilanMajchrák committed Jun 19, 2024
1 parent 0bebdc4 commit fa0c279
Show file tree
Hide file tree
Showing 16 changed files with 299 additions and 43 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ ALTER TABLE handle ADD url varchar(2048);
ALTER TABLE handle ADD dead BOOL;
ALTER TABLE handle ADD dead_since TIMESTAMP;

-- MetadataField table
-- Because of metashareSchema
ALTER TABLE metadatafieldregistry ALTER COLUMN element TYPE VARCHAR(128);

-- LICENSES
--
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ ALTER TABLE handle ADD url varchar;
ALTER TABLE handle ADD dead BOOL;
ALTER TABLE handle ADD dead_since TIMESTAMP WITH TIME ZONE;

-- MetadataField table
-- Because of metashareSchema
ALTER TABLE metadatafieldregistry ALTER COLUMN element TYPE VARCHAR(128);

-- LICENSES
--
Expand Down
16 changes: 10 additions & 6 deletions dspace-oai/src/main/java/org/dspace/utils/LicenseUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,19 @@
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.w3c.dom.Document;
import org.w3c.dom.Element;

/**
* Class is copied from the LINDAT/CLARIAH-CZ (This class is taken from UFAL-clarin. https://github.com/ufal/clarin-dspace/blob
* Class is copied from the LINDAT/CLARIAH-CZ (This class is taken from UFAL-clarin.
* https://github.com/ufal/clarin-dspace/blob
* * /si-master-origin/dspace-oai/src/main/java/cz/cuni/mff/ufal/utils/LicenseUtil.java) and modified by
*
* @author Marian Berger (marian.berger at dataquest.sk)
* @author Milan Majchrak (milan.majchrak at dataquest.sk)
*/
public class LicenseUtil {

Expand Down Expand Up @@ -198,15 +200,18 @@ public static String uriToAvailability(String uri) {
return "available-restrictedUse";
}

public static org.w3c.dom.NodeList uriToRestrictions(String uri) throws ParserConfigurationException {
public static org.w3c.dom.NodeList uriToRestrictions(String uri)
throws ParserConfigurationException {

DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
javax.xml.parsers.DocumentBuilder builder;
builder = factory.newDocumentBuilder();

Document doc = builder.newDocument();
Element root = doc.createElement("restrictions");

String restrictions = _uri2restrictions.get(uri);
if (Objects.nonNull(restrictions)) {
if (Objects.isNull(restrictions)) {
restrictions = "other";
}

Expand All @@ -217,7 +222,6 @@ public static org.w3c.dom.NodeList uriToRestrictions(String uri) throws ParserCo
}

return root.getElementsByTagName("restriction");

}

public static void main(String[] args) throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ public XdmValue call(XdmValue[] xdmValues) throws SaxonApiException {
return new XdmAtomicValue("");
}
return new XdmAtomicValue(checks(XslLogUtil.logMissing(xdmValues[0].itemAt(0).getStringValue(),
xdmValues[0].itemAt(1).getStringValue())));
xdmValues[1].itemAt(0).getStringValue())));
}

private String checks(String got) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,10 @@

package org.dspace.xoai.services.impl.resources.functions;

import static org.apache.logging.log4j.LogManager.getLogger;
import static org.dspace.xoai.services.impl.resources.functions.StringXSLFunction.BASE;

import java.util.Objects;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.dom.DOMSource;

import net.sf.saxon.s9api.DocumentBuilder;
Expand All @@ -25,21 +24,26 @@
import net.sf.saxon.s9api.SequenceType;
import net.sf.saxon.s9api.XdmAtomicValue;
import net.sf.saxon.s9api.XdmValue;
import org.apache.logging.log4j.Logger;
import org.bouncycastle.util.Arrays;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;


/**
* Serves as proxy for call from XSL engine.
*
* @author Marian Berger (marian.berger at dataquest.sk)
* @author Milan Majchrak (milan.majchrak at dataquest.sk)
*/
public abstract class NodeListXslFunction implements ExtensionFunction {

protected abstract String getFnName();

protected abstract NodeList getNodeList(String param);

private static final Logger log = getLogger(NodeListXslFunction.class);

@Override
final public QName getName() {
return new QName(BASE, getFnName());
Expand All @@ -62,26 +66,13 @@ final public XdmValue call(XdmValue[] xdmValues) throws SaxonApiException {
if (Objects.isNull(xdmValues) || Arrays.isNullOrContainsNull(xdmValues)) {
return new XdmAtomicValue("");
}

NodeList nodeList = getNodeList(xdmValues[0].itemAt(0).getStringValue());
Node oneNode = nodeList.item(0);

DocumentBuilder db = new Processor(false).newDocumentBuilder();
if (Objects.isNull(nodeList)) {
try {
nodeList = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument().getChildNodes();
} catch (ParserConfigurationException e) {
e.printStackTrace();
return null;
}
}
Node parent = null;
try {
parent = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
for (int i = 0; i < nodeList.getLength(); i++) {
parent.appendChild(nodeList.item(i));
}
return db.build(new DOMSource(parent));
} catch (ParserConfigurationException e) {
e.printStackTrace();
return null;
}
DOMSource sourceObj = new DOMSource(oneNode);
var res = db.wrap(sourceObj);
return res;
}
}
25 changes: 11 additions & 14 deletions dspace/config/crosswalks/oai/metadataFormats/metasharev2.xsl
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:doc="http://www.lyncode.com/xoai"
xmlns:str="http://exslt.org/strings"
xmlns:ms="http://www.ilsp.gr/META-XMLSchema"
xmlns:fn="http://custom.crosswalk.functions"
exclude-result-prefixes="doc fn str"
version="1.0">
exclude-result-prefixes="doc fn"
version="2.0">

<xsl:output omit-xml-declaration="yes" method="xml" indent="yes"/>

Expand Down Expand Up @@ -142,7 +139,7 @@
</xsl:when>
<xsl:when
test="doc:metadata/doc:element[@name='local']/doc:element[@name='contact']/doc:element[@name='person']/doc:element/doc:field[@name='value']">
<xsl:value-of select="str:split(doc:metadata/doc:element[@name='local']/doc:element[@name='contact']/doc:element[@name='person']/doc:element/doc:field[@name='value'], '@@')[2]"/>
<xsl:value-of select="tokenize(doc:metadata/doc:element[@name='local']/doc:element[@name='contact']/doc:element[@name='person']/doc:element/doc:field[@name='value'], ';')[2]"/>
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="fn:logMissing('surname',$handle)" />
Expand All @@ -158,7 +155,7 @@
</xsl:when>
<xsl:when
test="doc:metadata/doc:element[@name='local']/doc:element[@name='contact']/doc:element[@name='person']/doc:element/doc:field[@name='value']">
<xsl:value-of select="str:split(doc:metadata/doc:element[@name='local']/doc:element[@name='contact']/doc:element[@name='person']/doc:element/doc:field[@name='value'], '@@')[1]"/>
<xsl:value-of select="tokenize(doc:metadata/doc:element[@name='local']/doc:element[@name='contact']/doc:element[@name='person']/doc:element/doc:field[@name='value'], ';')[1]"/>
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="fn:logMissing('givenName',$handle)" />
Expand All @@ -177,11 +174,11 @@
<xsl:call-template name="CommunicationInfo" />
</ms:affiliation>
</xsl:when>
<xsl:when
<xsl:when
test="doc:metadata/doc:element[@name='local']/doc:element[@name='contact']/doc:element[@name='person']/doc:element/doc:field[@name='value']">
<ms:affiliation>
<ms:organizationName>
<xsl:value-of select="str:split(doc:metadata/doc:element[@name='local']/doc:element[@name='contact']/doc:element[@name='person']/doc:element/doc:field[@name='value'], '@@')[4]" />
<xsl:value-of select="tokenize(doc:metadata/doc:element[@name='local']/doc:element[@name='contact']/doc:element[@name='person']/doc:element/doc:field[@name='value'], ';')[4]" />
</ms:organizationName>
<!--another communicationInfo needed -->
<xsl:call-template name="CommunicationInfo" />
Expand All @@ -202,7 +199,7 @@
</xsl:when>
<xsl:when
test="doc:metadata/doc:element[@name='local']/doc:element[@name='contact']/doc:element[@name='person']/doc:element/doc:field[@name='value']">
<xsl:value-of select="str:split(doc:metadata/doc:element[@name='local']/doc:element[@name='contact']/doc:element[@name='person']/doc:element/doc:field[@name='value'], '@@')[3]"/>
<xsl:value-of select="tokenize(doc:metadata/doc:element[@name='local']/doc:element[@name='contact']/doc:element[@name='person']/doc:element/doc:field[@name='value'], ';')[3]"/>
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="fn:logMissing('email',$handle)" />
Expand Down Expand Up @@ -288,10 +285,10 @@
<xsl:for-each select="doc:metadata/doc:element[@name='local']/doc:element[@name='sponsor']/doc:element/doc:field[@name='value']">
<ms:fundingProject>
<ms:projectName>
<xsl:value-of select="str:split(., '@@')[3]"/>
<xsl:value-of select="tokenize(., ';')[3]"/>
</ms:projectName>
<ms:fundingType>
<xsl:value-of select="str:split(., '@@')[4]"/>
<xsl:value-of select="tokenize(., ';')[4]"/>
</ms:fundingType>
</ms:fundingProject>
</xsl:for-each>
Expand Down Expand Up @@ -459,7 +456,7 @@
</xsl:when>
<xsl:when
test="doc:metadata/doc:element[@name='local']/doc:element[@name='size']/doc:element[@name='info']/doc:element/doc:field[@name='value']">
<xsl:value-of select="str:split(doc:metadata/doc:element[@name='local']/doc:element[@name='size']/doc:element[@name='info']/doc:element/doc:field[@name='value'], '@@')[1]"/>
<xsl:value-of select="tokenize(doc:metadata/doc:element[@name='local']/doc:element[@name='size']/doc:element[@name='info']/doc:element/doc:field[@name='value'], ';')[1]"/>
</xsl:when>
<xsl:otherwise>
<xsl:variable name="iJustWantToLog" select="fn:logMissing('size',$handle)" />
Expand All @@ -476,7 +473,7 @@
</xsl:when>
<xsl:when
test="doc:metadata/doc:element[@name='local']/doc:element[@name='size']/doc:element[@name='info']/doc:element/doc:field[@name='value']">
<xsl:value-of select="str:split(doc:metadata/doc:element[@name='local']/doc:element[@name='size']/doc:element[@name='info']/doc:element/doc:field[@name='value'], '@@')[2]"/>
<xsl:value-of select="tokenize(doc:metadata/doc:element[@name='local']/doc:element[@name='size']/doc:element[@name='info']/doc:element/doc:field[@name='value'], ';')[2]"/>
</xsl:when>
<xsl:otherwise>
<xsl:variable name="iJustWantToLog" select="fn:logMissing('size',$handle)" />
Expand Down
7 changes: 7 additions & 0 deletions dspace/config/crosswalks/oai/xoai.xml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
<Format ref="oaidc"/>
<Format ref="mets"/>
<Format ref="cmdi" />
<Format ref="oai_metasharev2" />
<Format ref="olac" />
<Format ref="xoai"/>
<Format ref="didl"/>
Expand Down Expand Up @@ -122,6 +123,12 @@
<SchemaLocation>http://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/profiles/clarin.eu:cr1:p_1349361150622/xsd</SchemaLocation>
<!-- <Filter ref="ExcludeItemsInComOrCol"/>-->
</Format>
<Format id="oai_metasharev2">
<Prefix>oai_metasharev2</Prefix>
<XSLT>metadataFormats/metasharev2.xsl</XSLT>
<Namespace>http://www.ilsp.gr/META-XMLSchema</Namespace>
<SchemaLocation>http://metashare.ilsp.gr/META-XMLSchema/v2.0/META-SHARE-Resource.xsd</SchemaLocation>
</Format>
<Format id="mets">
<Prefix>mets</Prefix>
<XSLT>metadataFormats/mets.xsl</XSLT>
Expand Down
4 changes: 3 additions & 1 deletion dspace/config/dspace.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -928,7 +928,9 @@ registry.metadata.load = schema-publicationVolume-types.xml
registry.metadata.load = openaire4-types.xml
registry.metadata.load = dspace-types.xml
registry.metadata.load = iiif-types.xml

### CLARIN ###
# This property cannot be added to `clarin-dspace.cfg` because some unit tests then fail.
registry.metadata.load = metashare-schema.xml

#---------------------------------------------------------------#
#-----------------UI-Related CONFIGURATIONS---------------------#
Expand Down
48 changes: 48 additions & 0 deletions dspace/config/registries/local-types.xml
Original file line number Diff line number Diff line change
Expand Up @@ -90,4 +90,52 @@
<scope_note>Links to data in TEITOK</scope_note>
</dc-type>

<dc-type>
<schema>local</schema>
<element>demo</element>
<qualifier>uri</qualifier>
<scope_note>URL of the demo</scope_note>
</dc-type>

<dc-type>
<schema>local</schema>
<element>size</element>
<qualifier>info</qualifier>
</dc-type>

<dc-type>
<schema>local</schema>
<element>submission</element>
<qualifier>note</qualifier>
<scope_note>A submitter's note for editor/reviewer</scope_note>
</dc-type>

<dc-type>
<schema>local</schema>
<element>has</element>
<qualifier>files</qualifier>
<scope_note>True if the item contains any bitstreams</scope_note>
</dc-type>

<dc-type>
<schema>local</schema>
<element>branding</element>
<scope_note>LRT/LINDAT (Name of Community)</scope_note>
</dc-type>

<dc-type>
<schema>local</schema>
<element>bitstream</element>
<qualifier>file</qualifier>
<scope_note>Files inside a bitstream if an archive</scope_note>
</dc-type>

<dc-type>
<schema>local</schema>
<element>refbox</element>
<qualifier>format</qualifier>
<scope_note>If the default refbox citation is not enough, use this field to provide a format string
. Check/extend html.xsl for available variables.</scope_note>
</dc-type>

</dspace-dc-types>
Loading

0 comments on commit fa0c279

Please sign in to comment.