Commit 8d8dfeb4 authored by Fischer, Tim
Browse files

Filter and Transformation for STSS

parent 47b18fb0
......@@ -13,6 +13,7 @@ import org.gesis.dda.filter.impl.Doabooks2SsoarBundleFilter;
import org.gesis.dda.filter.impl.Fqs2SsoarBundleFilter;
import org.gesis.dda.filter.impl.Jfr2SsoarBundleFilter;
import org.gesis.dda.filter.impl.SsoarTargetRepositoryBundleFilter;
import org.gesis.dda.filter.impl.Stss2SsoarBundleFilter;
import org.gesis.dda.filter.impl.Tatup2SsoarBundleFilter;
import org.gesis.dda.filter.impl.Wbv2SsoarBundleFilter;
import org.gesis.dda.filter.impl.Econstor2SSOARBundleFilter;
......@@ -161,6 +162,10 @@ public class FeedingContextFactory {
log.info("using Tatup2SsoarBundleFilter");
result = new Tatup2SsoarBundleFilter();
break;
case "https://www.http://publications.tlu.ee/index.php/stss/oai$$ssoar":
log.info("using STSS2SsoarBundleFilter");
result = new Stss2SsoarBundleFilter();
break;
default:
log.debug("using default IdentityMetadataTransformer");
result = new AcceptAnyBundleFilter();
......
package org.gesis.dda.filter.impl;
import java.util.Set;
import org.gesis.dda.filter.BundleFilter;
import org.gesis.dda.publishing.domain.Bundle;
import org.gesis.dda.publishing.domain.Metadatum;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * {@link BundleFilter} that decides which STSS bundles are accepted.
 *
 * <p>A bundle is accepted only when all of the following hold:
 * <ul>
 * <li>it carries a {@code dc.date.issued} metadatum,</li>
 * <li>the value of that metadatum is not {@code "Online First"},</li>
 * <li>it either has no {@code dc.source.volume} metadatum, or the volume is
 * not a parsable integer (a warning is logged), or the volume is at least
 * {@code 13}.</li>
 * </ul>
 */
public class Stss2SsoarBundleFilter implements BundleFilter {

    private final static Logger LOG = LoggerFactory.getLogger(Stss2SsoarBundleFilter.class);

    /** Value of {@code dc.date.issued} that marks a bundle to be rejected. */
    private static final String ONLINE_FIRST = "Online First";

    /** Lowest volume number that is still accepted. */
    private static final int FIRST_ACCEPTED_VOLUME = 13;

    /**
     * Tests whether the given bundle passes this filter.
     *
     * @param bundle the bundle whose metadata is inspected
     * @return {@code true} if the bundle is accepted, {@code false} if it is
     *         filtered away
     */
    @Override
    public boolean test(Bundle bundle) {
        Set<Metadatum> metadata = bundle.getMetadata();

        Metadatum issued = findByKey(metadata, "dc.date.issued");
        // Compare the metadatum's VALUE with the marker; comparing the
        // Metadatum object itself to a String could never identify
        // "Online First" bundles.
        if (null == issued || ONLINE_FIRST.equals(issued.getValue())) {
            return false;
        }

        Metadatum volume = findByKey(metadata, "dc.source.volume");
        if (null == volume) {
            // No volume information: nothing to restrict on.
            return true;
        }

        String volumeString = volume.getValue();
        try {
            if (Integer.parseInt(volumeString) >= FIRST_ACCEPTED_VOLUME) {
                return true;
            }
            LOG.info("Stss2SsoarBundleFilter - filtering away bundle.reference={}", bundle.getReference());
            return false;
        } catch (NumberFormatException e) {
            // Deliberately lenient: an unreadable volume does not reject the bundle.
            LOG.warn("unparsable dc.source.volume={} for bundle.reference={}", volumeString, bundle.getReference());
            return true;
        }
    }

    /**
     * Returns the first metadatum with the given key, or {@code null} if none exists.
     */
    private static Metadatum findByKey(Set<Metadatum> metadata, String key) {
        return metadata.stream().filter(m -> key.equals(m.getKey())).findFirst().orElse(null);
    }
}
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="2.0"
xpath-default-namespace="http://www.openarchives.org/OAI/2.0/"
xmlns:cc="http://www.d-nb.de/standards/cc/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dcmitype="http://purl.org/dc/dcmitype/"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:ddb="http://www.d-nb.de/standards/ddb/"
xmlns:dini="http://www.d-nb.de/standards/xmetadissplus/type/"
xmlns:doi="http://www.d-nb.de/standards/doi/"
xmlns:hdl="http://www.d-nb.de/standards/hdl/"
xmlns:mml="http://www.w3.org/1998/Math/MathML"
xmlns:nlm="http://dtd.nlm.nih.gov/publishing/2.3"
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
xmlns:pc="http://www.d-nb.de/standards/pc/"
xmlns:thesis="http://www.ndltd.org/standards/metadata/etdms/1.0/"
xmlns:urn="http://www.d-nb.de/standards/urn/"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:xMetaDiss="http://www.d-nb.de/standards/xmetadissplus/"
xmlns:xoai="http://www.lyncode.com/xoai"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:zoai="http://git.gesis.org/dda/zoai">
<xsl:output indent="yes" />
<xsl:strip-space elements="*" />
<!-- <xsl:copy-of select="."/> -->
<!-- <xsl:message terminate="no">here is some message for stderr, e.g. <xsl:value-of
select='text()' /> </xsl:message> -->
<!-- Override the built-in element template: any element that has no more
     specific template in this stylesheet produces no output, and its
     children are not processed. -->
<xsl:template match="*" />
<!-- Entry point: wraps the result in bundle/metadata and fills it by applying
     templates to the oai_dc:dc element of an OAI-PMH GetRecord response.
     Only the GetRecord/record/metadata path is selected here. -->
<xsl:template match="/">
<xsl:element name="bundle">
<xsl:element name="metadata">
<xsl:apply-templates
select="oai:OAI-PMH/oai:GetRecord/oai:record/oai:metadata/oai_dc:dc" />
</xsl:element>
</xsl:element>
</xsl:template>
<!-- Handles the oai_dc:dc record element.
     1. Emits the constant metadata shared by every record of this source
        (journal id, publication status, publisher, country, ISSN, status).
     2. Emits one dc.subject.other metadatum built from all en-US subjects.
     3. Hands the child elements over to their dedicated templates. -->
<xsl:template
    match="oai:OAI-PMH/oai:GetRecord/oai:record/oai:metadata/oai_dc:dc">
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.source.journal'" />
    <xsl:with-param name="language" select="''" />
    <xsl:with-param name="value" select="'529'" />
  </xsl:call-template>
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.description.pubstatus'" />
    <xsl:with-param name="language" select="''" />
    <xsl:with-param name="value" select="'1'" />
  </xsl:call-template>
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.publisher'" />
    <xsl:with-param name="language" select="''" />
    <xsl:with-param name="value" select="'Studies of Transition States and Societies'" />
  </xsl:call-template>
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.publisher.country'" />
    <xsl:with-param name="language" select="''" />
    <xsl:with-param name="value" select="'MISC'" />
  </xsl:call-template>
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.identifier.issn'" />
    <xsl:with-param name="language" select="''" />
    <xsl:with-param name="value" select="'1736-8758'" />
  </xsl:call-template>
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'internal.status'" />
    <xsl:with-param name="language" select="''" />
    <xsl:with-param name="value"
        select="'formal und inhaltlich fertig erschlossen'" />
  </xsl:call-template>
  <!-- Join all non-blank en-US subjects with '; '. Commas inside a subject
       are turned into ';' as before. Using string-join over the already
       filtered sequence guarantees that no dangling '; ' is left behind when
       the last subject element is empty. -->
  <xsl:variable name="subjectotheren"
      select="string-join(for $s in dc:subject[@xml:lang='en-US'][normalize-space(.) != ''] return replace($s, ',', ';'), '; ')" />
  <xsl:if test="$subjectotheren != ''">
    <xsl:call-template name="new-metadatum">
      <xsl:with-param name="key" select="'dc.subject.other'" />
      <xsl:with-param name="language" select="'en'" />
      <xsl:with-param name="value" select="$subjectotheren" />
    </xsl:call-template>
  </xsl:if>
  <xsl:apply-templates></xsl:apply-templates>
</xsl:template>
<!-- dc:language: emit the language code, normalised by zoai:sanitizeLanguage,
     as a dc.language metadatum without a language qualifier. -->
<xsl:template match="dc:language">
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.language'" />
    <xsl:with-param name="language" select="''" />
    <xsl:with-param name="value" select="zoai:sanitizeLanguage(text())" />
  </xsl:call-template>
</xsl:template>
<!-- dc:creator: emit one dc.contributor.author metadatum per author.
     Several authors may be packed into one element separated by ';'.
     tokenize() also covers the single-author case (it then yields exactly one
     token), so both cases share one code path: every name is
     whitespace-normalised, and blank tokens (e.g. from a trailing ';') are
     dropped instead of producing empty author entries. -->
<xsl:template match="dc:creator">
  <xsl:for-each select="tokenize(., ';')[normalize-space(.) != '']">
    <xsl:call-template name="new-metadatum">
      <xsl:with-param name="key" select="'dc.contributor.author'" />
      <xsl:with-param name="language" select="''" />
      <xsl:with-param name="value" select="normalize-space(.)" />
    </xsl:call-template>
  </xsl:for-each>
</xsl:template>
<!-- dc:description: a non-empty description becomes a dc.description.abstract
     metadatum in the element's (sanitised) xml:lang, with the guillemets
     » and « replaced by straight double quotes and whitespace normalised.
     If zoai:extractUrnFromFqsAbstract finds a URN in the text, it is emitted
     additionally as dc.identifier.urn. -->
<xsl:template match="dc:description">
  <xsl:if test=". != ''">
    <xsl:call-template name="new-metadatum">
      <xsl:with-param name="key" select="'dc.description.abstract'" />
      <xsl:with-param name="language" select="zoai:sanitizeLanguage(@xml:lang)" />
      <xsl:with-param name="value"
          select="normalize-space(replace(text(),'[»«]','&quot;'))" />
    </xsl:call-template>
    <xsl:variable name="urnInAbstract"
        select="zoai:extractUrnFromFqsAbstract(text() )" />
    <xsl:if test="$urnInAbstract != ''">
      <xsl:call-template name="new-metadatum">
        <xsl:with-param name="key" select="'dc.identifier.urn'" />
        <xsl:with-param name="language" select="''" />
        <xsl:with-param name="value" select="$urnInAbstract" />
      </xsl:call-template>
    </xsl:if>
  </xsl:if>
</xsl:template>
<!-- dc:rights: map the Creative Commons BY-NC-ND 4.0 licence to licence
     code '16'. The URL is compared as a literal substring (not as a regular
     expression, where every '.' would match any character) and without the
     scheme, so both the http:// and https:// forms of the licence URL are
     recognised. Other rights statements produce no output. -->
<xsl:template match="dc:rights">
  <xsl:if test="contains(., 'creativecommons.org/licenses/by-nc-nd/4.0')">
    <xsl:call-template name="new-metadatum">
      <xsl:with-param name="key" select="'dc.rights.licence'" />
      <xsl:with-param name="language" select="''" />
      <xsl:with-param name="value" select="'16'" />
    </xsl:call-template>
  </xsl:if>
</xsl:template>
<!-- dc:type (en-US): translate the source's document type into the three
     target fields internal.identifier.review, dc.type.stock and
     dc.type.document. Only the two types listed below are mapped; any other
     value produces no output. -->
<xsl:template match="dc:type[@xml:lang='en-US']">
  <xsl:choose>
    <xsl:when test=". = 'Peer-reviewed Article'">
      <xsl:call-template name="new-metadatum">
        <xsl:with-param name="key" select="'internal.identifier.review'" />
        <xsl:with-param name="language" select="''" />
        <xsl:with-param name="value" select="'1'" />
      </xsl:call-template>
      <xsl:call-template name="new-metadatum">
        <xsl:with-param name="key" select="'dc.type.stock'" />
        <xsl:with-param name="language" select="''" />
        <xsl:with-param name="value" select="'article'" />
      </xsl:call-template>
      <xsl:call-template name="new-metadatum">
        <xsl:with-param name="key" select="'dc.type.document'" />
        <xsl:with-param name="language" select="''" />
        <xsl:with-param name="value" select="'32'" />
      </xsl:call-template>
    </xsl:when>
    <xsl:when test=". = 'Non-refereed Book Review'">
      <xsl:call-template name="new-metadatum">
        <xsl:with-param name="key" select="'internal.identifier.review'" />
        <xsl:with-param name="language" select="''" />
        <xsl:with-param name="value" select="'2'" />
      </xsl:call-template>
      <xsl:call-template name="new-metadatum">
        <xsl:with-param name="key" select="'dc.type.stock'" />
        <xsl:with-param name="language" select="''" />
        <xsl:with-param name="value" select="'recension'" />
      </xsl:call-template>
      <xsl:call-template name="new-metadatum">
        <xsl:with-param name="key" select="'dc.type.document'" />
        <xsl:with-param name="language" select="''" />
        <xsl:with-param name="value" select="'23'" />
      </xsl:call-template>
    </xsl:when>
  </xsl:choose>
</xsl:template>
<!-- dc:source (en-US): pull the publication date, issue and volume out of the
     citation string and emit them, in this order, as dc.date.issued,
     dc.source.issue and dc.source.volume.
       issued = text between the first '(' and the following ')'
       issue  = text between 'No ' and the following '('
       volume = text between 'Vol ' and the following ','
     Issue and volume are emitted only if they contain at least one digit
     (which also implies they are non-empty). -->
<xsl:template match="dc:source[@xml:lang='en-US']">
  <xsl:variable name="citation" select="text()" />
  <xsl:variable name="issued"
      select="substring-before(substring-after($citation,'('),')')" />
  <xsl:variable name="issue"
      select="substring-before(substring-after($citation,'No '),'(')" />
  <xsl:variable name="volume"
      select="substring-before(substring-after($citation,'Vol '),',')" />
  <xsl:if test="$issued != ''">
    <xsl:call-template name="new-metadatum">
      <xsl:with-param name="key" select="'dc.date.issued'" />
      <xsl:with-param name="language" select="''" />
      <xsl:with-param name="value" select="$issued" />
    </xsl:call-template>
  </xsl:if>
  <xsl:if test="matches($issue, '\d+')">
    <xsl:call-template name="new-metadatum">
      <xsl:with-param name="key" select="'dc.source.issue'" />
      <xsl:with-param name="language" select="''" />
      <xsl:with-param name="value" select="normalize-space($issue)" />
    </xsl:call-template>
  </xsl:if>
  <xsl:if test="matches($volume, '\d+')">
    <xsl:call-template name="new-metadatum">
      <xsl:with-param name="key" select="'dc.source.volume'" />
      <xsl:with-param name="language" select="''" />
      <xsl:with-param name="value" select="normalize-space($volume)" />
    </xsl:call-template>
  </xsl:if>
</xsl:template>
<!-- dc:identifier holding a bare DOI (text starting with '10.'): emit it as
     dc.identifier.doi, prefixed with the https://doi.org/ resolver. -->
<xsl:template match="dc:identifier[starts-with(text(), '10.')]">
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.identifier.doi'" />
    <xsl:with-param name="language" select="''" />
    <xsl:with-param name="value" select="concat('https://doi.org/', text())" />
  </xsl:call-template>
</xsl:template>
<!-- dc:identifier holding the article's landing page on the journal site:
     emit it unchanged as dc.identifier.url. The pattern accepts both http
     and https, and the dots of the host name are escaped so that they match
     literally. -->
<xsl:template
    match="dc:identifier[matches(text(), 'https?://publications\.tlu\.ee/index\.php/stss/article/view/')]">
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.identifier.url'" />
    <xsl:with-param name="language" select="''" />
    <xsl:with-param name="value" select="text()" />
  </xsl:call-template>
</xsl:template>
<!-- dc:title: a title whose (sanitised) xml:lang equals the record's
     (sanitised) dc:language is emitted as dc.title; a title in any other
     language is emitted as dc.title.alternative. In both cases a single
     trailing '.' is removed from the title text. -->
<xsl:template match="dc:title">
  <xsl:variable name="titleLanguage"
      select="zoai:sanitizeLanguage( @xml:lang )" />
  <xsl:variable name="recordLanguage"
      select="zoai:sanitizeLanguage( /oai:OAI-PMH/oai:GetRecord/oai:record/oai:metadata/oai_dc:dc/dc:language )" />
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key"
        select="if ($titleLanguage = $recordLanguage) then 'dc.title' else 'dc.title.alternative'" />
    <xsl:with-param name="language" select="$titleLanguage" />
    <xsl:with-param name="value" select="replace(text(), '\.$','')" />
  </xsl:call-template>
</xsl:template>
<!-- Named template that writes one metadatum element.
     Parameters:
       key      - content of the key child element
       language - optional; when it is not the empty string, a language child
                  element is written with the value passed through
                  zoai:sanitizeLanguage
       value    - content of the value child element
     Child order in the output is key, language (if any), value. -->
<xsl:template name="new-metadatum">
  <xsl:param name="key" />
  <xsl:param name="language" />
  <xsl:param name="value" />
  <xsl:element name="metadatum">
    <xsl:element name="key"><xsl:value-of select="$key" /></xsl:element>
    <xsl:if test="$language != ''">
      <xsl:element name="language"><xsl:value-of select="zoai:sanitizeLanguage($language)" /></xsl:element>
    </xsl:if>
    <xsl:element name="value"><xsl:value-of select="$value" /></xsl:element>
  </xsl:element>
</xsl:template>
<!-- zoai:sanitizeLanguage: map the known spellings of German, English and
     Polish language codes to the two-letter codes 'de', 'en' and 'pl'.
     Any other input is returned unchanged. -->
<xsl:function name="zoai:sanitizeLanguage">
  <xsl:param name="inputLanguage" />
  <xsl:value-of
      select="if ($inputLanguage = ('ger', 'DE', 'de-DE', 'deu')) then 'de'
              else if ($inputLanguage = ('eng', 'EN', 'en-US', 'en-GB')) then 'en'
              else if ($inputLanguage = ('pol', 'PL', 'pl-PL')) then 'pl'
              else $inputLanguage" />
</xsl:function>
<!-- zoai:extractUrnFromFqsAbstract: return the part of $rawString that starts
     at 'urn:nbn:de:' and runs to the end of the string (regex group 1).
     Text that does not match the pattern yields no output, so callers can
     test the result against ''.
     NOTE(review): the leading '.*' is greedy, so if the text contained
     several 'urn:nbn:de:' occurrences the last one would be picked; and since
     no regex flags are set, '.' presumably does not cross line breaks -
     confirm against real abstracts.
     NOTE(review): the name refers to FQS although this stylesheet handles a
     different journal - presumably carried over from another stylesheet. -->
<xsl:function name="zoai:extractUrnFromFqsAbstract">
<xsl:param name="rawString" />
<xsl:analyze-string select="$rawString"
regex=".*(urn:nbn:de:.*)$">
<xsl:matching-substring>
<xsl:value-of select="regex-group(1)" />
</xsl:matching-substring>
</xsl:analyze-string>
</xsl:function>
</xsl:stylesheet>
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment