Commit 0ee0238a authored by Steinberg, Jan's avatar Steinberg, Jan
Browse files

ASEAS - xslt, filter, test

parent f7a7dc9b
......@@ -7,6 +7,7 @@ import org.gesis.dda.feeder.FeedingContext;
import org.gesis.dda.feeder.NextIncrementalHarvestingIntervalStrategy;
import org.gesis.dda.filter.BundleFilter;
import org.gesis.dda.filter.impl.AcceptAnyBundleFilter;
import org.gesis.dda.filter.impl.Aseas2SsoarBundleFilter;
import org.gesis.dda.filter.impl.Fqs2SsoarBundleFilter;
import org.gesis.dda.filter.impl.SsoarTargetRepositoryBundleFilter;
import org.gesis.dda.filter.impl.Wbv2SsoarBundleFilter;
......@@ -128,6 +129,10 @@ public class FeedingContextFactory {
log.info("using Fqs2SsoarBundleFilter");
result = new Fqs2SsoarBundleFilter();
break;
case "https://aseas.univie.ac.at/index.php/aseas/oai/$$ssoar":
log.info("using Aseas2SsoarBundleFilter");
result = new Aseas2SsoarBundleFilter();
break;
default:
log.debug("using default IdentityMetadataTransformer");
result = new AcceptAnyBundleFilter();
......
package org.gesis.dda.filter.impl;
import java.util.Set;
import org.gesis.dda.filter.BundleFilter;
import org.gesis.dda.publishing.domain.Bundle;
import org.gesis.dda.publishing.domain.Metadatum;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Bundle filter for the ASEAS journal when feeding SSOAR.
 *
 * <p>SSOAR already has everything up to and including
 * {@code dc.source.volume=9}, {@code dc.source.issue=1}. Therefore a bundle
 * passes the filter when its volume is 10 or higher, or when its volume is 9
 * and its issue is greater than 1. Everything older is filtered away.
 *
 * <p>The filter fails open: whenever the information needed for the decision
 * is missing or not a plain integer, the bundle is accepted. The issue is only
 * consulted for volume 9, because it cannot change the outcome for any other
 * volume.
 */
public class Aseas2SsoarBundleFilter implements BundleFilter {

    private static final Logger LOG = LoggerFactory.getLogger(Aseas2SsoarBundleFilter.class);

    private static final String VOLUME_KEY = "dc.source.volume";
    private static final String ISSUE_KEY = "dc.source.issue";

    /** Last volume that is (partially) present in SSOAR. */
    private static final int LAST_HARVESTED_VOLUME = 9;
    /** Last issue of {@link #LAST_HARVESTED_VOLUME} that is present in SSOAR. */
    private static final int LAST_HARVESTED_ISSUE = 1;

    /**
     * Decides whether the given bundle should be harvested.
     *
     * @param bundle the bundle to inspect; its metadata is searched for
     *               {@code dc.source.volume} and {@code dc.source.issue}
     * @return {@code true} if the bundle passes the filter, {@code false} if
     *         it is filtered away because SSOAR already holds that issue
     */
    @Override
    public boolean test(Bundle bundle) {
        Set<Metadatum> metadata = bundle.getMetadata();

        Metadatum volume = findFirst(metadata, VOLUME_KEY);
        if (null == volume) {
            // no volume information at all -> nothing to decide on, accept
            return true;
        }

        Integer volumeValue = parseIntOrNull(volume.getValue());
        if (null == volumeValue) {
            LOG.warn("unparsable dc.source.volume={} for bundle.reference={}", volume.getValue(), bundle.getReference());
            return true;
        }

        if (volumeValue > LAST_HARVESTED_VOLUME) {
            return true;
        }
        if (volumeValue < LAST_HARVESTED_VOLUME) {
            LOG.info("filtering away bundle.reference={}", bundle.getReference());
            return false;
        }

        // volume == LAST_HARVESTED_VOLUME: only here the issue is decisive.
        // The issue metadatum may be absent, so it must not be dereferenced blindly.
        Metadatum issue = findFirst(metadata, ISSUE_KEY);
        String issueValueString = (null == issue) ? null : issue.getValue();
        Integer issueValue = parseIntOrNull(issueValueString);
        if (null == issueValue) {
            LOG.warn("missing or unparsable dc.source.issue={} for bundle.reference={}", issueValueString, bundle.getReference());
            return true;
        }

        if (issueValue > LAST_HARVESTED_ISSUE) {
            return true;
        }
        LOG.info("filtering away bundle.reference={}", bundle.getReference());
        return false;
    }

    /** Returns the first metadatum with the given key, or {@code null} if there is none. */
    private static Metadatum findFirst(Set<Metadatum> metadata, String key) {
        return metadata.stream()
                .filter(m -> key.equals(m.getKey()))
                .findFirst()
                .orElse(null);
    }

    /** Parses a decimal integer; returns {@code null} for {@code null} or non-numeric input. */
    private static Integer parseIntOrNull(String value) {
        if (null == value) {
            return null;
        }
        try {
            return Integer.valueOf(value);
        }
        catch (NumberFormatException e) {
            return null;
        }
    }

}
......@@ -331,6 +331,68 @@ public class XsltTransformerOaiPmhBundlesStreamSourceTest {
assertEquals( 1, fileSet2.size());
}
/**
 * Fetches a single ASEAS record, transforms it with the ASEAS oai_dc
 * stylesheet and checks the resulting bundle metadata and content count.
 *
 * NOTE(review): the bundle is requested from the ASEAS OAI-PMH base URL, so
 * this test presumably needs network access to that endpoint - confirm.
 */
@Test
public void getSinglePublicationAseas() throws IOException {
    Map<String, String> map = new HashMap<>();

    // Only an oai_dc stylesheet is registered for this source.
    String oaiDcXsltString;
    // The class loader hands back an open stream; close it deterministically
    // instead of leaving it to the garbage collector.
    try (InputStream xsltStream = getClass().
            getClassLoader().
            getResourceAsStream("xslt/aseas-oai_dc-2-xmlbundle.xslt")) {
        oaiDcXsltString = IOUtils.toString(xsltStream, StandardCharsets.UTF_8);
    }
    map.put("oai_dc", oaiDcXsltString);

    XsltTransformerOaiPmhBundlesStreamSource bss = new XsltTransformerOaiPmhBundlesStreamSource("https://aseas.univie.ac.at/index.php/aseas/oai/", map);
    Bundle bundle = bss.getBundle("oai:ojs.univie.ac.at:article/394");
    LOG.info("{}", bundle);

    Set<Metadatum> metadata = bundle.getMetadata();
    metadata.stream().map(Object::toString).forEach(LOG::info);

    // document type / review status
    assertTrue( metadata.contains( new SimpleMetadatum("dc.type.stock", "article") ) );
    assertTrue( metadata.contains( new SimpleMetadatum("dc.type.document", "32") ) );
    assertTrue( metadata.contains( new SimpleMetadatum("dc.source.journal", "5") ) );
    assertTrue( metadata.contains( new SimpleMetadatum("dc.description.pubstatus", "1") ) );

    assertTrue( metadata.contains( new SimpleMetadatum("dc.contributor.author", "Pichler, Melanie") ) );
    assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.contributor.author") ).count() );

    assertTrue( metadata.contains( new SimpleMetadatum("dc.description.review", "1") ) );
    assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.description.review") ).count() );

    assertTrue( metadata.contains( new SimpleMetadatum("dc.date.issued", "2010") ) );

    assertTrue( metadata.contains( new SimpleMetadatum("dc.description.abstract", "en", "This paper deals with agrofuel policies within the European Union (EU) and the consequences of these policies in Indonesia. That South-East-Asian country is the world leader in the production and exportation of palm oil, which is one of the cheapest feedstocks for the production of biodiesel. Recently, production has expanded signifi cantly due to the incentives of the international energy market. This paper analyses the interests and strategies of the key players in the palm oil and agrofuels business in Indonesia, looks at the model of development they (re-)produce, and analyses their reactions to the problem of sustainability in relation to deforestation, land confl icts, and biodiversity loss through the expansion of monocultures and industrial agriculture.") ) );
    assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.description.abstract") ).count() );

    assertTrue( metadata.contains( new SimpleMetadatum("dc.identifier.issn", "1999-253X") ) );

    assertTrue( metadata.contains( new SimpleMetadatum("dc.identifier.url", "https://aseas.univie.ac.at/index.php/aseas/article/view/394/198") ) );
    assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.identifier.url") ).count() );

    assertTrue( metadata.contains( new SimpleMetadatum("dc.language", "en") ) );

    // source information: issue, volume, pages
    assertTrue( metadata.contains( new SimpleMetadatum("dc.source.issue", "2") ) );
    assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.source.issue") ).count() );

    assertTrue( metadata.contains( new SimpleMetadatum("dc.source.volume", "3") ) );
    assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.source.volume") ).count() );

    assertTrue( metadata.contains( new SimpleMetadatum("dc.rights.licence", "19") ) );

    // this record is expected to carry no issue topic
    assertEquals(0, metadata.stream().filter(m -> m.getKey().equals("dc.source.issuetopic") ).count() );

    assertTrue( metadata.contains( new SimpleMetadatum("dc.source.pageinfo", "175-193") ) );
    assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.source.pageinfo") ).count() );

    assertTrue( metadata.contains( new SimpleMetadatum("dc.title", "en", "Agrofuels in Indonesia: Structures, Conflicts, Consequences, and the Role of the EU") ) );
    assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.title") ).count() );

    assertTrue( metadata.contains( new SimpleMetadatum("internal.dda.reference", "https://aseas.univie.ac.at/index.php/aseas/oai/@@oai:ojs.univie.ac.at:article/394") ) );
    assertTrue( metadata.contains( new SimpleMetadatum("dc.publisher.country", "AUT") ) );

    // Classification metadata (classoz / ddc) and a URN are deliberately not
    // asserted for this source.
    assertTrue( metadata.contains( new SimpleMetadatum("internal.status", "formal und inhaltlich fertig erschlossen") ) );

    Set<InputStream> fileSet = bundle.getContents();
    assertEquals( 1, fileSet.size());
}
@Test
public void getSinglePublicationJcca() throws IOException {
Map<String, String> map = new HashMap<>();
......
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="2.0"
xmlns:cc="http://www.d-nb.de/standards/cc/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dcmitype="http://purl.org/dc/dcmitype/"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:ddb="http://www.d-nb.de/standards/ddb/"
xmlns:dini="http://www.d-nb.de/standards/xmetadissplus/type/"
xmlns:doi="http://www.d-nb.de/standards/doi/"
xmlns:hdl="http://www.d-nb.de/standards/hdl/"
xmlns:mml="http://www.w3.org/1998/Math/MathML"
xmlns:nlm="http://dtd.nlm.nih.gov/publishing/2.3"
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
xmlns:pc="http://www.d-nb.de/standards/pc/"
xmlns:thesis="http://www.ndltd.org/standards/metadata/etdms/1.0/"
xmlns:urn="http://www.d-nb.de/standards/urn/"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:xMetaDiss="http://www.d-nb.de/standards/xmetadissplus/"
xmlns:xoai="http://www.lyncode.com/xoai"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:zoai="http://git.gesis.org/dda/zoai">
<xsl:output indent="yes"/>
<xsl:strip-space elements="*"/>
<!-- <xsl:copy-of select="."/> -->
<!-- <xsl:message terminate="no">here is some message for stderr, e.g. <xsl:value-of select='text()' /> </xsl:message> -->
<!-- Override the built-in template rule for elements: an element that has no
     more specific template in this stylesheet produces no output. -->
<xsl:template match="*" />
<!-- Entry point. Creates the bundle element with a single metadata child and
     fills it by processing the oai_dc:dc payload of an OAI-PMH GetRecord
     response. The elements are created with xsl:element and have no namespace. -->
<xsl:template match="/">
<xsl:element name="bundle">
<xsl:element name="metadata">
<xsl:apply-templates select="oai:OAI-PMH/oai:GetRecord/oai:record/oai:metadata/oai_dc:dc" />
</xsl:element>
</xsl:element>
</xsl:template>
<!-- Main template for the Dublin Core record.
     First emits metadata with fixed values for this source (licence, journal,
     publication status, publisher country, ISSN, internal status), then
     dispatches selected Dublin Core children to their templates.
     NOTE(review): the numeric values '19', '5' and '1' are presumably
     identifiers defined by the target repository; verify against its
     vocabularies. -->
<xsl:template match="oai:OAI-PMH/oai:GetRecord/oai:record/oai:metadata/oai_dc:dc">
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.rights.licence'"/>
<xsl:with-param name="language" select="''"/>
<xsl:with-param name="value" select="'19'"/>
</xsl:call-template>
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.source.journal'"/>
<xsl:with-param name="language" select="''"/>
<xsl:with-param name="value" select="'5'"/>
</xsl:call-template>
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.description.pubstatus'"/>
<xsl:with-param name="language" select="''"/>
<xsl:with-param name="value" select="'1'"/>
</xsl:call-template>
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.publisher.country'"/>
<xsl:with-param name="language" select="''"/>
<xsl:with-param name="value" select="'AUT'"/>
</xsl:call-template>
<!-- disabled: ssoar.urn.registration is currently not emitted -->
<!-- <xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'ssoar.urn.registration'"/>
<xsl:with-param name="language" select="''"/>
<xsl:with-param name="value" select="'false'"/>
</xsl:call-template>-->
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.identifier.issn'"/>
<xsl:with-param name="language" select="''"/>
<xsl:with-param name="value" select="'1999-253X'"/>
</xsl:call-template>
<!-- disabled: no fixed DDC classification is currently emitted -->
<!--xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'internal.identifier.ddc'"/>
<xsl:with-param name="language" select="''"/>
<xsl:with-param name="value" select="'300'"/>
</xsl:call-template>
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.subject.ddc'"/>
<xsl:with-param name="language" select="''"/>
<xsl:with-param name="value" select="'300'"/>
</xsl:call-template-->
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'internal.status'"/>
<xsl:with-param name="language" select="''"/>
<xsl:with-param name="value" select="'formal und inhaltlich fertig erschlossen'"/>
</xsl:call-template>
<!-- Dispatch to the element templates. Descriptions and subjects are
     processed for de-DE and en-US only, dc:source for de-DE only and dc:type
     for en-US only; identifiers are processed only when they start with
     "10." (DOI) or "urn". -->
<!--xsl:apply-templates select="dc:type[@xml:lang='en-US']" /-->
<xsl:apply-templates select="dc:title" />
<xsl:apply-templates select="dc:description[@xml:lang='de-DE']" />
<xsl:apply-templates select="dc:description[@xml:lang='en-US']" />
<xsl:apply-templates select="dc:language" />
<xsl:apply-templates select="dc:source[@xml:lang='de-DE']" />
<xsl:apply-templates select="dc:identifier[matches(text(), '^10\..*')]" />
<xsl:apply-templates select="dc:identifier[matches(text(), '^urn.*')]" />
<xsl:apply-templates select="dc:type[@xml:lang='en-US']" />
<xsl:apply-templates select="dc:relation" />
<xsl:apply-templates select="dc:creator" />
<xsl:apply-templates select="dc:date" />
<xsl:apply-templates select="dc:subject[@xml:lang='de-DE']" />
<xsl:apply-templates select="dc:subject[@xml:lang='en-US']" />
</xsl:template>
<!-- dc:date: emits dc.date.issued with the first four characters of the
     element text (presumably the year of an ISO formatted date; confirm
     against the source data). -->
<xsl:template match="dc:date">
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.date.issued'"/>
<xsl:with-param name="language" select="''"/>
<xsl:with-param name="value" select="substring(text(),1,4)"/>
</xsl:call-template>
</xsl:template>
<!-- dc:creator: emits one dc.contributor.author per creator, using the
     element text unchanged. -->
<xsl:template match="dc:creator">
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.contributor.author'"/>
<xsl:with-param name="language" select="''"/>
<xsl:with-param name="value" select="text()"/>
</xsl:call-template>
</xsl:template>
<!-- dc:relation: emits dc.identifier.url with the element text unchanged.
     Every dc:relation element yields its own metadatum. -->
<xsl:template match="dc:relation">
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.identifier.url'"/>
<xsl:with-param name="language" select="''"/>
<xsl:with-param name="value" select="text()"/>
</xsl:call-template>
</xsl:template>
<!-- German dc:description: emits dc.description.abstract with language 'de'.
     Single and double angle quotation marks in the text are replaced by a
     plain double quote. -->
<xsl:template match="dc:description[@xml:lang='de-DE']">
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.description.abstract'"/>
<xsl:with-param name="language" select="'de'"/>
<xsl:with-param name="value" select="replace(text(),'[‹›»«]','&quot;')"/>
</xsl:call-template>
</xsl:template>
<!-- English dc:description: emits dc.description.abstract with language 'en'.
     Single and double angle quotation marks in the text are replaced by a
     plain double quote. -->
<xsl:template match="dc:description[@xml:lang='en-US']">
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.description.abstract'"/>
<xsl:with-param name="language" select="'en'"/>
<xsl:with-param name="value" select="replace(text(),'[‹›»«]','&quot;')"/>
</xsl:call-template>
</xsl:template>
<!-- dc:language: emits dc.language with the sanitized language code.
     Only the first node of the node list being processed produces output,
     so a record with several dc:language elements yields one dc.language. -->
<xsl:template match="dc:language">
<xsl:if test="position()=1">
<xsl:variable name="sanitizedLanguage" select="zoai:sanitizeLanguage( text() )" />
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.language'"/>
<xsl:with-param name="language" select="''"/>
<xsl:with-param name="value" select="$sanitizedLanguage"/>
</xsl:call-template>
</xsl:if>
</xsl:template>
<!-- dc:identifier whose text starts with "10.": treated as a DOI and emitted
     as dc.identifier.doi, prefixed with https://doi.org/ -->
<xsl:template match="dc:identifier[matches(text(), '^10\..*')]">
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.identifier.doi'"/>
<xsl:with-param name="language" select="''"/>
<xsl:with-param name="value" select="concat('https://doi.org/', text())"/>
</xsl:call-template>
</xsl:template>
<!-- dc:identifier whose text starts with "urn": emitted unchanged as
     dc.identifier.urn. -->
<xsl:template match="dc:identifier[matches(text(), '^urn.*')]">
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.identifier.urn'"/>
<xsl:with-param name="language" select="''"/>
<xsl:with-param name="value" select="text()"/>
</xsl:call-template>
</xsl:template>
<!-- German dc:source: splits the citation string into up to four metadata.
     NOTE(review): the string is presumably shaped like
     "Journal; Vol 3, No 2 (2010); 175-193"; confirm against the OAI data.
     Each part is emitted only when its extraction yields a non-empty value. -->
<xsl:template match="dc:source[@xml:lang='de-DE']">
<!-- page info: the last token after splitting on "; ", emitted when it starts
     with digits, optionally followed by "-" and more digits (the pattern is
     anchored at the start only) -->
<xsl:variable name="pageinfo" select="tokenize( text(),'; ' )[last()]" />
<xsl:if test="$pageinfo != '' and matches($pageinfo, '^\d+(-\d+)?')">
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.source.pageinfo'"/>
<xsl:with-param name="language" select="''"/>
<xsl:with-param name="value" select="$pageinfo"/>
</xsl:call-template>
</xsl:if>
<!-- volume: the text between "Vol " and the following comma; the pattern is
     unanchored, so any value containing at least one digit is accepted -->
<xsl:variable name="volume" select="substring-before(substring-after(text(),'Vol '),',')" />
<xsl:if test="$volume != '' and matches ($volume, '\d+')">
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.source.volume'"/>
<xsl:with-param name="language" select="''"/>
<xsl:with-param name="value" select="$volume"/>
</xsl:call-template>
</xsl:if>
<!-- issue: the text between "No " and the following space; the pattern is
     unanchored, so any value containing at least one digit is accepted -->
<xsl:variable name="issue" select="substring-before(substring-after(text(),'No '),' ')" />
<xsl:if test="$issue != '' and matches ($issue, '\d+')">
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.source.issue'"/>
<xsl:with-param name="language" select="''"/>
<xsl:with-param name="value" select="$issue"/>
</xsl:call-template>
</xsl:if>
<!-- issue topic: the text between "): " and the following "; ", with bullet
     characters replaced by colons; empty when the source has no "): " part -->
<xsl:variable name="issuetopic" select="substring-before(substring-after(text(),'): '),'; ')" />
<xsl:if test="$issuetopic != ''">
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.source.issuetopic'"/>
<xsl:with-param name="language" select="''"/>
<xsl:with-param name="value" select="replace($issuetopic,'•',':')"/>
</xsl:call-template>
</xsl:if>
</xsl:template>
<!-- English dc:subject: emits dc.subject.other with language 'en'; subjects
     with empty text are skipped. -->
<xsl:template match="dc:subject[@xml:lang='en-US']">
<xsl:if test="string-length(text()) > 0">
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.subject.other'"/>
<xsl:with-param name="language" select="'en'"/>
<xsl:with-param name="value" select="text()"/>
</xsl:call-template>
</xsl:if>
</xsl:template>
<!-- German dc:subject: emits dc.subject.other with language 'de'; subjects
     with empty text are skipped. -->
<xsl:template match="dc:subject[@xml:lang='de-DE']">
<xsl:if test="string-length(text()) > 0">
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.subject.other'"/>
<xsl:with-param name="language" select="'de'"/>
<xsl:with-param name="value" select="text()"/>
</xsl:call-template>
</xsl:if>
</xsl:template>
<!-- English dc:type: maps the journal's section label to three metadata
     (dc.type.stock, dc.type.document, dc.description.review).
     Only the two labels below are mapped; any other label produces no output.
     NOTE(review): the numeric codes are presumably identifiers of the target
     repository's vocabularies; verify. -->
<xsl:template match="dc:type[@xml:lang='en-US']">
<xsl:choose>
<!-- book review: stock 'recension', document type '23', review status '2' -->
<xsl:when test="text() = 'Non-refereed Book Review'">
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.type.stock'"/>
<xsl:with-param name="language" select="''"/>
<xsl:with-param name="value" select="'recension'"/>
</xsl:call-template>
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.type.document'"/>
<xsl:with-param name="language" select="''"/>
<xsl:with-param name="value" select="'23'"/>
</xsl:call-template>
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.description.review'"/>
<xsl:with-param name="language" select="''"/>
<xsl:with-param name="value" select="'2'"/>
</xsl:call-template>
</xsl:when>
<!-- peer-reviewed article: stock 'article', document type '32', review status '1' -->
<xsl:when test="text() = 'Peer-reviewed Article'">
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.type.stock'"/>
<xsl:with-param name="language" select="''"/>
<xsl:with-param name="value" select="'article'"/>
</xsl:call-template>
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.type.document'"/>
<xsl:with-param name="language" select="''"/>
<xsl:with-param name="value" select="'32'"/>
</xsl:call-template>
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.description.review'"/>
<xsl:with-param name="language" select="''"/>
<xsl:with-param name="value" select="'1'"/>
</xsl:call-template>
</xsl:when>
</xsl:choose>
</xsl:template>
<!-- dc:title: decides between dc.title and dc.title.alternative by comparing
     the title's sanitized xml:lang with the sanitized value of the record's
     first dc:language element. Angle quotation marks in the title text are
     replaced by a plain double quote in every branch. -->
<xsl:template match="dc:title">
<xsl:variable name="sanitizedLanguage" select="zoai:sanitizeLanguage( @xml:lang )" />
<xsl:variable name="sanitizedMainLanguage" select="zoai:sanitizeLanguage( /oai:OAI-PMH/oai:GetRecord/oai:record/oai:metadata/oai_dc:dc/dc:language[1] )" />
<xsl:choose>
<!-- title is in the record's main language: this is the main title -->
<xsl:when test="$sanitizedLanguage = $sanitizedMainLanguage">
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.title'"/>
<xsl:with-param name="language" select="$sanitizedLanguage"/>
<xsl:with-param name="value" select="replace(text(),'[‹›»«]','&quot;')"/>
</xsl:call-template>
</xsl:when>
<!-- main language is empty and the title is Ukrainian: use it as main title.
     NOTE(review): this branch looks carried over from another source's
     stylesheet; confirm it is needed here. -->
<xsl:when test="$sanitizedLanguage = 'uk' and $sanitizedMainLanguage = ''">
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.title'"/>
<xsl:with-param name="language" select="'uk'"/>
<xsl:with-param name="value" select="replace(text(),'[‹›»«]','&quot;')"/>
</xsl:call-template>
</xsl:when>
<!-- any other language: emitted as alternative title -->
<xsl:otherwise>
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.title.alternative'"/>
<xsl:with-param name="language" select="$sanitizedLanguage"/>
<xsl:with-param name="value" select="replace(text(),'[‹›»«]','&quot;')"/>
</xsl:call-template>
</xsl:otherwise>
</xsl:choose>
</xsl:template>
<!-- Named template that writes one metadatum element.
     Parameters:
       key      : written to the key child element
       language : when not the empty string, sanitized via
                  zoai:sanitizeLanguage and written to a language child
                  element; otherwise no language element is created
       value    : written to the value child element -->
<xsl:template name="new-metadatum">
<xsl:param name="key" />
<xsl:param name="language" />
<xsl:param name="value" />
<xsl:element name="metadatum">
<xsl:element name="key">
<xsl:value-of select="$key" />
</xsl:element>
<xsl:if test="$language != ''">
<xsl:variable name="sanitizedLanguage" select="zoai:sanitizeLanguage($language)" />
<xsl:element name="language">
<xsl:value-of select="$sanitizedLanguage" />
</xsl:element>
</xsl:if>
<xsl:element name="value">
<xsl:value-of select="$value" />
</xsl:element>
</xsl:element>
</xsl:template>
<!-- Maps the language codes listed below to the two-letter codes 'de', 'en',
     'uk' and 'ru'. Any other input, including values that already are
     two-letter codes, is returned unchanged. The comparison is
     case-sensitive. -->
<xsl:function name="zoai:sanitizeLanguage">
<xsl:param name="inputLanguage" />
<xsl:choose>
<xsl:when test="$inputLanguage = 'ger' or $inputLanguage = 'DE' or $inputLanguage = 'de-DE' or $inputLanguage = 'deu'">
<xsl:value-of select="'de'" />
</xsl:when>
<xsl:when test="$inputLanguage = 'eng' or $inputLanguage = 'EN' or $inputLanguage = 'en-US' or $inputLanguage = 'en-GB'">
<xsl:value-of select="'en'" />
</xsl:when>
<xsl:when test="$inputLanguage = 'ukr' or $inputLanguage = 'uk-UA'">
<xsl:value-of select="'uk'" />
</xsl:when>
<xsl:when test="$inputLanguage = 'rus' or $inputLanguage = 'ru-RU'">
<xsl:value-of select="'ru'" />
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="$inputLanguage" />
</xsl:otherwise>
</xsl:choose>
</xsl:function>
</xsl:stylesheet>
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment