Commit d2ccd1d5 authored by Steinberg, Jan
Browse files

wzb oai-dc WIP

parent aee32243
This diff is collapsed.
......@@ -1175,6 +1175,63 @@ public class XsltTransformerOaiPmhBundlesStreamSourceTest {
}
/**
 * Transforms the EconStor record {@code oai:econstor.eu:10419/43900} with the WZB
 * oai_dc stylesheet and checks the metadata and contents of the resulting bundle.
 *
 * NOTE(review): the stream source is created with a remote OAI-PMH endpoint URL, so this
 * test presumably needs network access - confirm before running it offline.
 *
 * @throws IOException e.g. if the stylesheet resource is missing or cannot be read
 */
@Test
public void getSinglePublicationWzb() throws IOException {
    final String xsltResource = "xslt/wzb-oai_dc-2-xmlbundle.xslt";
    Map<String, String> map = new HashMap<>();
    // try-with-resources closes the classpath stream, which was leaked before.
    try (InputStream xsltStream = getClass().getClassLoader().getResourceAsStream(xsltResource)) {
        if (xsltStream == null) {
            // Fail with a readable message instead of a NullPointerException inside IOUtils.
            throw new IOException("Stylesheet not found on classpath: " + xsltResource);
        }
        map.put("oai_dc", IOUtils.toString(xsltStream, StandardCharsets.UTF_8));
    }

    XsltTransformerOaiPmhBundlesStreamSource bss =
            new XsltTransformerOaiPmhBundlesStreamSource("http://www.econstor.eu/dspace-oai/request", map);
    Bundle bundle = bss.getBundle("oai:econstor.eu:10419/43900");
    LOG.info("{}", bundle);
    Set<Metadatum> metadata = bundle.getMetadata();
    metadata.stream().map(Object::toString).forEach(LOG::info);

    // Document type and publication status.
    assertTrue(metadata.contains(new SimpleMetadatum("dc.type.stock", "monograph")));
    assertTrue(metadata.contains(new SimpleMetadatum("dc.type.document", "3")));
    assertTrue(metadata.contains(new SimpleMetadatum("dc.description.pubstatus", "1")));

    // Series statement (the original asserted dc.source.series twice; once is enough).
    assertTrue(metadata.contains(new SimpleMetadatum("dc.source.series", "WZB Discussion Paper")));
    assertTrue(metadata.contains(new SimpleMetadatum("dc.source.volume", "FS I 98-203")));

    // Authors: exactly these three.
    assertTrue(metadata.contains(new SimpleMetadatum("dc.contributor.author", "Schömann, Klaus")));
    assertTrue(metadata.contains(new SimpleMetadatum("dc.contributor.author", "Kruppe, Thomas")));
    assertTrue(metadata.contains(new SimpleMetadatum("dc.contributor.author", "Oschmiansky, Heidi")));
    assertEquals(3, metadata.stream().filter(m -> m.getKey().equals("dc.contributor.author")).count());

    // Values the stylesheet sets as constants.
    assertTrue(metadata.contains(new SimpleMetadatum("dc.contributor.corporateeditor", "381")));
    assertTrue(metadata.contains(new SimpleMetadatum("ssoar.contributor.institution", "WZB")));
    assertTrue(metadata.contains(new SimpleMetadatum("dc.description.review", "4")));
    assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.description.review")).count());
    assertTrue(metadata.contains(new SimpleMetadatum("dc.rights.licence", "3")));
    assertTrue(metadata.contains(new SimpleMetadatum("dc.publisher.country", "DEU")));
    assertTrue(metadata.contains(new SimpleMetadatum("internal.status", "formal und inhaltlich fertig erschlossen")));
    assertTrue(metadata.contains(new SimpleMetadatum("dc.publisher.city", "Berlin")));

    // Date, language, title and abstract.
    assertTrue(metadata.contains(new SimpleMetadatum("dc.date.issued", "1998")));
    assertTrue(metadata.contains(new SimpleMetadatum("dc.language", "de")));
    assertTrue(metadata.contains(new SimpleMetadatum("dc.title", "de", "Beschäftigungsdynamik und Arbeitslosigkeit in der Europäischen Union")));
    assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.title")).count());
    assertTrue(metadata.contains(new SimpleMetadatum("dc.description.abstract", "de", "Mit einem dynamischen Ansatz untersuchen wir Beschäftigung und Arbeitslosigkeit in der Europäischen Union. Ausgehend von der Theorie der Übergangsarbeitsmärkte werden die vielfältigen Brücken in Beschäftigung, aber auch der Verlust von Beschäftigung untersucht. Zunächst wird die Beschäftigungs- und Arbeitslosigkeitsdynamik der EU - Mitgliedsländer anhand von Stromdaten verglichen. Jährliche Zuströme in und Abströme aus Beschäftigung und Arbeitslosigkeit werden mit Daten der Europäischen Arbeitskräftestichprobe ermittelt. Diese 'natürlichen' Ströme werden zunehmend ergänzt durch Teilnehmer an Maßnahmen der Arbeitsmarktpolitik in Form von Übergangsarbeitsmärkten.")));
    assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.description.abstract")).count());

    // Identifiers and harvesting reference.
    assertTrue(metadata.contains(new SimpleMetadatum("dc.identifier.url", "http://hdl.handle.net/10419/43900")));
    assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.identifier.url")).count());
    assertTrue(metadata.contains(new SimpleMetadatum("dc.identifier.handle", "http://hdl.handle.net/10419/43900")));
    assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.identifier.handle")).count());
    assertTrue(metadata.contains(new SimpleMetadatum("internal.dda.reference", "http://www.econstor.eu/dspace-oai/request@@oai:econstor.eu:10419/43900")));

    // Classification. The classoz checks are disabled in this work-in-progress state.
    // assertTrue( metadata.contains( new SimpleMetadatum("internal.identifier.classoz", "10900") ) );
    // assertTrue( metadata.contains( new SimpleMetadatum("dc.subject.classoz", "10900") ) );
    assertTrue(metadata.contains(new SimpleMetadatum("internal.identifier.ddc", "330")));
    assertTrue(metadata.contains(new SimpleMetadatum("dc.subject.ddc", "330")));

    // Exactly one content stream is expected in the bundle.
    Set<InputStream> fileSet = bundle.getContents();
    assertEquals(1, fileSet.size());
}
@Test
public void getSinglePublicationFqs() throws IOException {
Map<String, String> map = new HashMap<>();
......
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="2.0"
xmlns:cc="http://www.d-nb.de/standards/cc/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dcmitype="http://purl.org/dc/dcmitype/"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:ddb="http://www.d-nb.de/standards/ddb/"
xmlns:dini="http://www.d-nb.de/standards/xmetadissplus/type/"
xmlns:doi="http://www.d-nb.de/standards/doi/"
xmlns:hdl="http://www.d-nb.de/standards/hdl/"
xmlns:mml="http://www.w3.org/1998/Math/MathML"
xmlns:nlm="http://dtd.nlm.nih.gov/publishing/2.3"
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
xmlns:pc="http://www.d-nb.de/standards/pc/"
xmlns:thesis="http://www.ndltd.org/standards/metadata/etdms/1.0/"
xmlns:urn="http://www.d-nb.de/standards/urn/"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:xMetaDiss="http://www.d-nb.de/standards/xmetadissplus/"
xmlns:xoai="http://www.lyncode.com/xoai"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:zoai="http://git.gesis.org/dda/zoai">
<xsl:output indent="yes"/>
<xsl:strip-space elements="*"/>
<!-- <xsl:copy-of select="."/> -->
<!-- <xsl:message terminate="no">here is some message for stderr, e.g. <xsl:value-of select='text()' /> </xsl:message> -->
<!-- Override the built-in element rule: an element that is matched by no more specific template below produces no output at all. -->
<xsl:template match="*" />
<!-- Entry point: creates the bundle/metadata wrapper elements and fills them by applying templates to the oai_dc:dc payload of an OAI-PMH GetRecord response. Nothing else in the response is processed. -->
<xsl:template match="/">
<xsl:element name="bundle">
<xsl:element name="metadata">
<xsl:apply-templates select="oai:OAI-PMH/oai:GetRecord/oai:record/oai:metadata/oai_dc:dc" />
</xsl:element>
</xsl:element>
</xsl:template>
<!-- Record template: first emits the fixed-value metadata, then dispatches the Dublin Core children in a fixed order, which determines the order of the generated metadatum elements. -->
<xsl:template match="oai:OAI-PMH/oai:GetRecord/oai:record/oai:metadata/oai_dc:dc">
  <!-- Fixed values. The optional "language" parameter of new-metadatum is left out here; the named template defaults it to the empty string, which means "no language element". -->
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.rights.licence'"/>
    <xsl:with-param name="value" select="'3'"/>
  </xsl:call-template>
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.description.pubstatus'"/>
    <xsl:with-param name="value" select="'1'"/>
  </xsl:call-template>
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.publisher.country'"/>
    <xsl:with-param name="value" select="'DEU'"/>
  </xsl:call-template>
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.contributor.corporateeditor'"/>
    <xsl:with-param name="value" select="'381'"/>
  </xsl:call-template>
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'ssoar.contributor.institution'"/>
    <xsl:with-param name="value" select="'WZB'"/>
  </xsl:call-template>
  <!-- Disabled:
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'ssoar.urn.registration'"/>
    <xsl:with-param name="language" select="''"/>
    <xsl:with-param name="value" select="'false'"/>
  </xsl:call-template>
  -->
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.description.review'"/>
    <xsl:with-param name="value" select="'4'"/>
  </xsl:call-template>
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'internal.status'"/>
    <xsl:with-param name="value" select="'formal und inhaltlich fertig erschlossen'"/>
  </xsl:call-template>

  <!-- Per-element templates. Only the first dc:description is used, and only DOI-like ("10.") and URN-like dc:identifier values are selected. -->
  <!-- Disabled: xsl:apply-templates select="dc:type[@xml:lang='en-US']" -->
  <xsl:apply-templates select="dc:title" />
  <xsl:apply-templates select="dc:description[1]" />
  <xsl:apply-templates select="dc:language" />
  <xsl:apply-templates select="dc:source[@xml:lang='de-DE']" />
  <xsl:apply-templates select="dc:identifier[matches(text(), '^10\..*')]" />
  <xsl:apply-templates select="dc:identifier[matches(text(), '^urn.*')]" />
  <xsl:apply-templates select="dc:type" />
  <xsl:apply-templates select="dc:relation" />
  <xsl:apply-templates select="dc:creator" />
  <xsl:apply-templates select="dc:date" />
  <xsl:apply-templates select="dc:publisher" />
  <xsl:apply-templates select="dc:subject" />
</xsl:template>
<!-- Emits the first dc:description as dc.description.abstract, tagged with the record language. Angle quotation marks are replaced by a plain double quote and whitespace is normalised. -->
<xsl:template match="dc:description[1]">
  <!-- Use only the FIRST dc:language, as the dc:language and dc:title templates do. Passing every dc:language sibling made the comparisons in zoai:sanitizeLanguage match on any of them, so a multi-language record could get an abstract language that differs from dc.language. -->
  <xsl:variable name="sanitizedLanguage" select="zoai:sanitizeLanguage( ../dc:language[1] )" />
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.description.abstract'"/>
    <xsl:with-param name="language" select="$sanitizedLanguage"/>
    <xsl:with-param name="value" select="normalize-space(replace(text(),'[‹›»«]','&quot;'))"/>
  </xsl:call-template>
</xsl:template>
<!-- Emits dc.date.issued for each dc:date, using the first four characters of its text. -->
<xsl:template match="dc:date">
  <xsl:variable name="issued" select="substring(text(),1,4)"/>
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.date.issued'"/>
    <xsl:with-param name="value" select="$issued"/>
  </xsl:call-template>
</xsl:template>
<!-- Emits one dc.contributor.author per dc:creator, carrying the element text unchanged. -->
<xsl:template match="dc:creator">
  <xsl:variable name="authorName" select="text()"/>
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.contributor.author'"/>
    <xsl:with-param name="value" select="$authorName"/>
  </xsl:call-template>
</xsl:template>
<!-- Splits a series statement of the form "Series: NAME ; No. VOLUME" into dc.source.series and dc.source.volume. Relations that do not start with "Series: " are ignored. -->
<xsl:template match="dc:relation">
  <xsl:if test="starts-with(., 'Series: ')">
    <xsl:variable name="seriesStatement" select="substring-after(., 'Series: ')" />
    <!-- substring-before() yields '' when the separator is missing, so fall back to the whole statement for a series without a volume part. -->
    <xsl:variable name="seriesName"
                  select="normalize-space(if (contains($seriesStatement, ' ;'))
                                          then substring-before($seriesStatement, ' ;')
                                          else $seriesStatement)" />
    <xsl:call-template name="new-metadatum">
      <xsl:with-param name="key" select="'dc.source.series'"/>
      <xsl:with-param name="language" select="''"/>
      <xsl:with-param name="value" select="$seriesName"/>
    </xsl:call-template>
    <xsl:if test="contains(., ' ; ')">
      <!-- The dot is escaped so that only the literal "No." is removed (an unescaped '.' matches any character). normalize-space() drops the blank that is left in front of the number. -->
      <xsl:variable name="volume" select="normalize-space(replace(substring-after(., ' ; '), 'No\.', ''))" />
      <xsl:if test="$volume != ''">
        <xsl:call-template name="new-metadatum">
          <xsl:with-param name="key" select="'dc.source.volume'"/>
          <xsl:with-param name="language" select="''"/>
          <xsl:with-param name="value" select="$volume"/>
        </xsl:call-template>
      </xsl:if>
    </xsl:if>
  </xsl:if>
</xsl:template>
<!-- Emits dc.language with the sanitised code, but only for the node at position 1 of the sequence being processed. -->
<xsl:template match="dc:language">
  <xsl:if test="position()=1">
    <xsl:call-template name="new-metadatum">
      <xsl:with-param name="key" select="'dc.language'"/>
      <xsl:with-param name="value" select="zoai:sanitizeLanguage( text() )"/>
    </xsl:call-template>
  </xsl:if>
</xsl:template>
<!-- Identifiers whose text starts with "10." are emitted as dc.identifier.doi, prefixed with the doi.org resolver URL. -->
<xsl:template match="dc:identifier[matches(text(), '^10\..*')]">
  <xsl:variable name="doiUrl" select="concat('https://doi.org/', text())"/>
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.identifier.doi'"/>
    <xsl:with-param name="value" select="$doiUrl"/>
  </xsl:call-template>
</xsl:template>
<!-- Identifiers whose text starts with "urn" are emitted unchanged as dc.identifier.urn. -->
<xsl:template match="dc:identifier[matches(text(), '^urn.*')]">
  <xsl:variable name="urn" select="text()"/>
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.identifier.urn'"/>
    <xsl:with-param name="value" select="$urn"/>
  </xsl:call-template>
</xsl:template>
<!-- Derives page info, issue number and issue topic from a German-language dc:source string. Page info is the last "; "-separated token, the issue is the word after "Nr. ", and the topic is the text between "): " and the next "; ". -->
<xsl:template match="dc:source[@xml:lang='de-DE']">
  <xsl:variable name="pageinfo" select="tokenize( text(),'; ' )[last()]" />
  <!-- One test instead of two. The former patterns '^\d+(-\d+)?' and '^\w\d+(-\w\d+)?' both matched values starting with two or more digits, because \w also matches digits, so dc.source.pageinfo was written twice. '^\w?\d+' accepts exactly the union of the two. -->
  <xsl:if test="$pageinfo != '' and matches($pageinfo, '^\w?\d+(-\w?\d+)?')">
    <xsl:call-template name="new-metadatum">
      <xsl:with-param name="key" select="'dc.source.pageinfo'"/>
      <xsl:with-param name="language" select="''"/>
      <xsl:with-param name="value" select="$pageinfo"/>
    </xsl:call-template>
  </xsl:if>
  <xsl:variable name="issue" select="substring-before(substring-after(text(),'Nr. '),' ')" />
  <xsl:if test="$issue != '' and matches ($issue, '\d+')">
    <xsl:call-template name="new-metadatum">
      <xsl:with-param name="key" select="'dc.source.issue'"/>
      <xsl:with-param name="language" select="''"/>
      <xsl:with-param name="value" select="$issue"/>
    </xsl:call-template>
  </xsl:if>
  <xsl:variable name="issuetopic" select="substring-before(substring-after(text(),'): '),'; ')" />
  <xsl:if test="$issuetopic != ''">
    <xsl:call-template name="new-metadatum">
      <xsl:with-param name="key" select="'dc.source.issuetopic'"/>
      <xsl:with-param name="language" select="''"/>
      <!-- Bullet characters in the topic are written out as colons. -->
      <xsl:with-param name="value" select="replace($issuetopic,'•',':')"/>
    </xsl:call-template>
  </xsl:if>
</xsl:template>
<!-- Maps dc:type to dc.type.stock / dc.type.document. A "doc-type…" value only counts when it contains "workingPaper" (stock "monograph" plus document "3"); every value not starting with "doc-type" yields stock "monograph" alone. -->
<xsl:template match="dc:type">
  <xsl:variable name="isDocType" select="starts-with(., 'doc-type')"/>
  <xsl:variable name="isWorkingPaper" select="$isDocType and contains(., 'workingPaper')"/>
  <xsl:if test="not($isDocType) or $isWorkingPaper">
    <xsl:call-template name="new-metadatum">
      <xsl:with-param name="key" select="'dc.type.stock'"/>
      <xsl:with-param name="value" select="'monograph'"/>
    </xsl:call-template>
  </xsl:if>
  <xsl:if test="$isWorkingPaper">
    <xsl:call-template name="new-metadatum">
      <xsl:with-param name="key" select="'dc.type.document'"/>
      <xsl:with-param name="value" select="'3'"/>
    </xsl:call-template>
  </xsl:if>
</xsl:template>
<!-- Every dc:publisher yields the constant dc.publisher.city "Berlin"; the element's own content is not read. -->
<xsl:template match="dc:publisher">
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="value" select="'Berlin'"/>
    <xsl:with-param name="key" select="'dc.publisher.city'"/>
  </xsl:call-template>
</xsl:template>
<!-- Subjects of the form "ddc:NNN" are emitted as internal.identifier.ddc with the part after the prefix; all other subjects are dropped. -->
<xsl:template match="dc:subject">
  <xsl:if test="starts-with(.,'ddc:')">
    <xsl:variable name="ddcNotation" select="substring-after(., 'ddc:')"/>
    <xsl:call-template name="new-metadatum">
      <xsl:with-param name="key" select="'internal.identifier.ddc'"/>
      <xsl:with-param name="value" select="$ddcNotation"/>
    </xsl:call-template>
  </xsl:if>
</xsl:template>
<!-- Emits a title either as dc.title (its language equals the record's main language) or as dc.title.alternative (any other language). Angle quotation marks become a plain double quote and whitespace is normalised in every branch. -->
<xsl:template match="dc:title">
  <!-- Main language of the record: the first dc:language, sanitised; '' when there is none. -->
  <xsl:variable name="mainLanguage" select="string(zoai:sanitizeLanguage( /oai:OAI-PMH/oai:GetRecord/oai:record/oai:metadata/oai_dc:dc/dc:language[1] ))" />
  <!-- A title without xml:lang inherits the main language, mirroring how the abstract takes its language from dc:language. Previously such a title never equalled a non-empty main language and was filed as dc.title.alternative. -->
  <xsl:variable name="titleLanguage"
                select="if (normalize-space(@xml:lang) != '')
                        then string(zoai:sanitizeLanguage( @xml:lang ))
                        else $mainLanguage" />
  <!-- Same clean-up for all branches; the original normalised whitespace only for alternative titles. -->
  <xsl:variable name="cleanTitle" select="normalize-space(replace(text(),'[‹›»«]','&quot;'))" />
  <xsl:choose>
    <xsl:when test="$titleLanguage = $mainLanguage">
      <xsl:call-template name="new-metadatum">
        <xsl:with-param name="key" select="'dc.title'"/>
        <xsl:with-param name="language" select="$titleLanguage"/>
        <xsl:with-param name="value" select="$cleanTitle"/>
      </xsl:call-template>
    </xsl:when>
    <!-- A Ukrainian title is the main title when the record declares no language at all. -->
    <xsl:when test="$titleLanguage = 'uk' and $mainLanguage = ''">
      <xsl:call-template name="new-metadatum">
        <xsl:with-param name="key" select="'dc.title'"/>
        <xsl:with-param name="language" select="'uk'"/>
        <xsl:with-param name="value" select="$cleanTitle"/>
      </xsl:call-template>
    </xsl:when>
    <xsl:otherwise>
      <xsl:call-template name="new-metadatum">
        <xsl:with-param name="key" select="'dc.title.alternative'"/>
        <xsl:with-param name="language" select="$titleLanguage"/>
        <xsl:with-param name="value" select="$cleanTitle"/>
      </xsl:call-template>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!-- Named template that writes one metadatum element with the children key, language (optional) and value.
     Parameters:
       key      - content of the key element
       language - optional; the language element is written only when this is not the empty string, and the code is passed through zoai:sanitizeLanguage first
       value    - content of the value element -->
<xsl:template name="new-metadatum">
<xsl:param name="key" />
<xsl:param name="language" />
<xsl:param name="value" />
<xsl:element name="metadatum">
<xsl:element name="key">
<xsl:value-of select="$key" />
</xsl:element>
<!-- No language element at all for an empty language. -->
<xsl:if test="$language != ''">
<!-- Sanitised here as well, so callers may pass raw or already sanitised codes. -->
<xsl:variable name="sanitizedLanguage" select="zoai:sanitizeLanguage($language)" />
<xsl:element name="language">
<xsl:value-of select="$sanitizedLanguage" />
</xsl:element>
</xsl:if>
<xsl:element name="value">
<xsl:value-of select="$value" />
</xsl:element>
</xsl:element>
</xsl:template>
<!-- Maps the language codes listed below to two-letter codes (de, en, uk, ru); any other input is returned as its string value. -->
<xsl:function name="zoai:sanitizeLanguage">
  <xsl:param name="inputLanguage" />
  <xsl:choose>
    <xsl:when test="$inputLanguage = ('ger', 'DE', 'de-DE', 'deu')">
      <xsl:text>de</xsl:text>
    </xsl:when>
    <xsl:when test="$inputLanguage = ('eng', 'EN', 'en-US', 'en-GB')">
      <xsl:text>en</xsl:text>
    </xsl:when>
    <xsl:when test="$inputLanguage = ('ukr', 'uk-UA')">
      <xsl:text>uk</xsl:text>
    </xsl:when>
    <xsl:when test="$inputLanguage = ('rus', 'ru-RU')">
      <xsl:text>ru</xsl:text>
    </xsl:when>
    <xsl:otherwise>
      <!-- Unknown codes pass through unchanged. -->
      <xsl:value-of select="$inputLanguage" />
    </xsl:otherwise>
  </xsl:choose>
</xsl:function>
</xsl:stylesheet>
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment