Commit f7a7dc9b authored by Steinberg, Jan
Browse files

HJK - xslt and test

parent af22223d
......@@ -229,6 +229,108 @@ public class XsltTransformerOaiPmhBundlesStreamSourceTest {
assertFalse( metadata.contains( new SimpleMetadatum("dc.language", "ru") ) );
}
/**
 * Transforms two single HJK (Hamburger Journal für Kulturanthropologie) records with the
 * {@code xslt/hjk-oai_dc-2-xmlbundle2.xslt} stylesheet and checks the resulting metadata.
 *
 * <p>Record {@code article/756} has a German title and abstract; record {@code article/826}
 * has an English title and abstract. Both are expected to carry {@code "de"} as language,
 * exactly as asserted below.
 *
 * <p>NOTE(review): the bundles are requested from {@code journals.sub.uni-hamburg.de}, so this
 * presumably needs network access and a stable remote record — confirm before running in CI.
 *
 * @throws IOException if the stylesheet resource cannot be read
 */
@Test
public void getSinglePublicationHjk() throws IOException {
    Map<String, String> map = new HashMap<>();

    // The oai_marc stylesheet is currently disabled for HJK; kept for reference.
    // String oaiMarcXsltString = IOUtils.toString(
    //         getClass().
    //         getClassLoader().
    //         getResourceAsStream("xslt/hjk-oai_marc-2-xmlbundle.xslt"), StandardCharsets.UTF_8);
    // map.put("oai_marc", oaiMarcXsltString);

    // Read the stylesheet from the classpath and close the stream afterwards.
    String oaiDcXsltString;
    try (InputStream xsltStream = getClass()
            .getClassLoader()
            .getResourceAsStream("xslt/hjk-oai_dc-2-xmlbundle2.xslt")) {
        oaiDcXsltString = IOUtils.toString(xsltStream, StandardCharsets.UTF_8);
    }
    map.put("oai_dc", oaiDcXsltString);

    XsltTransformerOaiPmhBundlesStreamSource bss = new XsltTransformerOaiPmhBundlesStreamSource("http://journals.sub.uni-hamburg.de/hjk/oai/", map);

    // --- first record: German article ---
    Bundle bundle = bss.getBundle("oai:ojs.journals.sub.uni-hamburg.de:article/756");
    LOG.info("{}", bundle);
    Set<Metadatum> metadata = bundle.getMetadata();
    metadata.stream().map(Object::toString).forEach(LOG::info);

    assertTrue( metadata.contains( new SimpleMetadatum("dc.type.stock", "article") ) );
    assertTrue( metadata.contains( new SimpleMetadatum("dc.type.document", "32") ) );
    assertTrue( metadata.contains( new SimpleMetadatum("dc.source.journal", "1439") ) );
    assertTrue( metadata.contains( new SimpleMetadatum("dc.description.pubstatus", "1") ) );
    assertTrue( metadata.contains( new SimpleMetadatum("dc.contributor.author", "Koch, Gertraud") ) );
    assertTrue( metadata.contains( new SimpleMetadatum("dc.contributor.author", "Warneken, Bernd Jürgen") ) );
    assertEquals(2, metadata.stream().filter(m -> m.getKey().equals("dc.contributor.author") ).count() );
    assertTrue( metadata.contains( new SimpleMetadatum("dc.description.review", "1") ) );
    assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.description.review") ).count() );
    assertTrue( metadata.contains( new SimpleMetadatum("dc.date.issued", "2014") ) );
    assertTrue( metadata.contains( new SimpleMetadatum("dc.description.abstract", "de", "\"Deutschland ist ein reiches Land\" – dieses Bild dürften die meisten Menschen teilen, auch wenn es brüchig wird, wo doch Obdachlose und Bettler_innen in den Innenstädten, Tafeln und Kleiderkammern, Umsonstläden und Lebensmittelausgabestellen für Bedürftige eine andere Wirklichkeit zeigen. In Hamburg ist diese Spanne von Reich und Arm in besonderer Weise prägend.") ) );
    assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.description.abstract") ).count() );
    assertTrue( metadata.contains( new SimpleMetadatum("dc.identifier.issn", "2365-1016") ) );
    assertTrue( metadata.contains( new SimpleMetadatum("dc.identifier.url", "https://journals.sub.uni-hamburg.de/hjk/article/view/756/759") ) );
    assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.identifier.url") ).count() );
    assertTrue( metadata.contains( new SimpleMetadatum("dc.language", "de") ) );
    assertTrue( metadata.contains( new SimpleMetadatum("dc.source.issue", "1") ) );
    assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.source.issue") ).count() );
    assertTrue( metadata.contains( new SimpleMetadatum("dc.rights.licence", "24") ) );
    assertTrue( metadata.contains( new SimpleMetadatum("dc.source.issuetopic", "OBEN_UNTEN : Bilder vom Leben der Anderen") ) );
    assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.source.issuetopic") ).count() );
    assertTrue( metadata.contains( new SimpleMetadatum("dc.source.pageinfo", "3-6") ) );
    assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.source.pageinfo") ).count() );
    assertTrue( metadata.contains( new SimpleMetadatum("dc.title", "de", "oben_unten. Bilder vom Leben der Anderen") ) );
    assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.title") ).count() );
    assertTrue( metadata.contains( new SimpleMetadatum("dc.identifier.urn", "urn:nbn:de:gbv:18-8-7560") ) );
    assertTrue( metadata.contains( new SimpleMetadatum("internal.dda.reference", "http://journals.sub.uni-hamburg.de/hjk/oai/@@oai:ojs.journals.sub.uni-hamburg.de:article/756") ) );
    assertTrue( metadata.contains( new SimpleMetadatum("dc.publisher.country", "DEU") ) );
    // Disabled: the stylesheet does not emit a classoz classification for HJK.
    // assertTrue( metadata.contains( new SimpleMetadatum("internal.identifier.classoz", "10900") ) );
    // assertTrue( metadata.contains( new SimpleMetadatum("dc.subject.classoz", "10900") ) );
    assertTrue( metadata.contains( new SimpleMetadatum("internal.identifier.ddc", "300") ) );
    assertTrue( metadata.contains( new SimpleMetadatum("dc.subject.ddc", "300") ) );
    assertTrue( metadata.contains( new SimpleMetadatum("internal.status", "formal und inhaltlich fertig erschlossen") ) );

    Set<InputStream> fileSet = bundle.getContents();
    assertEquals( 1, fileSet.size());

    // --- second record: article with English title and abstract ---
    Bundle bundle2 = bss.getBundle("oai:ojs.journals.sub.uni-hamburg.de:article/826");
    LOG.info("{}", bundle2);
    Set<Metadatum> metadata2 = bundle2.getMetadata();
    // Log the second record's metadata (previously the first set was logged twice).
    metadata2.stream().map(Object::toString).forEach(LOG::info);

    assertTrue( metadata2.contains( new SimpleMetadatum("dc.type.stock", "article") ) );
    assertTrue( metadata2.contains( new SimpleMetadatum("dc.type.document", "32") ) );
    assertTrue( metadata2.contains( new SimpleMetadatum("dc.source.journal", "1439") ) );
    assertTrue( metadata2.contains( new SimpleMetadatum("dc.description.pubstatus", "1") ) );
    assertTrue( metadata2.contains( new SimpleMetadatum("dc.contributor.author", "Löfgren, Orvar") ) );
    assertEquals(1, metadata2.stream().filter(m -> m.getKey().equals("dc.contributor.author") ).count() );
    assertTrue( metadata2.contains( new SimpleMetadatum("dc.description.review", "1") ) );
    assertEquals(1, metadata2.stream().filter(m -> m.getKey().equals("dc.description.review") ).count() );
    assertTrue( metadata2.contains( new SimpleMetadatum("dc.date.issued", "2015") ) );
    assertTrue( metadata2.contains( new SimpleMetadatum("dc.description.abstract", "de", "This paper looks at collecting practices in the everyday world of universities. How do scholars turn into hoarding squirrels, creating their own archives? My focus is on routines and technologies, which deal with storage and order as well as retrieving and discarding of research materials.") ) );
    assertEquals(1, metadata2.stream().filter(m -> m.getKey().equals("dc.description.abstract") ).count() );
    assertTrue( metadata2.contains( new SimpleMetadatum("dc.identifier.issn", "2365-1016") ) );
    assertTrue( metadata2.contains( new SimpleMetadatum("dc.identifier.url", "https://journals.sub.uni-hamburg.de/hjk/article/view/826/806") ) );
    assertEquals(1, metadata2.stream().filter(m -> m.getKey().equals("dc.identifier.url") ).count() );
    assertTrue( metadata2.contains( new SimpleMetadatum("dc.language", "de") ) );
    assertTrue( metadata2.contains( new SimpleMetadatum("dc.source.issue", "3") ) );
    assertEquals(1, metadata2.stream().filter(m -> m.getKey().equals("dc.source.issue") ).count() );
    assertTrue( metadata2.contains( new SimpleMetadatum("dc.rights.licence", "24") ) );
    assertTrue( metadata2.contains( new SimpleMetadatum("dc.source.issuetopic", "SAMMELN : Zur Geschichte und Gegenwart einer alltäglichen, musealen und wissenschaftlichen Praxis") ) );
    assertEquals(1, metadata2.stream().filter(m -> m.getKey().equals("dc.source.issuetopic") ).count() );
    assertTrue( metadata2.contains( new SimpleMetadatum("dc.source.pageinfo", "17-33") ) );
    assertEquals(1, metadata2.stream().filter(m -> m.getKey().equals("dc.source.pageinfo") ).count() );
    assertTrue( metadata2.contains( new SimpleMetadatum("dc.title", "de", "The Scholar as Squirrel: Everyday Collecting in Academia") ) );
    assertEquals(1, metadata2.stream().filter(m -> m.getKey().equals("dc.title") ).count() );
    assertTrue( metadata2.contains( new SimpleMetadatum("dc.identifier.urn", "urn:nbn:de:gbv:18-8-8269") ) );
    assertTrue( metadata2.contains( new SimpleMetadatum("internal.dda.reference", "http://journals.sub.uni-hamburg.de/hjk/oai/@@oai:ojs.journals.sub.uni-hamburg.de:article/826") ) );
    assertTrue( metadata2.contains( new SimpleMetadatum("dc.publisher.country", "DEU") ) );
    // Disabled: the stylesheet does not emit a classoz classification for HJK.
    // assertTrue( metadata2.contains( new SimpleMetadatum("internal.identifier.classoz", "10900") ) );
    // assertTrue( metadata2.contains( new SimpleMetadatum("dc.subject.classoz", "10900") ) );
    assertTrue( metadata2.contains( new SimpleMetadatum("internal.identifier.ddc", "300") ) );
    assertTrue( metadata2.contains( new SimpleMetadatum("dc.subject.ddc", "300") ) );
    assertTrue( metadata2.contains( new SimpleMetadatum("internal.status", "formal und inhaltlich fertig erschlossen") ) );

    Set<InputStream> fileSet2 = bundle2.getContents();
    assertEquals( 1, fileSet2.size());
}
@Test
public void getSinglePublicationJcca() throws IOException {
Map<String, String> map = new HashMap<>();
......
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="2.0"
xmlns:cc="http://www.d-nb.de/standards/cc/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dcmitype="http://purl.org/dc/dcmitype/"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:ddb="http://www.d-nb.de/standards/ddb/"
xmlns:dini="http://www.d-nb.de/standards/xmetadissplus/type/"
xmlns:doi="http://www.d-nb.de/standards/doi/"
xmlns:hdl="http://www.d-nb.de/standards/hdl/"
xmlns:mml="http://www.w3.org/1998/Math/MathML"
xmlns:nlm="http://dtd.nlm.nih.gov/publishing/2.3"
xmlns:oai="http://www.openarchives.org/OAI/2.0/"
xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
xmlns:pc="http://www.d-nb.de/standards/pc/"
xmlns:thesis="http://www.ndltd.org/standards/metadata/etdms/1.0/"
xmlns:urn="http://www.d-nb.de/standards/urn/"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:xMetaDiss="http://www.d-nb.de/standards/xmetadissplus/"
xmlns:xoai="http://www.lyncode.com/xoai"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:zoai="http://git.gesis.org/dda/zoai">
<!-- Serialise the result with indentation. -->
<xsl:output indent="yes"/>
<!-- Strip whitespace-only text nodes from all elements of the source document. -->
<xsl:strip-space elements="*"/>
<!-- Debugging aids, disabled: -->
<!-- <xsl:copy-of select="."/> -->
<!-- <xsl:message terminate="no">here is some message for stderr, e.g. <xsl:value-of select='text()' /> </xsl:message> -->
<!-- Override the built-in element rule: any element without a more specific
     template below produces no output and its children are not processed. -->
<xsl:template match="*" />
<!-- Entry point: emits the bundle/metadata wrapper and fills it with the
     metadatum elements generated for the oai_dc:dc element of a GetRecord response.
     NOTE(review): xsl:element is presumably used instead of literal result elements
     so that the stylesheet's namespace declarations are not copied to the output; confirm
     before changing. -->
<xsl:template match="/">
<xsl:element name="bundle">
<xsl:element name="metadata">
<xsl:apply-templates select="oai:OAI-PMH/oai:GetRecord/oai:record/oai:metadata/oai_dc:dc" />
</xsl:element>
</xsl:element>
</xsl:template>
<!--
  Record template. First emits the values that are constant for every HJK
  record, then dispatches to the per-field templates in a fixed order.
  The language argument is left out for language-neutral metadata: the
  language parameter of new-metadatum defaults to the empty string.
-->
<xsl:template match="oai:OAI-PMH/oai:GetRecord/oai:record/oai:metadata/oai_dc:dc">
  <!-- constant values -->
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.type.stock'"/>
    <xsl:with-param name="value" select="'article'"/>
  </xsl:call-template>
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.type.document'"/>
    <xsl:with-param name="value" select="'32'"/>
  </xsl:call-template>
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.rights.licence'"/>
    <xsl:with-param name="value" select="'24'"/>
  </xsl:call-template>
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.source.journal'"/>
    <xsl:with-param name="value" select="'1439'"/>
  </xsl:call-template>
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.description.pubstatus'"/>
    <xsl:with-param name="value" select="'1'"/>
  </xsl:call-template>
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.publisher.country'"/>
    <xsl:with-param name="value" select="'DEU'"/>
  </xsl:call-template>
  <!-- disabled:
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'ssoar.urn.registration'"/>
    <xsl:with-param name="language" select="''"/>
    <xsl:with-param name="value" select="'false'"/>
  </xsl:call-template>
  -->
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.description.review'"/>
    <xsl:with-param name="value" select="'1'"/>
  </xsl:call-template>
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.identifier.issn'"/>
    <xsl:with-param name="value" select="'2365-1016'"/>
  </xsl:call-template>
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'internal.identifier.ddc'"/>
    <xsl:with-param name="value" select="'300'"/>
  </xsl:call-template>
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.subject.ddc'"/>
    <xsl:with-param name="value" select="'300'"/>
  </xsl:call-template>
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'internal.status'"/>
    <xsl:with-param name="value" select="'formal und inhaltlich fertig erschlossen'"/>
  </xsl:call-template>

  <!-- per-field templates; the order below is the output order -->
  <!--xsl:apply-templates select="dc:type[@xml:lang='en-US']" /-->
  <xsl:apply-templates select="dc:title"/>
  <xsl:apply-templates select="dc:description[@xml:lang='de-DE']"/>
  <xsl:apply-templates select="dc:description[@xml:lang='en-US']"/>
  <xsl:apply-templates select="dc:language"/>
  <xsl:apply-templates select="dc:source[@xml:lang='de-DE']"/>
  <xsl:apply-templates select="dc:identifier[matches(text(), '^10\..*')]"/>
  <xsl:apply-templates select="dc:identifier[matches(text(), '^urn.*')]"/>
  <!--xsl:apply-templates select="dc:type[@xml:lang='en-US']" /-->
  <xsl:apply-templates select="dc:relation"/>
  <xsl:apply-templates select="dc:creator"/>
  <xsl:apply-templates select="dc:date"/>
</xsl:template>
<!-- dc:date: the first four characters of the text are emitted as dc.date.issued. -->
<xsl:template match="dc:date">
  <xsl:variable name="year" select="substring(text(),1,4)"/>
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.date.issued'"/>
    <xsl:with-param name="value" select="$year"/>
  </xsl:call-template>
</xsl:template>
<!-- dc:creator: each creator is emitted unchanged as one dc.contributor.author. -->
<xsl:template match="dc:creator">
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="value" select="text()"/>
    <xsl:with-param name="key" select="'dc.contributor.author'"/>
  </xsl:call-template>
</xsl:template>
<!-- dc:relation: its text is emitted unchanged as dc.identifier.url. -->
<xsl:template match="dc:relation">
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="value" select="text()"/>
    <xsl:with-param name="key" select="'dc.identifier.url'"/>
  </xsl:call-template>
</xsl:template>
<!-- dc:description tagged de-DE: emitted as dc.description.abstract with language 'de';
     the characters ‹ › » « are replaced by a straight double quote. -->
<xsl:template match="dc:description[@xml:lang='de-DE']">
  <xsl:variable name="abstract" select="replace(text(),'[‹›»«]','&quot;')"/>
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.description.abstract'"/>
    <xsl:with-param name="language" select="'de'"/>
    <xsl:with-param name="value" select="$abstract"/>
  </xsl:call-template>
</xsl:template>
<!-- dc:description tagged en-US: emitted as dc.description.abstract with language 'en';
     the characters ‹ › » « are replaced by a straight double quote. -->
<xsl:template match="dc:description[@xml:lang='en-US']">
  <xsl:variable name="abstract" select="replace(text(),'[‹›»«]','&quot;')"/>
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.description.abstract'"/>
    <xsl:with-param name="language" select="'en'"/>
    <xsl:with-param name="value" select="$abstract"/>
  </xsl:call-template>
</xsl:template>
<!-- dc:language: only the first node of the selection being processed yields a
     dc.language metadatum; its text is normalised by zoai:sanitizeLanguage. -->
<xsl:template match="dc:language">
  <xsl:if test="position()=1">
    <xsl:call-template name="new-metadatum">
      <xsl:with-param name="key" select="'dc.language'"/>
      <xsl:with-param name="value" select="zoai:sanitizeLanguage( text() )"/>
    </xsl:call-template>
  </xsl:if>
</xsl:template>
<!-- dc:identifier whose text starts with "10.": emitted as dc.identifier.doi,
     prefixed with the https://doi.org/ resolver. -->
<xsl:template match="dc:identifier[matches(text(), '^10\..*')]">
  <xsl:variable name="doiUrl" select="concat('https://doi.org/', text())"/>
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="'dc.identifier.doi'"/>
    <xsl:with-param name="value" select="$doiUrl"/>
  </xsl:call-template>
</xsl:template>
<!-- dc:identifier whose text starts with "urn": emitted unchanged as dc.identifier.urn. -->
<xsl:template match="dc:identifier[matches(text(), '^urn.*')]">
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="value" select="text()"/>
    <xsl:with-param name="key" select="'dc.identifier.urn'"/>
  </xsl:call-template>
</xsl:template>
<!--
  dc:source tagged de-DE: derives up to three metadata from the source string.
    * dc.source.pageinfo   : the last '; '-separated token, if it starts with digits
                             ("3-6") or with one word character followed by digits.
    * dc.source.issue      : the text between 'Nr. ' and the next space, if it contains a digit.
    * dc.source.issuetopic : the text between '): ' and the next '; ', with '•' replaced by ':'.
  NOTE(review): this presumably targets strings such as
  "Journal; Nr. 1 (2014): Topic • Subtitle; 3-6"; verify against the repository output.
-->
<xsl:template match="dc:source[@xml:lang='de-DE']">
  <xsl:variable name="pageinfo" select="tokenize( text(),'; ' )[last()]" />
  <!-- Both page-range shapes are checked in a single test. They overlap (\w also
       matches a digit, so "17-33" satisfies both), and two separate xsl:if blocks
       emitted dc.source.pageinfo twice for such values. -->
  <xsl:if test="$pageinfo != '' and (matches($pageinfo, '^\d+(-\d+)?') or matches($pageinfo, '^\w\d+(-\w\d+)?'))">
    <xsl:call-template name="new-metadatum">
      <xsl:with-param name="key" select="'dc.source.pageinfo'"/>
      <xsl:with-param name="language" select="''"/>
      <xsl:with-param name="value" select="$pageinfo"/>
    </xsl:call-template>
  </xsl:if>
  <xsl:variable name="issue" select="substring-before(substring-after(text(),'Nr. '),' ')" />
  <xsl:if test="$issue != '' and matches ($issue, '\d+')">
    <xsl:call-template name="new-metadatum">
      <xsl:with-param name="key" select="'dc.source.issue'"/>
      <xsl:with-param name="language" select="''"/>
      <xsl:with-param name="value" select="$issue"/>
    </xsl:call-template>
  </xsl:if>
  <xsl:variable name="issuetopic" select="substring-before(substring-after(text(),'): '),'; ')" />
  <xsl:if test="$issuetopic != ''">
    <xsl:call-template name="new-metadatum">
      <xsl:with-param name="key" select="'dc.source.issuetopic'"/>
      <xsl:with-param name="language" select="''"/>
      <xsl:with-param name="value" select="replace($issuetopic,'•',':')"/>
    </xsl:call-template>
  </xsl:if>
</xsl:template>
<!--xsl:template match="dc:type[@xml:lang='en-US']">
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'dc.subject.other'"/>
<xsl:with-param name="language" select="'en'"/>
<xsl:with-param name="value" select="text()"/>
</xsl:call-template>
</xsl:template-->
<!--
  dc:title: a title counts as the main title (dc.title) when its sanitized
  xml:lang equals the sanitized first dc:language of the record, or when it is
  'uk' and the record's main language is empty. Every other title is emitted
  as dc.title.alternative. The characters ‹ › » « are replaced by a straight
  double quote in all cases.
-->
<xsl:template match="dc:title">
  <xsl:variable name="sanitizedLanguage" select="zoai:sanitizeLanguage( @xml:lang )" />
  <xsl:variable name="sanitizedMainLanguage" select="zoai:sanitizeLanguage( /oai:OAI-PMH/oai:GetRecord/oai:record/oai:metadata/oai_dc:dc/dc:language[1] )" />
  <xsl:variable name="isMainTitle"
                select="$sanitizedLanguage = $sanitizedMainLanguage
                        or ($sanitizedLanguage = 'uk' and $sanitizedMainLanguage = '')" />
  <xsl:variable name="titleKey" select="if ($isMainTitle) then 'dc.title' else 'dc.title.alternative'" />
  <xsl:variable name="cleanTitle" select="replace(text(),'[‹›»«]','&quot;')" />
  <!-- In the 'uk' fallback case the sanitized language is 'uk' itself, so
       passing it on yields the same language element as a literal 'uk'. -->
  <xsl:call-template name="new-metadatum">
    <xsl:with-param name="key" select="$titleKey"/>
    <xsl:with-param name="language" select="$sanitizedLanguage"/>
    <xsl:with-param name="value" select="$cleanTitle"/>
  </xsl:call-template>
</xsl:template>
<!--
  Named template that writes one metadatum element:
    key      : written to the key child element.
    language : optional; when not empty it is normalised with
               zoai:sanitizeLanguage and written to a language child element.
               When empty (also the default if the argument is omitted) no
               language element is produced.
    value    : written to the value child element.
-->
<xsl:template name="new-metadatum">
<xsl:param name="key" />
<xsl:param name="language" />
<xsl:param name="value" />
<xsl:element name="metadatum">
<xsl:element name="key">
<xsl:value-of select="$key" />
</xsl:element>
<!-- language is optional: skip the element entirely for language-neutral metadata -->
<xsl:if test="$language != ''">
<xsl:variable name="sanitizedLanguage" select="zoai:sanitizeLanguage($language)" />
<xsl:element name="language">
<xsl:value-of select="$sanitizedLanguage" />
</xsl:element>
</xsl:if>
<xsl:element name="value">
<xsl:value-of select="$value" />
</xsl:element>
</xsl:element>
</xsl:template>
<!--
  Maps the language codes listed below to a two-letter code:
    ger, DE, de-DE, deu    -> de
    eng, EN, en-US, en-GB  -> en
    ukr, uk-UA             -> uk
    rus, ru-RU             -> ru
  Any other input is returned unchanged.
-->
<xsl:function name="zoai:sanitizeLanguage">
  <xsl:param name="inputLanguage" />
  <xsl:choose>
    <xsl:when test="$inputLanguage = ('ger', 'DE', 'de-DE', 'deu')">
      <xsl:text>de</xsl:text>
    </xsl:when>
    <xsl:when test="$inputLanguage = ('eng', 'EN', 'en-US', 'en-GB')">
      <xsl:text>en</xsl:text>
    </xsl:when>
    <xsl:when test="$inputLanguage = ('ukr', 'uk-UA')">
      <xsl:text>uk</xsl:text>
    </xsl:when>
    <xsl:when test="$inputLanguage = ('rus', 'ru-RU')">
      <xsl:text>ru</xsl:text>
    </xsl:when>
    <xsl:otherwise>
      <xsl:value-of select="$inputLanguage" />
    </xsl:otherwise>
  </xsl:choose>
</xsl:function>
</xsl:stylesheet>
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment