Commit 58bd1ded authored by Steinberg, Jan's avatar Steinberg, Jan
Browse files

exeley first stage

parent 10694611
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -138,7 +138,6 @@ public class BundlesStreamSourceFactory {
        }
        return result;
    }

    public InputStream getExcelInputStreamFromDataString(String data) {
        InputStream result = null;
        //e.g. data == "excel-database-3" for a lookup in the database org.gesis.dda.wizard.domain.File JPA entity and its byte[] data field
+5 −1
Original line number Diff line number Diff line
@@ -181,7 +181,11 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo
                
                // --- STEP 1: get XML input
                String getRecordXmlDocumentResponseString = client.getRecordString(oaiPmhIdentifier, metadataPrefix);
                
                // Try to make Exeley data transformable (escape bare "&" as "&amp;" to start with).
                // TODO: find the right location for this workaround!
                if ( getRecordXmlDocumentResponseString.contains("identifier=\"oai:exeley.com:10.") ) {
                    getRecordXmlDocumentResponseString = getRecordXmlDocumentResponseString.replace("&", "&");
                }
                //LOG.info("--------------------------");
                //LOG.info("{}", getRecordXmlDocumentResponseString);
                if ( isDeletedRecord(getRecordXmlDocumentResponseString) ) {
+62 −0
Original line number Diff line number Diff line
@@ -229,6 +229,68 @@ public class XsltTransformerOaiPmhBundlesStreamSourceTest {
        assertFalse(  metadata.contains( new SimpleMetadatum("dc.language", "ru") )  );
    }
    
    
    /**
     * Requests a single Exeley record through {@code XsltTransformerOaiPmhBundlesStreamSource},
     * using the {@code xslt/exeley-pam-2-xmlbundle.xslt} stylesheet registered under the
     * {@code oai_dc} prefix, and checks the metadata and contents of the returned bundle.
     *
     * NOTE(review): the source is constructed with http://www.exeley.com/oai/, so this test
     * presumably needs network access to that endpoint - confirm.
     *
     * @throws IOException if the stylesheet resource cannot be read or closed
     */
    @Test
    public void getSinglePublicationExeley() throws IOException {
        Map<String, String> map = new HashMap<>();

        // Read the stylesheet inside try-with-resources so the classpath stream is always closed.
        String exeleyXsltString;
        try (InputStream xsltStream = getClass()
                .getClassLoader()
                .getResourceAsStream("xslt/exeley-pam-2-xmlbundle.xslt")) {
            exeleyXsltString = IOUtils.toString(xsltStream, StandardCharsets.UTF_8);
        }
        map.put("oai_dc", exeleyXsltString);

        XsltTransformerOaiPmhBundlesStreamSource bss = new XsltTransformerOaiPmhBundlesStreamSource("http://www.exeley.com/oai/", map);

        Bundle bundle = bss.getBundle("oai:exeley.com:10.21307/joss-2018-001");
        LOG.info("{}", bundle);
        Set<Metadatum> metadata = bundle.getMetadata();
        metadata.stream().map(Object::toString).forEach(LOG::info);

        // NOTE(review): the commented-out assertions below carry values that look copied from
        // another journal's test (German abstract, uni-hamburg URL); enable or remove once the
        // Exeley mapping is complete.
        assertTrue(  metadata.contains( new SimpleMetadatum("dc.type.stock", "article") )  );
        assertTrue(  metadata.contains( new SimpleMetadatum("dc.type.document", "32") )  );
        // assertTrue(  metadata.contains( new SimpleMetadatum("dc.source.journal", "1439") )  );
        assertTrue(  metadata.contains( new SimpleMetadatum("dc.description.pubstatus", "1") )  );
        assertTrue(  metadata.contains( new SimpleMetadatum("dc.contributor.author", "Krishna, M.") )  );
        assertTrue(  metadata.contains( new SimpleMetadatum("dc.contributor.author", "Bino Paul, G.D.") )  );
        assertEquals(2, metadata.stream().filter(m -> m.getKey().equals("dc.contributor.author") ).count() );
        assertTrue(  metadata.contains( new SimpleMetadatum("dc.description.review", "1") )  );
        assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.description.review") ).count() );
        assertTrue(  metadata.contains( new SimpleMetadatum("dc.date.issued", "2018") )  );
        // assertTrue(  metadata.contains( new SimpleMetadatum("dc.description.abstract", "de", "\"Deutschland ist ein reiches Land\"  dieses Bild dürften die meisten Menschen teilen, auch wenn es brüchig wird, wo doch Obdachlose und Bettler_innen in den Innenstädten, Tafeln und Kleiderkammern, Umsonstläden und Lebensmittelausgabestellen für Bedürftige eine andere Wirklichkeit zeigen. In Hamburg ist diese Spanne von Reich und Arm in besonderer Weise prägend.") )  );
        // assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.description.abstract") ).count() );
        assertTrue(  metadata.contains( new SimpleMetadatum("dc.identifier.issn", "1529-1227") )  );
        // assertTrue(  metadata.contains( new SimpleMetadatum("dc.identifier.url", "https://journals.sub.uni-hamburg.de/hjk/article/view/756/759") )  );
        // assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.identifier.url") ).count() );
        assertTrue(  metadata.contains( new SimpleMetadatum("dc.language", "en") )  );
        assertTrue(  metadata.contains( new SimpleMetadatum("dc.source.issue", "1") )  );
        assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.source.issue") ).count() );
        assertTrue(  metadata.contains( new SimpleMetadatum("dc.rights.licence", "16") )  );
        // assertTrue(  metadata.contains( new SimpleMetadatum("dc.source.issuetopic", "OBEN_UNTEN : Bilder vom Leben der Anderen") )  );
        // assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.source.issuetopic") ).count() );
        // assertTrue(  metadata.contains( new SimpleMetadatum("dc.source.pageinfo", "3-6") )  );
        // assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.source.pageinfo") ).count() );
        assertTrue(  metadata.contains( new SimpleMetadatum("dc.title", "en", "The Structure of Collaboration Networks: An Illustration of Indian Economics") )  );
        assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.title") ).count() );
        // assertTrue(  metadata.contains( new SimpleMetadatum("dc.identifier.urn", "urn:nbn:de:gbv:18-8-7560") )  );
        assertTrue(  metadata.contains( new SimpleMetadatum("dc.identifier.doi", "https://doi.org/10.21307/joss-2018-001") )  );
        assertTrue(  metadata.contains( new SimpleMetadatum("internal.dda.reference", "http://www.exeley.com/oai/@@oai:exeley.com:10.21307/joss-2018-001") )  );
        assertTrue(  metadata.contains( new SimpleMetadatum("dc.publisher.country", "USA") )  );
        // assertTrue(  metadata.contains( new SimpleMetadatum("internal.identifier.classoz", "10900") )  );
        // assertTrue(  metadata.contains( new SimpleMetadatum("dc.subject.classoz", "10900") )  );
        // assertTrue(  metadata.contains( new SimpleMetadatum("internal.identifier.ddc", "300") )  );
        // assertTrue(  metadata.contains( new SimpleMetadatum("dc.subject.ddc", "300") )  );
        assertTrue(  metadata.contains( new SimpleMetadatum("internal.status", "formal und inhaltlich fertig erschlossen") )  );
        assertTrue(  metadata.contains( new SimpleMetadatum("dc.source.other", "Collaboration") )  );
        assertTrue(  metadata.contains( new SimpleMetadatum("dc.source.other", "Structure") )  );
        assertTrue(  metadata.contains( new SimpleMetadatum("dc.source.other", "Networks") )  );
        assertTrue(  metadata.contains( new SimpleMetadatum("dc.source.other", "Degree") )  );
        assertTrue(  metadata.contains( new SimpleMetadatum("dc.source.other", "Indian economics") )  );

        // The bundle is expected to carry exactly one content stream.
        Set<InputStream> fileSet = bundle.getContents();
        assertEquals(  1, fileSet.size());
    }

    @Test
    public void getSinglePublicationHjk() throws IOException {
        Map<String, String> map = new HashMap<>();
+245 −0
Original line number Diff line number Diff line
<?xml version="1.0" encoding="UTF-8" ?>
<xsl:stylesheet version="2.0"
  xmlns:pam="http://prismstandard.org/namespaces/pam/2.2/"
  xmlns:prism="http://prismstandard.org/namespaces/basic/2.2/"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:oai="http://www.openarchives.org/OAI/2.0/"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:zoai="http://git.gesis.org/dda/zoai"
  >
 
  <!-- Indent the serialized result document. -->
  <xsl:output indent="yes"/>
  <!-- Strip whitespace-only text nodes from all elements of the source document. -->
  <xsl:strip-space elements="*"/>
  <!-- Debugging aids (disabled); paste into a template to trace processing: -->
  <!-- <xsl:message terminate="no">here is the tree at the current node <xsl:copy-of select="."/>  </xsl:message> -->
  <!-- <xsl:message terminate="no">here is some message for stderr, e.g. <xsl:value-of select='text()' /> </xsl:message> -->
  <!-- 
       <xsl:for-each select="./*">
         <xsl:message terminate="no">a child=<xsl:value-of select="name()"/></xsl:message>
       </xsl:for-each>
     -->
  
  <!-- Override the built-in element rule: an element matched only by this template produces no output. -->
  <xsl:template match="*" />
  
  <!--
       Entry point: builds the bundle/metadata result element.
       Emits, in order: the internal.dda.reference (request URL, '@@', requested identifier),
       the metadata derived from the record's pam:head, and a set of fixed metadata values.
  -->
  <xsl:template match="/">

    <xsl:element name="bundle">
      <xsl:element name="metadata">
        <!-- Reference back to the harvested record: text of the GetRecord request element + '@@' + its identifier attribute. -->
        <xsl:variable name="ddaReference" select="concat(oai:OAI-PMH/oai:request[@verb='GetRecord']/text(), '@@', oai:OAI-PMH/oai:request[@verb='GetRecord']/@identifier)" />
        <xsl:call-template name="new-metadatum">
          <xsl:with-param name="key" select="'internal.dda.reference'"/>
          <xsl:with-param name="language" select="''"/>
          <xsl:with-param name="value" select="$ddaReference"/>
        </xsl:call-template>

        <!-- Record-specific metadata taken from the article head. -->
        <xsl:apply-templates select="oai:OAI-PMH/oai:GetRecord/oai:record/oai:metadata/pam:message/pam:article/pam:head" />

        <!-- Fixed values emitted for every record handled by this stylesheet. -->
        <xsl:call-template name="new-metadatum">
          <xsl:with-param name="key" select="'dc.type.stock'"/>
          <xsl:with-param name="language" select="''"/>
          <xsl:with-param name="value" select="'article'"/>
        </xsl:call-template>

        <xsl:call-template name="new-metadatum">
          <xsl:with-param name="key" select="'dc.type.document'"/>
          <xsl:with-param name="language" select="''"/>
          <xsl:with-param name="value" select="'32'"/>
        </xsl:call-template>

        <!-- NOTE(review): was '32' (identical to dc.type.document above, apparently copied);
             the accompanying test getSinglePublicationExeley expects '16' - confirm the licence id. -->
        <xsl:call-template name="new-metadatum">
          <xsl:with-param name="key" select="'dc.rights.licence'"/>
          <xsl:with-param name="language" select="''"/>
          <xsl:with-param name="value" select="'16'"/>
        </xsl:call-template>

        <xsl:call-template name="new-metadatum">
          <xsl:with-param name="key" select="'dc.source.journal'"/>
          <xsl:with-param name="language" select="''"/>
          <xsl:with-param name="value" select="'1507'"/>
        </xsl:call-template>

        <xsl:call-template name="new-metadatum">
          <xsl:with-param name="key" select="'dc.description.pubstatus'"/>
          <xsl:with-param name="language" select="''"/>
          <xsl:with-param name="value" select="'1'"/>
        </xsl:call-template>

        <xsl:call-template name="new-metadatum">
          <xsl:with-param name="key" select="'dc.publisher.country'"/>
          <xsl:with-param name="language" select="''"/>
          <xsl:with-param name="value" select="'USA'"/>
        </xsl:call-template>

        <xsl:call-template name="new-metadatum">
          <xsl:with-param name="key" select="'ssoar.urn.registration'"/>
          <xsl:with-param name="language" select="''"/>
          <xsl:with-param name="value" select="'false'"/>
        </xsl:call-template>

        <xsl:call-template name="new-metadatum">
          <xsl:with-param name="key" select="'dc.description.review'"/>
          <xsl:with-param name="language" select="''"/>
          <xsl:with-param name="value" select="'1'"/>
        </xsl:call-template>

        <!--xsl:call-template name="new-metadatum">
          <xsl:with-param name="key" select="'internal.identifier.classoz'"/>
          <xsl:with-param name="language" select="''"/>
          <xsl:with-param name="value" select="'10900'"/>
        </xsl:call-template-->

        <!--xsl:call-template name="new-metadatum">
          <xsl:with-param name="key" select="'dc.subject.classoz'"/>
          <xsl:with-param name="language" select="''"/>
          <xsl:with-param name="value" select="'10900'"/>
        </xsl:call-template-->

        <xsl:call-template name="new-metadatum">
          <xsl:with-param name="key" select="'dc.identifier.issn'"/>
          <xsl:with-param name="language" select="''"/>
          <xsl:with-param name="value" select="'1529-1227'"/>
        </xsl:call-template>

        <!--xsl:call-template name="new-metadatum">
          <xsl:with-param name="key" select="'internal.identifier.ddc'"/>
          <xsl:with-param name="language" select="''"/>
          <xsl:with-param name="value" select="'330'"/>
        </xsl:call-template-->

        <!--xsl:call-template name="new-metadatum">
          <xsl:with-param name="key" select="'dc.subject.ddc'"/>
          <xsl:with-param name="language" select="''"/>
          <xsl:with-param name="value" select="'330'"/>
        </xsl:call-template-->

        <xsl:call-template name="new-metadatum">
          <xsl:with-param name="key" select="'internal.status'"/>
          <xsl:with-param name="language" select="''"/>
          <xsl:with-param name="value" select="'formal und inhaltlich fertig erschlossen'"/>
        </xsl:call-template>
      </xsl:element>
    </xsl:element>
  </xsl:template>
  
  <!--
       Article head: dispatches its children to their templates in a fixed order
       (identifier, title, creator, publication date, volume, number).
       The comma operator keeps this order regardless of document order.
  -->
  <xsl:template match="oai:OAI-PMH/oai:GetRecord/oai:record/oai:metadata/pam:message/pam:article/pam:head">
    <xsl:apply-templates
        select="dc:identifier, dc:title, dc:creator, prism:publicationDate, prism:volume, prism:number"/>
  </xsl:template>
    <!--
         dc:identifier: emitted as dc.identifier.doi when its text contains 'doi.org',
         otherwise as dc.identifier.url. The value is the element's text in both cases.
    -->
    <xsl:template match="dc:identifier">
      <xsl:variable name="identifierKey"
                    select="if (contains(text(),'doi.org')) then 'dc.identifier.doi' else 'dc.identifier.url'"/>
      <xsl:call-template name="new-metadatum">
        <xsl:with-param name="key" select="$identifierKey"/>
        <xsl:with-param name="language" select="''"/>
        <xsl:with-param name="value" select="text()"/>
      </xsl:call-template>
    </xsl:template>
  <!-- dc:title: emitted as dc.title with language 'en'; whitespace in the title text is normalized. -->
  <xsl:template match="dc:title">
    <xsl:variable name="normalizedTitle" select="normalize-space(text())"/>
    <xsl:call-template name="new-metadatum">
      <xsl:with-param name="value" select="$normalizedTitle"/>
      <xsl:with-param name="language" select="'en'"/>
      <xsl:with-param name="key" select="'dc.title'"/>
    </xsl:call-template>
  </xsl:template>
  <!-- dc:creator: each creator element becomes one dc.contributor.author metadatum (no language). -->
  <xsl:template match="dc:creator">
    <xsl:variable name="authorName" select="text()"/>
    <xsl:call-template name="new-metadatum">
      <xsl:with-param name="value" select="$authorName"/>
      <xsl:with-param name="language" select="''"/>
      <xsl:with-param name="key" select="'dc.contributor.author'"/>
    </xsl:call-template>
  </xsl:template>
  <!--
       prism:publicationDate: emitted as dc.date.issued (no language).
       The value is the part before the first '-' (presumably the year of a yyyy-mm-dd date;
       confirm against the feed). If the text contains no '-', the whole text is used;
       substring-before() alone would yield an empty value in that case.
  -->
  <xsl:template match="prism:publicationDate">
    <xsl:call-template name="new-metadatum">
      <xsl:with-param name="key" select="'dc.date.issued'"/>
      <xsl:with-param name="language" select="''"/>
      <xsl:with-param name="value"
                      select="if (contains(text(),'-')) then substring-before(text(),'-') else text()"/>
    </xsl:call-template>
  </xsl:template>
  <!-- prism:volume: emitted as dc.source.volume (no language). -->
  <xsl:template match="prism:volume">
    <xsl:variable name="volumeText" select="text()"/>
    <xsl:call-template name="new-metadatum">
      <xsl:with-param name="value" select="$volumeText"/>
      <xsl:with-param name="language" select="''"/>
      <xsl:with-param name="key" select="'dc.source.volume'"/>
    </xsl:call-template>
  </xsl:template>
  <!-- prism:number: emitted as dc.source.issue (no language). -->
  <xsl:template match="prism:number">
    <xsl:variable name="issueText" select="text()"/>
    <xsl:call-template name="new-metadatum">
      <xsl:with-param name="value" select="$issueText"/>
      <xsl:with-param name="language" select="''"/>
      <xsl:with-param name="key" select="'dc.source.issue'"/>
    </xsl:call-template>
  </xsl:template>
   
  
        <!-- guard against multiple set assignments such as 'region:ART' AND 'driver' 
        <xsl:if test="starts-with( text(), 'region:' )">
          <xsl:choose>
            <xsl:when test="text() = 'region:ART' or text() = 'region:LET'">
              <xsl:call-template name="new-metadatum">
                <xsl:with-param name="key" select="'dc.description.review'"/>
                <xsl:with-param name="language" select="''"/>
                <xsl:with-param name="value" select="'1'"/>
              </xsl:call-template>
            </xsl:when>
            <xsl:otherwise>
              <xsl:call-template name="new-metadatum">
                <xsl:with-param name="key" select="'dc.description.review'"/>
                <xsl:with-param name="language" select="''"/>
                <xsl:with-param name="value" select="'2'"/>
              </xsl:call-template>
            </xsl:otherwise>
          </xsl:choose>
        </xsl:if>-->
        
 
  <!--
       Named template: writes one metadatum element with a key child, an optional
       language child, and a value child.
         key      - text of the key element
         language - when not the empty string, passed through zoai:sanitizeLanguage
                    and written as the language element; otherwise no language element
         value    - text of the value element
  -->
  <xsl:template name="new-metadatum">
    <xsl:param name="key" />
    <xsl:param name="language" />
    <xsl:param name="value" />
    <xsl:element name="metadatum">
      <xsl:element name="key"><xsl:value-of select="$key" /></xsl:element>
      <xsl:if test="$language != ''">
        <xsl:element name="language">
          <xsl:value-of select="zoai:sanitizeLanguage($language)" />
        </xsl:element>
      </xsl:if>
      <xsl:element name="value"><xsl:value-of select="$value" /></xsl:element>
    </xsl:element>
  </xsl:template>
  
  <!--
       Maps known language code variants to a two-letter code:
       'ger', 'DE', 'de-DE', 'deu' give 'de'; 'eng', 'EN', 'en-US', 'en-GB' give 'en'.
       Any other input is returned unchanged.
  -->
  <xsl:function name="zoai:sanitizeLanguage">
    <xsl:param name="inputLanguage" />
    <xsl:value-of select="
        if ($inputLanguage = ('ger', 'DE', 'de-DE', 'deu')) then 'de'
        else if ($inputLanguage = ('eng', 'EN', 'en-US', 'en-GB')) then 'en'
        else $inputLanguage" />
  </xsl:function>
</xsl:stylesheet>
 No newline at end of file