Commit f48a2537 authored by Steinberg, Jan's avatar Steinberg, Jan
Browse files

DOAbooks - added licence filter

parent 581542c4
Loading
Loading
Loading
Loading
+23 −5
Original line number Diff line number Diff line
@@ -15,7 +15,8 @@ public class Doabooks2SsoarBundleFilter implements BundleFilter {
    private final static Logger LOG = LoggerFactory.getLogger(Doabooks2SsoarBundleFilter.class);
    
    /**
     * SSOAR only gets the given class notation's publications from this source
     * SSOAR only gets the given class notation's publications from this source.
     * In addition, only publications with a known licence are accepted.
     */
    @Override
    public boolean test(Bundle bundle) {
@@ -25,13 +26,30 @@ public class Doabooks2SsoarBundleFilter implements BundleFilter {
                                         "Political science",
                                         "Political science (General)",
                                         "Political theory"};
        String[] licenceStringsToBeFilteredTo = {
                "Attribution (CC by)",
                "Attribution No Derivatives (CC by-nd)",
                "Attribution Non-commercial (CC by-nc)",
                "Attribution Non-commercial No Derivatives (CC by-nc-nd)",
                "Attribution Non-commercial Share Alike (CC by-nc-sa)",
                "Attribution Share Alike (CC by-sa)",
                "CC BY 3.0",
                "CC BY IGO 3.0"};
        Set<Metadatum> metadata = bundle.getMetadata();
        Set<Metadatum> clasShorts = metadata.stream().filter( m -> m.getKey().equals("dc.subject.classhort") ).collect(Collectors.toSet());
        if (null != clasShorts.stream().filter(cls -> Arrays.asList(valuesToBeFilteredTo).contains(cls.getValue()))) {
            LOG.debug("clasShort-Set not null.");
        Metadatum licence = metadata.stream().filter( m -> m.getKey().equals("dc.dc.rights.licence") ).findFirst().orElse(null);
        if (null != clasShorts.stream().filter(cls -> Arrays.asList(valuesToBeFilteredTo).contains(cls.getValue()))
                && null != licence) {
            LOG.debug("clasShort-Set not null and a licence entry is found");
        if (clasShorts.stream().filter(cls -> Arrays.asList(valuesToBeFilteredTo).contains(cls.getValue())).count() > 0) {
            if (Arrays.asList(licenceStringsToBeFilteredTo).stream().anyMatch( str -> licence.getValue().indexOf(str) != -1)) {
                result = true;
                LOG.debug("We take it: {}", clasShorts.stream().map(Metadatum::getValue).collect(Collectors.joining("; ")));
            } else {
                result = false;
                LOG.debug("Filtered away because of licence: {}", licence.getValue());
            }
            
        }
        else {
            result = false;
+2 −2
Original line number Diff line number Diff line
@@ -813,7 +813,7 @@ public class XsltTransformerOaiPmhBundlesStreamSourceTest {
        assertTrue(  metadata.contains( new SimpleMetadatum("dc.identifier.url", "https://www.doabooks.org/doab?func=fulltext&rid=12555") )  );
        assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.identifier.url") ).count() );
        assertTrue(  metadata.contains( new SimpleMetadatum("dc.language", "en") )  );
        assertTrue(  metadata.contains( new SimpleMetadatum("dc.rights.licence", "16") )  );
        assertTrue(  metadata.contains( new SimpleMetadatum("dc.rights.licence", "2") )  );
        // assertTrue(  metadata.contains( new SimpleMetadatum("dc.source.issue", "3") )  );
        // assertTrue(  metadata.contains( new SimpleMetadatum("dc.source.issuetopic", "Sustainable Planning and Technologies") )  );
        assertTrue(  metadata.contains( new SimpleMetadatum("dc.source.pageinfo", "192") )  );
@@ -853,7 +853,7 @@ public class XsltTransformerOaiPmhBundlesStreamSourceTest {
        assertTrue(  metadata2.contains( new SimpleMetadatum("dc.identifier.url", "https://www.doabooks.org/doab?func=fulltext&rid=12703") )  );
        assertEquals(1, metadata2.stream().filter(m -> m.getKey().equals("dc.identifier.url") ).count() );
        // assertTrue(  metadata2.contains( new SimpleMetadatum("dc.language", "en") )  );
        assertTrue(  metadata2.contains( new SimpleMetadatum("dc.rights.licence", "16") )  );
        assertTrue(  metadata2.contains( new SimpleMetadatum("dc.rights.licence", "10") )  );
        // assertTrue(  metadata.contains( new SimpleMetadatum("dc.source.issue", "3") )  );
        // assertTrue(  metadata.contains( new SimpleMetadatum("dc.source.issuetopic", "Sustainable Planning and Technologies") )  );
        assertTrue(  metadata2.contains( new SimpleMetadatum("dc.source.pageinfo", "368") )  );
+63 −3
Original line number Diff line number Diff line
@@ -63,11 +63,11 @@
                    <xsl:with-param name="value" select="'20'"/>
                </xsl:call-template>
                
                <xsl:call-template name="new-metadatum">
                <!--xsl:call-template name="new-metadatum">
                    <xsl:with-param name="key" select="'dc.rights.licence'"/>
                    <xsl:with-param name="language" select="''"/>
                    <xsl:with-param name="value" select="'16'"/>
                </xsl:call-template>
                </xsl:call-template-->
                <xsl:call-template name="new-metadatum">
                    <xsl:with-param name="key" select="'dc.description.pubstatus'"/>
                    <xsl:with-param name="language" select="''"/>
@@ -115,6 +115,7 @@
        <xsl:apply-templates select="marcxml:datafield[@tag='720']/child::node()[@code='a']"/>
        <!--xsl:apply-templates select="marcxml:datafield[@tag='786']/child::node()[@code='n']"/-->
        <xsl:apply-templates select="marcxml:datafield[@tag='856']/child::node()[@code='u']"/>
        <xsl:apply-templates select="marcxml:datafield[@tag='856']/child::node()[@code='z']"/>
    </xsl:template>
    
    <xsl:template match="marcxml:datafield[@tag='020']/child::node()[@code='a']">
@@ -302,7 +303,66 @@
            </xsl:call-template>
        </xsl:if>
    </xsl:template>
    
    <!--
        Maps the text of a MARC 856 subfield with code 'z' to a numeric
        dc.rights.licence value and emits it through the new-metadatum template.
        The first licence phrase found in the subfield wins; when none of the
        known phrases is present, no dc.rights.licence metadatum is produced.
    -->
    <xsl:template match="marcxml:datafield[@tag='856']/child::node()[@code='z']">
        <!-- Match against the full string value of the subfield instead of
             text(): text() only considers the first text node in XPath 1.0 and
             raises a type error in XSLT 2.0 if there are several text nodes. -->
        <xsl:variable name="note" select="string(.)"/>

        <!-- Look up the licence code once; stays empty for unknown licences.
             The order of the branches is the same as the order of precedence. -->
        <xsl:variable name="licence-code">
            <xsl:choose>
                <xsl:when test="contains($note, 'Attribution (CC by)')">1</xsl:when>
                <xsl:when test="contains($note, 'Attribution No Derivatives (CC by-nd)')">9</xsl:when>
                <xsl:when test="contains($note, 'Attribution Non-commercial (CC by-nc)')">10</xsl:when>
                <xsl:when test="contains($note, 'Attribution Non-commercial No Derivatives (CC by-nc-nd)')">2</xsl:when>
                <xsl:when test="contains($note, 'Attribution Non-commercial Share Alike (CC by-nc-sa)')">11</xsl:when>
                <xsl:when test="contains($note, 'Attribution Share Alike (CC by-sa)')">8</xsl:when>
                <xsl:when test="contains($note, 'CC BY 3.0')">15</xsl:when>
                <xsl:when test="contains($note, 'CC BY IGO 3.0')">15</xsl:when>
            </xsl:choose>
        </xsl:variable>

        <!-- Emit exactly one metadatum, and only for a recognised licence.
             string() hands the callee a plain string, as the literal values did. -->
        <xsl:if test="string($licence-code) != ''">
            <xsl:call-template name="new-metadatum">
                <xsl:with-param name="key" select="'dc.rights.licence'"/>
                <xsl:with-param name="language" select="''"/>
                <xsl:with-param name="value" select="string($licence-code)"/>
            </xsl:call-template>
        </xsl:if>
    </xsl:template>
    <xsl:template name="new-metadatum">
        <xsl:param name="key" />
        <xsl:param name="language" />