Commit 84b76a16 authored by Steinberg, Jan
Browse files

doabooks - filter corrected and simplified, test extension

parent 48f8c86c
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -139,7 +139,8 @@ public class FeedingContextFactory {
                log.info("using Wzb2SsoarBundleFilter");
                result = new Wzb2SsoarBundleFilter();
                break;
            case "https://doabooks.org/oai@@oai:doab-books":
            case "https://doabooks.org/oai/@@ssoar":
            case "https://doabooks.org/oai@@ssoar":
                log.info("using Doabooks2SsoarBundleFilter");
                result = new Doabooks2SsoarBundleFilter();
                break;
+8 −40
Original line number Diff line number Diff line
package org.gesis.dda.filter.impl;

import java.util.Arrays;
import java.util.Set;
import java.util.stream.Collectors;

import org.gesis.dda.filter.BundleFilter;
import org.gesis.dda.publishing.domain.Bundle;
import org.gesis.dda.publishing.domain.Metadatum;
@@ -15,53 +12,24 @@ public class Doabooks2SsoarBundleFilter implements BundleFilter {
    private final static Logger LOG = LoggerFactory.getLogger(Doabooks2SsoarBundleFilter.class);
    
    /**
     * SSOAR only gets the given class notation's publications from this source.
     * SSOAR only gets the classoz notation when publications is relevant.
     * In addition only known licenced publications are accepted. 
     */
    @Override
    public boolean test(Bundle bundle) {
        LOG.debug("doabooks filter...");
        boolean result;
        String[] valuesToBeFilteredTo = {"Political institutions and public administration (General)",
                                         "Political science",
                                         "Political science (General)",
                                         "Political theory"};
        String[] licenceStringsToBeFilteredTo = {
                "Attribution (CC by)",
                "Attribution No Derivatives (CC by-nd)",
                "Attribution Non-commercial (CC by-nc)",
                "Attribution Non-commercial No Derivatives (CC by-nc-nd)",
                "Attribution Non-commercial Share Alike (CC by-nc-sa)",
                "Attribution Share Alike (CC by-sa)",
                "CC BY 3.0",
                "CC BY IGO 3.0"};
        Set<Metadatum> metadata = bundle.getMetadata();
        Set<Metadatum> clasShorts = metadata.stream().filter( m -> m.getKey().equals("dc.subject.classhort") ).collect(Collectors.toSet());
        Metadatum licence = metadata.stream().filter( m -> m.getKey().equals("dc.dc.rights.licence") ).findFirst().orElse(null);
        if (null != clasShorts.stream().filter(cls -> Arrays.asList(valuesToBeFilteredTo).contains(cls.getValue()))
                && null != licence) {
            LOG.info("clasShort-Set not null and a licence entry is found");
        if (clasShorts.stream().filter(cls -> Arrays.asList(valuesToBeFilteredTo).contains(cls.getValue())).count() > 0) {
            if (Arrays.asList(licenceStringsToBeFilteredTo).stream().filter( str -> licence.getValue().contains(str)).count() > 0) {
        Metadatum clasSoz = metadata.stream().filter( m -> m.getKey().equals("dc.subject.classoz") ).findFirst().orElse(null);
        Metadatum licence = metadata.stream().filter( m -> m.getKey().equals("dc.rights.licence") ).findFirst().orElse(null);
        if (null != clasSoz && null != clasSoz.getValue() && null != licence && null != licence.getValue()) {
            LOG.info("clasSoz and  licence entry are found");
                result = true;
                LOG.info("We take it: {}", clasShorts.stream().map(Metadatum::getValue).collect(Collectors.joining("; ")));
                LOG.info("We take it: {}, {}", clasSoz.getValue(), licence.getValue());
        } else {
            result = false;
                LOG.info("Filtered away because of licence: {}", licence.getValue());
            }
            
        }
        else {
            result = false;
            LOG.debug("Filtered away because of: {}", clasShorts.stream().map(Metadatum::getValue).collect(Collectors.joining("; ")));
            
            LOG.info("Filtered away");
        }
        
        return result;
        } else {
            LOG.error("null pointer?");
            return false;
        }
}
    
}
+1 −0
Original line number Diff line number Diff line
@@ -831,6 +831,7 @@ public class XsltTransformerOaiPmhBundlesStreamSourceTest {
        // assertTrue(  metadata.contains( new SimpleMetadatum("dc.publisher.country", "PRT") )  );
        // assertTrue(  metadata.contains( new SimpleMetadatum("dc.subject.classoz", "20700") )  );
        assertTrue(  metadata.contains( new SimpleMetadatum("ssoar.urn.registration", "false") )  );
        assertTrue(  metadata.contains( new SimpleMetadatum("internal.dda.reference", "https://doabooks.org/oai@@oai:doab-books:12555") )  );
        
        // 12703
        Bundle bundle2 = bss.getBundle("oai:doab-books:12703");