Commit 84b76a16 by Steinberg, Jan

doabooks - filter corrected and simplified, test extension

parent 48f8c86c
......@@ -139,7 +139,8 @@ public class FeedingContextFactory {
log.info("using Wzb2SsoarBundleFilter");
result = new Wzb2SsoarBundleFilter();
break;
case "https://doabooks.org/oai@@oai:doab-books":
case "https://doabooks.org/oai/@@ssoar":
case "https://doabooks.org/oai@@ssoar":
log.info("using Doabooks2SsoarBundleFilter");
result = new Doabooks2SsoarBundleFilter();
break;
......
package org.gesis.dda.filter.impl;
import java.util.Arrays;
import java.util.Set;
import java.util.stream.Collectors;
import org.gesis.dda.filter.BundleFilter;
import org.gesis.dda.publishing.domain.Bundle;
import org.gesis.dda.publishing.domain.Metadatum;
......@@ -15,53 +12,24 @@ public class Doabooks2SsoarBundleFilter implements BundleFilter {
private final static Logger LOG = LoggerFactory.getLogger(Doabooks2SsoarBundleFilter.class);
/**
 * Decides whether a bundle is accepted by this filter.
 *
 * NOTE(review): this listing looks like a diff rendered without +/- markers, so
 * statements of the previous and of the new implementation are interleaved
 * (for example {@code licence} is declared twice, with different keys, and the
 * braces do not balance). It is not compilable as shown - confirm against the
 * actual file before relying on any single line.
 *
 * Earlier description: SSOAR only gets the given class notation's publications from this source.
 * Later description: SSOAR only gets publications that carry a classoz notation
 * (presumably that is what marks a publication as relevant - TODO confirm).
 * In addition only known licenced publications are accepted.
 *
 * @param bundle the bundle whose metadata set is inspected
 * @return {@code true} if the bundle is accepted, {@code false} if it is filtered away
 */
@Override
public boolean test(Bundle bundle) {
LOG.debug("doabooks filter...");
boolean result;
// Accepted values for "dc.subject.classhort" entries (used by the clasShorts checks below).
String[] valuesToBeFilteredTo = {"Political institutions and public administration (General)",
"Political science",
"Political science (General)",
"Political theory"};
// Licence names that are looked for as substrings of the licence value.
String[] licenceStringsToBeFilteredTo = {
"Attribution (CC by)",
"Attribution No Derivatives (CC by-nd)",
"Attribution Non-commercial (CC by-nc)",
"Attribution Non-commercial No Derivatives (CC by-nc-nd)",
"Attribution Non-commercial Share Alike (CC by-nc-sa)",
"Attribution Share Alike (CC by-sa)",
"CC BY 3.0",
"CC BY IGO 3.0"};
Set<Metadatum> metadata = bundle.getMetadata();
// All metadata entries keyed "dc.subject.classhort".
Set<Metadatum> clasShorts = metadata.stream().filter( m -> m.getKey().equals("dc.subject.classhort") ).collect(Collectors.toSet());
// NOTE(review): the key below has a doubled "dc." prefix; the second declaration of
// licence further down uses "dc.rights.licence" instead.
Metadatum licence = metadata.stream().filter( m -> m.getKey().equals("dc.dc.rights.licence") ).findFirst().orElse(null);
// NOTE(review): Stream.filter(...) returns a stream and never null, so the first half of
// this condition is always true; only the licence null check has an effect.
if (null != clasShorts.stream().filter(cls -> Arrays.asList(valuesToBeFilteredTo).contains(cls.getValue()))
&& null != licence) {
LOG.info("clasShort-Set not null and a licence entry is found");
// At least one classhort value must be in the accepted list ...
if (clasShorts.stream().filter(cls -> Arrays.asList(valuesToBeFilteredTo).contains(cls.getValue())).count() > 0) {
// ... and the licence value must contain at least one of the known licence names.
if (Arrays.asList(licenceStringsToBeFilteredTo).stream().filter( str -> licence.getValue().contains(str)).count() > 0) {
// First "dc.subject.classoz" and first "dc.rights.licence" entry; null when no such entry exists.
Metadatum clasSoz = metadata.stream().filter( m -> m.getKey().equals("dc.subject.classoz") ).findFirst().orElse(null);
Metadatum licence = metadata.stream().filter( m -> m.getKey().equals("dc.rights.licence") ).findFirst().orElse(null);
// Accept only when both entries exist and both carry a non-null value.
if (null != clasSoz && null != clasSoz.getValue() && null != licence && null != licence.getValue()) {
LOG.info("clasSoz and licence entry are found");
result = true;
LOG.info("We take it: {}", clasShorts.stream().map(Metadatum::getValue).collect(Collectors.joining("; ")));
} else {
result = false;
LOG.info("Filtered away because of licence: {}", licence.getValue());
}
}
else {
LOG.info("We take it: {}, {}", clasSoz.getValue(), licence.getValue());
} else {
result = false;
LOG.debug("Filtered away because of: {}", clasShorts.stream().map(Metadatum::getValue).collect(Collectors.joining("; ")));
LOG.info("Filtered away");
}
return result;
} else {
LOG.error("null pointer?");
return false;
}
}
}
......@@ -831,6 +831,7 @@ public class XsltTransformerOaiPmhBundlesStreamSourceTest {
// assertTrue( metadata.contains( new SimpleMetadatum("dc.publisher.country", "PRT") ) );
// assertTrue( metadata.contains( new SimpleMetadatum("dc.subject.classoz", "20700") ) );
assertTrue( metadata.contains( new SimpleMetadatum("ssoar.urn.registration", "false") ) );
assertTrue( metadata.contains( new SimpleMetadatum("internal.dda.reference", "https://doabooks.org/oai@@oai:doab-books:12555") ) );
// 12703
Bundle bundle2 = bss.getBundle("oai:doab-books:12703");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment