Loading src/main/java/org/gesis/dda/feeder/impl/FeedingContextFactory.java +2 −1 Original line number Diff line number Diff line Loading @@ -139,7 +139,8 @@ public class FeedingContextFactory { log.info("using Wzb2SsoarBundleFilter"); result = new Wzb2SsoarBundleFilter(); break; case "https://doabooks.org/oai@@oai:doab-books": case "https://doabooks.org/oai/@@ssoar": case "https://doabooks.org/oai@@ssoar": log.info("using Doabooks2SsoarBundleFilter"); result = new Doabooks2SsoarBundleFilter(); break; Loading src/main/java/org/gesis/dda/filter/impl/Doabooks2SsoarBundleFilter.java +8 −40 Original line number Diff line number Diff line package org.gesis.dda.filter.impl; import java.util.Arrays; import java.util.Set; import java.util.stream.Collectors; import org.gesis.dda.filter.BundleFilter; import org.gesis.dda.publishing.domain.Bundle; import org.gesis.dda.publishing.domain.Metadatum; Loading @@ -15,53 +12,24 @@ public class Doabooks2SsoarBundleFilter implements BundleFilter { private final static Logger LOG = LoggerFactory.getLogger(Doabooks2SsoarBundleFilter.class); /** * SSOAR only gets the given class notation's publications from this source. * SSOAR only gets the classoz notation when publications is relevant. * In addition only known licenced publications are accepted. */ @Override public boolean test(Bundle bundle) { LOG.debug("doabooks filter..."); boolean result; String[] valuesToBeFilteredTo = {"Political institutions and public administration (General)", "Political science", "Political science (General)", "Political theory"}; String[] licenceStringsToBeFilteredTo = { "Attribution (CC by)", "Attribution No Derivatives (CC by-nd)", "Attribution Non-commercial (CC by-nc)", "Attribution Non-commercial No Derivatives (CC by-nc-nd)", "Attribution Non-commercial Share Alike (CC by-nc-sa)", "Attribution Share Alike (CC by-sa)", "CC BY 3.0", "CC BY IGO 3.0"}; Set<Metadatum> metadata = bundle.getMetadata(); Set<Metadatum> clasShorts = metadata.stream().filter( m -> m.getKey().equals("dc.subject.classhort") ).collect(Collectors.toSet()); Metadatum licence = metadata.stream().filter( m -> m.getKey().equals("dc.dc.rights.licence") ).findFirst().orElse(null); if (null != clasShorts.stream().filter(cls -> Arrays.asList(valuesToBeFilteredTo).contains(cls.getValue())) && null != licence) { LOG.info("clasShort-Set not null and a licence entry is found"); if (clasShorts.stream().filter(cls -> Arrays.asList(valuesToBeFilteredTo).contains(cls.getValue())).count() > 0) { if (Arrays.asList(licenceStringsToBeFilteredTo).stream().filter( str -> licence.getValue().contains(str)).count() > 0) { Metadatum clasSoz = metadata.stream().filter( m -> m.getKey().equals("dc.subject.classoz") ).findFirst().orElse(null); Metadatum licence = metadata.stream().filter( m -> m.getKey().equals("dc.rights.licence") ).findFirst().orElse(null); if (null != clasSoz && null != clasSoz.getValue() && null != licence && null != licence.getValue()) { LOG.info("clasSoz and licence entry are found"); result = true; LOG.info("We take it: {}", clasShorts.stream().map(Metadatum::getValue).collect(Collectors.joining("; "))); LOG.info("We take it: {}, {}", clasSoz.getValue(), licence.getValue()); } else { result = false; LOG.info("Filtered away because of licence: {}", licence.getValue()); } } else { result = false; LOG.debug("Filtered away because of: {}", clasShorts.stream().map(Metadatum::getValue).collect(Collectors.joining("; "))); LOG.info("Filtered away"); } return result; } else { LOG.error("null pointer?"); return false; } } } src/test/java/org/gesis/dda/publishing/domain/impl/XsltTransformerOaiPmhBundlesStreamSourceTest.java +1 −0 Original line number Diff line number Diff line Loading @@ -831,6 +831,7 @@ public class XsltTransformerOaiPmhBundlesStreamSourceTest { // assertTrue( metadata.contains( new SimpleMetadatum("dc.publisher.country", "PRT") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("dc.subject.classoz", "20700") ) ); assertTrue( metadata.contains( new SimpleMetadatum("ssoar.urn.registration", "false") ) ); assertTrue( metadata.contains( new SimpleMetadatum("internal.dda.reference", "https://doabooks.org/oai@@oai:doab-books:12555") ) ); // 12703 Bundle bundle2 = bss.getBundle("oai:doab-books:12703"); Loading Loading
src/main/java/org/gesis/dda/feeder/impl/FeedingContextFactory.java +2 −1 Original line number Diff line number Diff line Loading @@ -139,7 +139,8 @@ public class FeedingContextFactory { log.info("using Wzb2SsoarBundleFilter"); result = new Wzb2SsoarBundleFilter(); break; case "https://doabooks.org/oai@@oai:doab-books": case "https://doabooks.org/oai/@@ssoar": case "https://doabooks.org/oai@@ssoar": log.info("using Doabooks2SsoarBundleFilter"); result = new Doabooks2SsoarBundleFilter(); break; Loading
src/main/java/org/gesis/dda/filter/impl/Doabooks2SsoarBundleFilter.java +8 −40 Original line number Diff line number Diff line package org.gesis.dda.filter.impl; import java.util.Arrays; import java.util.Set; import java.util.stream.Collectors; import org.gesis.dda.filter.BundleFilter; import org.gesis.dda.publishing.domain.Bundle; import org.gesis.dda.publishing.domain.Metadatum; Loading @@ -15,53 +12,24 @@ public class Doabooks2SsoarBundleFilter implements BundleFilter { private final static Logger LOG = LoggerFactory.getLogger(Doabooks2SsoarBundleFilter.class); /** * SSOAR only gets the given class notation's publications from this source. * SSOAR only gets the classoz notation when publications is relevant. * In addition only known licenced publications are accepted. */ @Override public boolean test(Bundle bundle) { LOG.debug("doabooks filter..."); boolean result; String[] valuesToBeFilteredTo = {"Political institutions and public administration (General)", "Political science", "Political science (General)", "Political theory"}; String[] licenceStringsToBeFilteredTo = { "Attribution (CC by)", "Attribution No Derivatives (CC by-nd)", "Attribution Non-commercial (CC by-nc)", "Attribution Non-commercial No Derivatives (CC by-nc-nd)", "Attribution Non-commercial Share Alike (CC by-nc-sa)", "Attribution Share Alike (CC by-sa)", "CC BY 3.0", "CC BY IGO 3.0"}; Set<Metadatum> metadata = bundle.getMetadata(); Set<Metadatum> clasShorts = metadata.stream().filter( m -> m.getKey().equals("dc.subject.classhort") ).collect(Collectors.toSet()); Metadatum licence = metadata.stream().filter( m -> m.getKey().equals("dc.dc.rights.licence") ).findFirst().orElse(null); if (null != clasShorts.stream().filter(cls -> Arrays.asList(valuesToBeFilteredTo).contains(cls.getValue())) && null != licence) { LOG.info("clasShort-Set not null and a licence entry is found"); if (clasShorts.stream().filter(cls -> Arrays.asList(valuesToBeFilteredTo).contains(cls.getValue())).count() > 0) { if (Arrays.asList(licenceStringsToBeFilteredTo).stream().filter( str -> licence.getValue().contains(str)).count() > 0) { Metadatum clasSoz = metadata.stream().filter( m -> m.getKey().equals("dc.subject.classoz") ).findFirst().orElse(null); Metadatum licence = metadata.stream().filter( m -> m.getKey().equals("dc.rights.licence") ).findFirst().orElse(null); if (null != clasSoz && null != clasSoz.getValue() && null != licence && null != licence.getValue()) { LOG.info("clasSoz and licence entry are found"); result = true; LOG.info("We take it: {}", clasShorts.stream().map(Metadatum::getValue).collect(Collectors.joining("; "))); LOG.info("We take it: {}, {}", clasSoz.getValue(), licence.getValue()); } else { result = false; LOG.info("Filtered away because of licence: {}", licence.getValue()); } } else { result = false; LOG.debug("Filtered away because of: {}", clasShorts.stream().map(Metadatum::getValue).collect(Collectors.joining("; "))); LOG.info("Filtered away"); } return result; } else { LOG.error("null pointer?"); return false; } } }
src/test/java/org/gesis/dda/publishing/domain/impl/XsltTransformerOaiPmhBundlesStreamSourceTest.java +1 −0 Original line number Diff line number Diff line Loading @@ -831,6 +831,7 @@ public class XsltTransformerOaiPmhBundlesStreamSourceTest { // assertTrue( metadata.contains( new SimpleMetadatum("dc.publisher.country", "PRT") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("dc.subject.classoz", "20700") ) ); assertTrue( metadata.contains( new SimpleMetadatum("ssoar.urn.registration", "false") ) ); assertTrue( metadata.contains( new SimpleMetadatum("internal.dda.reference", "https://doabooks.org/oai@@oai:doab-books:12555") ) ); // 12703 Bundle bundle2 = bss.getBundle("oai:doab-books:12703"); Loading