Commit 0ee0238a authored by Steinberg, Jan

ASEAS - xslt, filter, test

parent f7a7dc9b
......@@ -7,6 +7,7 @@ import org.gesis.dda.feeder.FeedingContext;
import org.gesis.dda.feeder.NextIncrementalHarvestingIntervalStrategy;
import org.gesis.dda.filter.BundleFilter;
import org.gesis.dda.filter.impl.AcceptAnyBundleFilter;
import org.gesis.dda.filter.impl.Aseas2SsoarBundleFilter;
import org.gesis.dda.filter.impl.Fqs2SsoarBundleFilter;
import org.gesis.dda.filter.impl.SsoarTargetRepositoryBundleFilter;
import org.gesis.dda.filter.impl.Wbv2SsoarBundleFilter;
......@@ -128,6 +129,10 @@ public class FeedingContextFactory {
log.info("using Fqs2SsoarBundleFilter");
result = new Fqs2SsoarBundleFilter();
break;
case "https://aseas.univie.ac.at/index.php/aseas/oai/$$ssoar":
log.info("using Aseas2SsoarBundleFilter");
result = new Aseas2SsoarBundleFilter();
break;
default:
log.debug("using default IdentityMetadataTransformer");
result = new AcceptAnyBundleFilter();
......
package org.gesis.dda.filter.impl;
import java.util.Set;
import org.gesis.dda.filter.BundleFilter;
import org.gesis.dda.publishing.domain.Bundle;
import org.gesis.dda.publishing.domain.Metadatum;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Bundle filter for the ASEAS journal when feeding SSOAR.
 *
 * <p>SSOAR already has everything up to and including dc.source.volume=9
 * dc.source.issue=1. Therefore a bundle is accepted when its volume is
 * &gt;= 10, or when its volume is 9 and its issue is &gt; 1.
 *
 * <p>Bundles whose position cannot be determined (no volume, unparsable
 * volume, or - for volume 9 - a missing or unparsable issue) are accepted
 * rather than silently dropped.
 */
public class Aseas2SsoarBundleFilter implements BundleFilter {

    private static final Logger LOG = LoggerFactory.getLogger(Aseas2SsoarBundleFilter.class);

    private static final String VOLUME_KEY = "dc.source.volume";
    private static final String ISSUE_KEY = "dc.source.issue";

    /** Last volume of which SSOAR already holds (a part of) the issues. */
    private static final int LAST_HARVESTED_VOLUME = 9;
    /** Last issue of {@link #LAST_HARVESTED_VOLUME} that SSOAR already holds. */
    private static final int LAST_HARVESTED_ISSUE = 1;

    /**
     * Decides whether the given bundle still has to be harvested.
     *
     * @param bundle the bundle to inspect; its metadata is searched for
     *               {@code dc.source.volume} and {@code dc.source.issue}
     * @return {@code false} only if volume/issue show that the bundle is at or
     *         before volume 9 issue 1; {@code true} otherwise
     */
    @Override
    public boolean test(Bundle bundle) {
        Set<Metadatum> metadata = bundle.getMetadata();

        Metadatum volume = findFirst(metadata, VOLUME_KEY);
        if (null == volume) {
            // no volume information at all -> nothing to decide on, accept
            return true;
        }

        int volumeValue;
        try {
            volumeValue = Integer.parseInt(volume.getValue());
        }
        catch (NumberFormatException e) {
            LOG.warn("unparsable dc.source.volume={} for bundle.reference={}", volume.getValue(), bundle.getReference() );
            return true;
        }

        if ( volumeValue > LAST_HARVESTED_VOLUME ) {
            return true;
        }
        if ( volumeValue < LAST_HARVESTED_VOLUME ) {
            // the issue is irrelevant here, so it is neither required nor parsed
            LOG.info("filtering away bundle.reference={}", bundle.getReference() );
            return false;
        }

        // volume == LAST_HARVESTED_VOLUME: only now does the issue matter
        Metadatum issue = findFirst(metadata, ISSUE_KEY);
        if (null == issue) {
            // previously this case ended in a NullPointerException
            LOG.warn("missing dc.source.issue for bundle.reference={}", bundle.getReference() );
            return true;
        }

        int issueValue;
        try {
            issueValue = Integer.parseInt(issue.getValue());
        }
        catch (NumberFormatException e) {
            LOG.warn("unparsable dc.source.issue={} for bundle.reference={}", issue.getValue(), bundle.getReference() );
            return true;
        }

        if ( issueValue > LAST_HARVESTED_ISSUE ) {
            return true;
        }
        LOG.info("filtering away bundle.reference={}", bundle.getReference() );
        return false;
    }

    /** Returns the first metadatum with the given key, or {@code null} if there is none. */
    private static Metadatum findFirst(Set<Metadatum> metadata, String key) {
        return metadata.stream()
                .filter( m -> key.equals(m.getKey()) )
                .findFirst()
                .orElse(null);
    }

}
......@@ -331,6 +331,68 @@ public class XsltTransformerOaiPmhBundlesStreamSourceTest {
assertEquals( 1, fileSet2.size());
}
/**
 * Transforms the single ASEAS record oai:ojs.univie.ac.at:article/394 with the
 * aseas oai_dc XSLT and checks the resulting bundle metadata and contents.
 *
 * NOTE(review): getBundle presumably queries the live ASEAS OAI-PMH endpoint,
 * which would make this test depend on network availability - confirm.
 */
@Test
public void getSinglePublicationAseas() throws IOException {
    Map<String, String> map = new HashMap<>();

    // IOUtils.toString does not close the stream it reads from, so close it here
    String oaiDcXsltString;
    try (InputStream xsltStream = getClass().
            getClassLoader().
            getResourceAsStream("xslt/aseas-oai_dc-2-xmlbundle.xslt")) {
        // fail with a clear assertion instead of an NPE if the XSLT is not on the classpath
        assertTrue( null != xsltStream );
        oaiDcXsltString = IOUtils.toString(xsltStream, StandardCharsets.UTF_8);
    }
    map.put("oai_dc", oaiDcXsltString);

    XsltTransformerOaiPmhBundlesStreamSource bss = new XsltTransformerOaiPmhBundlesStreamSource("https://aseas.univie.ac.at/index.php/aseas/oai/", map);

    Bundle bundle = bss.getBundle("oai:ojs.univie.ac.at:article/394");
    LOG.info("{}", bundle);

    Set<Metadatum> metadata = bundle.getMetadata();
    metadata.stream().map(Object::toString).forEach(LOG::info);

    assertTrue( metadata.contains( new SimpleMetadatum("dc.type.stock", "article") ) );
    assertTrue( metadata.contains( new SimpleMetadatum("dc.type.document", "32") ) );
    assertTrue( metadata.contains( new SimpleMetadatum("dc.source.journal", "5") ) );
    assertTrue( metadata.contains( new SimpleMetadatum("dc.description.pubstatus", "1") ) );

    assertTrue( metadata.contains( new SimpleMetadatum("dc.contributor.author", "Pichler, Melanie") ) );
    assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.contributor.author") ).count() );

    assertTrue( metadata.contains( new SimpleMetadatum("dc.description.review", "1") ) );
    assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.description.review") ).count() );

    assertTrue( metadata.contains( new SimpleMetadatum("dc.date.issued", "2010") ) );

    assertTrue( metadata.contains( new SimpleMetadatum("dc.description.abstract", "en", "This paper deals with agrofuel policies within the European Union (EU) and the consequences of these policies in Indonesia. That South-East-Asian country is the world leader in the production and exportation of palm oil, which is one of the cheapest feedstocks for the production of biodiesel. Recently, production has expanded signifi cantly due to the incentives of the international energy market. This paper analyses the interests and strategies of the key players in the palm oil and agrofuels business in Indonesia, looks at the model of development they (re-)produce, and analyses their reactions to the problem of sustainability in relation to deforestation, land confl icts, and biodiversity loss through the expansion of monocultures and industrial agriculture.") ) );
    assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.description.abstract") ).count() );

    assertTrue( metadata.contains( new SimpleMetadatum("dc.identifier.issn", "1999-253X") ) );

    assertTrue( metadata.contains( new SimpleMetadatum("dc.identifier.url", "https://aseas.univie.ac.at/index.php/aseas/article/view/394/198") ) );
    assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.identifier.url") ).count() );

    assertTrue( metadata.contains( new SimpleMetadatum("dc.language", "en") ) );

    assertTrue( metadata.contains( new SimpleMetadatum("dc.source.issue", "2") ) );
    assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.source.issue") ).count() );

    assertTrue( metadata.contains( new SimpleMetadatum("dc.source.volume", "3") ) );
    assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.source.volume") ).count() );

    assertTrue( metadata.contains( new SimpleMetadatum("dc.rights.licence", "19") ) );

    // this record is expected to yield no issue topic at all
    assertEquals(0, metadata.stream().filter(m -> m.getKey().equals("dc.source.issuetopic") ).count() );

    assertTrue( metadata.contains( new SimpleMetadatum("dc.source.pageinfo", "175-193") ) );
    assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.source.pageinfo") ).count() );

    assertTrue( metadata.contains( new SimpleMetadatum("dc.title", "en", "Agrofuels in Indonesia: Structures, Conflicts, Consequences, and the Role of the EU") ) );
    assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.title") ).count() );

    assertTrue( metadata.contains( new SimpleMetadatum("internal.dda.reference", "https://aseas.univie.ac.at/index.php/aseas/oai/@@oai:ojs.univie.ac.at:article/394") ) );
    assertTrue( metadata.contains( new SimpleMetadatum("dc.publisher.country", "AUT") ) );
    assertTrue( metadata.contains( new SimpleMetadatum("internal.status", "formal und inhaltlich fertig erschlossen") ) );

    // exactly one content file is expected for this record
    Set<InputStream> fileSet = bundle.getContents();
    assertEquals( 1, fileSet.size());
}
@Test
public void getSinglePublicationJcca() throws IOException {
Map<String, String> map = new HashMap<>();
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment