Loading src/main/java/org/gesis/dda/publishing/domain/impl/XsltTransformerOaiPmhBundlesStreamSource.java +318 −313 Original line number Original line Diff line number Diff line Loading @@ -46,8 +46,8 @@ import org.xml.sax.InputSource; import net.sf.saxon.TransformerFactoryImpl; import net.sf.saxon.TransformerFactoryImpl; import net.sf.saxon.trans.XPathException; import net.sf.saxon.trans.XPathException; public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSource, ErrorListener { public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSource, ErrorListener { private final static Logger LOG = LoggerFactory.getLogger(XsltTransformerOaiPmhBundlesStreamSource.class); private final static Logger LOG = LoggerFactory.getLogger(XsltTransformerOaiPmhBundlesStreamSource.class); Loading @@ -66,14 +66,16 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo client = new OaiPmhClient(oaiPmhEndpoint); client = new OaiPmhClient(oaiPmhEndpoint); } } public XsltTransformerOaiPmhBundlesStreamSource(String oaiPmhEndpoint, Map<String, String> metadataPrefix2XsltMap, LocalDate from, LocalDate until) { public XsltTransformerOaiPmhBundlesStreamSource(String oaiPmhEndpoint, Map<String, String> metadataPrefix2XsltMap, LocalDate from, LocalDate until) { this(oaiPmhEndpoint, metadataPrefix2XsltMap); this(oaiPmhEndpoint, metadataPrefix2XsltMap); this.intervalType = HarvestingIntervalType.DAY_INTERVAL_HARVEST; this.intervalType = HarvestingIntervalType.DAY_INTERVAL_HARVEST; this.dayFrom = from; this.dayFrom = from; this.dayUntil = until; this.dayUntil = until; } } public XsltTransformerOaiPmhBundlesStreamSource(String oaiPmhEndpoint, Map<String, String> metadataPrefix2XsltMap, Instant from, Instant until) { public XsltTransformerOaiPmhBundlesStreamSource(String oaiPmhEndpoint, Map<String, String> metadataPrefix2XsltMap, Instant from, Instant until) { this(oaiPmhEndpoint, metadataPrefix2XsltMap); this(oaiPmhEndpoint, metadataPrefix2XsltMap); this.intervalType = HarvestingIntervalType.SECOND_INTERVAL_HARVEST; this.intervalType = HarvestingIntervalType.SECOND_INTERVAL_HARVEST; this.secondFrom = from; this.secondFrom = from; Loading @@ -81,7 +83,8 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo } } // And here come the same methods but with set specification // And here come the same methods but with set specification public XsltTransformerOaiPmhBundlesStreamSource(String oaiPmhEndpoint, String setSpec, Map<String, String> metadataPrefix2XsltMap) { public XsltTransformerOaiPmhBundlesStreamSource(String oaiPmhEndpoint, String setSpec, Map<String, String> metadataPrefix2XsltMap) { this.oaiPmhEndpoint = oaiPmhEndpoint; this.oaiPmhEndpoint = oaiPmhEndpoint; this.setSpec = setSpec; this.setSpec = setSpec; this.metadataPrefix2XsltMap = metadataPrefix2XsltMap; this.metadataPrefix2XsltMap = metadataPrefix2XsltMap; Loading @@ -89,14 +92,16 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo client = new OaiPmhClient(oaiPmhEndpoint); client = new OaiPmhClient(oaiPmhEndpoint); } } public XsltTransformerOaiPmhBundlesStreamSource(String oaiPmhEndpoint, String setSpec, Map<String, String> metadataPrefix2XsltMap, LocalDate from, LocalDate until) { public XsltTransformerOaiPmhBundlesStreamSource(String oaiPmhEndpoint, String setSpec, Map<String, String> metadataPrefix2XsltMap, LocalDate from, LocalDate until) { this(oaiPmhEndpoint, setSpec, metadataPrefix2XsltMap); this(oaiPmhEndpoint, setSpec, metadataPrefix2XsltMap); this.intervalType = HarvestingIntervalType.DAY_INTERVAL_HARVEST; this.intervalType = HarvestingIntervalType.DAY_INTERVAL_HARVEST; this.dayFrom = from; this.dayFrom = from; this.dayUntil = until; this.dayUntil = until; } } public XsltTransformerOaiPmhBundlesStreamSource(String oaiPmhEndpoint, String setSpec, Map<String, String> metadataPrefix2XsltMap, Instant from, Instant until) { public XsltTransformerOaiPmhBundlesStreamSource(String oaiPmhEndpoint, String setSpec, Map<String, String> metadataPrefix2XsltMap, Instant from, Instant until) { this(oaiPmhEndpoint, setSpec, metadataPrefix2XsltMap); this(oaiPmhEndpoint, setSpec, metadataPrefix2XsltMap); this.intervalType = HarvestingIntervalType.SECOND_INTERVAL_HARVEST; this.intervalType = HarvestingIntervalType.SECOND_INTERVAL_HARVEST; this.secondFrom = from; this.secondFrom = from; Loading Loading @@ -129,23 +134,32 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo break; break; } } // some record's metadata may not be available for all specified metadataPrefixes // some record's metadata may not be available for all specified // therefore collecting the union of all identifiers over all specified metadataPrefixes // metadataPrefixes Stream<OAIPMHtype> listIdentifiersResponseStream = metadataPrefixes. // therefore collecting the union of all identifiers over all specified stream(). // metadataPrefixes flatMap(mp -> { Stream<OAIPMHtype> listIdentifiersResponseStream = metadataPrefixes.stream().flatMap(mp -> { LOG.debug("filling list identifier stream with mp {}, from {}, until {}, setSpec {}", mp, from, until, setSpec); LOG.info("filling list identifier stream with mp {}, from {}, until {}, setSpec {}", mp, from, until, if (setSpec == null|| setSpec.trim().isEmpty()) { setSpec); // exeley only takes "YYY-mm-dd" as from date String exeleyFrom = from.split("T")[0]; // String exeleyUntil= until.split("T")[0]; LOG.info("exeley specials: {} -> {}", exeleyFrom, until); if (setSpec == null || setSpec.trim().isEmpty() && mp.equals("pam")) { return client.listIdentifiersStream(mp, exeleyFrom, until, null); } else if (setSpec == null || setSpec.trim().isEmpty()) { return client.listIdentifiersStream(mp, from, until, null); return client.listIdentifiersStream(mp, from, until, null); } } if ( mp.equals("pam") ) { return client.listIdentifiersStream(mp, exeleyFrom, until, setSpec); } return client.listIdentifiersStream(mp, from, until, setSpec); return client.listIdentifiersStream(mp, from, until, setSpec); }); }); Stream<String> uniqueIdentifiersStream = listIdentifiersResponseStream.flatMap(oaiPmhType -> { Stream<String> uniqueIdentifiersStream = listIdentifiersResponseStream.flatMap(oaiPmhType -> { /*return oaiPmhType. /* getListIdentifiers(). * return oaiPmhType. getListIdentifiers(). getHeader(). stream().map(h -> getHeader(). * h.getIdentifier() ); stream().map(h -> h.getIdentifier() ); */ */ Stream<String> result = Stream.empty(); Stream<String> result = Stream.empty(); ListIdentifiersType listIdentifiersType = oaiPmhType.getListIdentifiers(); ListIdentifiersType listIdentifiersType = oaiPmhType.getListIdentifiers(); Loading @@ -156,9 +170,7 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo } } } } return result; return result; }). }).filter(Objects::nonNull).distinct(); filter(Objects::nonNull). distinct(); List<String> uniqueIdentifiers = uniqueIdentifiersStream.collect(Collectors.toList()); List<String> uniqueIdentifiers = uniqueIdentifiersStream.collect(Collectors.toList()); Loading @@ -170,7 +182,8 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo public Bundle getBundle(String oaiPmhIdentifier) { public Bundle getBundle(String oaiPmhIdentifier) { LOG.debug("GetBundle - {}", oaiPmhIdentifier); LOG.debug("GetBundle - {}", oaiPmhIdentifier); //Bundle bundleResult = new AutonomouslyContentResolvingBundle(ImmutableSet.of() ); // Bundle bundleResult = new // AutonomouslyContentResolvingBundle(ImmutableSet.of() ); Set<Metadatum> bundleMetadata = new HashSet<>(); Set<Metadatum> bundleMetadata = new HashSet<>(); Set<String> metadataPrefixes = metadataPrefix2XsltMap.keySet(); Set<String> metadataPrefixes = metadataPrefix2XsltMap.keySet(); String lastModifiedString = ""; String lastModifiedString = ""; Loading @@ -181,11 +194,7 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo // --- STEP 1: get XML input // --- STEP 1: get XML input String getRecordXmlDocumentResponseString = client.getRecordString(oaiPmhIdentifier, metadataPrefix); String getRecordXmlDocumentResponseString = client.getRecordString(oaiPmhIdentifier, metadataPrefix); // try to make exeley data transformable ( & -> & to start with) // ToDo: find the right location for it! if ( getRecordXmlDocumentResponseString.contains("identifier=\"oai:exeley.com:10.") ) { getRecordXmlDocumentResponseString = getRecordXmlDocumentResponseString.replace("&", "&"); } // LOG.info("--------------------------"); // LOG.info("--------------------------"); // LOG.info("{}", getRecordXmlDocumentResponseString); // LOG.info("{}", getRecordXmlDocumentResponseString); if (isDeletedRecord(getRecordXmlDocumentResponseString)) { if (isDeletedRecord(getRecordXmlDocumentResponseString)) { Loading @@ -194,12 +203,10 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo } } else { else { // --- STEP 2: convert XML input according to XSLT // --- STEP 2: convert XML input according to XSLT TransformerFactory factory = TransformerFactory.newInstance("net.sf.saxon.TransformerFactoryImpl", TransformerFactoryImpl.class.getClassLoader() ); TransformerFactory factory = TransformerFactory.newInstance("net.sf.saxon.TransformerFactoryImpl", TransformerFactoryImpl.class.getClassLoader()); Templates xslTemplate = factory.newTemplates( Templates xslTemplate = factory.newTemplates( new StreamSource( new StreamSource(new StringReader(metadataPrefix2XsltMap.get(metadataPrefix)))); new StringReader( metadataPrefix2XsltMap. get(metadataPrefix) ) ) ); Source xmlInput = new StreamSource(new StringReader(getRecordXmlDocumentResponseString)); Source xmlInput = new StreamSource(new StringReader(getRecordXmlDocumentResponseString)); Loading @@ -217,7 +224,8 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo // STEP 3: convert XSLT-converted XML output to Java JAXB object // STEP 3: convert XSLT-converted XML output to Java JAXB object JAXBContext jaxbContext = JAXBContext.newInstance(XmlBundle.class); JAXBContext jaxbContext = JAXBContext.newInstance(XmlBundle.class); Unmarshaller unmarshaller = jaxbContext.createUnmarshaller(); Unmarshaller unmarshaller = jaxbContext.createUnmarshaller(); InputStream convertedXmlInputStream = new ByteArrayInputStream(xsltConvertedXmlOutput.getBytes(StandardCharsets.UTF_8) ); InputStream convertedXmlInputStream = new ByteArrayInputStream( xsltConvertedXmlOutput.getBytes(StandardCharsets.UTF_8)); Bundle currentPartBundle = (Bundle) unmarshaller.unmarshal(convertedXmlInputStream); Bundle currentPartBundle = (Bundle) unmarshaller.unmarshal(convertedXmlInputStream); Set<Metadatum> metadata = currentPartBundle.getMetadata(); Set<Metadatum> metadata = currentPartBundle.getMetadata(); Loading @@ -230,19 +238,21 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo Unmarshaller oaiPmhUnmarshaller = oaiPmhjaxbContext.createUnmarshaller(); Unmarshaller oaiPmhUnmarshaller = oaiPmhjaxbContext.createUnmarshaller(); @SuppressWarnings("unchecked") @SuppressWarnings("unchecked") JAXBElement<OAIPMHtype> wrappedResponseObject = (JAXBElement<OAIPMHtype>) oaiPmhUnmarshaller.unmarshal(sr); JAXBElement<OAIPMHtype> wrappedResponseObject = (JAXBElement<OAIPMHtype>) oaiPmhUnmarshaller .unmarshal(sr); OAIPMHtype response = wrappedResponseObject.getValue(); OAIPMHtype response = wrappedResponseObject.getValue(); lastModifiedString = response.getGetRecord().getRecord().getHeader().getDatestamp(); lastModifiedString = response.getGetRecord().getRecord().getHeader().getDatestamp(); // STEP 5: add identifier metadatum // STEP 5: add identifier metadatum Metadatum reference = new SimpleMetadatum("internal.dda.reference", oaiPmhEndpoint + "@@" + oaiPmhIdentifier); Metadatum reference = new SimpleMetadatum("internal.dda.reference", oaiPmhEndpoint + "@@" + oaiPmhIdentifier); bundleMetadata.add(reference); bundleMetadata.add(reference); resultBundle = BundleBuilder.create().withMetadata(bundleMetadata).withLastModifiedString(lastModifiedString).build(); resultBundle = BundleBuilder.create().withMetadata(bundleMetadata) .withLastModifiedString(lastModifiedString).build(); } } } } catch (XPathException e) { catch (XPathException e) { LOG.debug("Catched XPathException"); LOG.debug("Catched XPathException"); String errorCode = e.getErrorCodeLocalPart(); String errorCode = e.getErrorCodeLocalPart(); if (null != errorCode) { if (null != errorCode) { Loading @@ -252,19 +262,18 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo resultBundle = null; resultBundle = null; // test if this break is really necessary, taken out. STJ // test if this break is really necessary, taken out. STJ // break; // break; } } else { else { LOG.warn("2- fatalError. Filtering away oaiPmhIdentifier=" + oaiPmhIdentifier, e); LOG.warn("2- fatalError. Filtering away oaiPmhIdentifier=" + oaiPmhIdentifier, e); } } } } else { else { LOG.warn("3- fatalError. Filtering away oaiPmhIdentifier=" + oaiPmhIdentifier, e); LOG.warn("3- fatalError. Filtering away oaiPmhIdentifier=" + oaiPmhIdentifier, e); } } resultBundle = null; resultBundle = null; } } catch (Throwable t) { catch (Throwable t) { LOG.warn("Problem getting record with id " + oaiPmhIdentifier + " and metadataPrefix " + metadataPrefix + ". Skipping it.", t); LOG.warn("Problem getting record with id " + oaiPmhIdentifier + " and metadataPrefix " + metadataPrefix + ". Skipping it.", t); resultBundle = null; resultBundle = null; } } Loading @@ -281,8 +290,8 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo boolean result; boolean result; try { try { // see https://stackoverflow.com/a/6397369/923560 // see https://stackoverflow.com/a/6397369/923560 XPathExpression expr = xpath.compile( XPathExpression expr = xpath "//*[local-name()='OAI-PMH' and namespace-uri()='http://www.openarchives.org/OAI/2.0/']" .compile("//*[local-name()='OAI-PMH' and namespace-uri()='http://www.openarchives.org/OAI/2.0/']" + "/*[local-name()='GetRecord' and namespace-uri()='http://www.openarchives.org/OAI/2.0/']" + "/*[local-name()='GetRecord' and namespace-uri()='http://www.openarchives.org/OAI/2.0/']" + "/*[local-name()='record' and namespace-uri()='http://www.openarchives.org/OAI/2.0/']" + "/*[local-name()='record' and namespace-uri()='http://www.openarchives.org/OAI/2.0/']" + "/*[local-name()='header' and namespace-uri()='http://www.openarchives.org/OAI/2.0/']/" + "/*[local-name()='header' and namespace-uri()='http://www.openarchives.org/OAI/2.0/']/" Loading @@ -291,12 +300,10 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo headerStatus = expr.evaluate(inputSource); headerStatus = expr.evaluate(inputSource); if ("deleted".equals(headerStatus)) { if ("deleted".equals(headerStatus)) { result = true; result = true; } } else { else { result = false; result = false; } } } } catch (XPathExpressionException e) { catch (XPathExpressionException e) { LOG.error("Problem identifying if deleted record", e); LOG.error("Problem identifying if deleted record", e); result = true; result = true; } } Loading Loading @@ -345,12 +352,10 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo if (!"filteraway".equals(errorCode)) { if (!"filteraway".equals(errorCode)) { LOG.error("fatalError", arg0); LOG.error("fatalError", arg0); } } } } else { else { LOG.error("fatalError", arg0); LOG.error("fatalError", arg0); } } } } else { else { LOG.error("fatalError", arg0); LOG.error("fatalError", arg0); } } } } Loading src/test/java/org/gesis/dda/publishing/domain/impl/XsltTransformerOaiPmhBundlesStreamSourceTest.java +11 −11 Original line number Original line Diff line number Diff line Loading @@ -238,9 +238,9 @@ public class XsltTransformerOaiPmhBundlesStreamSourceTest { getClass(). getClass(). getClassLoader(). getClassLoader(). getResourceAsStream("xslt/exeley-pam-2-xmlbundle.xslt"), StandardCharsets.UTF_8); getResourceAsStream("xslt/exeley-pam-2-xmlbundle.xslt"), StandardCharsets.UTF_8); map.put("oai_dc", oaiDcXsltString); map.put("pam", oaiDcXsltString); XsltTransformerOaiPmhBundlesStreamSource bss = new XsltTransformerOaiPmhBundlesStreamSource("http://www.exeley.com/oai/", map); XsltTransformerOaiPmhBundlesStreamSource bss = new XsltTransformerOaiPmhBundlesStreamSource("http://www.exeley.com/oai/OAIRequest", map); Bundle bundle = bss.getBundle("oai:exeley.com:10.21307/joss-2018-001"); Bundle bundle = bss.getBundle("oai:exeley.com:10.21307/joss-2018-001"); Loading @@ -249,7 +249,7 @@ public class XsltTransformerOaiPmhBundlesStreamSourceTest { metadata.stream().map(Object::toString).forEach(LOG::info); metadata.stream().map(Object::toString).forEach(LOG::info); assertTrue( metadata.contains( new SimpleMetadatum("dc.type.stock", "article") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.type.stock", "article") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.type.document", "32") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.type.document", "32") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("dc.source.journal", "1439") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.source.journal", "1632") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.description.pubstatus", "1") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.description.pubstatus", "1") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.contributor.author", "Krishna, M.") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.contributor.author", "Krishna, M.") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.contributor.author", "Bino Paul, G.D.") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.contributor.author", "Bino Paul, G.D.") ) ); Loading @@ -265,7 +265,7 @@ public class XsltTransformerOaiPmhBundlesStreamSourceTest { assertTrue( metadata.contains( new SimpleMetadatum("dc.language", "en") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.language", "en") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.source.issue", "1") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.source.issue", "1") ) ); assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.source.issue") ).count() ); assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.source.issue") ).count() ); assertTrue( metadata.contains( new SimpleMetadatum("dc.rights.licence", "16") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.rights.licence", "32") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("dc.source.issuetopic", "OBEN_UNTEN : Bilder vom Leben der Anderen") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("dc.source.issuetopic", "OBEN_UNTEN : Bilder vom Leben der Anderen") ) ); // assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.source.issuetopic") ).count() ); // assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.source.issuetopic") ).count() ); // assertTrue( metadata.contains( new SimpleMetadatum("dc.source.pageinfo", "3-6") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("dc.source.pageinfo", "3-6") ) ); Loading @@ -274,21 +274,21 @@ public class XsltTransformerOaiPmhBundlesStreamSourceTest { assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.title") ).count() ); assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.title") ).count() ); // assertTrue( metadata.contains( new SimpleMetadatum("dc.identifier.urn", "urn:nbn:de:gbv:18-8-7560") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("dc.identifier.urn", "urn:nbn:de:gbv:18-8-7560") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.identifier.doi", "https://doi.org/10.21307/joss-2018-001") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.identifier.doi", "https://doi.org/10.21307/joss-2018-001") ) ); assertTrue( metadata.contains( new SimpleMetadatum("internal.dda.reference", "http://www.exeley.com/oai/@@oai:exeley.com:10.21307/joss-2018-001") ) ); assertTrue( metadata.contains( new SimpleMetadatum("internal.dda.reference", "http://www.exeley.com/oai/OAIRequest@@oai:exeley.com:10.21307/joss-2018-001") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.publisher.country", "USA") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.publisher.country", "USA") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("internal.identifier.classoz", "10900") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("internal.identifier.classoz", "10900") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("dc.subject.classoz", "10900") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("dc.subject.classoz", "10900") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("internal.identifier.ddc", "300") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("internal.identifier.ddc", "300") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("dc.subject.ddc", "300") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("dc.subject.ddc", "300") ) ); assertTrue( metadata.contains( new SimpleMetadatum("internal.status", "formal und inhaltlich fertig erschlossen") ) ); assertTrue( metadata.contains( new SimpleMetadatum("internal.status", "formal und inhaltlich fertig erschlossen") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.source.other", "Collaboration") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.subject.other", "en", "Collaboration") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.source.other", "Structure") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.subject.other", "en", "Structure") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.source.other", "Networks") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.subject.other", "en", "Networks") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.source.other", "Degree") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.subject.other", "en", "Degree") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.source.other", "Indian economics") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.subject.other", "en", "Indian economics") ) ); Set<InputStream> fileSet = bundle.getContents(); Set<InputStream> fileSet = bundle.getContents(); assertEquals( 1, fileSet.size()); assertEquals( 2, fileSet.size()); } } @Test @Test Loading src/test/resources/xslt/exeley-pam-2-xmlbundle.xslt +88 −17 Original line number Original line Diff line number Diff line Loading @@ -8,7 +8,7 @@ xmlns:zoai="http://git.gesis.org/dda/zoai" xmlns:zoai="http://git.gesis.org/dda/zoai" > > <xsl:output indent="yes"/> <xsl:output method="xml" indent="yes"/> <xsl:strip-space elements="*"/> <xsl:strip-space elements="*"/> <!-- <xsl:message terminate="no">here is the tree at the current node <xsl:copy-of select="."/> </xsl:message> --> <!-- <xsl:message terminate="no">here is the tree at the current node <xsl:copy-of select="."/> </xsl:message> --> <!-- <xsl:message terminate="no">here is some message for stderr, e.g. <xsl:value-of select='text()' /> </xsl:message> --> <!-- <xsl:message terminate="no">here is some message for stderr, e.g. <xsl:value-of select='text()' /> </xsl:message> --> Loading @@ -26,6 +26,7 @@ <xsl:element name="bundle"> <xsl:element name="bundle"> <xsl:element name="metadata"> <xsl:element name="metadata"> <xsl:variable name="ddaReference" select="concat(oai:OAI-PMH/oai:request[@verb='GetRecord']/text(), '@@', oai:OAI-PMH/oai:request[@verb='GetRecord']/@identifier)" /> <xsl:variable name="ddaReference" select="concat(oai:OAI-PMH/oai:request[@verb='GetRecord']/text(), '@@', oai:OAI-PMH/oai:request[@verb='GetRecord']/@identifier)" /> <xsl:variable name="setSpec" select="/oai:OAI-PMH/oai:GetRecord/oai:record/oai:header/oai:setSpec/text()"/> <xsl:call-template name="new-metadatum"> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'internal.dda.reference'"/> <xsl:with-param name="key" select="'internal.dda.reference'"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="language" select="''"/> Loading @@ -44,17 +45,62 @@ <xsl:with-param name="value" select="'32'"/> <xsl:with-param name="value" select="'32'"/> </xsl:call-template> </xsl:call-template> <xsl:choose> <xsl:when test="$setSpec = 'journal_of_social_structure'"> <xsl:call-template name="new-metadatum"> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.rights.licence'"/> <xsl:with-param name="key" select="'dc.rights.licence'"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="value" select="'32'"/> <xsl:with-param name="value" select="'32'"/> </xsl:call-template> </xsl:call-template> <xsl:call-template name="new-metadatum"> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.source.journal'"/> <xsl:with-param name="key" select="'dc.source.journal'"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="value" select="'1507'"/> <xsl:with-param name="value" select="'1632'"/> </xsl:call-template> </xsl:call-template> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.identifier.issn'"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="value" select="'1529-1227'"/> </xsl:call-template> </xsl:when> <xsl:when test="$setSpec = 'journal_of_educational_leadership_policy_and_pract'"> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.rights.licence'"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="value" select="'20'"/> </xsl:call-template> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.source.journal'"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="value" select="'1674'"/> </xsl:call-template> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.identifier.issn'"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="value" select="'1178-8704'"/> </xsl:call-template> </xsl:when> <xsl:when test="$setSpec = 'connections'"> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.rights.licence'"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="value" select="'16'"/> </xsl:call-template> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.source.journal'"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="value" select="'1725'"/> </xsl:call-template> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.identifier.issn'"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="value" select="'1529-1227'"/> </xsl:call-template> </xsl:when> </xsl:choose> <xsl:call-template name="new-metadatum"> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.description.pubstatus'"/> <xsl:with-param name="key" select="'dc.description.pubstatus'"/> Loading @@ -80,6 +126,12 @@ <xsl:with-param name="value" select="'1'"/> <xsl:with-param name="value" select="'1'"/> </xsl:call-template> </xsl:call-template> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.language'"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="value" select="'en'"/> </xsl:call-template> <!--xsl:call-template name="new-metadatum"> <!--xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'internal.identifier.classoz'"/> <xsl:with-param name="key" select="'internal.identifier.classoz'"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="language" select="''"/> Loading @@ -92,12 +144,6 @@ <xsl:with-param name="value" select="'10900'"/> <xsl:with-param name="value" select="'10900'"/> </xsl:call-template--> </xsl:call-template--> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.identifier.issn'"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="value" select="'1529-1227'"/> </xsl:call-template> <!--xsl:call-template name="new-metadatum"> <!--xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'internal.identifier.ddc'"/> <xsl:with-param name="key" select="'internal.identifier.ddc'"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="language" select="''"/> Loading @@ -123,6 +169,7 @@ <xsl:apply-templates select="dc:identifier"/> <xsl:apply-templates select="dc:identifier"/> <xsl:apply-templates select="dc:title"/> <xsl:apply-templates select="dc:title"/> <xsl:apply-templates select="dc:creator"/> <xsl:apply-templates select="dc:creator"/> <xsl:apply-templates select="prism:keyword"/> <xsl:apply-templates select="prism:publicationDate"/> <xsl:apply-templates select="prism:publicationDate"/> <xsl:apply-templates select="prism:volume"/> <xsl:apply-templates select="prism:volume"/> <xsl:apply-templates select="prism:number"/> <xsl:apply-templates select="prism:number"/> Loading Loading @@ -159,6 +206,30 @@ <xsl:with-param name="value" select="text()"/> <xsl:with-param name="value" select="text()"/> </xsl:call-template> </xsl:call-template> </xsl:template> </xsl:template> <xsl:template match="prism:keyword"> <xsl:if test="string-length(normalize-space(text())) > 0"> <xsl:choose> <xsl:when test="contains(normalize-space(text()), ';')"> <xsl:for-each select="tokenize(normalize-space(text()), ';')"> <xsl:if test="string-length(normalize-space(.)) > 0"> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.subject.other'"/> <xsl:with-param name="language" select="'en'"/> <xsl:with-param name="value" select="normalize-space(.)"/> </xsl:call-template> </xsl:if> </xsl:for-each> </xsl:when> <xsl:otherwise> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.subject.other'"/> <xsl:with-param name="language" select="'en'"/> <xsl:with-param name="value" select="normalize-space(text())"/> </xsl:call-template> </xsl:otherwise> </xsl:choose> </xsl:if> </xsl:template> <xsl:template match="prism:publicationDate"> <xsl:template match="prism:publicationDate"> <xsl:call-template name="new-metadatum"> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.date.issued'"/> <xsl:with-param name="key" select="'dc.date.issued'"/> Loading Loading
src/main/java/org/gesis/dda/publishing/domain/impl/XsltTransformerOaiPmhBundlesStreamSource.java +318 −313 Original line number Original line Diff line number Diff line Loading @@ -46,8 +46,8 @@ import org.xml.sax.InputSource; import net.sf.saxon.TransformerFactoryImpl; import net.sf.saxon.TransformerFactoryImpl; import net.sf.saxon.trans.XPathException; import net.sf.saxon.trans.XPathException; public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSource, ErrorListener { public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSource, ErrorListener { private final static Logger LOG = LoggerFactory.getLogger(XsltTransformerOaiPmhBundlesStreamSource.class); private final static Logger LOG = LoggerFactory.getLogger(XsltTransformerOaiPmhBundlesStreamSource.class); Loading @@ -66,14 +66,16 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo client = new OaiPmhClient(oaiPmhEndpoint); client = new OaiPmhClient(oaiPmhEndpoint); } } public XsltTransformerOaiPmhBundlesStreamSource(String oaiPmhEndpoint, Map<String, String> metadataPrefix2XsltMap, LocalDate from, LocalDate until) { public XsltTransformerOaiPmhBundlesStreamSource(String oaiPmhEndpoint, Map<String, String> metadataPrefix2XsltMap, LocalDate from, LocalDate until) { this(oaiPmhEndpoint, metadataPrefix2XsltMap); this(oaiPmhEndpoint, metadataPrefix2XsltMap); this.intervalType = HarvestingIntervalType.DAY_INTERVAL_HARVEST; this.intervalType = HarvestingIntervalType.DAY_INTERVAL_HARVEST; this.dayFrom = from; this.dayFrom = from; this.dayUntil = until; this.dayUntil = until; } } public XsltTransformerOaiPmhBundlesStreamSource(String oaiPmhEndpoint, Map<String, String> metadataPrefix2XsltMap, Instant from, Instant until) { public XsltTransformerOaiPmhBundlesStreamSource(String oaiPmhEndpoint, Map<String, String> metadataPrefix2XsltMap, Instant from, Instant until) { this(oaiPmhEndpoint, metadataPrefix2XsltMap); this(oaiPmhEndpoint, metadataPrefix2XsltMap); this.intervalType = HarvestingIntervalType.SECOND_INTERVAL_HARVEST; this.intervalType = HarvestingIntervalType.SECOND_INTERVAL_HARVEST; this.secondFrom = from; this.secondFrom = from; Loading @@ -81,7 +83,8 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo } } // And here come the same methods but with set specification // And here come the same methods but with set specification public XsltTransformerOaiPmhBundlesStreamSource(String oaiPmhEndpoint, String setSpec, Map<String, String> metadataPrefix2XsltMap) { public XsltTransformerOaiPmhBundlesStreamSource(String oaiPmhEndpoint, String setSpec, Map<String, String> metadataPrefix2XsltMap) { this.oaiPmhEndpoint = oaiPmhEndpoint; this.oaiPmhEndpoint = oaiPmhEndpoint; this.setSpec = setSpec; this.setSpec = setSpec; this.metadataPrefix2XsltMap = metadataPrefix2XsltMap; this.metadataPrefix2XsltMap = metadataPrefix2XsltMap; Loading @@ -89,14 +92,16 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo client = new OaiPmhClient(oaiPmhEndpoint); client = new OaiPmhClient(oaiPmhEndpoint); } } public XsltTransformerOaiPmhBundlesStreamSource(String oaiPmhEndpoint, String setSpec, Map<String, String> metadataPrefix2XsltMap, LocalDate from, LocalDate until) { public XsltTransformerOaiPmhBundlesStreamSource(String oaiPmhEndpoint, String setSpec, Map<String, String> metadataPrefix2XsltMap, LocalDate from, LocalDate until) { this(oaiPmhEndpoint, setSpec, metadataPrefix2XsltMap); this(oaiPmhEndpoint, setSpec, metadataPrefix2XsltMap); this.intervalType = HarvestingIntervalType.DAY_INTERVAL_HARVEST; this.intervalType = HarvestingIntervalType.DAY_INTERVAL_HARVEST; this.dayFrom = from; this.dayFrom = from; this.dayUntil = until; this.dayUntil = until; } } public XsltTransformerOaiPmhBundlesStreamSource(String oaiPmhEndpoint, String setSpec, Map<String, String> metadataPrefix2XsltMap, Instant from, Instant until) { public XsltTransformerOaiPmhBundlesStreamSource(String oaiPmhEndpoint, String setSpec, Map<String, String> metadataPrefix2XsltMap, Instant from, Instant until) { this(oaiPmhEndpoint, setSpec, metadataPrefix2XsltMap); this(oaiPmhEndpoint, setSpec, metadataPrefix2XsltMap); this.intervalType = HarvestingIntervalType.SECOND_INTERVAL_HARVEST; this.intervalType = HarvestingIntervalType.SECOND_INTERVAL_HARVEST; this.secondFrom = from; this.secondFrom = from; Loading Loading @@ -129,23 +134,32 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo break; break; } } // some record's metadata may not be available for all specified metadataPrefixes // some record's metadata may not be available for all specified // therefore collecting the union of all identifiers over all specified metadataPrefixes // metadataPrefixes Stream<OAIPMHtype> listIdentifiersResponseStream = metadataPrefixes. // therefore collecting the union of all identifiers over all specified stream(). // metadataPrefixes flatMap(mp -> { Stream<OAIPMHtype> listIdentifiersResponseStream = metadataPrefixes.stream().flatMap(mp -> { LOG.debug("filling list identifier stream with mp {}, from {}, until {}, setSpec {}", mp, from, until, setSpec); LOG.info("filling list identifier stream with mp {}, from {}, until {}, setSpec {}", mp, from, until, if (setSpec == null|| setSpec.trim().isEmpty()) { setSpec); // exeley only takes "YYY-mm-dd" as from date String exeleyFrom = from.split("T")[0]; // String exeleyUntil= until.split("T")[0]; LOG.info("exeley specials: {} -> {}", exeleyFrom, until); if (setSpec == null || setSpec.trim().isEmpty() && mp.equals("pam")) { return client.listIdentifiersStream(mp, exeleyFrom, until, null); } else if (setSpec == null || setSpec.trim().isEmpty()) { return client.listIdentifiersStream(mp, from, until, null); return client.listIdentifiersStream(mp, from, until, null); } } if ( mp.equals("pam") ) { return client.listIdentifiersStream(mp, exeleyFrom, until, setSpec); } return client.listIdentifiersStream(mp, from, until, setSpec); return client.listIdentifiersStream(mp, from, until, setSpec); }); }); Stream<String> uniqueIdentifiersStream = listIdentifiersResponseStream.flatMap(oaiPmhType -> { Stream<String> uniqueIdentifiersStream = listIdentifiersResponseStream.flatMap(oaiPmhType -> { /*return oaiPmhType. /* getListIdentifiers(). * return oaiPmhType. getListIdentifiers(). getHeader(). stream().map(h -> getHeader(). * h.getIdentifier() ); stream().map(h -> h.getIdentifier() ); */ */ Stream<String> result = Stream.empty(); Stream<String> result = Stream.empty(); ListIdentifiersType listIdentifiersType = oaiPmhType.getListIdentifiers(); ListIdentifiersType listIdentifiersType = oaiPmhType.getListIdentifiers(); Loading @@ -156,9 +170,7 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo } } } } return result; return result; }). }).filter(Objects::nonNull).distinct(); filter(Objects::nonNull). distinct(); List<String> uniqueIdentifiers = uniqueIdentifiersStream.collect(Collectors.toList()); List<String> uniqueIdentifiers = uniqueIdentifiersStream.collect(Collectors.toList()); Loading @@ -170,7 +182,8 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo public Bundle getBundle(String oaiPmhIdentifier) { public Bundle getBundle(String oaiPmhIdentifier) { LOG.debug("GetBundle - {}", oaiPmhIdentifier); LOG.debug("GetBundle - {}", oaiPmhIdentifier); //Bundle bundleResult = new AutonomouslyContentResolvingBundle(ImmutableSet.of() ); // Bundle bundleResult = new // AutonomouslyContentResolvingBundle(ImmutableSet.of() ); Set<Metadatum> bundleMetadata = new HashSet<>(); Set<Metadatum> bundleMetadata = new HashSet<>(); Set<String> metadataPrefixes = metadataPrefix2XsltMap.keySet(); Set<String> metadataPrefixes = metadataPrefix2XsltMap.keySet(); String lastModifiedString = ""; String lastModifiedString = ""; Loading @@ -181,11 +194,7 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo // --- STEP 1: get XML input // --- STEP 1: get XML input String getRecordXmlDocumentResponseString = client.getRecordString(oaiPmhIdentifier, metadataPrefix); String getRecordXmlDocumentResponseString = client.getRecordString(oaiPmhIdentifier, metadataPrefix); // try to make exeley data transformable ( & -> & to start with) // ToDo: find the right location for it! if ( getRecordXmlDocumentResponseString.contains("identifier=\"oai:exeley.com:10.") ) { getRecordXmlDocumentResponseString = getRecordXmlDocumentResponseString.replace("&", "&"); } // LOG.info("--------------------------"); // LOG.info("--------------------------"); // LOG.info("{}", getRecordXmlDocumentResponseString); // LOG.info("{}", getRecordXmlDocumentResponseString); if (isDeletedRecord(getRecordXmlDocumentResponseString)) { if (isDeletedRecord(getRecordXmlDocumentResponseString)) { Loading @@ -194,12 +203,10 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo } } else { else { // --- STEP 2: convert XML input according to XSLT // --- STEP 2: convert XML input according to XSLT TransformerFactory factory = TransformerFactory.newInstance("net.sf.saxon.TransformerFactoryImpl", TransformerFactoryImpl.class.getClassLoader() ); TransformerFactory factory = TransformerFactory.newInstance("net.sf.saxon.TransformerFactoryImpl", TransformerFactoryImpl.class.getClassLoader()); Templates xslTemplate = factory.newTemplates( Templates xslTemplate = factory.newTemplates( new StreamSource( new StreamSource(new StringReader(metadataPrefix2XsltMap.get(metadataPrefix)))); new StringReader( metadataPrefix2XsltMap. get(metadataPrefix) ) ) ); Source xmlInput = new StreamSource(new StringReader(getRecordXmlDocumentResponseString)); Source xmlInput = new StreamSource(new StringReader(getRecordXmlDocumentResponseString)); Loading @@ -217,7 +224,8 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo // STEP 3: convert XSLT-converted XML output to Java JAXB object // STEP 3: convert XSLT-converted XML output to Java JAXB object JAXBContext jaxbContext = JAXBContext.newInstance(XmlBundle.class); JAXBContext jaxbContext = JAXBContext.newInstance(XmlBundle.class); Unmarshaller unmarshaller = jaxbContext.createUnmarshaller(); Unmarshaller unmarshaller = jaxbContext.createUnmarshaller(); InputStream convertedXmlInputStream = new ByteArrayInputStream(xsltConvertedXmlOutput.getBytes(StandardCharsets.UTF_8) ); InputStream convertedXmlInputStream = new ByteArrayInputStream( xsltConvertedXmlOutput.getBytes(StandardCharsets.UTF_8)); Bundle currentPartBundle = (Bundle) unmarshaller.unmarshal(convertedXmlInputStream); Bundle currentPartBundle = (Bundle) unmarshaller.unmarshal(convertedXmlInputStream); Set<Metadatum> metadata = currentPartBundle.getMetadata(); Set<Metadatum> metadata = currentPartBundle.getMetadata(); Loading @@ -230,19 +238,21 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo Unmarshaller oaiPmhUnmarshaller = oaiPmhjaxbContext.createUnmarshaller(); Unmarshaller oaiPmhUnmarshaller = oaiPmhjaxbContext.createUnmarshaller(); @SuppressWarnings("unchecked") @SuppressWarnings("unchecked") JAXBElement<OAIPMHtype> wrappedResponseObject = (JAXBElement<OAIPMHtype>) oaiPmhUnmarshaller.unmarshal(sr); JAXBElement<OAIPMHtype> wrappedResponseObject = (JAXBElement<OAIPMHtype>) oaiPmhUnmarshaller .unmarshal(sr); OAIPMHtype response = wrappedResponseObject.getValue(); OAIPMHtype response = wrappedResponseObject.getValue(); lastModifiedString = response.getGetRecord().getRecord().getHeader().getDatestamp(); lastModifiedString = response.getGetRecord().getRecord().getHeader().getDatestamp(); // STEP 5: add identifier metadatum // STEP 5: add identifier metadatum Metadatum reference = new SimpleMetadatum("internal.dda.reference", oaiPmhEndpoint + "@@" + oaiPmhIdentifier); Metadatum reference = new SimpleMetadatum("internal.dda.reference", oaiPmhEndpoint + "@@" + oaiPmhIdentifier); bundleMetadata.add(reference); bundleMetadata.add(reference); resultBundle = BundleBuilder.create().withMetadata(bundleMetadata).withLastModifiedString(lastModifiedString).build(); resultBundle = BundleBuilder.create().withMetadata(bundleMetadata) .withLastModifiedString(lastModifiedString).build(); } } } } catch (XPathException e) { catch (XPathException e) { LOG.debug("Catched XPathException"); LOG.debug("Catched XPathException"); String errorCode = e.getErrorCodeLocalPart(); String errorCode = e.getErrorCodeLocalPart(); if (null != errorCode) { if (null != errorCode) { Loading @@ -252,19 +262,18 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo resultBundle = null; resultBundle = null; // test if this break is really necessary, taken out. STJ // test if this break is really necessary, taken out. STJ // break; // break; } } else { else { LOG.warn("2- fatalError. Filtering away oaiPmhIdentifier=" + oaiPmhIdentifier, e); LOG.warn("2- fatalError. Filtering away oaiPmhIdentifier=" + oaiPmhIdentifier, e); } } } } else { else { LOG.warn("3- fatalError. Filtering away oaiPmhIdentifier=" + oaiPmhIdentifier, e); LOG.warn("3- fatalError. Filtering away oaiPmhIdentifier=" + oaiPmhIdentifier, e); } } resultBundle = null; resultBundle = null; } } catch (Throwable t) { catch (Throwable t) { LOG.warn("Problem getting record with id " + oaiPmhIdentifier + " and metadataPrefix " + metadataPrefix + ". Skipping it.", t); LOG.warn("Problem getting record with id " + oaiPmhIdentifier + " and metadataPrefix " + metadataPrefix + ". Skipping it.", t); resultBundle = null; resultBundle = null; } } Loading @@ -281,8 +290,8 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo boolean result; boolean result; try { try { // see https://stackoverflow.com/a/6397369/923560 // see https://stackoverflow.com/a/6397369/923560 XPathExpression expr = xpath.compile( XPathExpression expr = xpath "//*[local-name()='OAI-PMH' and namespace-uri()='http://www.openarchives.org/OAI/2.0/']" .compile("//*[local-name()='OAI-PMH' and namespace-uri()='http://www.openarchives.org/OAI/2.0/']" + "/*[local-name()='GetRecord' and namespace-uri()='http://www.openarchives.org/OAI/2.0/']" + "/*[local-name()='GetRecord' and namespace-uri()='http://www.openarchives.org/OAI/2.0/']" + "/*[local-name()='record' and namespace-uri()='http://www.openarchives.org/OAI/2.0/']" + "/*[local-name()='record' and namespace-uri()='http://www.openarchives.org/OAI/2.0/']" + "/*[local-name()='header' and namespace-uri()='http://www.openarchives.org/OAI/2.0/']/" + "/*[local-name()='header' and namespace-uri()='http://www.openarchives.org/OAI/2.0/']/" Loading @@ -291,12 +300,10 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo headerStatus = expr.evaluate(inputSource); headerStatus = expr.evaluate(inputSource); if ("deleted".equals(headerStatus)) { if ("deleted".equals(headerStatus)) { result = true; result = true; } } else { else { result = false; result = false; } } } } catch (XPathExpressionException e) { catch (XPathExpressionException e) { LOG.error("Problem identifying if deleted record", e); LOG.error("Problem identifying if deleted record", e); result = true; result = true; } } Loading Loading @@ -345,12 +352,10 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo if (!"filteraway".equals(errorCode)) { if (!"filteraway".equals(errorCode)) { LOG.error("fatalError", arg0); LOG.error("fatalError", arg0); } } } } else { else { LOG.error("fatalError", arg0); LOG.error("fatalError", arg0); } } } } else { else { LOG.error("fatalError", arg0); LOG.error("fatalError", arg0); } } } } Loading
src/test/java/org/gesis/dda/publishing/domain/impl/XsltTransformerOaiPmhBundlesStreamSourceTest.java +11 −11 Original line number Original line Diff line number Diff line Loading @@ -238,9 +238,9 @@ public class XsltTransformerOaiPmhBundlesStreamSourceTest { getClass(). getClass(). getClassLoader(). getClassLoader(). getResourceAsStream("xslt/exeley-pam-2-xmlbundle.xslt"), StandardCharsets.UTF_8); getResourceAsStream("xslt/exeley-pam-2-xmlbundle.xslt"), StandardCharsets.UTF_8); map.put("oai_dc", oaiDcXsltString); map.put("pam", oaiDcXsltString); XsltTransformerOaiPmhBundlesStreamSource bss = new XsltTransformerOaiPmhBundlesStreamSource("http://www.exeley.com/oai/", map); XsltTransformerOaiPmhBundlesStreamSource bss = new XsltTransformerOaiPmhBundlesStreamSource("http://www.exeley.com/oai/OAIRequest", map); Bundle bundle = bss.getBundle("oai:exeley.com:10.21307/joss-2018-001"); Bundle bundle = bss.getBundle("oai:exeley.com:10.21307/joss-2018-001"); Loading @@ -249,7 +249,7 @@ public class XsltTransformerOaiPmhBundlesStreamSourceTest { metadata.stream().map(Object::toString).forEach(LOG::info); metadata.stream().map(Object::toString).forEach(LOG::info); assertTrue( metadata.contains( new SimpleMetadatum("dc.type.stock", "article") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.type.stock", "article") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.type.document", "32") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.type.document", "32") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("dc.source.journal", "1439") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.source.journal", "1632") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.description.pubstatus", "1") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.description.pubstatus", "1") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.contributor.author", "Krishna, M.") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.contributor.author", "Krishna, M.") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.contributor.author", "Bino Paul, G.D.") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.contributor.author", "Bino Paul, G.D.") ) ); Loading @@ -265,7 +265,7 @@ public class XsltTransformerOaiPmhBundlesStreamSourceTest { assertTrue( metadata.contains( new SimpleMetadatum("dc.language", "en") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.language", "en") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.source.issue", "1") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.source.issue", "1") ) ); assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.source.issue") ).count() ); assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.source.issue") ).count() ); assertTrue( metadata.contains( new SimpleMetadatum("dc.rights.licence", "16") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.rights.licence", "32") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("dc.source.issuetopic", "OBEN_UNTEN : Bilder vom Leben der Anderen") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("dc.source.issuetopic", "OBEN_UNTEN : Bilder vom Leben der Anderen") ) ); // assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.source.issuetopic") ).count() ); // assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.source.issuetopic") ).count() ); // assertTrue( metadata.contains( new SimpleMetadatum("dc.source.pageinfo", "3-6") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("dc.source.pageinfo", "3-6") ) ); Loading @@ -274,21 +274,21 @@ public class XsltTransformerOaiPmhBundlesStreamSourceTest { assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.title") ).count() ); assertEquals(1, metadata.stream().filter(m -> m.getKey().equals("dc.title") ).count() ); // assertTrue( metadata.contains( new SimpleMetadatum("dc.identifier.urn", "urn:nbn:de:gbv:18-8-7560") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("dc.identifier.urn", "urn:nbn:de:gbv:18-8-7560") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.identifier.doi", "https://doi.org/10.21307/joss-2018-001") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.identifier.doi", "https://doi.org/10.21307/joss-2018-001") ) ); assertTrue( metadata.contains( new SimpleMetadatum("internal.dda.reference", "http://www.exeley.com/oai/@@oai:exeley.com:10.21307/joss-2018-001") ) ); assertTrue( metadata.contains( new SimpleMetadatum("internal.dda.reference", "http://www.exeley.com/oai/OAIRequest@@oai:exeley.com:10.21307/joss-2018-001") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.publisher.country", "USA") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.publisher.country", "USA") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("internal.identifier.classoz", "10900") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("internal.identifier.classoz", "10900") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("dc.subject.classoz", "10900") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("dc.subject.classoz", "10900") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("internal.identifier.ddc", "300") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("internal.identifier.ddc", "300") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("dc.subject.ddc", "300") ) ); // assertTrue( metadata.contains( new SimpleMetadatum("dc.subject.ddc", "300") ) ); assertTrue( metadata.contains( new SimpleMetadatum("internal.status", "formal und inhaltlich fertig erschlossen") ) ); assertTrue( metadata.contains( new SimpleMetadatum("internal.status", "formal und inhaltlich fertig erschlossen") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.source.other", "Collaboration") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.subject.other", "en", "Collaboration") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.source.other", "Structure") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.subject.other", "en", "Structure") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.source.other", "Networks") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.subject.other", "en", "Networks") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.source.other", "Degree") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.subject.other", "en", "Degree") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.source.other", "Indian economics") ) ); assertTrue( metadata.contains( new SimpleMetadatum("dc.subject.other", "en", "Indian economics") ) ); Set<InputStream> fileSet = bundle.getContents(); Set<InputStream> fileSet = bundle.getContents(); assertEquals( 1, fileSet.size()); assertEquals( 2, fileSet.size()); } } @Test @Test Loading
src/test/resources/xslt/exeley-pam-2-xmlbundle.xslt +88 −17 Original line number Original line Diff line number Diff line Loading @@ -8,7 +8,7 @@ xmlns:zoai="http://git.gesis.org/dda/zoai" xmlns:zoai="http://git.gesis.org/dda/zoai" > > <xsl:output indent="yes"/> <xsl:output method="xml" indent="yes"/> <xsl:strip-space elements="*"/> <xsl:strip-space elements="*"/> <!-- <xsl:message terminate="no">here is the tree at the current node <xsl:copy-of select="."/> </xsl:message> --> <!-- <xsl:message terminate="no">here is the tree at the current node <xsl:copy-of select="."/> </xsl:message> --> <!-- <xsl:message terminate="no">here is some message for stderr, e.g. <xsl:value-of select='text()' /> </xsl:message> --> <!-- <xsl:message terminate="no">here is some message for stderr, e.g. <xsl:value-of select='text()' /> </xsl:message> --> Loading @@ -26,6 +26,7 @@ <xsl:element name="bundle"> <xsl:element name="bundle"> <xsl:element name="metadata"> <xsl:element name="metadata"> <xsl:variable name="ddaReference" select="concat(oai:OAI-PMH/oai:request[@verb='GetRecord']/text(), '@@', oai:OAI-PMH/oai:request[@verb='GetRecord']/@identifier)" /> <xsl:variable name="ddaReference" select="concat(oai:OAI-PMH/oai:request[@verb='GetRecord']/text(), '@@', oai:OAI-PMH/oai:request[@verb='GetRecord']/@identifier)" /> <xsl:variable name="setSpec" select="/oai:OAI-PMH/oai:GetRecord/oai:record/oai:header/oai:setSpec/text()"/> <xsl:call-template name="new-metadatum"> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'internal.dda.reference'"/> <xsl:with-param name="key" select="'internal.dda.reference'"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="language" select="''"/> Loading @@ -44,17 +45,62 @@ <xsl:with-param name="value" select="'32'"/> <xsl:with-param name="value" select="'32'"/> </xsl:call-template> </xsl:call-template> <xsl:choose> <xsl:when test="$setSpec = 'journal_of_social_structure'"> <xsl:call-template name="new-metadatum"> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.rights.licence'"/> <xsl:with-param name="key" select="'dc.rights.licence'"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="value" select="'32'"/> <xsl:with-param name="value" select="'32'"/> </xsl:call-template> </xsl:call-template> <xsl:call-template name="new-metadatum"> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.source.journal'"/> <xsl:with-param name="key" select="'dc.source.journal'"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="value" select="'1507'"/> <xsl:with-param name="value" select="'1632'"/> </xsl:call-template> </xsl:call-template> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.identifier.issn'"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="value" select="'1529-1227'"/> </xsl:call-template> </xsl:when> <xsl:when test="$setSpec = 'journal_of_educational_leadership_policy_and_pract'"> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.rights.licence'"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="value" select="'20'"/> </xsl:call-template> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.source.journal'"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="value" select="'1674'"/> </xsl:call-template> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.identifier.issn'"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="value" select="'1178-8704'"/> </xsl:call-template> </xsl:when> <xsl:when test="$setSpec = 'connections'"> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.rights.licence'"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="value" select="'16'"/> </xsl:call-template> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.source.journal'"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="value" select="'1725'"/> </xsl:call-template> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.identifier.issn'"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="value" select="'1529-1227'"/> </xsl:call-template> </xsl:when> </xsl:choose> <xsl:call-template name="new-metadatum"> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.description.pubstatus'"/> <xsl:with-param name="key" select="'dc.description.pubstatus'"/> Loading @@ -80,6 +126,12 @@ <xsl:with-param name="value" select="'1'"/> <xsl:with-param name="value" select="'1'"/> </xsl:call-template> </xsl:call-template> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.language'"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="value" select="'en'"/> </xsl:call-template> <!--xsl:call-template name="new-metadatum"> <!--xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'internal.identifier.classoz'"/> <xsl:with-param name="key" select="'internal.identifier.classoz'"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="language" select="''"/> Loading @@ -92,12 +144,6 @@ <xsl:with-param name="value" select="'10900'"/> <xsl:with-param name="value" select="'10900'"/> </xsl:call-template--> </xsl:call-template--> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.identifier.issn'"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="value" select="'1529-1227'"/> </xsl:call-template> <!--xsl:call-template name="new-metadatum"> <!--xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'internal.identifier.ddc'"/> <xsl:with-param name="key" select="'internal.identifier.ddc'"/> <xsl:with-param name="language" select="''"/> <xsl:with-param name="language" select="''"/> Loading @@ -123,6 +169,7 @@ <xsl:apply-templates select="dc:identifier"/> <xsl:apply-templates select="dc:identifier"/> <xsl:apply-templates select="dc:title"/> <xsl:apply-templates select="dc:title"/> <xsl:apply-templates select="dc:creator"/> <xsl:apply-templates select="dc:creator"/> <xsl:apply-templates select="prism:keyword"/> <xsl:apply-templates select="prism:publicationDate"/> <xsl:apply-templates select="prism:publicationDate"/> <xsl:apply-templates select="prism:volume"/> <xsl:apply-templates select="prism:volume"/> <xsl:apply-templates select="prism:number"/> <xsl:apply-templates select="prism:number"/> Loading Loading @@ -159,6 +206,30 @@ <xsl:with-param name="value" select="text()"/> <xsl:with-param name="value" select="text()"/> </xsl:call-template> </xsl:call-template> </xsl:template> </xsl:template> <xsl:template match="prism:keyword"> <xsl:if test="string-length(normalize-space(text())) > 0"> <xsl:choose> <xsl:when test="contains(normalize-space(text()), ';')"> <xsl:for-each select="tokenize(normalize-space(text()), ';')"> <xsl:if test="string-length(normalize-space(.)) > 0"> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.subject.other'"/> <xsl:with-param name="language" select="'en'"/> <xsl:with-param name="value" select="normalize-space(.)"/> </xsl:call-template> </xsl:if> </xsl:for-each> </xsl:when> <xsl:otherwise> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.subject.other'"/> <xsl:with-param name="language" select="'en'"/> <xsl:with-param name="value" select="normalize-space(text())"/> </xsl:call-template> </xsl:otherwise> </xsl:choose> </xsl:if> </xsl:template> <xsl:template match="prism:publicationDate"> <xsl:template match="prism:publicationDate"> <xsl:call-template name="new-metadatum"> <xsl:call-template name="new-metadatum"> <xsl:with-param name="key" select="'dc.date.issued'"/> <xsl:with-param name="key" select="'dc.date.issued'"/> Loading