Commit 5993985b authored by Steinberg, Jan
Browse files

Merge branch 'master' of git.gesis.org:dda/dda-wizard

parents 1b098309 6dd78548
...@@ -46,318 +46,323 @@ import org.xml.sax.InputSource; ...@@ -46,318 +46,323 @@ import org.xml.sax.InputSource;
import net.sf.saxon.TransformerFactoryImpl; import net.sf.saxon.TransformerFactoryImpl;
import net.sf.saxon.trans.XPathException; import net.sf.saxon.trans.XPathException;
public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSource, ErrorListener { public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSource, ErrorListener {
private final static Logger LOG = LoggerFactory.getLogger(XsltTransformerOaiPmhBundlesStreamSource.class);
private final static Logger LOG = LoggerFactory.getLogger(XsltTransformerOaiPmhBundlesStreamSource.class);
private String oaiPmhEndpoint;
private String oaiPmhEndpoint; private String setSpec;
private String setSpec; private Map<String, String> metadataPrefix2XsltMap;
private Map<String, String> metadataPrefix2XsltMap; private OaiPmhClient client;
private OaiPmhClient client; HarvestingIntervalType intervalType;
HarvestingIntervalType intervalType; private LocalDate dayFrom, dayUntil;
private LocalDate dayFrom, dayUntil; private Instant secondFrom, secondUntil;
private Instant secondFrom, secondUntil;
/**
 * Creates a source that harvests without a date window and without a set
 * restriction.
 *
 * @param oaiPmhEndpoint         endpoint string passed to the {@link OaiPmhClient}
 * @param metadataPrefix2XsltMap metadataPrefix mapped to the XSLT applied to
 *                               records fetched with that prefix
 */
public XsltTransformerOaiPmhBundlesStreamSource(String oaiPmhEndpoint,
        Map<String, String> metadataPrefix2XsltMap) {
    this.client = new OaiPmhClient(oaiPmhEndpoint);
    this.intervalType = HarvestingIntervalType.FULL_HARVEST;
    this.metadataPrefix2XsltMap = metadataPrefix2XsltMap;
    this.oaiPmhEndpoint = oaiPmhEndpoint;
}
/**
 * Creates a source that harvests a day-granularity window.
 *
 * @param oaiPmhEndpoint         endpoint string passed to the {@link OaiPmhClient}
 * @param metadataPrefix2XsltMap metadataPrefix mapped to the XSLT applied to
 *                               records fetched with that prefix
 * @param from                   lower bound of the window; {@code null} is
 *                               forwarded as "no bound"
 * @param until                  upper bound of the window; {@code null} is
 *                               forwarded as "no bound"
 */
public XsltTransformerOaiPmhBundlesStreamSource(String oaiPmhEndpoint,
        Map<String, String> metadataPrefix2XsltMap, LocalDate from, LocalDate until) {
    this(oaiPmhEndpoint, metadataPrefix2XsltMap);
    // The delegated constructor selected FULL_HARVEST; switch to the day window.
    dayUntil = until;
    dayFrom = from;
    intervalType = HarvestingIntervalType.DAY_INTERVAL_HARVEST;
}
/**
 * Creates a source that harvests a second-granularity window.
 *
 * @param oaiPmhEndpoint         endpoint string passed to the {@link OaiPmhClient}
 * @param metadataPrefix2XsltMap metadataPrefix mapped to the XSLT applied to
 *                               records fetched with that prefix
 * @param from                   lower bound of the window; {@code null} is
 *                               forwarded as "no bound"
 * @param until                  upper bound of the window; {@code null} is
 *                               forwarded as "no bound"
 */
public XsltTransformerOaiPmhBundlesStreamSource(String oaiPmhEndpoint,
        Map<String, String> metadataPrefix2XsltMap, Instant from, Instant until) {
    this(oaiPmhEndpoint, metadataPrefix2XsltMap);
    // The delegated constructor selected FULL_HARVEST; switch to the second window.
    secondUntil = until;
    secondFrom = from;
    intervalType = HarvestingIntervalType.SECOND_INTERVAL_HARVEST;
}
// And here come the same methods but with set specification
/**
 * Set-restricted variant of the (endpoint, map) constructor: stores the
 * endpoint, the setSpec and the metadataPrefix-to-XSLT map, selects
 * FULL_HARVEST and creates the OaiPmhClient for the endpoint.
 *
 * NOTE(review): the lines below appear to fuse the old and the new revision of
 * a side-by-side diff (each statement shows up twice, shifted by a line);
 * confirm against the real file before editing the code itself.
 * NOTE(review): the body repeats the assignments of the constructor without
 * setSpec instead of delegating to it.
 *
 * @param oaiPmhEndpoint         endpoint string passed to the OaiPmhClient
 * @param setSpec                set restriction; getBundlesStream treats null
 *                               or blank as "no set"
 * @param metadataPrefix2XsltMap metadataPrefix mapped to the XSLT for that prefix
 */
public XsltTransformerOaiPmhBundlesStreamSource(String oaiPmhEndpoint, String setSpec, Map<String, String> metadataPrefix2XsltMap) { // And here come the same methods but with set specification
this.oaiPmhEndpoint = oaiPmhEndpoint; public XsltTransformerOaiPmhBundlesStreamSource(String oaiPmhEndpoint, String setSpec,
this.setSpec = setSpec; Map<String, String> metadataPrefix2XsltMap) {
this.metadataPrefix2XsltMap = metadataPrefix2XsltMap; this.oaiPmhEndpoint = oaiPmhEndpoint;
this.intervalType = HarvestingIntervalType.FULL_HARVEST; this.setSpec = setSpec;
client = new OaiPmhClient(oaiPmhEndpoint); this.metadataPrefix2XsltMap = metadataPrefix2XsltMap;
} this.intervalType = HarvestingIntervalType.FULL_HARVEST;
client = new OaiPmhClient(oaiPmhEndpoint);
/**
 * Set-restricted, day-granularity constructor: delegates to the
 * (endpoint, setSpec, map) constructor, then selects DAY_INTERVAL_HARVEST and
 * stores the window in dayFrom / dayUntil.
 *
 * NOTE(review): the lines below appear to fuse the old and the new revision of
 * a side-by-side diff; the closing brace at the start of this span seems to
 * belong to the previous constructor of the right-hand revision.
 *
 * @param oaiPmhEndpoint         endpoint string passed to the OaiPmhClient
 * @param setSpec                set restriction
 * @param metadataPrefix2XsltMap metadataPrefix mapped to the XSLT for that prefix
 * @param from                   lower bound of the day window (null allowed)
 * @param until                  upper bound of the day window (null allowed)
 */
public XsltTransformerOaiPmhBundlesStreamSource(String oaiPmhEndpoint, String setSpec, Map<String, String> metadataPrefix2XsltMap, LocalDate from, LocalDate until) { }
this(oaiPmhEndpoint, setSpec, metadataPrefix2XsltMap);
this.intervalType = HarvestingIntervalType.DAY_INTERVAL_HARVEST; public XsltTransformerOaiPmhBundlesStreamSource(String oaiPmhEndpoint, String setSpec,
this.dayFrom = from; Map<String, String> metadataPrefix2XsltMap, LocalDate from, LocalDate until) {
this.dayUntil = until; this(oaiPmhEndpoint, setSpec, metadataPrefix2XsltMap);
} this.intervalType = HarvestingIntervalType.DAY_INTERVAL_HARVEST;
this.dayFrom = from;
/**
 * Set-restricted, second-granularity constructor: delegates to the
 * (endpoint, setSpec, map) constructor, then selects SECOND_INTERVAL_HARVEST
 * and stores the window in secondFrom / secondUntil.
 *
 * NOTE(review): the lines below appear to fuse the old and the new revision of
 * a side-by-side diff; the trailing "this.dayUntil = until;" on the first line
 * seems to belong to the previous constructor of the right-hand revision.
 *
 * @param oaiPmhEndpoint         endpoint string passed to the OaiPmhClient
 * @param setSpec                set restriction
 * @param metadataPrefix2XsltMap metadataPrefix mapped to the XSLT for that prefix
 * @param from                   lower bound of the window (null allowed)
 * @param until                  upper bound of the window (null allowed)
 */
public XsltTransformerOaiPmhBundlesStreamSource(String oaiPmhEndpoint, String setSpec, Map<String, String> metadataPrefix2XsltMap, Instant from, Instant until) { this.dayUntil = until;
this(oaiPmhEndpoint, setSpec, metadataPrefix2XsltMap); }
this.intervalType = HarvestingIntervalType.SECOND_INTERVAL_HARVEST;
this.secondFrom = from; public XsltTransformerOaiPmhBundlesStreamSource(String oaiPmhEndpoint, String setSpec,
this.secondUntil = until; Map<String, String> metadataPrefix2XsltMap, Instant from, Instant until) {
} this(oaiPmhEndpoint, setSpec, metadataPrefix2XsltMap);
this.intervalType = HarvestingIntervalType.SECOND_INTERVAL_HARVEST;
/**
 * Harvests the endpoint and returns one Bundle per distinct record identifier.
 *
 * Steps visible in the code:
 * 1. Turn the configured window into from/until strings according to
 *    intervalType (both null for FULL_HARVEST and for the default branch).
 * 2. For every metadataPrefix in metadataPrefix2XsltMap, request the identifier
 *    list from the client; a null or blank setSpec is sent as null.
 * 3. Flatten the header identifiers of all responses, drop nulls and
 *    duplicates, and collect them into a list (so all listing requests finish
 *    before the first bundle is built).
 * 4. Map each identifier through getBundle and drop null results.
 *
 * NOTE(review): the lines below appear to fuse the old (left) and the new
 * (right) revision of a side-by-side diff; confirm against the real file.
 * NOTE(review): the right-hand revision evaluates from.split("T")[0] for every
 * prefix, but from is null for FULL_HARVEST, so a full harvest would throw a
 * NullPointerException when the stream is collected.
 * NOTE(review): in the right-hand revision the condition
 * "setSpec == null || setSpec.trim().isEmpty() &amp;&amp; mp.equals("pam")" parses as
 * "setSpec == null || (isEmpty &amp;&amp; pam)", so a null setSpec takes the truncated
 * "exeley" from-date for every prefix, not only for "pam" - presumably not
 * intended.
 * NOTE(review): the right-hand revision logs per-prefix details at info level
 * where the left-hand revision used debug.
 *
 * @return stream of the bundles that getBundle could build; never contains null
 */
@Override this.secondFrom = from;
public Stream<Bundle> getBundlesStream() { this.secondUntil = until;
Stream<Bundle> resultBundleStream; }
Set<String> metadataPrefixes = metadataPrefix2XsltMap.keySet();
@Override
final String from; public Stream<Bundle> getBundlesStream() {
final String until; Stream<Bundle> resultBundleStream;
switch (intervalType) { Set<String> metadataPrefixes = metadataPrefix2XsltMap.keySet();
case FULL_HARVEST:
from = null; final String from;
until = null; final String until;
break; switch (intervalType) {
case SECOND_INTERVAL_HARVEST: case FULL_HARVEST:
from = convertInstantToOaiPmhString(secondFrom); from = null;
until = convertInstantToOaiPmhString(secondUntil); until = null;
break; break;
case DAY_INTERVAL_HARVEST: case SECOND_INTERVAL_HARVEST:
from = convertLocalDateToOaiPmhString(dayFrom); from = convertInstantToOaiPmhString(secondFrom);
until = convertLocalDateToOaiPmhString(dayUntil); until = convertInstantToOaiPmhString(secondUntil);
break; break;
default: case DAY_INTERVAL_HARVEST:
from = null; from = convertLocalDateToOaiPmhString(dayFrom);
until = null; until = convertLocalDateToOaiPmhString(dayUntil);
break; break;
} default:
from = null;
// some record's metadata may not be available for all specified metadataPrefixes until = null;
// therefore collecting the union of all identifiers over all specified metadataPrefixes break;
Stream<OAIPMHtype> listIdentifiersResponseStream = metadataPrefixes. }
stream().
flatMap(mp -> { // some record's metadata may not be available for all specified
LOG.debug("filling list identifier stream with mp {}, from {}, until {}, setSpec {}", mp, from, until, setSpec); // metadataPrefixes
if (setSpec == null|| setSpec.trim().isEmpty()) { // therefore collecting the union of all identifiers over all specified
return client.listIdentifiersStream(mp, from, until, null); // metadataPrefixes
} Stream<OAIPMHtype> listIdentifiersResponseStream = metadataPrefixes.stream().flatMap(mp -> {
return client.listIdentifiersStream(mp, from, until, setSpec); LOG.info("filling list identifier stream with mp {}, from {}, until {}, setSpec {}", mp, from, until,
}); setSpec);
// exeley only takes "YYY-mm-dd" as from date
Stream<String> uniqueIdentifiersStream = listIdentifiersResponseStream.flatMap( oaiPmhType -> { String exeleyFrom = from.split("T")[0];
/*return oaiPmhType. // String exeleyUntil= until.split("T")[0];
getListIdentifiers(). LOG.info("exeley specials: {} -> {}", exeleyFrom, until);
getHeader(). if (setSpec == null || setSpec.trim().isEmpty() && mp.equals("pam")) {
stream().map(h -> h.getIdentifier() ); return client.listIdentifiersStream(mp, exeleyFrom, until, null);
*/ } else if (setSpec == null || setSpec.trim().isEmpty()) {
Stream<String> result = Stream.empty(); return client.listIdentifiersStream(mp, from, until, null);
ListIdentifiersType listIdentifiersType = oaiPmhType.getListIdentifiers(); }
if (null != listIdentifiersType) { if ( mp.equals("pam") ) {
List<HeaderType> headers = listIdentifiersType.getHeader(); return client.listIdentifiersStream(mp, exeleyFrom, until, setSpec);
if (null != headers) { }
result = headers.stream().map(h -> h.getIdentifier() ); return client.listIdentifiersStream(mp, from, until, setSpec);
} });
}
return result; Stream<String> uniqueIdentifiersStream = listIdentifiersResponseStream.flatMap(oaiPmhType -> {
}). /*
filter(Objects::nonNull). * return oaiPmhType. getListIdentifiers(). getHeader(). stream().map(h ->
distinct(); * h.getIdentifier() );
*/
List<String> uniqueIdentifiers = uniqueIdentifiersStream.collect( Collectors.toList() ); Stream<String> result = Stream.empty();
ListIdentifiersType listIdentifiersType = oaiPmhType.getListIdentifiers();
//uniqueIdentifiers.forEach(LOG::debug); if (null != listIdentifiersType) {
List<HeaderType> headers = listIdentifiersType.getHeader();
resultBundleStream = uniqueIdentifiers.stream().map(this::getBundle).filter(Objects::nonNull); if (null != headers) {
return resultBundleStream; result = headers.stream().map(h -> h.getIdentifier());
} }
}
/**
 * Builds the Bundle for one record by fetching it once per configured
 * metadataPrefix and merging the converted metadata.
 *
 * Per metadataPrefix the code:
 * 1. fetches the GetRecord response as a string from the client;
 * 2. stops the whole loop with a null result if isDeletedRecord reports the
 *    record as deleted;
 * 3. otherwise transforms the response with the XSLT stored for that prefix
 *    (Saxon TransformerFactoryImpl, this object as ErrorListener);
 * 4. unmarshals the transformed XML via JAXB and adds its metadata to the
 *    accumulated set;
 * 5. unmarshals the untransformed response as OAIPMHtype to read the header
 *    datestamp;
 * 6. adds an "internal.dda.reference" metadatum (endpoint + "@@" + identifier)
 *    and rebuilds the result bundle from everything accumulated so far.
 *
 * Any XPathException or other Throwable for a prefix is logged, the result is
 * reset to null and the loop continues with the next prefix.
 *
 * NOTE(review): the lines below appear to fuse the old (left) and the new
 * (right) revision of a side-by-side diff; confirm against the real file.
 * NOTE(review): because every catch resets the result to null without leaving
 * the loop, a failure on the last prefix discards a bundle that earlier
 * prefixes built successfully - confirm this is intended.
 * NOTE(review): the "filteraway" branch calls e.getCause().getLocalizedMessage();
 * if the exception carries no cause this would throw a NullPointerException
 * out of the catch block.
 * NOTE(review): the left-hand revision replaces every "&amp;" in exeley responses,
 * which presumably also re-escapes entities that are already well-formed.
 * NOTE(review): a TransformerFactory, Templates and two JAXBContext instances
 * are created per record and prefix; these look cacheable.
 *
 * @param oaiPmhIdentifier identifier of the record to fetch
 * @return the assembled bundle, or null if the record is deleted, filtered
 *         away or could not be processed
 */
public Bundle getBundle(String oaiPmhIdentifier) { return result;
LOG.debug("GetBundle - {}", oaiPmhIdentifier); }).filter(Objects::nonNull).distinct();
//Bundle bundleResult = new AutonomouslyContentResolvingBundle(ImmutableSet.of() );
Set<Metadatum> bundleMetadata = new HashSet<>(); List<String> uniqueIdentifiers = uniqueIdentifiersStream.collect(Collectors.toList());
Set<String> metadataPrefixes = metadataPrefix2XsltMap.keySet();
String lastModifiedString = ""; // uniqueIdentifiers.forEach(LOG::debug);
Bundle resultBundle = null;
//LOG.info("looking up {}", id); resultBundleStream = uniqueIdentifiers.stream().map(this::getBundle).filter(Objects::nonNull);
for (String metadataPrefix : metadataPrefixes) { return resultBundleStream;
try { }
// --- STEP 1: get XML input public Bundle getBundle(String oaiPmhIdentifier) {
String getRecordXmlDocumentResponseString = client.getRecordString(oaiPmhIdentifier, metadataPrefix); LOG.debug("GetBundle - {}", oaiPmhIdentifier);
// try to make exeley data transformable ( & -> &amp; to start with) // Bundle bundleResult = new
// ToDo: find the right location for it! // AutonomouslyContentResolvingBundle(ImmutableSet.of() );
if ( getRecordXmlDocumentResponseString.contains("identifier=\"oai:exeley.com:10.") ) { Set<Metadatum> bundleMetadata = new HashSet<>();
getRecordXmlDocumentResponseString = getRecordXmlDocumentResponseString.replace("&", "&amp;"); Set<String> metadataPrefixes = metadataPrefix2XsltMap.keySet();
} String lastModifiedString = "";
//LOG.info("--------------------------"); Bundle resultBundle = null;
//LOG.info("{}", getRecordXmlDocumentResponseString); // LOG.info("looking up {}", id);
if ( isDeletedRecord(getRecordXmlDocumentResponseString) ) { for (String metadataPrefix : metadataPrefixes) {
resultBundle = null; try {
break;
} // --- STEP 1: get XML input
else { String getRecordXmlDocumentResponseString = client.getRecordString(oaiPmhIdentifier, metadataPrefix);
// --- STEP 2: convert XML input according to XSLT
TransformerFactory factory = TransformerFactory.newInstance("net.sf.saxon.TransformerFactoryImpl", TransformerFactoryImpl.class.getClassLoader() ); // LOG.info("--------------------------");
Templates xslTemplate = factory.newTemplates( // LOG.info("{}", getRecordXmlDocumentResponseString);
new StreamSource( if (isDeletedRecord(getRecordXmlDocumentResponseString)) {
new StringReader( resultBundle = null;
metadataPrefix2XsltMap. break;
get(metadataPrefix) ) ) ); }
else {
Source xmlInput = new StreamSource( new StringReader(getRecordXmlDocumentResponseString) ); // --- STEP 2: convert XML input according to XSLT
TransformerFactory factory = TransformerFactory.newInstance("net.sf.saxon.TransformerFactoryImpl",
StringWriter writer = new StringWriter(); TransformerFactoryImpl.class.getClassLoader());
Result xmlOutput = new StreamResult(writer); Templates xslTemplate = factory.newTemplates(
new StreamSource(new StringReader(metadataPrefix2XsltMap.get(metadataPrefix))));
Transformer transformer = xslTemplate.newTransformer();
transformer.setErrorListener(this); Source xmlInput = new StreamSource(new StringReader(getRecordXmlDocumentResponseString));
transformer.transform(xmlInput, xmlOutput);
StringWriter writer = new StringWriter();
String xsltConvertedXmlOutput = writer.toString(); Result xmlOutput = new StreamResult(writer);
// LOG.info("---CONVERTED-----------------------");
// LOG.info("{}", output); Transformer transformer = xslTemplate.newTransformer();
transformer.setErrorListener(this);
// STEP 3: convert XSLT-converted XML output to Java JAXB object transformer.transform(xmlInput, xmlOutput);
JAXBContext jaxbContext = JAXBContext.newInstance(XmlBundle.class);
Unmarshaller unmarshaller = jaxbContext.createUnmarshaller(); String xsltConvertedXmlOutput = writer.toString();
InputStream convertedXmlInputStream = new ByteArrayInputStream(xsltConvertedXmlOutput.getBytes(StandardCharsets.UTF_8) ); // LOG.info("---CONVERTED-----------------------");
Bundle currentPartBundle = (Bundle) unmarshaller.unmarshal(convertedXmlInputStream); // LOG.info("{}", output);
Set<Metadatum> metadata = currentPartBundle.getMetadata(); // STEP 3: convert XSLT-converted XML output to Java JAXB object
bundleMetadata.addAll(metadata); JAXBContext jaxbContext = JAXBContext.newInstance(XmlBundle.class);
Unmarshaller unmarshaller = jaxbContext.createUnmarshaller();
// STEP 4: extract lastModifiedDate InputStream convertedXmlInputStream = new ByteArrayInputStream(
StringReader sr = new StringReader(getRecordXmlDocumentResponseString); xsltConvertedXmlOutput.getBytes(StandardCharsets.UTF_8));
//LOG.info("sr=\n{}", getRecordXmlDocumentResponseString); Bundle currentPartBundle = (Bundle) unmarshaller.unmarshal(convertedXmlInputStream);
JAXBContext oaiPmhjaxbContext = JAXBContext.newInstance(OAIPMHtype.class);
Unmarshaller oaiPmhUnmarshaller = oaiPmhjaxbContext.createUnmarshaller(); Set<Metadatum> metadata = currentPartBundle.getMetadata();
bundleMetadata.addAll(metadata);
@SuppressWarnings("unchecked")
JAXBElement<OAIPMHtype> wrappedResponseObject = (JAXBElement<OAIPMHtype>) oaiPmhUnmarshaller.unmarshal(sr); // STEP 4: extract lastModifiedDate
OAIPMHtype response = wrappedResponseObject.getValue(); StringReader sr = new StringReader(getRecordXmlDocumentResponseString);
lastModifiedString = response.getGetRecord().getRecord().getHeader().getDatestamp(); // LOG.info("sr=\n{}", getRecordXmlDocumentResponseString);
JAXBContext oaiPmhjaxbContext = JAXBContext.newInstance(OAIPMHtype.class);
// STEP 5: add identifier metadatum Unmarshaller oaiPmhUnmarshaller = oaiPmhjaxbContext.createUnmarshaller();
Metadatum reference = new SimpleMetadatum("internal.dda.reference", oaiPmhEndpoint + "@@" + oaiPmhIdentifier);
bundleMetadata.add(reference); @SuppressWarnings("unchecked")
resultBundle = BundleBuilder.create().withMetadata(bundleMetadata).withLastModifiedString(lastModifiedString).build(); JAXBElement<OAIPMHtype> wrappedResponseObject = (JAXBElement<OAIPMHtype>) oaiPmhUnmarshaller
.unmarshal(sr);
} OAIPMHtype response = wrappedResponseObject.getValue();
lastModifiedString = response.getGetRecord().getRecord().getHeader().getDatestamp();
}
catch (XPathException e) { // STEP 5: add identifier metadatum
LOG.debug("Catched XPathException"); Metadatum reference = new SimpleMetadatum("internal.dda.reference",
String errorCode = e.getErrorCodeLocalPart(); oaiPmhEndpoint + "@@" + oaiPmhIdentifier);
if (null != errorCode) { bundleMetadata.add(reference);
if ("filteraway".equals(errorCode) ) { resultBundle = BundleBuilder.create().withMetadata(bundleMetadata)
LOG.info("1- filtering away oaiPmhIdentifier={}", oaiPmhIdentifier); .withLastModifiedString(lastModifiedString).build();
LOG.info("Error detail: {}", e.getCause().getLocalizedMessage());
resultBundle = null; }
// test if this break is really necessary, taken out. STJ
// break; } catch (XPathException e) {
} LOG.debug("Catched XPathException");
else { String errorCode = e.getErrorCodeLocalPart();
LOG.warn("2- fatalError. Filtering away oaiPmhIdentifier=" + oaiPmhIdentifier, e); if (null != errorCode) {
} if ("filteraway".equals(errorCode)) {
} LOG.info("1- filtering away oaiPmhIdentifier={}", oaiPmhIdentifier);
else { LOG.info("Error detail: {}", e.getCause().getLocalizedMessage());
LOG.warn("3- fatalError. Filtering away oaiPmhIdentifier=" + oaiPmhIdentifier, e); resultBundle = null;
} // test if this break is really necessary, taken out. STJ
resultBundle = null; // break;
} } else {
LOG.warn("2- fatalError. Filtering away oaiPmhIdentifier=" + oaiPmhIdentifier, e);
catch (Throwable t) { }
LOG.warn("Problem getting record with id " + oaiPmhIdentifier + " and metadataPrefix " + metadataPrefix + ". Skipping it.", t); } else {
resultBundle = null; LOG.warn("3- fatalError. Filtering away oaiPmhIdentifier=" + oaiPmhIdentifier, e);
} }
resultBundle = null;
} }
return resultBundle; catch (Throwable t) {
} LOG.warn("Problem getting record with id " + oaiPmhIdentifier + " and metadataPrefix " + metadataPrefix
+ ". Skipping it.", t);
/**
 * Reports whether a GetRecord response marks its record as deleted.
 *
 * Evaluates an XPath over the response text that selects the status attribute
 * of OAI-PMH/GetRecord/record/header (elements matched by local name and the
 * OAI 2.0 namespace URI) and compares the value with "deleted".
 *
 * NOTE(review): the lines below appear to fuse the old (left) and the new
 * (right) revision of a side-by-side diff; confirm against the real file.
 * NOTE(review): an XPathExpressionException is logged and answered with true,
 * so a response that cannot be evaluated is treated like a deleted record and
 * silently skipped by getBundle - confirm this is the intended policy.
 *
 * @param getRecordXmlDocumentResponseString raw GetRecord response XML
 * @return true if the header status is "deleted" or the XPath evaluation
 *         failed; false otherwise
 */
public static boolean isDeletedRecord(String getRecordXmlDocumentResponseString) { resultBundle = null;
XPath xpath = XPathFactory.newInstance().newXPath(); }
InputSource inputSource = new InputSource( new StringReader(getRecordXmlDocumentResponseString) ); }
String headerStatus;
boolean result; return resultBundle;
try { }
//see https://stackoverflow.com/a/6397369/923560
XPathExpression expr = xpath.compile( public static boolean isDeletedRecord(String getRecordXmlDocumentResponseString) {
"//*[local-name()='OAI-PMH' and namespace-uri()='http://www.openarchives.org/OAI/2.0/']" XPath xpath = XPathFactory.newInstance().newXPath();
+ "/*[local-name()='GetRecord' and namespace-uri()='http://www.openarchives.org/OAI/2.0/']"
+ "/*[local-name()='record' and namespace-uri()='http://www.openarchives.org/OAI/2.0/']" InputSource inputSource = new InputSource(new StringReader(getRecordXmlDocumentResponseString));
+ "/*[local-name()='header' and namespace-uri()='http://www.openarchives.org/OAI/2.0/']/" String headerStatus;
+ "@status"); boolean result;
try {
headerStatus = expr.evaluate(inputSource); // see https://stackoverflow.com/a/6397369/923560
if ( "deleted".equals(headerStatus) ) { XPathExpression expr = xpath
result = true; .compile("//*[local-name()='OAI-PMH' and namespace-uri()='http://www.openarchives.org/OAI/2.0/']"
} + "/*[local-name()='GetRecord' and namespace-uri()='http://www.openarchives.org/OAI/2.0/']"
else { + "/*[local-name()='record' and namespace-uri()='http://www.openarchives.org/OAI/2.0/']"
result = false; + "/*[local-name()='header' and namespace-uri()='http://www.openarchives.org/OAI/2.0/']/"
} + "@status");
}
catch (XPathExpressionException e) { headerStatus = expr.evaluate(inputSource);
LOG.error("Problem identifying if deleted record", e); if ("deleted".equals(headerStatus)) {
result = true; result = true;
} } else {
return result; result = false;
} }
} catch (XPathExpressionException e) {
/**
 * Returns the OAI-PMH endpoint string this source was constructed with.
 *
 * NOTE(review): the lines below appear to fuse the old and the new revision of
 * a side-by-side diff; the trailing statements seem to belong to
 * isDeletedRecord in the right-hand revision.
 */
@Override LOG.error("Problem identifying if deleted record", e);
public String getReference() { result = true;
return oaiPmhEndpoint; }
} return result;
}
/**
 * Returns the setSpec given at construction; null when a constructor without
 * setSpec was used.
 *
 * NOTE(review): the lines below appear to fuse the old and the new revision of
 * a side-by-side diff; the trailing tokens seem to belong to getReference in
 * the right-hand revision.
 */
public String getSetSpec() {
return setSpec; @Override
} public String getReference() {
return oaiPmhEndpoint;
/**
 * Fetches the bundle for the given identifier via getBundle and returns its
 * metadata set.
 *
 * NOTE(review): getBundle returns null for deleted, filtered or failed
 * records, in which case this method would throw a NullPointerException -
 * confirm whether callers rely on that or an empty set is expected.
 * NOTE(review): the lines below appear to fuse the old and the new revision of
 * a side-by-side diff; the trailing tokens seem to belong to getReference and
 * getSetSpec in the right-hand revision.
 *
 * @param scopedIdentifier identifier passed unchanged to getBundle
 * @return the metadata of the fetched bundle
 */
@Override }
public Set<Metadatum> getAllMetadata(String scopedIdentifier) {
return getBundle(scopedIdentifier).getMetadata(); public String getSetSpec() {
} return setSpec;
}
/**
 * Converts an Instant to the string sent as from/until argument: the result of
 * Instant.toString(), or null when the argument is null.
 *
 * NOTE(review): the lines below appear to fuse the old and the new revision of
 * a side-by-side diff; the trailing tokens seem to belong to getAllMetadata in
 * the right-hand revision.
 *
 * @param instant point in time, may be null
 * @return instant.toString(), or null for a null argument
 */
public static String convertInstantToOaiPmhString(Instant instant) {
String result; @Override
result = instant != null? instant.toString() : null; public Set<Metadatum> getAllMetadata(String scopedIdentifier) {
return result; return getBundle(scopedIdentifier).getMetadata();
}
}
public static String convertInstantToOaiPmhString(Instant instant) {
/**
 * Converts a LocalDate to the string sent as from/until argument: the result
 * of LocalDate.toString(), or null when the argument is null.
 *
 * NOTE(review): the lines below appear to fuse the old and the new revision of
 * a side-by-side diff; the trailing tokens seem to belong to
 * convertInstantToOaiPmhString in the right-hand revision.
 *
 * @param localDate calendar date, may be null
 * @return localDate.toString(), or null for a null argument
 */
public static String convertLocalDateToOaiPmhString(LocalDate localDate) { String result;
String result; result = instant != null ? instant.toString() : null;
result = localDate != null? localDate.toString() : null; return result;
return result;
} }
/**
 * ErrorListener callback for recoverable transformation errors: logs the
 * exception at error level and returns without rethrowing it.
 *
 * NOTE(review): the lines below appear to fuse the old and the new revision of
 * a side-by-side diff; the trailing tokens seem to belong to
 * convertLocalDateToOaiPmhString in the right-hand revision.
 *
 * @param arg0 the error reported by the transformer
 */
@Override public static String convertLocalDateToOaiPmhString(LocalDate localDate) {
public void error(TransformerException arg0) throws TransformerException { String result;
LOG.error("error", arg0); result = localDate != null ? localDate.toString() : null;
return result;
} }
@Override @Override
public void fatalError(TransformerException arg0) throws TransformerException { public void error(TransformerException arg0) throws TransformerException {
if (arg0 instanceof XPathException) { LOG.error("error", arg0);
XPathException xPathException = (XPathException) arg0;
String errorCode = xPathException.getErrorCodeLocalPart(); }
if (null != errorCode) {
if (! "filteraway".equals(errorCode) ) { @Override
LOG.error("fatalError", arg0); public void fatalError(TransformerException arg0) throws TransformerException {
} if (arg0 instanceof XPathException) {
} XPathException xPathException = (XPathException) arg0;
else { String errorCode = xPathException.getErrorCodeLocalPart();