Commit f6fef92a authored by Gerrit Hübbers's avatar Gerrit Hübbers 🃏
Browse files

Improving XsltOAIPMH bundles source filtering by introducing new Exception handling

parent 82893b3e
......@@ -18,10 +18,12 @@ import java.util.stream.Stream;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBElement;
import javax.xml.bind.Unmarshaller;
import javax.xml.transform.ErrorListener;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Templates;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
......@@ -43,7 +45,8 @@ import org.slf4j.LoggerFactory;
import org.xml.sax.InputSource;
import net.sf.saxon.TransformerFactoryImpl;
public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSource {
import net.sf.saxon.trans.XPathException;
public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSource, ErrorListener {
private final static Logger LOG = LoggerFactory.getLogger(XsltTransformerOaiPmhBundlesStreamSource.class);
......@@ -171,6 +174,7 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo
Result xmlOutput = new StreamResult(writer);
Transformer transformer = xslTemplate.newTransformer();
transformer.setErrorListener(this);
transformer.transform(xmlInput, xmlOutput);
String xsltConvertedXmlOutput = writer.toString();
......@@ -183,34 +187,44 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo
InputStream convertedXmlInputStream = new ByteArrayInputStream(xsltConvertedXmlOutput.getBytes(StandardCharsets.UTF_8) );
Bundle currentPartBundle = (Bundle) unmarshaller.unmarshal(convertedXmlInputStream);
boolean shallFilterAway = currentPartBundle.getMetadata().stream().anyMatch(m -> "internal.dda.flags".equals( m.getKey() ) && m.getValue().contains("filteraway") );
if (shallFilterAway) {
resultBundle = null;
Set<Metadatum> metadata = currentPartBundle.getMetadata();
bundleMetadata.addAll(metadata);
// STEP 4: extract lastModifiedDate
StringReader sr = new StringReader(getRecordXmlDocumentResponseString);
//LOG.info("sr=\n{}", getRecordXmlDocumentResponseString);
JAXBContext oaiPmhjaxbContext = JAXBContext.newInstance(OAIPMHtype.class);
Unmarshaller oaiPmhUnmarshaller = oaiPmhjaxbContext.createUnmarshaller();
@SuppressWarnings("unchecked")
JAXBElement<OAIPMHtype> wrappedResponseObject = (JAXBElement<OAIPMHtype>) oaiPmhUnmarshaller.unmarshal(sr);
OAIPMHtype response = wrappedResponseObject.getValue();
lastModifiedString = response.getGetRecord().getRecord().getHeader().getDatestamp();
// STEP 5: add identifier metadatum
Metadatum reference = new SimpleMetadatum("internal.dda.reference", oaiPmhEndpoint + "@@" + oaiPmhIdentifier);
bundleMetadata.add(reference);
resultBundle = BundleBuilder.create().withMetadata(bundleMetadata).withLastModifiedString(lastModifiedString).build();
}
}
catch (XPathException e) {
String errorCode = e.getErrorCodeLocalPart();
if (null != errorCode) {
if ("filteraway".equals(errorCode) ) {
LOG.debug("filtering away oaiPmhIdentifier={}", oaiPmhIdentifier);
}
else {
Set<Metadatum> filteredMetadata = currentPartBundle.getMetadata().stream().filter(m -> !"internal.dda.flags".equals( m.getKey() )).collect(Collectors.toSet() );
bundleMetadata.addAll( filteredMetadata );
// STEP 4: extract lastModifiedDate
StringReader sr = new StringReader(getRecordXmlDocumentResponseString);
//LOG.info("sr=\n{}", getRecordXmlDocumentResponseString);
JAXBContext oaiPmhjaxbContext = JAXBContext.newInstance(OAIPMHtype.class);
Unmarshaller oaiPmhUnmarshaller = oaiPmhjaxbContext.createUnmarshaller();
@SuppressWarnings("unchecked")
JAXBElement<OAIPMHtype> wrappedResponseObject = (JAXBElement<OAIPMHtype>) oaiPmhUnmarshaller.unmarshal(sr);
OAIPMHtype response = wrappedResponseObject.getValue();
lastModifiedString = response.getGetRecord().getRecord().getHeader().getDatestamp();
// STEP 5: add identifier metadatum
Metadatum reference = new SimpleMetadatum("internal.dda.reference", oaiPmhEndpoint + "@@" + oaiPmhIdentifier);
bundleMetadata.add(reference);
resultBundle = BundleBuilder.create().withMetadata(bundleMetadata).withLastModifiedString(lastModifiedString).build();
LOG.warn("fatalError. Filtering away oaiPmhIdentifier=" + oaiPmhIdentifier, e);
}
}
else {
LOG.warn("fatalError. Filtering away oaiPmhIdentifier=" + oaiPmhIdentifier, e);
}
resultBundle = null;
}
catch (Throwable t) {
LOG.warn("Problem getting record with id " + oaiPmhIdentifier + " and metadataPrefix " + metadataPrefix + ". Skipping it.", t);
resultBundle = null;
......@@ -274,4 +288,34 @@ public class XsltTransformerOaiPmhBundlesStreamSource implements BundlesStreamSo
return result;
}
/**
 * {@link ErrorListener} callback for recoverable transformation errors.
 *
 * <p>The reported exception is written to the class logger at ERROR level
 * under the message {@code "error"}; it is not rethrown from here.
 *
 * @param arg0 the error reported by the transformer
 * @throws TransformerException declared by the interface; this implementation never throws it
 */
@Override
public void error(TransformerException arg0) throws TransformerException {
    // Log only; nothing is rethrown from this callback.
    LOG.error("error", arg0);
}
/**
 * {@link ErrorListener} callback for fatal transformation errors.
 *
 * <p>Every fatal error is logged at ERROR level under the message
 * {@code "fatalError"}, with one exception: an {@link XPathException} whose
 * error-code local part is {@code "filteraway"} is passed over silently.
 * The stylesheet raises that code via {@code error(QName('','filteraway'), ...)}
 * for records it does not want, so it appears to be an expected signal
 * rather than a failure (NOTE(review): confirm no other caller relies on it
 * being logged here).
 *
 * @param arg0 the fatal error reported by the transformer
 * @throws TransformerException declared by the interface; this implementation never throws it
 */
@Override
public void fatalError(TransformerException arg0) throws TransformerException {
    // Only an XPathException can carry the "filteraway" code; a missing (null)
    // code compares unequal and therefore falls through to the log call.
    final boolean isFilterAwaySignal = arg0 instanceof XPathException
            && "filteraway".equals(((XPathException) arg0).getErrorCodeLocalPart());
    if (isFilterAwaySignal) {
        return;
    }
    LOG.error("fatalError", arg0);
}
/**
 * {@link ErrorListener} callback for transformation warnings.
 *
 * <p>The reported exception is written to the class logger at WARN level
 * under the message {@code "warn"}; it is not rethrown from here.
 *
 * @param arg0 the warning reported by the transformer
 * @throws TransformerException declared by the interface; this implementation never throws it
 */
@Override
public void warning(TransformerException arg0) throws TransformerException {
    // Log only; nothing is rethrown from this callback.
    LOG.warn("warn", arg0);
}
}
......@@ -233,11 +233,7 @@
</xsl:choose>
<xsl:if test="contains( text(), 'journalofhumansecurity:EDT' )">
<!-- we are not interested in editorials -->
<xsl:call-template name="new-metadatum">
<xsl:with-param name="key" select="'internal.dda.flags'"/>
<xsl:with-param name="language" select="''"/>
<xsl:with-param name="value" select="'filteraway'"/>
</xsl:call-template>
<xsl:value-of select="error(QName('','filteraway'), 'is in set journalofhumansecurity:EDT')" />
</xsl:if>
</xsl:if>
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment