Commit d8e6b8d9 authored by Steinberg, Jan

Content Resolver and Content resolver test - not yet green because of strange…

Content Resolver and Content resolver test - not yet green because of strange redirect configurations on SAGE server
parent b32fa116
......@@ -126,6 +126,13 @@ public class ContentResolver {
if ( possibleContentType.equals(ContentType.PDF) ) {
content = ContentHelpers.getTimeoutBackoffRetryAwareURLInputStream( new URL(url) );
}
else if (url.contains("10.1177/")) {
// GIGA journals special
String sageUrl = url;
String sageRedirectURLToPdf = sageUrl.replace("doi/10.1177", "doi/pdf/10.1177").replace("http://", "https://") + "?cookieSet=1";
URL sageURL = new URL(sageRedirectURLToPdf);
content = ContentHelpers.getTimeoutBackoffRetryAwareURLInputStream( sageURL );
}
else if ( possibleContentType.equals(ContentType.HTML)
|| possibleContentType.equals(ContentType.XHTML)
|| possibleContentType.equals(ContentType.XML) ) {
......@@ -151,6 +158,7 @@ public class ContentResolver {
}
if (null == content) { // we still haven't found a PDF, therefore looking through some URLs ending in ".pdf"
Elements endingWithDotPdfUrlElements = doc.select("a[href$=.pdf]");
for (Element endingWithDotPdfUrlElement : endingWithDotPdfUrlElements) {
......
......@@ -78,6 +78,9 @@ public class ContentResolverTest {
private static Metadatum DOI_TO_LANDING_PAGE_METADATUM;
private static Set<Metadatum> DOI_TO_LANDING_PAGE_METADATA;
private static Metadatum DOI_TO_LANDING_PAGE_GIGA_METADATUM;
private static Set<Metadatum> DOI_TO_LANDING_PAGE_GIGA_METADATA;
private static Metadatum DOI_TO_PDF_METADATUM;
private static Set<Metadatum> DOI_TO_PDF_METADATA;
......@@ -117,6 +120,9 @@ public class ContentResolverTest {
DOI_TO_LANDING_PAGE_METADATUM = new SimpleMetadatum("dc.identifier.doi", "http://dx.doi.org/10.1140/epjc/s10052-014-3060-7");
DOI_TO_LANDING_PAGE_METADATA = ImmutableSet.of(DOI_TO_LANDING_PAGE_METADATUM);
DOI_TO_LANDING_PAGE_GIGA_METADATUM = new SimpleMetadatum("dc.identifier.doi", "https://doi.org/10.1177/1866802X19840455");
DOI_TO_LANDING_PAGE_GIGA_METADATA = ImmutableSet.of(DOI_TO_LANDING_PAGE_GIGA_METADATUM);
DOI_TO_PDF_METADATUM = new SimpleMetadatum("dc.identifier.doi", "http://dx.doi.org/doi:10.1392/BC1.0");
DOI_TO_PDF_METADATA = ImmutableSet.of(DOI_TO_PDF_METADATUM);
......@@ -292,6 +298,27 @@ public class ContentResolverTest {
LOG.info("expected InputStream has length {}", expectedBytes.length );
byte[] actualBytes = IOUtils.toByteArray(actualInputStream);
LOG.info(" actual InputStream has length {}", actualBytes.length );
ByteArrayInputStream expectedBais = new ByteArrayInputStream(expectedBytes);
ByteArrayInputStream actualBais = new ByteArrayInputStream(actualBytes);
//boolean isEqual = IOUtils.contentEquals(expectedInputStream, actualInputStream);
boolean isEqual = IOUtils.contentEquals(expectedBais, actualBais);
assertTrue(isEqual);
}
@Test
public void testDcIdentifierDoiViaLandingPageGiga() throws IOException {
InputStream expectedInputStream = getClass().getResourceAsStream("/1866802x19840455.pdf");
InputStream actualInputStream = ContentResolver.resolveContentInputStream(DOI_TO_LANDING_PAGE_GIGA_METADATA);
byte[] expectedBytes = IOUtils.toByteArray(expectedInputStream);
LOG.info("expected InputStream has length {}", expectedBytes.length );
byte[] actualBytes = IOUtils.toByteArray(actualInputStream);
LOG.info(" actual InputStream has length {}", actualBytes.length );
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment