Commit d8e6b8d9 authored by Steinberg, Jan's avatar Steinberg, Jan
Browse files

Content Resolver and Content resolver test - not yet green because of strange...

Content Resolver and Content resolver test - not yet green because of strange redirect configurations on SAGE server
parent b32fa116
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -126,6 +126,13 @@ public class ContentResolver {
                if ( possibleContentType.equals(ContentType.PDF) ) {
                    content = ContentHelpers.getTimeoutBackoffRetryAwareURLInputStream( new URL(url) );
                }
                else if (url.contains("10.1177/")) {
                	// GIGA journals special
                	String sageUrl = url;
                    String sageRedirectURLToPdf = sageUrl.replace("doi/10.1177", "doi/pdf/10.1177").replace("http://", "https://") + "?cookieSet=1";
                	URL sageURL = new URL(sageRedirectURLToPdf);
                	content = ContentHelpers.getTimeoutBackoffRetryAwareURLInputStream( sageURL );
                }
                else if (    possibleContentType.equals(ContentType.HTML)
                          || possibleContentType.equals(ContentType.XHTML)
                          || possibleContentType.equals(ContentType.XML)   ) {
@@ -151,6 +158,7 @@ public class ContentResolver {
                        
                    }
                    
                    
                    if (null == content) { // we still haven't found a PDF, therefore looking through some URLs ending in ".pdf"
                        Elements endingWithDotPdfUrlElements = doc.select("a[href$=.pdf]");
                        for (Element endingWithDotPdfUrlElement : endingWithDotPdfUrlElements) {
+27 −0
Original line number Diff line number Diff line
@@ -79,6 +79,9 @@ public class ContentResolverTest {
    private static Metadatum DOI_TO_LANDING_PAGE_METADATUM;
    private static Set<Metadatum> DOI_TO_LANDING_PAGE_METADATA;
    
    private static Metadatum DOI_TO_LANDING_PAGE_GIGA_METADATUM;
    private static Set<Metadatum> DOI_TO_LANDING_PAGE_GIGA_METADATA;

    private static Metadatum DOI_TO_PDF_METADATUM;
    private static Set<Metadatum> DOI_TO_PDF_METADATA;

@@ -118,6 +121,9 @@ public class ContentResolverTest {
        DOI_TO_LANDING_PAGE_METADATUM = new SimpleMetadatum("dc.identifier.doi", "http://dx.doi.org/10.1140/epjc/s10052-014-3060-7");
        DOI_TO_LANDING_PAGE_METADATA = ImmutableSet.of(DOI_TO_LANDING_PAGE_METADATUM);
        
        DOI_TO_LANDING_PAGE_GIGA_METADATUM = new SimpleMetadatum("dc.identifier.doi", "https://doi.org/10.1177/1866802X19840455");
        DOI_TO_LANDING_PAGE_GIGA_METADATA = ImmutableSet.of(DOI_TO_LANDING_PAGE_GIGA_METADATUM);

        DOI_TO_PDF_METADATUM = new SimpleMetadatum("dc.identifier.doi", "http://dx.doi.org/doi:10.1392/BC1.0");
        DOI_TO_PDF_METADATA = ImmutableSet.of(DOI_TO_PDF_METADATUM);

@@ -304,6 +310,27 @@ public class ContentResolverTest {
        assertTrue(isEqual);
    }
    
    /**
     * Resolves the content for {@code DOI_TO_LANDING_PAGE_GIGA_METADATA} through
     * {@code ContentResolver.resolveContentInputStream} and verifies that the returned
     * bytes are identical to the reference file {@code /1866802x19840455.pdf} loaded
     * from the test classpath.
     *
     * NOTE(review): the resolved stream presumably comes from a live remote server,
     * so this test likely needs network access -- confirm before running it offline.
     *
     * @throws IOException if the reference resource is missing or either stream cannot be read
     */
    @Test
    public void testDcIdentifierDoiViaLandingPageGiga() throws IOException {
        final String expectedResourceName = "/1866802x19840455.pdf";

        byte[] expectedBytes;
        // try-with-resources so the classpath stream is closed even if reading fails
        try (InputStream expectedInputStream = getClass().getResourceAsStream(expectedResourceName)) {
            // getResourceAsStream returns null for a missing resource; fail with a clear message instead of an NPE
            if (null == expectedInputStream) {
                throw new IOException("expected test resource not found on classpath: " + expectedResourceName);
            }
            expectedBytes = IOUtils.toByteArray(expectedInputStream);
        }
        LOG.info("expected InputStream has length {}", expectedBytes.length );

        byte[] actualBytes;
        // the resolved content stream is closed here as well once it has been fully read
        try (InputStream actualInputStream = ContentResolver.resolveContentInputStream(DOI_TO_LANDING_PAGE_GIGA_METADATA)) {
            // a null stream means nothing could be resolved; report it as a test failure rather than an NPE
            assertTrue(null != actualInputStream);
            actualBytes = IOUtils.toByteArray(actualInputStream);
        }
        LOG.info("  actual InputStream has length {}", actualBytes.length );

        // both payloads are already fully buffered, so compare the arrays directly
        boolean isEqual = java.util.Arrays.equals(expectedBytes, actualBytes);

        assertTrue(isEqual);
    }

    @Test
    public void testDcIdentifierDoiDirectPdf() throws IOException {
        InputStream expectedInputStream = getClass().getResourceAsStream("/casalini_bc_en.pdf");
+535 KiB

File added.

No diff preview for this file type.