Commit 550b5f90 authored by Gerrit Hübbers's avatar Gerrit Hübbers 🃏
Browse files

Improve ContentResolver for DeGruyter

parent 6470111d
......@@ -319,10 +319,13 @@ public class ContentResolver {
if ( seedUrlString.contains("wbv.de") || seedUrlString.contains("budrich-academic.de") ) {
result = new UseNthPdfOnPageStrategy(0);
}
else if ( seedUrlString.contains("journals.sub.uni-hamburg.de") ) {
else if ( seedUrlString.contains("journals.sub.uni-hamburg.de") || seedUrlString.contains("degruyter.com") ) {
// Unfortunately, SUB Uni Hamburg's OJS instances report an incorrect HTML citation_pdf_url
// meta element which points not to the actual PDF bitstream, but to the landing page.
// Therefore, we discard all possible (X)HTML bitstream heuristics.
// In all known cases, DeGruyter provides single-chapter PDFs, not full PDFs. Therefore, we
// discard results from degruyter.com and rely on alternative PDF sources.
result = new EmptySetResolvingStrategy();
}
else {
......
......@@ -690,6 +690,17 @@ public class ContentResolverTest {
assertTrue(isEqual);
}
/**
 * Checks that no content suppliers are resolved for the DOI used below.
 * NOTE(review): judging by the test name, this DOI is expected to lead to degruyter.com — confirm.
 */
@Test
public void resolveDeGruyterDoi() throws MalformedURLException, IOException {
    final String doi = "https://doi.org/10.14361/9783839437124";

    // The DOI is the only metadatum handed to the resolver.
    Metadatum doiMetadatum = new SimpleMetadatum("dc.identifier.doi", doi);
    Set<Metadatum> resolverInput = ImmutableSet.of(doiMetadatum);

    Set<InputStreamAndFilenameSupplier> resolved = ContentResolver.resolveContentsSuppliers(resolverInput);

    // The resolver is expected to return an empty set for this input.
    assertEquals(0, resolved.size());
}
public static String unrootDomainName(String url) {
try {
String result;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment