Loading src/main/java/org/gesis/dda/publishing/domain/impl/CatchAllResolvingStrategy.java +1 −1 Original line number Diff line number Diff line Loading @@ -26,7 +26,7 @@ public class CatchAllResolvingStrategy implements ContentsUrlsResolvingStrategy Set<String> contentsUrlStrings = new HashSet<>(); try { Document doc = Jsoup.connect(seedUrl).get(); Document doc = Jsoup.connect(seedUrl).userAgent("Mozilla").get(); // BEGIN check all "meta" elements with an attribute "name"="citation_pdf_url" Elements citationPdfUrlMetaElements = doc.select("meta[name=citation_pdf_url]"); Loading src/main/java/org/gesis/dda/publishing/domain/impl/ContentResolver.java +2 −1 Original line number Diff line number Diff line Loading @@ -302,6 +302,7 @@ public class ContentResolver { if ( possibleContentType.equals(ContentType.PDF) ) { contentsUrlStrings.add(resolvedUrlString); log.info("ContentType = pdf"); } else if ( possibleContentType.equals(ContentType.HTML) || possibleContentType.equals(ContentType.XHTML) Loading @@ -309,7 +310,7 @@ public class ContentResolver { String hasHtmlLikeContentUrl = resolvedUrlString; ContentsUrlsResolvingStrategy strategy = getStrategy(resolvedUrlString); contentsUrlStrings = strategy.getContentsUrls(hasHtmlLikeContentUrl); log.info("ContentType = html?"); } } catch (IOException e) { Loading Loading
src/main/java/org/gesis/dda/publishing/domain/impl/CatchAllResolvingStrategy.java +1 −1 Original line number Diff line number Diff line Loading @@ -26,7 +26,7 @@ public class CatchAllResolvingStrategy implements ContentsUrlsResolvingStrategy Set<String> contentsUrlStrings = new HashSet<>(); try { Document doc = Jsoup.connect(seedUrl).get(); Document doc = Jsoup.connect(seedUrl).userAgent("Mozilla").get(); // BEGIN check all "meta" elements with an attribute "name"="citation_pdf_url" Elements citationPdfUrlMetaElements = doc.select("meta[name=citation_pdf_url]"); Loading
src/main/java/org/gesis/dda/publishing/domain/impl/ContentResolver.java +2 −1 Original line number Diff line number Diff line Loading @@ -302,6 +302,7 @@ public class ContentResolver { if ( possibleContentType.equals(ContentType.PDF) ) { contentsUrlStrings.add(resolvedUrlString); log.info("ContentType = pdf"); } else if ( possibleContentType.equals(ContentType.HTML) || possibleContentType.equals(ContentType.XHTML) Loading @@ -309,7 +310,7 @@ public class ContentResolver { String hasHtmlLikeContentUrl = resolvedUrlString; ContentsUrlsResolvingStrategy strategy = getStrategy(resolvedUrlString); contentsUrlStrings = strategy.getContentsUrls(hasHtmlLikeContentUrl); log.info("ContentType = html?"); } } catch (IOException e) { Loading