Commit 2d840dd8 authored by Steinberg, Jan

jsoup cannot get input stream...

parent fb98728e
......@@ -26,7 +26,7 @@ public class CatchAllResolvingStrategy implements ContentsUrlsResolvingStrategy
Set<String> contentsUrlStrings = new HashSet<>();
try {
- Document doc = Jsoup.connect(seedUrl).get();
+ Document doc = Jsoup.connect(seedUrl).userAgent("Mozilla").get();
// BEGIN check all "meta" elements with an attribute "name"="citation_pdf_url"
Elements citationPdfUrlMetaElements = doc.select("meta[name=citation_pdf_url]");
......
......@@ -302,6 +302,7 @@ public class ContentResolver {
if ( possibleContentType.equals(ContentType.PDF) ) {
contentsUrlStrings.add(resolvedUrlString);
log.info("ContentType = pdf");
}
else if ( possibleContentType.equals(ContentType.HTML)
|| possibleContentType.equals(ContentType.XHTML)
......@@ -309,7 +310,7 @@ public class ContentResolver {
String hasHtmlLikeContentUrl = resolvedUrlString;
ContentsUrlsResolvingStrategy strategy = getStrategy(resolvedUrlString);
contentsUrlStrings = strategy.getContentsUrls(hasHtmlLikeContentUrl);
log.info("ContentType = html?");
}
}
catch (IOException e) {
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment