Commit 2d840dd8 authored by Steinberg, Jan
Browse files

jsoup cannot get input stream...

parent fb98728e
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -26,7 +26,7 @@ public class CatchAllResolvingStrategy implements ContentsUrlsResolvingStrategy
        Set<String> contentsUrlStrings = new HashSet<>();
        
        try {
-            Document doc = Jsoup.connect(seedUrl).get();
+            Document doc = Jsoup.connect(seedUrl).userAgent("Mozilla").get();
            
            // BEGIN check all "meta" elements with an attribute "name"="citation_pdf_url"
            Elements citationPdfUrlMetaElements = doc.select("meta[name=citation_pdf_url]");
+2 −1
Original line number Diff line number Diff line
@@ -302,6 +302,7 @@ public class ContentResolver {
                
                if ( possibleContentType.equals(ContentType.PDF) ) {
                    contentsUrlStrings.add(resolvedUrlString);
+                    log.info("ContentType = pdf");
                }
                else if (    possibleContentType.equals(ContentType.HTML)
                          || possibleContentType.equals(ContentType.XHTML)
@@ -309,7 +310,7 @@ public class ContentResolver {
                    String hasHtmlLikeContentUrl = resolvedUrlString;
                    ContentsUrlsResolvingStrategy strategy = getStrategy(resolvedUrlString);
                    contentsUrlStrings = strategy.getContentsUrls(hasHtmlLikeContentUrl);
-                    
+                    log.info("ContentType = html?");
                }
            }
            catch (IOException e) {