爬取网站记录,爬取动态生成网页,java爬取动态生成网页

        <!-- jsoup: supplies the Jsoup.connect(...) call and the Document/Element types used by the scraper code. -->
        <!-- NOTE(review): 1.13.1 is a pinned, older release; check the jsoup changelog for later fixes before reuse. -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.13.1</version>
        </dependency>
    public static void main(String[] args) throws Exception {
        Document document = Jsoup.connect("https://www.xbiquge.la/29/29770/14341237.html").get();
        Element body = document.body();
        String s = body.toString();
        s=s.substring(s.indexOf("<div id="content">"));
        s=s.substring(0,s.indexOf("read4()"))
                .replaceAll("&nbsp;","")
                .replaceAll("<br>","")
                .replaceAll("
","")
                .replaceAll(" ","");
        while (s.length()>=51){
            System.out.println(s.substring(0,50));
            s=s.substring(50);
            System.out.println();
        }
        System.out.println(s);
    }
原文地址:https://www.cnblogs.com/qq376324789/p/14973419.html