HtmlUnit入门二

  由于WebClient默认支持对CSS,JavaScript的解析,因此总是会出现很多错误信息,并且执行速度也很慢。

  因此,我们可以选择关闭掉WebClient对CSS,JavaScript的解析。

  使用WebClient#getOptions()方法,返回一个WebClientOptions对象,可以对WebClient进行很多设置。比如:使用代理,设置连接的有效时间之类的。

关闭对CSS,JavaScript的支持

package com.fuwh;

import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlPage;

/**
 * Demo: fetch a page with CSS and JavaScript processing switched off and
 * print the resulting document as XML.
 */
public class Demo04 {

    /**
     * Creates a Firefox-45 flavoured {@link WebClient}, disables CSS and
     * JavaScript handling, loads the page and prints it to standard output.
     * Any exception raised along the way is printed as a stack trace.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // try-with-resources closes the client on every path and does not
        // attempt a close when construction of the client itself fails.
        try (final WebClient webClient = new WebClient(BrowserVersion.FIREFOX_45)) {
            webClient.getOptions().setCssEnabled(false);          // turn off CSS processing
            webClient.getOptions().setJavaScriptEnabled(false);   // turn off JavaScript processing

            // Fetch the HtmlPage for the given URL.
            final HtmlPage page = webClient.getPage("http://www.tuicool.com/");

            System.out.println(page.asXml());
        } catch (Exception e) {
            // Demo-level error handling: just report the failure.
            e.printStackTrace();
        }
    }
}

使用代理ip

package com.fuwh;

import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.ProxyConfig;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.WebClientOptions;
import com.gargoylesoftware.htmlunit.html.HtmlPage;

/**
 * Demo: fetch a page through an HTTP proxy with CSS and JavaScript
 * processing switched off, and print the resulting document as XML.
 */
public class Demo05 {

    /**
     * Creates a Firefox-45 flavoured {@link WebClient}, disables CSS and
     * JavaScript handling, routes requests through a fixed proxy, loads the
     * page and prints it to standard output. Any exception raised along the
     * way is printed as a stack trace.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // try-with-resources closes the client on every path and does not
        // attempt a close when construction of the client itself fails.
        try (final WebClient webClient = new WebClient(BrowserVersion.FIREFOX_45)) {
            final WebClientOptions webClientOptions = webClient.getOptions();
            webClientOptions.setCssEnabled(false);          // turn off CSS processing
            webClientOptions.setJavaScriptEnabled(false);   // turn off JavaScript processing

            // Send all requests through this proxy host and port.
            webClientOptions.setProxyConfig(new ProxyConfig("49.212.39.221", 3129));

            // Fetch the HtmlPage for the given URL.
            final HtmlPage page = webClient.getPage("http://www.tuicool.com/");

            System.out.println(page.asXml());
        } catch (Exception e) {
            // Demo-level error handling: just report the failure.
            e.printStackTrace();
        }
    }
}

模拟提交表单

package com.fuwh;

import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlButton;
import com.gargoylesoftware.htmlunit.html.HtmlButtonInput;
import com.gargoylesoftware.htmlunit.html.HtmlDivision;
import com.gargoylesoftware.htmlunit.html.HtmlForm;
import com.gargoylesoftware.htmlunit.html.HtmlInput;
import com.gargoylesoftware.htmlunit.html.HtmlObject;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.HtmlSpan;
import com.gargoylesoftware.htmlunit.html.HtmlSubmitInput;
import com.gargoylesoftware.htmlunit.html.HtmlTextInput;

/**
 * Demo: simulate a form submission by filling in the search box on a page,
 * clicking the submit button and printing the first result entry.
 */
public class Demo06 {

    /**
     * Loads the search page, enters "junit" into the input named {@code q},
     * clicks the "Search" submit button and prints the first
     * {@code div} with class {@code im} from the result page as XML.
     * Any exception raised along the way is printed as a stack trace.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // The client is closed automatically when the try block exits.
        try (final WebClient webClient = new WebClient(BrowserVersion.CHROME)) {
            // Options must be set before the page is requested, otherwise
            // the initial load still runs with CSS and JavaScript enabled.
            webClient.getOptions().setCssEnabled(false);
            webClient.getOptions().setJavaScriptEnabled(false);

            final HtmlPage page = webClient.getPage("https://mvnrepository.com/");

            // Locate the search container and the form inside it.
            final HtmlDivision searchDiv = page.getFirstByXPath("//div[@id='search']");
            if (searchDiv == null) {
                throw new IllegalStateException("No div with id 'search' found on the page");
            }
            if (searchDiv.getElementsByTagName("form").isEmpty()) {
                throw new IllegalStateException("No form found inside the search div");
            }
            final HtmlForm form = (HtmlForm) searchDiv.getElementsByTagName("form").get(0);

            final HtmlTextInput textInput = form.getInputByName("q");
            final HtmlSubmitInput submit = form.getInputByValue("Search");

            textInput.setValueAttribute("junit");

            // Clicking the submit button returns the page the form leads to.
            final HtmlPage resultPage = submit.click();

            final HtmlDivision firstResult = resultPage.getFirstByXPath("//div[@class='im']");
            if (firstResult == null) {
                throw new IllegalStateException("No div with class 'im' found on the result page");
            }
            System.out.println(firstResult.asXml());
        } catch (Exception e) {
            // Demo-level error handling: just report the failure.
            e.printStackTrace();
        }
    }
}
原文地址:https://www.cnblogs.com/zerotomax/p/7257352.html