03_Jsoup

【01.获取一个页面所有的链接】

public static void main(String[] args) throws IOException {
    String url="http://www.cnblogs.com/HigginCui/";
    Document doc=Jsoup.connect(url).get();  //下载并解析成html DOM结构
    System.out.println(doc);   //这里打印就是整个html页面
    Elements links=doc.select("a[href]"); //使用select方法选择元素
    System.out.println(links.size());
    for(Element link:links){
        System.err.println("<a href=""+link.attr("abs:href")+""> ("+link.text()+") </a>");
    }
}

【运行结果】

【02】

@Test  
public void test01(){
    String html="<p> "
              + "    <a href='http://example.com/'>"
              + "        <b>霸气</b>"
              + "    </a> "
              + "    link."
              + "</p>";
    Document doc=Jsoup.parse(html);  
    Element ele=doc.select("a").first();  //查找第一个a元素
    
    System.out.println("ele.text()==="+ele.text());  //Element.text()获取标签的文本值
    System.out.println("ele.attr("href")==="+ele.attr("href"));
    String linkOuter=ele.outerHtml();
    System.out.println("ele.outerHtml()==="+linkOuter);
}

【运行结果】

【03.根据id获取对应的Element】

@Test
public void test02(){
    String html="<p id="ppp" value="i am best!">哈哈哈  </p>";
    Document doc=Jsoup.parse(html);  
    Element ele=doc.select("#ppp").first();  //查找第一个a元素
    System.out.println(ele.attr("value"));
    System.out.println(ele.text());
    
}

【运行结果】

【04】

@Test
public void test03(){
    String html="<div id="zxSale">"
              + "    <table class="sssss">"
              + "        <tbody>"
              + "            <tr value="tttttrrrrr">"
              + "                <td>2017-02-22</td>"
              + "                <td> 富国基金</td>"
              + "                <td>嘉实基金</td>"
              + "            </tr>"
              + "            <tr value="tttttrrrrr">"
              + "                <td>2017-03-22</td>"
              + "                <td>建信基金</td>"
              + "                <td>易方达基金</td>"
              + "            </tr>"
              + "        </tbody>"
              + "    </table>"
              + "</div>";
    Document doc=Jsoup.parse(html);
    Elements trs=doc.select("#zxSale > table > tbody > tr");
    for(Element tr:trs){
        Elements tds=tr.getElementsByTag("td");
        System.out.println(tds.get(0).ownText());
        System.out.println(tds.get(1).ownText());
        System.out.println(tds.get(2).ownText());
//            System.out.println("这个是不存在的:"+tds.get(3).ownText()+"!"); //这里会报错
    }
}

【运行结果】

【05】

@Test
public void test03_plus(){
    String html="<div id="zxSale">"
              + "    <table class="sssss">"
              + "        <tbody>"
              + "            <tr value="tttttrrrrr">"
              + "                <td>2017-02-22</td>"
              + "                <td> 富国基金</td>"
              + "                <td>嘉实基金</td>"
              + "            </tr>"
              + "            <tr value="tttttrrrrr">"
              + "                <td>2017-03-22</td>"
              + "                <td>建信基金</td>"
              + "                <td>易方达基金</td>"
              + "            </tr>"
              + "        </tbody>"
              + "    </table>"
              + "</div>";
    Document doc=Jsoup.parse(html);
    Elements trs=doc.select("#zxSale > table > tbody > tr");
    for(Element tr:trs){
        Elements tds=tr.getElementsByTag("td");   //根据标签获取元素
        for(Element td : tds){
            System.out.println(td.text());
        }
    }
}

【运行结果】

 【06】

@Test
public void test04(){
    String html="<div id="zxSale">"
              + "    <div>"
              + "        <select name="fundCode">"
              + "            <option value="">全部</option>"
              + "            <option value="000001">华夏001</option>"
              + "            <option value="000002">华夏002</option>"
              + "            <option value="000003">华夏003</option>"
              + "            <option value="000004">华夏004</option>"
              + "            <option value="000005">华夏005</option>"
              + "            <option value="000006">华夏006</option>"
              + "        </select>"
              + "    </div>"
              + "</div>";
    Document doc=Jsoup.parse(html);
    Elements options=doc.select("select[name=fundCode]").get(0).getElementsByTag("option");
    if(options.size()>1){
        for(Element option:options){
            System.out.println("value==="+option.attr("value"));
            System.out.println("owntext==="+option.ownText());
        }
    }
}

【运行结果】

原文地址:https://www.cnblogs.com/HigginCui/p/6484018.html