Jsoup Selector选择器用法

 1 package com.kangcong.zy;
 2 
 3 import org.jsoup.Jsoup;
 4 import org.jsoup.nodes.Document;
 5 import org.jsoup.select.Elements;
 6 
 7 import java.io.File;
 8 import java.io.IOException;
 9 
10 
11 12  
13 public class Jsoup_Selector {
14     public static void main(String[] args) {
15         String path = Jsoup_Selector.class.getClassLoader().getResource("student.xml").getPath();
16         try {
17             Document document = Jsoup.parse(new File(path), "UTF-8");
18             System.out.println("1.---------------------------------");
19             //1.eq(0)去找student的第一个索引
20             Elements elements1 = document.select("student:eq(0)");
21             System.out.println(elements1);
22             System.out.println("2.---------------------------------");
23 
24             //2.lt(1)去找student的索引小于1
25             Elements elements2 = document.select("student:lt(1)");
26             System.out.println(elements2);
27             System.out.println("3.---------------------------------");
28 
29             //3.gt(1)去找student的索引大于1
30             Elements elements3 = document.select("student:gt(1)");
31             System.out.println(elements3);
32             System.out.println("4.---------------------------------");
33 
34             //4.has(p)去找student包含p标签的
35             Elements elements4 = document.select("student:has(p)");
36             System.out.println(elements4);
37             System.out.println("5.---------------------------------");
38 
39             //5.not()去找student不包含p标签的
40             Elements elements5= document.select("student:not(:has(p))");
41             System.out.println(elements5);
42             System.out.println("6.---------------------------------");
43 
44             //6.contains()是否包含某个元素
45             Elements elements6= document.select("p:contains(ff)");
46             System.out.println(elements6);
47             System.out.println("7.---------------------------------");
48 
49             //7.获取class=ge的student
50             Elements elements7= document.select("student.ge");
51             System.out.println(elements7);
52             System.out.println("8.---------------------------------");
53 
54             //8.获取class=ge后面的元素name
55             Elements elements8= document.select(".ge>name");
56             System.out.println(elements8);
57             System.out.println("9.---------------------------------");
58 
59             //9.属性为color，值为ff的元素
60             Elements elements9= document.select("[color=ff]");
61             System.out.println(elements9);
62             System.out.println("10.---------------------------------");
63 
64             //10.所有匹配元素p,student
65             Elements elements10= document.select("p,student");
66             System.out.println(elements10);
67 
68         } catch (IOException e) {
69             e.printStackTrace();
70         }
71 
72     }
73 }

　　Document:文档对象,代表内存中的dom树:
　　　getElementById(String id):根据id属性值获取唯一的element对象

　　　getElementsByTag(String tagName):根据标签名获取元素对象集合

　　　getElementsByAttribute(String key):根据属性名称获取元素对象集合

　　　getElementsByAttributeValue(String key,String value):根据对应的属性名和属性值获取元素对象的集合

　　Elements:元素Element对象的集合,继承自ArrayList<Element>,可以当做ArrayList<Element>来使用

　　　Document具有如下几种方法,同样适用于Element,原因是Document继承自Element(Element又继承自Node),这些方法都定义在Element中:

　　　getElementById(String id):根据id属性值获取唯一的element对象

　　　getElementsByTag(String tagName):根据标签名获取元素对象集合

　　　getElementsByAttribute(String key):根据属性名称获取元素对象集合

　　　getElementsByAttributeValue(String key,String value):根据对应的属性名和属性值获取元素对象的集合

　　获取属性值

　　　String attr(String key):根据属性名获取属性值

　　获取文本内容

　　　String text():获取文本内容

　　　String html():获取标签体的所有内容(包括子标签的字符串内容)