Java解析HTML

  使用 Java 解析 HTML 很简单:引入 jsoup(jsoup.jar)即可,它的选择器用法和 jQuery 差不多。

下面是两个例子:

1.解析web页面

 1 import org.jsoup.Connection;
 2 import org.jsoup.Jsoup;
 3 import org.jsoup.nodes.Document;
 4 import org.jsoup.nodes.Element;
 5 import org.jsoup.select.Elements;
 6 
 7 
 8 public class ParseWebPage {
 9 
10     
11 
12         public static void main(String[] args) throws Exception {
13             
14             Connection conn = Jsoup.connect("http://www.hao123.com");
15             Document document = conn.get();
16             
17             //解析出 class为feedback的li标签  的后代a标签元素
18             Elements elements = document.select("li.feedback a");
19               
20             for (Element element : elements) {
21                 System.out.println(element.html());
22                 System.out.println(element.attr("href"));
23             }
24               
25         }
26         
27     
28 }
View Code

2.解析本地页面

 1 import java.io.File;
 2 
 3 import org.jsoup.Jsoup;
 4 import org.jsoup.nodes.Document;
 5 import org.jsoup.nodes.Element;
 6 import org.jsoup.select.Elements;
 7 
 8 public class ParseLocalPage {
 9 
10     public static void main(String[] args) throws Exception {
11         File file =new File("E:/JavaScriptDojo/jqueryui/测试Button.html");
12         Document document = Jsoup.parse(file, "utf-8");
13         
14         Elements es = document.select("#getDPvalues");
15         
16         for (Element element : es) {
17              System.out.printf("%s\t%s\n" ,element.html() ,element.val());
18         }
19     }
20 }
View Code

很简单吧

原文地址:https://www.cnblogs.com/sandynie/p/3137170.html