Jsoup获取DOM元素

(1)doc.getElementsByTag(String tagName);

(2)doc.getElementById(String id);

(3)doc.getElementsByClass(String className);

(4)doc.getElementsByAttribute(String key);

// Collect every element that carries a "width" attribute and print each one.
elements=document.getElementsByAttribute("width");
elements.forEach(System.out::println);

  

(5)doc.getElementsByAttributeValue(String key,String value);

示例:通过key-value查找src="/images/logo_small.gif"的元素

		// Look up elements by attribute key/value (here: src="/images/logo_small.gif").
		elements=document.getElementsByAttributeValue("src", "/images/logo_small.gif");
		// The page may contain no matching element; get(0) on an empty result
		// would throw IndexOutOfBoundsException, so only print when there is a match.
		if(!elements.isEmpty()){
			System.out.println(elements.get(0).toString());
		}

示例:通过key-value查找target="_blank"的元素

		// Print every element whose target attribute equals "_blank".
		elements=document.getElementsByAttributeValue("target","_blank");
		elements.forEach(link -> System.out.println(link.toString()));

  

 使用document.select();选择元素

通过class一级一级往下找

package com.oracle.zibo;

import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Demo: downloads a web page with Apache HttpClient, parses it with Jsoup and
 * prints the text of every link matched by a descendant CSS selector.
 */
public class Demo2 {

	/**
	 * Fetches http://www.bootcss.com/, parses the returned HTML and prints the
	 * text of each element matching ".row .thumbnail .caption h3 a".
	 *
	 * @param args unused
	 * @throws Exception if the request or reading the response body fails
	 */
	public static void main(String[] args) throws Exception {
		HttpGet httpGet=new HttpGet("http://www.bootcss.com/");

		String str;
		// try-with-resources closes the response first and then the client, even when
		// reading the entity throws; the manual close() calls leaked both in that case.
		try(CloseableHttpClient closeableHttpClient=HttpClients.createDefault();
				CloseableHttpResponse closeableHttpResponse=closeableHttpClient.execute(httpGet)){
			HttpEntity httpEntity=closeableHttpResponse.getEntity(); // response entity (page content)
			str=EntityUtils.toString(httpEntity, "utf-8");
		}

		Document document=Jsoup.parse(str); // parse the page

		// Find all titles on the Bootstrap home page.
		Elements elements=document.select(".row .thumbnail .caption h3 a");
		for(Element e:elements){
			System.out.println(e.text());
		}
	}

}

  

使用"a[href]"

查找所有带href属性的a标签

		// Select every <a> tag that has an href attribute and print its inner HTML.
		Elements elements=document.select("a[href]");
		elements.forEach(anchor -> System.out.println(anchor.html()));

使用"img[src$=.png]"

查找扩展名为.png的图片的元素

		// Select <img> tags whose src attribute ends with ".png" and print each match.
		Elements elements=document.select("img[src$=.png]");
		elements.forEach(System.out::println);

  

取得我们需要的信息

		// For each <img> whose src ends with ".png", print the element itself,
		// then its text, its inner HTML and the value of its src attribute.
		Elements elements=document.select("img[src$=.png]");
		for(int i=0;i<elements.size();i++){
			Element img=elements.get(i);
			System.out.println(img);
			System.out.println(img.text()); // text content inside the tag
			System.out.println(img.html()); // HTML code inside the tag
			System.out.println(img.attr("src")); // value of the given attribute
		}

e.attr(属性),返回属性值

.first()取得第一个(没有匹配的元素时返回null)

.last()取得最后一个(没有匹配的元素时返回null)

// Take the first <img> whose src ends with ".gif".
Element element=document.select("img[src$=.gif]").first();
// first() returns null when the selection is empty; guard to avoid a NullPointerException.
if(element!=null){
	System.out.println(element.attr("src")); // value of the src attribute
}

  

原文地址:https://www.cnblogs.com/mengxinrenyu/p/7635492.html