jsoup解析页面

package com.java.jsoup;
/**
 * jsoup解析网页
* @author nidegui
* @version 2019年4月29日 下午5:12:02
* 
*/

import java.io.IOException;
import java.io.InputStream;

import org.apache.http.HttpEntity;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Downloads the cnblogs.com home page with Apache HttpClient, parses it with jsoup,
 * and prints the text and {@code href} of every post-title link matched by a CSS selector.
 */
public class Demo1 {
	/** Page that is downloaded and parsed. */
	private static final String PAGE_URL = "https://www.cnblogs.com/";

	/** CSS selector for the post-title anchors in the post list. */
	private static final String POST_LINK_SELECTOR = "#post_list .post_item .post_item_body h3 a";

	/** Charset applied to the response body when the response itself does not declare one. */
	private static final String DEFAULT_CHARSET = "UTF-8";

	/**
	 * Fetches {@link #PAGE_URL} and prints one "text" line and one "link" line per matched anchor.
	 *
	 * @param args unused
	 * @throws ClientProtocolException if the HTTP exchange reports a protocol error
	 * @throws IOException if the connection fails or the body cannot be read
	 */
	public static void main(String[] args) throws ClientProtocolException, IOException {
		String content = fetch(PAGE_URL);
		Document doc = Jsoup.parse(content);
		// CSS selector query
		Elements aSelect = doc.select(POST_LINK_SELECTOR);
		for (Element a : aSelect) {
			System.out.println("文本:" + a.text());
			System.out.println("链接:" + a.attr("href"));
		}
	}

	/**
	 * Performs a GET request and returns the response body as a string.
	 *
	 * @param url address to request
	 * @return the response body, or an empty string when the response carries no entity
	 * @throws IOException if the request fails or the body cannot be read
	 */
	private static String fetch(String url) throws IOException {
		// try-with-resources closes the response first and then the client, even when
		// reading the body throws; previously the client was never closed at all.
		try (CloseableHttpClient httpClient = HttpClients.createDefault();
				CloseableHttpResponse response = httpClient.execute(new HttpGet(url))) {
			HttpEntity entity = response.getEntity();
			if (entity == null) {
				// EntityUtils.toString rejects a null entity; treat it as an empty page.
				return "";
			}
			// The explicit default avoids falling back to ISO-8859-1 when the server
			// omits the charset, which would garble non-Latin text.
			return EntityUtils.toString(entity, DEFAULT_CHARSET);
		}
	}
}

  

原文地址:https://www.cnblogs.com/nidegui/p/10894739.html