Jsoup解析HTML

1 在解析HTML之前,需要先导入jsoup-1.10.2.jar;下面的示例代码还使用了log4j记录日志,因此也需要导入log4j的jar包,并自行准备一个带有相应getter/setter的Weather实体类

2 解析HTML,代码如下:

package com.od.cn;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

public class JsoupParserHtml {
	private static final Logger LOGGER=Logger.getLogger(JsoupParserHtml.class);
	
	//从网上把天气爬下来
	private List<Weather> parserHtmlByHttp(String url){
		List<Weather> weathers=new ArrayList<Weather>();
		try {
			Document document=Jsoup.connect(url).get();
			Elements classes=document.getElementsByClass("part_se");
			for(Element ele:classes){
				String data_role=ele.attr("data-role");
				if("collapsible".equals(data_role)){
					Elements h1=ele.select("h1");
					Elements td=ele.select("td");
					Weather weather=new Weather();
					weather.setArea(h1.text());
					weather.setAirTemperature(td.get(1).text());
					weather.setRainFall(td.get(3).text());
					weather.setRelativeWet(td.get(5).text());
					weather.setWindPower(td.get(7).text());
					weather.setWindDirection(td.get(9).text());
					weather.setDate(td.get(11).text());
					weathers.add(weather);
				}
			}
		} catch (IOException e) {
			LOGGER.error("解析网页异常:"+e.getMessage());
		}
		LOGGER.info("成功获取网页数据");
		return weathers;
	}
	
	//以json的格式保存到文本中
	private void saveFile(List<Weather> weathers){
		if(weathers!=null){
			SimpleDateFormat sdf=new SimpleDateFormat("yyyy-MM-dd");
			StringBuffer buffer=new StringBuffer();
			buffer.append("{date:""+sdf.format(new Date())+"",data[");
			for(int i=0;i<weathers.size();i++){
				Weather weather=weathers.get(i);
				if(i==weathers.size()-1){
					buffer.append("{area:""+weather.getArea()+"",airTemperature:""+weather.getAirTemperature()+"",rainFall:""+weather.getRainFall()+
							"",relativeWet:""+weather.getRelativeWet()+"",windPower:""+weather.getWindPower()+"",windDirection:""+weather.getWindDirection()+"",dateTime:""+weather.getDate()+""}");
					
				}else{
					buffer.append("{area:""+weather.getArea()+"",airTemperature:""+weather.getAirTemperature()+"",rainFall:""+weather.getRainFall()+
							"",relativeWet:""+weather.getRelativeWet()+"",windPower:""+weather.getWindPower()+"",windDirection:""+weather.getWindDirection()+"",dateTime:""+weather.getDate()+""},");
				}
			}
			buffer.append("]}");
			BufferedWriter bw=null;
			try {
				 bw=new BufferedWriter(new FileWriter("d:\weather.txt"));
				bw.write(buffer.toString());
				bw.flush();
				LOGGER.info("已保存文件");
			} catch (IOException e) {
				LOGGER.error("保存文件异常:"+e.getMessage());
			}finally{
				if(bw!=null){
					try {
						bw.close();
					} catch (IOException e) {
						LOGGER.error("关闭流异常:"+e.getMessage());
					}
				}
			}
		}
	}
	
	public static void main(String[] args) {
		PropertyConfigurator.configure("WebRoot/conf/log4j.properties");
		LOGGER.info("启动程序");
		JsoupParserHtml jph=new JsoupParserHtml();
		List<Weather> weathers=jph.parserHtmlByHttp("http://www.zhpmsc.org.cn/WeChat/monitorController/zoneSk?winzoom=1#");
		jph.saveFile(weathers);
		LOGGER.info("程序结束");
	}

}


原文地址:https://www.cnblogs.com/t0404/p/10290972.html