jsoup 抓取省市区

package com.xazhxc.htjcom.back.controller.base;

import cn.hutool.core.util.StrUtil;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.xazhxc.htjcom.entity.Citys;
import com.xazhxc.htjcom.init.HttpServerInit;
import com.xazhxc.htjcom.kit.Kits;
import com.xazhxc.htjcom.kit.PropsKit;
import com.xazhxc.htjcom.kit.UploadKit;
import com.xazhxc.htjcom.service.CitysService;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.tio.core.ChannelContext;
import org.tio.core.GroupContext;
import org.tio.http.common.HttpRequest;
import org.tio.http.common.HttpResponse;
import org.tio.http.common.UploadFile;
import org.tio.http.server.annotation.RequestPath;
import org.tio.http.server.mvc.Routes;
import org.tio.http.server.util.Resps;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

/**
 * 公共类
 *
 * @author leizhen.wang
 */
@RequestPath(value = "/base")
@Slf4j
public class BaseController {
    static CitysService citysService = Kits.getBean( CitysService.class);
    @Mapper
    ProductService productService;

   private static Map<Integer, String> cssMap = new HashMap<Integer, String>();
    private static BufferedWriter bufferedWriter = null;
   static {
       cssMap.put(1, "provincetr");// 省
       cssMap.put(2, "citytr");// 市
       cssMap.put(3, "countytr");// 市
   }
    @RequestPath(value = "/pro")
    public HttpResponse product(HttpRequest request) throws IOException {

        new Thread(() -> {
            try {
                initFile();
                Document connect = Jsoup.connect( "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2017/" ).get();
                int level = 1;
                Elements rowProvince = connect.select("tr." + cssMap.get(level));
                for (Element provinceElement : rowProvince) {
                    Elements select = provinceElement.select("a");
                    for (Element province  : select) {
                        try {
                            parseNextLevel(province, level + 1, null, null);
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
//                        System.out.println("----province-----"+province);
                    }
                }
                closeStream();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }).start();

        return Resps.json( request, Kits.result().ok() );
    }

    private static void closeStream() {
        if (bufferedWriter != null) {
            try {
                bufferedWriter.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    private static void parseNextLevel(Element parentElement, int level, String code, String area) throws IOException {
        try {
            Thread.sleep(500);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }

        String attr = parentElement.attr( "abs:href" );
        if (StrUtil.isEmpty( code )) {
            code = attr.substring( 54,56 )+"0000";
        }
//        Citys citys = new Citys(String.valueOf( SnowFlakeUtil.getFlowIdInstance().nextId() ), code, parentElement.text(), "-1");
//        citysService.insert( citys );
        Document doc = Jsoup.connect( attr).get();
        if (doc != null) {
            Elements newsHeadlines = doc.select("tr." + cssMap.get(level));
            for (Element element : newsHeadlines) {
                if (StrUtil.isEmpty( area )) {
//                    printInfo(element, level + 1, code);
                } else {
                    printInfo2(element, level + 1, code);
                }
                Elements select = element.select("a");// 在递归调用的时候,这里是判断是否是村一级的数据,村一级的数据没有a标签
//                System.out.println(select);
                if (select.size() != 0) {
                    code = element.select("td").first().text();
                    parseNextLevel2(select.last(), level + 1, code, "area");
                }
            }
        }

    }

    private static void parseNextLevel2(Element parentElement, int level, String code, String area) throws IOException {
        try {
            Thread.sleep(500);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
        String attr = parentElement.attr( "abs:href" );
        if (StrUtil.isEmpty( code )) {
            code = attr.substring( 54,56 )+"0000";
        }
        Document doc = Jsoup.connect( attr).get();
        if (doc != null) {
            Elements newsHeadlines = doc.select("tr." + cssMap.get(level));
            for (Element element : newsHeadlines) {
                printInfo2(element, level + 1, code);
            }
        }

    }

    private static void initFile() throws IOException {
        bufferedWriter = new BufferedWriter(new FileWriter(new File("d:\CityInfo.txt"), true));
    }
    private static void printInfo(Element element, int level, String pid) {
//        System.out.println(pid + "---: "+element.select("td").last().text()+"============="+element.select("td").first().text());
        Citys citys = new Citys(String.valueOf( SnowFlakeUtil.getFlowIdInstance().nextId() ), element.select("td").first().text(), element.select("td").last().text(), pid);
        citysService.insert( citys );
        /*try {
            bufferedWriter.write(element.select("td").last().text() + "{" + level + "}["
                    + element.select("td").first().text() + "]");
            bufferedWriter.newLine();
            bufferedWriter.flush();
        } catch (IOException e) {
            e.printStackTrace();
        }*/
    }

    private static void printInfo2(Element element, int level, String pid) {
//        System.out.println(pid + "---: "+element.select("td").last().text()+"============="+element.select("td").first().text());
        Citys citys = new Citys(String.valueOf( SnowFlakeUtil.getFlowIdInstance().nextId() ), element.select("td").first().text(), element.select("td").last().text(), pid);
        citysService.insert( citys );
        /*try {
            bufferedWriter.write(element.select("td").last().text() + "{" + level + "}["
                    + element.select("td").first().text() + "]");
            bufferedWriter.newLine();
            bufferedWriter.flush();
        } catch (IOException e) {
            e.printStackTrace();
        }*/
    }


  


    
}

  

原文地址:https://www.cnblogs.com/joyny/p/9995040.html