物流轨迹抓取

/**
 * Created by aixiaofeng on 17/2/6.
 */
public class FedroadSpider extends ExpressSpider {

    private static final SimpleDateFormat FMT_COL_DATE = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
    private static final SimpleDateFormat FMT_DATE     = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    private static QueryTrackApi          queryTrackApi;

    @Override
    public Result<String> doQuery(String express, String expressNo, String attr) {
        String deliveryNo = null;
        if (StringUtils.isNotBlank(attr) || !StringUtils.lowerCase(expressNo).startsWith("ec")) {
            deliveryNo = queryDeliveryNoByApi(express, expressNo, attr);
        }
        return queryByPage(express, StringUtils.isNotBlank(deliveryNo) ? deliveryNo : expressNo, attr);
    }

    private String queryDeliveryNoByApi(String express, String expressNo, String attr) {
        if (queryTrackApi == null) {
            try {
                queryTrackApi = ServiceFactory.getBean(QueryTrackApi.class);
            } catch (BeansException e) {
                queryTrackApi = new QueryTrackApi();
            }
            if (queryTrackApi == null) {
                queryTrackApi = new QueryTrackApi();
            }
        }
        QueryTrackReq reqTrack = new QueryTrackReq();
        reqTrack.getParameters().setPackageNo(expressNo);
        Result<QueryTrackRes> res = queryTrackApi.doRequest(reqTrack);
        if (res.isSuccess() && res.getData() != null && res.getData().getTrackList() != null
                && CommonUtil.isNotEmpty(res.getData().getTrackList().getTrackList())) {
            return res.getData().getTrackList().getTrackList().get(0).getDeliveryNo();
        }
        return null;
    }

    private Result<String> queryByPage(String express, String expressNo, String attr) {
        Result<String> result = new Result<>();
        String res = "";
        String BOUNDARY = UUID.randomUUID().toString();
        String urlStr = "https://www.fedroad.com";//访问页面
        try {
            StringBuilder strBuilder = new StringBuilder();
            //请求链接,拿到document
            HttpURLConnection conn = null;
            Connection connection = HttpUtils.getConnection(urlStr);
            Connection.Response response = connection.method(Connection.Method.GET).execute();
            Document document = response.parse();
            //定位到form表单
            Elements formDocuments = document.select("#aspnetForm");

            //获取conn连接
            URL url = new URL(urlStr);
            conn = (HttpURLConnection) url.openConnection();
            conn.setConnectTimeout(5000);
            conn.setReadTimeout(30000);
            conn.setDoOutput(true);
            conn.setDoInput(true);
            conn.setUseCaches(false);
            conn.setRequestMethod("POST");
            conn.setRequestProperty("Connection", "Keep-Alive");
            conn.setRequestProperty("User-Agent",
                    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36");
            conn.setRequestProperty("Content-Type", "multipart/form-data; boundary=" + BOUNDARY);

            OutputStream out = new DataOutputStream(conn.getOutputStream());
            //拼POST装请求参数
            for (int i = 0; i < formDocuments.select("input").size(); i++) {
                if (formDocuments.select("input").get(i).attr("class").contains("user_loginout")) {
                    continue;
                }
                strBuilder.append("--" + BOUNDARY + "
");
                strBuilder.append("Content-Disposition: form-data; name="" + formDocuments.select("input").get(i).attr("name") + """ + "

");

                if (formDocuments.select("input").get(i).attr("name").contains("search_shippingorder")) {
                    strBuilder.append(expressNo + "
");
                } else {
                    strBuilder.append(formDocuments.select("input").get(i).val() + "
");
                }
            }
            strBuilder.append("--" + BOUNDARY + "--");
            out.write(strBuilder.toString().getBytes());
            byte[] endData = ("
--" + BOUNDARY + "--
").getBytes();
            out.write(endData);
            out.flush();
            out.close();

            // 读取返回数据
            strBuilder = new StringBuilder();
            BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream()));
            String line;
            while ((line = reader.readLine()) != null) {
                strBuilder.append(line).append("
");
            }
            res = strBuilder.toString();
            //关闭
            reader.close();
            //获取返回的document(就是你需要的)
            document = Jsoup.parse(res);
            Elements trackinfo = document.select(".trackinfo tr");
            JSONObject json = new JSONObject();
            JSONArray arr = new JSONArray();
             通过Jsoup 获取相应的字段 进行组装
            for (Element trElement : trackinfo) {
                if (trElement.select("td").attr("class").contains("title")) {
                    continue;
                }
                Elements tdElement = trElement.getElementsByTag("td");
                JSONObject item = new JSONObject();
                if (tdElement.get(0).text().trim().isEmpty()) {
                    continue;
                } else {
                    item.put("time", FMT_DATE.format(FMT_COL_DATE.parse(tdElement.get(0).text().trim())));
                }
                item.put("context", tdElement.get(1).text().trim());
                arr.add(item);
            }
            json.put("data", arr);
             //成功返回
            return result.setSuccess(true).setCode(ErrorConstants.SUCCESS).setData(json.toString());
        } catch (Exception e) {
            result.setCode(ErrorConstants.HTTP_ERR).setMessage(StackTraceUtil.getStackTrace(e));
            LOGGER.error(" - doQuery error,express = " + express + "," + expressNo, e);
            waitRandom();
        }
        //拿到抓取到的参数
        return result;
    }

    // 测试
    public static void main(String[] args) {
        FedroadSpider spider = new FedroadSpider();
        Result<String> ret = spider.doQuery(null, "EC000021436MY", null);
        System.out.print(ret);
    }
}

  

原文地址:https://www.cnblogs.com/dreammyone/p/7071659.html