天猫、淘宝商品详情、库存、价格抓包

如有侵权,请联系作者删除

水平有限,还望大牛指点

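Maven 依赖(jsoup):

<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.8.3</version>
</dependency>

注意:下面的代码还使用了 net.sf.json(json-lib)和 org.apache.commons.lang(commons-lang),运行前同样需要引入这两个依赖。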

import com.sun.tools.doclets.formats.html.SourceToHTMLConverter;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
import org.apache.commons.lang.StringUtils;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Created with Chenquan.
 * Description: 淘宝抓包
 * Date: 2018-12-13
 * Time: 15:12
 */
public class TaobaoCatch {

    public static void main(String[] args) {
        int i = 0;

/*        String url = "https://acs.m.taobao.com/h5/mtop.taobao.wsearch.h5search/1.0/?jsv=2.3.16&appKey=12574478&t=1545023581359&sign=e3476c9041a75de0a9190da470204d93&api=mtop.taobao.wsearch.h5search&v=1.0&H5Request=true&ecode=1&type=jsonp&dataType=jsonp&callback=mtopjsonp1&data=%7B%22q%22%3A%22%E4%BB%99%E6%B6%B5%E5%86%85%E8%A1%A3%22%2C%22search%22%3A%22%E6%8F%90%E4%BA%A4%22%2C%22tab%22%3A%22all%22%2C%22sst%22%3A%221%22%2C%22n%22%3A20%2C%22buying%22%3A%22buyitnow%22%2C%22m%22%3A%22api4h5%22%2C%22token4h5%22%3A%22%22%2C%22abtest%22%3A%221%22%2C%22wlsort%22%3A%221%22%2C%22page%22%3A1%7D";

        Connection con = Jsoup.connect(url);
        con.header("Cookie", "cna=TA+aFFGXQFUCAXQaRYGZVU8Q; t=efa81a9785cd86f885e13998b6d5f9cb; thw=cn; uc3=vt3=F8dByRzMU9X8Hvccr00%3D&id2=W8zLpWipxVFu&nk2=0PLo6GHZOM8%3D&lg2=V32FPkk%2Fw0dUvg%3D%3D; tracknick=%5Cu9648%5Cu94E81992; lgc=%5Cu9648%5Cu94E81992; _cc_=Vq8l%2BKCLiw%3D%3D; tg=0; enc=4rB%2FfKFx8DJKgPpoHlZjr824CEYw%2BlPaKBDWbFO4fnh6svGA97NoZNGERui4fOo2tXSnSVN1ygkfn5R5ekztTQ%3D%3D; hng=CN%7Czh-CN%7CCNY%7C156; mt=ci=0_1; _m_h5_tk=e501ac7690832934d663aef19ee36be5_1545033419107; _m_h5_tk_enc=5147579a652b4fb508dc886d59c37045; isg=BFVVgDOkpYNz64H7Z31pC9thZFHP-goqhI4h7tf6EUwbLnUgn6IZNGPv_DSYLiEc");
//        con.header("referer", "https://item.taobao.com/item.htm ");
        Connection.Response resp = null;
        try {
            resp = con.method(Connection.Method.GET).ignoreContentType(true).execute();
        } catch (IOException e) {
            e.printStackTrace();
        }
        String body = resp.body();
//        System.out.println(body);
        body = body.substring(12, body.length() - 1);
        JSONObject jb = JSONObject.fromObject(body);
        JSONArray jsonArray = jb.getJSONObject("data").getJSONArray("listItem");


//        while(i<100){
            i++;
            for (int j = 0; j < jsonArray.size(); j++) {
                JSONObject jsonObject = jsonArray.getJSONObject(j);
                String item_id = jsonObject.getString("item_id");
                System.out.println("item_id: "+item_id);
                getAll(item_id);
            }
//        }*/

        //传链接上的产品id
        getAll("577996531297");


    }

    public static void getAll(String item_id ) {
        try {
            Thread.sleep(2000);//一个休息5s,太快会被禁
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
        System.out.println("开始时间:" + new Date());
        Date dateStart = new Date();
        Document doc = null;
        String id = "";
        try {
//            int i = 0;
//            while(i < 10000){
//            i++;
            String url = "https://item.taobao.com/item.htm?id="+item_id;
            id = getParamByUrl(url, "id");
            doc = Jsoup.connect(url).ignoreContentType(true).get();

           /* String url = "https://h5api.m.taobao.com/h5/mtop.taobao.detail.getdetail/6.0/?data=";//手机的html 5 页面 ,为了获取库存、价格
            String enc = "{"itemNumId":"582061497975"}";
            String gbk = URLEncoder.encode(enc, "utf-8");
            String sds = url + gbk;
            System.out.println("库存、价格"+sds);
            doc = Jsoup.connect(sds).ignoreContentType(true).get();*/

            //设置请求头
//                    Connection con = Jsoup.connect(url);
//                    con.header("Cookie", " enc=1LWJWtPGgf6MF1NVsn2rbeRb3%2FU1%2Fk5ZiiedHbVedmxmfvUUWDPmFeyKeLYl7NVchBB19JCIVnX0eFv4otK9HA%3D%3D;" +
//                            "x5sec=7b2264657461696c736b69703b32223a226235653133353933646637396131353230343663346139633633653038326465434c6a4e7a654146454e447739724732716644534b426f4c4f4455774d7a51304e7a4d794f7a453d227d;" );
//                    con.header("referer", "https://item.taobao.com/item.htm ");
//                    Connection.Response resp=con.method(Connection.Method.GET).execute();
//                    Map<String,String> cookies = resp.cookies();
//                    Connection.Request request = con.request();
//                    String body = resp.body();


        } catch (IOException e) {
            e.printStackTrace();
        }
        if (doc.baseUri().contains("tmall")) {
            System.out.println("商品名称:"+ doc.select("h1[data-spm="1000983"]").text());
        }else {
            System.out.println("商品名称:" + doc.select("h3[class="tb-main-title"]").text());
        }
        Elements imgSrcElement = doc.select("#J_UlThumb > li");
        for (Element element : imgSrcElement) {
            String imgSrc = "";
            if (element.baseUri().contains("tmall")){
                imgSrc = element.getElementsByTag("img").attr("src");
            }else{
                imgSrc = element.getElementsByTag("img").attr("data-src");
            }
            imgSrc = imgSrc.replaceFirst("//img.alicdn.com/imgextra/", "");
            imgSrc = imgSrc.substring(0, imgSrc.length() - 10);
//            imgSrc = imgSrc.replaceAll("_60x60q90.jpg",""); //处理掉不必要的数据
            System.out.println("主图url:" + imgSrc);
        }
        // 规格参数
        Elements selectRules = doc.select(".J_TSaleProp");

        List<List<String>> liHashMap = new ArrayList<>();
        for (Element ulElement : selectRules) {
            String ul = ulElement.getElementsByTag("ul").attr("data-property");
            System.out.println("ul:" + ul);

            List<String> liString = new ArrayList<>();

            for (Element liElement : ulElement.getElementsByTag("li")) {

                String liDataValue = liElement.getElementsByTag("li").attr("data-value");
                System.out.println("liDataValue: " + liDataValue);
                liString.add(liDataValue);

                String aStyle = liElement.getElementsByTag("a").attr("style");
                if (StringUtils.isNotBlank(aStyle)) {
                    aStyle = aStyle.replaceAll("background:url\(", "");
                    aStyle = aStyle.substring(0, aStyle.length() - 29);
//                aStyle = aStyle.replaceAll("_40x40q90.jpg\) center no-repeat;", "");
                    System.out.println("aStyle: " + aStyle);
                }

                String spanText = liElement.getElementsByTag("span").text();
                if (StringUtils.isNotBlank(spanText)) {
                    System.out.println("spanText: " + spanText);
                }
            }
            liHashMap.add(liString);
        }

        List<String> combination = test.combination(liHashMap);

        //获取价格、库存
        Elements eles = doc.getElementsByTag("script");
        for (Element ele : eles) {

                String s = ele.toString();
            if (!ele.baseUri().contains("tmall")) {//淘宝
                String rgex = "";
                String subUtilSimple = "";
                if (s.contains("skuMap")) {

                    //获取sku的id
                    rgex = "skuMap(.*?)propertyMemoMap";
                    String skuId = s.replaceAll("\s*", "");
    //                System.out.println(s);
                    subUtilSimple = getSubUtilSimple(skuId, rgex);
                    subUtilSimple = subUtilSimple.substring(1, subUtilSimple.length() - 1);
    //
                    JSONObject jb = JSONObject.fromObject(subUtilSimple);

                    JSONObject finalJb = jb;
                    List<String> skuList = new ArrayList<>();
                    combination.forEach(p->{

                        JSONObject jsonObject = finalJb.getJSONObject(";" + p + ";");
                        if (!jsonObject.isNullObject()) {

                            String o = jsonObject.getString("skuId");
                            System.out.println("sku的id: " + o);
                            skuList.add(o);

                        }
                    });


                    String url = "https://h5api.m.taobao.com/h5/mtop.taobao.detail.getdetail/6.0/?data=";//手机的html 5 页面 ,为了获取库存、价格
                    String enc = "{"itemNumId":"" + id + ""}";
                    String substore = "";
                    String store = "";
                    try {
                        String gbk = URLEncoder.encode(enc, "utf-8");
                        String sds = url + gbk;
                        System.out.println("库存、价格" + sds);
                        doc = Jsoup.connect(sds).ignoreContentType(true).get();
                        store = doc.toString();
                        rgex = "sku2info(.*?)skuItem";
                        substore = getSubUtilSimple(store, rgex);
                        substore = substore.substring(3, substore.length() - 3);
                        String sub = substore.replaceAll("\\", "").replaceAll("\s*", "");
                        JSONObject sb = JSONObject.fromObject(sub);
                        skuList.stream().forEach(p->{
                            if (sb.has(p)) {//判断是否有值,没值不取,不然会报错
                                String string = sb.getString(p);
                                System.out.println("淘宝的价格库存==============" + string);
                            }
                        });


                    } catch (Exception e) {
                        System.out.println("报错的地方store:" + store);
//                        System.out.println("报错的地方substore:" + substore);
                        e.printStackTrace();
                        System.out.println("=====================================程序报错,提前结束===================================================" );
                        return;
                    }


                }
                if (s.contains("descUrl") && s.contains("counterApi")) {
    //                System.out.println(s);
                    //详情链接
                    rgex = "protocol(.*?)desc\.alicdn\.com";
                    subUtilSimple = getSubUtilSimple(s, rgex);
                    subUtilSimple = subUtilSimple.substring(14, subUtilSimple.length() - 7);
                    System.out.println("详情链接: " + subUtilSimple);
                    try {
                        doc = Jsoup.connect("http:" + subUtilSimple).get();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }

                    Elements imgDetail = doc.getElementsByTag("img");
                    for (Element element : imgDetail) {
                        String imgSrc = element.getElementsByTag("img").attr("src");
    //                    imgSrc = imgSrc.replaceFirst("//img.alicdn.com/imgextra/","");
    //                    imgSrc = imgSrc.replaceAll("_60x60q90.jpg",""); //处理掉不必要的数据
                        if (StringUtils.isNotBlank(imgSrc)) {
                            System.out.println("详情图url:" + imgSrc);
                        }
                    }
                }
            }else {//天猫的
                if (s.contains("TShop.Setup")) {

//                String rgex = "<bdocid>(.*?)</bdocid>";
                    String rgex = "skuMap(.*?)salesProp";

                    String subUtilSimple = getSubUtilSimple(s, rgex);
                    subUtilSimple = subUtilSimple.substring(2, subUtilSimple.length() - 2);

                    JSONObject jb = JSONObject.fromObject(subUtilSimple);
                    List<String> skuList = new ArrayList<>();

                    combination.forEach(p->{
                        JSONObject jsonObject = jb.getJSONObject(";" + p + ";");
                        if (!jsonObject.isNullObject()) {
                            String skuId = jsonObject.getString("skuId");
                            System.out.println(skuId);
                            skuList.add(skuId);
                        }

                    });



                    //库存、价格
                    String url = "https://h5api.m.taobao.com/h5/mtop.taobao.detail.getdetail/6.0/?data=";//手机的html 5 页面 ,为了获取库存、价格
                    String enc = "{"itemNumId":"" + id + ""}";
                    String substore = "";
                    String store = "";
                    try {
                        String gbk = URLEncoder.encode(enc, "utf-8");
                        String sds = url + gbk;
                        System.out.println("库存、价格" + sds);
                        doc = Jsoup.connect(sds).ignoreContentType(true).get();
                        store = doc.toString();
                        rgex = "sku2info(.*?)skuItem";
                        substore = getSubUtilSimple(store, rgex);
                        substore = substore.substring(3, substore.length() - 3);
                        String sub = substore.replaceAll("\\", "").replaceAll("\s*", "");
                        JSONObject sb = JSONObject.fromObject(sub);
                        skuList.stream().forEach(p->{
                            if (sb.has(p)) {//判断是否有值,没值不取,不然会报错
                                String string = sb.getString(p);
                                System.out.println("天猫的价格库存==============" + string);
                            }
                        });


                    } catch (Exception e) {
                        System.out.println("报错的地方store:" + store);
//                        System.out.println("报错的地方substore:" + substore);
                        e.printStackTrace();
                        System.out.println("=====================================程序报错,提前结束===================================================" );
                        return;
                    }








                    //详情链接
                    rgex = "httpsDescUrl(.*?)fetchDcUrl";
                    subUtilSimple = getSubUtilSimple(s, rgex);
                    subUtilSimple = subUtilSimple.substring(3, subUtilSimple.length() - 3);
                    System.out.println(subUtilSimple);

                    try {
                        doc = Jsoup.connect("http:"+subUtilSimple).get();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }

                    Elements imgDetail = doc.getElementsByTag("img");
                    for (Element element :imgDetail) {
                        String imgSrc = element.getElementsByTag("img").attr("src");
//                    imgSrc = imgSrc.replaceFirst("//img.alicdn.com/imgextra/","");
//                    imgSrc = imgSrc.replaceAll("_60x60q90.jpg",""); //处理掉不必要的数据
                        System.out.println("详情图url:"+imgSrc);
                    }

                    break;
                }
            }



        }

        System.out.println("结束时间:" + new Date());
        Date dateEnd = new Date();
        long number = dateEnd.getTime()-dateStart.getTime();
        //然后在将毫秒转换为date类型就可以了
        System.out.println("时间差为: "+number/1000);
    }


    /**
     * 返回单个字符串,若匹配到多个的话就返回第一个,方法与getSubUtil一样
     *
     * @param soap
     * @param rgex
     * @return
     */
    public static String getSubUtilSimple(String soap, String rgex) {
        Pattern pattern = Pattern.compile(rgex);// 匹配的模式
        Matcher m = pattern.matcher(soap);
        while (m.find()) {
            return m.group(1);
        }
        return "";
    }


    /**
     * 获取指定url中的某个参数
     *
     * @param url
     * @param name
     * @return
     */
    public static String getParamByUrl(String url, String name) {
        url += "&";
        String pattern = "(\?|&){1}#{0,1}" + name + "=[a-zA-Z0-9]*(&{1})";

        Pattern r = Pattern.compile(pattern);

        Matcher m = r.matcher(url);
        if (m.find()) {
//            System.out.println(m.group(0));
            return m.group(0).split("=")[1].replace("&", "");
        } else {
            return null;
        }
    }


}
import com.google.gson.JsonObject;
import net.sf.json.JSONObject;

import java.util.ArrayList;
import java.util.List;

/**
 * Helpers that build the Cartesian product of several string lists, joining the chosen
 * elements with ';' (created by Chenquan, 2018-12-16).
 */
public class test {

    /**
     * Small demo: prints every combination of three three-element lists.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<String> li = new ArrayList<>();
        li.add("aa");
        li.add("bb");
        li.add("cc");

        List<String> bi = new ArrayList<>();
        bi.add("ee");
        bi.add("rr");
        bi.add("tt");

        List<String> ci = new ArrayList<>();
        ci.add("yy");
        ci.add("uu");
        ci.add("ii");

        List<List<String>> list = new ArrayList<>();
        list.add(li);
        list.add(bi);
        list.add(ci);

        List<String> combination = combination(list);
        System.out.println(combination);
    }


    /**
     * Combines the elements of several lists: one element is taken from each group, in group
     * order, and the picks are joined with ';'.
     *
     * @param groups the lists to combine
     * @return a new list with every combination, or {@code null} when {@code groups} is
     *         {@code null} or empty or when any group is {@code null} or empty
     */
    public static List<String> combination(List<List<String>> groups) {
        if (invalid(groups) || invalid(groups.get(0))) {
            return null;
        }
        // Copy the first group so the result never aliases (and cannot mutate) the caller's list.
        List<String> combine = new ArrayList<>(groups.get(0));
        for (int i = 1; i < groups.size(); i++) {
            combine = cartesianProduct(combine, groups.get(i));
            if (combine == null) {
                return null;
            }
        }
        return combine;
    }

    /**
     * Combines the elements of two lists as {@code "s;t"} for every {@code s} in {@code c1} and
     * {@code t} in {@code c2}, iterating {@code c1} in the outer loop.
     *
     * @param c1 first list
     * @param c2 second list
     * @return a new list with every pair, or {@code null} when either list is {@code null} or empty
     */
    public static List<String> cartesianProduct(List<String> c1, List<String> c2) {
        if (invalid(c1) || invalid(c2)) {
            return null;
        }
        List<String> combine = new ArrayList<>(c1.size() * c2.size());
        for (String s : c1) {
            for (String t : c2) {
                combine.add(s + ";" + t);
            }
        }
        return combine;
    }

    /**
     * Tells whether a list is unusable for combining.
     *
     * @param c list to check
     * @return {@code true} when {@code c} is {@code null} or empty
     */
    private static boolean invalid(List<?> c) {
        return c == null || c.isEmpty();
    }

}

  

原文地址:https://www.cnblogs.com/itchenfirst/p/10131526.html