Java爬取12306余票

  一、前言

  

  今年国庆和中秋一起放,虽然很欢快,但是没有票了!!!

  于是自己倒腾了一个查询余票的小程序。

  二、准备工作

   

  1、先打开12306的页面

  

  2、然后在页面上右键选择“检查”,打开开发者工具后切换到 Network(网络)面板

  

  3、再点一下12306页面上的查询,就可以看到发起了ajax请求

  

  4、点第一个,很明显是json串,这样就方便很多

  

  5、复制第二个的链接

  

  比如我这里就是:

      https://kyfw.12306.cn/otn/leftTicket/queryX?leftTicketDTO.train_date=2017-10-01&leftTicketDTO.from_station=BJP&leftTicketDTO.to_station=NJH&purpose_codes=ADULT

  三、开始工作

   这是通过HttpURLConnection来发起一个请求,里面的网址就填准备工作里面复制的那串

  /**
     * Sends an HTTP GET request to a hard-coded URL and prints the response body
     * to standard output, one line at a time.
     *
     * <p>Certificate and host-name verification are switched off through
     * {@code SslUtils.ignoreSsl()} before the connection is opened. A response
     * code other than 200, or any exception, is reported on standard output;
     * nothing is thrown to the caller.
     */
    public static void sendHttp(){
        HttpURLConnection urlConnection = null;
        try{
            //Skip SSL certificate checks (needed for 12306)
            SslUtils.ignoreSsl();
            //Build the URL object
            URL url=new URL("这里填你要访问的网址");
            /*
             * Go through a proxy so that 12306 does not throttle repeated requests
             * coming from the same IP. The address below was only temporarily valid;
             * search for a fresh proxy IP when it stops working.
             * NOTE(review): http.proxyHost/http.proxyPort are documented to apply to
             * http:// URLs only; for an https:// target the matching https.proxyHost/
             * https.proxyPort properties would be needed - confirm which is intended.
             */
            System.getProperties().setProperty("proxySet", "true");
            System.setProperty("http.proxyHost", "120.78.15.63");
            System.setProperty("http.proxyPort", "80");
            //Open the connection
            urlConnection = (HttpURLConnection)url.openConnection();
            //Fake browser-like request headers; most sites do not need this, but some (12306 for one) check for them
            //NOTE(review): the JDK treats Host and Connection as restricted headers and may ignore these two values - verify
            urlConnection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:55.0) Gecko/20100101 Firefox/55.0");
            urlConnection.setRequestProperty("Host","kyfw.12306.cn");
            urlConnection.setRequestProperty("Accept","text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
            urlConnection.setRequestProperty("Accept-Language","zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3");
            urlConnection.setRequestProperty("Accept-Encoding","identity");
            urlConnection.setRequestProperty("Connection","keep-alive");
            urlConnection.setRequestProperty("Upgrade-Insecure-Requests","1");
            //Read the server's response code
            int responsecode=urlConnection.getResponseCode();
            //200 means the request succeeded
            if(responsecode==HttpURLConnection.HTTP_OK){
                //try-with-resources closes the reader (and the underlying stream) even if reading fails
                try(BufferedReader reader=new BufferedReader(new InputStreamReader(urlConnection.getInputStream(),"UTF-8"))){
                    String line;
                    while((line=reader.readLine())!=null){
                        System.out.println(line);//process each response line here
                    }
                }
            }else{
                System.out.println("获取不到网页的源码,服务器响应代码为:"+responsecode);
            }
        }catch(Exception e){
            System.out.println("获取不到网页的源码,出现异常:"+e);
        }finally{
            //Release the connection on every path, including non-200 responses and failures
            if(urlConnection!=null){
                urlConnection.disconnect();
            }
        }
    }

   

  跑这个方法之前还需要用到一个工具类,用来跳过对12306的HTTPS证书校验(注意:它会信任所有证书和主机名,只适合自己本地测试使用)

package domain;

import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;

import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSession;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;

/**
 * Helper that switches off HTTPS certificate and host-name verification for
 * connections created through {@link HttpsURLConnection}.
 *
 * <p><b>Security warning:</b> both methods change JVM-wide defaults on
 * {@link HttpsURLConnection}. After calling them, any server certificate and
 * any host name are accepted, so connections are open to man-in-the-middle
 * attacks. Use only for local experiments.
 */
public class SslUtils {

    /**
     * Installs a default SSL socket factory whose trust manager accepts every
     * certificate chain.
     *
     * @throws Exception if the SSL context cannot be obtained or initialised
     */
    public static void trustAllHttpsCertificates() throws Exception {
        TrustManager[] trustAllCerts = new TrustManager[] { new miTM() };
        // Request the generic "TLS" context instead of the legacy "SSL" name.
        SSLContext sc = SSLContext.getInstance("TLS");
        sc.init(null, trustAllCerts, null);
        HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
    }

    /**
     * Trust manager that performs no validation: every client and server
     * certificate chain is accepted.
     */
    static class miTM implements X509TrustManager {

        /**
         * Returns an empty array; the {@link X509TrustManager} contract requires
         * a non-null result.
         *
         * @return an empty, non-null array of accepted issuers
         */
        @Override
        public X509Certificate[] getAcceptedIssuers() {
            return new X509Certificate[0];
        }

        /**
         * Always reports the server chain as trusted.
         *
         * @param certs the certificate chain (not inspected)
         * @return {@code true}
         */
        public boolean isServerTrusted(X509Certificate[] certs) {
            return true;
        }

        /**
         * Always reports the client chain as trusted.
         *
         * @param certs the certificate chain (not inspected)
         * @return {@code true}
         */
        public boolean isClientTrusted(X509Certificate[] certs) {
            return true;
        }

        /** Accepts any server certificate chain; never throws. */
        @Override
        public void checkServerTrusted(X509Certificate[] certs, String authType)
                throws CertificateException {
            // Intentionally empty: no validation is performed.
        }

        /** Accepts any client certificate chain; never throws. */
        @Override
        public void checkClientTrusted(X509Certificate[] certs, String authType)
                throws CertificateException {
            // Intentionally empty: no validation is performed.
        }
    }


    /**
     * Disables both certificate validation and host-name verification for all
     * subsequent {@link HttpsURLConnection} instances.
     *
     * @throws Exception if the trust-all SSL context cannot be installed
     */
    public static void ignoreSsl() throws Exception{
        HostnameVerifier hv = new HostnameVerifier() {
            @Override
            public boolean verify(String urlHostName, SSLSession session) {
                // Accept every host name regardless of the certificate presented.
                return true;
            }
        };
        trustAllHttpsCertificates();
        HttpsURLConnection.setDefaultHostnameVerifier(hv);
    }
}

  然后跑一下这个方法,看到获取到了json

  

  就可以对这串字符串为所欲为了,嘿嘿嘿

  四、最后

   

  一般查到余票都是发邮件,不会JavaMail的可以看我另一篇博客,开箱即用。

  转载需标注原文地址!

原文地址:https://www.cnblogs.com/summertime-wu/p/7595366.html