java 必应壁纸批量下载

基于《java 必应壁纸批量下载》(rookie丶k,博客园 cnblogs.com)一文实现。

上面的代码在本地运行时有点小问题,于是稍作了修改:

1.ssl验证

2.请求头

3.下载速度仍需优化,可改用多线程方式(目前还不会,尚未实现)

/* *
 *@Description:
 *@Author:TYJ
 *@Date: create in  2020/4/22 18:18
 */

import javax.net.ssl.*;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
/**
 * Helper that switches off HTTPS certificate and host-name verification for
 * connections created through {@link HttpsURLConnection}.
 *
 * <p>NOTE(review): the installed trust manager and host-name verifier accept
 * everything, and they are installed as JVM-wide defaults. That removes the
 * protection HTTPS normally gives against man-in-the-middle attacks, so this
 * should only be used for throw-away tooling.
 */
public class SslUtils {
    /**
     * Installs a default SSL socket factory whose only trust manager is
     * {@link miTM}, i.e. one that accepts every certificate chain.
     *
     * @throws Exception if the SSL context cannot be created or initialised
     */
    private static void trustAllHttpsCertificates() throws Exception {
        TrustManager[] trustAllCerts = new TrustManager[] { new miTM() };
        // Request the context under the "TLS" name rather than the legacy "SSL" one.
        SSLContext sc = SSLContext.getInstance("TLS");
        sc.init(null, trustAllCerts, null);
        HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
    }

    /**
     * Trust manager that performs no validation at all: both check methods
     * return normally for any input.
     */
    static class miTM implements TrustManager, X509TrustManager {
        /**
         * @return an empty array; the interface contract asks for a non-null
         *         (possibly empty) array, so {@code null} is never returned
         */
        @Override
        public X509Certificate[] getAcceptedIssuers() {
            return new X509Certificate[0];
        }

        /**
         * @param certs ignored
         * @return always {@code true}
         */
        public boolean isServerTrusted(X509Certificate[] certs) {
            return true;
        }

        /**
         * @param certs ignored
         * @return always {@code true}
         */
        public boolean isClientTrusted(X509Certificate[] certs) {
            return true;
        }

        /** Accepts any server certificate chain without inspecting it. */
        @Override
        public void checkServerTrusted(X509Certificate[] certs, String authType)
                throws CertificateException {
            // Intentionally empty: every chain is trusted.
        }

        /** Accepts any client certificate chain without inspecting it. */
        @Override
        public void checkClientTrusted(X509Certificate[] certs, String authType)
                throws CertificateException {
            // Intentionally empty: every chain is trusted.
        }
    }

    /**
     * Makes subsequent HTTPS requests ignore certificate and host-name
     * problems. Must be called before {@code openConnection}.
     *
     * @throws Exception if the trust-all SSL socket factory cannot be installed
     */
    public static void ignoreSsl() throws Exception {
        // No real verification logic: log the two host names and accept.
        HostnameVerifier hv = new HostnameVerifier() {
            @Override
            public boolean verify(String urlHostName, SSLSession session) {
                System.out.println("Warning: URL Host: " + urlHostName + " vs. " + session.getPeerHost());
                return true;
            }
        };
        trustAllHttpsCertificates();
        HttpsURLConnection.setDefaultHostnameVerifier(hv);
    }
}

  

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import java.io.*;
import java.net.*;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class WallpaperDownload {
    private static final String BY_PREFIX = "https://bing.ioliu.cn";

    public static void main(String[] args) {
        // 运行时指定一个本地下载路径
        String path = "";
        for(int i=0; i<args.length; i++){
            path = args[i];

        }
        if(path == null || path.length()==0){
            path = "C:\Users\HP\Pictures\bing";
        }
        File filePath = new File(path);
        if(!filePath.exists()){
            System.out.println("创建目录:" + filePath.getName());
            filePath.mkdirs();
        }
        System.out.println("下载位置:" + filePath.getName());
        download(path);

    }

    public static void download(String path) {
        long start = System.currentTimeMillis();
        String pageHtml = "https://bing.ioliu.cn/ranking";
        for (int i = 1; i <= 105; i++) {
            if (i > 1) {
                pageHtml = pageHtml + "?p=" + i;

            }
            try {
                String[] links = getAddress(pageHtml);
                execute(links, path);
            } catch (IOException e) {
                e.printStackTrace();
            }
            pageHtml = "https://bing.ioliu.cn/ranking";

        }
        long end = System.currentTimeMillis();
        long time = (end - start) / 1000;
        System.out.println("下载耗时:" + time);
    }

    /**
     * 下载图片
     *
     * @param links
     * @param path  下载位置
     * @throws
     */
    public static void execute(String[] links, String path) throws IOException {
        if (!path.endsWith("\")) {
            path = path + "\";
        }
        for (int i = 0; i < links.length; i++) {
            HttpURLConnection urlConnection = getConnection(links[i]);
            InputStream ins = urlConnection.getInputStream();
            String imageName = links[i].substring(links[i].lastIndexOf("/") + 1).split("\?")[0];
            File file = new File(path + imageName + ".jpg");
            OutputStream outputStream = null;
            if (!file.exists()) {
                outputStream = new FileOutputStream(file);
                int readCount;
                byte[] bytes = new byte[10240];
                while ((readCount = ins.read(bytes)) != -1) {
                    outputStream.write(bytes, 0, readCount);
                }
                System.out.println("[" + imageName + "] download finished ...");
            } else {
                System.out.println(file.getName() + " existed ...");
            }
        }

    }

    /**
     * 获取下载链接地址
     *
     * @return
     * @throws IOException
     */
    public static String[] getAddress(String htmlPage) throws IOException {
        System.out.println("get [" + htmlPage + "] info ...");
        HttpURLConnection connection = getConnection(htmlPage);
        InputStream is = connection.getInputStream();
        String newLine = System.getProperty("line.separator");
        BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"));
        StringBuilder result = new StringBuilder();
        String line;
        String html;
        while ((line = reader.readLine()) != null) {
            result.append(line + newLine);
        }
        html = result.toString();
        Document doc = Jsoup.parseBodyFragment(html);
        html = doc.body().html();
        String[] links = extractLinks(html);
        return links;
    }

    /**
     * 提取图片链接
     *
     * @param html
     */
    static String[] extractLinks(String html) {
        List<String> list = new ArrayList<>();

        String pattern = "/photo/.*_.*\?force=download";
        Pattern r = Pattern.compile(pattern);
        Matcher m = r.matcher(html);



        while (m.find()) {
            String group = m.group();
            String[] split = group.split("&");
            list.add(split[0]+"?force=download");
        }
        String[] results = new String[list.size()];
        for (int i = 0; i < list.size(); i++) {
            results[i] = BY_PREFIX + list.get(i);
        }
        return results;

    }

    /**
     * 获取连接
     *
     * @param urlStr
     * @return
     */
    public static HttpURLConnection getConnection(String urlStr) {
        HttpURLConnection urlConnection = null;
        try {
            URI uri = new URI(urlStr);
            URL url = uri.toURL();

            try {
                //自己伪造ssl证书
                SslUtils.ignoreSsl();
            } catch (Exception e) {
                e.printStackTrace();
            }

            urlConnection = (HttpURLConnection) url.openConnection();
            //防止403
            //Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.67 Safari/537.36 Edg/87.0.664.55
            urlConnection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.67 Safari/537.36 Edg/87.0.664.55");

        } catch (URISyntaxException e) {
            e.printStackTrace();
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }

        return urlConnection;
    }
}

  

原文地址:https://www.cnblogs.com/q1359720840/p/14088904.html