Java 验证码识别之多线程打码兔

验证码识别,爬虫永远的话题~

用打码兔总体的体验就是单线程速度太慢~

import java.io.IOException;
import java.net.MalformedURLException;
import java.util.Date;

import org.apache.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlButton;
import com.gargoylesoftware.htmlunit.html.HtmlForm;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.HtmlSubmitInput;

import cn.smy.dama2.Dama2Web;
import cn.smy.dama2.Dama2Web.DecodeResult;
import cn.smy.dama2.Dama2Web.ReadBalanceResult;

/**   
* @Title: main.java 
* @Package  
* @Description: TODO(用一句话描述该文件做什么) 
* @author A18ccms A18ccms_gmail_com   
* @date 2017年2月15日 下午3:42:00 
* @version V1.0   
*/

/**
 * @ClassName: main
 * @Description: TODO
 * @author zeze
 * @date 2017年2月15日 下午3:42:00
 * 
 */
public class main {
    private static Logger logger = Logger.getLogger(main.class);
    private static final long serialVersionUID = 1325980466616825****;
    private static Dama2Web dama2 = new Dama2Web(46****, "41c5a58de6********d23b67f61645e3a7", "***", "****");
    private static int id;

    private static long nd = 1000 * 24 * 60 * 60;
    private static long nh = 1000 * 60 * 60;
    private static long nm = 1000 * 60;
    private static long ns = 1000;
    // 获得两个时间的毫秒时间差异
    private static Date nowDate;
    private static Date endDate;
    private static long diff;// getTime返回的是一个long型的毫秒数
    // 计算差多少分钟
    private static long min;
    // 计算差多少秒//输出结果
    private static long sec;
    // 计算多少毫秒
    private static long ms;

    public static void main(String[] agrs) {
        String emailAccount = "asd@qq.com";


        for (int i = 0; i < 10; i++) {
            nowDate = new Date();

            emailAccount = "asd" + i + "@qq.com";
            if(i==0)
                emailAccount="asd@qq.com";
            
            int statusCode=checkEbayAccount(emailAccount);
            if(statusCode==0){
                System.out.println(emailAccount + " 该邮箱号不是ebay账号");
            }else if(statusCode==1){
                System.out.println(emailAccount + " 该账号是eBay账号!");
            }else if(statusCode==101){
                System.out.println("打码错误!");
                statusCode=checkEbayAccount(emailAccount);
                while(statusCode==101){
                    statusCode=checkEbayAccount(emailAccount);
                }
            }else{
                System.out.println(statusCode);
            }

            endDate = new Date();
            diff = endDate.getTime() - nowDate.getTime();
            min = diff % nd % nh / nm;
            sec = diff % nd % nh % nm / ns;
            ms = diff % nd % nh % nm % ns;
            System.out.println(min + "分钟" + sec + "秒" + ms + "毫秒");
        }

    }

    // 验证邮箱是否为eBay账号
    public static int checkEbayAccount(String emailAccount) {
        System.out.println("开始验证账号:" + emailAccount);
        WebClient webClient = new WebClient(BrowserVersion.FIREFOX_17);
        HtmlPage page = null;
        try {
            page = webClient.getPage("http://fyp.ebay.com/");
        } catch (FailingHttpStatusCodeException e) {
            logger.error(e);
        } catch (MalformedURLException e) {
            logger.error(e);
        } catch (IOException e) {
            logger.error(e);
        }
        HtmlForm form = page.getForms().get(1);
        form.getInputByName("input").setValueAttribute(emailAccount);
        HtmlButton button = (HtmlButton) form.getElementsByTagName("button").get(0);

        try {
            page = button.click();
        } catch (IOException e1) {
            logger.error(e1);
        }
        if (page.asText().indexOf("Select how you want to reset your password") != -1) {
//            System.out.println(emailAccount + " 该账号是eBay账号!");
            return 1;
        }

        while (page.asText().indexOf("Security Measure") != -1) {

            Document doc = Jsoup.parse(page.asXml());
            Elements imgSrc = doc.getElementsByTag("iframe");
            String imgUrl = imgSrc.attr("src");
            System.out.println("验证码图片链接:" + imgUrl);
            String code = getCode(imgUrl);

            // 提交验证码
            form = page.getForms().get(0);
            form.getInputByName("tokenText").setValueAttribute(code);
            HtmlSubmitInput input = (HtmlSubmitInput) form.getElementsByTagName("input").get(5);
            try {
                page = input.click();
            } catch (IOException e1) {
                logger.error(e1);
            }

            if (page.asText().indexOf("the verification code you entered doesn't match against the image") != -1) {
//                System.out.println("打码错误!");
                dama2.reportError(id);
                return 101;
            }

            // 再次提交邮箱
            form = page.getForms().get(1);
            form.getInputByName("input").setValueAttribute(emailAccount);
            button = (HtmlButton) form.getElementsByTagName("button").get(0);
            try {
                page = button.click();
            } catch (IOException e1) {
                logger.error(e1);
            }

            if (page.asText().indexOf("Security Measure") != -1){// 如果还是验证码页面
                System.out.println("提交还是验证码页面!");
                continue;
            }

            if (page.asText().indexOf("Oops, that's not a match. Try again?") != -1) {
//                System.out.println(emailAccount + " 该邮箱号不是ebay账号");
                return 0;
            } else if (page.asText().indexOf("Select how you want to reset your password") != 1) {
//                System.out.println(emailAccount + " 该账号是eBay账号!");
                return 1;
            } else {
                System.out.println(page.asText());
                return 2;
            }
        }
        return 3;
    }

    // 打码兔获取验证码
    public static String getCode(String imgUrl) {
        // 打码兔
        int type = 6;
        int timeout = 30;
        ReadBalanceResult balanceResult = dama2.getBalance();
        // System.out.println(balanceResult);
        DecodeResult res = dama2.decodeUrlAndGetResult(imgUrl, type, timeout);
        String s;
        if (res.ret >= 0) {
            id = res.ret;
            s = "success: result=" + res.result + "; id=" + res.ret;
            System.out.println(s);
        } else {
            s = "failed: ret = " + res.ret + "; desc=" + res.desc;
            System.err.println(s);
        }
        return res.result;
    }

}

 测试结果如下:

用多线程测试,明显快多了

package test;

import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Date;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.apache.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlButton;
import com.gargoylesoftware.htmlunit.html.HtmlForm;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.HtmlSubmitInput;

import cn.smy.dama2.Dama2Web;
import cn.smy.dama2.Dama2Web.DecodeResult;
import cn.smy.dama2.Dama2Web.ReadBalanceResult;

/***
 * 
 * @ClassName: EbayMultiplyThreadCheck
 * @Description: TODO
 * @author zeze
 * @date 2017年2月16日 上午8:49:46
 *
 */
public class EbayMultiplyThreadCheck {

    private static int threadNum = 30;

    private static long nd = 1000 * 24 * 60 * 60;
    private static long nh = 1000 * 60 * 60;
    private static long nm = 1000 * 60;
    private static long ns = 1000;
    private static Date nowDate;
    private static Date endDate;
    private static long diff;
    private static long min;
    private static long sec;
    private static long ms;

    public static void main(String[] args) {
        nowDate = new Date();

        ExecutorService exec = Executors.newFixedThreadPool(threadNum);
        ArrayList<Future<Integer>> results = new ArrayList<Future<Integer>>();

        for (int i = 0; i < threadNum; i++) {
            String email = "asd" + i + "@qq.com";
            if (i == 0)
                email = "asd@qq.com";
            results.add(exec.submit(new CheckEbayAccount(email)));
        }

        boolean isDone = false;
        while (!isDone) {
            isDone = true;
            for (Future<Integer> future : results) {
                if (!future.isDone()) {
                    isDone = false;
                    try {
                        Thread.sleep(1000);
                    } catch (InterruptedException e) {
                    }
                    break;
                }
            }
        }
        exec.shutdown();

        endDate = new Date();
        diff = endDate.getTime() - nowDate.getTime();
        min = diff % nd % nh / nm;
        sec = diff % nd % nh % nm / ns;
        ms = diff % nd % nh % nm % ns;
        System.out.println(min + "分钟" + sec + "秒" + ms + "毫秒");

    }
}

class CheckEbayAccount implements Callable<Integer> {

    private String email;
    private static Logger logger = Logger.getLogger(CheckEbayAccount.class);
    private static Dama2Web dama2 = new Dama2Web(****, "41c5a58de68ebe2*******", "***", "****");
    private static int id;

    public CheckEbayAccount(String email) {
        this.email = email;
    }

    @Override
    public Integer call() {

        System.out.println(Thread.currentThread().getName() + " 开始验证账号:" + email);
        WebClient webClient = new WebClient(BrowserVersion.FIREFOX_17);
        HtmlPage page = null;
        try {
            page = webClient.getPage("http://fyp.ebay.com/");
        } catch (FailingHttpStatusCodeException e) {
            logger.error(e);
        } catch (MalformedURLException e) {
            logger.error(e);
        } catch (IOException e) {
            logger.error(e);
        }
        HtmlForm form = page.getForms().get(1);
        form.getInputByName("input").setValueAttribute(email);
        HtmlButton button = (HtmlButton) form.getElementsByTagName("button").get(0);

        try {
            page = button.click();
        } catch (IOException e1) {
            logger.error(e1);
        }

        if (page.asText().indexOf("Select how you want to reset your password") != -1) {
            System.out.println(Thread.currentThread().getName() + " " + email + " 该账号是eBay账号!");
            return 1;
        } else if (page.asText().indexOf("Oops, that's not a match. Try again?") != -1) {
            System.out.println(Thread.currentThread().getName() + " " + email + " 该邮箱号不是ebay账号");
            return 0;
        }

        while (page.asText().indexOf("Security Measure") != -1) {

            Document doc = Jsoup.parse(page.asXml());
            Elements imgSrc = doc.getElementsByTag("iframe");
            String imgUrl = imgSrc.attr("src");
            System.out.println(Thread.currentThread().getName() + " " + "验证码图片链接:" + imgUrl);
            String code = getCode(imgUrl);

            // 提交验证码
            form = page.getForms().get(0);
            form.getInputByName("tokenText").setValueAttribute(code);
            HtmlSubmitInput input = (HtmlSubmitInput) form.getElementsByTagName("input").get(5);
            try {
                page = input.click();
            } catch (IOException e1) {
                System.out.println(Thread.currentThread().getName() + " " + e1);
            }

            while (page.asText().indexOf("Sorry") != -1) {
                System.out.println(Thread.currentThread().getName() + " 打码错误!重试");
                dama2.reportError(id);

                doc = Jsoup.parse(page.asXml());
                imgSrc = doc.getElementsByTag("iframe");
                imgUrl = imgSrc.attr("src");
                System.out.println(Thread.currentThread().getName() + " " + "验证码图片链接:" + imgUrl);
                code = getCode(imgUrl);

                // 提交验证码
                form = page.getForms().get(0);
                form.getInputByName("tokenText").setValueAttribute(code);
                input = (HtmlSubmitInput) form.getElementsByTagName("input").get(5);
                try {
                    page = input.click();
                } catch (IOException e1) {
                    logger.error(e1);
                }
            }

            // 再次提交邮箱
            form = page.getForms().get(1);
            form.getInputByName("input").setValueAttribute(email);
            button = (HtmlButton) form.getElementsByTagName("button").get(0);
            try {
                page = button.click();
            } catch (IOException e1) {
                logger.error(e1);
            }

            if (page.asText().indexOf("Security Measure") != -1) {// 如果还是验证码页面
                System.out.println(Thread.currentThread().getName() + " 提交还是验证码页面!");
                continue;
            }

            if (page.asText().indexOf("Oops, that's not a match. Try again?") != -1) {
                System.out.println(Thread.currentThread().getName() + " " + email + " 该邮箱号不是ebay账号");
                return 0;
            } else if (page.asText().indexOf("Select how you want to reset your password") != 1) {
                System.out.println(Thread.currentThread().getName() + " " + email + " 该账号是eBay账号!");
                return 1;
            } else {
                System.out.println(Thread.currentThread().getName() + " " + page.asText());
                return 2;
            }
        }
        System.out.println(Thread.currentThread().getName() + " " + page.asText());
        return 3;

    }

    // 打码兔获取验证码
    public static String getCode(String imgUrl) {
        // 打码兔
        int type = 6;
        int timeout = 30;
        ReadBalanceResult balanceResult = dama2.getBalance();
        // System.out.println(balanceResult);
        DecodeResult res = dama2.decodeUrlAndGetResult(imgUrl, type, timeout);
        String s;
        if (res.ret >= 0) {
            id = res.ret;
            s = "[打码结果=" + res.result + "] [id=" + res.ret + "] " + balanceResult;
            System.out.println(Thread.currentThread().getName() + " " + s);
        } else {
            while (res.result == null) {
                s = "打码失败,重试: ret = " + res.ret + "; desc=" + res.desc;
                System.out.println(Thread.currentThread().getName() + " " + s);
                dama2.reportError(id);
                res = dama2.decodeUrlAndGetResult(imgUrl, type, timeout);
                if (res.ret >= 0) {
                    id = res.ret;
                    s = "[打码结果=" + res.result + "] [id=" + res.ret + "] " + balanceResult;
                    System.out.println(Thread.currentThread().getName() + " " + s);
                }
            }
        }
        return res.result;
    }

}

 测试30个账号,平均每个3秒

原文地址:https://www.cnblogs.com/zeze/p/6402963.html