Rhino + envjs-1.2.js:在 Java 中运行网站 JS 的工具类

java爬虫遇到个页面加密的东西,找了些资料学习学习

做了个用java运行js的工具类,希望对大家有用。其中用到的client(用于获取js)可以自行换成自己的client。主要用到了以下几个组件:

Rhino就是JavaScript引擎,它的目的就是实现Java与JavaScript的互操作性。rhino-1.7R1.jar

Envjs是一个以纯js方式在无浏览器环境下模拟浏览器行为的库。envjs-1.2.js

一般网站js中都会用到jquery,所以还用了jquery.js

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Reader;
import java.lang.ref.SoftReference;
import java.net.URI;
import java.nio.charset.Charset;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.Validate;
import org.apache.http.Header;
import org.apache.http.HeaderElement;
import org.apache.http.HttpEntity;
import org.apache.http.ParseException;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.protocol.RequestAcceptEncoding;
import org.apache.http.impl.DefaultConnectionReuseStrategy;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.Args;
import org.apache.http.util.ByteArrayBuffer;
import org.jsoup.Jsoup;
import org.mozilla.javascript.Context;
import org.mozilla.javascript.ContextFactory;
import org.mozilla.javascript.Function;
import org.mozilla.javascript.Scriptable;

import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;

//import net.sourceforge.htmlunit.corejs.javascript.Context;
//import net.sourceforge.htmlunit.corejs.javascript.ContextFactory;
//import net.sourceforge.htmlunit.corejs.javascript.Function;
//import net.sourceforge.htmlunit.corejs.javascript.Scriptable;

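/**
 * Runs a web site's JavaScript from Java using Rhino together with envjs and jQuery.
 *
 * References:
 * http://mybeautiful.iteye.com/blog/1442839
 * http://m.oschina.net/blog/121347
 * http://blog.csdn.net/dwjmantou/article/details/45276967
 * http://lcllcl987.iteye.com/blog/87423
 *
 * Do NOT use the Rhino classes repackaged by htmlunit
 * (net.sourceforge.htmlunit.corejs.javascript); with them the scripts fail with
 * 'Cannot call method "setOptimizationLevel" of null'.
 * @author 5432
 *
 */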
public class RhinoScaper {
    /** Rhino context entered by {@link #init()}; {@code null} until {@code init()} has run. */
    private Context context;
    /** Scope created by {@code initStandardObjects} in {@link #init()}; every script is evaluated against it. */
    private Scriptable scriptable;
    /**
     * Initializes the Rhino runtime and loads the browser-emulation scripts.
     *
     * <p>Enters a Rhino context on the calling thread, creates the standard scope, defines a
     * {@code print} function (envjs-1.2.js expects one but does not define it), runs a short
     * smoke test and finally evaluates envjs followed by jQuery through
     * {@link #evaluateJs(String)}.
     *
     * <p>The context entered here is not exited anywhere in this class, so the instance is
     * tied to the thread that called {@code init()}.
     */
    public void init(){
        context = ContextFactory.getGlobal().enterContext();
        scriptable = context.initStandardObjects(null);
        // Level -1 selects Rhino's interpreted mode (see the Rhino Context documentation).
        context.setOptimizationLevel(-1);
        context.setLanguageVersion(Context.VERSION_1_5);
        // Bootstrap script: a test variable, the print function envjs needs, and a test function.
        context.evaluateString(scriptable,
                "var v='sssaass';"
                + "var print = function(v) {"+
                      " java.lang.System.out.println(v);return v ;"+
                " };function hah(){return v }",
                "print",1,null);

        // Smoke test: both calls throw if the bootstrap script was not evaluated correctly.
        Function prf = (Function) scriptable.get("print", scriptable);
        prf.call(context, scriptable, prf, new Object[]{"test"});
        invokFunction("hah");

        String[] file = { resolveEnvJsPath(), "./lib/jquery.js" };
        for (String f : file) {
            evaluateJs(f);
        }
    }

    /**
     * Resolves the file-system path of {@code envjs-1.2.js} on the classpath root.
     *
     * <p>The previous code concatenated the {@code URL} returned by {@code getResource("/")}
     * with the file name, which produced a {@code file:/...} string that cannot be opened as
     * a file, so the local copy was never used.
     *
     * @return the absolute path of the classpath copy, or the bare file name when no
     *         file-based copy exists (which lets {@link #evaluateJs(String)} fall back to
     *         downloading it)
     */
    private String resolveEnvJsPath() {
        java.net.URL resource = getClass().getResource("/envjs-1.2.js");
        if (resource != null && "file".equals(resource.getProtocol())) {
            try {
                return new java.io.File(resource.toURI()).getPath();
            } catch (java.net.URISyntaxException ignored) {
                // Not convertible to a file path; use the fallback name below.
            } catch (IllegalArgumentException ignored) {
                // URI is not a valid hierarchical file URI; use the fallback name below.
            }
        }
        return "envjs-1.2.js";
    }
    /**
     * Calls a JavaScript function defined in the current scope.
     *
     * @param functionName name of the function to look up in the scope
     * @param functionArags arguments passed to the function
     * @return the value returned by the JavaScript function
     * @throws IllegalArgumentException if {@link #init()} has not been called, if
     *         {@code functionName} is {@code null}, or if the scope holds no function
     *         with that name
     */
    public Object invokFunction(String functionName,Object... functionArags) {
        Validate.notNull(context, "context is null");
        Validate.notNull(scriptable, "scriptable is null");
        Validate.notNull(functionName, "functionName is null");
        // A missing property comes back as a marker object and a plain variable is not
        // callable, so check the type instead of casting blindly.
        Object candidate = scriptable.get(functionName, scriptable);
        if (!(candidate instanceof Function)) {
            throw new IllegalArgumentException(
                    "'" + functionName + "' is not a function in the current scope");
        }
        Function function = (Function) candidate;
        return function.call(Context.getCurrentContext(), scriptable, function, functionArags);
    }
    
    /**
     * Evaluates a JavaScript file in the current scope.
     *
     * <p>The file is read as UTF-8. When it does not exist a download is attempted instead:
     * a path containing {@code envjs-1.2.js} is fetched from
     * https://raw.githubusercontent.com/ryan-roemer/envjs-1.2/master/env.rhino.1.2.js and a
     * path containing {@code jquery.js} from http://apps.bdimg.com/libs/jquery/1.6.0/jquery.js.
     * Download and read failures are reported on the error stream and the script is skipped.
     *
     * @param f path of the JavaScript file
     * @throws RuntimeException if the file is missing and is neither envjs nor jQuery
     */
    public void evaluateJs(String f) {
        Validate.notNull(context, "context is null");
        Validate.notNull(scriptable, "scriptable is null");
        Reader in = null;
        try {
            // Read explicitly as UTF-8 (as loadJS does) rather than in the platform charset.
            in = new InputStreamReader(new java.io.FileInputStream(f), Charset.forName("UTF-8"));
            context.evaluateReader(scriptable, in, f, 1, null);
        } catch (FileNotFoundException e1) {
            if (f.contains("envjs-1.2.js")) {
                evaluateDownloadedEnvJs();
            } else if (f.contains("jquery.js")) {
                evaluateDownloadedJquery();
            } else {
                throw new RuntimeException("unknown file "+f, e1);
            }
        } catch (IOException e1) {
            e1.printStackTrace();
        } finally {
            IOUtils.closeQuietly(in);
        }
    }

    /** Downloads env.rhino.1.2.js and evaluates it; failures are reported and ignored. */
    private void evaluateDownloadedEnvJs() {
        String envjs = "https://raw.githubusercontent.com/ryan-roemer/envjs-1.2/master/env.rhino.1.2.js";
        try {
            SoftReference<String> downloaded = Client.getHtmlString(envjs);
            // Both the reference and its referent may be missing.
            String script = downloaded == null ? null : downloaded.get();
            if (script == null) {
                System.err.println("could not download " + envjs + "; script not loaded");
                return;
            }
            context.evaluateString(scriptable, script, envjs, 1, null);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Downloads jQuery 1.6.0 and evaluates it; failures are reported and ignored. */
    private void evaluateDownloadedJquery() {
        String jquery = "http://apps.bdimg.com/libs/jquery/1.6.0/jquery.js";
        Reader remote = null;
        try {
            SoftReference<Reader> downloaded = Client.getHtmlReader(jquery);
            remote = downloaded == null ? null : downloaded.get();
            if (remote == null) {
                System.err.println("could not download " + jquery + "; script not loaded");
                return;
            }
            context.evaluateReader(scriptable, remote, jquery, 1, null);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            IOUtils.closeQuietly(remote);
        }
    }

    public static void main(String[] args) {
        RhinoScaper rhinoScaper = new RhinoScaper();
        rhinoScaper.init();
//        rhinoScaper.JSloadString("jsString", "jsname");
//        rhinoScaper.evaluateJs("E:/Desktop/loginjs.js");
//        rhinoScaper.loadJS("", classpathURI);
        
//        电信登录加密测试
        String pwd="111";
        StringBuilder ascending = new StringBuilder();
        SoftReference<String> htmlString = null;
        try {
            htmlString = Client.getHtmlString("http://login.189.cn/bundles/jquery?v=h3Pl8XT8zdNkoI1VbV5sEZOBrSqsxRXX0TIQ9S_lAlM1");
        } catch (Exception e) {
            e.printStackTrace();
        }
        String jsStr =htmlString==null?"":htmlString.get();
        jsStr = jsStr.replaceAll("float:", "floats:").replaceAll("throws", "throwss");
        ascending.append(jsStr);
        ascending.append(";
 var input=document.createElement("input");input.value='"+pwd+"';;input.id= 'pass';input.type='password';");
        ascending.append("
 function getpassword(){ return $(input).valAesEncryptSet()}");
        rhinoScaper.JSloadString(ascending.toString(), "jsname");
        Object result = rhinoScaper.invokFunction("getpassword");
        System.out.println(result);
        try {
            htmlString = Client.getHtmlString("http://www.youdaili.net/Daili/");
            jsStr =htmlString==null?"":htmlString.get();
            String runScript = rhinoScaper.runScript(jsStr);
            System.out.println(runScript);
        } catch (Exception e) {
            e.printStackTrace();
        }
        

    }
    /**
     * 运行js
     * @param html
     * @return
     */
    private String runScript(String html) {
        String function = null;int jsfrom = 0;
        Pattern p = Pattern.compile("setTimeout\("(.*)\((.*)\)", 200\);");
        Matcher m = p.matcher(html);
        if(m.find()){
         function = m.group(1);//函数名
         jsfrom = Integer.parseInt(m.group(2));//参数
        }
        JSloadString(Jsoup.parse(html).select("script").html().replace("eval("qo=eval;qo(po);")", "return po"), "jsname");
        Object result = invokFunction(function, jsfrom);
        return result.toString();
    }
    /**
     * Loads a JavaScript file from the classpath and evaluates it in the current scope.
     * The file is read as UTF-8.
     *
     * @param sourceName name reported by Rhino for this script
     * @param classpathURI classpath location of the script, as accepted by
     *        {@link Class#getResourceAsStream(String)}
     * @throws IllegalArgumentException if {@link #init()} has not been called or the
     *         resource does not exist
     * @throws IllegalStateException if the resource cannot be read
     */
    public void loadJS(String sourceName, String classpathURI) {
        Validate.notNull(context, "context is null");
        Validate.notNull(scriptable, "scriptable is null");
        InputStream inputStream = getClass().getResourceAsStream(classpathURI);
        if (inputStream == null) {
            throw new IllegalArgumentException("classpath resource not found: " + classpathURI);
        }
        String js;
        try {
            js = IOUtils.toString(inputStream, "UTF-8");
        } catch (IOException e) {
            // Previously swallowed, which led to a NullPointerException when evaluating null.
            throw new IllegalStateException("failed to read classpath resource " + classpathURI, e);
        } finally {
            IOUtils.closeQuietly(inputStream);
        }
        context.evaluateString(scriptable, js, sourceName, 1, null);
    }
    /**
     * Evaluates JavaScript source text in the current scope.
     *
     * @param source JavaScript source (the caller must first rename identifiers such as
     *        {@code throws} or {@code float} that make the script fail)
     * @param sourceName name reported by Rhino for this script
     */
    public void JSloadString(String source, String sourceName){
        Validate.notNull(context, "context is null");
        Validate.notNull(scriptable, "scriptable is null");
        final int firstLineNumber = 1;
        final Object securityDomain = null;
        context.evaluateString(scriptable, source, sourceName, firstLineNumber, securityDomain);
    }
}
class Client{
    /**
     * Closes a resource, printing (not propagating) any exception raised while closing.
     *
     * @param close resource to close; {@code null} is ignored
     */
    public static void close(AutoCloseable close) {
        if (close == null) {
            return;
        }
        try {
            close.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    /**
     * Holder for the single HTTP client shared by all requests.
     *
     * <p>Building a new client per request, as before, left every client and its connection
     * manager open forever. The client is created on first use and reused; as a consequence
     * the cookie store is now shared across requests as well.
     */
    private static final class SharedHttpClient {
        static final CloseableHttpClient INSTANCE = create();

        private static CloseableHttpClient create() {
            // Automatic decompression is disabled because callers decode the body themselves
            // (see decode); the Accept-Encoding header is therefore added by hand.
            HttpClientBuilder builder = HttpClientBuilder.create().disableContentCompression()
                    .setConnectionReuseStrategy(new DefaultConnectionReuseStrategy()).setUserAgent("Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36");
            builder.addInterceptorLast(new RequestAcceptEncoding());
            builder.setDefaultCookieStore(new BasicCookieStore());
            return builder.build();
        }
    }

    /**
     * Executes an HTTP GET request.
     *
     * @param url absolute URL to fetch
     * @return the open response; the caller must close it
     * @throws IOException if the request fails
     * @throws ClientProtocolException on an HTTP protocol error
     */
    public static CloseableHttpResponse HttpGetResponse(String url) throws IOException, ClientProtocolException {
        HttpGet httpGet = new HttpGet(URI.create(url));
        return SharedHttpClient.INSTANCE.execute(httpGet);
    }
    /**
     * Downloads a URL and returns its body as text.
     *
     * <p>The body is decompressed according to its Content-Encoding and then decoded with the
     * charset declared in the Content-Type header. When no usable charset is declared the
     * charset is detected from the bytes, with UTF-8 as the last resort. The status line and
     * the Content-Encoding header are printed to standard output.
     *
     * @param url absolute URL to fetch
     * @return a soft reference to the text; the referent may be cleared by the garbage
     *         collector, so callers must handle {@code get()} returning {@code null}
     * @throws IllegalArgumentException if the response has no body
     * @throws Exception if the request, decompression or decoding fails
     */
    public static SoftReference<String> getHtmlString(String url)throws Exception {
        CloseableHttpResponse execute = null;
        HttpEntity entity;
        byte[] binary; // raw body as received
        try {
            execute = HttpGetResponse(url);
            entity = execute.getEntity();
            binary = entity == null ? null : HttpEntityTOByte(entity);
        } finally {
            close(execute);
        }
        // Status line and headers stay readable after the response has been closed.
        System.out.println(execute.getStatusLine().toString());
        System.out.println(entity == null ? null : entity.getContentEncoding());

        Args.notNull(binary, "binary");
        byte[] decoded = decode(binary, entity);
        Header contentType = entity.getContentType();
        String declaredCharset = contentType == null ? null : getContentCharSet(contentType.getValue());
        return new SoftReference<String>(decodeHtmlText(decoded, declaredCharset));
    }

    /**
     * Converts body bytes to text: declared charset first, then detection, then UTF-8.
     *
     * @param bytes decompressed body
     * @param declaredCharset charset name from the Content-Type header, or {@code null}
     * @return the decoded text
     * @throws IOException if the detected charset cannot be applied
     */
    private static String decodeHtmlText(byte[] bytes, String declaredCharset) throws IOException {
        if (declaredCharset != null) {
            try {
                return new String(bytes, Charset.forName(declaredCharset));
            } catch (IllegalArgumentException ignored) {
                // Illegal or unsupported charset name sent by the server; detect instead.
            }
        }
        CharsetMatch match = new CharsetDetector().setText(bytes).detect();
        String text = match == null ? null : match.getString();
        return text != null ? text : new String(bytes, Charset.forName("UTF-8"));
    }
    /**
     * Downloads a URL and returns a reader over its decompressed body.
     *
     * <p>Characters are decoded with the charset declared in the Content-Type header. When
     * no usable charset is declared the charset is detected from the bytes, with UTF-8 as
     * the last resort. The status line and the Content-Encoding header are printed to
     * standard output; a response without a Content-Encoding header prints {@code null}
     * (it used to fail with a NullPointerException).
     *
     * @param url absolute URL to fetch
     * @return a soft reference to the reader; the referent may be cleared by the garbage
     *         collector, so callers must handle {@code get()} returning {@code null}
     * @throws IllegalArgumentException if the response has no body
     * @throws Exception if the request or decompression fails
     */
    public static SoftReference<Reader> getHtmlReader(String url)throws Exception {
        CloseableHttpResponse execute = null;
        HttpEntity entity;
        byte[] binary; // raw body as received
        try {
            execute = HttpGetResponse(url);
            entity = execute.getEntity();
            binary = entity == null ? null : HttpEntityTOByte(entity);
        } finally {
            close(execute);
        }
        // Status line and headers stay readable after the response has been closed.
        System.out.println(execute.getStatusLine().toString());
        System.out.println(entity == null ? null : entity.getContentEncoding());

        Args.notNull(binary, "binary");
        byte[] decoded = decode(binary, entity);
        Header contentType = entity.getContentType();
        String declaredCharset = contentType == null ? null : getContentCharSet(contentType.getValue());
        return new SoftReference<Reader>(openBodyReader(decoded, declaredCharset));
    }

    /**
     * Opens a buffered reader over body bytes: declared charset first, then detection, then UTF-8.
     *
     * @param bytes decompressed body
     * @param declaredCharset charset name from the Content-Type header, or {@code null}
     * @return a buffered reader over the decoded characters
     */
    private static Reader openBodyReader(byte[] bytes, String declaredCharset) {
        Charset charset = null;
        if (declaredCharset != null) {
            try {
                charset = Charset.forName(declaredCharset);
            } catch (IllegalArgumentException ignored) {
                // Illegal or unsupported charset name sent by the server; detect instead.
            }
        }
        if (charset == null) {
            CharsetMatch match = new CharsetDetector().setText(bytes).detect();
            Reader detected = match == null ? null : match.getReader();
            if (detected != null) {
                return new BufferedReader(detected);
            }
            charset = Charset.forName("UTF-8");
        }
        return new BufferedReader(new InputStreamReader(new ByteArrayInputStream(bytes), charset));
    }
    /**
     * Extracts the charset name from a Content-Type header value such as
     * {@code text/html; charset="UTF-8"}.
     *
     * <p>The parameter name is matched case-insensitively; surrounding whitespace and
     * quotes are removed from the value so it can be passed to {@code Charset.forName}.
     *
     * @param contentType Content-Type header value; may be {@code null} or empty
     * @return the charset name, or {@code null} when none is declared
     */
    private static String getContentCharSet(String contentType) throws ParseException {
        if (StringUtils.isEmpty(contentType)) {
            return null;
        }
        for (String parameter : contentType.split(";")) {
            String[] pair = parameter.split("=", 2);
            if (pair.length != 2
                    || !"charset".equals(pair[0].trim().toLowerCase(Locale.US))) {
                continue;
            }
            String value = pair[1].trim();
            if (value.length() >= 2) {
                char first = value.charAt(0);
                char last = value.charAt(value.length() - 1);
                // The value may be sent as a quoted string.
                if (first == last && (first == '"' || first == '\'')) {
                    value = value.substring(1, value.length() - 1).trim();
                }
            }
            if (!value.isEmpty()) {
                return value;
            }
        }
        return null;
    }
    /** Size in bytes of the chunk buffer used when decompressing. */
    public static final int BUFFER = 1024;  
    /**
     * Decompresses gzip data held in memory.
     * Adapted from http://snowolf.iteye.com/blog/643010
     *
     * @param data gzip-compressed bytes
     * @return the decompressed bytes
     * @throws Exception if the data is not valid gzip or cannot be read
     */
    public static byte[] decompress(byte[] data) throws Exception {
        ByteArrayInputStream compressed = new ByteArrayInputStream(data);
        ByteArrayOutputStream inflated = new ByteArrayOutputStream();
        try {
            decompress(compressed, inflated);
            return inflated.toByteArray();
        } finally {
            // Closing in-memory streams has no effect; kept for symmetry.
            close(inflated);
            close(compressed);
        }
    }
    /**
     * Decompresses a gzip stream into an output stream.
     *
     * <p>The gzip wrapper around {@code is} is closed when done; {@code os} is left open.
     *
     * @param is stream of gzip-compressed data
     * @param os destination for the decompressed bytes
     * @throws Exception if the data is not valid gzip or a stream operation fails
     */
    public static void decompress(InputStream is, OutputStream os)
            throws Exception {
        GZIPInputStream gzipIn = null;
        try {
            gzipIn = new GZIPInputStream(is);
            final byte[] chunk = new byte[BUFFER];
            for (int read = gzipIn.read(chunk, 0, BUFFER);
                    read != -1;
                    read = gzipIn.read(chunk, 0, BUFFER)) {
                os.write(chunk, 0, read);
            }
        } finally {
            close(gzipIn);
        }
    }
  
    /**
     * Removes the content codings named in the entity's Content-Encoding header from a body.
     *
     * <p>Supports {@code gzip}/{@code x-gzip}, {@code deflate} and {@code identity}.
     * Codings are undone in reverse header order, because the header lists them in the
     * order they were applied. Previously {@code deflate} bodies were returned still
     * compressed and only the first listed coding was considered.
     *
     * @param binary body bytes as received
     * @param entity entity the bytes came from, used for its headers; may be {@code null}
     * @return the decoded bytes, or {@code binary} itself when nothing needs decoding
     * @throws Exception if a coding is unsupported or the data is corrupt
     */
    public static byte[] decode(byte[] binary, final HttpEntity entity) throws Exception {
        if (entity == null || entity.getContentLength() == 0) {
            return binary;
        }
        final Header ceheader = entity.getContentEncoding();
        if (ceheader == null) {
            return binary;
        }
        final HeaderElement[] codecs = ceheader.getElements();
        byte[] decoded = binary;
        for (int i = codecs.length - 1; i >= 0; i--) {
            final String codecname = codecs[i].getName().toLowerCase(Locale.US);
            if ("gzip".equals(codecname) || "x-gzip".equals(codecname)) {
                decoded = decompress(decoded);
            } else if ("deflate".equals(codecname)) {
                decoded = inflateDeflate(decoded);
            } else if (!"identity".equals(codecname)) {
                throw new Exception("Unsupported Content-Coding: "+codecname );
            }
            // "identity" needs no transformation.
        }
        return decoded;
    }

    /**
     * Inflates a {@code deflate}-coded body. The zlib-wrapped format is tried first and,
     * if its header is invalid, the data is retried as a raw deflate stream, which some
     * servers send instead.
     */
    private static byte[] inflateDeflate(byte[] data) throws IOException {
        try {
            return inflateDeflate(data, false);
        } catch (java.util.zip.ZipException notZlibWrapped) {
            return inflateDeflate(data, true);
        }
    }

    /** Inflates {@code data}; {@code nowrap} selects raw deflate instead of zlib-wrapped. */
    private static byte[] inflateDeflate(byte[] data, boolean nowrap) throws IOException {
        final java.util.zip.Inflater inflater = new java.util.zip.Inflater(nowrap);
        final InputStream in = new java.util.zip.InflaterInputStream(
                new ByteArrayInputStream(data), inflater);
        try {
            final ByteArrayOutputStream out = new ByteArrayOutputStream();
            final byte[] chunk = new byte[BUFFER];
            int read;
            while ((read = in.read(chunk)) != -1) {
                out.write(chunk, 0, read);
            }
            return out.toByteArray();
        } finally {
            in.close();
            // The stream does not release an inflater that was passed in from outside.
            inflater.end();
        }
    }
    /**
     * Reads the whole content of an HTTP entity into a byte array.
     * Adapted from {@code EntityUtils.toByteArray(entity)}.
     *
     * @param entity entity to read
     * @return the content bytes, or {@code null} when the entity has no content stream
     * @throws IOException if reading the content fails
     */
    public static byte[] HttpEntityTOByte(HttpEntity entity) throws IOException{
        final InputStream content = entity.getContent();
        if (content == null) {
            return null;
        }
        try {
            final long declaredLength = entity.getContentLength();
            Args.check(declaredLength <= Integer.MAX_VALUE,
                    "HTTP entity too large to be buffered in memory");
            // A negative length means it is unknown; start with a 4 KB buffer in that case.
            final int capacity = declaredLength < 0 ? 4096 : (int) declaredLength;
            final ByteArrayBuffer collected = new ByteArrayBuffer(capacity);
            final byte[] chunk = new byte[4096];
            for (int read = content.read(chunk); read != -1; read = content.read(chunk)) {
                collected.append(chunk, 0, read);
            }
            return collected.toByteArray();
        } finally {
            content.close();
        }
    }
}
原文地址:https://www.cnblogs.com/wangly/p/5443565.html