[工具类] 获取URL编码1

package com.claw.util.html;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Utility for guessing the character encoding of a byte stream, or of the
 * body served at an HTTP URL, by running {@code BytesEncodingDetect} over a
 * sample taken from the start of the data.
 */
public class Charset {

    /** Charset name returned whenever detection cannot be performed. */
    private static final String DEFAULT_CHARSET = "UTF-8";

    /** Maximum number of leading bytes handed to the detector. */
    private static final int SAMPLE_SIZE = 1024;

    /** Connect and read timeout, in milliseconds, for URL-based detection. */
    private static final int TIMEOUT_MILLIS = 10000;

    /**
     * Manual entry point. Builds a list of sample URLs; the loop that used
     * them is disabled, so running this currently produces no output.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<String> list = new ArrayList<String>();

        list.add("http://li200429.iteye.com/blog/1608758");
        list.add("http://blog.csdn.net/vic0228/article/details/49634311");
        list.add("http://www.zhihu.com/");
        list.add("http://www.sohu.com/");
        list.add("http://blog.163.com/wenchangqing_live/blog/static/173722309201182044545864/");

        // Disabled: getHTML and getTitle are not defined in this class.
/*        for (String url : list) {
            String html = getHTML(url);
            String title = getTitle(html);
            System.out.println("url:"+url+" ----- title:"+title);
            if(title.equals("")){
                System.out.println(html);
            }
        }*/
    }

    /**
     * Guesses the charset of the data at the current position of {@code in}.
     * Up to {@code SAMPLE_SIZE} bytes are consumed from the stream; the
     * stream is not closed.
     *
     * @param in stream positioned at the start of the content to inspect
     * @return the detected encoding name, with "GB-2312" reported as "GBK";
     *         "UTF-8" when the stream is empty or any error occurs
     */
    public static String getCharset(InputStream in){
        String charset = DEFAULT_CHARSET;
        try {
            String encode = detectEncodingName(in);
            if (encode != null) {
                charset = normalize(encode);
            }
        } catch (Exception e) {
            // Best effort: report the problem and fall back to the default.
            e.printStackTrace();
        }
        return charset;
    }

    /**
     * Fetches {@code urlStr} over HTTP and guesses the charset of the
     * response body from its leading bytes.
     *
     * <p>Only a 200 response is inspected; any other status (for example
     * 404, whose body cannot be read through {@code getInputStream()})
     * yields the default. Original author's note: this method had problems
     * with 404 responses and was marked as temporarily out of use.
     *
     * @param urlStr absolute HTTP URL to probe
     * @return the detected encoding name, with "GB-2312" reported as "GBK";
     *         "UTF-8" on a non-200 status, an empty body, or any error
     */
    public static String getCharset(String urlStr) {
        String charset = DEFAULT_CHARSET;
        HttpURLConnection conn = null;
        BufferedInputStream in = null;
        try {
            URL url = new URL(urlStr);
            conn = (HttpURLConnection) url.openConnection();
            conn.setConnectTimeout(TIMEOUT_MILLIS);
            // Without a read timeout a stalled server would block forever.
            conn.setReadTimeout(TIMEOUT_MILLIS);
            conn.setRequestProperty("User-Agent",
                    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0)");
            conn.connect();

            int status = conn.getResponseCode();
            System.out.println(status);
            if (status == HttpURLConnection.HTTP_OK) {
                in = new BufferedInputStream(conn.getInputStream());
                String encode = detectEncodingName(in);
                if (encode != null) {
                    System.out.println("encode:" + encode);
                    charset = normalize(encode);
                }
            }
        } catch (Exception e) {
            System.out.println(urlStr);
            e.printStackTrace();
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException e) {
                    System.out.println(urlStr);
                    e.printStackTrace();
                }
            }
            if (conn != null) {
                // Release the connection even when the status was not 200.
                conn.disconnect();
            }
        }
        return charset;
    }

    /**
     * Reads a sample from {@code in} and returns the detector's name for it,
     * or {@code null} when the stream yields no bytes.
     */
    private static String detectEncodingName(InputStream in) throws IOException {
        byte[] sample = new byte[SAMPLE_SIZE];
        if (readSample(in, sample) == 0) {
            return null;
        }
        BytesEncodingDetect detector = new BytesEncodingDetect();
        // The buffer is passed at full size (zero-padded past the bytes read),
        // exactly as before; how the detector treats shorter arrays is not
        // visible from this file.
        return BytesEncodingDetect.nicename[detector.detectEncoding(sample)];
    }

    /**
     * Fills {@code buffer} from {@code in} until it is full or the stream
     * ends, and returns the number of bytes stored. A single
     * {@code read} call may return fewer bytes than are available overall,
     * so the read is repeated.
     */
    private static int readSample(InputStream in, byte[] buffer) throws IOException {
        int total = 0;
        while (total < buffer.length) {
            int n = in.read(buffer, total, buffer.length - total);
            if (n < 0) {
                break;
            }
            total += n;
        }
        return total;
    }

    /** Maps the detector's "GB-2312" result to "GBK"; other names pass through. */
    private static String normalize(String encode) {
        return "GB-2312".equals(encode) ? "GBK" : encode;
    }
}
原文地址:https://www.cnblogs.com/feelgood/p/4992131.html