httpclient解析gzip网页

httpclient解析gzip网页 - 夜隼 - 博客园

发送请求前,先设置请求头,声明客户端可以接受压缩后的响应:getHC.setRequestHeader("Accept-Encoding","gzip, deflate");

解析响应时,根据响应头 Content-Encoding 判断是否需要先解压,再按指定编码读取正文:

// Reads the response body of getHC into sb as text, transparently decoding it
// according to the Content-Encoding response header, then tears the connection down.
//
// Results left in scope for the code that follows:
//   acceptEncoding - value of the Content-Encoding response header, or "" if absent
//   sb             - decoded body; every line read is terminated with "\r\n"
//
// Cleanup (abort, releaseConnection, connection-manager shutdown) runs in finally
// blocks so that it also happens when reading or decompression throws.
String acceptEncoding = "";
StringBuffer sb = new StringBuffer();
try {
    if (getHC.getResponseHeader("Content-Encoding") != null) {
        String headerValue = getHC.getResponseHeader("Content-Encoding").getValue();
        // Guard against a header that is present but carries no value.
        if (headerValue != null) {
            acceptEncoding = headerValue;
        }
    }
    log.debug("acceptEncoding:" + acceptEncoding);

    InputStream is = getHC.getResponseBodyAsStream();
    // A response without a body yields no stream; sb then simply stays empty.
    if (is != null) {
        java.io.BufferedReader br = null;
        try {
            // Locale.ROOT keeps the match independent of the JVM default locale
            // (e.g. "GZIP" lower-cases to "gz\u0131p" under a Turkish locale).
            String encoding = acceptEncoding.toLowerCase(java.util.Locale.ROOT);

            // Build the decompression stream that matches the declared encoding.
            InputStream decoded = is;
            if (encoding.indexOf("gzip") > -1) {
                decoded = new GZIPInputStream(is);
            } else if (encoding.indexOf("deflate") > -1) {
                // The request advertises "deflate" as well, so it has to be decoded too.
                // NOTE(review): this expects the zlib-wrapped form; servers that send raw
                // deflate data would need new Inflater(true) - confirm against real targets.
                decoded = new java.util.zip.InflaterInputStream(is);
            }

            // Decode the bytes with the caller-supplied charset.
            br = new java.io.BufferedReader(new InputStreamReader(decoded, charset));
            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line);
                sb.append("\r\n");
            }
        } finally {
            // Closing the outermost reader closes the whole stream chain; if the chain
            // was never built (e.g. bad gzip header), close the raw stream instead.
            if (br != null) {
                br.close();
            } else {
                is.close();
            }
        }
    }
} finally {
    try {
        getHC.abort();
        getHC.releaseConnection();
    } finally {
        ((SimpleHttpConnectionManager) httpClient.getHttpConnectionManager()).shutdown();
    }
}

原文地址:https://www.cnblogs.com/lexus/p/2376812.html