扒网页第二弹

第二种方式(HTTPClient)

  (实时)

  很顺利地就抄完了。。。

  现在最新的是 HttpClient 4.5.3,我就使用这个写的。不过 DefaultHttpClient 这个类好像已经被标记为过时(deprecated)了,百度了一下没找到相应回答,我觉得大概是我的姿势不对。

  反正能正常运行,对我来说,不报错能正常运行的代码都是好代码,所以就暂时不追究了,留给以后的我去头疼吧。

  我觉得其实两种方法流程上没什么太大的区别,可能我以后会深究一下吧,以后再细致地讨论好了。

  下面粘一下代码:

  1 package tixiJG;
  2 
  3 import java.io.BufferedReader;
  4 import java.io.FileOutputStream;
  5 import java.io.IOException;
  6 import java.io.InputStream;
  7 import java.io.InputStreamReader;
  8 import java.io.OutputStreamWriter;
  9 
 10 import org.apache.http.HttpEntity;
 11 import org.apache.http.HttpResponse;
 12 import org.apache.http.client.HttpClient;
 13 import org.apache.http.client.methods.HttpGet;
 14 import org.apache.http.impl.client.DefaultHttpClient;
 15 
 16 /**
 17  * HTTPClient 扒取
 18  * @author Administrator
 19  *
 20  */
 21 public class test3 {
 22     public static void main(String[] args) throws Exception {
 23         
 24         String urlStr = "http://www.imau.edu.cn";
 25         String charset = "utf-8";
 26         String filepath = "E:/eclipseWorkspace/tixiJG/TTest/002.html";
 27         
 28         
 29         HttpClient hc = new DefaultHttpClient();
 30         
 31         HttpGet hg = new HttpGet(urlStr);
 32         
 33         HttpResponse response = hc.execute(hg);
 34         
 35         HttpEntity entity = response.getEntity();
 36         
 37         
 38         InputStream htm_in = null;
 39         
 40         
 41         if(entity != null) {
 42             
 43             System.out.println(entity.getContentLength());
 44             
 45             htm_in = entity.getContent();
 46             
 47             String htm_str = InputStream2String(htm_in, charset);
 48             
 49             saveHtml(filepath, htm_str);
 50         }
 51         
 52         
 53         
 54         
 55     }
 56     
 57     
 58     /**
 59      * saveHtml函数
 60      */
 61     
 62     public static void saveHtml(String filepath, String str) {
 63         
 64         try {
 65             
 66             OutputStreamWriter outs = new OutputStreamWriter(new FileOutputStream(filepath, true), "utf-8");
 67             
 68             outs.write(str);
 69             
 70             outs.close();
 71                 
 72         }catch(IOException e) {
 73             
 74             System.out.println("Error at save html..");
 75             
 76             e.printStackTrace();
 77         }
 78     }
 79     
 80     
 81     public static String InputStream2String(InputStream in_st, String charset) throws IOException {
 82         
 83         BufferedReader buff = new BufferedReader(new InputStreamReader(in_st, charset));
 84         
 85         StringBuffer res = new StringBuffer();
 86         
 87         String line = "";
 88         
 89         while((line = buff.readLine()) != null){
 90             
 91             res.append(line);
 92             
 93         }
 94         
 95         
 96         
 97         return res.toString();
 98     }
 99     
100     
101     
102     
103 
104 }
————————————来自 大中国的智慧结晶
原文地址:https://www.cnblogs.com/guoqiaoqiao/p/7665969.html