爬取中国信用黑名单网站图片和数据到本地 (Crawl images and data from the China credit-blacklist website to the local disk)

 2 
 3 import java.io.File;
 4 import java.io.IOException;
 5 import java.io.InputStream;
 6 import java.net.URL;
 7 import java.net.URLConnection;
 8 
 9 import org.apache.commons.io.FileUtils;
10 
11 
12 
13 public class SpiderDemo {
14     public static void main(String[] args) throws IOException {
15 //        URL url = new URL("http://www.zhongguoxinyongheimingdan.com");
16 //        URLConnection connection = url.openConnection();
17 //        InputStream in = connection.getInputStream();
18 //        File file = new File("F://a.txt");
19 //        FileUtils.copyInputStreamToFile(in, file);
20         File srcDir = new File("F://a.txt");
21         String str = FileUtils.readFileToString(srcDir, "UTF-8");
22         String[] str1 = str.split("href=");
23         for (int i = 3; i < str1.length-1; i++) {
24             URL url = new URL("http://www.zhongguoxinyongheimingdan.com"+str1[i].substring(1, 27));
25             File f = new File("F://abc//"+str1[i].substring(2, 22));
26             if(!f.exists()){
27             f.mkdir();    
28             File desc1 = new File(f,str1[i].substring(1, 22)+".txt");
29             URLConnection connection = url.openConnection();
30             InputStream in = connection.getInputStream();
31             FileUtils.copyInputStreamToFile(in, desc1);
32             String str2 = FileUtils.readFileToString(desc1, "UTF-8");
33             String[] str3 = str2.split("" src="");
34             for(int j = 1;j<str3.length-2;j++){
35                 URL url1 = new URL(str3[j].substring(0, 81));
36                 URLConnection connection1 = url1.openConnection();
37                 connection1.setDoInput(true);
38                 InputStream in1 = connection1.getInputStream();
39                 File desc2 = new File(f,str3[j].substring(44,76)+".jpg");
40                 FileUtils.copyInputStreamToFile(in1, desc2);
41             }
42             }
43             }
44         }
45     
46 }
原文地址:https://www.cnblogs.com/bianqi/p/6404066.html