一个爬喜马拉雅音频的例子

不废话了,直接上代码:

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;

import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;

import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.net.URL;
import java.net.URLConnection;

import com.yeepay.g3.utils.common.json.JSONUtils;

/**
 * Command-line scraper that downloads the audio tracks of one Ximalaya album.
 *
 * <p>For every album listing page it selects the {@code a[class='title']} links,
 * fetches each track's JSON descriptor from {@code /tracks/{id}.json}, and saves
 * the resource named by the {@code play_path_64} entry as an {@code .m4a} file.
 * Files are grouped in directories named after the first seven characters of the
 * text that follows each link (presumably a {@code yyyy-MM} date prefix; this
 * depends on the site's markup). Tracks whose target file already exists are
 * skipped, so the program can be re-run to resume.
 */
public class MyHttpClient {
    /** Album listing URL; the page number is appended to it. */
    private static final String ALBUM_PAGE_URL = "http://www.ximalaya.com/32160470/album/2881558?page=";
    /** Track descriptor URL template; {@code {id}} is replaced by the track id. */
    private static final String TRACK_INFO_URL = "http://www.ximalaya.com/tracks/{id}.json";
    /** Directory under which the per-prefix download directories are created. */
    private static final String OUTPUT_ROOT = "/Users/yp-tc-m-2777/Desktop/testNewP/";
    /** First and last album listing pages to scrape (inclusive). */
    private static final int FIRST_PAGE = 1;
    private static final int LAST_PAGE = 1;
    /** Number of leading characters of the date text used as the directory name. */
    private static final int DIR_PREFIX_LENGTH = 7;
    /** Index of the track id in an href such as {@code /32160470/sound/68215809/} split on "/". */
    private static final int TRACK_ID_INDEX = 3;
    /** Timeouts so that a stalled server cannot block the run forever. */
    private static final int CONNECT_TIMEOUT_MS = 15000;
    private static final int READ_TIMEOUT_MS = 60000;

    /**
     * Scrapes the configured album pages and downloads every track found.
     *
     * @param args ignored
     * @throws ClientProtocolException on an HTTP protocol error
     * @throws IOException on any network or file-system failure
     */
    public static void main(String[] args) throws ClientProtocolException, IOException {
        HttpClient hClient = new DefaultHttpClient();
        for (int page = FIRST_PAGE; page <= LAST_PAGE; page++) {
            String pageUrl = ALBUM_PAGE_URL + page;
            System.out.println(pageUrl);

            Document doc = Jsoup.parse(fetchText(hClient, pageUrl));
            Elements links = doc.select("a[class='title']");
            for (Element link : links) {
                downloadTrack(hClient, link);
            }
        }
    }

    /** Performs a GET request and returns the response body decoded as UTF-8. */
    private static String fetchText(HttpClient client, String url) throws IOException {
        HttpResponse response = client.execute(new HttpGet(url));
        // EntityUtils reads the whole entity into a string.
        return EntityUtils.toString(response.getEntity(), "utf-8");
    }

    /**
     * Downloads the track referenced by one title link unless its file already exists.
     * Links that do not have the expected shape are reported on stderr and skipped
     * instead of aborting the whole run.
     */
    private static void downloadTrack(HttpClient client, Element link) throws IOException {
        String title = link.text();

        // The element right after the title link carries the date text; it may be absent.
        Element dateElement = link.nextElementSibling();
        String dateStr = dateElement == null ? "" : dateElement.text();
        System.out.println(dateStr);
        if (dateStr.length() < DIR_PREFIX_LENGTH) {
            System.err.println("Skipping '" + title + "': unexpected date text '" + dateStr + "'");
            return;
        }

        String dirName = OUTPUT_ROOT + safeName(dateStr.substring(0, DIR_PREFIX_LENGTH));
        System.out.println(dirName);
        File dir = new File(dirName);
        // mkdirs also creates missing parents; a failure is reported rather than ignored.
        if (!dir.isDirectory() && !dir.mkdirs()) {
            throw new IOException("Cannot create directory " + dirName);
        }

        String href = link.attr("href");
        String[] hrefParts = href.split("/");
        if (hrefParts.length <= TRACK_ID_INDEX || hrefParts[TRACK_ID_INDEX].isEmpty()) {
            System.err.println("Skipping '" + title + "': unexpected href '" + href + "'");
            return;
        }
        String id = hrefParts[TRACK_ID_INDEX];
        System.out.println(id);

        String url = TRACK_INFO_URL.replace("{id}", id);
        System.out.println(url);
        System.out.println(title);
        System.out.println(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date()));

        File file = new File(dir, safeName(dateStr + " " + title) + ".m4a");
        if (file.exists()) {
            return; // already downloaded on a previous run
        }

        Map<String, String> map = JSONUtils.jsonToMap(fetchText(client, url), String.class, String.class);
        String downUrl = map == null ? null : map.get("play_path_64");
        if (downUrl == null || downUrl.isEmpty()) {
            System.err.println("Skipping '" + title + "': no play_path_64 in " + url);
            return;
        }
        saveUrlToFile(downUrl, file);
    }

    /** Replaces path separators so that the text can be used as a single file name. */
    private static String safeName(String name) {
        return name.replace('/', '_').replace(File.separatorChar, '_');
    }

    /**
     * Downloads {@code downUrl} and writes it to {@code target}. Both streams are
     * closed even on failure, and a partially written file is removed so that the
     * "already exists" check does not mistake it for a finished download.
     */
    private static void saveUrlToFile(String downUrl, File target) throws IOException {
        URLConnection conn = new URL(downUrl).openConnection();
        conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
        conn.setReadTimeout(READ_TIMEOUT_MS);

        byte[] data;
        try (InputStream in = conn.getInputStream()) {
            data = readInputStream(in);
        }

        try (FileOutputStream out = new FileOutputStream(target)) {
            out.write(data);
        } catch (IOException e) {
            // Best-effort cleanup; the original failure is what gets reported.
            target.delete();
            throw e;
        }
    }

    /**
     * Reads a stream to its end and returns everything read.
     * The stream is not closed; that is the caller's responsibility.
     *
     * @param inputStream stream to drain
     * @return all bytes read from the stream, possibly empty
     * @throws IOException if reading fails
     */
    public static byte[] readInputStream(InputStream inputStream) throws IOException {
        byte[] buffer = new byte[8192];
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        int len;
        while ((len = inputStream.read(buffer)) != -1) {
            bos.write(buffer, 0, len);
        }
        return bos.toByteArray();
    }
}
原文地址:https://www.cnblogs.com/coolgame/p/8795898.html