一个简单的远程采集示例:抓取页面数据,并将远程图片下载后存储到本地

针对淘宝的数据采集写了个简单的示例。不过,最好还是使用淘宝 API 去获取信息,这才是比较正规的做法。

<?php
/**
    * Scrape product data (title, prices, main image) from a Taobao item page
    * and print a short HTML report.
    *
    * The page is fetched as GBK and converted to UTF-8 before any matching.
    * All scraped values are treated as untrusted and HTML-escaped on output.
    * @Author Bieanju
    **/
    header("Content-Type:text/html; charset=utf-8");
    $url = "https://item.taobao.com/item.htm?spm=a217h.7274645.1998424065.10.5kIFLZ&id=43823358756";
    $str = file_get_contents($url);
    // Stop early when the page could not be fetched; everything below needs its markup.
    if ($str === false || $str === '') {
        exit("采集失败,请重试!");
    }
    $str = mb_convert_encoding($str,'UTF-8','GBK');

    // The "/" inside closing tags must be escaped, otherwise PCRE reads it as the
    // pattern delimiter and rejects the expression ("Unknown modifier").
    $goods = array();
    $goods['title'] = trim((string) preg_substr('/<h3 class="tb-main-title"[^>]*>/', '/<\/h3>/', $str));
    $goods['market_price'] = trim((string) preg_substr('/<em class="tb-rmb-num">/', '/<\/em>/', $str));

    // Group 1 is the tag name (needed for the \1 back-reference); the price text is group 2.
    // [^>]* keeps the match inside the opening tag without tripping over attributes
    // that contain the letter "i" (the previous [^i]* did).
    preg_match('/<([a-z]+)[^>]*id="J_StrPrice"[^>]*>([^<]*)<\/\1>/is', $str, $price);
    $goods['price'] = !empty($price[2]) ? trim($price[2]) : 0;

    // Capture the src attribute that follows the J_ImgBooth id within the same tag.
    preg_match('/id="J_ImgBooth"[^>]*?src="([^"]*)"/i', $str, $img);
    $goods['url'] = '';
    if (!empty($img[1])) {
        // Only protocol-relative URLs ("//host/path") need a scheme prepended.
        $imgUrl = strpos($img[1], '//') === 0 ? 'http:'.$img[1] : $img[1];
        $saved = getImage($imgUrl, '', 'upload', array('jpg','gif','png'), 1);
        if ($saved) {
            $goods['url'] = $saved;
        }
    }

    // A missing title means the expected markup was not found, i.e. the scrape failed.
    if ($goods['title'] !== '') {
        $safeTitle = htmlspecialchars($goods['title'], ENT_QUOTES, 'UTF-8');
        $safePrice = htmlspecialchars((string) $goods['market_price'], ENT_QUOTES, 'UTF-8');
        echo "采集成功!<br />商品名称【".$safeTitle."】<br />商品价格【".$safePrice."】";
        // Only report and embed the image when it was actually stored locally.
        if ($goods['url'] !== '') {
            $safePath = htmlspecialchars($goods['url'], ENT_QUOTES, 'UTF-8');
            echo "<br />商品图片已成功保存到本地【".$safePath."】<br /><img src='".$safePath."' />";
        }
    }else{
        exit("采集失败,请重试!");
    }
    /**
        * Return the text that lies between a start pattern and an end pattern.
        *
        * The subject is split on $start; the segment following the first match is
        * then split on $end and the part before the first end match is returned.
        *
        * @param string $start PCRE pattern (with delimiters) marking where the region begins
        * @param string $end   PCRE pattern (with delimiters) marking where the region ends
        * @param string $str   Text to search
        * @return string The extracted text, or '' when $start does not match
        *                or either pattern is invalid
        * @Author Bieanju
        **/
    function preg_substr($start, $end, $str){
        $segments = preg_split($start, (string) $str);
        // false => invalid pattern; fewer than two pieces => start marker not found.
        if ($segments === false || count($segments) < 2) {
            return '';
        }
        // Only the part before the first end marker is needed, so limit the split to 2.
        $content = preg_split($end, $segments[1], 2);
        if ($content === false) {
            return '';
        }
        return $content[0];
    }
    
    /**
        * Download an image and store it locally under "$dirName/YYYY/mm/dd/".
        *
        * @param string $url      Location of the image to fetch
        * @param string $filename Target file name; when empty a time-based name with
        *                         the source extension is generated
        * @param string $dirName  Base directory the dated sub-directories are created in
        * @param array  $fileType Allowed file extensions (lower case, without the dot)
        * @param int    $type     Truthy: fetch with cURL; falsy: fetch with file_get_contents()
        * @return string|false Path of the stored file, or false when the URL is empty,
        *                      the extension is not allowed, the download fails or the
        *                      file cannot be written
        * @Author Bieanju
        **/
    function getImage($url, $filename = '', $dirName = 'upload', $fileType = array('jpg', 'gif', 'png'), $type = 0){
        if($url == ''){
            return false;
        }

        // Read the extension from the URL path only, so a query string or fragment
        // (e.g. "a.jpg?v=2") neither hides nor fakes it.
        $path = parse_url($url, PHP_URL_PATH);
        if(!is_string($path) || $path === ''){
            return false;
        }
        $suffix = strtolower(pathinfo($path, PATHINFO_EXTENSION));
        if($suffix === '' || !in_array($suffix, $fileType, true)){
            return false;
        }

        if($filename == ''){
            $filename = time().mt_rand(1000, 9999).'.'.$suffix;
        }else{
            // Honour the caller's name, but strip directory parts and never store
            // the download under an extension outside the whitelist.
            $filename = basename($filename);
            $givenSuffix = strtolower(pathinfo($filename, PATHINFO_EXTENSION));
            if(!in_array($givenSuffix, $fileType, true)){
                $filename .= '.'.$suffix;
            }
        }

        if($type){
            $ch = curl_init();
            curl_setopt($ch, CURLOPT_URL, $url);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
            curl_setopt($ch, CURLOPT_TIMEOUT, 30);
            // Follow a few redirects (e.g. http -> https) instead of saving the redirect page.
            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
            curl_setopt($ch, CURLOPT_MAXREDIRS, 3);
            // Make curl_exec() return false on HTTP status >= 400 instead of the error body.
            curl_setopt($ch, CURLOPT_FAILONERROR, 1);
            $file = curl_exec($ch);
            curl_close($ch);
        }else{
            $file = file_get_contents($url);
        }
        // Never create an empty or bogus image file when the download failed.
        if($file === false || $file === ''){
            return false;
        }

        $dirName = $dirName.'/'.date('Y/m/d').'/';
        // The second is_dir() covers another request creating the directory concurrently.
        if(!is_dir($dirName) && !mkdir($dirName, 0777, true) && !is_dir($dirName)){
            return false;
        }

        // Overwrite rather than append: appending to an existing file corrupts the image.
        if(file_put_contents($dirName.$filename, $file) === false){
            return false;
        }
        return $dirName.$filename;
    }
?>
原文地址:https://www.cnblogs.com/bieanju/p/5821288.html