下载远程图片到本地

  1 <?php
  2 
  /**
   * Download a remote image over plain HTTP into the local temp directory and pass it
   * on to imgfilter() for validation and storage.
   *
   * Steps: normalise and validate the URL, reject hosts refused by hostfilter(),
   * short-circuit when urlfilter() already knows the URL, download with cURL into
   * `temp/<hash>`, then verify HTTP status, content type and size before filtering.
   *
   * @param string $url Remote image URL (may be URL-encoded); must start with "http://".
   * @return array Array with keys 'res' (1 on success, 0 on failure) and 'msg'
   *               (thumb data on success, a user-facing error message on failure).
   */
  public function downloadImage($url){
      global $_GLOBAL;
      set_time_limit(160);

      // Normalise the input, then apply the (case-sensitive) http-only rule.
      $url = isset($url) ? trim(urldecode($url)) : '';
      if (substr($url, 0, 7) != 'http://') {
          return array('res' => 0, 'msg' => '采集图片仅支持http协议');
      }
      $url = filter_var($url, FILTER_VALIDATE_URL);
      if (!$url) {
          return array('res' => 0, 'msg' => 'url地址无效');
      }

      // hostfilter() hands back a JSON string when it rejects the host and nothing
      // otherwise. Decode as an associative array: the default object form cannot be
      // indexed with ['res'] and would abort the request with a fatal error.
      $host = strtolower((string) parse_url($url, PHP_URL_HOST));
      $verdict = $this->hostfilter($host);
      $hostarr = is_string($verdict) ? json_decode($verdict, true) : null;
      if (is_array($hostarr) && isset($hostarr['res']) && $hostarr['res'] == '0') {
          return array('res' => 0, 'msg' => isset($hostarr['msg']) ? $hostarr['msg'] : '');
      }

      // Negative id: URL is known to be unusable. Positive id: already downloaded.
      list($fileid, $hash, $thumb) = $this->urlfilter($url);
      if ($fileid < 0) {
          return array('res' => 0, 'msg' => '图片不符合要求');
      } else if ($fileid) {
          return array('res' => 1, 'msg' => $thumb);
      }

      // Open the target file before creating the cURL handle so a failure here
      // leaves nothing to clean up.
      $tmpfile = 'temp/'. $hash;
      $fp = fopen($tmpfile, 'wb');
      if ($fp === false) {
          return array('res' => 0, 'msg' => '无法创建临时文件');
      }
      flock($fp, LOCK_EX);

      $ch = $this->getcurl($url);
      // Set after getcurl() so the response body is streamed into the file.
      curl_setopt($ch, CURLOPT_FILE, $fp);
      curl_exec($ch);
      if (curl_errno($ch)) {
          curl_close($ch);
          fclose($fp);
          unlink($tmpfile);
          return array('res' => 0, 'msg' => '采集超时,请重试...');
      }
      $http_code = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
      $content_type = (string) curl_getinfo($ch, CURLINFO_CONTENT_TYPE);
      // "?:" keeps the numeric byte count; "||" would collapse it to a boolean and
      // turn the completeness check below into a mere "file is not empty" test.
      $size = curl_getinfo($ch, CURLINFO_SIZE_DOWNLOAD) ?: curl_getinfo($ch, CURLINFO_CONTENT_LENGTH_DOWNLOAD);
      curl_close($ch);
      fclose($fp);

      $_GLOBAL['mime'] = array('image/jpeg' => 'jpg', 'image/jpg' => 'jpg', 'image/png' => 'png', 'image/gif' => 'gif', 'image/bmp' => 'bmp');

      // Compare only the media type; servers may append parameters ("; charset=...").
      $type_parts = explode(';', $content_type);
      $mime = strtolower(trim($type_parts[0]));

      $is_success = ($http_code >= 200 && $http_code < 300);
      if (!$is_success || !isset($_GLOBAL['mime'][$mime])) {
          unlink($tmpfile);
          return array('res' => 0, 'msg' => '链接地址失效或者不存在');
      }

      // The file on disk must match the number of bytes cURL reports.
      clearstatcache(true, $tmpfile);
      if (filesize($tmpfile) != $size) {
          unlink($tmpfile);
          return array('res' => 0, 'msg' => '采集超时,文件不完整,请重试...');
      }

      list($status, $message) = $this->imgfilter($tmpfile);
      if ($status <= 0) {
          // imgfilter() normally deletes rejected files itself; remove any leftover.
          if (is_file($tmpfile)) {
              unlink($tmpfile);
          }
          return array('res' => 0, 'msg' => $message);
      }

      // Remember the URL so the next request for it is answered from the database.
      $this->updateurl($url, $hash, $status);
      return array('res' => 1, 'msg' => $message);
  }
 78 
 79     
 /**
  * Refuse hosts that point back at this server, at the loopback interface or at a
  * private/link-local IPv4 network, so the downloader cannot reach internal resources.
  *
  * Only literal names and dotted-quad IPv4 addresses are inspected; host names are
  * not resolved here.
  *
  * @param string $host Host name or IPv4 address taken from the URL.
  * @return string|null JSON-encoded array('res' => 0, 'msg' => ...) when the host is
  *                     rejected; null when nothing objectionable was found.
  */
 public function hostfilter($host) {
     $host = (string) $host;

     // Self check. An empty server name is skipped: it cannot identify this site,
     // and an empty needle would match every host on PHP 8.
     $self = isset($_SERVER['SERVER_NAME']) ? strtolower((string) $_SERVER['SERVER_NAME']) : '';
     if ($self !== '' && strpos(strtolower($host), $self) !== false) {
         return json_encode(array('res' => 0, 'msg' => '不支持站内采集'));
     }

     if ($host == '127.0.0.1' || $host == 'localhost') {
         return json_encode(array('res' => 0, 'msg' => '不支持本地网络采集'));
     }

     // Dotted-quad IPv4 literal: digits and dots must be escaped in the pattern.
     if (preg_match('/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/', $host)) {
         $octets = array_map('intval', explode('.', $host));
         $first = $octets[0];
         $second = $octets[1];

         $is_internal = $first == 0                          // "this network"
             || $first == 10                                 // 10.0.0.0/8
             || $first == 127                                // loopback
             || ($first == 192 && $second == 168)            // 192.168.0.0/16
             || ($first == 172 && $second >= 16 && $second <= 31)  // 172.16.0.0/12
             || ($first == 169 && $second == 254);           // link-local
         if ($is_internal) {
             return json_encode(array('res' => 0, 'msg' => '不支持内部网络采集'));
         }

         // Three digits allow values up to 999; anything above 255 is not an address.
         if (max($octets) > 255) {
             return json_encode(array('res' => 0, 'msg' => '无效的ip'));
         }
     }

     return null;
 }
104     
/**
 * Look a URL up in the URL table to avoid downloading the same image twice.
 *
 * The lookup key is the SHA-1 of the validated URL.
 *
 * @param string $url  URL to look up.
 * @param mixed  $type Unused by this method.
 * @return array array(fileid, hash, thumbdata) where fileid is:
 *               '-1' when the URL is invalid or its stored file id equals 0,
 *               '0'  when the lookup yields false or the file row has no thumb data,
 *               otherwise the stored file id together with its thumb data.
 */
public function urlfilter($url,$type=false) {
    global $db;

    $validated = filter_var($url, FILTER_VALIDATE_URL);
    if (empty($validated)) {
        return array('-1', '', '');
    }

    $hash = sha1($validated);
    $db->query("SELECT `fileid` FROM `". TNAME. "url` WHERE `hash`='$hash' LIMIT 1");
    $fileid = $db->fetchResult();

    // A strict false from the lookup is treated as "URL not recorded yet".
    if ($fileid === false) {
        return array('0', $hash, '');
    }
    // A recorded id equal to 0 is reported to the caller as a rejection.
    if ($fileid == 0) {
        return array('-1', $hash, '');
    }

    $db->query("SELECT `thumbdata` FROM `". TNAME. "file` WHERE `fileid`=$fileid LIMIT 1");
    $thumb = $db->fetchResult();

    // Without thumb data the caller is told to download again.
    return empty($thumb)
        ? array('0', $hash, '')
        : array($fileid, $hash, $thumb);
}
133     
/**
 * Choose one of the built-in browser User-Agent strings at random.
 *
 * @return string A User-Agent header value for outgoing cURL requests.
 */
public function setuseragent() {
    $agents = array(
        'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)',
        'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.28 (KHTML, like Gecko) Chrome/12.0.725.0 Safari/534.28',
        'Mozilla/5.0 (Windows NT 5.1; rv:2.0) Gecko/20100101 Firefox/4.0',
        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; zh-CN; rv:1.9.2.16) Gecko/20110319 Firefox/3.6.16',
    );

    // array_rand() yields a random key of the list.
    return $agents[array_rand($agents)];
}
149     
/**
 * Create a cURL handle configured for fetching a URL.
 *
 * The handle sends a random User-Agent from setuseragent(), follows at most five
 * redirects, returns the body from curl_exec() unless the caller installs
 * CURLOPT_FILE afterwards, and gives up after $timout seconds.
 *
 * NOTE(review): redirects are followed without re-running hostfilter() on the
 * target, so an allowed host can still redirect to an internal address.
 *
 * @param string $url    URL to request.
 * @param int    $timout Total transfer timeout in seconds.
 * @return resource|CurlHandle|false The configured handle as returned by curl_init().
 */
public function getcurl($url, $timout = 120) {
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_USERAGENT, $this->setuseragent());
    curl_setopt($ch, CURLOPT_MAXREDIRS, 5);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    // Never let a redirect leave HTTP(S); the initial protocol is left to the caller.
    if (defined('CURLOPT_REDIR_PROTOCOLS')) {
        curl_setopt($ch, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
    }
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_NOBODY, 0);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_TIMEOUT, $timout);
    return $ch;
}
168     
/**
 * Record which file a URL resolved to, so later requests for it can skip the download.
 *
 * Inserts a row into the URL table, or updates the file id when the row already
 * exists, and then disconnects the database handle.
 *
 * @param string $url     Source URL of the image.
 * @param string $urlhash Hash of the URL used as lookup key.
 * @param int    $fileid  Id of the stored file.
 * @return void
 */
public function updateurl($url, $urlhash, $fileid) {
    global $db;

    // The URL is remote input and may legally contain quotes, so it must not reach
    // the statement unescaped.
    // NOTE(review): addslashes() is a stopgap because the $db wrapper's API is not
    // visible here; switch to its own escaping or prepared statements if it has them.
    $safe_url = addslashes((string) $url);
    $safe_hash = addslashes((string) $urlhash);
    $safe_id = (int) $fileid;

    $urlsql = "INSERT INTO `". TNAME. "url` (`url`,`hash`,`fileid`) VALUES('$safe_url','$safe_hash','$safe_id') ON DUPLICATE KEY UPDATE `fileid`='$safe_id'";
    $db->query($urlsql);
    $db->disconnect();
}
181     
/**
 * Validate a downloaded image and register it in the hash-addressed file store.
 *
 * The file is rejected (and deleted) when getimagesize() cannot read it, when its
 * MIME type is not jpeg/png/gif/bmp, or when it fails the minimum-size rule. With
 * $hashcheck enabled the file is then looked up by content hash: a known hash
 * reuses the existing record, an unknown one is moved into `files/<hashdir>/`,
 * inserted into the file table and passed to getThumbImg().
 *
 * @param string $file      Path of the file to inspect; it is moved or deleted
 *                          unless $hashcheck is falsy and the checks pass.
 * @param int    $hashcheck When falsy, stop after the MIME and size checks.
 * @return array array(0, message) on rejection; array(1, 'mime check passed') when
 *               $hashcheck is falsy; otherwise array(fileid, thumbdata).
 */
public function imgfilter($file, $hashcheck = 1) {
    global $db, $_GLOBAL;

    // Accepted MIME types and the extension each is stored under.
    $mimearr = array('image/jpeg' => 'jpg', 'image/jpg' => 'jpg', 'image/png' => 'png', 'image/gif' => 'gif', 'image/bmp' => 'bmp');
    $imginfo = getimagesize($file);
    if (empty($imginfo) || !isset($mimearr[$imginfo['mime']])) {
        unlink($file);
        return array(0, '图片格式无效');
    }
    // NOTE(review): only images below BOTH minimums are rejected, while the message
    // reads as if each dimension had to meet its minimum — confirm "&&" is intended.
    if ($imginfo[0] < IMAGE_MIN_WIDTH && $imginfo[1] < IMAGE_MIN_HEIGHT) {
        unlink($file);
        return array(0, '宽度不小于'. IMAGE_MIN_WIDTH. ', 高度不小于'. IMAGE_MIN_HEIGHT);
    }
    if (!$hashcheck) {
        return array(1, 'mime check passed');
    }

    // Derive the storage location from the content hash.
    $ext = $mimearr[$imginfo['mime']];
    $Hash = $this->getHashFile($file);
    $hashdir = 'files/'.$this->getHashDir($Hash);
    $srcfile = $hashdir. $Hash. '.'.$ext;
    if (!is_dir($hashdir)) {
        mkdir($hashdir, 0755, true);
    }

    $sql = "SELECT `fileid`,`thumbdata` FROM `".TNAME."file` WHERE `hash`='$Hash'";
    $db->query($sql);
    $filearr = $db->fetchRow();
    $db->clear();

    if ($filearr) {
        // Hit: the content is already registered. Restore the stored copy if it has
        // gone missing, otherwise the fresh download is redundant.
        if (!is_file($srcfile)) {
            rename($file, $srcfile);
        } else {
            unlink($file);
        }
        return array($filearr['fileid'], $filearr['thumbdata']);
    }

    // Miss: move the file into the store and create its record.
    rename($file, $srcfile);
    $Size = filesize($srcfile);
    $Geometry = "{$imginfo[0]}x{$imginfo[1]}";
    $is_rgb = isset($imginfo['channels']) && $imginfo['channels'] == 3;
    $mode = ($ext == 'jpg') ? ($is_rgb ? 'RGB' : 'CMYK') : '';
    // NOTE(review): uid/username are interpolated as-is; whether they are escaped
    // upstream is not visible from here — verify before trusting this statement.
    $filesql = "INSERT IGNORE INTO `".TNAME."file` SET uid='{$_GLOBAL['user']['uid']}', username='{$_GLOBAL['user']['username']}',`hash`='{$Hash}',`type`='{$ext}', `size`='{$Size}', `format`='{$ext}', `mode`='{$mode}', `geometry`='{$Geometry}', `created`='".time()."'";

    $db->beginTransaction();
    $db->query($filesql);
    $HashID = $db->fetchLastInsertId();
    $db->commitTransaction();
    if (empty($HashID)) exit('db failed');  // no usable insert id: abort the request

    // Thumb data layout: "<fileid>?<hash>?<size>?<ext>".
    $ThumbData = $HashID. '?'. $Hash. '?'. $Size;

    // GIF workaround: drop the temp gif for this source file if one exists.
    if ($imginfo[2] == IMAGETYPE_GIF) {
        $giftmp = 'temp/'. md5($srcfile). '.gif';
        if (is_file($giftmp)) {
            unlink($giftmp);
        }
    }

    $ThumbData .= '?'.$ext;
    $this->getThumbImg($srcfile, $ThumbData, 'article');

    $db->query("UPDATE `".TNAME."file` SET `file`='$srcfile',`thumbdata`='$ThumbData' WHERE `fileid`=$HashID;");
    $db->disconnect();

    return array($HashID, $ThumbData);
}
255  
256 ?>
原文地址:https://www.cnblogs.com/zhaoyuqi/p/3467796.html