好用的 curl 抓取页面的封装函数

由于经常使用 PHP 的 curl 抓取页面内容，在此记录一下平时自己封装的 curl 函数（其实现在也开始用 Python 来爬了~ ^-^）。

/**
 * Fetch a URL with cURL and return the response body.
 *
 * Sends a GET request by default, or a POST when $post is non-empty. Only a
 * response with HTTP status 200 counts as success; redirects are not followed,
 * so a 3xx answer yields null just like any other non-200 status.
 *
 * @author FredGui
 * @param string       $url     Required. Address to request.
 * @param string|array $post    Optional. POST payload (URL-encoded string or array of fields);
 *                              when empty, a GET request is sent.
 * @param int          $timeout Optional. Maximum time in seconds allowed for the whole transfer.
 * @param string       $cookie  Optional. Value of the Cookie request header, e.g. "a=1; b=2".
 * @param int          $decode  Optional. When 1, a body that parses as JSON is returned decoded
 *                              (objects as associative arrays); any other value returns the raw body.
 * @return mixed|null The response body as a string, the decoded JSON value, or null when the
 *                    transfer failed or the HTTP status was not 200.
 */
function curlHtml($url, $post = '', $timeout = 30, $cookie = '', $decode = 1){
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    // Keep response headers out of the returned body.
    curl_setopt($ch, CURLOPT_HEADER, 0);
    // Make curl_exec() return the body instead of printing it.
    // (CURLOPT_BINARYTRANSFER is intentionally not set: it has had no effect
    // since PHP 5.1.3 and is deprecated in PHP 8.4.)
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);

    if ($post) {
        curl_setopt($ch, CURLOPT_POST, 1);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $post);
    }

    if ($cookie) {
        curl_setopt($ch, CURLOPT_COOKIE, $cookie);
    }

    $data = curl_exec($ch);
    $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    // curl_exec() returns false on a transport error (DNS failure, refused
    // connection, timeout, ...); treat that the same as a non-200 status.
    if ($data === false || $httpCode !== 200) {
        return null;
    }

    if ($decode == 1) {
        // Decode once and keep the raw body when it is not valid JSON
        // (json_decode() returns null in that case).
        $decoded = json_decode($data, true);
        if ($decoded !== null) {
            $data = $decoded;
        }
    }

    return $data;
}

// Fetch the page as raw text: $decode = 0 skips the JSON step, so the result
// is always a string (the HTML) or null (request failed / status not 200).
$html = curlHtml('http://www.baidu.com', '', 30, '', 0);
echo '<pre>';
// Uncomment to dump the raw HTML instead of the tag-stripped text:
//var_dump($html);

// Cast to string so a failed request (null) dumps an empty string rather than
// passing null to strip_tags(), which is deprecated since PHP 8.1.
var_dump(strip_tags((string) $html));exit;

　上面的代码去掉了页面内的 HTML 标签，输出如下（网页的全部文本内容）：

　本文地址：http://www.cnblogs.com/guixiaoming/p/6424160.html

好用的 curl 抓取 页面的封装函数

好用的 curl 抓取页面的封装函数