PHP:传入一个 URL,抓取该页面中的所有链接

<?php
/*
 * Fetch one web page and print every absolute (http:// or https://) link
 * found in the <a> elements inside its <body>.
 *
 * Output: var_dump() of a list of URL strings (an empty array when the page
 * contains no absolute links). If the page cannot be fetched, a message is
 * written to the error log and the script exits with status 1.
 */

// Page to scan; replace with any URL, e.g. 'http://www.example.com'.
$pageUrl = 'http://blog.csdn.net/yhy5240/article/details/20870403';

// Initialised up front so the final var_dump() never sees an undefined variable.
$data = array();

$html = file_get_contents($pageUrl);
if ($html === false || trim($html) === '') {
    // Nothing to parse: stop here instead of handing false/'' to loadHTML(),
    // which rejects empty input on PHP 8.
    error_log('Could not fetch any HTML from ' . $pageUrl);
    exit(1);
}

// Real-world HTML is rarely valid. Collect libxml's parse warnings internally
// rather than silencing the call with "@", then restore the previous setting.
$previousLibxmlSetting = libxml_use_internal_errors(true);
$dom = new DOMDocument();
$dom->loadHTML($html);
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);

// Select every anchor in the body that actually carries an href attribute.
$xpath = new DOMXPath($dom);
$hrefs = $xpath->evaluate('/html/body//a[@href]');

foreach ($hrefs as $href) {
    // Surrounding whitespace is not part of the URL.
    $url = trim($href->getAttribute('href'));

    // Keep only links that START with an http/https scheme. A plain substring
    // check would also accept relative links that merely embed a URL, such as
    // "/redirect?to=http://...". Schemes are matched case-insensitively.
    if (preg_match('#^https?://#i', $url) === 1) {
        $data[] = $url;
    }
}

var_dump($data); // print the result

?>

原文地址:https://www.cnblogs.com/ayanboke/p/6484185.html