有用的函数系统采集(二)

1.文本转html(转义html标记,换行转为<br/>)

/**
 * Convert plain text into markup that can be embedded in an HTML page.
 *
 * Each ASCII space is replaced by a full-width space (U+3000), angle
 * brackets are escaped to their HTML entities, and every line break is
 * turned into a <br/> tag followed by CRLF.
 *
 * @param string $txt Plain text to convert.
 * @return string The converted text.
 */
function Text2Html($txt){
    // Replacements run in order; "<br/>" is added afterwards, so the tags
    // produced by this function are never escaped themselves.
    $txt = str_replace(
        array(" ", "<", ">"),
        array("　", "&lt;", "&gt;"),
        $txt
    );

    // CRLF is listed first so a Windows line ending counts as ONE break
    // instead of producing two <br/> tags.
    $txt = preg_replace('/\r\n|\r|\n/', "<br/>\r\n", $txt);

    return $txt;
}

2.相对路径转绝对路径

/**
 * Turn root-relative href/src attributes into absolute URLs.
 *
 * The scheme and host are taken from $feed_url and prepended to every
 * double-quoted href="/..." and src="/..." attribute found in $content.
 * Protocol-relative URLs (href="//host/...") are already absolute and are
 * left untouched.
 *
 * $content is returned unchanged when no host can be extracted from
 * $feed_url or when its scheme is not http, https or ftp.
 *
 * @param string $content  HTML fragment whose links should be rewritten.
 * @param string $feed_url URL of the document the fragment came from.
 * @return string The fragment with root-relative links made absolute.
 */
function relative_to_absolute($content, $feed_url) {
    // Host = feed URL without its scheme and without anything from the
    // first remaining slash onwards.
    $server_url = preg_replace("/(http|https|ftp|news):\/\//", "", $feed_url);
    $server_url = preg_replace("/\/.*/", "", $server_url);

    if ($server_url == '') {
        return $content;
    }

    // news:// is stripped above but is not a usable link prefix, so only
    // http/https/ftp qualify here.
    if (!preg_match('/(http|https|ftp):\/\//', $feed_url, $protocol)) {
        return $content;
    }

    $base = $protocol[0].$server_url;

    // (?!\/) keeps protocol-relative URLs such as src="//cdn.example.com/x"
    // from being mangled into "scheme://host//cdn.example.com/x".
    return preg_replace('/(href|src)="\/(?!\/)/', '$1="'.$base.'/', $content);
}

3.取得所有链接

/**
 * Extract every <a href=...>text</a> link from an HTML fragment.
 *
 * Only anchors whose first attribute is href and whose text contains no
 * ">" character are matched.
 *
 * @param string $code HTML to scan.
 * @return array Two parallel lists: 'name' holds the link texts and 'url'
 *               holds the matching href values.
 */
function get_all_url($code){
    $link_pattern = '/<a\s+href=["|\']?([^>"\' ]+)["|\']?\s*[^>]*>([^>]+)<\/a>/i';

    preg_match_all($link_pattern, $code, $found);

    // Group 1 is the href value, group 2 the anchor text.
    $link_urls  = $found[1];
    $link_names = $found[2];

    return array(
        'name' => $link_names,
        'url'  => $link_urls
    );
}

4.获取指定标记中的内容

/**
 * Return the text found between a start marker and an end marker.
 *
 * The result is the piece of $str that follows the first occurrence of
 * $start, cut at the next occurrence of either $start or $end. If $end
 * does not occur, everything up to the next $start (or the end of the
 * string) is returned.
 *
 * @param string $str   Text to search.
 * @param string $start Opening marker.
 * @param string $end   Closing marker.
 * @return string|null  The enclosed text, '' when $start does not occur in
 *                      $str, or null when either marker is empty.
 */
function get_tag_data($str, $start, $end){
    if ($start == '' || $end == '') {
        return null;
    }

    $segments = explode($start, $str);

    // Without this guard a missing start marker reads an undefined index
    // and passes null on to explode().
    if (!isset($segments[1])) {
        return '';
    }

    $inner = explode($end, $segments[1]);

    return $inner[0];
}

5.获取远程文件内容(抓内容)

/**
 * Fetch the contents of a remote file.
 *
 * file_get_contents() is tried first. If it fails and the cURL extension
 * is loaded, the request is retried through cURL with a 2 second connect
 * timeout; HTTP error statuses count as failures there.
 *
 * Errors from file_get_contents() are suppressed on purpose: the function
 * is best-effort and reports failure through its return value.
 *
 * @param string $url HTTP address of the file.
 * @return string|false The file contents, or false when every available
 *                      transport failed.
 */
function fopen_url($url)
{
    $file_content = @file_get_contents($url);

    if ($file_content !== false) {
        return $file_content;
    }

    // Fall back to cURL, e.g. when URL wrappers are disabled for
    // file_get_contents().
    if (!function_exists('curl_init')) {
        return $file_content;
    }

    $curl_handle = curl_init();
    curl_setopt($curl_handle, CURLOPT_URL, $url);
    curl_setopt($curl_handle, CURLOPT_CONNECTTIMEOUT, 2);
    curl_setopt($curl_handle, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($curl_handle, CURLOPT_FAILONERROR, 1);
    curl_setopt($curl_handle, CURLOPT_USERAGENT, 'Trackback Spam Check');
    $file_content = curl_exec($curl_handle);
    curl_close($curl_handle);

    return $file_content;
}

6.去掉指定的标签函数

// Usage example for _strip_tags(): sample markup containing an <a> element
// whose content spans a line break.
$str = "ertet<a href=\"http://www.xxxx.com\" tasdfgrget=\"_blank\">aaaabbbb

</a>aaadf";

// Print the sample with the "a" and "img" tags passed to _strip_tags().
echo _strip_tags(array("a","img"),$str);

/**
 * Remove the opening and closing tags of the given elements from a string.
 *
 * Only the tags themselves are removed; the text between them is kept.
 * Matching is case-insensitive and exact on the tag name, so asking for
 * "a" leaves <abbr>, <article> and similar elements alone.
 *
 * @param array  $tags_a Tag names to strip, e.g. array("a", "img").
 * @param string $str    Markup to clean.
 * @return string The markup without the listed tags; $str unchanged when
 *                $tags_a is empty.
 */
function _strip_tags($tags_a,$str)
{
    $patterns = array();

    foreach ($tags_a as $tag) {
        // The lookahead demands whitespace, "/" or ">" right after the
        // name so a short name cannot match the prefix of a longer one.
        $patterns[] = '/<\/?' . preg_quote($tag, '/') . '(?=[\s\/>])[^>]*>/i';
    }

    if (count($patterns) === 0) {
        return $str;
    }

    return preg_replace($patterns, "", $str);
}