[php-dom] php-dom使用注意事项

/*
注意事项:
  1. 在loadHTML之前,应该先确保内容是UTF-8编码,并将其中的非ASCII字符转换为HTML实体(HTML-ENTITIES),这样可以避免出现entity等解析报错;
  2. 已经使用php函数htmlspecialchars()转换过的html实体,经过DOM解析后会被解码还原为原始字符,如:&lt;br/&gt; 在dom解析之后,通过textContent读取到的就是 "<br/>",直接输出前需要重新转义;


*/ 
// Parse the HTML in $content into a list of entries, one per <div> element.
// Each entry holds: time, ftime, nation, nation_info, content, raw_content, img_list.
// $content is expected to be UTF-8 encoded HTML.
$arr_return = [];

// loadHTML() rejects an empty document (ValueError on PHP 8, warning before),
// and whitespace-only input contains no <div>, so there is nothing to parse.
$html = (string) $content;
if (trim($html) === '') {
    return $arr_return;
}

$doc = new DOMDocument('1.0', 'UTF-8');

// Collect libxml parse errors internally instead of emitting PHP warnings
// for every piece of non-conforming markup.
libxml_use_internal_errors(true);

// Turn every non-ASCII character into a numeric entity so that libxml does not
// misinterpret the UTF-8 bytes. This replaces
// mb_convert_encoding($content, 'HTML-ENTITIES', 'UTF-8'), which is deprecated
// since PHP 8.2; the parsed DOM contains the same characters either way.
$doc->loadHTML(mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8'));

// Drop the buffered parse errors so they do not pile up across calls.
libxml_clear_errors();

// NOTE (from the original author): the case where only a single entry was
// posted has not been fully thought through yet.
foreach ($doc->getElementsByTagName('div') as $div) {
    $time = $div->getAttribute('hnb-time');
    $nation = $div->getAttribute('hnb-nation');

    $p_nodes = $div->getElementsByTagName('p');
    // By convention the first <p> is the text content of the entry.
    $first_p = $p_nodes->item(0);

    // Every later <p> whose markup mentions an image extension is kept as an image.
    $arr_img_list = [];
    for ($p = 1; $p < $p_nodes->length; $p++) {
        $img = $doc->saveXML($p_nodes->item($p));
        if (preg_match('/jpg|png|gif|jpeg/i', $img)) {
            $arr_img_list[] = $img;
        }
    }

    $arr_return[] = [
        'time' => $time,
        // getAttribute() returns a string ('' when the attribute is missing);
        // date() needs an int and throws a TypeError on PHP 8 for non-numeric input.
        'ftime' => is_numeric($time) ? date('H:i', (int) $time) : '',
        'nation' => $nation,
        'nation_info' => Hnb_Model_Tag::getInstance()->getCndNationalInfoByID($nation),
        // Without a <p>, saveXML(null) would serialize the whole document and
        // reading textContent would hit null, so fall back to empty strings.
        'content' => $first_p !== null ? $doc->saveXML($first_p) : '',
        'raw_content' => $first_p !== null ? $first_p->textContent : '',
        'img_list' => $arr_img_list,
    ];
}

return $arr_return;
原文地址:https://www.cnblogs.com/shuman/p/5083027.html