perl 爬取 find_by_tag_name

find_by_tag_name

 @elements = $h->find_by_tag_name('tag', ...);
 $first_match = $h->find_by_tag_name('tag', ...);


在列表上下文中,返回 $h 本身及其下所有标签名与任一指定名称匹配的元素列表;在标量上下文中,返回(按先序遍历)找到的第一个匹配元素,如果没有匹配则返回 undef。

node2:/root/pachong/yylc#
node2:/root/pachong/yylc#cat t500.html 
   <tr>
                    <th>项目名</th>
                    <th>年利率</th>
                    <th>期 限</th>
                    <th>起购金额</th>
                    <th>进度</th>
                    <th>项目状态</th>
                    <th>操作</th>
                </tr>
                                <tr>
                	<td><div class="fresh"><p class="text-ellipsis-2"><i class="fresh-icon"></i><a href="/detail/31836-260-500-913-8627.htm">房贷3518号</a></p></div></td>
                    <td>12.00 %</td>
                    <td>15天</td>
                    <td>10.00元</td>
                    <td>
                    	<div class="wyd-list-bar">
                    				                    	<div class="wyd-inner-bar-1" style="70.42%;"></div>
		                                            </div>
                        	                        		<p class="wyd-list-txt">已融资<i>70.42%</i> | 剩余<span>266160.00</span>元</p>
                        	                    </td>
                    <td >
                    				            	投标中
			                                </td>
                    <td>
	                    <p class="pro-btn">
	                    					            	<a href="/detail/31836-260-500-706-8408.htm" class="icon-n-sprite icon-n-3" id="Btn">投 标</a>
				            	                    </p>
                    </td>
                </tr>
  use  LWP::UserAgent;    
  use POSIX;    
  use HTML::TreeBuilder::XPath;     
  use Encode;     
  use HTML::TreeBuilder;    
   # Print the first six cells of every <tr> in t500.html, one row per line,
   # with each cell followed by "||" as in the sample output of this post.
   use strict;
   use warnings;

   my $html_file = "t500.html";
   my $tree      = HTML::TreeBuilder::XPath->new;

   # parse_file returns undef (and sets $!) when the file cannot be opened.
   $tree->parse_file($html_file)
       or die "Cannot open $html_file: $!\n";

   foreach my $row ( $tree->find_by_tag_name("tr") ) {
       my @nodes = $row->content_list;

       # content_list yields element objects for tags and plain strings for
       # text nodes, and a row may hold fewer than six children; handle all
       # three cases instead of calling as_text on a non-object.
       my @fields;
       for my $i ( 0 .. 5 ) {
           my $node = $nodes[$i];
           if    ( !defined $node ) { push @fields, ''; }
           elsif ( ref $node )      { push @fields, $node->as_text; }
           else                     { push @fields, $node; }
       }

       print join( "||", @fields ), "||\n";
   }

   # Break the parent/child links so the tree's memory is released
   # (required on older HTML::Element releases, harmless on newer ones).
   $tree->delete;

node2:/root/pachong/yylc#perl t500.pl
项目名||年利率||期 限||起购金额||进度||项目状态||
房易贷3518号||12.00 %||15天||10.00元||已融资70.42% | 剩余266160.00元|| 投标中 ||



                                    
原文地址:https://www.cnblogs.com/hzcya1995/p/13349800.html