<pre name="code" class="cpp">jrhmpt01:/root/lwp# cat data.html
<div class="m-page J-ajax-page">
    <a class="changePage" page="1" href="javascript:void(0);">首页</a>
    <a class="changePage" page="11" href="javascript:void(0);">上一页</a>
    <a class="changePage" page="11" href="javascript:void(0);">11</a>
    <a class="cur" href="javascript:void(0)">12</a>
    <span class="page_info">12/12</span>
</div>
<div class="m-page J-ajax-page">
    <a class="changePage" page="1" href="javascript:void(0);">首页</a>
    <a class="changePage" page="11" href="javascript:void(0);">上一页</a>
    <a class="changePage" page="11" href="javascript:void(0);">11</a>
    <a class="cur" href="javascript:void(0)">12</a>
    <span class="page_info">12/12</span>
</div>

jrhmpt01:/root/lwp# cat c1.pl
use strict;
use warnings;

use LWP::UserAgent;
use DBI;
use POSIX;
use Data::Dumper;
use HTML::TreeBuilder;
use HTML::TreeBuilder::XPath;

# Example 1: print the text of every <a class="changePage"> found under
# <body> in the local file data.html, using an XPath query.

# NOTE(review): this user agent is configured but not used by this snippet.
my $ua = LWP::UserAgent->new;
$ua->timeout(10);
$ua->env_proxy;
$ua->agent("Mozilla/8.0");

my $tree = HTML::TreeBuilder::XPath->new;

# parse_file returns undef (with $! set) when the file cannot be opened.
$tree->parse_file("data.html")
    or die "Cannot parse data.html: $!";

# findvalues (plural) returns one text value per matching node; findvalue
# (singular) would return all of them concatenated into a single string.
my @title = $tree->findvalues('/html/body//a[@class="changePage"]');

# The backslash keeps the literal text "@title" in the output label.
print "\@title is @title\n";

# Release the parse tree explicitly once we are done with it.
$tree->delete;

jrhmpt01:/root/lwp# perl c1.pl
@title is 首页 上一页 11 首页 上一页 11

my @title= $tree->findvalues('/html/body//a[@class="changePage"]');

表示:在 body 下查找所有 class="changePage" 的 a 标签,并返回这些标签的文本值

jrhmpt01:/root/lwp# cat c1.pl
use strict;
use warnings;

use LWP::UserAgent;
use DBI;
use POSIX;
use Data::Dumper;
use HTML::TreeBuilder;
use HTML::TreeBuilder::XPath;

# Example 2: walk every <a> element in data.html and print the value of its
# "page" attribute, skipping anchors that do not carry one.

# NOTE(review): this user agent is configured but not used by this snippet.
my $ua = LWP::UserAgent->new;
$ua->timeout(10);
$ua->env_proxy;
$ua->agent("Mozilla/8.0");

my $tree = HTML::TreeBuilder::XPath->new;

# parse_file returns undef (with $! set) when the file cannot be opened.
$tree->parse_file("data.html")
    or die "Cannot parse data.html: $!";

for my $anchor ($tree->find_by_tag_name('a')) {
    # attr() returns a single scalar: the attribute value, or undef when the
    # element has no such attribute (e.g. the <a class="cur"> anchors).
    my $page = $anchor->attr('page');

    # Test for defined/non-empty rather than truth so page="0" is not lost.
    next unless defined $page && length $page;

    # The label is the literal text "$_" so the output matches the
    # transcript below.
    print "\$_ is $page\n";
}

# Release the parse tree explicitly once we are done with it.
$tree->delete;

jrhmpt01:/root/lwp# perl c1.pl
$_ is 1
$_ is 11
$_ is 11
$_ is 1
$_ is 11
$_ is 11

遍历所有 a 标签,输出每个标签的 page 属性的值