Perl 爬取 HTML:findvalues 方法

node2:/root/pachong/yylc#cat t500.html 
    <p id="p-page"><input type='submit' style='display:none' name='turnPage' id='turnPage'><input type='hidden' id='pageNum' name='pageNum' value='1'/>

<span onmouseover="this.className='cur-s-page'" onmouseout="this.className=''"><</span>

<span class='cur-s-page'>1</span>

<span onclick="document.getElementById('pageNum').value=2;document.getElementById('turnPage').click();" onmouseover="this.className='cur-s-page'" onmouseout="this.className=''">
2</span>

<span onclick="document.getElementById('pageNum').value=3;document.getElementById('turnPage').click();" onmouseover="this.className='cur-s-page'" onmouseout="this.className=''">
3</span>...   

<span onclick="document.getElementById('pageNum').value=1749;document.getElementById('turnPage').click();" onmouseover="this.className='cur-s-page'" onmouseout="this.className=''">
1749</span>

<span onclick="document.getElementById('pageNum').value=2;document.getElementById('turnPage').click();" onmouseover="this.className='cur-s-page'" onmouseout="this.className=''">
></span></p>
    </form>


node2:/root/pachong/yylc#perl t400.pl 
<html> @0 (IMPLICIT)
  <head> @0.0 (IMPLICIT)
  <body> @0.1 (IMPLICIT)
    <p id="p-page"> @0.1.0
      <input id="turnPage" name="turnPage" style="display:none" type="submit" /> @0.1.0.0
      <input id="pageNum" name="pageNum" type="hidden" value="1" /> @0.1.0.1
      <span onmouseout="this.className=''" onmouseover="this.className='cur-s-page'"> @0.1.0.2
        "<"
      <span class="cur-s-page"> @0.1.0.3
        "1"
      <span onclick="document.getElementById('pageNum').value=2;document.getElementById('turnPage').click();" onmouseout="this.className=''" onmouseover="this.className='cur-s-page'"> @0.1.0.4
        "2"
      <span onclick="document.getElementById('pageNum').value=3;document.getElementById('turnPage').click();" onmouseout="this.className=''" onmouseover="this.className='cur-s-page'"> @0.1.0.5
        "3"
      "...???"
      <span onclick="document.getElementById('pageNum').value=1749;document.getElementById('turnPage').click();" onmouseout="this.className=''" onmouseover="this.className='cur-s-page'"> @0.1.0.7
        "1749"
      <span onclick="document.getElementById('pageNum').value=2;document.getElementById('turnPage').click();" onmouseout="this.className=''" onmouseover="this.className='cur-s-page'"> @0.1.0.8
        ">"
@pageString is < 1 2 3 1749 >


node2:/root/pachong/yylc#cat t500.pl 
use LWP::UserAgent;  
use POSIX;  
use HTML::TreeBuilder::XPath;   
use Encode;   
use HTML::TreeBuilder;    
use Data::Dumper;
use HTML::TreeBuilder::XPath;  
use DBI;  
use Encode;
    # Parse the saved pager fragment (t500.html) and print the text of every
    # <span> that is a direct child of <p id="p-page">.
    use strict;      # pragmas are lexically scoped: they cover the statements below
    use warnings;

    my $html_file = "t500.html";
    my $tree      = HTML::TreeBuilder::XPath->new;

    # parse_file is inherited from HTML::Parser and returns undef (with $! set)
    # when the file cannot be opened; without this check a missing file would
    # silently produce an empty result list.
    $tree->parse_file($html_file)
        or die "cannot parse $html_file: $!\n";

    # findvalues returns one stringifiable value per matching node; the XPath
    # selects only <span> elements directly under the pager paragraph, so the
    # bare "..." text node between the spans is not included.
    my @pageString = $tree->findvalues('/html/body//p[@id="p-page"]/span');

    # The leading "@" is escaped so the label is printed literally; only the
    # second occurrence interpolates the array (elements joined by spaces).
    print "\@pageString is @pageString\n";

    # Release the parsed tree explicitly once it is no longer needed.
    $tree->delete;
node2:/root/pachong/yylc#perl t500.pl 
@pageString is < 1 2 3 1749 >



findvalues ($path)

Returns the values of the matching nodes as a list. 
This is mostly the same as findnodes_as_strings,
 except that the elements of the list are objects (with overloaded stringification) instead of plain strings.

返回匹配节点的值组成的列表。这个方法和 findnodes_as_strings 基本相同,区别在于列表中的元素是对象(重载了字符串化操作),而不是普通字符串。















原文地址:https://www.cnblogs.com/hzcya1995/p/13349801.html