perl 根据//div/a 获取href


use LWP::UserAgent;
use HTTP::Cookies;
use HTTP::Headers;
use HTTP::Response;
use Encode;
use JSON;
use File::Temp qw/tempfile/;
use HTML::TreeBuilder::XPath;
use Encode;      
use HTML::TreeBuilder;       
use Data::Dumper;   
use HTML::TreeBuilder::XPath;     
use DBI;     
use Encode;
# ---------------------------------------------------------------------------
# HTTP client setup, portal login, and retrieval of the navigation page.
#
# Leaves the following for the rest of the script:
#   $ua - shared LWP::UserAgent (cookie jar attached, proxy taken from env)
#   $r  - package variable holding the raw body of the navigation page
# ---------------------------------------------------------------------------

# NOTE(review): verify_hostname => 0 turns off TLS hostname verification.
# Kept because the original script requested it; confirm this is acceptable.
my $ua = LWP::UserAgent->new( ssl_opts => { verify_hostname => 0 } );
$ua->timeout(10);
$ua->env_proxy;
print $ua->timeout, "\n";

my $now = time();

$ua->agent('Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0');
print $ua->agent, "\n";

# Persist cookies on disk so the login session is reused by later requests.
my $cookie_jar = HTTP::Cookies->new(
    file           => 'lwp_cookies.txt',
    autosave       => 1,
    ignore_discard => 1,
);
$ua->cookie_jar($cookie_jar);

# Log in. The form fields are sent exactly as the original script sent them.
# NOTE(review): credentials are hard-coded in the script.
my $login_url = 'http://10.3.200.9/login/VerifyLogin.jsp';
my $res = $ua->post(
    $login_url,
    {
        'formmethod'   => 'post',
        'logfile'      => '/wui/theme/ecology8/page/login.jsp?templateId=3&logintype=1&gopage=',
        'loginid'      => '88',
        'logintype'    => '1',
        'submit'       => '登录',
        'userpassword' => '888',
    }
);

# Only 4xx/5xx is reported: a redirect after a form POST is not an error.
warn "login POST to $login_url failed: " . $res->status_line . "\n"
    if $res->is_error;

# Fetch the navigation page. Without it there is nothing to scan, so stop
# with a clear message instead of continuing with an undefined $r.
our $r;
my $response = $ua->get('http://10.3.200.9/tailong/syslink/daohanglianjie.jsp');
die "cannot fetch navigation page: " . $response->status_line . "\n"
    unless $response->is_success;
$r = $response->content;
# geturl($url)
#
# Issues a GET for $url through the shared $ua.
#
# On success the response body is stored in the package variable $r (as the
# original code did) and returned.
# On failure "<url>-><status line>" is printed, the same text is passed to
# SendSms(), and an empty return is given.
sub geturl {
    my $url = shift;

    my $response = $ua->get($url);
    if ( $response->is_success ) {
        $r = $response->content;
        return $r;
    }

    my $failure = $url . "->" . $response->status_line;
    print $failure, "\n";
    SendSms($failure);
    return;
}


# SendSms($message)
#
# Sends $message to the SMS gateway with a GET request through the shared $ua.
#
# The message is percent-encoded before being placed in the query string.
# It normally contains a URL and an HTTP status line, so raw '?', '&', '#'
# or spaces would otherwise cut the message short on the gateway side.
#
# Returns the HTTP::Response of the gateway call. A warning is emitted when
# the gateway does not answer with a success status.
sub SendSms {
    my $message = shift;
    $message = '' unless defined $message;

    # Work on UTF-8 bytes so every escaped octet fits in %XX.
    my $escaped = $message;
    utf8::encode($escaped) if utf8::is_utf8($escaped);

    # Escape everything except the RFC 3986 unreserved characters.
    $escaped =~ s/([^A-Za-z0-9\-._~])/sprintf('%%%02X', ord($1))/ge;

    # Alternative endpoints left over from earlier revisions (not used):
    #   http://10.5.100.232:8080/tlcbspt/sendText
    #   http://10.3.172.15:3000/api/SMSsendx?sourceip=$IP&message=$message&phone=18072722237|17605860611|17605860662
    my $sms_response =
        $ua->get("http://10.5.100.232:8001/?message='$escaped'&phone=18072722237");

    warn "SMS gateway request failed: " . $sms_response->status_line . "\n"
        unless $sms_response->is_success;

    return $sms_response;
}


# Save the fetched navigation page ($r) to fh1.html so it can be parsed from
# disk afterwards.
# "or" is used instead of "||": with "||" the die was attached to the
# filename string and could never run when open failed.
open my $fh1, '>', 'fh1.html' or die "open fh1.html failed: $!";
print {$fh1} ( defined $r ? $r : '' );
# Buffered write errors only surface at close, so check it as well.
close $fh1 or die "close fh1.html failed: $!";

#my $tree= HTML::TreeBuilder::XPath->new; 
#$tree->parse_file("fh1.html"); 
#my @bumen = $tree->findvalues('//div/span'); 
#foreach (@bumen){
#  $_=~s/\|//g;
#  next if $_=~/^$/;
#  print $_;
#  print "\n";
#};

# Parse the saved page into an XPath-capable tree.
# parse_file returns a false value when the file cannot be opened, so fail
# loudly instead of walking an empty tree.
my $tree = HTML::TreeBuilder::XPath->new;
$tree->parse_file("fh1.html")
    or die "cannot parse fh1.html: $!";

# String values of every <a> that is a direct child of a <div>.
# The original iterated over this list with an empty loop body; that no-op
# loop is dropped, the list itself is kept.
my @xitong = $tree->findvalues('//div/a');

## Collect the link URLs and probe each one.
# The tree in $tree has already been parsed from fh1.html, so the file is not
# parsed a second time here.
# Every <a> element is looked up and its href attribute is read.
@Links = $tree->find_by_tag_name('a');

# A URL containing any of these fragments is printed but not requested.
# They are compared as literal text, not as regular expressions.
my @skip_fragments = (
    'http://tb.zjtlcb.com/third/tailong/callback',
    'http://cwhsapp1.zjtlcb.com:8002',
    'http://10.3.246.2:7001/newaml',
    'http://10.3.244.2:7001/bbsp',
    #'http://10.2.247.70:7001/brms/',
    #'http://10.3.200.10/ucenter/zonghang/index.do',
);

LINK:
foreach my $anchor (@Links) {
    $i++;    # count of <a> elements visited; not read in the visible code

    my $href = $anchor->attr('href');
    next LINK unless defined $href;

    # Only absolute http(s) links and portal-relative /tailong/ links count.
    my $is_portal_path = $href =~ m{^/tailong/};
    next LINK unless ( $href =~ m{^http} or $is_portal_path );

    # Portal-relative paths are resolved against the portal host.
    $href = 'http://10.3.200.9' . $href if $is_portal_path;

    print "$href\n";

    next LINK if grep { index( $href, $_ ) >= 0 } @skip_fragments;

    geturl($href);
}

                     <div class="daohang-kuai">
                                <div class="daohang-org"><span>互联网金融部</span></div>
                                <div class="daohang-links"><a href="http://10.16.38.7/innermanage?userId=013800&tgt=TGT-518341-bAJd4aeK0sZnlJQC3ET2U2VPUCSDmNlMm3dq0VITiIrOQUx6ik-cas01.example.org" target="_blank">电子银行管理系统</a> <span >|</span><a href="http://20.3.220.1:8010?tgt=TGT-518341-bAJd4aeK0sZnlJQC3ET2U2VPUCSDmNlMm3dq0VITiIrOQUx6ik-cas01.example.org" target="_blank">动态密码锁管理系统(新
)</a> <span >|</span><div class="clear"></div></div><div class="daohang-links"><a href="/tailong/syslink/goMbbk.jsp?tgt=TGT-518341-bAJd4aeK0sZnlJQC3ET2U2VPUCSDmNlMm3dq0VITiIrOQUx6ik-cas01.example.org" target="_blank">金融移动服务站业务管理系统</a> <span >|</span><a href="http://10.2.248.30:7002/shepherd?tgt=TGT-518341-bAJd4aeK0sZnlJQC3ET2U2VPUCSDmNlMm3dq0VITiIrOQUx6ik-cas01.example.org" target="_blank">自助机具管理系统</a> <span >|</span><div class="clear"></div></div><div class="daohang-links"><a href="http://10.16.38.12:9081/mweb/prelogin.do?LoginType=R&_locale=zh_CN&BankId=9999&tgt=TGT-518341-bAJd4aeK0sZnlJQC3ET2U2VPUCSDmNlMm3dq0VITiIrOQUx6ik-cas01.example.org" target="_blank">小鱼Bank后管系统</a> <span >|</span><a href="http://10.3.215.20:8080/nqsky-meap-manager/index?tgt=TGT-518341-bAJd4aeK0sZnlJQC3ET2U2VPUCSDmNlMm3dq0VITiIrOQUx6ik-cas01.example.org" target="_blank">移动应用管理平台</a> <span >|</span><div class="clear"></div></div><div class="daohang-links"><a href="http://10.16.76.3:8080/mdm/admin/login.do?tgt=TGT-518341-bAJd4aeK0sZnlJQC3ET2U2VPUCSDmNlMm3dq0VITiIrOQUx6ik-cas01.example.org" target="_blank">移动设备管理平台</a> <span >|</span><a href="http://10.3.242.36:7001/flow?tgt=TGT-518341-bAJd4aeK0sZnlJQC3ET2U2VPUCSDmNlMm3dq0VITiIrOQUx6ik-cas01.example.org" target="_blank">工单协作平台</a> <span >|</span><div class="clear"></div></div><div class="daohang-links"><a href="http://10.16.38.7/Demo/corporbankDemo/login.htm?tgt=TGT-518341-bAJd4aeK0sZnlJQC3ET2U2VPUCSDmNlMm3dq0VITiIrOQUx6ik-cas01.example.org" target="_blank">企业网上银行演示</a> <span >|</span><a href="http://10.16.38.7/corporbank/login.htm?tgt=TGT-518341-bAJd4aeK0sZnlJQC3ET2U2VPUCSDmNlMm3dq0VITiIrOQUx6ik-cas01.example.org" target="_blank">企业网上银行</a> <span >|</span><div class="clear"></div></div><div class="daohang-links"><a href="http://10.16.38.7/Demo/perbankDemo/login.htm?tgt=TGT-518341-bAJd4aeK0sZnlJQC3ET2U2VPUCSDmNlMm3dq0VITiIrOQUx6ik-cas01.example.org" target="_blank">个人网上银行演示</a> <span >|</span><a 
href="http://10.16.38.7/perbank?tgt=TGT-518341-bAJd4aeK0sZnlJQC3ET2U2VPUCSDmNlMm3dq0VITiIrOQUx6ik-cas01.example.org" target="_blank">个人网上银行</a> <span >|</span><div class="clear"></div></div>
                        </div>
原文地址:https://www.cnblogs.com/hzcya1995/p/13349001.html