爬取所有 <a> 标签的 href

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Print the href attribute of every <a> element found in test02.html.
from lxml import etree

# Parse the file into an element tree.
# NOTE: etree.parse() uses lxml's XML parser unless a parser is passed in,
# so test02.html has to be well-formed markup.
htmlEmt = etree.parse('test02.html')
# '//a/@href' selects the href attribute of every <a> element, wherever it
# sits in the document; xpath() returns the matches as a list.
result = htmlEmt.xpath('//a/@href')
print(result)
# Parenthesised single-argument print works on both Python 2 and Python 3.
print(type(result))
for href in result:
    print(href)

	
C:\Python27\python.exe C:/Users/TLCB/PycharmProjects/untitled/xpath/l1.py
['aaa', 'bbb']
<type 'list'>
aaa
bbb

Process finished with exit code 0

爬取li标签下的<a> 的 href

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Print the href attribute of every <a> element that is a direct child of an
# <li> element in test02.html.
from lxml import etree

# Parse the file into an element tree.
# NOTE: etree.parse() uses lxml's XML parser unless a parser is passed in,
# so test02.html has to be well-formed markup.
htmlEmt = etree.parse('test02.html')
# '//li/a/@href' restricts the match to <a> elements whose parent is an <li>;
# xpath() returns the matching href values as a list.
result = htmlEmt.xpath('//li/a/@href')
print(result)
# Parenthesised single-argument print works on both Python 2 and Python 3.
print(type(result))
for href in result:
    print(href)

C:\Python27\python.exe C:/Users/TLCB/PycharmProjects/untitled/xpath/l1.py
['aaa']
<type 'list'>
aaa
原文地址:https://www.cnblogs.com/hzcya1995/p/13349011.html