xpath的基本用法

from lxml import etree
text = """
<div>
<ul>
<li class="item-0"><a href="link1,html">one</a></li>
<li class="item-1"><a href="link1,html">two</a></li>
<li class="item-inactive"><a href="link1,html">three</a></li>
<li class="item-1"><a href="link1,html">four</a></li>
<li class="item-0"><a href="link1,html">five</a>
</ul>
</div>
"""
html = etree.HTML(text)
# 从文本读取
html = etree.parse('test.html', etree.HTMLParser())
result = etree.tostring(html)  # bytes类型
# print(result)
# print(result.decode('utf-8'))  # string类型

#匹配
# rp1 = html.xpath('//*')
# print(rp1)

# rp2 = html.xpath('//li')
# print(rp2)

# 匹配文本
rp3 = html.xpath("//li[@class='item-0']/a/text()")
print(rp3)
rp4 = html.xpath("//li[@class='item-0']//text()")
print(rp4)
# 获取属性
rp5 = html.xpath('//li//a/@href')
print(rp5)

# 一个属性多个值 需要contains匹配
rp6 = html.xpath("//li[contains(@class, '-first')]/a/text()")
print(rp6)

# 多属性匹配 and连接
rp7 = html.xpath("//li[contains(@class, 'li') and @name='item']//text()")
print(rp7)
# 在条件中传入顺序 如数字 position()> last等等
rp8 = html.xpath("//li[1]/a/text()")
rp9 = html.xpath("//li[position()>2]/a/text()")
rp10 = html.xpath("//li[last()-1]/a/text()")
print(rp8)
print(rp9)
print(rp10)

  

你不能把坏习惯扔出窗外 但你可以一步步赶下电梯
原文地址:https://www.cnblogs.com/Ychao/p/9372216.html