获取 <li> 标签的所有 class

#!/usr/bin/env python
# -*- coding: utf-8 -*-
from lxml import etree

# Parse the sample document into an element tree.
# etree.parse uses lxml's XML parser by default, so test03.html must be well-formed.
htmlEmt = etree.parse('test03.html')
# Select the value of the class attribute of every <li> element in the document;
# <li> elements without a class attribute contribute nothing to the result.
result = htmlEmt.xpath('//li/@class')
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(result)
print(type(result))

<html><body><div>
    <ul>
         <li class="item-0">a01<a href="link1.html">first item</a></li>
         <li class="item-1">b02<a href="link2.html">second item</a></li>
         <li class="item-inactive">c03<a href="link3.html">third item</a></li>
         <li class="item-1">d04<a href="link4.html">fourth item</a></li>
         <li class="item-0">e05<a href="link5.html">fifth item</a></li>
		 <a class='aaaa'>ddd</a>
    </ul>
	<div>
        <li class="item-6"></li>
	</div>
    <li class="4444">aaaa</li>
    <li class="55555">bbbbb</li>
    <span>
        <a><li>cc</li></a>
    </span>
 </div></body></html>

 C:\Python27\python.exe C:/Users/TLCB/PycharmProjects/untitled/xpath/l2.py
['item-0', 'item-1', 'item-inactive', 'item-1', 'item-0', 'item-6', '4444', '55555']
<type 'list'>

	
	
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from lxml import etree

# Parse the sample document into an element tree.
# etree.parse uses lxml's XML parser by default, so test03.html must be well-formed.
htmlEmt = etree.parse('test03.html')
# Select the value of every class attribute in the document, whatever element
# carries it (so <a class='aaaa'> is included, not only <li> elements).
result = htmlEmt.xpath('//@class')
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(result)
print(type(result))
	
C:\Python27\python.exe C:/Users/TLCB/PycharmProjects/untitled/xpath/l2.py
['item-0', 'item-1', 'item-inactive', 'item-1', 'item-0', 'aaaa', 'item-6', '4444', '55555']
<type 'list'>

Process finished with exit code 0
	
原文地址:https://www.cnblogs.com/hzcya1995/p/13349009.html