Beautiful Soup的简介(3) CSS选择器

CSS选择器中,id在同一页面内应当唯一,不会重复;class则可以被多个元素重复使用。
soup.select('#title')  #代表找出id为title的元素
soup.select('.link')    #代表找出class为link的元素

用CSS选择器获取属性的方法:

soup.select('a')[0]['href']   #代表取出第一个a标签的href属性
# NOTE: the "panel-heading" div in this sample is never closed, so the parser
# nests the "panel-body" div inside it; that is why result 1 below also
# contains both lists.
html = """
<div class="panel">
    <div class="panel-heading">
        <h4>Hello</h4>
    <div class="panel-body">
        <ul class="list" id="list-1" name="elements">
            <li class="element">Foo</li>
            <li class="element">Bar</li>
            <li class="element">Jay</li>
        </ul>
        <ul class="list list-small" id="list-2">
            <li class="element">Foo</li>
            <li class="element">Bar</li>
        </ul>
    </div>
</div>
"""
from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'html5lib')

# Class selectors are written with a leading "."; a space between two
# selectors means "descendant of", so this finds "panel-heading" elements
# located inside "panel" elements.
heading_matches = soup.select('.panel .panel-heading')
print(1, heading_matches)

# Plain tag names take no prefix: every <li> located inside a <ul>.
nested_items = soup.select('ul li')
print(2, nested_items)

# Ids are written with a leading "#": elements with class "element" inside
# the element whose id is "list-2".
second_list_items = soup.select('#list-2 .element')
print(3, second_list_items)

# Show the type of a single entry of the list returned by select().
first_ul = soup.select('ul')[0]
print(4, type(first_ul))

1 [<div class="panel-heading"> <h4>Hello</h4> <div class="panel-body"> <ul class="list" id="list-1" name="elements"> <li class="element">Foo</li> <li class="element">Bar</li> <li class="element">Jay</li> </ul> <ul class="list list-small" id="list-2"> <li class="element">Foo</li> <li class="element">Bar</li> </ul> </div> </div>] 2 [<li class="element">Foo</li>, <li class="element">Bar</li>, <li class="element">Jay</li>, <li class="element">Foo</li>, <li class="element">Bar</li>] 3 [<li class="element">Foo</li>, <li class="element">Bar</li>] 4 <class 'bs4.element.Tag'>
# Same sample document as in the previous example.
html = """
<div class="panel">
    <div class="panel-heading">
        <h4>Hello</h4>
    <div class="panel-body">
        <ul class="list" id="list-1" name="elements">
            <li class="element">Foo</li>
            <li class="element">Bar</li>
            <li class="element">Jay</li>
        </ul>
        <ul class="list list-small" id="list-2">
            <li class="element">Foo</li>
            <li class="element">Bar</li>
        </ul>
    </div>
</div>
"""
from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'html5lib')

# Layer-by-layer selection: first pick every <ul>, then run select() again on
# each of them so that only that list's own <li> items are printed.
for ul_tag in soup.select('ul'):
    items = ul_tag.select('li')
    print(items)
[<li class="element">Foo</li>, <li class="element">Bar</li>, <li class="element">Jay</li>]
[<li class="element">Foo</li>, <li class="element">Bar</li>]

获取属性
# Sample document: two <ul> lists, each carrying its own id attribute.
html = """
<div class="panel">
    <div class="panel-heading">
        <h4>Hello</h4>
    <div class="panel-body">
        <ul class="list" id="list-1" name="elements">
            <li class="element">Foo</li>
            <li class="element">Bar</li>
            <li class="element">Jay</li>
        </ul>
        <ul class="list list-small" id="list-2">
            <li class="element">Foo</li>
            <li class="element">Bar</li>
        </ul>
    </div>
</div>
"""
from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'html5lib')

# Two ways to read an attribute of a selected tag; both print the same id.
for ul_tag in soup.select('ul'):
    # 1) index the tag directly with the attribute name
    print(ul_tag['id'])
    # 2) look the attribute up through the tag's attrs mapping
    print(ul_tag.attrs['id'])
list-1
list-1
list-2
list-2

获取内容
# Sample document: five <li> items spread over two lists.
html = """
<div class="panel">
    <div class="panel-heading">
        <h4>Hello</h4>
    <div class="panel-body">
        <ul class="list" id="list-1" name="elements">
            <li class="element">Foo</li>
            <li class="element">Bar</li>
            <li class="element">Jay</li>
        </ul>
        <ul class="list list-small" id="list-2">
            <li class="element">Foo</li>
            <li class="element">Bar</li>
        </ul>
    </div>
</div>
"""
from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'html5lib')

# Print the text content of every <li>, one per line.
for item in soup.select('li'):
    text = item.get_text()
    print(text)
Foo
Bar
Jay
Foo
Bar
 
 
原文地址:https://www.cnblogs.com/ecwork/p/7597249.html