13 Beautiful Soup库的基本元素

举例:


"""Beautiful Soup库的基本元素"""


import requests
from bs4 import BeautifulSoup

# Demo: the basic Beautiful Soup elements (Tag, name, attrs, NavigableString).
# The comment above each print() is the output recorded for the demo page.
url = "https://python123.io/ws/demo.html"
# requests applies no timeout by default, so bound the wait explicitly
# instead of letting an unresponsive server hang the script.
r = requests.get(url, timeout=30)
# Fail loudly on an HTTP 4xx/5xx response rather than parsing an error page.
r.raise_for_status()
demo = r.text
soup = BeautifulSoup(demo, "html.parser")
# Tip: print(soup.prettify()) shows the whole parsed document, indented.

# <title>This is a python demo page</title>
print(soup.title)

# soup.<name> returns the first tag with that name in the document.
tag = soup.a
# <a class="py1" href="http://www.icourse163.org/course/BIT-268001" id="link1">Basic Python</a>
print(tag)
# a
print(tag.name)
# Walk up the tree one parent at a time, printing each ancestor's name.
# p
print(tag.parent.name)
# body
print(tag.parent.parent.name)
# html
print(tag.parent.parent.parent.name)
# [document]
print(tag.parent.parent.parent.parent.name)
# {'href': 'http://www.icourse163.org/course/BIT-268001', 'class': ['py1'], 'id': 'link1'}
print(tag.attrs)
# ['py1']
print(tag.attrs['class'])
# http://www.icourse163.org/course/BIT-268001
print(tag.attrs['href'])
# <class 'dict'>
print(type(tag.attrs))
# <class 'bs4.element.Tag'>
print(type(tag))


# Basic Python
print(tag.string)
# <p class="title"><b>The demo python introduces several python courses.</b></p>
print(soup.p)
# The demo python introduces several python courses.
print(soup.p.string)
# <class 'bs4.element.NavigableString'>
print(type(soup.p.string))


# HTML注释(comment)的类型
"""Beautiful Soup库的基本元素"""


import requests
from bs4 import BeautifulSoup

# Demo: how an HTML comment and ordinary text differ once parsed.
newHTML = "<b><!--This is a comment--></b><p>This is not a comment</p>"
newsoup = BeautifulSoup(newHTML, "html.parser")

# .string is used because each tag here holds a single child; for a tag with
# several children, print(newsoup.b.contents) lists all of them instead.
nodes = (newsoup.b.string, newsoup.p.string)

# Printed text, in order:
#   This is a comment
#   This is not a comment
for node in nodes:
    print(node)

# Printed types, in order:
#   <class 'bs4.element.Comment'>
#   <class 'bs4.element.NavigableString'>
for node in nodes:
    print(type(node))
原文地址:https://www.cnblogs.com/sruzzg/p/13046881.html