bs4数据解析基础



import requests
from bs4 import BeautifulSoup

if __name__ == "__main__":
    # Demo script: parse a local HTML file with BeautifulSoup and show the
    # basic ways of locating tags. The lookups are left commented out so they
    # can be enabled one at a time while experimenting.

    # Request headers for User-Agent spoofing; not needed while the input is a
    # local file, kept for reference.
    # headers = {
    #     'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    # }

    # Load the data of the local HTML document into the soup object. The
    # context manager guarantees the file handle is closed even if parsing
    # raises; the soup is built inside the block and remains usable afterwards.
    with open('./text.html', 'r', encoding='utf-8') as fp:
        soup = BeautifulSoup(fp, 'lxml')

    # print(soup)

    # soup.tagName returns the first tag with that name in the document;
    # indexing the tag with ['attr'] reads one of its attributes.
    # print(soup.input['href'])

    # Equivalent to soup.div.
    # print(soup.find('div'))

    # Locate a tag by attribute (class_ carries a trailing underscore because
    # `class` is a Python keyword).
    # print(soup.find('div', class_='hzbtabs'))

    # Every <div> tag rather than only the first one.
    # print(soup.find_all("div"))

    # CSS-selector lookup; select() returns a list of matches.
    # print(soup.select(".hzbtabs"))

    # '>' selects direct children; take the first match and read its href.
    # print(soup.select('.hzbbannertxt >a')[0]['href'])
原文地址:https://www.cnblogs.com/huahuawang/p/12692333.html