python系列之(2)PyQuery的用法

 

1.了解

pyquery库是jQuery的Python实现,能够以jQuery的语法来操作和解析 HTML 文档,易用性和解析速度都很好。

2.安装

pip install pyquery

 

3.引用

from pyquery import PyQuery as pq

4.初始化

1)字符串

# Sample HTML markup held in a plain string: a <ul id="container" class="list">
# with three <li> items whose classes are object-1 .. object-3.
html = """
<html lang="en">
    <head>
        simple good
        <title>PyQuery</title>
    </head>
    <body>
        <ul id="container" class="list">
            <li class="object-1">Python</li>
            <li class="object-2">amazing</li>
            <li class="object-3">wonderful</li>
        </ul>
    </body>
</html>
"""

# Initialise a PyQuery object directly from the HTML string.
doc = pq(html)

2)url

# Initialise a PyQuery object from a URL; the page is fetched when pq() is
# called, so this line needs network access.
response = pq(url='https://www.baidu.com')
# Select the <head> element of the fetched document and print it.
print(response("head"))

3)文件

# The filename argument is the path to an HTML file on disk.
test_html = pq(filename = 'test.html')
# Print the type of the object pq() returned, then the object itself.
print(type(test_html))
print(test_html)

5.使用

# -*- coding: UTF-8 -*-
# Walk-through of common PyQuery operations on a small sample document:
# CSS selectors, pseudo-class selectors, traversal, and attribute/text access.
from pyquery import PyQuery as pq

html = """
<html lang="en">
    <head>
        simple good
        <title>PyQuery</title>
    </head>
    <body>
        <ul id="container" class="list">
            <li class="object-1">Python</li>
            <li class="object-2">amazing</li>
            <li class="object-3">wonderful</li>
        </ul>
    </body>
</html>
"""

doc = pq(html)

# --- Basic CSS selectors ---------------------------------------------------
# The whole document is printed first, then one selection per selector.
print(doc)
for _selector in (
    '#container',           # the element whose id is "container"
    '.object-1',            # elements carrying the class "object-1"
    'body',                 # the <body> element
    'html #container',      # combined selectors: #container inside <html>
    '#container .list li',  # <li> inside a .list nested inside #container.
                            # NOTE: in this document "list" is a class of the
                            # #container element itself, so nothing matches
                            # and an empty line is printed; '#container.list li'
                            # or '.list li' would select the three items.
):
    print(doc(_selector))
print('-----------------------------------')

# --- Pseudo-class selectors ------------------------------------------------
for _selector in (
    'li:nth-child(2)',        # the <li> that is the second child
    'li:first-child',         # the <li> that is the first child
    'li:last-child',          # the <li> that is the last child
    "li:contains('Python')",  # the <li> whose text contains "Python"
):
    print(doc(_selector))
print('-----------------------------------')

# --- Traversal with find() -------------------------------------------------
# The two selections below are reused for the rest of the script.
_container = doc.find('#container')
_second_item = doc.find('.object-2')

print(_container)                # the element with id "container"
print(doc.find('li'))            # every <li>
print(_container.children())     # children of #container
print(_second_item.parent())     # parent of .object-2
print(_second_item.siblings())   # siblings of .object-2
print('-----------------------------------')

# --- Attributes and text ---------------------------------------------------
# Value of the "class" attribute of .object-2.
print(_second_item.attr('class'))

# Text inside .object-1.
_first_item = doc.find('.object-1')
print(_first_item.text())

# remove('li') strips the <li> elements out of #container in the document
# itself, so the text left in the <ul> is empty.
_emptied = _container.remove('li')
print(_emptied.text())

# The closing separator is one dash shorter than the others; kept as-is.
print('----------------------------------')

输出如下:

<html lang="en">
    <head>
        simple good
        <title>PyQuery</title>
    </head>
    <body>
        <ul id="container" class="list">
            <li class="object-1">Python</li>
            <li class="object-2">amazing</li>
            <li class="object-3">wonderful</li>
        </ul>
    </body>
</html>
<ul id="container" class="list">
            <li class="object-1">Python</li>
            <li class="object-2">amazing</li>
            <li class="object-3">wonderful</li>
        </ul>
    
<li class="object-1">Python</li>
            
<body>
        <ul id="container" class="list">
            <li class="object-1">Python</li>
            <li class="object-2">amazing</li>
            <li class="object-3">wonderful</li>
        </ul>
    </body>

<ul id="container" class="list">
            <li class="object-1">Python</li>
            <li class="object-2">amazing</li>
            <li class="object-3">wonderful</li>
        </ul>
    

-----------------------------------
<li class="object-2">amazing</li>
            
<li class="object-1">Python</li>
            
<li class="object-3">wonderful</li>
        
<li class="object-1">Python</li>
            
-----------------------------------
<ul id="container" class="list">
            <li class="object-1">Python</li>
            <li class="object-2">amazing</li>
            <li class="object-3">wonderful</li>
        </ul>
    
<li class="object-1">Python</li>
            <li class="object-2">amazing</li>
            <li class="object-3">wonderful</li>
        
<li class="object-1">Python</li>
            <li class="object-2">amazing</li>
            <li class="object-3">wonderful</li>
        
<ul id="container" class="list">
            <li class="object-1">Python</li>
            <li class="object-2">amazing</li>
            <li class="object-3">wonderful</li>
        </ul>
    
<li class="object-1">Python</li>
            <li class="object-3">wonderful</li>
        
-----------------------------------
object-2
Python

----------------------------------

参考:https://www.jianshu.com/p/5def029dbdf8

https://www.jianshu.com/p/770c0cdef481

原文地址:https://www.cnblogs.com/kumufengchun/p/11837094.html