网络爬虫基础练习

# -*- coding: utf-8 -*-

import requests
# Page to scrape: an HTML file served from localhost.
# NOTE(review): the `_ijt` query parameter looks like a session token issued
# by the IDE's built-in web server -- confirm it is still valid before running.
url = 'http://localhost:63342/bd/25.html?_ijt=q7qavr0q8g0hj999ls86b8iuq4'

# requests applies no timeout by default, so bound the wait explicitly
# instead of risking a hang if the server never answers.
res = requests.get(url, timeout=10)
# Fail loudly on a 4xx/5xx response rather than parsing an error page.
res.raise_for_status()
# Decode the body as UTF-8 instead of whatever charset requests would pick
# from the response headers.
res.encoding = 'utf-8'

from bs4 import BeautifulSoup

# Build the parse tree from the decoded response body using the
# 'html.parser' backend.
soup = BeautifulSoup(res.text, 'html.parser')

# Text of the first <h1> element, then the href of the first <a> element.
first_heading = soup.h1
print(first_heading.text)
first_anchor = soup.a
print(first_anchor.attrs['href'])

# Print the list of child nodes of every <li> element on the page.
for list_item in soup.select('li'):
    print(list_item.contents)

# Extract one news item's title, link, publish time, and source.
title_matches = soup.select('.news-list-title')
print(title_matches[0].contents)
print(soup.li.a.attrs['href'])
# Children 0 and 1 of the first '.news-list-info' element; presumably the
# publish time and the source, in that order -- confirm against the markup.
info_children = soup.select('.news-list-info')[0].contents
print(info_children[0].text)
print(info_children[1].text)

  

原文地址:https://www.cnblogs.com/a13798508446/p/8668845.html