网络爬虫基础练习

import requests
from bs4 import BeautifulSoup

# Practice: download one news article page and parse it with BeautifulSoup.
url = 'http://news.gzcc.cn/html/2018/xiaoyuanxinwen_0329/9129.html'

# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(url, timeout=10)
# Stop on 4xx/5xx responses instead of parsing an error page as the article.
response.raise_for_status()
# Decode the body as UTF-8 when reading response.text
# (the page is presumably served as UTF-8 -- confirm if text looks garbled).
response.encoding = 'utf-8'
soup = BeautifulSoup(response.text, 'html.parser')

# Exploration snippets kept for reference; uncomment any line to try it.
# print(response)
# print(response.text)
# print(soup.p)
# print(soup.head)
# print(soup.head.name)
# print(soup.a.attrs)
# print(soup.li.contents)
# print(soup.li.text)
# Print the string inside the selected tag
# print(soup.li.a.string)
# print(soup.select('div'))

# print(soup.select('div[class="news-list-description"]'))
# Homework: find a website that has an <h1> and extract a few elements from it.
# Relies on `soup`, the parsed page built earlier in this script.

# All <h1> elements on the page, printed as a list of tags.
h = soup.select('h1')
print(h)

# Attribute dictionary of the first <a> tag on the page.
# soup.a is None when the page contains no anchor, so guard before using it.
first_anchor = soup.a
if first_anchor is not None:
    a_href = first_anchor.attrs
    print(a_href)

# Optional exploration: print the text of every <li> element.
# for item in soup.select('li'):
#     print(item.text)

# Text of the first element carrying the "show-title" class.
# select() returns an empty list when nothing matches, so check before indexing.
title = soup.select('.show-title')
if title:
    print(title[0].text)

# All elements carrying the "show-info" class, printed as a list of tags.
t = soup.select('.show-info')
print(t)

  

原文地址:https://www.cnblogs.com/miranda-76/p/8672466.html