3 爬取网页的通用代码框架

爬取网页的通用代码框架

 

 1 """通用代码框架"""
 2 
 3 
 4 import requests
 5 
 6 def getHTMLText(url):
 7     try:
 8         r = requests.get(url, timeout = 30)
 9         r.raise_for_status() # 如果状态码不是200,引发HTTPERROR
10         r.encoding = r.apparent_encoding
11         return r.text
12     except:
13         return "产生异常"
14 
15 
16 if __name__ == "__main__":
17     url = "https://www.baidu.com"
18     print(getHTMLText(url))
原文地址:https://www.cnblogs.com/sruzzg/p/13041583.html