python开发模板(自用 2)-- 模块清晰抓取

def _get_new_data(self, page_url, soup, final_str_name):
con_datas = []
res_data = {}
# url
res_data['url'] = page_url
res_data['name'] = 'hibernate'
contents = soup.find_all(attrs = {'class':'dl-horizontal'})
#print(contents)
for content in contents:
name = content.find('dt')
str_name = name.get_text()
str_name = str_name.strip()
conts = content.find_all('dd')
str_cont = conts[2].get_text() + ' '+ conts[3].get_text() + ' : '
str_url = conts[3].find('a')
str_cont = str_cont + str_url['href']
con_data = {}
con_data['name'] = str_name
con_data['cont'] = str_cont
con_datas.append(con_data)
# print("111111")
# print(con_data['name'])
# print("333333")
# print(con_data['cont'])
# print("222222")
return res_data, con_datas
原文地址:https://www.cnblogs.com/clover-xuqi/p/7150463.html