采集化工内容写入TXT文本

Python爬虫视频教程零基础小白到scrapy爬虫高手-轻松入门

https://item.taobao.com/item.htm?spm=a1z38n.10677092.0.0.482434a6EmUbbW&id=564564604865

# -*- coding: utf-8 -*-
"""
Created on Tue Mar 15 08:53:08 2016
采集化工标准补录项目
@author: Administrator
"""
import requests,bs4

# Fetch a single standards page from bzwxw.com, collect the text of every
# <h5> element, and save the entries that mention the standard number,
# issuing department, or implementation date to hb.txt (UTF-8).
webpage="http://www.bzwxw.com/html/2016/1988_0116/9.html"

# A timeout keeps the script from hanging forever on an unresponsive server.
res=requests.get(webpage,timeout=30)
# Stop on HTTP errors (4xx/5xx) instead of silently parsing an error page.
res.raise_for_status()
# Force GBK decoding of the response body; the original author's note says
# Chinese text showed up as garbled characters.
res.encoding = 'gbk'

soup1=bs4.BeautifulSoup(res.text,"lxml")

# Page title; not written to the output file. Falls back to an empty string
# when the page has no <title> element rather than raising IndexError.
title=soup1.select('title')
title_content=title[0].getText() if title else ""

# Text of every <h5> element on the page.
StandardCode=soup1.select('h5')
content_list=[tag.getText() for tag in StandardCode]

# Only entries containing one of these labels are saved:
# standard number, issuing department, implementation date.
keywords=("标准编号","发布部门","实施日期")

# The file is opened only after the page was fetched and parsed, so a failed
# request does not truncate an existing hb.txt; the context manager closes
# the file even if writing raises.
with open("hb.txt",'w',encoding='utf-8') as text:
    for i in content_list:
        print ("i:",i)
        if any(keyword in i for keyword in keywords):
            text.write(i)
            # Keep each saved entry on its own line.
            if not i.endswith("\n"):
                text.write("\n")

原文地址:https://www.cnblogs.com/webRobot/p/5278512.html