核心重点：lxml

from lxml import html

# Parse the page source into an lxml element tree that XPath can query,
# using an HTML parser configured with encoding='utf-8'.
# NOTE(review): `pagehtml` and `nr` are not defined in this snippet --
# presumably set by earlier code (e.g. `nr` selected via XPath); confirm.
htmlStr = html.etree.HTML(
    pagehtml,
    parser=html.etree.HTMLParser(encoding='utf-8'),
)

# Serialize the `nr` node to UTF-8 bytes, then decode back to str,
# rebinding `nr` to the resulting markup text.
nr = html.tostring(nr, encoding="utf-8").decode("utf-8")
原文地址:https://www.cnblogs.com/wwwzzg168/p/9767722.html