解决 lxml 报错 XMLSyntaxError: Opening and ending tag mismatch: meta line 14 and head, line 33, column 8

# !/usr/bin/env python
# -*- coding: utf-8 -*-
import urllib2
import urllib
import cookielib
import json
import httplib
import re
import requests
from lxml import etree
import StringIO
import time

# Reproduction script (Python 2): log in to the portal, download the
# navigation page, save it to fh1.html and try to parse it with etree.parse().
# The final parse step is the one that fails with XMLSyntaxError.

# A single session is reused for the login POST and the follow-up GET.
s = requests.session()
print s.headers
# NOTE(review): this value of url is overwritten further down before it is used.
url = "https://licai.yingyinglicai.com/user/login.htm"
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0'
}


# Form fields posted to the login endpoint.
login_url = 'http://10.3.200.9/login/VerifyLogin.jsp'
data = {'formmethod': 'post', 'logfile': '/wui/theme/ecology8/page/login.jsp?templateId=3&logintype=1&gopage=', 'loginid': '013800','logintype':'1','submit':'登录','userpassword':'432434343'}
# data = urllib.urlencode(data)
response = s.post(login_url, data=data, headers=headers)
#print  response
print response.status_code
print response.content
print response.headers
time.sleep(1)
# Fetch the navigation-links page through the same session.
url = 'http://10.3.200.9/tailong/syslink/daohanglianjie.jsp'
r = s.get(url, headers=headers)
r = r.text
#print r
print type(r)
# Convert the unicode text to a UTF-8 byte string before writing it to disk.
r=r.encode('utf8')
print type(r)
f=open('fh1.html','w')
f.write(r)
f.close()

# etree.parse() uses lxml's XML parser unless another parser is passed in.
# This is the statement that raises the XMLSyntaxError reported in the traceback.
htmlEmt=etree.parse('fh1.html')
print htmlEmt
# Example of an anchor in the page:
#<a href="http://20.2.200.17:7003/loan-web/4ALogon.jsp?tgt=TGT-130672-O1fcAq4HGXFxYjmzfMfuIxAFlqgOfEzNVXja1fDMm5C7IXb7Ox-cas01.example.org" target="_blank">Credit system</a>
# Select the href attribute of every <a> element that is a direct child of a <div>.
result=htmlEmt.xpath('//div/a/@href')
print result
print type(result)
print len(result)


C:\Python27\python.exe "C:\Program Files\JetBrains\PyCharm Community Edition 2017.1.5\helpers\pycharm\_jb_unittest_runner.py" --path C:/Users/tlcb/PycharmProjects/untitled/httprequest/test01.py
Testing started at 18:53 ...
Launching unittests with arguments python -m unittest discover -s C:/Users/tlcb/PycharmProjects/untitled/httprequest -p test01.py -t C:\Users\tlcb\PycharmProjects\untitled\httprequest in C:\Users\tlcb\PycharmProjects\untitled\httprequest
{'Connection': 'keep-alive', 'Accept-Encoding': 'gzip, deflate', 'Accept': '*/*', 'User-Agent': 'python-requests/2.18.4'}
200



{'Content-Length': '793', 'X-XSS-Protection': '1', 'Content-Encoding': 'gzip', 'Set-Cookie': 'loginfileweaver=%2Flogin%2FLogin.jsp%3Flogintype%3D1%26gopage%3D; path=/; expires=Mon, 12-Nov-2018 11:06:57 GMT, loginidweaver=013800; path=/; expires=Mon, 12-Nov-2018 11:06:57 GMT, loginfileweaver=%2Flogin%2FLogin.jsp%3Flogintype%3D1%26gopage%3D; path=/; expires=Mon, 12-Nov-2018 11:06:57 GMT, loginidweaver=6241; path=/; expires=Mon, 12-Nov-2018 11:06:57 GMT, languageidweaver=7; path=/; expires=Mon, 12-Nov-2018 11:06:57 GMT, JSESSIONID=abcbMpuaOjHNKdnHAt9Bw; path=/, BIGipServerMenhu_new_pool=350094090.20480.0000; path=/', 'Server': 'Resin/3.1.8', 'X-UA-Compatible': 'IE=8', 'Cache-Control': 'private', 'Date': 'Sat, 10 Nov 2018 11:06:57 GMT', 'X-Frame-Options': 'SAMEORIGIN', 'Content-Type': 'text/html; charset=UTF-8'}
<type 'unicode'>
<type 'str'>


Ran 1 test in 0.002s

FAILED (errors=1)

Error
Traceback (most recent call last):
  File "C:\Python27\lib\unittest\case.py", line 329, in run
    testMethod()
  File "C:\Python27\lib\unittest\loader.py", line 32, in testFailure
    raise exception
ImportError: Failed to import test module: test01
Traceback (most recent call last):
  File "C:\Python27\lib\unittest\loader.py", line 254, in _find_tests
    module = self._get_module_from_name(name)
  File "C:\Python27\lib\unittest\loader.py", line 232, in _get_module_from_name
    __import__(name)
  File "C:\Users\tlcb\PycharmProjects\untitled\httprequest\test01.py", line 42, in <module>
    htmlEmt=etree.parse('fh1.html')
  File "lxml.etree.pyx", line 2942, in lxml.etree.parse (src/lxml/lxml.etree.c:54187)
  File "parser.pxi", line 1528, in lxml.etree._parseDocument (src/lxml/lxml.etree.c:79485)
  File "parser.pxi", line 1557, in lxml.etree._parseDocumentFromURL (src/lxml/lxml.etree.c:79768)
  File "parser.pxi", line 1457, in lxml.etree._parseDocFromFile (src/lxml/lxml.etree.c:78843)
  File "parser.pxi", line 997, in lxml.etree._BaseParser._parseDocFromFile (src/lxml/lxml.etree.c:75698)
  File "parser.pxi", line 564, in lxml.etree._ParserContext._handleParseResultDoc (src/lxml/lxml.etree.c:71739)
  File "parser.pxi", line 645, in lxml.etree._handleParseResult (src/lxml/lxml.etree.c:72614)
  File "parser.pxi", line 585, in lxml.etree._raiseParseError (src/lxml/lxml.etree.c:71955)
XMLSyntaxError: Opening and ending tag mismatch: meta line 14 and head, line 33, column 8


Process finished with exit code 1

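解决办法:etree.parse() 默认使用严格的 XML 解析器,而下载到的页面是 HTML(例如 <meta> 标签没有闭合),并不是格式良好的 XML,所以会报标签不匹配的错误。改为先读取文件内容,再用 etree.HTML() 解析(它使用容错的 HTML 解析器)即可;也可以写成 etree.parse('fh1.html', etree.HTMLParser())。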

# !/usr/bin/env python
# -*- coding: utf-8 -*-
import urllib2
import urllib
import cookielib
import json
import httplib
import re
import requests
from lxml import etree
import StringIO
import time

# Working script (Python 2): log in to the portal, download the navigation
# page, save it to fh1.html, then parse the saved bytes with etree.HTML(),
# which uses lxml's lenient HTML parser instead of the strict XML parser
# that etree.parse() applies by default.
#
# NOTE(security): the login id and password below are hard-coded for the
# demonstration; load them from the environment or a prompt in real use.

# A single session is reused for the login POST and the follow-up GET.
s = requests.session()
print(s.headers)
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0'
}

# Form fields posted to the login endpoint.
login_url = 'http://10.3.200.9/login/VerifyLogin.jsp'
data = {'formmethod': 'post', 'logfile': '/wui/theme/ecology8/page/login.jsp?templateId=3&logintype=1&gopage=', 'loginid': '013800','logintype':'1','submit':'登录','userpassword':'432434343'}
response = s.post(login_url, data=data, headers=headers)
print(response.status_code)
print(response.content)
print(response.headers)
time.sleep(1)

# Fetch the navigation-links page through the same session.
url = 'http://10.3.200.9/tailong/syslink/daohanglianjie.jsp'
r = s.get(url, headers=headers)
r = r.text
print(type(r))
# Convert the unicode text to a UTF-8 byte string before writing it to disk.
r = r.encode('utf8')
print(type(r))

# The payload is already encoded bytes, so use binary mode: it avoids newline
# translation on Windows, and the with-blocks guarantee both handles are closed.
with open('fh1.html', 'wb') as f:
    f.write(r)

with open('fh1.html', 'rb') as fh1:
    fh2 = fh1.read()
print(fh2)
print(type(fh2))

# etree.HTML() tolerates markup that is not well-formed XML (e.g. an unclosed
# <meta> tag) and returns the root <html> element.
htmlEmt = etree.HTML(fh2)
print(htmlEmt)
# Example of an anchor in the page:
# <a href="http://20.2.200.17:7003/loan-web/4ALogon.jsp?tgt=TGT-130672-O1fcAq4HGXFxYjmzfMfuIxAFlqgOfEzNVXja1fDMm5C7IXb7Ox-cas01.example.org" target="_blank">Credit system</a>
# Select the href attribute of every <a> element that is a direct child of a <div>.
result = htmlEmt.xpath('//div/a/@href')
print(result)
print(type(result))
print(len(result))


<Element html at 0x359fc88>
['http://20.2.200.17:7003/loan-web/4ALogon.jsp?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://20.2.200.17:7003/loan-web/sqlogon.html?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.16.75.1:9001/datamanage/IM00000001_4a_signIn.do?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', '/tailong/syslink/oaforward.jsp?idName=oics&tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', '/tailong/syslink/oaforward.jsp?idName=tlorm&tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.3.248.1:7001/UtanWeb?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://20.2.201.13:7001/JF-web?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', '/tailong/syslink/oaforward.jsp?idName=qingdanchuandi&tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.2.234.3:9080/bcard_manage?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.3.251.1/ucdb?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'https://31.1.0.27:4008/ZXT?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.3.236.4:8080/moa/?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.3.236.2:8080/ETMS?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', '/tailong/syslink/oaforward.jsp?idName=itsm&tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.3.200.49:7002/dbportal-web/loginFromOther.do?frUrl=sys_dcdp/21Detail_inquiry/FCT_CST_DEP_BAL_AVG_RANK_P.cpt&channelId=05&tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 
'http://10.3.200.49:7002/dcdp-admin/loginFromPortal.do?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://tb.zjtlcb.com/third/tailong/callback?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', '/tailong/syslink/oaforward.jsp?idName=itpm&tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', '/tailong/syslink/goOsp.jsp?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', '/tailong/syslink/oaforward.jsp?idName=mdm&tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.3.215.20:8080/nqsky-meap-manager/index?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', '/tailong/syslink/oaforward.jsp?idName=tongyirenzheng&tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.3.243.3:7011/ifmmanage/ssologin?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.3.243.10:9001/ifmcounter/ssologin?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.3.243.10:9002/ifmmanage/ssologin?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.3.243.3:7010/ifmcounter/ssologin?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.16.72.1:8080/portal/index?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', '/tailong/syslink/goXwdt.jsp?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', '/tailong/syslink/oaforward.jsp?idName=crm&tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.16.38.7/innermanage?userId=013800&tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 
'http://20.3.220.1:8010?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', '/tailong/syslink/goMbbk.jsp?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.2.248.30:7002/shepherd?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.16.38.12:9081/mweb/prelogin.do?LoginType=R&_locale=zh_CN&BankId=9999&tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.3.215.20:8080/nqsky-meap-manager/index?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.16.76.3:8080/mdm/admin/login.do?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.3.242.36:7001/flow?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.16.38.7/Demo/corporbankDemo/login.htm?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.16.38.7/corporbank/login.htm?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.16.38.7/Demo/perbankDemo/login.htm?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.16.38.7/perbank?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://20.2.201.11:7001/abs/?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.3.200.60/xir/login.action?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.20.3.55/web/index.action?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', '/tailong/syslink/oaforward.jsp?idName=anquanshenfen&tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 
'http://10.3.238.4:8080?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://cwhsapp1.zjtlcb.com:8002/?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'href=?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', '/tailong/syslink/oaforward.jsp?idName=etlvp&tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', '/tailong/syslink/goCwhs.jsp?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.3.200.58:7002/vat?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', '/tailong/syslink/oaforward.jsp?idName=mail&tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', '/tailong/syslink/oaforward.jsp?idName=bpmLogin&tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', '/tailong/syslink/oaforward.jsp?idName=rdoc&tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', '/tailong/syslink/oaforward.jsp?idName=bangongyongpin&tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', '/tailong/syslink/oaforward.jsp?idName=dingcanguanli&tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', '/tailong/syslink/oaforward.jsp?idName=wangshangdingcan&tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', '/tailong/syslink/oaforward.jsp?idName=portaloa&tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.3.200.10/ucenter/zonghang/index.do?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', '/tailong/syslink/goRlzy.jsp?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.3.172.5?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 
'/tailong/syslink/goJxkh.jsp?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', '/tailong/syslink/oaforward.jsp?idName=xingzhenkaohe&tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', '/tailong/syslink/oaforward.jsp?idName=yuangongtousu&tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://20.2.201.15:8002/brms/system/Login/token4Alogin.do?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.3.244.2:7001/bbsp?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.2.247.105:8001/agent?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.3.232.1:8008/cafcs?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.3.250.2:8080/t10?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.3.227.5:9080/MSP/view/manager/login.jsp?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', '/tailong/syslink/oaforward.jsp?idName=jiejiegao&tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', '/tailong/syslink/oaforward.jsp?idName=longqing&tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', '/tailong/syslink/oaforward.jsp?idName=753check&tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.2.233.5:9080/Main_frame?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', '/tailong/syslink/oaforward.jsp?idName=shjd&tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.3.246.2:7001/newaml?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 
'/tailong/syslink/goAml.jsp?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', '/tailong/syslink/oaforward.jsp?idName=crmis&tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org', 'http://10.3.200.16:7001/UtanWeb/index.jsp?tgt=TGT-131756-PpAgby2mmr2VSLfc1spFPisa1ayGWQF031asgnlMNI3BQbAp1y-cas01.example.org']
<type 'list'>
79
原文地址:https://www.cnblogs.com/hzcya1995/p/13348997.html