#coding:utf-8 from HTMLParser import HTMLParser import sys listCount=[] class MyHTMLParser(HTMLParser): #处理开始标签 def handle_starttag(self, tag, attrs): """ recognize start tag, like <div> :param tag: :param attrs: :return: """ print("Encountered a start tag:", tag) #处理结束标签 def handle_endtag(self, tag): """ recognize end tag, like </div> :param tag: :return: """ print("Encountered an end tag :", tag) #处理数据,标签之间的文本,并进行统计 def handle_data(self, data): """ recognize data, html content string :param data: :return: """ print("Encountered some data :", data) print('-----------------------------------') listCount.append(data) changdu=len(listCount) print listCount print changdu if changdu==54: FailCount=listCount.count('Fail') PassCount=listCount.count('Pass') print('统计失败的次数为:'+str(FailCount)) print('统计成功的次数为:'+str(PassCount)) else: print('continue') #处理结束的标签 def handle_startendtag(self, tag, attrs): """ recognize tag that without endtag, like <img /> :param tag: :param attrs: :return: """ print("Encountered startendtag :", tag) #处理注释 def handle_comment(self,data): """ :param data: :return: """ print("Encountered comment :", data) #打开html文件并逐行读取然后传给HTMLPrase进行解析 file=open('123.html','r') while 1: readFile=file.readline() print(readFile) parser = MyHTMLParser() parser.feed(readFile) if not readFile: break file.close()