python练习7

转自

https://www.liaoxuefeng.com/discuss/001409195742008d822b26cf3de46aea14f2b7378a1ba91000/0015108013789836e55bbf7d9da46878a812c8626ba9ae5000

#这个解析xml的让我束手无策

#看了一下评论区大神的代码,觉得非常值得学习,就转载一下

#主要的收获是只要定义三个处理事件的函数就可以解析目录、xml,而这个类可以从dict继承

#但是大神的代码主要集中在start_element函数中,也就是处理了节点中的属性值

#我的疑问是:如果要获取并保存节点中的文本内容(而不是属性值),应该怎么做?

  1 from xml.parsers.expat import ParserCreate
  2 import re ,enum,json
  3 
  4 #创建枚举便于查询
  5 @enum.unique
  6 class WeekDay(enum.Enum):
  7     Sun = 0
  8     Mon = 1
  9     Tue = 2
 10     Wed = 3
 11     Thu = 4
 12     Fri = 5
 13     Sat = 6
 14 
 15 
 16 class WeatherParserHandler(dict):
 17 # <yweather:condition text="Haze" code="21" temp="28" date="Wed, 27 May 2015 11:00 am CST" />
 18             #<yweather:forecast day="Wed" date="27 May 2015" low="20" high="33" text="Partly Cloudy" code="30" />
 19 # <yweather:location city="Beijing" region="" country="China"/>
 20 #<yweather:location city="Beijing" region="" country="China"/>
 21     def end_element(self, name):
 22      pass
 23 
 24 
 25     def data(self, text):
 26      pass
 27 
 28     def start_element(self ,name ,attrs):
 29      weather_regex =re.compile(r"yweather:(w*)")
 30      weather_match =weather_regex.match(name)
 31      #符合条件的情况下:
 32      if weather_match:
 33          group_mark =weather_match.group(1)
 34          if group_mark=="location":
 35              self['city']=attrs["city"]
 36              self['country']=attrs["country"]
 37          #检查日期
 38          elif group_mark=="condition":
 39             today =str(attrs['date']).split(",")[0]
 40             if WeekDay[today]:
 41                 self.__today =WeekDay[today].value
 42                 #不是星期六的情况下
 43                 if self.__today!=6:
 44                  self.__tomorrow=self.__today+1
 45                  #星期六的情况下
 46                 else:
 47                  self.__tomorrow=0
 48             pass
 49          #检查天气
 50          elif group_mark=="forecast" :
 51             #创建二级字典
 52             attr =dict()
 53             if WeekDay[attrs["day"]].value==self.__today:
 54                 attr["low"]=int(attrs["low"])
 55                 attr["high"] = int(attrs["high"])
 56                 attr['text'] =attrs['text']
 57                 self["today"]=attr
 58             elif WeekDay[attrs["day"]].value==self.__tomorrow:
 59                 attr["low"] = int(attrs["low"])
 60                 attr["high"] = int(attrs["high"])
 61                 attr['text'] = attrs['text']
 62                 self["tomorrow"]=attr
 63 
 64 
 65 
 66 
 67 def parse_weather(xml):
 68     weather_parser =ParserCreate()
 69     weather_handler =WeatherParserHandler()
 70     weather_parser.StartElementHandler = weather_handler.start_element
 71     weather_parser.EndElementHandler = weather_handler.end_element
 72     weather_parser.CharacterDataHandler = weather_handler.data
 73     weather_parser.Parse(xml)
 74     return  weather_handler
 75     return json.dumps(weather_handler)
 76 
 77 
 78 
 79 # 测试:
 80 data = r'''<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
 81 <rss version="2.0" xmlns:yweather="http://xml.weather.yahoo.com/ns/rss/1.0" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#">
 82     <channel>
 83         <title>Yahoo! Weather - Beijing, CN</title>
 84         <lastBuildDate>Wed, 27 May 2015 11:00 am CST</lastBuildDate>
 85         <yweather:location city="Beijing" region="" country="China"/>
 86         <yweather:units temperature="C" distance="km" pressure="mb" speed="km/h"/>
 87         <yweather:wind chill="28" direction="180" speed="14.48" />
 88         <yweather:atmosphere humidity="53" visibility="2.61" pressure="1006.1" rising="0" />
 89         <yweather:astronomy sunrise="4:51 am" sunset="7:32 pm"/>
 90         <item>
 91             <geo:lat>39.91</geo:lat>
 92             <geo:long>116.39</geo:long>
 93             <pubDate>Wed, 27 May 2015 11:00 am CST</pubDate>
 94             <yweather:condition text="Haze" code="21" temp="28" date="Wed, 27 May 2015 11:00 am CST" />
 95             <yweather:forecast day="Wed" date="27 May 2015" low="20" high="33" text="Partly Cloudy" code="30" />
 96             <yweather:forecast day="Thu" date="28 May 2015" low="21" high="34" text="Sunny" code="32" />
 97             <yweather:forecast day="Fri" date="29 May 2015" low="18" high="25" text="AM Showers" code="39" />
 98             <yweather:forecast day="Sat" date="30 May 2015" low="18" high="32" text="Sunny" code="32" />
 99             <yweather:forecast day="Sun" date="31 May 2015" low="20" high="37" text="Sunny" code="32" />
100         </item>
101     </channel>
102 </rss>
103 '''
104 weather = parse_weather(data)
105 assert weather['city'] == 'Beijing', weather['city']
106 assert weather['country'] == 'China', weather['country']
107 assert weather['today']['text'] == 'Partly Cloudy', weather['today']['text']
108 assert weather['today']['low'] == 20, weather['today']['low']
109 assert weather['today']['high'] == 33, weather['today']['high']
110 assert weather['tomorrow']['text'] == 'Sunny', weather['tomorrow']['text']
111 assert weather['tomorrow']['low'] == 21, weather['tomorrow']['low']
112 assert weather['tomorrow']['high'] == 34, weather['tomorrow']['high']
113 print('Weather:', str(weather))


原文地址:https://www.cnblogs.com/MalcolmMeng/p/8442956.html