# -*- coding: utf-8 -*-
"""b2_trsd_EDSD_new

Scan ``./data/EDSD<year>.txt`` line by line and append comma-separated rows
to ``./data/b2_<year>.csv``.

Adapted to the new version.

Two row shapes are produced, each starting with a newline and the most
recently seen three-letter tag:

* composite rows:  ``tag,position,Cnnn,0000,status,year,repeat, ``
* standalone rows: ``tag,position,C000,code,status,year,repeat,format``

A standalone row may be completed by a following continuation line (twelve
leading blanks) when the first line carried no status/repeat/format columns.

Special cases: none recorded.

Original article: https://www.cnblogs.com/smuxiaolei/p/7427648.html
"""
import re


ss = "./data/"
year = '17A'
filename = ss + 'EDSD%s.txt' % year

# NOTE(review): the published listing had lost every backslash, so the escapes
# below (\s, \d, \w, \W, \n) were restored from context -- confirm against a
# real input file.  Two spots are genuinely ambiguous:
#   * ``(?:X|\W)`` in p1 was printed as ``(?:X|W)``; ``\W`` is assumed so that
#     a blank in that column is accepted.
#   * ``..`` before the trailing digits is kept unescaped (any two characters)
#     exactly as printed; it may originally have been ``\.\.``.
p1 = r"^\s{4}(?:X|\W)\s{2}([A-Z]{3})\s\s.+\n"      # tag (original note: TC)
p2 = r"(^\d{3})\s{4}C\d{3}.+[CM]\s+\d\n"           # position (original note: 010)
p3 = r"^\d{3}\s{4}(C\d{3}).+[CM]\s+\d\n"           # composite code (original note: C552)
p4 = r"^\d{3}\s{4}C\d{3}.+([CM])\s+\d\n"           # status (original note: M)
p5 = r"^\d{3}\s{4}C\d{3}.+[CM]\s+(\d)\n"           # repeat (original note: 1)
# standalone: position (original note: 030)
p6 = (r"(^\d{3})\s{4}\d{4}.+[CM]\s{4}\d\s.*..\d+\n"
      r"|(^\d{3})\s{4}\w\d{3}\s.+[^\d]$\n")
# standalone: code (original note: 3286)
p7 = (r"^\d{3}\s{4}(\d{4}).+[CM]\s{4}\d\s.*..\d+\n"
      r"|^\d{3}\s{4}(\w\d{3})\s.+[^\d]$\n")
# standalone: status (original note: M)
p8 = (r"^\d{3}\s{4}\d{4}.+([CM])\s{4}\d\s.*..\d+\n"
      r"|^\s{12}[A-Z].+([CM])\s{4}\d\s.*..\d+\n")
# standalone: repeat (original note: 1)
p9 = (r"^\d{3}\s{4}\d{4}.+[CM]\s{4}(\d)\s.*..\d+\n"
      r"|^\s{12}[A-Z].+[CM]\s{4}(\d)\s.*..\d+\n")
# standalone: format (original note: an..35)
p10 = (r"^\d{3}\s{4}\d{4}.+[CM]\s{4}\d\s(.*..\d+)\n"
       r"|^\s{12}[A-Z].+[CM]\s{4}\d\s(.*..\d+)\n")

pattern1 = re.compile(p1)
pattern2 = re.compile(p2)
pattern3 = re.compile(p3)
pattern4 = re.compile(p4)
pattern5 = re.compile(p5)
pattern6 = re.compile(p6)
pattern7 = re.compile(p7)
pattern8 = re.compile(p8)
pattern9 = re.compile(p9)
pattern10 = re.compile(p10)


def convert_lines(lines, out, year_code):
    """Translate input lines into CSV fragments written to *out*.

    lines     -- iterable of text lines, each still ending in its newline
                 (every pattern requires the trailing ``\\n``).
    out       -- any object with a ``write(str)`` method.
    year_code -- text emitted as the fixed year column of every row.

    A small state machine (``flag``) remembers which column was written last,
    so each column is only appended directly after its predecessor.  Lines
    that appear before the first tag line are ignored; the original script
    raised ``TypeError`` there because the tag was still an empty tuple.
    """
    # (pattern, flag required beforehand or None, flag afterwards, trailing text)
    # A required flag of None marks the step that opens a new row.  The
    # original guard for pattern2 was ``flag == 1 or 2``, which is always
    # true, so composite rows may start in any state -- that is preserved.
    steps = (
        (pattern2, None, 2, ""),
        (pattern3, 2, 3, ",0000"),            # composite rows default to 0000
        (pattern4, 3, 4, "," + year_code),    # add the fixed year column
        (pattern5, 4, 5, ", "),
        (pattern6, None, 6, ",C000"),         # standalone rows default to C000
        (pattern7, 6, 7, ""),
        (pattern8, 7, 8, "," + year_code),    # add the fixed year column
        (pattern9, 8, 9, ""),
        (pattern10, 9, 10, ""),
    )

    tag = None
    flag = 0
    for line in lines:
        found = pattern1.findall(line)
        if found:
            tag = found[-1]
            flag = 1
        if tag is None:
            # No tag seen yet: there is nothing to prefix a row with.
            continue

        for pattern, required, reached, trailer in steps:
            if required is not None and flag != required:
                continue
            found = pattern.findall(line)
            if not found:
                continue
            flag = reached
            out.write("\n" + tag + "," if required is None else ",")
            for match in found:
                # findall gives a str for one-group patterns and a tuple for
                # the two-alternative ones (the unused group is ''); joining
                # yields the captured text in both cases.
                out.write("".join(match))
            out.write(trailer)


def main():
    """Run the conversion for the module-level ``year``."""
    # 'a' appends, 'w' would overwrite.  The output is opened once for the
    # whole run instead of once per input line, and both handles are closed
    # even if an error occurs.
    with open(filename) as source, open(ss + 'b2_%s.csv' % year, 'a') as target:
        convert_lines(source, target, year)


if __name__ == "__main__":
    main()