backup-analysis-barcode-distribute-recv-py

analysis_barcode.py

---------

  1 # -*- coding:utf-8 -*-
  2 
  3 # python3
  4 
  5 import sys
  6 import re
  7 
  8 # 分析下发和收回的条码
  9 # 1 下发过多少
 10 # 2 回收了多少
 11 # 3 下发过没有回收的有多少
 12 # 4 没下发过但收的有多少
 13 
 14 # 下发格式
 15 # barcode=1234567890, machineNo=8, allowToFillCode=1, ctime=2020-07-22 07:27:39
 16 
 17 # 回收的格式
 18 # time=2020-07-22 06:18:19, barcode=1234567890, machineNo=2, ...
 19 
 20 distribute_file_name_pattern = "RecordBarcodeDistribute-2020-{}.txt"
 21 recv_file_name_pattern = "RecordRecvData-2020-{}.txt"
 22 
 23 
 24 def read_distributed_info(date):
 25     '''
 26     处理下发的数据
 27     返回文件行数, 和 设备号和向该设备下发过的条码列表的字典
 28     (rowCount, {"1": ["barcode0", "barcode1", ...], ...})
 29     '''
 30 
 31     def parse_distribute_row_info(row):
 32         matchInfo = re.match(r'^barcode=(.{10}), machineNo=(d+), allowToFillCode=(d)', row)
 33         if matchInfo is None:
 34             return None
 35         else:
 36             return ( matchInfo.group(1), matchInfo.group(2), matchInfo.group(3))
 37 
 38     def append_to(rowInfo, resultDict):
 39         barcode, machineNo, _ = rowInfo
 40         if machineNo not in resultDict:
 41             resultDict[machineNo] = []
 42         resultDict[machineNo].append(barcode)
 43 
 44     infos = {}
 45     rowCount = 0
 46 
 47     fileName = distribute_file_name_pattern.format(date)
 48     with open(fileName, "r") as distributeFile:
 49         for row in distributeFile:
 50             rowCount += 1
 51 
 52             parseResult = parse_distribute_row_info(row)
 53             if parseResult is None:
 54                 print("[WARN] not matched distribute info:", row)
 55             elif parseResult[2] == "1":
 56                 append_to(parseResult, infos)
 57             # else ignore
 58 
 59     return (rowCount, infos)
 60 
 61 
 62 
 63 def read_received_info(date):
 64     """
 65     处理回收的数据
 66     返回文件行数, 和 设备号和该设备返回的信息
 67     (rowCount, {"1", [{"time": "yyyy-MM-dd HH:mm:ss",
 68                        "barcode": "barcode-value"},
 69                       ...],
 70                 ...
 71                 } )
 72     """
 73 
 74     def parse_recv_row_info(row):
 75         matchInfo = re.match(r'^time=(.{19}), barcode=([^,]+), machineNo=(d+)', row)
 76         if matchInfo is None:
 77             return None
 78         else: 
 79             return {"machineNo": matchInfo.group(3),
 80                     "barcode" : matchInfo.group(2),
 81                     "time": matchInfo.group(1)}
 82 
 83     def append_to(rowInfo, resultDict):
 84         machineNo = rowInfo["machineNo"]
 85         if machineNo not in resultDict:
 86             resultDict[machineNo] = []
 87         resultDict[machineNo].append({"barcode" : rowInfo["barcode"], "time" : rowInfo["time"]})
 88 
 89 
 90     infos = {}
 91     rowCount = 0
 92 
 93     fileName = recv_file_name_pattern.format(date)
 94     with open(fileName, "r") as recvFile:
 95         for row in recvFile:
 96             rowCount += 1
 97 
 98             parseResult = parse_recv_row_info(row)
 99             if parseResult is None:
100                 print("[WARN] not matched recv info:", row)
101             else:
102                 append_to(parseResult, infos)
103 
104     return (rowCount, infos)
105 
106 
107 
def calculate_data_count(data):
    """
    Return the total number of items across all the lists in ``data``.

    ``data`` is any iterable of sized collections (for example the
    ``.values()`` view of a per-machine dictionary). An empty iterable
    yields 0.
    """
    return sum(len(a_list) for a_list in data)
113 
def calculate_count_by_machine(distributeInfo, recvInfo):
    """
    Build per-machine distributed/received counts.

    Args:
        distributeInfo: mapping of machine number -> list of distributed
            barcodes.
        recvInfo: mapping of machine number -> list of received records.

    Returns:
        A list with one dict per machine that appears in either mapping,
        sorted by machine number, each of the form
        ``{"machineNo": ..., "distCount": n, "recvCount": m, "diff": m - n}``.
        A machine missing from one mapping counts as 0 on that side.
    """
    result = []
    # Keys are sorted as-is; when they are strings the order is
    # lexicographic (e.g. "10" sorts before "2").
    for machineNo in sorted(set(distributeInfo) | set(recvInfo)):
        # Use the parameter, not a same-looking global: the original body
        # read "distributedInfo", which only exists in the script section.
        distCount = len(distributeInfo.get(machineNo, ()))
        recvCount = len(recvInfo.get(machineNo, ()))
        result.append({"machineNo": machineNo,
                       "distCount": distCount,
                       "recvCount": recvCount,
                       "diff": recvCount - distCount})

    return result
130 
131 
def get_repeated_recv_barcodes(recvInfo):
    """
    Split the received records into de-duplicated records and repeats.

    Within one machine, a record is a repeat when an earlier record of that
    machine has the same ``"barcode"`` and the same ``"time"``.

    Args:
        recvInfo: mapping of machine number -> list of dicts with
            ``"barcode"`` and ``"time"`` keys.

    Returns:
        A tuple ``(deDuplicationInfo, repeatedInfo)``:
        ``deDuplicationInfo`` has the same keys as ``recvInfo`` with only the
        first occurrence of every (barcode, time) pair kept, in original
        order; ``repeatedInfo`` is a list of ``(machineNo, barcode, time)``
        tuples, one per dropped record.
    """
    repeatedInfo = []
    deDuplicationInfo = {}

    for machineNo, items in recvInfo.items():
        # A set of already-seen (barcode, time) pairs replaces the linear
        # rescan of the kept list for every record.
        seen = set()
        uniqueItems = []
        for item in items:
            key = (item["barcode"], item["time"])
            if key in seen:
                repeatedInfo.append((machineNo,) + key)
            else:
                seen.add(key)
                uniqueItems.append(item)
        deDuplicationInfo[machineNo] = uniqueItems

    return (deDuplicationInfo, repeatedInfo)
153 
154 
def get_recv_but_not_distributed_barcodes(distributeInfo, recvInfo):
    """
    Find barcodes that were received without a matching distribution.

    Matching is done per machine and per occurrence: each distributed
    barcode can account for at most one received record with that barcode.

    Args:
        distributeInfo: mapping of machine number -> list of distributed
            barcodes. Not modified.
        recvInfo: mapping of machine number -> list of dicts with a
            ``"barcode"`` key.

    Returns:
        A dict mapping machine number -> list of unmatched received barcodes
        (in received order). Machines with nothing unmatched are omitted.
    """
    result = {}

    for machineNo, items in recvInfo.items():
        # How many times each barcode is still available to be matched.
        # A machine that never had anything distributed has no entry in
        # distributeInfo; treat that as an empty list instead of failing.
        available = {}
        for barcode in distributeInfo.get(machineNo, []):
            available[barcode] = available.get(barcode, 0) + 1

        for item in items:
            barcode = item["barcode"]
            if available.get(barcode, 0) > 0:
                available[barcode] -= 1
            else:
                result.setdefault(machineNo, []).append(barcode)

    return result
172 
173 
def get_distributed_but_not_recv_barcodes(distributeInfo, recvInfo):
    """
    Find barcodes that were distributed but never received back.

    Matching is done per machine and per occurrence: each received record
    can account for at most one distributed barcode with the same value.

    Args:
        distributeInfo: mapping of machine number -> list of distributed
            barcodes.
        recvInfo: mapping of machine number -> list of dicts with a
            ``"barcode"`` key. Not modified.

    Returns:
        A dict mapping machine number -> list of distributed barcodes with
        no matching received record (in distributed order). Machines with
        nothing outstanding are omitted.
    """
    result = {}

    for machineNo, barcodes in distributeInfo.items():
        # How many received records per barcode are still unmatched.
        # A machine that never sent anything back has no entry in recvInfo;
        # treat that as an empty list instead of failing.
        unmatched = {}
        for item in recvInfo.get(machineNo, []):
            received = item["barcode"]
            unmatched[received] = unmatched.get(received, 0) + 1

        for barcode in barcodes:
            if unmatched.get(barcode, 0) > 0:
                unmatched[barcode] -= 1
            else:
                result.setdefault(machineNo, []).append(barcode)

    return result
212 
213 
214 
if __name__ == "__main__":
    # Script entry point: analyse one day's distribute/receive logs and print
    # a report. The single argument is the month-day of the files, e.g. 04-01.
    if len(sys.argv) == 1:
        print("no input, stopped.")
        sys.exit(0)

    date = sys.argv[1]

    # Read both log files
    disFileRowCount, distributedInfo = read_distributed_info(date)
    recvFileRowCount, recvInfo = read_received_info(date)

    # Total number of lines in each file
    print("distributed file lines count:", disFileRowCount)
    print("received    file lines count:", recvFileRowCount)

    # Total number of distributed and received records
    print("\ndistributed count:", calculate_data_count(distributedInfo.values()))
    print("received    count:", calculate_data_count(recvInfo.values()))

    # Per-machine distributed/received counts and their difference
    print("\ncount by machine:")
    print("machine | distributed-count | recv-count | recv-count - distributed-count")
    for machineCountInfo in calculate_count_by_machine(distributedInfo, recvInfo):
        print("{machineNo:2} | {distCount:4} | {recvCount:4} | {diff:4}".format(**machineCountInfo))

    # Which received records are repeats, plus the de-duplicated records
    print("\nrepeated recv barcode:\nmachine | barcode | time")
    deDuplicationRecvInfo, repeatInfo = get_repeated_recv_barcodes(recvInfo)
    for item in repeatInfo:
        print("{:2} | {:10} | {}".format(*item))
    print("\ncount:", len(repeatInfo))

    # Per-machine comparison again, after removing the repeats
    print("\ncount by machine after de duplication:")
    print("machine | distributed-count | recv-count | recv-count - distributed-count")
    diffCountInfo = {"distCount" : 0, "recvCount": 0 , "diff" : 0}
    for machineCountInfo in calculate_count_by_machine(distributedInfo, deDuplicationRecvInfo):
        diffCountInfo["distCount"] += machineCountInfo["distCount"]
        diffCountInfo["recvCount"] += machineCountInfo["recvCount"]
        diffCountInfo["diff"] += machineCountInfo["diff"]
        print("{machineNo:5} | {distCount:4} | {recvCount:4} | {diff:4}".format(**machineCountInfo))
    print("count | {distCount:4} | {recvCount:4} | {diff:4}".format(**diffCountInfo))

    # Barcodes that were received but never distributed
    print("\nreceived but not distributed barcodes:")
    recvNonDistributedCount = 0
    for k, v in get_recv_but_not_distributed_barcodes(distributedInfo, deDuplicationRecvInfo).items():
        print(k, ": count:", len(v))
        recvNonDistributedCount += len(v)
        for barcode in v:
            print("    [{}]".format(barcode))
    print("count: ", recvNonDistributedCount)

    # Barcodes that were distributed but never received back
    print("\ndistributed but not received barcodes:")
    distributedNonRecvCount = 0
    for k, v in get_distributed_but_not_recv_barcodes(distributedInfo, deDuplicationRecvInfo).items():
        print(k, ": count:", len(v))
        distributedNonRecvCount += len(v)
        for barcode in v:
            print("    [{}]".format(barcode))
    print("count: ", distributedNonRecvCount)

--------- THE END ---------

原文地址:https://www.cnblogs.com/shadow-abyss/p/13383746.html