这部分是PoseTrack数据集的Evaluation Tools中的code:https://github.com/leonid-pishchulin/poseval.git
Step1 :首先是README.md
要求预测的结果要按所属视频分开保存(video_pred_1.mat video_pred_2.mat ...),并且与GT内部的数据形式要相同,下面是最终的json格式:
``` { "annolist": [ { "image": [ { "name": "images/bonn_5sec/000342_mpii/00000001.jpg" } ], "annorect": [ { "x1": [625], "y1": [94], "x2": [681], "y2": [178], "score": [0.9], "track_id": [0], "annopoints": [ { "point": [ { "id": [0], "x": [394], "y": [173], }, { ... } ] } ] }, { ... } ], }, { ... } ] } ```
也就是说在 MATLAB 中存储的数据格式应当与上述 JSON 结构一一对应(保持相同的嵌套字段层级:annolist → image / annorect → annopoints):
然后通过poseval-master/matlab/mat2json.m 转换成json格式。然后调用poseval-master/py/evaluate.py --args..得到测试结果。
Step2 : evaluate.py eval_helper.py evaluateAP.py 这三个用到的code解读。
首先利用输入的参数调用 eval_helper.load_data_dir(argv):
gtFramesAll,prFramesAll = eval_helpers.load_data_dir(argv)
这里输出的 prFramesAll 中有三个 keys():score、annopoints、seq_id,依次存储了所有待测图片的结果,同时需要保证 len(gt) == len(pr)。
然后就是评估结果了:
apAll,preAll,recAll = evaluateAP(gtFramesAll,prFramesAll)
打开evaluateAP.py 代码如下:
def evaluateAP(gtFramesAll, prFramesAll):
    """Compute per-joint AP, precision and recall over all frames."""
    # PCKh-style matching threshold on the head-size-normalized distance.
    matchThresh = 0.5
    # Assign predicted poses to GT poses; the MOT bookkeeping (4th value)
    # is not needed for AP and is discarded here.
    scoresPerJoint, labelsPerJoint, gtJointCounts, _ = eval_helpers.assignGTmulti(
        gtFramesAll, prFramesAll, matchThresh)
    # Compute average precision (AP), precision and recall per part.
    return computeMetrics(scoresPerJoint, labelsPerJoint, gtJointCounts)
scoreAll[ i ][ imgidx ] = np.append( scoreAll[ i ][ imgidx ], s[ i ] )  # 预测(Pred)结果中第 i 个关键点的 score
labelAll[ i ][ imgidx ] = np.append( labelAll[ i ][ imgidx ], m[ i ] )  # 如果第 i 个预测关键点与 GT 匹配成功(归一化距离 ≤ 阈值),那么 m[ i ] = 1,否则 = 0
nGTall.shape = nJoints x len(gtFrames) 存的是每张图片中各个关节点的数量
接下来看computeMetrics 这个函数:
def computeMetrics(scoresAll, labelsAll, nGTall): apAll = np.zeros((nGTall.shape[0] + 1, 1)) recAll = np.zeros((nGTall.shape[0] + 1, 1)) preAll = np.zeros((nGTall.shape[0] + 1, 1)) # iterate over joints for j in range(nGTall.shape[0]): scores = np.zeros([0, 0], dtype=np.float32) labels = np.zeros([0, 0], dtype=np.int8) # iterate over images for imgidx in range(nGTall.shape[1]): scores = np.append(scores, scoresAll[j][imgidx]) labels = np.append(labels, labelsAll[j][imgidx]) # compute recall/precision values nGT = sum(nGTall[j, :]) precision, recall, scoresSortedIdxs = eval_helpers.computeRPC(scores, labels, nGT) if (len(precision) > 0): apAll[j] = eval_helpers.VOCap(recall, precision) * 100 preAll[j] = precision[len(precision) - 1] * 100 recAll[j] = recall[len(recall) - 1] * 100 apAll[nGTall.shape[0]] = apAll[:nGTall.shape[0], 0].mean() recAll[nGTall.shape[0]] = recAll[:nGTall.shape[0], 0].mean() preAll[nGTall.shape[0]] = preAll[:nGTall.shape[0], 0].mean() return apAll, preAll, recAll
这里涉及到 precision 和 recall . 不了解的可以参考检测和姿态估计的评价标准 http://blog.csdn.net/xiaojiajia007/article/details/78746149
Precision = TP / (TP + FP)
Recall = TP / (TP + FN)
AP衡量的是学出来的模型在每个类别上的好坏,mAP衡量的是学出的模型在所有类别上的好坏,得到AP后mAP的计算就变得很简单了,就是取所有AP的平均值。
最后附上eval_helper.assignGTmulti(gtFrames,prFrames,distThresh) 代码的标注
def assignGTmulti(gtFrames, prFrames, distThresh):
    """Assign predicted poses to GT poses frame by frame.

    For each image, predictions are greedily matched to the GT pose with
    the highest PCKh (fraction of joints whose head-size-normalized
    distance is <= distThresh). Returns per-joint scores and 0/1 match
    labels for AP computation, per-joint GT counts, and per-frame info
    for MOT metrics.
    """
    assert (len(gtFrames) == len(prFrames))
    nJoints = Joint().count
    # part detection scores: scoresAll[joint][image] -> growing 1-D array
    scoresAll = {}
    # positive / negative labels, parallel to scoresAll
    labelsAll = {}
    # number of annotated GT joints per image, shape (nJoints, nImages)
    nGTall = np.zeros([nJoints, len(gtFrames)])
    for pidx in range(nJoints):
        scoresAll[pidx] = {}
        labelsAll[pidx] = {}
        for imgidx in range(len(gtFrames)):
            scoresAll[pidx][imgidx] = np.zeros([0, 0], dtype=np.float32)
            labelsAll[pidx][imgidx] = np.zeros([0, 0], dtype=np.int8)
    # GT track IDs
    trackidxGT = []
    # prediction track IDs
    trackidxPr = []
    # number of GT poses per image (how many annotated people each frame has)
    nGTPeople = np.zeros((len(gtFrames), 1))
    # number of predicted poses per image
    nPrPeople = np.zeros((len(gtFrames), 1))
    # container to save info for computing MOT metrics
    motAll = {}
    for imgidx in range(len(gtFrames)):
        # distance between predicted and GT joints, (nPr, nGT, nJoints)
        dist = np.full((len(prFrames[imgidx]["annorect"]),
                        len(gtFrames[imgidx]["annorect"]), nJoints), np.inf)
        # score of the predicted joint, (nPr, nJoints)
        score = np.full((len(prFrames[imgidx]["annorect"]), nJoints), np.nan)
        # body joint prediction exists
        hasPr = np.zeros((len(prFrames[imgidx]["annorect"]), nJoints), dtype=bool)
        # body joint is annotated in GT
        hasGT = np.zeros((len(gtFrames[imgidx]["annorect"]), nJoints), dtype=bool)

        trackidxGT = []
        trackidxPr = []
        # keep only predictions that actually carry annopoints with points
        idxsPr = []
        for ridxPr in range(len(prFrames[imgidx]["annorect"])):
            if (("annopoints" in prFrames[imgidx]["annorect"][ridxPr].keys()) and
                ("point" in prFrames[imgidx]["annorect"][ridxPr]["annopoints"][0].keys())):
                idxsPr += [ridxPr];
        prFrames[imgidx]["annorect"] = [prFrames[imgidx]["annorect"][ridx] for ridx in idxsPr]

        nPrPeople[imgidx, 0] = len(prFrames[imgidx]["annorect"])
        nGTPeople[imgidx, 0] = len(gtFrames[imgidx]["annorect"])
        # iterate over GT poses: record track IDs and which joints are annotated
        for ridxGT in range(len(gtFrames[imgidx]["annorect"])):
            # GT pose
            rectGT = gtFrames[imgidx]["annorect"][ridxGT]
            if ("track_id" in rectGT.keys()):
                trackidxGT += [rectGT["track_id"][0]]
            pointsGT = []
            if len(rectGT["annopoints"]) > 0:
                pointsGT = rectGT["annopoints"][0]["point"]
            # iterate over all possible body joints
            for i in range(nJoints):
                # GT joint in LSP format
                ppGT = getPointGTbyID(pointsGT, i)
                if len(ppGT) > 0:
                    hasGT[ridxGT, i] = True
        # iterate over predicted poses: record scores and present joints
        for ridxPr in range(len(prFrames[imgidx]["annorect"])):
            # predicted pose
            rectPr = prFrames[imgidx]["annorect"][ridxPr]
            if ("track_id" in rectPr.keys()):
                trackidxPr += [rectPr["track_id"][0]]
            pointsPr = rectPr["annopoints"][0]["point"]
            for i in range(nJoints):
                # predicted joint in LSP format
                ppPr = getPointGTbyID(pointsPr, i)
                if len(ppPr) > 0:
                    # NOTE(review): this assert is mis-formed — the string is
                    # part of the condition, not a message; the intended form
                    # is `assert "score" in ppPr.keys(), "keypoint score is missing"`.
                    # It still fails when "score" is absent, so behavior is OK.
                    assert("score" in ppPr.keys() and "keypoint score is missing")
                    score[ridxPr, i] = ppPr["score"][0]
                    hasPr[ridxPr, i] = True

        if len(prFrames[imgidx]["annorect"]) and len(gtFrames[imgidx]["annorect"]):
            # predictions and GT are present
            # iterate over GT poses to fill dist, shape (nPr, nGT, nJoints)
            for ridxGT in range(len(gtFrames[imgidx]["annorect"])):
                # GT pose (one person in this image)
                rectGT = gtFrames[imgidx]["annorect"][ridxGT]
                # compute reference distance as head size
                headSize = getHeadSize(rectGT["x1"][0], rectGT["y1"][0],
                                       rectGT["x2"][0], rectGT["y2"][0])
                pointsGT = []
                if len(rectGT["annopoints"]) > 0:
                    pointsGT = rectGT["annopoints"][0]["point"]
                # iterate over predicted poses
                for ridxPr in range(len(prFrames[imgidx]["annorect"])):
                    # predicted pose
                    rectPr = prFrames[imgidx]["annorect"][ridxPr]
                    pointsPr = rectPr["annopoints"][0]["point"]
                    # iterate over all possible body joints
                    for i in range(nJoints):
                        # GT joint
                        ppGT = getPointGTbyID(pointsGT, i)
                        # predicted joint
                        ppPr = getPointGTbyID(pointsPr, i)
                        # compute head-size-normalized distance between
                        # predicted and GT joint locations
                        if hasPr[ridxPr, i] and hasGT[ridxGT, i]:
                            pointGT = [ppGT["x"][0], ppGT["y"][0]]
                            pointPr = [ppPr["x"][0], ppPr["y"][0]]
                            dist[ridxPr, ridxGT, i] = np.linalg.norm(np.subtract(pointGT, pointPr)) / headSize

            dist = np.array(dist)
            hasGT = np.array(hasGT)
            # number of annotated joints per GT pose
            nGTp = np.sum(hasGT, axis=1)
            # entries of dist within the threshold become True, else False
            match = dist <= distThresh
            # pck[pr, gt] = number of matching joints, shape (nPr, nGT)
            pck = 1.0 * np.sum(match, axis=2)
            for i in range(hasPr.shape[0]):
                for j in range(hasGT.shape[0]):
                    if nGTp[j] > 0:
                        # normalize: matched joint count / GT joint count
                        pck[i, j] = pck[i, j] / nGTp[j]

            # preserve best GT match only: zero out all but each
            # prediction's best GT column
            idx = np.argmax(pck, axis=1)
            val = np.max(pck, axis=1)
            for ridxPr in range(pck.shape[0]):
                for ridxGT in range(pck.shape[1]):
                    if (ridxGT != idx[ridxPr]):
                        pck[ridxPr, ridxGT] = 0
            # for each GT, the best remaining prediction; -1 means unmatched
            prToGT = np.argmax(pck, axis=0)
            val = np.max(pck, axis=0)
            prToGT[val == 0] = -1

            # info to compute MOT metrics
            mot = {}
            for i in range(nJoints):
                mot[i] = {}
            # store hasGT / hasPr / dist into mot, keeping only matched points
            for i in range(nJoints):
                # pose indices with joint i annotated in GT
                ridxsGT = np.argwhere(hasGT[:, i] == True); ridxsGT = ridxsGT.flatten().tolist()
                # pose indices with joint i present in predictions
                ridxsPr = np.argwhere(hasPr[:, i] == True); ridxsPr = ridxsPr.flatten().tolist()
                #mot[i]["trackidxGT"] = [trackidxGT[idx] for idx in ridxsGT]
                #mot[i]["trackidxPr"] = [trackidxPr[idx] for idx in ridxsPr]
                mot[i]["ridxsGT"] = np.array(ridxsGT)
                mot[i]["ridxsPr"] = np.array(ridxsPr)
                mot[i]["dist"] = np.full((len(ridxsGT), len(ridxsPr)), np.nan)
                for iPr in range(len(ridxsPr)):
                    for iGT in range(len(ridxsGT)):
                        if (match[ridxsPr[iPr], ridxsGT[iGT], i]):
                            mot[i]["dist"][iGT, iPr] = dist[ridxsPr[iPr], ridxsGT[iGT], i]

            # assign predicted poses to GT poses
            for ridxPr in range(hasPr.shape[0]):
                if (ridxPr in prToGT):  # pose matches to GT
                    # GT pose that matches the predicted pose
                    ridxGT = np.argwhere(prToGT == ridxPr)
                    assert(ridxGT.size == 1)
                    ridxGT = ridxGT[0, 0]
                    s = score[ridxPr, :]
                    # squeeze drops the singleton dimensions from the slice
                    m = np.squeeze(match[ridxPr, ridxGT, :])
                    hp = hasPr[ridxPr, :]
                    for i in range(len(hp)):
                        if (hp[i]):
                            scoresAll[i][imgidx] = np.append(scoresAll[i][imgidx], s[i])
                            labelsAll[i][imgidx] = np.append(labelsAll[i][imgidx], m[i])
                else:
                    # no matching to GT: all joints of this pose are negatives
                    s = score[ridxPr, :]
                    m = np.zeros([match.shape[2], 1], dtype=bool)
                    hp = hasPr[ridxPr, :]
                    for i in range(len(hp)):
                        if (hp[i]):
                            scoresAll[i][imgidx] = np.append(scoresAll[i][imgidx], s[i])
                            labelsAll[i][imgidx] = np.append(labelsAll[i][imgidx], m[i])
        else:
            if not len(gtFrames[imgidx]["annorect"]):
                # No GT available. All predictions are false positives
                for ridxPr in range(hasPr.shape[0]):
                    s = score[ridxPr, :]
                    m = np.zeros([nJoints, 1], dtype=bool)
                    hp = hasPr[ridxPr, :]
                    for i in range(len(hp)):
                        if hp[i]:
                            scoresAll[i][imgidx] = np.append(scoresAll[i][imgidx], s[i])
                            labelsAll[i][imgidx] = np.append(labelsAll[i][imgidx], m[i])

        # save number of GT joints
        for ridxGT in range(hasGT.shape[0]):
            hg = hasGT[ridxGT, :]
            for i in range(len(hg)):
                nGTall[i, imgidx] += hg[i]

        # NOTE(review): `mot` is only (re)assigned inside the branch where both
        # predictions and GT are present — if the very first frame lacks either,
        # this line would raise NameError; confirm against the upstream repo.
        motAll[imgidx] = mot

    return scoresAll, labelsAll, nGTall, motAll