# Python script --- count_words

import os 
import pdb 
import numpy as np 

# Root directories of the two dataset splits. The loops below treat every
# entry inside a root as one video folder containing `language.txt` and `imgs/`.
train_path = '/home/wangxiao/Downloads/TracKit/dataset/train_subset/'
test_path = '/home/wangxiao/Downloads/TracKit/dataset/test_subset/'

# Directory listings of both splits, in whatever order os.listdir returns them.
trainFiles, testFiles = os.listdir(train_path), os.listdir(test_path)

# Statistics accumulated over both splits:
#   totalFrameNUM    - sum of the number of entries in every `imgs/` folder
#   Max_sentence_NUM - largest whitespace-separated token count of any language.txt
#   BBox_validateNUM - only touched by the commented-out box check, so it stays 0
#   wordLIST         - distinct tokens in first-seen order
totalFrameNUM = Max_sentence_NUM = BBox_validateNUM = 0
wordLIST = []

# Vocabulary output file; it is written at the very end of the script.
f = open('/home/wangxiao/Downloads/TracKit/train_subset_wordList.txt', 'w')
 
# Pass over the training split. For every video folder:
#   * add the number of entries in `imgs/` to totalFrameNUM,
#   * update Max_sentence_NUM with the token count of `language.txt`,
#   * append tokens not seen before to wordLIST (first-seen order is kept).
#
# NOTE: a disabled (commented-out) check used to load `groundtruth.txt` here and
# count rows whose four values sum to more than 0 into BBox_validateNUM. It was
# never active, so BBox_validateNUM is not modified by this loop.

# Set mirror of wordLIST so the "seen before?" test is O(1) instead of a list scan.
knownWords = set(wordLIST)

for i, videoName in enumerate(trainFiles):
    print(i, ' | ', len(trainFiles), ' ==>> videoName: ', videoName)

    videoPath = train_path + videoName + '/'
    language_txt_path = videoPath + 'language.txt'
    imgFiles = os.listdir(videoPath + 'imgs/')

    frameNUM = len(imgFiles)
    totalFrameNUM = totalFrameNUM + frameNUM

    # `with` guarantees the handle is closed even if reading raises.
    with open(language_txt_path, 'r') as fid:
        # Despite the name, `sentences` holds whitespace-separated tokens.
        sentences = fid.read().split()

    Max_sentence_NUM = max(Max_sentence_NUM, len(sentences))

    for currentWORD in sentences:
        if currentWORD not in knownWords:
            knownWords.add(currentWORD)
            wordLIST.append(currentWORD)


# Pass over the test split. For every video folder:
#   * add the number of entries in `imgs/` to totalFrameNUM,
#   * update Max_sentence_NUM with the token count of `language.txt`,
#   * append tokens not seen before to wordLIST (first-seen order is kept).
#
# NOTE: a disabled (commented-out) check used to load `groundtruth.txt` here and
# count rows whose four values sum to more than 0 into BBox_validateNUM. It was
# never active, so BBox_validateNUM is not modified by this loop.

# Set mirror of wordLIST so the "seen before?" test is O(1) instead of a list scan.
knownWords = set(wordLIST)

for i, videoName in enumerate(testFiles):
    print(i, ' | ', len(testFiles), ' ==>> videoName: ', videoName)

    # Fixed: this used the undefined name `TNL2k_test_path`, which raised
    # NameError on the first iteration; the test root is `test_path`.
    videoPath = test_path + videoName + '/'
    language_txt_path = videoPath + 'language.txt'
    imgFiles = os.listdir(videoPath + 'imgs/')

    frameNUM = len(imgFiles)
    totalFrameNUM = totalFrameNUM + frameNUM

    # `with` guarantees the handle is closed even if reading raises.
    with open(language_txt_path, 'r') as fid:
        # Despite the name, `sentences` holds whitespace-separated tokens.
        sentences = fid.read().split()

    Max_sentence_NUM = max(Max_sentence_NUM, len(sentences))

    for currentWORD in sentences:
        if currentWORD not in knownWords:
            knownWords.add(currentWORD)
            wordLIST.append(currentWORD)


# Print the statistics gathered over both dataset splits.
print('==>> totalFrameNUM: ', totalFrameNUM)
print('==>> Max_sentence_NUM: ', Max_sentence_NUM)
print('==>> total word num: ', len(wordLIST))
print('==>> BBox NUM: ', BBox_validateNUM)

# Dump the vocabulary, one word per line, in first-seen order.
# Fixed: the newline literal had been split across two physical lines (a
# SyntaxError), and the output file was never closed. `with f:` closes the
# already-open handle, flushing buffered data, even if a write fails.
with f:
    f.writelines(eachWord + '\n' for eachWord in wordLIST)
# Original article: https://www.cnblogs.com/wangxiaocvpr/p/13877028.html