python遍历目录下文件

#!/usr/bin/python3.5

import os
import sqlite3
from multiprocessing import Process,Queue
import time
##############################################################
# Number of seconds in one day; the report multiplies it by a day count to
# compute a cut-off timestamp.
DAY_SECONDS = 60 * 60 * 24
# Capacity of the inter-process queue; also referenced by the database
# writer's commit threshold.
QUESIZE = 1000

# c.execute('''CREATE TABLE memory (user VARCHAR(20), filename VARCHAR(100), size FLOAT, ctime TIMESTAMP, mtime TIMESTAMP, atime TIMESTAMP)''')

def checkFile(file, user, que):
    """Stat one file and enqueue its metadata.

    Args:
        file: Path of the file to inspect.
        user: Name of the user the file is attributed to.
        que: Queue-like object; receives a ``(user, file, os.stat_result)``
            tuple via ``put``.

    A path that cannot be stat'ed (for example because it was removed after
    the directory was listed) is reported on stdout and skipped, so a single
    bad entry does not abort the whole scan.
    """
    try:
        statinfo = os.stat(file)
    except OSError as err:
        print("error:", file, err)
        return
    que.put((user, file, statinfo))


def listDir(dir, user, que):
    """Walk the tree rooted at *dir* and hand every regular file to checkFile.

    Args:
        dir: Directory to scan.
        user: Name of the user the files are attributed to.
        que: Queue-like object passed through to ``checkFile``.

    The walk is iterative (explicit stack) so deep trees cannot hit the
    recursion limit. Directories that cannot be listed are reported and
    skipped instead of aborting the scan. Symbolic links to directories are
    not descended into, which avoids counting the same files twice and
    looping on cyclic links; symbolic links to regular files are still
    passed to ``checkFile``.
    """
    pending = [dir]
    while pending:
        current = pending.pop()
        try:
            names = os.listdir(current)
        except OSError as err:
            print("error:", current, err)
            continue
        for name in names:
            path = os.path.join(current, name)
            if os.path.isfile(path):
                checkFile(path, user, que)
            elif os.path.isdir(path) and not os.path.islink(path):
                pending.append(path)
            # Anything else (sockets, broken links, ...) is ignored.


def checkPersonFile(dir, user, que):
    """Scan one user's directory tree, logging the start and end of the scan.

    Args:
        dir: Root directory belonging to *user*.
        user: Name used in the log messages and attached to every record.
        que: Queue-like object passed through to ``listDir``.

    Progress messages are printed and appended to ``log/check_file.log``.
    """
    # One scanner process is started per user directory, so several of them
    # may try to create the directory at once; exist_ok avoids that race.
    os.makedirs('log', exist_ok=True)
    logfile = "log/" + "check_file.log"

    # The context manager guarantees the log is flushed and closed even if
    # the scan raises.
    with open(logfile, 'a') as f:
        msg = 'checking %s ...' % user
        print(msg)
        f.write(msg + '\n')

        listDir(dir, user, que)

        msg = 'check %s complete' % user
        print(msg)
        f.write(msg + '\n')


def write_to_db(que, db):
    """Drain file records from *que* into the ``memory`` table of *db*.

    Args:
        que: Queue-like object yielding ``(user, path, os.stat_result)``
            tuples; a ``None`` item is the sentinel that ends the loop.
        db: Path of the SQLite database file. The ``memory`` table must
            already exist with six columns.

    Rows are committed in batches and once more after the sentinel is seen.
    The connection is closed before returning.
    """
    conn = sqlite3.connect(db)
    try:
        c = conn.cursor()
        cnt = 0
        while True:
            info = que.get()
            if info is None:
                break
            user = info[0]
            # The path in info[1] is not stored; the filename column gets a
            # fixed placeholder, exactly as the original code did.
            file = 'notused'
            statinfo = info[2]
            c.execute('INSERT INTO memory values (?,?,?,?,?,?)',
                      (user, file, statinfo.st_size, statinfo.st_ctime,
                       statinfo.st_mtime, statinfo.st_atime))

            # Python has no ``++`` operator: ``++cnt`` is a double unary plus
            # and leaves cnt unchanged, so the batch commit never triggered.
            cnt += 1
            if cnt > QUESIZE - 100:
                cnt = 0
                conn.commit()
        conn.commit()
    finally:
        conn.close()
    print('end')


def _banner(head, width):
    """Return *head* framed by two equal runs of '*' sized for *width* columns."""
    left = (width - len(head)) // 2
    return '*' * left + head + '*' * left


def _emit(f, msg):
    """Print *msg* and append it as one line to the open log file *f*."""
    print(msg)
    f.write(msg + '\n')


if __name__ == '__main__':
    rootDir = "/train/blue/users/"
    db = "memory.db"
    conn = sqlite3.connect(db)
    try:
        c = conn.cursor()
        c.execute('''CREATE TABLE if not exists memory (user VARCHAR(20), filename VARCHAR(100), size FLOAT, ctime TIMESTAMP, mtime TIMESTAMP, atime TIMESTAMP)''')
        # Start every run from an empty table.
        c.execute('''DELETE FROM memory''')
        conn.commit()

        processes = list()
        que = Queue(QUESIZE)

        # A single writer process owns all inserts; the scanners only feed
        # the queue.
        writer = Process(target=write_to_db, args=(que, db))
        writer.start()

        # One scanner process per sub-directory of rootDir; the directory
        # name is used as the user name.
        for filename in os.listdir(rootDir):
            file = os.path.join(rootDir, filename)
            if os.path.isdir(file):
                user = os.path.basename(file)
                p = Process(target=checkPersonFile, args=(file, user, que))
                p.start()
                processes.append(p)
        for p in processes:
            p.join()
        # Sentinel: tells the writer that no more records will arrive.
        que.put(None)
        writer.join()
        print("check all file complete")

        # ---- analyze the data in db ----
        row_format = '%-*s%-*.2f'
        width = 80
        user_width = width // 2
        memory_width = width - user_width

        os.makedirs('log', exist_ok=True)
        logfile = "log/" + "check_file.log"
        with open(logfile, 'a') as f:
            t = time.strftime("%Y/%b/%d/%H:%M:%S")
            f.write('*' * 80 + '\n')
            f.write(t + '\n')

            # Total size per user, in MiB, largest first.
            _emit(f, _banner('total memory', width))
            result = c.execute('''SELECT user, sum(size)/(1024*1024) as total_size from memory GROUP BY user ORDER BY total_size DESC''')
            for r in result:
                _emit(f, row_format % (user_width, r[0], memory_width, r[1]))

            days = [2, 5, 15]
            whens = ['ctime', 'mtime', 'atime']
            for day in days:
                for when in whens:
                    _emit(f, _banner('latest %d days by %s' % (day, when), width))

                    last_when = time.time() - day * DAY_SECONDS
                    # "Latest N days" means timestamps newer than the
                    # cut-off; the original compared with '<', which selected
                    # files OLDER than N days. The column name comes from the
                    # fixed list above; the value is bound as a parameter.
                    sql = ('SELECT user, sum(size)/(1024*1024) as total_size '
                           'from memory where {} > ? '
                           'GROUP BY user ORDER BY total_size DESC').format(when)
                    result = c.execute(sql, (last_when,))
                    for r in result:
                        _emit(f, row_format % (user_width, r[0], memory_width, r[1]))

            # Files bigger than 4 MiB (threshold held in bytes).
            sizes = [4.0 * 1024 * 1024]
            for size in sizes:
                # The heading is labelled in "M", so convert the byte
                # threshold before printing it.
                head = 'bigger then %fM total memory:' % (size / (1024 * 1024),)
                _emit(f, _banner(head, width))

                sql = ('SELECT user, sum(size)/(1024*1024) as total_size '
                       'from memory where size > ? '
                       'GROUP BY user ORDER BY total_size DESC')
                result = c.execute(sql, (size,))
                for r in result:
                    _emit(f, row_format % (user_width, r[0], memory_width, r[1]))

            f.write('*' * 80 + '\n')
    finally:
        conn.close()
    print('finish')

原文地址:https://www.cnblogs.com/walkinginthesun/p/10423730.html