# Python: identify identical files by their MD5 digest (根据MD5判断相同文件)

import functools
import hashlib
import os
import time

import send2trash

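'''
Find identical files under a directory by comparing their MD5 digests, and
optionally delete the duplicates (they are only moved to the recycle bin).
'''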

def getMD5(path):
    """Return the hexadecimal MD5 digest of the file at ``path``.

    The file is opened in binary mode and read in 40960-byte chunks, so a
    large file is hashed without being loaded into memory all at once.

    Args:
        path: Path of the file to hash.

    Returns:
        The MD5 digest of the file contents as a hexadecimal string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    d5 = hashlib.md5()
    # Open the file exactly once, inside ``with``, so the handle is always
    # closed; a second bare ``open`` here would leak a descriptor per call.
    with open(path, 'rb') as f:
        # ``read`` returns b'' at end of file, which ends the iteration.
        for content in iter(lambda: f.read(40960), b''):
            d5.update(content)
    return d5.hexdigest()

def getSha512(path):
    """Return the hexadecimal SHA-512 digest of the file at ``path``.

    The file is opened in binary mode and read in 40960-byte chunks, so a
    large file is hashed without being loaded into memory all at once.

    Args:
        path: Path of the file to hash.

    Returns:
        The SHA-512 digest of the file contents as a hexadecimal string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    sh = hashlib.sha512()
    # Open the file exactly once, inside ``with``, so the handle is always
    # closed; a second bare ``open`` here would leak a descriptor per call.
    with open(path, 'rb') as f:
        # ``read`` returns b'' at end of file, which ends the iteration.
        for content in iter(lambda: f.read(40960), b''):
            sh.update(content)
    return sh.hexdigest()

# Decorator that reports how long a call took.
def timer(func):
    """Wrap ``func`` so that each call prints its elapsed wall-clock time.

    After the wrapped call finishes, ``Elapsed <seconds>s`` is printed with
    the duration rounded to one decimal place.

    Args:
        func: The callable to time.

    Returns:
        A wrapper that accepts the same arguments as ``func`` and returns
        whatever ``func`` returns.
    """
    @functools.wraps(func)   # keep the wrapped function's name and docstring
    def deco(*args, **kwargs):
        time1 = time.time()
        # Keep the result so the decorator does not swallow return values.
        result = func(*args, **kwargs)
        time2 = time.time()
        use_time = round(time2 - time1, 1)
        print('Elapsed %ss' % (use_time))
        return result
    return deco   # return the wrapper itself, not the result of calling it
    
@timer
def walk(path):
    """Scan ``path`` recursively and report files with identical MD5 digests.

    The user is asked once on standard input whether duplicates should be
    deleted; any answer other than ``y`` (case-insensitive) means "no".
    Each time a file's digest matches one seen before, both paths are
    printed. If deletion was requested, the previously seen file is sent to
    the recycle bin and the current file is kept. A summary of the total
    size of the duplicates, in units of 1024*1024 bytes, is printed at the
    end.

    Args:
        path: Root directory to scan.
    """
    answer = input('Want to delete duplicate file? y/n\n')
    delete = answer.lower() == 'y'
    seen = {}   # MD5 digest -> path of the latest scanned file with that digest
    size = 0    # accumulated size in bytes of the duplicate files found
    n = 1
    for folder, _subfolders, filenames in os.walk(path):
        for filename in filenames:
            # '\r' returns to the start of the line so that, together with
            # end='', the counter is rewritten in place.
            print('\rHas scanned %s files' % n, end='')
            root = os.path.join(folder, filename)
            try:
                md5 = getMD5(root)
            except OSError as err:
                # One unreadable file (e.g. broken symlink, permission
                # denied) should not abort the whole scan.
                print('\nSkipped unreadable file %s (%s)' % (root, err))
                continue
            if md5 in seen:
                if delete:
                    # Moved to the recycle bin, not permanently removed.
                    send2trash.send2trash(seen[md5])
                print('\n%s\n%s\n' % (root, seen[md5]))
                size += os.path.getsize(root)
            # Remember the current file as the surviving copy for this digest.
            seen[md5] = root
            n += 1
    # Convert bytes to units of 1024*1024 bytes for the summary line.
    size = round(size / float(1024 * 1024), 2)
    if delete:
        print('\nSaved %sM space.' % size)
    else:
        print('\nWasted %sM space.' % size)


if __name__ == "__main__":
    # Script entry point: ask for the directory to scan, then run the search.
    target_dir = input('Input path:')
    walk(target_dir)
# Original article (原文地址): https://www.cnblogs.com/wztshine/p/12363270.html