[原]扫描目录中的重复图片

在 iOS 应用的开发过程中,随着项目的发展,工程中很有可能产生重复图片。

下面这段 Python 脚本可以用来扫描工程中是否有重复图片。

 1 #!/usr/bin/env python
 2 #coding=utf-8
 3 
 4 """Usage: check_project_duplicate.py project_image_dir_path"""
 5 
 6 import os
 7 import sys
 8 import fnmatch
 9 import hashlib
10 
def find_file(dir, pattern):
    """Recursively collect files under *dir* whose names match *pattern*.

    Args:
        dir: Root directory handed to ``os.walk``.
        pattern: Shell-style wildcard (``fnmatch`` syntax) tested against
            each file name, not against the full path.

    Returns:
        A list of paths, each built by joining the containing directory
        with a matching file name, in ``os.walk`` traversal order.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(dir)
        for name in fnmatch.filter(names, pattern)
    ]
17 
18 
def chunk_reader(fobj, chunk_size=1024):
    """Yield successive pieces read from *fobj* until it is exhausted.

    Args:
        fobj: Object exposing ``read(size)``.
        chunk_size: Size passed to each ``read`` call.

    Yields:
        Each non-empty result of ``fobj.read(chunk_size)``; iteration stops
        at the first falsy (empty) read.
    """
    chunk = fobj.read(chunk_size)
    while chunk:
        yield chunk
        chunk = fobj.read(chunk_size)
25 
26 
def main(me, args):
    """Report image files under a directory that have identical content.

    Every ``*.png``, ``*.jpg`` and ``*.jpeg`` file found below ``args[0]``
    is identified by the pair (SHA-1 digest, file size). The first file
    seen with a given identity is remembered; each later file with the same
    identity is printed together with that first file.

    Args:
        me: Script name; used only in the usage message.
        args: Command-line arguments; ``args[0]`` is the directory to scan.

    Returns:
        0 after a completed scan, or 2 when no directory argument was given.
    """
    if not args:
        # Without this guard a missing argument surfaces as a bare IndexError.
        sys.stderr.write("Usage: %s project_image_dir_path\n" % me)
        return 2

    images = []
    for pattern in ('*.png', '*.jpg', '*.jpeg'):
        images.extend(find_file(args[0], pattern))

    hashes = {}
    for file_path in images:
        hashobj = hashlib.sha1()
        # Close each file as soon as it is hashed so a large project does
        # not accumulate open file handles.
        with open(file_path, 'rb') as fobj:
            for chunk in chunk_reader(fobj):
                hashobj.update(chunk)
        file_id = (hashobj.digest(), os.path.getsize(file_path))
        duplicate = hashes.get(file_id, None)
        if duplicate:
            # Single-argument call form prints the same text on Python 2
            # and Python 3.
            print("发现重复图片: '%s' 与 '%s'" % (file_path, duplicate))
        else:
            hashes[file_id] = file_path

    return 0
46 
47 
# Script entry point: pass the program name and the remaining command-line
# arguments to main() and use its return value as the process exit status.
if __name__ == '__main__':
    script_name, script_args = sys.argv[0], sys.argv[1:]
    sys.exit(main(script_name, script_args))
原文地址:https://www.cnblogs.com/Proteas/p/3244830.html