python递归解压文件夹中所有压缩包

1. 简述

    递归解压文件夹中的所有压缩包到指定文件夹,方便快速搜索文件和整理移动文件。

2. 环境配置

    Python 解压 rar 文件需要安装依赖库 (python-unrar)

    Windows:

  1. 在 RARLab 官方下载安装库文件 http://www.rarlab.com/rar/UnRARDLL.exe
  2. 默认路径为 C:\Program Files (x86)\UnrarDLL\
  3. 添加环境变量 UNRAR_LIB_PATH 键值 C:\Program Files (x86)\UnrarDLL\x64\UnRAR64.dll,如果是32位就是 C:\Program Files (x86)\UnrarDLL\UnRAR.dll

    Linux:

  1. 下载库文件 https://www.rarlab.com/rar/unrarsrc-5.6.8.tar.gz
  2. $ make lib
    $ make install-lib
  3. 添加环境变量  export UNRAR_LIB_PATH=/usr/lib/libunrar.so

3. 实现

    代码实现

  1 #!/usr/bin/env python3
  2 # .zip .rar .tar .tgz .tar.gz .tar.bz2 .tar.bz .tar.tgz
  3 import os
  4 import zlib
  5 import unrar
  6 import shutil
  7 import zipfile
  8 import tarfile
  9 from time import sleep
 10 # from unrar import rarfile
 11 
 12 filepath = "./dirname"  #relative path
 13 
 14 class BaseTool(object):
 15     def __init__(self, path):
 16         self.path = path
 17         self.compress = [".tar.gz",".tar.bz2",".tar.bz",".tar.tgz",".tar",".tgz",".zip",".rar"]
 18 
 19     def iszip(self,  file):
 20         for z in self.compress:
 21             if file.endswith(z):
 22                 return z
 23 
 24     def zip_to_path(self, file):
 25         for i in self.compress:
 26             file = file.replace(i,"")
 27         return file
 28 
 29     def error_record(self, info):
 30         with open("error.txt","a+") as r:
 31             r.write(info+"
")
 32 
 33     def un_zip(self, src, dst):
 34         """ src : aa/asdf.zip
 35             dst : unzip/aa/asdf.zip
 36         """
 37         try:
 38             zip_file = zipfile.ZipFile(src)
 39             uz_path = self.zip_to_path(dst)
 40             if not os.path.exists(uz_path):
 41                 os.makedirs(uz_path)
 42             for name in zip_file.namelist():
 43                 zip_file.extract(name, uz_path)
 44             zip_file.close()
 45         except zipfile.BadZipfile:
 46             pass
 47         except zlib.error:
 48             print("zlib error : "+src)
 49             self.error_record("zlib error : "+src)
 50 
 51     def un_rar(self, src, dst):
 52         try:
 53             rar = unrar.rarfile.RarFile(src)
 54             uz_path = self.zip_to_path(dst)
 55             rar.extractall(uz_path)
 56         except unrar.rarfile.BadRarFile:
 57             pass
 58         except Exception as e:
 59             print(e)
 60             self.error_record(str(e)+src)    
 61 
 62     def un_tar(self, src, dst):
 63         try:
 64             tar = tarfile.open(src)
 65             uz_path = self.zip_to_path(dst)
 66             tar.extractall(path = uz_path)
 67         except tarfile.ReadError:
 68             pass
 69         except Exception as e:
 70             print(e)
 71             self.error_record(str(e)+src)
 72 
 73 
 74 class UnZip(BaseTool):
 75     """ UnZip files """
 76     def __init__(self, path):
 77         super(UnZip, self).__init__(self)
 78         self.path = path
 79         self.output = "./unzip/"
 80         self.current_path = os.getcwd()+"/"
 81 
 82     def recursive_unzip(self, repath):
 83         """recursive unzip file
 84         """
 85         for (root, dirs, files) in os.walk(repath):
 86             for filename in files:
 87                 src = os.path.join(root,filename)
 88                 if self.iszip(src) == ".zip":
 89                     print("[+] child unzip: "+src)
 90                     self.un_zip(src, src)
 91                     os.remove(src)
 92                     self.recursive_unzip(self.zip_to_path(src))
 93                     sleep(0.1)
 94                 if self.iszip(src) == ".rar":
 95                     from unrar import rarfile
 96                     print("[+] child unrar : "+src)
 97                     self.un_rar(src,src) 
 98                     os.remove(src)
 99                     self.recursive_unzip(self.zip_to_path(src))
100                     sleep(0.1)
101                 if self.iszip(src) in (".tar.gz",".tar.bz2",".tar.bz",".tar.tgz",".tar",".tgz"):
102                     print("[+] child untar : "+src)
103                     self.un_tar(src,src)
104                     os.remove(src)
105                     self.recursive_unzip(self.zip_to_path(src))
106                     sleep(0.1)
107 
108     def main_unzip(self):
109         for (root, dirs, files) in os.walk(self.path):
110             for filename in files:
111                 zippath = os.path.join(self.output,root)
112                 if not os.path.exists(zippath):
113                     os.makedirs(zippath)
114                 src = os.path.join(root,filename)
115                 dst = os.path.join(self.output,root,filename)
116                 if self.iszip(src) == ".zip":
117                     print("[+] main unzip : "+src)
118                     self.un_zip(src,dst)
119                 if self.iszip(src) == ".rar":
120                     from unrar import rarfile
121                     print("[+] main unrar : "+src)
122                     self.un_rar(src,dst)
123                 if self.iszip(src) in (".tar.gz",".tar.bz2",".tar.bz",".tar.tgz",".tar",".tgz"):
124                     print("[+] main untar : "+src)
125                     self.un_tar(src,dst)
126                 else:
127                     try:
128                         shutil.copyfile(src,dst)
129                     except OSError as e:
130                         print(str(e))
131                         self.error_record(str(e))
132                     
133         self.recursive_unzip(self.output+self.path)
134 
135 
def main():
    """Expand every archive below the module-level ``filepath``."""
    # filepath is expected to be a relative path
    unzipper = UnZip(filepath)
    unzipper.main_unzip()
139 
140 if __name__ == '__main__':
141     main()

 

4. 多线程

  1 #!/usr/bin/env python3
  2 # .zip .rar .tar .tgz .tar.gz .tar.bz2 .tar.bz .tar.tgz
  3 import os
  4 import zlib
  5 import unrar
  6 import shutil
  7 import zipfile
  8 import tarfile
  9 import argparse
 10 import time
 11 import threading
 12 from time import sleep
 13 from itertools import chain
 14 from unrar import rarfile
 15 
 16 
 17 filepath = "./filepath"  #relative path
 18 thread_num = 1
 19 
 20 class BaseTool(object):
 21     def __init__(self):
 22         super(BaseTool, self).__init__()
 23         self.compress = [".tar.gz",".tar.bz2",".tar.bz",".tar.tgz",".tar",".tgz",".zip",".rar"]
 24 
 25     def run_threads(self, threads_number: int, target_function: any, *args, **kwargs) -> None:
 26         """ Run function across specified number of threads
 27         :param int thread_number: number of threads that should be executed
 28         :param func target_function: function that should be executed accross specified number of threads
 29         :param any args: args passed to target_function
 30         :param any kwargs: kwargs passed to target function
 31         :return None
 32         """
 33 
 34         threads = []
 35         threads_running = threading.Event()
 36         threads_running.set()
 37 
 38         for thread_id in range(int(threads_number)):
 39             thread = threading.Thread(
 40                 target=target_function,
 41                 args=chain((threads_running,), args),
 42                 kwargs=kwargs,
 43                 name="thread-{}".format(thread_id),
 44             )
 45             threads.append(thread)
 46 
 47             # print("{} thread is starting...".format(thread.name))
 48             thread.start()
 49 
 50         start = time.time()
 51         try:
 52             while thread.isAlive():
 53                 thread.join(1)
 54 
 55         except KeyboardInterrupt:
 56             threads_running.clear()
 57 
 58         for thread in threads:
 59             thread.join()
 60             # print("{} thread is terminated.".format(thread.name))
 61 
 62         print("Elapsed time: {} seconds".format(time.time() - start))
 63 
 64     def iszip(self,  file):
 65         for z in self.compress:
 66             if file.endswith(z):
 67                 return z
 68 
 69     def zip_to_path(self, file):
 70         for i in self.compress:
 71             file = file.replace(i,"")
 72         return file
 73 
 74     def error_record(self, info):
 75         with open("error.txt","a+") as w:
 76             w.write(info+"
")
 77 
 78     def remove(self, filepath):
 79         if os.path.exists(self.zip_to_path(filepath)) and os.path.exists(filepath):
 80             os.remove(filepath)
 81 
 82     def un_zip(self, src, dst):
 83         """ src : aa/asdf.zip
 84             dst : unzip/aa/asdf.zip
 85         """
 86         try:
 87             zip_file = zipfile.ZipFile(src)
 88             uz_path = self.zip_to_path(dst)
 89             if not os.path.exists(uz_path):
 90                 os.makedirs(uz_path)
 91             for name in zip_file.namelist():
 92                 zip_file.extract(name, uz_path)
 93             zip_file.close()
 94         except zipfile.BadZipfile:
 95             pass
 96         except RuntimeError:
 97             self.error_record("pass required : "+src)
 98             return "PassRequired"
 99         except zlib.error:
100             print("zlib error : "+src)
101             self.error_record("zlib error : "+src)
102         except Exception as e:
103             print(e)
104             self.error_record(str(e)+src)  
105 
106     def un_rar(self, src, dst):
107         try:
108             rar = unrar.rarfile.RarFile(src)
109             uz_path = self.zip_to_path(dst)
110             rar.extractall(uz_path)
111         except unrar.rarfile.BadRarFile:
112             pass
113         except Exception as e:
114             print(e)
115             self.error_record(str(e)+src)    
116 
117     def un_tar(self, src, dst):
118         try:
119             tar = tarfile.open(src)
120             uz_path = self.zip_to_path(dst)
121             tar.extractall(path = uz_path)
122         except tarfile.ReadError:
123             pass
124         except Exception as e:
125             print(e)
126             self.error_record(str(e)+src)
127 
128 
class LockedIterator(object):
    """Iterator wrapper that lets several threads pull items one at a time.

    Fetching the next item from the wrapped iterator happens under a lock.
    Tuples are returned as a 3-tuple of their first three elements with
    surrounding whitespace stripped; strings are returned stripped; any
    other item is returned unchanged.
    """

    def __init__(self, it):
        self.lock = threading.Lock()
        self.it = iter(it)

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next item; raise ``StopIteration`` when exhausted."""
        # Only the fetch needs the lock; the item is private afterwards.
        with self.lock:
            item = next(self.it)

        if isinstance(item, tuple):
            return (item[0].strip(), item[1].strip(), item[2].strip())
        if isinstance(item, str):
            return item.strip()
        return item

    # Callers in this file use the explicit ``.next()`` spelling.
    next = __next__
150 
151 
class UnZip(BaseTool):
    """Mirror ``path`` into ``./unzip/`` with every archive expanded, using threads.

    ``main_unzip`` extracts top-level archives and copies ordinary files into
    the output tree; ``recursive_unzip`` then expands archives that were
    themselves packed inside other archives. Work items are ``(src, dst,
    flag)`` tuples consumed by ``do_unzip`` in ``self.threads`` worker threads.
    """

    _TAR_SUFFIXES = (".tar.gz", ".tar.bz2", ".tar.bz", ".tar.tgz", ".tar", ".tgz")

    def __init__(self, path):
        super(UnZip, self).__init__()
        self.path = path
        self.threads = thread_num
        self.output = "./unzip/"
        # Not read by any method of this class; kept for external users.
        self.current_path = os.getcwd()+"/"
        # NOTE: the command line is parsed while the object is constructed.
        self.parser = argparse.ArgumentParser()
        self.parser.add_argument("-v","--verbose", action="store_true", help="./zipperpro.py -v")
        self.args = self.parser.parse_args()

    def run(self):
        """Process ``self.path``."""
        self.main_unzip(self.path)

    def recursive_unzip(self, repath):
        """Queue every file below ``repath`` as a "child" task and process them."""
        task_list = []
        for (root, _dirs, files) in os.walk(repath):
            for filename in files:
                # Use the name exactly as reported by os.walk; stripping "."
                # or "/" characters would break names such as ".hidden.zip".
                src = os.path.join(root, filename)
                task_list.append((src, src, "child"))
        data = LockedIterator(task_list)
        print("[+] child unzip ...")
        self.run_threads(self.threads, self.do_unzip, data)

    def main_unzip(self, mainpath):
        """Extract or copy every file under ``mainpath`` into the output tree.

        Files whose destination (minus archive suffix) already exists are
        skipped. Afterwards the mirrored tree is scanned for nested archives.
        """
        task_list = []
        print("Initialization......")
        for (root, _dirs, files) in os.walk(mainpath):
            zippath = os.path.join(self.output, root)
            if files:
                os.makedirs(zippath, exist_ok=True)
            for filename in files:
                src = os.path.join(root, filename)
                dst = os.path.join(zippath, filename)
                if not os.path.exists(self.zip_to_path(dst)):
                    task_list.append((src, dst, "main"))
        data = LockedIterator(task_list)
        print("[+] main unzip ...")
        self.run_threads(self.threads, self.do_unzip, data)
        # Scan the mirror of the tree that was actually processed.
        self.recursive_unzip(os.path.join(self.output, mainpath))

    def do_unzip(self, running, data):
        """Worker loop: consume tasks from ``data`` until empty or stopped.

        :param running: event that is cleared to ask the worker to stop
        :param data: object whose ``next()`` yields ``(src, dst, flag)`` and
            raises ``StopIteration`` when no tasks are left
        """
        while running.is_set():
            # Only the fetch is guarded, so a StopIteration escaping from the
            # task handling is not mistaken for "no more tasks".
            try:
                (src, dst, flag) = data.next()
            except StopIteration:
                break
            kind = self.iszip(src)
            if flag == "main":
                self._do_main(kind, src, dst)
            elif flag == "child":
                self._do_child(kind, src)

    def _do_main(self, kind, src, dst):
        """Extract an archive into the output tree, or copy a plain file."""
        if kind == ".zip":
            if self.args.verbose:
                print("[+] main unzip : "+src)
            self.un_zip(src, dst)
        elif kind == ".rar":
            if self.args.verbose:
                print("[+] main unrar : "+src)
            self.un_rar(src, dst)
        elif kind in self._TAR_SUFFIXES:
            if self.args.verbose:
                print("[+] main untar : "+src)
            self.un_tar(src, dst)
        else:
            try:
                shutil.copyfile(src, dst)
            except OSError as e:
                print(str(e))
                self.error_record(str(e))

    def _do_child(self, kind, src):
        """Expand a nested archive in place, then scan what it contained."""
        extracted = True
        if kind == ".zip":
            if self.args.verbose:
                print("[+] child unzip: "+src)
            # A password-protected zip is left where it is.
            extracted = self.un_zip(src, src) != "PassRequired"
        elif kind == ".rar":
            if self.args.verbose:
                print("[+] child unrar : "+src)
            self.un_rar(src, src)
        elif kind in self._TAR_SUFFIXES:
            if self.args.verbose:
                print("[+] child untar : "+src)
            self.un_tar(src, src)
        else:
            # Ordinary files in the output tree need no further work.
            return
        if extracted:
            self.remove(src)
            self.recursive_unzip(self.zip_to_path(src))
        sleep(0.1)
247 
248 
def main():
    """Build an ``UnZip`` for the module-level ``filepath`` and run it."""
    unzipper = UnZip(filepath)
    unzipper.run()
252     
253 
254 
255 if __name__ == '__main__':
256     main()

5. 问题

  • 中文压缩包乱码,中文路径解压出错
  • rar解压遇加密文件卡死

https://github.com/Gitmaninc/SmallTooools/tree/master/unzip-tool

原文地址:https://www.cnblogs.com/trojan-z/p/10043408.html