# 用python导出mht文件中的JPG图片 (Export the JPG images embedded in an MHT file with Python)

import os
import sys
import re
import time
import base64

# A JPEG stream starts with the bytes FF D8 FF, whose base64 text always
# begins with "/9j". A candidate block runs from there to the next blank line.
_JPEG_BLOCK_RE = re.compile(r"/9j.*?(?=\n\n)", re.DOTALL)

# Anything that is not part of the standard base64 alphabet.
_NON_BASE64_RE = re.compile(r"[^A-Za-z0-9+/]")


def _decode_base64_lenient(text):
	"""Decode base64 *text*, tolerating line breaks and missing padding."""
	# Everything from the first "=" onwards is padding or trailing noise.
	payload = _NON_BASE64_RE.sub("", text.partition("=")[0])
	remainder = len(payload) % 4
	if remainder == 1:
		# A lone trailing character carries only 6 bits, less than one
		# byte, so it cannot be decoded; drop it instead of failing.
		payload = payload[:-1]
	elif remainder:
		payload += "=" * (4 - remainder)
	return base64.b64decode(payload)


def extract_data(filename, dir):
	"""Extract every base64-encoded JPEG found in an MHT file.

	The file is scanned for runs of text that start with ``/9j`` and end
	at the next blank line. Each run is base64-decoded and written to
	``<dir>/<n>.jpg``, with ``n`` counting up from 1. Every output path is
	printed before the file is written.

	The scan is purely textual: the MIME structure of the archive is not
	parsed, so a ``/9j`` sequence inside unrelated data is also treated as
	the start of an image.

	Args:
		filename: Path of the MHT file to read.
		dir: Existing directory that receives the numbered ``.jpg`` files.

	Raises:
		OSError: If the input cannot be read or an output cannot be
			written.
	"""
	# The payload is pure ASCII, so any single-byte codec works; latin-1
	# never fails on stray 8-bit bytes elsewhere in the archive.
	with open(filename, "r", encoding="latin-1") as source:
		content = source.read()

	for index, match in enumerate(_JPEG_BLOCK_RE.finditer(content), start=1):
		target = "%s/%s.jpg" % (dir, index)
		print(target)
		# Decode before opening the output so a decoding failure does not
		# leave an empty file behind.
		image_bytes = _decode_base64_lenient(match.group())
		with open(target, "wb") as image:
			image.write(image_bytes)
	
def export_file(file_path):
	"""Extract the JPEGs of one MHT file into a directory next to it.

	The output directory sits beside the input file and is named after the
	file name up to its first dot (``a/b/page.v2.mht`` -> ``a/b/page``).
	A name with no dot, or with nothing before its first dot, gets
	``_dir`` appended instead so that the directory name is never empty
	and never equal to the file name. The directory is created when
	missing; an existing one is reused after printing a notice. The actual
	extraction is delegated to ``extract_data``.

	Args:
		file_path: Path of the MHT file. Both the forward slash and the
			backslash are accepted as separators on every platform.

	Raises:
		OSError: If the output directory cannot be created.
	"""
	# Split at the last separator of either kind. Plain string slicing is
	# used so that characters such as "+", "?" or "[" in the file name
	# have no special meaning.
	cut = max(file_path.rfind("/"), file_path.rfind("\\")) + 1
	parent, file_name_ext = file_path[:cut], file_path[cut:]

	stem, dot, _ = file_name_ext.partition(".")
	if not dot or not stem:
		stem = file_name_ext + "_dir"

	full_path = parent + stem
	if os.path.exists(full_path):
		print("The Directory \"%s\" Exists!\n" % full_path)
	else:
		os.mkdir(full_path)
	extract_data(file_path, full_path)
	
def main():
	"""Run the exporter for the first command-line argument.

	Prints ``os.name`` first. When the script is started without any
	argument, a built-in sample path is exported instead. Arguments after
	the first one are ignored.
	"""
	print(os.name)

	argument_count = len(sys.argv)
	if argument_count >= 2:
		# Only the first real argument is used.
		export_file(sys.argv[1])
	elif argument_count == 1:
		# Nothing but the script name was given: use the sample file.
		export_file("E:\\Scripts\\test.mht")
		
# Script entry point: run the export, then pause for two seconds before
# the process exits.
if __name__ == "__main__":
	main()
	# NOTE(review): the pause presumably keeps a console window open long
	# enough to read the output - confirm before removing it.
	print("just wait for 2 seconds!\n")
	time.sleep(2)
	
# 原文地址 (original article): https://www.cnblogs.com/LinuxHunter/p/1915311.html