splitFile2SmallFile

 1. split file into several files

 1 """
 2 this is a customizable version of the standard unix split command-line
 3 utility; because it is written in python, it also works on windows and can be
 4 easily modified; because it exports a function, its logic can also be imported
 5 and reused in other applications
 6 """
 7 import sys,os
 8 kilobytes =1024
 9 megabytes = kilobytes*1000
10 chunksize = int(1.4* megabytes)                  #default roughtly a floppy
11 
12 def split(fromfile,todir,chunksize=chunksize):
13     if not os.path.exists(todir):                 # caller handlers errors
14         os.mkdir(todir)                          #make dir,read/write parts
15     else:
16         for fname in os.listdir(todir):
17             os.remove(os.path.join(todir,fname))  #delete any exiting files
18     partnum =0
19     input = open(fromfile,'rb')
20     while True:
21         chunk = input.read(chunksize)
22         if not chunk:break
23         partnum +=1
24         filename = os.path.join(todir,('part%04d' % partnum))
25         fileobj = open(filename,'wb')
26         fileobj.write(chunk)
27         fileobj.close()
28     input.close()
29     assert partnum<=9999
30     return partnum
31 
if __name__ == '__main__':
    # Command-line / interactive driver:
    #   split.py -help                      -> print usage
    #   split.py                            -> prompt for both paths
    #   split.py file target-dir [chunksize]
    if len(sys.argv) == 2 and sys.argv[1] == '-help':
        print('use:split.py [file to split target-dir [chunksize]]')
    else:
        if len(sys.argv) < 3:
            interactive = True
            fromfile = input('File to be split?')
            todir = input('directory to store part files?')
        else:
            interactive = False
            fromfile, todir = sys.argv[1:3]
            if len(sys.argv) == 4:
                chunksize = int(sys.argv[3])
        absfrom, absto = map(os.path.abspath, [fromfile, todir])
        print('splitting', absfrom, 'to', absto, 'by', chunksize)
        try:
            parts = split(fromfile, todir, chunksize)
        except Exception as exc:
            # Report the failure but let Ctrl-C and sys.exit() pass through.
            print('error during split:')
            print(type(exc), exc)
        else:
            print('split finished:', parts, 'parts are in ', absto)
        if interactive:
            input('press enter key')  # keep the window open if launched by click
55             
View Code

split to 200k

2.CopyAllFiles:

  1 """
  2 Usage: 'python cpall.py dirFrom dirTo'
  3 recursive copy of a directory tree. works like a 'cp -r dirFrom/* dirTo'
  4 unix command, and assumes that dirFrom and dirTo are both directories.
  5 was written to get around fatal error messages under windows drag-and-drop
  6 copies(the first bad file ends the entire copy operation immediately).
  7 but also allows for coding more customized copy operations in python
  8 """
  9 
 10 import os,sys
 11 maxfileload =100000
 12 blksize =1024*500
 13 
 14 def copyfile(pathFrom,pathTo,maxfileload =maxfileload):
 15     """
 16     copy one file pathFrom to pathTo,byte for byte;
 17     use binary file mode to supress unicde decode and endline transform
 18     """
 19     if os.path.getsize(pathFrom) <=maxfileload:
 20         bytesFrom = open(pathFrom,'rb').read()
 21         open(pathTo,'wb').write(bytesFrom)
 22     else:
 23         fileFrom = open(pathFrom,'rb')
 24         fileTo = open(pathTo,'wb')
 25         while True:
 26             bytesFrom = fileFrom.read(blksize)
 27             if not bytesFrom:
 28                 break
 29             fileTo.write(bytesFrom)
 30             
 31 def copytree(dirFrom,dirTo,verbose=0):
 32     """
 33     copy contents of dirFrom and below to dirTo ,return(files,dirs) counts;
 34     may need to use bytes for dirnames if undecodable on other platforms;
 35     may need to do more file type checking on unix:skip links,fifos,etc;
 36     """
 37     fcount = dcount =0
 38     for filename in os.listdir(dirFrom):
 39         pathFrom = os.path.join(dirFrom,filename)
 40         pathTo = os.path.join(dirTo,filename)
 41         if not os.path.isdir(pathFrom):
 42             try:
 43                 if verbose >1:
 44                     print('copying',pathFrom,'to',pathTo)
 45                 copyfile(pathFrom,pathTo)
 46                 fcount +=1
 47             except:
 48                 print('error copying',pathFrom,'to',pathTo,'--skipped')
 49                 print(sys.exc_info()[0],sys.exc_info()[1])
 50         else:
 51             if verbose:
 52                 print('copying dir',pathFrom,'to',pathTo)
 53             try:
 54                 os.mkdir(pathTo)
 55                 below = copytree(pathFrom,pathTo)
 56                 fcount += below[0]
 57                 dcount += below[1]
 58                 dcount+=1
 59             except:
 60                 print('error creating',pathTo,'--skipped')
 61                 print(sys.exc_info()[0],sys.exc_info()[1])
 62     return (fcount,dcount)
 63 
 64 def getargs():
 65     """
 66     get and verify directory name arguments ,return default none on errors
 67     """
 68     try:
 69         dirFrom,dirTo = sys.argv[1:]
 70     except:
 71         print('usage error:cpall.py dirFrom dirTo')
 72     else:
 73         if not os.path.isdir(dirFrom):
 74             print('error:dirFrom is not a dir')
 75         elif not os.path.exists(dirTo):
 76             os.mkdir(dirTo)
 77             print('note:dirTo was created')
 78             return (dirfrom,dirTo)
 79         else:
 80             print('warning:dirto already xists')
 81             if hasattr(os.path,'samefile'):
 82                 same = os.path.samefile(dirFrom,dirTo)
 83             else:
 84                 same = os.path.abspath(dirFrom) == os.path.abspath(dirTo)
 85             if same:
 86                 print('error :dirfrom same as dirTo')
 87             else:
 88                 return (dirFrom,dirTo)
 89             
 90 if __name__ =='__main__':
 91     import time
 92     distuple =getargs()
 93     if distuple:
 94         print('copying...')
 95         start = time.clock()
 96         fcount,dcount = copytree(*distuple)
 97         print('copied',fcount,'files,',dcount,'directories')
 98         print('in ',time.clock()-start,' seconds')
 99     
100             
View Code

 3. compare two directories and list the files that are not in both

 1 """
 2 ############################################################################
 3 usage :python dirdiff.py dir1-path dir2-path
 4 compare two directories to find files that exist in one but not the other
 5 this version uses the os.listdir function and list difference. note that
 6 this script checks only filenames,not file contents --see diffall.py for an 
 7 extension that does the latter by comparing .read() results         
 8 #############################################################################
 9 """
10 
11 import os,sys
12 
def reportdiffs(unique1, unique2, dir1, dir2):
    """
    Print the differences report for one pair of directories.

    unique1 and unique2 hold the names found only in dir1 and only in dir2.
    When both are empty a single "identical" line is printed; otherwise
    each non-empty collection is listed under a heading naming its
    directory.
    """
    if not unique1 and not unique2:
        print('directory lists are identical')
        return

    for dirname, names in ((dir1, unique1), (dir2, unique2)):
        if names:
            print('files unique to', dirname)
            for name in names:
                print('......', name)
28                 
def difference(seq1, seq2):
    """
    Return a list of the items of seq1 that do not appear in seq2.

    The result keeps the order of seq1. set(seq1) - set(seq2) would give
    the same members, but sets are unordered, so any platform-dependent
    directory order would be lost.
    """
    only_in_first = []
    for candidate in seq1:
        if candidate not in seq2:
            only_in_first.append(candidate)
    return only_in_first
36 
def comparedirs(dir1, dir2, files1=None, files2=None):
    """
    Compare the name listings of two directories and print a report.

    Only names are compared, not file contents. files1 and files2 may be
    given as ready-made listings; when None, os.listdir is called on the
    matching directory. May need a bytes listdir argument for undecodable
    filenames on some platforms.

    Returns True when neither listing has a name missing from the other,
    False otherwise.
    """
    print('comparing', dir1, 'to', dir2)
    files1 = os.listdir(dir1) if files1 is None else files1
    files2 = os.listdir(dir2) if files2 is None else files2
    unique1 = difference(files1, files2)
    unique2 = difference(files2, files1)
    reportdiffs(unique1, unique2, dir1, dir2)
    # A 2-tuple is always truthy, so test the two lists themselves.
    return not (unique1 or unique2)
49 
def getargs():
    """
    Return (dir1, dir2) taken from the command line.

    Expects exactly two arguments after the script name. With any other
    count a usage message is printed and the program exits with status 1.
    """
    try:
        dir1, dir2 = sys.argv[1:]
    except ValueError:
        # Unpacking fails only when the argument count is not two.
        print('usage:dirdiff.py dir1 dir2')
        sys.exit(1)
    else:
        return dir1, dir2
59 
if __name__ == '__main__':
    # Command-line mode: compare the two directories named by the arguments.
    comparedirs(*getargs())
63     
View Code

原文地址:https://www.cnblogs.com/lxk613/p/4827677.html