fun下载内容批量收集

1. Download titles and URLs

#!/usr/bin/env python

#-*- coding:utf-8 -*-

import re, urllib2,threading

def geturltitle(match, file, page=None):
    """Write the download URL from *match* and the page title to *file*.

    Args:
        match: Regex match object whose matched text is the download URL,
            optionally still prefixed with the literal ``[mukio=file]`` tag.
        file: Writable file-like object that receives the record.
        page: HTML text to take the title from.  When omitted, the
            module-level ``respread`` is used, which is what this function
            always read before the parameter existed.

    One record is written per call as ``<url> <title>`` followed by a
    newline, so the output can be read back line by line.  The URL and,
    when a title is found, the title are also printed.
    """
    if page is None:
        page = respread

    # Remove the literal tag.  The brackets must be escaped: unescaped,
    # ``[mukio=file]`` is a character class that strips one arbitrary letter.
    downurl = re.sub(r'^\[mukio=file\]', '', match.group())
    print(downurl)

    # Capture only the attribute value; a greedy ``.*"`` would run to the
    # last quote on the line and keep the closing quote in the title.
    title = ''
    title_match = re.search(r'<meta name="keywords" content="([^"]*)"', page)
    if title_match:
        title = title_match.group(1).strip()
        print(title)

    record = ' '.join(part for part in (downurl, title) if part)
    if record:
        file.write(record + '\n')

# Crawl forum threads 3600..8999 and record every embedded mp4 link.

# Matches the link inside a ``[mukio=file]...mp4`` player tag.  The tag is
# escaped (unescaped it is a character class) and kept in a lookbehind so
# that match.group() is the bare URL.
url_pattern = re.compile(r'(?<=\[mukio=file\])\S.*mp4')

# The name and the mode need a comma between them: 'avfun1.txt''w' is a single
# concatenated string, which opens "avfun1.txtw" for reading.
with open('avfun1.txt', 'w') as out:
    for n in range(3600, 9000):
        try:
            resp = urllib2.urlopen(
                'http://www.avfun1.com/forum.php?mod=viewthread&tid='
                + repr(n) + '&mobile=yes', timeout=2)
            try:
                # Kept as a module-level name: geturltitle reads it.
                respread = resp.read()
            finally:
                resp.close()

            match = url_pattern.search(respread)
            print("pid = " + repr(n))

            if match:
                # Previously wrapped as Thread(target=geturltitle(match, file)),
                # which ran the call right here and then started a thread with
                # target=None.  Calling directly does the same work without
                # the no-op thread.
                geturltitle(match, out)
        except Exception as e:
            # Best-effort crawl: report the failure and move on to the next id.
            print(e)

2. Rename the downloaded files using the titles from the file

#!/usr/bin/env python
# -*- coding:utf-8 -*-

import re, os

# Rename each downloaded video to the title recorded for it in avfun1.txt.

base_dir = "/Users/apple/Downloads/avfun1/"  # directory holding avfun1.txt

if os.path.isdir(base_dir):  # make sure the directory is really there
    print("Directory exists!")
else:
    print("Directory not exist.")
    # Nothing below can work without the directory; stop here instead of
    # failing inside os.listdir.
    raise SystemExit(1)

video_dir = base_dir + 'aaa'
filelist = os.listdir(video_dir)

# The name and the mode need a comma between them: 'avfun1.txt''rb' is a
# single concatenated string, i.e. the file name "avfun1.txtrb".
with open(base_dir + 'avfun1.txt', 'rb') as index_file:
    index_text = index_file.read()

for name in filelist:
    # Look for "<name> <title>" and capture the title up to the end of the
    # line.  The name is escaped so that its '.' only matches a literal dot.
    match = re.search(re.escape(name) + r' (\S.*)', index_text)

    if match:
        # A '/' in the title would be taken as a directory separator by
        # os.rename, so replace it.
        title = match.group(1).strip().replace('/', '_')
        newfile = title + '.mp4'  # the matched title becomes the new name
        print(name)
        print(newfile)
        os.rename(os.path.join(video_dir, name), os.path.join(video_dir, newfile))
    else:
        print(match)  # always None here: no title recorded for this file

原文地址:https://www.cnblogs.com/jackyshan/p/4376312.html