# Download source code snippets from oschina.net

import urllib2
import urllib
import re
import sqlite3


# Announce that the scratch table is about to be (re)created.
print('Create table test')

# Open the SQLite file in the current directory; sqlite3 creates it when it
# does not exist yet.
db = sqlite3.connect('test.db')
# Hand TEXT values to ``str`` when reading, and return rows as sqlite3.Row
# objects so columns can be accessed by name.
db.text_factory = str
db.row_factory = sqlite3.Row

# Start from an empty ``test`` table on every run: drop any previous copy,
# then create it again with the two text columns the scraper fills in.
for _ddl in ('drop table if exists test',
             'create table test (title text, name text)'):
    db.execute(_ddl)


# Scrape the Python code listing of oschina.net and store every snippet's
# title and source code in the ``test`` table of the already-open ``db``.
#
# The patterns are compiled once up front instead of once per snippet.
# They are raw strings, the literal dots of the host name are escaped, and
# the tag-name classes are written as [Hh]/[Aa]: the previous [H|h] form
# also matched a literal '|' character.
_SNIPPET_LINK_RE = re.compile(r'http://www\.oschina\.net/code/snippet(.*?)"')
_TITLE_RE = re.compile(r'<[Hh]1><[Aa].+>(.*?)</[Aa]></[Hh]1>')
_CODE_RE = re.compile(
    r'''<div class='code_pieces code_module'>.*">(.*?)</pre>.*<div id='related_codes' class='CodeList code_module'>''',
    re.S)


def _fetch(url):
    """Return the body of *url*, always closing the HTTP response."""
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def _save_snippet(suffix):
    """Download one snippet page and insert its title and code into ``test``.

    *suffix* is the part of the snippet URL that follows
    ``http://www.oschina.net/code/snippet``.

    Returns the stored title, or ``None`` when the page does not contain
    the expected title or code markup (nothing is inserted in that case).
    Network and database errors propagate to the caller.
    """
    html = _fetch('http://www.oschina.net/code/snippet%s' % suffix)
    # search() yields the same first match that findall()[0] would, without
    # scanning the rest of the page and without an IndexError on no match.
    title_match = _TITLE_RE.search(html)
    code_match = _CODE_RE.search(html)
    if title_match is None or code_match is None:
        return None
    title = title_match.group(1)
    db.execute('insert into test (title, name ) values (?, ?)',
               (title, code_match.group(1)))
    # Commit per snippet so an interrupted run keeps what was already saved.
    db.commit()
    return title


try:
    for page_no in range(14):
        try:
            listing = _fetch(
                'http://www.oschina.net/code/list/7/python?show=time&p=%d'
                % page_no)
        except Exception as err:
            # Best effort: report the failed listing page (with the reason)
            # and move on. Unlike a bare ``except:``, this lets
            # KeyboardInterrupt and SystemExit stop the script.
            print('page %d failed: %r' % (page_no, err))
            continue

        for suffix in _SNIPPET_LINK_RE.findall(listing):
            # Failures are handled per snippet so that one broken page no
            # longer discards the remaining snippets of the same listing.
            try:
                title = _save_snippet(suffix)
            except Exception as err:
                print('snippet%s failed: %r' % (suffix, err))
                continue
            if title is None:
                print('snippet%s skipped: title or code block not found'
                      % suffix)
            else:
                print(title)
finally:
    # Release the database handle even if the run is interrupted.
    db.close()
# Original article: https://www.cnblogs.com/pythonschool/p/2744436.html