wooyun本地数据抓取

----

#-*-coding:utf-8-*-
import re
import urllib
import MySQLdb
import time
from urllib import unquote


def getHtml(url):
    """Download ``url`` and return its body with layout whitespace removed.

    Line feeds are dropped so the whole page becomes one line; the extractor
    patterns in this file use ``.`` without DOTALL, so they presumably depend
    on that. Fixed-width runs of blanks are then collapsed.

    Args:
        url: address passed straight to ``urllib.urlopen``.

    Returns:
        The response body as returned by ``read()``, after the replacements.

    Raises:
        IOError: propagated from ``urllib.urlopen`` / ``read`` on failure.
    """
    page = urllib.urlopen(url)
    try:
        html = page.read()
    finally:
        # Release the connection even when the read fails part-way.
        page.close()
    html = html.replace('\n', '')
    # NOTE(review): the blank runs below are reproduced exactly as listed;
    # they may have been tab characters before the listing was reformatted
    # -- confirm against real page content.
    html = html.replace('       ', ' ')
    html = html.replace('   ', '')
    html = html.replace('   ', '')
    return html


def gettitle(mylist):
    """Return the link texts of ``/bugs/wooyun-...`` anchors in ``mylist``.

    Only anchors immediately followed by ``</td>`` are matched. The href part
    of the pattern is greedy, so several anchors on the same line collapse
    into one match that captures the text of the last one.

    Args:
        mylist: HTML text to search.

    Returns:
        A list of captured link texts (empty when nothing matches).
    """
    anchor_pattern = re.compile(r'<a href="/bugs/wooyun-.+">(.*?)</a></td>')
    return anchor_pattern.findall(mylist)


def getoper(html):
    """Return the text after the first ``/whitehats/`` prefix in ``html``.

    The capture ends at the first ``">`` that follows the prefix.

    Args:
        html: HTML text to search.

    Returns:
        The captured string of the first match.

    Raises:
        IndexError: if ``html`` contains no match.
    """
    hits = re.compile(r'/whitehats/(.*?)">').findall(html)
    return hits[0]
#-------------------------------------------------


def GetTitle(html):
    """Collect the text between each "漏洞标题:" label and the next ``</h3>``.

    The label is Chinese for "vulnerability title".

    Args:
        html: HTML text to search.

    Returns:
        A list of captured strings, unstripped; empty when nothing matches.
    """
    return re.findall(r"漏洞标题:(.*?)</h3>", html)

def BugNum(html):
    """Collect the bug identifiers from "查看原始来源" links in ``html``.

    The identifier is whatever sits between ``http://wooyun.org/bugs/`` and
    the ``">`` that directly precedes the link text "查看原始来源"
    ("view original source").

    Args:
        html: HTML text to search.

    Returns:
        A list of captured strings; empty when nothing matches.
    """
    source_link = re.compile(r'http://wooyun.org/bugs/(.*?)">查看原始来源')
    return [hit.group(1) for hit in source_link.finditer(html)]

def JiaFang(html):
    """Collect the path segments that follow ``http://www.wooyun.org/corps/``.

    Each capture ends at the first ``">`` after the prefix. The value is
    returned as found in the page (the script later URL-unquotes it).

    Args:
        html: HTML text to search.

    Returns:
        A list of captured strings; empty when nothing matches.
    """
    corp_link = r'http://www.wooyun.org/corps/(.*?)">'
    return re.findall(corp_link, html)


def SubmitTime(html):
    """Collect the text between each "提交时间:" label and the next ``</h3>``.

    The label is Chinese for "submission time".

    Args:
        html: HTML text to search.

    Returns:
        A list of captured strings, unstripped; empty when nothing matches.
    """
    submitted = re.compile(r"提交时间:(.*?)</h3>")
    return submitted.findall(html)


def OpenTime(html):
    """Collect the text between each "公开时间:" label and the next ``</h3>``.

    The label is Chinese for "disclosure time".

    Args:
        html: HTML text to search.

    Returns:
        A list of captured strings, unstripped; empty when nothing matches.
    """
    return [hit.group(1) for hit in re.finditer(r"公开时间:(.*?)</h3>", html)]


def BugClass(html):
    """Collect the text between each "漏洞类型:" label and the next ``</h3>``.

    The label is Chinese for "vulnerability type".

    Args:
        html: HTML text to search.

    Returns:
        A list of captured strings, unstripped; empty when nothing matches.
    """
    type_pattern = r"漏洞类型:(.*?)</h3>"
    return re.findall(type_pattern, html)


def level(html):
    """Collect the text between each "危害等级:" label and the next ``</h3>``.

    The label is Chinese for "severity level".

    Args:
        html: HTML text to search.

    Returns:
        A list of captured strings, unstripped; empty when nothing matches.
    """
    severity = re.compile(r"危害等级:(.*?)</h3>")
    return severity.findall(html)


def BugState(html):
    """Return the stripped text after the first "漏洞状态:" label.

    The label is Chinese for "vulnerability status"; the capture ends at the
    next ``</h3>``.

    Args:
        html: HTML text to search.

    Returns:
        The first captured string with surrounding whitespace removed.

    Raises:
        IndexError: if ``html`` contains no match.
    """
    first_hit = re.findall(r"漏洞状态:(.*?)</h3>", html)[0]
    return first_hit.strip()


def BugSave(html):
    """Collect the contents of ``collection_num`` anchors followed by "人收藏".

    "人收藏" is Chinese for "people bookmarked".

    Args:
        html: HTML text to search.

    Returns:
        A list of captured strings; empty when nothing matches.
    """
    bookmark_counter = re.compile(r'<a id="collection_num">(.*?)</a>人收藏')
    return [hit.group(1) for hit in bookmark_counter.finditer(html)]


def OkTime(html):
    """Collect the text between each "确认时间:" label and the next ``</p>``.

    The label is Chinese for "confirmation time".

    Args:
        html: HTML text to search.

    Returns:
        A list of captured strings, unstripped; empty when nothing matches.
    """
    return re.compile(r"确认时间:(.*?)</p>").findall(html)

def Bugrank(html):
    """Collect the text between each "漏洞Rank:" label and the next ``</p>``.

    Args:
        html: HTML text to search.

    Returns:
        A list of captured strings, unstripped; empty when nothing matches.
    """
    rank_pattern = r"漏洞Rank:(.*?)</p>"
    return [hit.group(1) for hit in re.finditer(rank_pattern, html)]

def BugMark(html):
    """Collect the text between each "Tags标签:" label and the next ``</h3>``.

    "标签" is Chinese for "tags".

    Args:
        html: HTML text to search.

    Returns:
        A list of captured strings, unstripped; empty when nothing matches.
    """
    tags_pattern = re.compile(r"Tags标签:(.*?)</h3>")
    return tags_pattern.findall(html)

def ignoreTime(html):
    """Collect the text between each "忽略时间:" label and the next ``</p>``.

    The label is Chinese for "ignored time". When the page carries no such
    label, a one-element list holding the placeholder timestamp is returned.
    The fallback is a list, like the match result, so that callers indexing
    ``[0]`` get the whole placeholder and not just its first character.

    Args:
        html: HTML text to search.

    Returns:
        A non-empty list of strings: the unstripped captures, or
        ``['1900-01-01 00:00:00']`` when nothing matches.
    """
    found = re.findall(r"忽略时间:(.*?)</p>", html)
    if found:
        return found
    return ['1900-01-01 00:00:00']

def Bugeye(html):
    """Return the stripped contents of the ``attention_num`` span.

    The capture is greedy: on a single line it runs up to the last
    ``</span>`` on that line, not the nearest one.

    Args:
        html: HTML text to search.

    Returns:
        The first captured string with surrounding whitespace removed.

    Raises:
        IndexError: if ``html`` contains no match.
    """
    attention = re.compile(r'<span id="attention_num">(.*)</span>')
    first_hit = attention.findall(html)[0]
    return first_hit.strip()


# Database connection shared by every insert below.
# NOTE(review): host and credentials are hard-coded; consider moving them to
# configuration before running this outside the original environment.
conn = MySQLdb.connect(
    host='192.168.1.1',
    port=3306,
    user='root',
    passwd='root',
    db='wooyunTongji',
    charset='utf8'
)

# Running count of stored records; also written to the ``id`` column.
mark = 0

# Parameterized insert: one placeholder per column, values bound by the driver.
mysql1 = ('insert into alldata (id,title,BugNum,jiafang,oper,submittime,'
          'opentime,bugclass,level,bugrank,bugstate,oktime,okstyle,bugmark) '
          'values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)')

try:
    for i in range(53022, 89250, 1):
        Url = 'http://192.168.1.106/wooyun/select.php?id=' + str(i)
        try:
            Html = getHtml(Url)
        except Exception:
            # Best-effort crawl: report the failure and move on. Skipping is
            # required here, otherwise the previous page's HTML (or an
            # undefined name on the first pass) would be processed again.
            print('error')
            continue

        # Responses of 100 characters or fewer are not parsed.
        if len(Html) <= 100:
            continue

        # A report is either confirmed or ignored by the vendor; record
        # whichever timestamp the page carries.
        confirmed = OkTime(Html)
        if confirmed:
            whotime = confirmed[0].strip()
            whostyle = '确认'
        else:
            ignored = ignoreTime(Html)
            # ignoreTime may hand back a bare placeholder string when nothing
            # matched; indexing that would keep only its first character.
            whotime = ignored[0].strip() if isinstance(ignored, list) else ignored
            whostyle = '忽略'

        ranks = Bugrank(Html)
        BugrankFal = ranks[0] if ranks else '0'

        # Extract every field once and reuse it for both the log line and
        # the insert.
        row = (
            GetTitle(Html)[0].strip(),
            BugNum(Html)[0].strip(),
            unquote(JiaFang(Html)[0].strip()),
            unquote(getoper(Html)),
            SubmitTime(Html)[0].strip(),
            OpenTime(Html)[0].strip(),
            BugClass(Html)[0].strip(),
            level(Html)[0],
            BugrankFal,
            BugState(Html),
            whotime,
            whostyle,
            BugMark(Html)[0].strip(),
        )
        print(' '.join(row))

        mark += 1
        cur = conn.cursor()
        try:
            cur.execute(mysql1, (mark,) + row)
        finally:
            cur.close()
        # Commit per record so an interrupted run keeps what it has stored.
        conn.commit()
        print(mark)
finally:
    # Close the connection even when a page fails to parse or an insert
    # raises.
    conn.close()


print('Over!')

  

原文地址:https://www.cnblogs.com/crac/p/5748308.html