取出所有的链接

# 取出所有的链接

# coding: utf-8
import sys, os, random, time, datetime
import urllib, re

# Matches an opening <a ...> tag that carries an href attribute.  findall()
# yields one 4-tuple per match: index 1 holds the URL when the attribute is
# double-quoted, index 3 holds it when the attribute is single-quoted; the
# slots belonging to the alternative that did not match are empty strings.
LINK_PATTERN = re.compile(
    r'''(<a[^>]*?href="([^"]+)"[^>]*?>)|(<a[^>]*?href='([^']+)'[^>]*?>)''')


def extract_links(html):
    """Return the href value of every <a> tag found in *html*, in document order.

    Both double-quoted and single-quoted href attributes are recognised.
    An input without any matching tag yields an empty list.
    """
    # Exactly one of the two URL slots is non-empty for any given match, so
    # `or` selects whichever alternative actually matched.
    return [match[1] or match[3] for match in LINK_PATTERN.findall(html)]


def main():
    """Download the qq.com home page and print each link target on its own line."""
    # Python 2-only hack kept from the original script: reload(sys) makes
    # sys.setdefaultencoding reachable again so the implicit str/unicode
    # codec can be switched to UTF-8.
    reload(sys)
    sys.setdefaultencoding('utf-8')

    response = urllib.urlopen("http://www.qq.com")
    try:
        text = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    for link in extract_links(text):
        print(link)


if __name__ == "__main__":
    main()

原文地址:https://www.cnblogs.com/xiongwei/p/3358805.html