URL extraction (untested)

# Characters accepted inside the path part of a URL or a file path.
# NOTE(review): the previous text of this snippet had the garbled sequence
# `\$\$` where the parentheses belong (the list otherwise follows the
# RFC 1738 "safe"/"extra" characters, in which `(` `)` sit between `*` and
# `,`); restored to `\(\)` here -- confirm against the upstream snippet.
_URL_CHARS = r"[-A-Za-z0-9_\$\.\+\!\*\(\),;:@&=\?/~\#\%]"

# Either a dotted quad, or a host introduced by `scheme://` or by a
# `www`/`ftp`-prefixed label, followed by an optional `:port`.
# NOTE(review): "nttp" is presumably a typo for "nntp"; left unchanged so the
# pattern stays identical to the original -- confirm before relying on it.
_HOST_AND_PORT = (
    r"([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}"
    r"|(((news|telnet|nttp|file|http|ftp|https)://)|(www|ftp)[-A-Za-z0-9]*\.)"
    r"[-A-Za-z0-9\.]+)"
    r"(:[0-9]*)?"
)

# Compiled patterns for pulling URL-like strings out of free text, in order:
#   0. host[:port] followed by "/" and a path; the final character of the
#      match must not be one of  ] ' . } > ) , "
#   1. host[:port] only, with no path.
#   2. a file path starting with "~/", "/" or "./"; a backslash is also
#      accepted as a path character.
#   3. an e-mail address with an optional "mailto:" prefix.
urlfinders = [
    re.compile(_HOST_AND_PORT + "/" + _URL_CHARS + r"*[^]'\.}>\),\"]"),
    re.compile(_HOST_AND_PORT),
    # NOTE(review): the original string literal was split by a stray newline
    # right after the escaped backslash, which made the snippet a syntax
    # error; it is rejoined here as `|\\)+` -- confirm against upstream.
    re.compile(r"(~/|/|\./)(" + _URL_CHARS + r"|\\)+"),
    # NOTE(review): in Python's `re`, `\<` is a literal "<" (not a word
    # boundary as in Emacs regexes), so this only matches text that starts
    # with the two characters '< -- kept as-is; confirm this is intended.
    re.compile(r"'\<((mailto:)|)[-A-Za-z0-9\.]+@[-A-Za-z0-9\.]+"),
]

Yet another horrible version can be found here:

http://stackoverflow.com/questions/827557/how-do-you-validate-a-url-with-a-regular-expression-in-python