Python 爬虫入门——抓取 Wiki 词条

from bs4 import BeautifulSoup
import re 
from urllib import request
# Fetch the mobile English Wikipedia main page and print, for every internal
# article link found on it, the link text followed by the absolute URL.

# Single base URL shared by the fetch and by the printed links, so both use
# the same scheme and host.
WIKI_BASE_URL = "https://en.m.wikipedia.org"

# The context manager closes the HTTP response as soon as the body has been
# read, instead of leaving the connection open until garbage collection.
with request.urlopen(WIKI_BASE_URL + "/wiki/Main_Page") as response:
    page_html = response.read().decode("utf-8")

soup = BeautifulSoup(page_html, "html.parser")

# Only anchors whose href starts with "/wiki/" are considered.
for tag in soup.find_all("a", href=re.compile(r"^/wiki/")):
    href = tag["href"]
    # Skip links that point at JPEG files. A literal suffix test is used
    # because the previous pattern ".(jpg|JPG)$" left the dot unescaped and
    # therefore also rejected hrefs such as "/wiki/Xjpg".
    if href.endswith((".jpg", ".JPG")):
        continue
    print(tag.get_text(), "<--->", WIKI_BASE_URL + href)
    
原文地址:https://www.cnblogs.com/lx-1024/p/8025295.html