# 一个简单的爬虫 (A simple web crawler)

import requests
from bs4 import BeautifulSoup


# Scrape the 'data-meta' attribute of every result anchor on the booli.se
# listing pages and write one value per line to bostad.txt.

# Listing URL with its query filters (objectType, rooms, minSoldDate); the
# page number is appended to the trailing "page=" parameter.
baseurl = "https://www.booli.se/slutpriser/sundsvall/249/?objectType=l%C3%A4genhet&rooms=2,3,4,5&minSoldDate=2014-01-01&page="

# The context manager closes the output file even if a request or the parse
# raises part-way through the run.
with open("bostad.txt", "w", encoding="utf-8") as f:
    for n in range(1, 100):  # pages 1 through 99
        url = baseurl + str(n)  # generate page url

        # fetch page; the timeout stops one stalled connection from hanging
        # the whole run
        content = requests.get(url, timeout=30)

        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(content.text, "html.parser")

        # collect the 'data-meta' value of every <a> whose class attribute is
        # "hit__anchor js__hit__anchor"
        anchors = soup.find_all("a", class_="hit__anchor js__hit__anchor")
        bostad = [a.attrs.get("data-meta") for a in anchors]

        # write file
        for line in bostad:
            if line is None:
                # Anchor without a 'data-meta' attribute: nothing to record,
                # and None + "\n" would raise TypeError.
                continue
            f.write(line + "\n")

        # show progress: the page URL followed by the Response object
        print(url + "\n")
        print(content)
# 原文地址 (original source): https://www.cnblogs.com/firecloud/p/5468710.html