爬虫-某游戏交易网站商品信息爬取

爬虫-某游戏交易网站商品信息爬取

本站所有与爬虫相关的技术仅限于学习交流,请勿商用!切勿给对方服务器造成压力!

from requests_html import HTMLSession

# Request headers sent with every call; the user-agent string imitates a
# desktop Chrome browser.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/75.0.3770.142 Safari/537.36'
    ),
}

# Search entry point on s.5173.com. The query string is split per parameter
# purely for readability; the concatenated value is a single URL.
url_api = (
    "http://s.5173.com/search/20c8bbc1b9794fc98bd96859624d4769.shtml"
    "?gp=&op="
    "&ga=9ae12dfc4ab641baacdb406f6d0df692"
    "&gs=39621d91f05c48dab33f15a5cb335894"
    "&cate=243ab3c1e7614ba2b8e96a3a43754603"
    "&keyword="
)

# One shared session, reused by every request made in this script.
session = HTMLSession()


def get_url(url_api):
    """Return the canonical URL advertised by the page at *url_api*.

    The page is fetched with the module-level ``session`` and ``headers``,
    and the ``href`` of its ``<link rel="canonical" ... />`` tag is
    extracted.

    Args:
        url_api: URL of the search page to fetch.

    Returns:
        The ``href`` value of the canonical link tag.

    Raises:
        ValueError: If the response contains no matching canonical link tag.
    """
    # A timeout keeps the script from hanging forever on an unresponsive
    # server (requests applies no timeout by default).
    r = session.get(url=url_api, headers=headers, timeout=10)

    # ``search`` returns None when the template does not match; check for
    # that explicitly instead of failing with an opaque TypeError on ``[0]``.
    match = r.html.search('<link rel="canonical" href="{}" />')
    if match is None:
        raise ValueError(
            f"no canonical link found in the response for {url_api!r}"
        )
    return match[0]


def _field_text(item, selector, default='N/A'):
    """Return the text of the first node under *item* matching *selector*.

    Falls back to *default* when no node matches, so a listing with a
    missing field does not abort the whole run.
    """
    node = item.find(selector, first=True)
    return default if node is None else node.text


def get_shop_info(url):
    """Fetch the listing page at *url* and print each item's details.

    For every ``.sin_pdlbox`` element on the page this prints the item name
    ("商品名"), the ratio ("比例") and the total price ("总价"), followed by a
    separator line of asterisks. A field whose element is missing from a
    listing is printed as ``N/A`` instead of raising ``AttributeError``.

    Args:
        url: URL of the listing page to fetch.

    Returns:
        None. Results are written to standard output.
    """
    # A timeout keeps the script from hanging forever on an unresponsive
    # server (requests applies no timeout by default).
    r = session.get(url=url, headers=headers, timeout=10)

    for item in r.html.find('.sin_pdlbox'):
        print("商品名:", _field_text(item, '.pdlist_info .tt h2'))
        print("比例:", _field_text(item, '.pdlist_unitprice > li'))
        print("总价:", _field_text(item, '.pdlist_price .pr'))
        print('*' * 100)


if __name__ == '__main__':
    # Step 1: replace the search URL with the canonical URL the page reports.
    url_api = get_url(url_api=url_api)
    # Step 2: print every listing found on that canonical page.
    get_shop_info(url=url_api)

原文地址:https://www.cnblogs.com/bladecheng/p/12076725.html