KFC 小猪短租

# 分析肯德基门店信息

import requests,json
# Store-list endpoint; the op=cname query selects lookup by city name.
post_url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=cname'
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36'}
# NOTE(review): all_data and IDs are never filled in the visible snippet;
# they are kept in case a later part of the script relies on them.
all_data = []
IDs =[]
cname = input('输入城市名: ')
# Only the first page is requested; the total count is read from that
# single response below.
data = {
    "cname": cname,
    "pid": "",
    "pageIndex": "1",
    "pageSize": "10",
}
# The timeout keeps the script from blocking forever when the server
# never answers (requests waits indefinitely by default).
json_obj = requests.post(
    url=post_url, data=data, headers=headers, timeout=10
).json()
num = json_obj['Table'][0]['rowcount']  # total number of restaurants
print("餐厅总数",num)

小贴士: 运行后, 输入城市名或省份名, 即可得到该地区的肯德基门店总数。

 # 小猪短租

from bs4 import BeautifulSoup
import requests
import time
# Browser-like User-Agent header sent with every request in this snippet.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/69.0.3497.81 Safari/537.36'
    ),
}
def judgment_sex(class_name):
    """Turn the host badge's CSS class list into a gender label.

    Args:
        class_name: The badge element's ``class`` attribute as a list of
            class names (callers pass ``tag.get("class")``).

    Returns:
        '女' when the list is exactly ``['member_ico1']``, otherwise '男'.
    """
    # Both branches used to return '', so the 'sex' field never carried
    # any information; the two labels below restore the distinction.
    # NOTE(review): assumes 'member_ico1' marks a female host on the site
    # - confirm against the live page markup.
    if class_name == ['member_ico1']:
        return '女'
    return '男'

def get_links(url):
    """Fetch one search-result page and scrape every listing linked from it.

    Each anchor matched by ``#page_list > ul > li > a`` is followed by
    passing its ``href`` to ``get_info``.

    Args:
        url: URL of a search-result page.
    """
    # Timeout so that a single stalled response cannot hang the crawl.
    wb_data = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    for link in soup.select('#page_list > ul > li > a'):
        href = link.get("href")
        # An anchor without an href cannot be followed; passing None on
        # would make the detail request fail.
        if href:
            get_info(href)

def get_info(url):
    """Fetch one listing page and print a record of its details.

    The printed dict has the keys 'tittle', 'address', 'price', 'img',
    'name' and 'sex'; 'sex' is produced by ``judgment_sex`` from the
    badge element's class list.

    Args:
        url: URL of a listing detail page.
    """
    # Timeout so that a single stalled response cannot hang the crawl.
    wb_data = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    host_box = '#floatRightBox > div.js_box.clearfix'
    titles = soup.select('div.pho_info > h4')
    addresses = soup.select('span.pr5')
    prices = soup.select('#pricePart > div.day_l > span')
    imgs = soup.select(host_box + ' > div.member_pic > a > img')
    names = soup.select(host_box + ' > div.w_240 > h6 > a')
    sexs = soup.select(host_box + ' > div.member_pic > div')
    # zip stops at the shortest list, so nothing is printed when any of
    # the selectors matches no element.
    for title, address, price, img, name, sex in zip(
            titles, addresses, prices, imgs, names, sexs):
        data = {
            # The misspelled key is kept because it is part of the output.
            'tittle': title.get_text().strip(),
            'address': address.get_text().strip(),
            'price': price.get_text(),
            'img': img.get("src"),
            'name': name.get_text(),
            'sex': judgment_sex(sex.get("class")),
        }
        print(data)
if __name__ == '__main__':
    # Build the URLs of search-result pages 1 through 13, then crawl them
    # one at a time with a short pause after each page.
    urls = []
    for number in range(1, 14):
        urls.append('http://ty.xiaozhu.com/search-duanzufang-p{}-0/'.format(number))
    for single_url in urls:
        get_links(single_url)
        time.sleep(0.3)

 # 酷狗500歌曲

from bs4 import BeautifulSoup
import requests
import time
# Browser-like User-Agent header sent with every request in this snippet.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/69.0.3497.81 Safari/537.36'
    ),
}
def judgment_sex(class_name):
    """Turn a badge element's CSS class list into a gender label.

    NOTE(review): nothing in the visible Kugou snippet calls this function;
    it looks like a leftover from the Xiaozhu example - confirm before
    removing it.

    Args:
        class_name: A ``class`` attribute value as a list of class names.

    Returns:
        '女' when the list is exactly ``['member_ico1']``, otherwise '男'.
    """
    # Both branches used to return '', which made the result meaningless;
    # the two labels below restore the distinction.
    # NOTE(review): assumes 'member_ico1' marks a female host - confirm
    # against the page markup.
    if class_name == ['member_ico1']:
        return '女'
    return '男'


def get_info(url):
    """Fetch one ranking page and print a record for each song on it.

    Each printed dict has the keys 'rank', 'singer', 'song' and 'time'.

    Args:
        url: URL of a ranking page.
    """
    # Timeout so that a single stalled response cannot hang the crawl.
    wb_data = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    ranks = soup.select('span.pc_temp_num')
    titles = soup.select('div.pc_temp_songlist > ul > li > a')
    durations = soup.select(' span.pc_temp_tips_r > span')

    # The loop variable is not called `time`, so the `time` module is not
    # shadowed inside this function.
    for rank, title, duration in zip(ranks, titles, durations):
        # presumably the link text reads "singer - song"; verify on the page.
        # Splitting on the first '-' only keeps hyphens inside the song
        # name, and a title with no '-' gives an empty song instead of
        # raising IndexError.
        singer, _, song = title.get_text().partition('-')
        data = {
            'rank': rank.get_text().strip(),
            # Stripped like rank and time, so the padding around the
            # separator does not leak into the values.
            'singer': singer.strip(),
            'song': song.strip(),
            'time': duration.get_text().strip(),
        }
        print(data)
if __name__ == '__main__':
    # Build the URLs of ranking pages 1 through 23, then scrape them one
    # at a time with a short pause after each page.
    urls = []
    for number in range(1, 24):
        urls.append('http://www.kugou.com/yy/rank/home/{}-8888.html'.format(number))
    for single_url in urls:
        get_info(single_url)
        time.sleep(0.3)
酷狗500
import re
# Sample text in which each word of interest sits between 'xx' markers.
a = 'xxIxxjshdxxlovexxsffaxxpythonxx'
# Non-greedy group: capture the shortest text between each pair of 'xx'
# markers. re.S lets '.' match newlines as well.
infos = re.compile('xx(.*?)xx', re.S).findall(a)
print(infos)
# ['I', 'love', 'python']
re 示例: 用 findall 提取 xx 之间的内容
import re
# Sample text mixing digits, punctuation and words.
phone = '''
123.-4,567,1234,jdd,cdc.23,the
wfewf ,fer3t
'''
# The patterns had lost their backslashes ("d+" / 'W+'), so they matched
# the literal letters d and W. Raw strings with \d and \W match digits
# and non-word characters.
# Step 1: replace every run of digits with a single space.
new_phone = re.sub(r"\d+", ' ', phone)
# Step 2: collapse every run of non-word characters (punctuation and
# whitespace) into a single space.
aa = re.sub(r'\W+', ' ', new_phone)
print(aa)
re 示例: 用 sub 匹配并替换数字和标点符号
原文地址:https://www.cnblogs.com/zhangchen-sx/p/10902352.html