# 分析肯德基门店信息
# Ask for a city name and print how many KFC stores the store-list endpoint
# reports for it.
import json
import requests

post_url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=cname'
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36'}
all_data = []  # not referenced anywhere else in this snippet
IDs = []  # not referenced anywhere else in this snippet
cname = input('输入城市名: ')

# for page in range(1,10):
# Form fields sent with the POST; only the first page of 10 entries is
# requested.
data = {
    "cname": cname,
    "pid": "",
    "pageIndex": "1",
    "pageSize": "10",
}

# A timeout keeps the script from hanging forever on an unresponsive server,
# and raise_for_status() reports HTTP errors before JSON decoding is tried.
response = requests.post(url=post_url, data=data, headers=headers, timeout=10)
response.raise_for_status()
json_obj = response.json()

num = json_obj['Table'][0]['rowcount']  # total number of restaurants (int)
print("餐厅总数", num)
小贴士: 运行后,输入城市或省份名称,即可得到该地区的肯德基门店数量。
# 小猪短租
from bs4 import BeautifulSoup
import requests
import time

headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36'}

# Seconds to wait for a response before requests gives up; without it a
# stalled connection would block the crawl indefinitely.
REQUEST_TIMEOUT = 10


def judgment_sex(class_name):
    """Map the CSS class list of the host's gender icon to a label.

    Returns '女' when ``class_name`` equals ``['member_ico1']`` and '男' for
    every other value.
    """
    if class_name == ['member_ico1']:
        return '女'
    return '男'


def get_links(url):
    """Fetch one search-result page and process every listing linked on it.

    Each anchor matched by ``#page_list > ul > li > a`` has its ``href``
    handed to :func:`get_info`.
    """
    wb_data = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    for link in soup.select('#page_list > ul > li > a'):
        href = link.get("href")
        # An anchor without an href would make requests.get() fail on None.
        if href:
            get_info(href)


def get_info(url):
    """Fetch one listing page and print a dict of its details.

    The printed dict holds the title, address, price, host image URL, host
    name and host gender. The selector results are zipped together, so a
    record is printed only for positions where every selector matched.
    """
    wb_data = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    titles = soup.select('div.pho_info > h4')
    addresses = soup.select('span.pr5')
    prices = soup.select('#pricePart > div.day_l > span')
    imgs = soup.select('#floatRightBox > div.js_box.clearfix > div.member_pic > a > img')
    names = soup.select('#floatRightBox > div.js_box.clearfix > div.w_240 > h6 > a')
    sexs = soup.select('#floatRightBox > div.js_box.clearfix > div.member_pic > div')
    for title, address, price, img, name, sex in zip(
            titles, addresses, prices, imgs, names, sexs):
        data = {
            # The key keeps its original spelling so the printed output is
            # unchanged.
            'tittle': title.get_text().strip(),
            'address': address.get_text().strip(),
            'price': price.get_text(),
            'img': img.get("src"),
            'name': name.get_text(),
            'sex': judgment_sex(sex.get("class")),
        }
        print(data)


if __name__ == '__main__':
    # Search-result pages 1 through 13.
    urls = ['http://ty.xiaozhu.com/search-duanzufang-p{}-0/'.format(number)
            for number in range(1, 14)]
    for single_url in urls:
        get_links(single_url)
        time.sleep(0.3)  # short pause between result pages
# 酷狗500歌曲
![](https://images.cnblogs.com/OutliningIndicators/ContractedBlock.gif)
from bs4 import BeautifulSoup
import requests
import time

headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36'}

# Seconds to wait for a response before requests gives up; without it a
# stalled connection would block the crawl indefinitely.
REQUEST_TIMEOUT = 10


def judgment_sex(class_name):
    """Return '女' when ``class_name`` equals ``['member_ico1']``, else '男'.

    Nothing in this script calls it; it is kept so existing references to the
    name keep working.
    """
    if class_name == ['member_ico1']:
        return '女'
    return '男'


def _split_title(text):
    """Split a 'singer - song' string into ``(singer, song)``, both stripped.

    Only the first '-' separates the two parts, so a song name that itself
    contains '-' stays intact. When there is no '-' at all the song part is
    an empty string instead of an IndexError.
    """
    singer, _, song = text.partition('-')
    return singer.strip(), song.strip()


def get_info(url):
    """Fetch one chart page and print rank, singer, song and time per entry.

    The selector results are zipped together, so a record is printed only for
    positions where every selector matched.
    """
    wb_data = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    ranks = soup.select('span.pc_temp_num')
    titles = soup.select('div.pc_temp_songlist > ul > li > a')
    times = soup.select(' span.pc_temp_tips_r > span')
    # The loop variable is named "duration" so it does not shadow the
    # imported time module.
    for rank, title, duration in zip(ranks, titles, times):
        singer, song = _split_title(title.get_text())
        data = {
            'rank': rank.get_text().strip(),
            'singer': singer,
            'song': song,
            'time': duration.get_text().strip(),
        }
        print(data)


if __name__ == '__main__':
    # Chart pages 1 through 23.
    urls = ['http://www.kugou.com/yy/rank/home/{}-8888.html'.format(number)
            for number in range(1, 24)]
    for single_url in urls:
        get_info(single_url)
        time.sleep(0.3)  # short pause between chart pages
![](https://images.cnblogs.com/OutliningIndicators/ContractedBlock.gif)
# Extract every piece of text enclosed between consecutive 'xx' markers.
import re

a = 'xxIxxjshdxxlovexxsffaxxpythonxx'

# The non-greedy group (.*?) captures the shortest text between two 'xx'
# markers; re.S lets '.' match newlines as well.
infos = re.findall(r'xx(.*?)xx', a, re.S)
print(infos)  # ['I', 'love', 'python']
![](https://images.cnblogs.com/OutliningIndicators/ContractedBlock.gif)
# Strip digits and punctuation from a string, leaving words separated by
# single spaces.
import re

phone = ''' 123.-4,567,1234,jdd,cdc.23,the wfewf ,fer3t '''

# Raw strings keep the backslashes: \d+ matches runs of digits and \W+ matches
# runs of non-word characters. Without the backslash the patterns would match
# the literal letters 'd' and 'W' instead.
new_phone = re.sub(r"\d+", ' ', phone)
aa = re.sub(r'\W+', ' ', new_phone)
print(aa)