# 爬取音悦台MV信息(requests,BeautifulSoup,xlwt)----待完善

import requests
from bs4 import BeautifulSoup
import xlwt   #写入Excel的库


def excel_write(MV_list):
    """Write MV records to the Excel file ``MV.xls``.

    A sheet named ``MV_name`` is created with a bold header row, followed by
    one row per record. Each cell value is also printed as it is written.

    Args:
        MV_list: Iterable of records; each record is an indexable sequence
            with at least five items, written in header order (name, link,
            artist name, play count, description).

    Raises:
        IndexError: If a record has fewer items than there are header columns.

    The workbook is saved exactly once, after all rows have been written, so
    an empty ``MV_list`` still produces a file containing only the header row.
    """
    newtable = 'MV.xls'                            # output file name
    wb = xlwt.Workbook(encoding='utf-8')           # new workbook
    ws = wb.add_sheet('MV_name')                   # sheet name
    headData = ['名称', '链接', '歌手名称', '播放次数', '简介']   # header titles

    # Row 0 holds the column titles, rendered in bold.
    bold = xlwt.easyxf('font:bold on')
    for colnum, title in enumerate(headData):
        ws.write(0, colnum, title, bold)

    # Data rows start at row 1. Iterate the argument itself rather than a
    # module-level global so the function works with any list passed in.
    for index, record in enumerate(MV_list, start=1):
        for i in range(len(headData)):
            print(record[i])
            ws.write(index, i, record[i])

    # Save once, after all rows are in place, instead of once per row.
    wb.save(newtable)

# Scrape the weekly-views MV listing and export the collected rows to Excel.
page = range(1,3)   # listing page numbers to fetch (1 and 2)
MV_lists = []       # one (title, link, artist, play count, description) tuple per MV
for S in page:       # build the listing URL for each page number
    url = 'http://mv.yinyuetai.com/all?pageType=page&sort=weekViews&page=%d&tab=allmv&parenttab=mv'%S
    print(url)
    # A timeout keeps a stalled connection from hanging the script forever.
    html = requests.get(url, timeout=10)
    soup = BeautifulSoup(html.text,'html.parser')
    # All <div class="info"> elements inside the first <ul class="clearfix">.
    MV_s_html = soup.find('ul',{'class':'clearfix'}).find_all('div',{'class':'info'})

    for MV in MV_s_html:    # one info <div> per MV
        MV_title = MV.a.string    # text of the first link: MV title
        MV_href = MV.a['href']      # href of the first link: MV URL
        MV_name = MV.find('p').a.string  # link text inside the first <p>: artist name
        MV_count = MV.find('span',{'class':'c6'}).get_text()   # play count text
        MV_description = MV.find('p',{'class':'description hid J_id'}).get_text()   # description text
        # Drop embedded newlines and trim leading/trailing tabs from the
        # two free-text fields before storing them.
        MV_list_tuple = (
            MV_title,
            MV_href,
            MV_name,
            MV_count.replace('\n','').strip('\t'),
            MV_description.replace('\n','').strip('\t'),
        )
        MV_lists.append(MV_list_tuple)

excel_write(MV_lists)       # write every collected row to the Excel file
# 原文地址:https://www.cnblogs.com/114811yayi/p/6757700.html