爬取腾讯视频/爱奇艺上80年代的所有喜剧电影，并写入 Excel 表格

腾讯视频
# Scrape Tencent Video's 1980s comedy film listing and store each film's
# title, score, leading actors and link in an .xls workbook.
import re

import requests
import xlwt

REQUEST_TIMEOUT = 10  # seconds; without it requests.get can block forever
PAGE_SIZE = 30        # the listing offset advances by 30 per page
MAX_OFFSET = 300      # stop once the offset reaches this value


def _first_match(pattern, html):
    """Return the first capture of *pattern* in *html*.

    Raises IndexError (as the bare ``result[0]`` lookup would) but with a
    message naming the pattern, so a site layout change is easy to diagnose.
    """
    matches = re.findall(pattern, html)
    if not matches:
        raise IndexError('no match in page for pattern: ' + pattern)
    return matches[0]


def _fetch(url):
    """Download *url* and return the response body as text."""
    return requests.get(url, timeout=REQUEST_TIMEOUT).text


workbook = xlwt.Workbook(encoding='utf-8')
sheet = workbook.add_sheet('汇总表')
sheet.write(0, 0, '影片名')
sheet.write(0, 1, '评分')
sheet.write(0, 2, '主演')
sheet.write(0, 3, '影片链接')

# Walk the site navigation: home page -> film channel -> comedy filter
# -> the year filter whose data-value is 100039.
film_path = _first_match('<a href="(.*?)" data-key="电影" ',
                         _fetch('https://v.qq.com/'))
comedy_path = _first_match('<a href="(.*?)" data-value="itype=100004"',
                           _fetch('https://v.qq.com' + film_path))
# This href is used as-is (no host prefix), exactly as in the original.
listing_url = _first_match(
    '<a href="(.*?)" class="filter_item " data-key="year" data-value="100039"',
    _fetch('https://v.qq.com' + comedy_path))

# Pull the pieces needed to rebuild a paginated URL out of one filter link.
href_head, href_tail = _first_match(
    '<a href="(.*?)amp;(.*?)&amp;itype=100004"', _fetch(listing_url))
# NOTE(review): character 12 of the first capture and the first seven
# characters of the second are presumably the query separator and an
# "offset="-style key -- confirm against the live markup.
page_prefix = listing_url + href_head[12] + 'itype=100004&' + href_tail[0:7]

title_row = 1  # next free row for title / score / link columns
actor_row = 1  # next free row for the actor column (tracked separately)

for offset in range(0, MAX_OFFSET, PAGE_SIZE):
    html = _fetch(page_prefix + str(offset))

    links = re.findall('<strong class="figure_title"><a href="(.*?)"', html)
    actor_blocks = re.findall('''主演:(.*?)</div>''', html, re.S)

    for block in actor_blocks:
        names = [name + ' ' for name in re.findall('title="(.*?)"', block, re.S)]
        print(names)
        # Written as one string holding every name followed by a space.
        sheet.write(actor_row, 2, ''.join(names))
        actor_row += 1
        # NOTE(review): row 194 is deliberately skipped; presumably one film
        # has no "主演" block and this keeps the actor column aligned with
        # the title rows -- confirm before changing.
        if actor_row == 194:
            actor_row += 1

    titles = re.findall('alt="(.*?)" r-imgerr="v">', html)
    score_int = re.findall('<em class="score_l">(.*?)</em>', html)
    score_frac = re.findall('<em class="score_s">(.*?)</em>', html)
    scores = [whole + frac for whole, frac in zip(score_int, score_frac)]

    # zip stops at the shortest list, so a page with a missing score or link
    # no longer aborts the run with IndexError before the workbook is saved.
    for title, score, link in zip(titles, scores, links):
        # \u3000 (ideographic space) is the fill character so that CJK
        # titles line up in the console output.
        print('{:\u3000<18}{:\u3000>10}{:\u3000>50}'.format(title, score, link))
        sheet.write(title_row, 0, title)
        sheet.write(title_row, 1, score)
        sheet.write(title_row, 3, link)
        title_row += 1

workbook.save('80年代喜剧电影.xls')
爱奇艺
# Scrape iQiyi's 1980-1989 comedy film listing (pages 1-7) and store each
# film's name, score and link in an .xls workbook.
import re

import requests
import xlwt

REQUEST_TIMEOUT = 10  # seconds; without it requests.get can block forever
FIRST_PAGE = 1
LAST_PAGE = 7

# The first positional argument of xlwt.Workbook is the encoding, not a
# title, so pass the encoding explicitly.
workbook = xlwt.Workbook(encoding='utf-8')
sheet1 = workbook.add_sheet('汇总表')
sheet1.write(0, 0, 'video_name')
sheet1.write(0, 1, 'score')
sheet1.write(0, 2, 'link')

row = 1  # next free worksheet row; row 0 holds the header

for page in range(FIRST_PAGE, LAST_PAGE + 1):
    url = ('https://list.iqiyi.com/www/1/8-----------1980_1989--11-'
           + str(page) + '-1-iqiyi--.html')
    html = requests.get(url, timeout=REQUEST_TIMEOUT).text

    # The captured hrefs get an explicit "https:" scheme prepended.
    links = ['https:' + href for href in re.findall(
        'class="qy-mod-link" href="(.*?)" target="_blank"', html)]
    scores = re.findall('<span class="text-score">(.*?)</span>', html)
    video_names = re.findall('<img alt="(.*?)"', html)

    # zip stops at the shortest list, so a page where the three patterns
    # match a different number of times no longer aborts the whole run with
    # IndexError before the workbook is saved.
    for video_name, score, link in zip(video_names, scores, links):
        print(video_name, ' ', score, ' ', link)
        sheet1.write(row, 0, video_name)
        sheet1.write(row, 1, score)
        sheet1.write(row, 2, link)
        row += 1

workbook.save('爱奇艺80年代喜剧电影.xls')
原文地址:https://www.cnblogs.com/zwsmile/p/11242154.html