Node.js

Node.js爬虫

var http = require('http');
var cheerio = require('cheerio');

// Course page whose chapter list is scraped.
var url = 'http://www.imooc.com/learn/348';

/**
 * Extract the chapter list from the course page markup and print each
 * chapter title to stdout.
 *
 * @param {string} html - Markup of the course page.
 * @returns {Array<{chapterTitle: string, videos: Array}>} One entry per
 *   `.chapter` element, in document order. `videos` is always an empty
 *   array; per-video extraction is not implemented in this script.
 */
function filterChapters(html) {
  var $ = cheerio.load(html);
  var chapters = $('.chapter');
  var courseData = [];

  console.log('章节标题:' + ' ');

  chapters.each(function () {
    // Inside `.each`, `this` is the current `.chapter` element.
    var chapter = $(this);
    var chapterTitle = chapter.find('h3').text();
    console.log(chapterTitle + ' ');

    courseData.push({
      chapterTitle: chapterTitle,
      videos: []
    });
  });

  return courseData;
}

// Download the course page, then parse and print its chapter titles.
http.get(url, function (res) {
  // http.get does not follow redirects; anything other than 2xx means the
  // body is not the course page, so report it instead of parsing it.
  if (res.statusCode < 200 || res.statusCode >= 300) {
    console.error('获取课程数据出错', 'HTTP ' + res.statusCode);
    res.resume(); // Drain the response so the socket is released.
    return;
  }

  // Decode as UTF-8 at the stream level so a multi-byte character that is
  // split across two chunks is not corrupted by string concatenation.
  res.setEncoding('utf8');

  var html = '';

  res.on('data', function (chunk) {
    html += chunk;
  });

  res.on('end', function () {
    // filterChapters prints the titles itself; its return value is not
    // consumed here yet.
    filterChapters(html);
  });
}).on('error', function (err) {
  console.error('获取课程数据出错', err.message);
});

效果

原文地址:https://www.cnblogs.com/ironSheet-SRS/p/9990345.html