美女图片小爬虫,嘿嘿

/**
 * Created by Administrator on 2016/9/22 0022.
 */
    
//依赖的模块
var http=require('http');
var fs=require('fs');
var cheerio=require('cheerio');
var request=require('request');
var iconv = require('iconv-lite');

//Crawl settings; constructing the object starts the crawl right away
new GetSexyPics({
    initUrl: 'http://www.mm131.com/xinggan/2655_20.html',//URL of the first page to request
    maxNum: 100,//upper bound for the page counter; (maxNum - index) pages are fetched
    intervalTime: 800,//delay between page requests, in milliseconds
    index: 0//page counter, starts at 0 and is incremented once per fetched page
});

/**
 * Crawler that walks a chain of gallery pages. For every page it downloads the
 * images matched by `.content-pic a img` into ./img, then follows the
 * "next page" link (or the "next group" link when the current group is
 * exhausted) until `maxNum` pages have been fetched. After each page the URL
 * to resume from is written to ./logs/日志.txt.
 *
 * Constructing an instance starts crawling immediately (it calls `init`).
 *
 * @param {Object} params - Crawl settings, stored on the instance as `setParams`.
 * @param {string} params.initUrl - Address of the first page to request.
 * @param {number} params.maxNum - Crawling stops once `index` reaches this value.
 * @param {number} params.intervalTime - Delay in milliseconds between page requests.
 * @param {number} params.index - Running count of fetched pages (mutated while crawling).
 */
function GetSexyPics(params) {
    // Declared locally: an undeclared assignment would create a global that
    // throws in strict mode and is shared (overwritten) between instances.
    var _this = this;
    _this.setParams = params;

    // Relative "next page" links are resolved against this prefix.
    var SITE_BASE_URL = 'http://www.mm131.com/xinggan/';

    // Log a failed page request.
    function logRequestError(e) {
        console.log(e);
        console.log("错误:" + e.message);
    }

    // Replace characters that are path separators or invalid in file names,
    // because the name is built from text scraped off a remote page.
    function toSafeFileName(name) {
        return name.replace(/[\\\/:*?"<>|]/g, '_');
    }

    // Work out the address of the page to crawl next, or null when the page
    // offers neither a "next page" nor a "next group" link.
    function findNextUrl($) {
        var nextPage = $('.content-page a.page-ch:last-child').attr('href');
        var target = nextPage ? SITE_BASE_URL + nextPage : $('.updown .updown_r').attr('href');
        return target ? encodeURI(target) : null;
    }

    // Persist the resume address so a later run can use it as initUrl.
    function saveLogs(logs) {
        fs.writeFile('./logs/' + '日志' + '.txt', logs, 'utf-8', function (err) {
            if (err)console.log(err);
        });
    }

    // Download every image of the current page into ./img, named after the page title.
    function downloadImg($, imgTil, idx) {
        $('.content-pic a img').each(function () {
            var src = $(this).attr('src');
            if (!src) {
                // Nothing to download; encodeURI(undefined) would yield the string "undefined".
                return;
            }
            var imgUrl = encodeURI(src);
            var suffix = imgUrl.substring(imgUrl.lastIndexOf('.'));
            console.log('第' + idx + '张图片---' + imgTil + '---' + imgUrl);
            // Fall back to the page counter when the page has no title.
            var target = './img/' + toSafeFileName((imgTil || String(idx)) + suffix);
            // Both streams get an error listener: an unhandled 'error' event
            // would otherwise terminate the whole crawl.
            request(imgUrl)
                .on('error', function (err) {
                    console.log(err);
                })
                .pipe(fs.createWriteStream(target).on('error', function (err) {
                    console.log(err);
                }));
        });
    }

    /**
     * Create ./<name> if it does not exist yet. Synchronous on purpose, so the
     * directory is guaranteed to exist before the first download or log write.
     *
     * @param {string} name - Directory name relative to the working directory.
     */
    this.createFolder = function (name) {
        try {
            fs.mkdirSync('./' + name);
        } catch (err) {
            // An already existing directory is the expected case on re-runs.
            if (err.code !== 'EEXIST') {
                console.log(err);
            }
        }
    };

    /**
     * Fetch one page, download its images and schedule the following page.
     *
     * @param {string} initUrl - Address of the page to request.
     */
    this.getSexyPics = function (initUrl) {
        var chunks = [];
        http.get(initUrl, function (res) {
            res.on('data', function (chunk) {
                // Collect raw bytes; decoding per chunk corrupts multi-byte
                // characters that straddle a chunk boundary.
                chunks.push(chunk);
            });
            res.on('error', logRequestError);
            res.on('end', function () {
                var resData = iconv.decode(Buffer.concat(chunks), 'gb2312');
                var $ = cheerio.load(resData);
                _this.setParams.index = _this.setParams.index + 1;
                var images = {
                    title: $('.content h5').text().trim(),
                    index: _this.setParams.index
                };
                downloadImg($, images.title, images.index);

                var newUrl = findNextUrl($);
                if (!newUrl) {
                    // Keep the previously saved resume address instead of
                    // overwriting it with an unusable one.
                    console.log('No next page link found on ' + initUrl + '; stopping.');
                    return;
                }
                if (_this.setParams.index < _this.setParams.maxNum) {
                    setTimeout(function () {
                        _this.getSexyPics(newUrl);
                    }, _this.setParams.intervalTime);
                }
                saveLogs('下次需要执行的初始化地址是(替换initUrl即可)---' + newUrl);
            });
        }).on('error', logRequestError);
    };

    /**
     * Prepare the output directories and start crawling from `setParams.initUrl`.
     */
    this.init = function () {
        this.createFolder('img');
        this.createFolder('logs');
        this.getSexyPics(_this.setParams.initUrl);
    };
    this.init();
}

  

觉得好用的给个赞,...

原文地址:https://www.cnblogs.com/leyi/p/5903068.html