用于抓取vijos所有题目信息的node.js脚本

代码如下:

var superagent = require('superagent');
var fs = require('fs');

/*
fetch_vijos_problems
这个脚本用于获取vijos里面的所有题目的描述和信息。
注意:因为1000至1099已经有信息了,所以题目号顺延100(即vijos的题目N保存为N+100)。
*/

/**
 * Downloads one Vijos problem page and stores its statement and metadata on disk.
 *
 * The page at https://vijos.org/p/<problemId> is fetched, the <title> text and
 * the first "section__body typo" container are cut out of the HTML, and two
 * files are written to ../moon-web/resources/problems/<problemId + 100>/ :
 *   - description.html : the extracted statement markup (trimmed)
 *   - info.json        : {"id": "<problemId + 100>", "title": ..., "tags": []}
 *
 * Failures (request error, unexpected page layout, file-system error) are
 * logged and handed to the callback instead of being thrown, and nothing is
 * written for a page whose layout cannot be recognised.
 *
 * @param {number|string} problemId - Vijos problem number; the local id is
 *     the numeric value plus 100.
 * @param {function(?Error): void} [callback] - Called once the attempt is
 *     over: with the error on failure, with null on success.
 */
var fetchProblem = function (problemId, callback) {
    const TITLE_OPEN = '<title>';
    const TITLE_CLOSE = '</title>';
    // Number of trailing characters dropped from the <title> text; presumably
    // the site suffix (e.g. " - Vijos") — TODO confirm against a live page.
    const TITLE_SUFFIX_LENGTH = 8;
    const BODY_MARKER = "section__body typo";
    // Offset from the start of the marker to the first content character:
    // the 18-character marker plus 3 more, presumably `">` and a line break
    // — TODO confirm against a live page.
    const BODY_OFFSET = 21;

    const url = "https://vijos.org/p/" + problemId;
    // Number() keeps "1000" + 100 from turning into the string "1000100".
    const localId = Number(problemId) + 100;

    const finish = (error) => {
        if (callback) {
            callback(error);
        }
    };

    // Cuts the title and statement out of the page; throws when a marker is missing.
    const parsePage = (html) => {
        if (typeof html !== 'string') {
            throw new Error('response has no text body');
        }

        const titleOpenIdx = html.indexOf(TITLE_OPEN);
        const titleCloseIdx = html.indexOf(TITLE_CLOSE);
        if (titleOpenIdx === -1 || titleCloseIdx === -1) {
            throw new Error('<title> element not found');
        }
        const titleStart = titleOpenIdx + TITLE_OPEN.length;
        // Clamp so that a title shorter than the suffix yields "" rather
        // than a negative index (which slice() would count from the end).
        const titleEnd = Math.max(titleStart, titleCloseIdx - TITLE_SUFFIX_LENGTH);
        const title = html.slice(titleStart, titleEnd);

        const markerIdx = html.indexOf(BODY_MARKER);
        if (markerIdx === -1) {
            throw new Error(`"${BODY_MARKER}" container not found`);
        }
        const bodyStart = markerIdx + BODY_OFFSET;
        const bodyEnd = html.indexOf("</div>", bodyStart);
        if (bodyEnd === -1) {
            throw new Error('closing </div> of the statement not found');
        }

        return { title, description: html.slice(bodyStart, bodyEnd).trim() };
    };

    // Writes description.html and info.json, creating missing parent directories.
    const savePage = ({ title, description }) => {
        const dir = `${__dirname}/../moon-web/resources/problems/${localId}`;
        if (!fs.existsSync(dir)) {
            fs.mkdirSync(dir, { recursive: true });
        }
        const info = {
            "id": String(localId),
            "title": title,
            "tags": []
        };
        fs.writeFileSync(`${dir}/description.html`, description, 'utf8');
        fs.writeFileSync(`${dir}/info.json`, JSON.stringify(info), 'utf8');
    };

    superagent.get(url)
        .end((err, res) => {
            if (err) {
                console.log("[error fetch]" + url);
                finish(err);
                return;
            }

            let failure = null;
            try {
                savePage(parsePage(res && res.text));
            } catch (error) {
                console.log(`[error save]${url} ${error.message}`);
                failure = error;
            }
            // Called outside the try block so an exception thrown by the
            // callback itself is not mistaken for a save failure.
            finish(failure);
        });
};

/**
 * Fetches Vijos problems one after another, from problemId up to and
 * including lastProblemId (by default 1000 - 2056 covers all problems).
 *
 * Each request is started only after the previous one has reported back
 * and after waiting delayMs. The chain continues regardless of whether an
 * individual fetch succeeded, because the completion callback ignores its
 * arguments.
 *
 * @param {number} problemId - Problem number to fetch now.
 * @param {number} [lastProblemId=2056] - Last problem number to fetch (inclusive).
 * @param {number} [delayMs=1000] - Pause before each request, in milliseconds.
 */
var fetchProblems = function (problemId, lastProblemId = 2056, delayMs = 1000) {
    console.log(`fetch ${problemId} ...`);
    setTimeout(() => {
        fetchProblem(problemId, () => {
            console.log(`fetch ${problemId} finished.`);
            if (problemId < lastProblemId) {
                fetchProblems(problemId + 1, lastProblemId, delayMs);
            }
        });
    }, delayMs);
};

// Script entry point: start the crawl at problem 1000.
fetchProblems(1000);
原文地址:https://www.cnblogs.com/zifeiy/p/10951498.html