使用 Node.js 爬取页面元素


/**
* Created by on 2018/12/25.
*/
const http = require("https");
const fs = require('fs');

//读取html插件
const cheerio = require("cheerio");

//读取的文件路径
const filePath = './山炮表格.xlsx';

//表格插件
const xlsx = require('node-xlsx');


// Parse the workbook at `filePath`; each parsed sheet exposes its rows as `data`.
const sheets = xlsx.parse(filePath);

// Flatten the rows of every sheet into a single list, preserving sheet order.
// `for...of` visits only the array elements (unlike `for...in`, which walks
// enumerable keys, including any added to Array.prototype).
const allUserId = [];
for (const sheet of sheets) {
  for (const row of sheet.data || []) {
    allUserId.push(row);
  }
}

// Weibo user-search page.
const findUserUrl = 'https://s.weibo.com/user';
// IDs whose search-result page contained a `.icon-vip` element (filled by getV).
const hasVUser = [];
// Row at which the crawl starts; presumably 1 so that a header row at
// position 0 is skipped — TODO confirm against the spreadsheet layout.
const index = 1;
/**
 * Walks the rows of `allIds` one request at a time, starting at `index`.
 * For each row, the value in column 1 is used as the query of a Weibo user
 * search; when the returned page contains a `.icon-vip` element the value is
 * appended to `hasVUser`. After the last row, `hasVUser` is logged.
 *
 * Rows that are not arrays or have an empty column 1 are skipped. A failed
 * request is logged and the crawl continues with the next row, so a single
 * network error does not abort the whole run.
 *
 * @param {Array<Array<*>>} allIds - Spreadsheet rows; column 1 holds the ID to search for.
 * @param {number} index - Position of the first row to process.
 * @returns {void}
 */
function getV(allIds, index) {
  const rows = Array.isArray(allIds) ? allIds : [];
  const hasUserId = (row) =>
    Array.isArray(row) &&
    row[1] !== undefined &&
    row[1] !== null &&
    String(row[1]).trim() !== '';

  // Skip unusable rows iteratively so long runs of blanks cannot grow the stack.
  let current = index;
  while (current < rows.length && !hasUserId(rows[current])) {
    current += 1;
  }

  // Nothing left to process (also covers an empty list or an invalid index).
  if (!(current < rows.length)) {
    console.log(hasVUser, '所有带v');
    return;
  }

  const theCurrentId = rows[current][1];
  // Encode the ID so spaces, `&`, `#` or non-ASCII text cannot corrupt the query string.
  const theUrl = `https://s.weibo.com/user?q=${encodeURIComponent(theCurrentId)}&Refer=SUer_box`;
  console.log(theUrl);

  // Both an error and a normal end may fire for one request; advance only once.
  let advanced = false;
  const next = () => {
    if (advanced) {
      return;
    }
    advanced = true;
    getV(rows, current + 1);
  };

  http
    .get(theUrl, (res) => {
      if (res.statusCode < 200 || res.statusCode >= 300) {
        console.warn(`Unexpected HTTP status ${res.statusCode} for ${theUrl}`);
      }
      // Decode as UTF-8 in the stream so multi-byte characters split across
      // chunks are not garbled by per-chunk Buffer-to-string conversion.
      res.setEncoding('utf8');
      let html = '';
      res.on('data', (chunk) => {
        html += chunk;
      });
      res.on('end', () => {
        const $ = cheerio.load(html);
        if ($('.icon-vip').length > 0) {
          hasVUser.push(theCurrentId);
        }
        next();
      });
      res.on('error', (err) => {
        console.error(`Response failed for ${theUrl}:`, err);
        next();
      });
    })
    .on('error', (err) => {
      console.error(`Request failed for ${theUrl}:`, err);
      next();
    });
}

// Start the sequential crawl at row `index`.
getV(allUserId, index);
{
  "dependencies": {},
  "devDependencies": {
    "cheerio": "^1.0.0-rc.2",
    "fs": "0.0.1-security",
    "node-xlsx": "^0.12.1"
  }
}
原文地址:https://www.cnblogs.com/1rookie/p/10176216.html