// web.js

var page = require('webpage').create(),
 system = require('system'),
 address,output,csvPath,nodePathFile,outOriginalimg,PCSPuserAgent;
var fs = require("fs");
// Entry point: opens the page, dumps its DOM to CSV several times, then
// renders one screenshot before and one after markCapture() has run.
//
// Command line (system.args[0] is the script path itself):
//   [1] address         URL passed to page.open
//   [2] output          file rendered after markCapture() has run
//   [3] csvPath         CSV file that receives the DOM dump
//   [4] nodePathFile    file of node paths read by markCapture()
//   [5] outOriginalimg  file rendered before markCapture() runs
//   [6] PCSPuserAgent   user agent; only applied when it contains "iPhone"
if (system.args.length < 7) {
    // The original test (`length < 0`) could never be true, so a missing
    // argument surfaced later as a TypeError instead of this message.
    console.log('Usage: phantomjs <script> URL output csvPath nodePathFile outOriginalimg userAgent');
    phantom.exit(1);
} else {
    address = system.args[1];
    output = system.args[2];
    csvPath = system.args[3];
    nodePathFile = system.args[4];
    outOriginalimg = system.args[5];
    PCSPuserAgent = system.args[6];
    console.log(PCSPuserAgent);

    // The dump is written to a temporary "_1.csv" file and moved to its final
    // name when complete, so the final file only ever appears fully written.
    var BeforecsvPath = csvPath.replace('.csv', '_1.csv');
    var csvindex = 0;

    var pageSettings = {
        javascriptEnabled: true,
        loadImages: true
    };
    if (PCSPuserAgent.indexOf("iPhone") > -1) {
        pageSettings.userAgent = PCSPuserAgent;
    }
    page.settings = pageSettings;

    page.viewportSize = { width: 414, height: 30 };

    page.open(address, function (status) {
        if (status !== 'success') {
            console.log('Unable to load the address: ' + address);
            phantom.exit(1);
            return;
        }

        // Column layout taken from the header line that used to be written here:
        // childNodes Num{_}Node Name{_}NodeIndexPath{_}Width{_}Height{_}x{_}y{_}Dispaly{_}ImgUrlOrBackImgUrl
        if (fs.exists(csvPath)) {
            fs.remove(csvPath);
        }
        // VisiteHtmlDom appends, so a temp file left by an earlier run would
        // otherwise be merged into this run's dump.
        if (fs.exists(BeforecsvPath)) {
            fs.remove(BeforecsvPath);
        }

        // First dump, 40 s after the load callback.
        window.setTimeout(function () {
            VisiteHtmlDom("0", BeforecsvPath);
            console.log("---------------------FS MOVE------------------------------");
            fs.move(BeforecsvPath, csvPath);
        }, 40000);

        // Once the first dump exists, take five more snapshots named
        // "<csv>__1.csv" .. "<csv>__5.csv", one per poll.
        waitFor2(
            function csvCreate() {
                if (!fs.exists(csvPath)) {
                    return false;
                }
                if (csvindex >= 5) {
                    return true;
                }

                csvindex++;
                console.log("csvindex Index:" + csvindex);
                var csvPathNow = csvPath.replace('.csv', '__' + csvindex.toString() + '.csv');
                var BeforecsvPathNow = csvPathNow.replace('.csv', '_1.csv');
                if (fs.exists(csvPathNow)) {
                    fs.remove(csvPathNow);
                }
                if (fs.exists(BeforecsvPathNow)) {
                    fs.remove(BeforecsvPathNow);
                }
                console.log("csvindex:" + csvindex + "," + "csvPathNow:" + csvPathNow + "," + "BeforecsvPathNow:" + BeforecsvPathNow);
                VisiteHtmlDom("0", BeforecsvPathNow);
                fs.move(BeforecsvPathNow, csvPathNow);
                return false;
            },
            function csvCreate2() {
                console.log("-----------------csvPath2 END---------------");
            }
        );

        // Nothing in this script creates nodePathFile; presumably another
        // process writes it from the CSV dumps (TODO confirm).
        waitFor(
            function nodePathFileCheck() {
                if (!fs.exists(nodePathFile)) {
                    console.log('nodepathFile Not Find');
                    return false;
                }
                console.log("Find nodePathFile");
                return true;
            },
            function heheda() {
                window.setTimeout(function () {
                    console.log("---------------------Capture Original Begin------------------------------");
                    var scrollheight = page.evaluate(function () {
                        return document.body.scrollHeight;
                    });
                    // Grow the viewport to the full document height before rendering.
                    page.viewportSize = { width: 414, height: scrollheight };

                    page.render(outOriginalimg);
                    console.log("---------------------Capture Begin------------------------------");
                    markCapture();
                    console.log("scrollheight:" + scrollheight);
                    window.setTimeout(function () {
                        page.render(output);
                        page.close();
                        console.log('render ok');
                        phantom.exit();
                    }, 10000);
                }, 25000);
            }
        );
    });
}
/**
 * Appends one CSV record for the node at `nodePath` to `inputcsvPath`, then
 * recurses into each of that node's children (depth-first, document order).
 *
 * A path is a "/"-separated list of childNodes indexes starting at the <html>
 * element, e.g. "0/1/3"; the first segment stands for <html> itself. When the
 * walk reaches a visible IFRAME, the next segment is consumed to step into the
 * iframe document's <html> element.
 *
 * Every record has 11 fields joined by "{_}":
 *   child count, node name, path, width, height, left, top, display,
 *   IMG/IFRAME src, inline background-image, text content.
 * Fields that do not apply to a node type are left empty.
 *
 * @param {string} nodePath      path of the node to record
 * @param {string} inputcsvPath  file the record is appended to
 */
function VisiteHtmlDom(nodePath, inputcsvPath) {
    // The callback runs inside the page, so it cannot see anything from this
    // script's scope; helpers it needs are declared inside it.
    var nodeinfo = page.evaluate(function (str) {
        function record(fields) {
            var out = "";
            for (var k = 0; k < fields.length; k++) {
                if (k > 0) {
                    out += "{_}";
                }
                // String() keeps the "null"/"undefined" spelling that plain
                // string concatenation would have produced.
                out += String(fields[k]);
            }
            return out;
        }

        // Exact match: a substring test for "HEAD" would also catch HEADER
        // and THEAD and drop their whole subtree from the dump.
        function isSkippedTag(name) {
            return name === "SCRIPT" || name === "NOSCRIPT" || name === "HEAD";
        }

        var htmlNode = document.getElementsByTagName('html')[0];
        var xpathArr = str.split("/");

        for (var i = 1; i < xpathArr.length; i++) {
            if (!htmlNode) {
                return null;
            }
            var index = parseInt(xpathArr[i], 10);
            if (isSkippedTag(htmlNode.nodeName)) {
                // Skipped tags are reported with zero children; stay put.
                continue;
            }
            if (htmlNode.nodeName === "IFRAME" && window.getComputedStyle(htmlNode).display != "none") {
                htmlNode = htmlNode.contentWindow.document.getElementsByTagName('html')[0];
            } else {
                htmlNode = htmlNode.childNodes[index];
            }
        }

        if (!htmlNode) {
            // The DOM may have changed since the parent was recorded.
            return null;
        }

        var name = htmlNode.nodeName;

        if (name === "#text") {
            // Line breaks are stripped so the record stays on one CSV line.
            var text = (htmlNode.nodeValue || "").replace(/\r|\n/ig, "");
            return record([htmlNode.childNodes.length, name, str, "", "", "", "", "", "", "", text]);
        }

        // Non-element nodes have no style to read, so they are recorded as
        // leaves just like comments.
        if (isSkippedTag(name) || name === "#comment" || htmlNode.nodeType !== 1) {
            return record(["0", name, str, "", "", "", "", "", "", "", ""]);
        }

        var bgImgUrl = htmlNode.style.getPropertyValue("background-image");
        if (bgImgUrl != null) {
            // NOTE(review): as written this only strips a leading "url"; the
            // pattern may have lost backslashes (perhaps /^(url)\(|\)/g) - confirm.
            bgImgUrl = bgImgUrl.replace(/^(url)(|)/g, '');
        }

        var computed = window.getComputedStyle(htmlNode);
        var width = computed.width;
        var height = computed.height;
        var left = computed.left;
        var top = computed.top;
        var display = computed.display;

        switch (name) {
            case "IMG":
                return record([htmlNode.childNodes.length, name, str, width, height, left, top, display, htmlNode.src, "", ""]);
            case "EMBED":
                return record([htmlNode.childNodes.length, name, str, "", "", "", "", "", "", "", ""]);
            case "IFRAME":
                // A visible iframe reports one child (its document's <html>);
                // a hidden one is not descended into.
                return record([display != "none" ? "1" : "0", name, str, width, height, left, top, display, htmlNode.src, "", ""]);
            default:
                return record([htmlNode.childNodes.length, name, str, width, height, left, top, display, "", bgImgUrl, ""]);
        }
    }, nodePath);

    if (typeof nodeinfo !== "string") {
        // evaluate() produced no record (node missing or an error in the page).
        console.log("VisiteHtmlDom: no node info for path " + nodePath);
        return;
    }

    console.log("create CSV");
    fs.write(inputcsvPath, nodeinfo + "\n", 'a');

    var childNodesCount = parseInt(nodeinfo.split("{_}")[0], 10);
    for (var childIndex = 0; childIndex < childNodesCount; childIndex++) {
        VisiteHtmlDom(nodePath + "/" + childIndex.toString(), inputcsvPath);
    }
}
/**
 * Reads `nodePathFile` line by line and draws a 5px red border around the
 * node each line points to.
 *
 * Each line is a "/"-separated list of childNodes indexes starting at the
 * <html> element (the first segment stands for <html> itself). After stepping
 * onto an IFRAME the walk continues from the iframe document's <html> element
 * and the following segment is skipped. A text node is marked by bordering its
 * parent element.
 */
function markCapture() {
    var stream = fs.open(nodePathFile, 'r');
    try {
        while (!stream.atEnd()) {
            var line = stream.readLine();
            // A blank line would resolve to <html> and border the whole page.
            if (!line || !/\S/.test(line)) {
                continue;
            }
            // The callback runs inside the page and must be self-contained.
            page.evaluate(function (path) {
                var node = document.getElementsByTagName('html')[0];
                var segments = path.split("/");
                for (var i = 1; node && i < segments.length; i++) {
                    // parseInt ignores trailing non-digits such as a stray "\r".
                    var index = parseInt(segments[i], 10);
                    node = node.childNodes[index];
                    if (node && node.nodeName.indexOf("IFRAME") > -1) {
                        node = node.contentWindow.document.getElementsByTagName('html')[0];
                        i++; // this segment only selects the iframe's <html>
                    }
                }
                if (!node) {
                    // The path no longer matches the live DOM; nothing to mark.
                    return;
                }

                var target = node.nodeName.indexOf("text") > -1 ? node.parentNode : node;
                if (!target || !target.style) {
                    return;
                }
                // border-box keeps the added border from changing the box size.
                target.style.boxSizing = "border-box";
                target.style.border = "5px solid #ff0000";
            }, line);
        }
    } finally {
        stream.close();
    }
}
 /**
  * Polls `testFx` every 5000 ms until it returns a truthy value or the
  * timeout elapses.
  *
  * On the first poll after `testFx` has returned truthy, `onReady` is run
  * once. If the timeout elapses first, "'waitFor()' timeout" is logged and
  * phantom.exit(1) is called. In both cases the polling interval is cleared
  * before anything else happens, so a throwing `onReady` cannot be re-run on
  * the next tick.
  *
  * @param {Function|string} testFx    condition to poll; a string is eval'd
  * @param {Function|string} onReady   action to run once; a string is eval'd
  * @param {number} [timeOutMillis]    timeout in ms; any falsy value (including
  *                                    0) selects the default of 120000 (2 minutes)
  */
 function waitFor(testFx, onReady, timeOutMillis) {
     var maxtimeOutMillis = timeOutMillis ? timeOutMillis : 120000;
     var start = new Date().getTime();
     var condition = false;
     var interval = setInterval(function () {
         var elapsed = new Date().getTime() - start;

         if (elapsed < maxtimeOutMillis && !condition) {
             // Not timed out and not fulfilled yet: poll again.
             // NOTE: string arguments are eval'd; only pass trusted code.
             condition = (typeof testFx === "string" ? eval(testFx) : testFx());
             return;
         }

         clearInterval(interval);

         if (!condition) {
             console.log("'waitFor()' timeout");
             phantom.exit(1);
             return;
         }

         console.log("'waitFor()' finished in " + elapsed + "ms.");
         if (typeof onReady === "string") {
             eval(onReady);
         } else {
             onReady();
         }
     }, 5000);
 }
 /**
  * Polls `testFx` every 5000 ms until it returns a truthy value or the
  * timeout elapses.
  *
  * On the first poll after `testFx` has returned truthy, `onReady` is run
  * once. If the timeout elapses first, "'waitFor()' timeout" is logged and
  * phantom.exit(1) is called. In both cases the polling interval is cleared
  * first, so neither a throwing `onReady` nor a timeout leaves it running.
  *
  * @param {Function|string} testFx    condition to poll; a string is eval'd
  * @param {Function|string} onReady   action to run once; a string is eval'd
  * @param {number} [timeOutMillis]    timeout in ms; any falsy value (including
  *                                    0) selects the default of 120000 (2 minutes)
  */
 function waitFor2(testFx, onReady, timeOutMillis) {
     var maxtimeOutMillis = timeOutMillis ? timeOutMillis : 120000;
     var start = new Date().getTime();
     var condition = false;
     var interval = setInterval(function () {
         var elapsed = new Date().getTime() - start;
         var stillWaiting = elapsed < maxtimeOutMillis && !condition;

         if (stillWaiting) {
             // NOTE: string arguments are eval'd; only pass trusted code.
             condition = (typeof testFx === "string" ? eval(testFx) : testFx());
             return;
         }

         clearInterval(interval);

         if (condition) {
             console.log("'waitFor()' finished in " + elapsed + "ms.");
             if (typeof onReady === "string") {
                 eval(onReady);
             } else {
                 onReady();
             }
         } else {
             console.log("'waitFor()' timeout");
             phantom.exit(1);
         }
     }, 5000);
 }

  

// Original article: https://www.cnblogs.com/c-x-a/p/7267747.html