Selenium Java 谷歌浏览器之保存网页为图片

前言

  谷歌浏览器自动化环境(chromedriver)的安装教程:https://www.cnblogs.com/kawhileonardfans/articles/10965856.html

  我上次的需求是做一个爬虫,爬取一些网站上的敏感信息,并把包含敏感信息的网页作为证据保存下来。证据会保存两种:第一种是网页内容(HTML),第二种就是本文要讲的截图,即把这个网页保存为一张图片。

  本文的方式是通过 Selenium 操作谷歌浏览器进行截图。当然也可以操作火狐浏览器截图(个人感觉效果比谷歌浏览器好,问题也没有谷歌这么多,比如谷歌浏览器存在截图截不全的问题)。除了通过 Selenium 操作浏览器之外,还有一种方式是通过 PhantomJS 对网页截屏,效果不错,请看下面的链接:

  使用PHANTOMJS对网页截屏地址:https://www.cnblogs.com/kawhileonardfans/articles/10965906.html

 

案例一:保存网页可见区域为图片

public static void main(String[] args) throws Exception {
    System.setProperty("webdriver.chrome.driver",
            "C:\Users\Administrator\AppData\Local\Google\Chrome\Application\chromedriver.exe");
      
    WebDriver driver = new ChromeDriver();
  
    driver.manage().window().maximize();
     
    driver.get("http://www.baidu.com/");
  
    //找到百度上面的输入框、放入输入内容‘鹿晗人妖’
    driver.findElement(By.id("kw")).sendKeys("鹿晗人妖");
    //点击百度旁边的搜索按钮
    driver.findElement(By.id("su")).click();
    //暂停两秒,让他加载搜索出来的数据
    Thread.sleep(2000);
  
    //对整个网页截图
    File srcFile = ((TakesScreenshot)driver).getScreenshotAs(OutputType.FILE); 
     
    //把截图保存到桌面
    FileUtils.copyFile(srcFile, new File("C:\Users\Administrator\Desktop\1233.png")); 
    driver.quit();
}

案例二:保存网页可见区域中的某一块为图片

public static void main(String[] args) throws Exception {
    System.setProperty("webdriver.chrome.driver",
            "C:\Users\Administrator\AppData\Local\Google\Chrome\Application\chromedriver.exe");
     
    WebDriver driver = new ChromeDriver();
 
    driver.manage().window().maximize();
 
    driver.get("http://tool.oschina.net/highlight");
    Thread.sleep(2000);
 
    //找到class为wrapper的节点
    WebElement webElement = driver.findElement(By.className("wrapper"));
    Point point = webElement.getLocation();
    int eleWidth = webElement.getSize().getWidth();
    int eleHeight = webElement.getSize().getHeight();
     
    //对整个网页截图
    File srcFile = ((TakesScreenshot)driver).getScreenshotAs(OutputType.FILE); 
     
    //在上面的网页截图中,把根据class找到的节点截取出来、并覆盖上面的网页截图
    BufferedImage  fullImg = ImageIO.read(srcFile);
    BufferedImage eleScreenshot= fullImg.getSubimage(point.getX(), point.getY(),
            eleWidth, eleHeight);
    ImageIO.write(eleScreenshot, "png", srcFile);
 
    //把根据class找到的节点截图保存到桌面
    FileUtils.copyFile(srcFile, new File("C:\Users\Administrator\Desktop\1233.png")); 
    driver.quit();
}

案例三:保存网页可见区域为图片、并且标记网页中的关键字

public static void main(String[] args) throws Exception {
    System.setProperty("webdriver.chrome.driver",
            "C:\Users\Administrator\AppData\Local\Google\Chrome\Application\chromedriver.exe");
      
    WebDriver driver = new ChromeDriver();
  
    driver.manage().window().maximize();
     
    driver.get("http://news.baidu.com");
     
    //获取百度新闻中html
    String htmlContent = driver.getPageSource();
  
    //解析html字符串(引入了jsoup-1.8.1.jar)
    Document document = Jsoup.parse(htmlContent);
     
    //删除html下面标签中的onclick属性、href属性(我这里只是截图、点击事件对我没用)
    for (Element element : document.getAllElements()) {
        element.removeAttr("onclick").removeAttr("href");
    }
     
    //删除html下面所有的script标签(我这里只是截图、不需要动态页面)
    for (Element element : document.getElementsByTag("script")) {
        element.remove();
    }
     
    //替换html中的双引号为单引号、删除换行
    String reHtmlContent = document.body().html().replace(""", "'").replaceAll("
|
", "");;
     
    //标记'网页'为敏感字、用红色框给他框住
    reHtmlContent = reHtmlContent.replace("网页", "<span style='border:2px solid red;'>网页</span>");
     
    reHtmlContent = """ + reHtmlContent + """;
     
    //通过js把转换完的html替换到页面的body上面
    JavascriptExecutor js = (JavascriptExecutor) driver;
    js.executeScript("document.body.innerHTML=" + reHtmlContent);
     
    //对整个网页截图
    File srcFile = ((TakesScreenshot)driver).getScreenshotAs(OutputType.FILE); 
      
    //把截图保存到桌面
    FileUtils.copyFile(srcFile, new File("C:\Users\Administrator\Desktop\1233.png")); 
    driver.quit();
}

案例四:保存整个网页为图片(滚动截图后拼接;上面的案例只会保存可见区域)

import java.io.File;
import org.apache.commons.io.FileUtils;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.OutputType;
import org.openqa.selenium.TakesScreenshot;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import com.xjxcc.util.ImageUtils;
 
public class Test1 {
    /**
     * @param args
     * @throws Exception
     */
    public static void main(String[] args) throws Exception {
        System.setProperty("webdriver.chrome.driver",
                "C:\Users\Administrator\AppData\Local\Google\Chrome\Application\chromedriver.exe");
           
        WebDriver driver = new ChromeDriver();
       
        driver.manage().window().maximize();
        driver.get("https://zhidao.baidu.com");
       
        /* 通过js获取浏览器的各种高度 */
        JavascriptExecutor js = (JavascriptExecutor) driver;
        String heightStrs = (String) js.executeScript("return document.body.scrollHeight.toString()+','+document.body.scrollTop.toString() + ',' + window.screen.height.toString()");
        String[] heights = heightStrs.split(",");
        int htmlHeight = Integer.parseInt(heights[0]);//整个页面的高度
        int scrollTop = Integer.parseInt(heights[1]);//滚动条现在所处的高度
        int screenHeight = Integer.parseInt(heights[2]);//电脑屏幕的高度
        screenHeight = screenHeight - 140;
         
        //开始滚动截图
        int count = 0;
        while(scrollTop < htmlHeight){
            scrollTop += screenHeight;
            System.out.println("document.body.scrollTop = " + screenHeight * count);
            ((JavascriptExecutor) driver).executeScript("window.scrollTo(0, "+ (screenHeight * count) +")");
             
            //对整个网页截图
            File srcFile = ((TakesScreenshot)driver).getScreenshotAs(OutputType.FILE); 
               
            //把截图保存到桌面
            FileUtils.copyFile(srcFile, new File("C:\Users\Administrator\Desktop\allImg\"+ (++count) +".png")); 
        }
         
        //拼接图片
        File imgsFile = new File("C:\Users\Administrator\Desktop\allImg");
        if(!imgsFile.isDirectory()){
            throw new RuntimeException("地址不是一个正确的目录...");
        }
        File[] imgsFiles = imgsFile.listFiles();
        ImageUtils.mergeImg(imgsFiles, ImageUtils.IMG_TYPE_PNG, ImageUtils.MERGE_IMG_TYPE_Y, "C:\Users\Administrator\Desktop\111.png");
         
        driver.quit();
    }
}
原文地址:https://www.cnblogs.com/kawhileonardfans/p/10965886.html