由于项目需要把页面打印出来,本项目是用ajax动态获取表单数据的,很多数据通过浏览器打印无法实现完整信息
于是想到爬虫,通过PhantomJS 把页面URL 抓取并保存为pdf,再把流数据返回给前端作为附件下载
PhantomJS的安装说明这里不介绍了,pdf 保存用了PhantomJS 自带的rasterize.js
贴代码
html页面 用easyui加了一个按钮
<a href="javascript:void(0)" class="easyui-linkbutton" id="btnPdfExport" onclick="ACWS.html2Pdf()">PDF导出</a>
js 把当前页面的url 和 文件名传给后端control 文件名用了页面的title
/**
 * Exports the current page as a PDF.
 *
 * Navigates the browser to the backend "/rest/pdfdownload" endpoint, passing the
 * current page URL and the document title (used as the file name) as
 * URL-encoded query parameters.
 *
 * NOTE(review): mask()/unmask() are presumably provided by a jQuery loading-mask
 * plugin; they are called back to back around the navigation - confirm this is
 * the intended behavior.
 */
ACWS.html2Pdf = function () {
    var pageUrl = window.location.href;
    var pageTitle = $(document).attr("title");
    var query = "url=" + encodeURIComponent(pageUrl) + "&fileName=" + encodeURIComponent(pageTitle);
    var $body = $("body");

    $body.mask();
    window.location.href = acwsContext + "/rest/pdfdownload?" + query;
    $body.unmask();
};
control层 这里有一个问题:Process 是子进程单独处理的,但是通过process 去获取流是空的,没办法,这里只有等附件生成结束后,再通过File 来获取pdf的流数据返回给前端
/**
 * Copyright: Huaxin Software
 * Project: gx-pms
 * Author: diaoby
 * Created: 2019-01-05
 * Description: PhantomControl - PDF export
 */
package com.huaxin.gxgc.phantom.control;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;

import javax.annotation.Resource;
import javax.servlet.http.HttpServletRequest;

import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;

import com.huaxin.acws.common.exception.AcwsGenerelException;
import com.huaxin.acws.security.util.Security;
import com.huaxin.acws.ucenter.model.User;
import com.huaxin.gxgc.common.service.CommonService;
import com.huaxin.gxgc.phantom.service.PhantomService;

import io.swagger.annotations.Api;

/**
 * REST controller that renders a page to PDF through PhantomJS and returns the
 * PDF bytes as an attachment download.
 *
 * @author diaoby
 */
@Api(tags = {"PhantomControl"})
@RestController
public class PhantomControl {

    /** Launches PhantomJS and supplies the temporary PDF path. */
    @Resource
    private PhantomService phantomService;

    /** Shared project service; used here to obtain the token for the current user. */
    @Resource
    private CommonService commonService;

    /**
     * Deletes the temporary PDF file if it exists.
     *
     * @param pdfPath path of the temporary PDF file
     * @author diaoby
     */
    private void delPdfTempleFile(String pdfPath) {
        File pdfFile = new File(pdfPath);
        if (pdfFile.exists()) {
            pdfFile.delete();
        }
    }

    /**
     * Appends a query fragment to a URL, using {@code ?} when the URL has no query
     * string yet and {@code &} otherwise.
     *
     * @param url   the base URL
     * @param query the query fragment to append, without a leading separator
     * @return the URL with the query fragment appended
     */
    private String appendQuery(String url, String query) {
        char separator = url.indexOf('?') >= 0 ? '&' : '?';
        return url + separator + query;
    }

    /**
     * Reads the whole file into memory; the streams are closed even when reading fails.
     *
     * @param file the file to read
     * @return the file content
     * @throws IOException if the file cannot be opened or read
     */
    private byte[] readFile(File file) throws IOException {
        try (InputStream in = new FileInputStream(file);
                ByteArrayOutputStream out = new ByteArrayOutputStream()) {
            byte[] buffer = new byte[1024];
            int n;
            while ((n = in.read(buffer)) != -1) {
                out.write(buffer, 0, n);
            }
            return out.toByteArray();
        }
    }

    /**
     * Renders the given page URL to a PDF and returns it as an attachment.
     *
     * <p>The token of the current user and an {@code isPdf=true} flag are appended
     * to the URL before PhantomJS is started. The method waits for the PhantomJS
     * process to finish, reads the generated temporary file, and always destroys
     * the process and deletes the temporary file afterwards.
     *
     * <p>NOTE(review): {@code url} comes straight from the request and is fetched
     * by the server with the user's token attached. Consider restricting it to this
     * application's own host before use.
     *
     * @param request  the current HTTP request (not used)
     * @param url      URL of the page to render
     * @param fileName download file name without the {@code .pdf} extension
     * @return the PDF bytes with attachment headers
     * @throws UnsupportedEncodingException if a charset used for the file name is unsupported
     * @throws InterruptedException         if the thread is interrupted while waiting for PhantomJS
     * @throws AcwsGenerelException         if PhantomJS cannot be started, produces no file,
     *                                      or the file cannot be read
     * @author diaoby
     */
    @RequestMapping("/pdfdownload")
    public ResponseEntity<byte[]> html2Pdf(HttpServletRequest request,
            @RequestParam("url") String url,
            @RequestParam("fileName") String fileName)
            throws UnsupportedEncodingException, InterruptedException {
        HttpHeaders headers = new HttpHeaders();
        // Re-encode so non-ASCII file names survive the Content-Disposition header.
        fileName = new String(fileName.getBytes("UTF-8"), "iso-8859-1");
        headers.setContentType(MediaType.APPLICATION_OCTET_STREAM);
        headers.setContentDispositionFormData("attachment", fileName + ".pdf");

        String filePath = phantomService.pdfTempleFilePath();
        User currentUser = Security.getCurrentUser();
        String token = commonService.getToken2(currentUser.getLoginName());
        // Add the token and the isPdf flag so the page can authenticate and suppress pop-ups.
        url = appendQuery(url, "Authorization=" + token + "&isPdf=true");

        Process process = null;
        try {
            process = phantomService.printUrlScreen2pdf(filePath, url);
            // The PDF only exists once the child process has finished.
            process.waitFor();

            File pdfFile = new File(filePath);
            if (!pdfFile.exists()) {
                // Fail loudly instead of sending an empty attachment.
                throw new IOException("PhantomJS did not produce the PDF file: " + filePath);
            }
            byte[] body = readFile(pdfFile);
            return new ResponseEntity<byte[]>(body, headers, HttpStatus.OK);
        } catch (IOException e) {
            throw new AcwsGenerelException("pdf导出异常", e);
        } finally {
            // Null-safe cleanup: runs on every path, including a failed process start.
            if (process != null) {
                process.destroy();
            }
            delPdfTempleFile(filePath);
        }
    }
}
因为系统有jwt 所以页面url都加上了token
service 主要通过配置获取了phantom安装的路径和js,还有pdf存储的临时目录
/**
 * Copyright: Huaxin Software
 * Project: gx-pms
 * Author: diaoby
 * Created: 2019-01-08
 * Description: Phantom service
 */
package com.huaxin.gxgc.phantom.service;

import java.io.File;
import java.io.IOException;
import java.util.UUID;

import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;

/**
 * Builds and launches the PhantomJS command that renders a URL to a PDF file.
 *
 * @author diaoby
 */
@Service
public class PhantomService {

    /** Separator used when joining the command parts into one string. */
    private static final String BLANK = " ";

    /** Directory in which temporary PDF files are created. */
    @Value("${phantom.pdf.tempPath}")
    private String tempPath;

    /** Path of the PhantomJS executable. */
    @Value("${phantom.exe}")
    private String binPath;

    /** Path of the rasterize.js script. */
    @Value("${phantom.rasterizejs}")
    private String jsPath;

    /**
     * Builds the command line as a single space-separated string:
     * executable, script, URL, output path.
     *
     * <p>{@link #printUrlScreen2pdf(String, String)} does not use this string to
     * start the process, because a single command string is split on whitespace.
     *
     * @param path output PDF path
     * @param url  URL of the page to render
     * @return the space-joined command line
     * @author diaoby
     */
    public String cmd(String path, String url) {
        return binPath + BLANK + jsPath + BLANK + url + BLANK + path;
    }

    /**
     * Starts PhantomJS to render the given URL into a PDF file.
     *
     * <p>The command is passed as an argument array, so each part reaches PhantomJS
     * as exactly one argument even if it contains whitespace. The caller must wait
     * for the returned process to finish before reading the PDF.
     *
     * <p>NOTE(review): this assumes {@code phantom.exe} and
     * {@code phantom.rasterizejs} each hold a single path with no extra
     * command-line options - confirm against the deployed configuration.
     *
     * @param pdfPath output PDF path
     * @param url     URL of the page to render
     * @return the started PhantomJS process
     * @throws IOException if the process cannot be started
     * @author diaoby
     */
    public Process printUrlScreen2pdf(String pdfPath, String url) throws IOException {
        String[] command = {binPath, jsPath, url, pdfPath};
        return Runtime.getRuntime().exec(command);
    }

    /**
     * Returns a new, unique temporary PDF path inside the configured temp directory.
     *
     * @return temp directory + file separator + random UUID + ".pdf"
     * @author diaoby
     */
    public String pdfTempleFilePath() {
        return tempPath + File.separator + UUID.randomUUID().toString() + ".pdf";
    }
}
下载页面
pdf下载