论文热词云

使用 Python 爬取论文网站,统计出所有论文中出现次数最多的词;在 Web 端将这些热词以词云的形式可视化显示出来,点击词云中的热词,可以弹出相关文章的列表。

package com.dao;

import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;

import com.pjh.domain.Cloud;
import com.pjh.util.BaseConnection;

/**
 * Data-access helper that loads word-cloud entries from the {@code lunwen} table.
 */
public class DBUtil {

    /**
     * Reads every row of the {@code lunwen} table and wraps the string value of
     * its fourth column (used as the title) in a {@link Cloud}.
     *
     * <p>Failures are reported on standard error and are not propagated: when no
     * connection can be obtained an empty list is returned, and when the query
     * fails part-way the rows read so far are returned.
     *
     * @return a mutable list of entries; possibly empty, never {@code null}
     */
    public static List<Cloud> queryCloud()
    {
        List<Cloud> list = new ArrayList<>();
        Connection conn = BaseConnection.getConnection();
        if (conn == null) {
            // BaseConnection.getConnection() returns null when the driver or the
            // database is unavailable; without this guard createStatement() would
            // throw a NullPointerException that the SQLException handler misses.
            System.err.println("DBUtil.queryCloud: no database connection available; returning an empty list");
            return list;
        }

        String sql = "SELECT * FROM lunwen";  
        Statement statement = null;
        ResultSet rs = null;
        try {
            statement = conn.createStatement();
            rs = statement.executeQuery(sql);
            while (rs.next()) {
                // Column 4 is read by position, so this depends on the table's column order.
                String title = rs.getString(4);
                list.add(new Cloud(title));
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            // Always release the JDBC resources, even when the query failed.
            BaseConnection.close(rs, statement, conn);
        }
        return list;
    }
}

package com.domain;

/**
 * Mutable bean holding the title of a single word-cloud entry.
 */
public class Cloud {

    /** Title text of this entry; stored exactly as supplied. */
    private String title;

    /**
     * Creates an entry with the given title.
     *
     * @param title the title to store; no validation is performed
     */
    public Cloud(String title) {
        this.title = title;
    }

    /**
     * Replaces the stored title.
     *
     * @param title the new title
     */
    public void setTitle(String title) {
        this.title = title;
    }

    /**
     * Returns the stored title.
     *
     * @return the title last supplied to the constructor or {@link #setTitle(String)}
     */
    public String getTitle() {
        return title;
    }
}
package com.servlet;

import java.io.IOException;
import java.util.List;

import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import com.pjh.domain.Cloud;
import com.pjh.dao.DBUtil;;


@WebServlet("/cloudServlet")
public class cloudServlet extends HttpServlet {
    private static final long serialVersionUID = 1L;

    public  cloudServlet() {
        super();
    }
    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        response.setContentType("text/html;charset=UTF-8");
        request.setCharacterEncoding("UTF-8");
        
        String method = request.getParameter("method");
        //System.out.print(method);
        if(method.equals("pc"))
        {
            add(request,response);
        }
    }
    private void add(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        response.setContentType("text/html;charset=UTF-8");
        request.setCharacterEncoding("UTF-8");
        List<Cloud> list =  DBUtil.queryCloud();
        System.out.println(list);
        request.setAttribute("list", list);
        request.getRequestDispatcher("wordCloud.jsp").forward(request,response);
    }
    protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        // TODO Auto-generated method stub
        doGet(request, response);
        
    }
}
package com.util;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

public class BaseConnection {
     public static Connection getConnection(){
         Connection conn=null;
         String driver = "com.mysql.cj.jdbc.Driver";
         String url = "jdbc:mysql://localhost:3306/papercrawl?serverTimezone=UTC&characterEncoding=utf8&useSSL=true";
         String user = "root";
         String password = "123456";
         try{
             Class.forName(driver);
             conn=DriverManager.   
                     getConnection(url,user,password);
         }catch(Exception e){
             e.printStackTrace();
         }
         return conn;
     }
     
     public static void close (Statement state, Connection conn) {
            if (state != null) {
                try {
                    state.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
            
            if (conn != null) {
                try {
                    conn.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
        }
        
        public static void close (ResultSet rs, Statement state, Connection conn) {
            if (rs != null) {
                try {
                    rs.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
            
            if (state != null) {
                try {
                    state.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
            
            if (conn != null) {
                try {
                    conn.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
        }
}
<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%>
<%@ page import="java.util.*" %>
<%@taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html style="height:100%;">
<head>
   <meta charset="utf-8">
   <title>论文热词云</title>
   <script src="js/jquery.min.js"></script>
   <script src="js/echarts.js"></script>
   <script src="js/worldcloud.js"></script>
</head>
 <body>
    <div id="main" style="width: 100%; height: 600px"></div>
    <script>
        // Word-cloud data: one {name, value} object per entry of the request attribute "list".
        // The <c:forEach> below is expanded on the server, so the browser receives one copy of
        // the loop body per entry. "name" is the entry's title; "value" is a random integer up
        // to 1000000, not a frequency taken from the data.
        // NOTE(review): the title is written into a single-quoted JavaScript string without any
        // escaping; a title containing ', \ or a line break would presumably break this script.
        // Confirm, and escape the value for JavaScript.
        var gdata1 = [];
        <c:forEach items="${requestScope.list}" var="item" varStatus="status">
            var gd1 = {};
            gd1["name"] = '${item.title}';
            gd1["value"] = Math.ceil(Math.random()*1000000);
            gdata1.push(gd1);
        </c:forEach>
        onload = function () {
            var data = {
                value:gdata1,
                image: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAMgAAADICAYAAACtWK6eAAATdklEQVR4Xu1dfbRcVXXf+87k0wWVxEKMuBRNJZiCQMzHzD3n9Qm4EFQUbbGCCJWWli4URapFi19Va7V8CTQUCm2ooMhqqlVY2mXk9Z1zZ0jiA1tAy2pQlBJSSKyk9iW8vHt31wkTVhIy7829c8+du9/dby3+ytlfv71/3POx5xwE+RMEBIGuCKBgIwgIAt0REIJIdQgCUyAgBJHyEASEIFIDgkA2BOQLkg03kaoIAkKQiiRawsyGgBAkG24iVREEhCAVSbSEmQ0BIUg23ESqIggIQSqSaAkzGwJCkGy4iVRFEBCCVCTREmY2BIQg2XATqYogIASpSKIlzGwICEGy4SZSFUFACFKiRIdheDQRLQCALa1W62clcq2yrghBBpz6RqNxQq1WuwIA3o6Iz+eDiP4XAP4aEa80xjw9YDcra14IMrjU15RSX0TES6dygYh2IOLZxpi7B+dqdS0LQQaUe631vwLAUK/miejD1tqreh0v4/JBQAiSD46ptGitrwOAi9MIERElSXJKq9X6fho5GdsfAkKQ/vBLLa21fjMAfDu1IAAQ0X8j4rGyJsmCXjYZIUg23DJJNRqNefV6/VEAeGkmBc8JfcUYc24f8iKaAgEhSAqw+h0ahuHngiD4WL96EDEcHR1t9atH5KdHQAgyPUa5jBgaGjoqSZJHEHFWvwqJ6D+stcsAIOlXl8hPjYAQpKAK0Vp/DwBOzsscEV1irf1yXvpEz8EREIIUUBlKqTMRcV2eptxB4sTExFEbN27cnqde0bU/AkIQzxWxbNmy2QsWLNgMAC/3YGqtMeZ8D3pFZQcBIYjnUlBK/Tki/pkvM0mSrIii6Ae+9FddrxDEYwW4hTkR/RgA5ng08+/GmNd51F9p1UIQj+nPe2HezVUiuthae4PHUCqrWgjiKfVhGJ4RBME3PanfT60s2P2hLATxgK3nhflBPSaiW621F3gIp9IqhSAe0q+U+hQiftKD6ilVyoI9f8SFIDljumrVqiNnz57ttnV9Lsy7eS0L9pzzKQTJGVCl1LcQ8S05q02j7iJjzI1pBGRsdwSEIDlWRxiGpwVBcE+OKrOo+uX4+Pgrx8bGnskiLDL7IyAEyakiBrEwn2Lb92Zr7YU5hVZpNUKQnNKvtf4EAHw6J3V9qXG/PiSilXLC3heMe4SFIP1jCANemMuCPYccdlMhBMkBXK31NwDgbTmoylvFhcaYm/NWWiV9QpA+s91sNk+q1Wrr+1TjS1wW7H0iKwTpA8Dh4eH65OSk+5Xgq/pQ41v0RmPMRb6NzFT9QpA+MquU+jgifrYPFd5F3Wo9juPXtdvtB70bm4EGhCAZk1rShXm3aDYZY1ZmDLXSYkKQjOlXSq1DxDMzihcuRkQXWGtvLdwwc4NCkAwJLPnCvFtE28bHx5fICXu6hAtB0uEFTBbmB42KiG6w1qa68jQlPDNuuBAkZUq11pcDwOdTipViuCzY06dBCJICsxUrViyaM2fOo4g4P4VY2YbKgj1FRoQgKcBSSt2FiL+dQqSUQ5MkOT+KorWldK5kTglBekwI04V51wV7rVY7amRk5Fc9hl/ZYUKQHlLPeWHeLTwi+rK19pIewq/0ECFID+nXWn8EAP6yh6GchsRJkiyLougRTk4X7asQZBrEZ8jCvNu2b2StVUUXHSd7QpBpsqWUuhMRz+KU1JS+nmuM+UpKmcoMF4JMkeqhoaEmEUUzvBq2yYK9e4aFIN2xCZRSDyPi0hlOEBfe1caYKZ+jrgAGBw1RCNIl82EYXhYEwZcqUhiyYO+SaCHIQYCZyQvzKbZ9ZcF+EHCEIAcBRSl1ByK+uyJfj+fDJKKzrbVfrVrcU8UrBDkAnYoszLvVxJOTk5OvbrfbO4UkzyEgBNm/Eqq0MO/Ggb8yxvyJEEQI8nwNNB
qNeUEQrEbEcxCx8k8IuBd0kyQZbbVaP6w6USr5BWk2m4fXajUNAO4UOQSAEwCgXvViODB+ItqBiPcBQBTHsX322WfvGxsbG68STlUgCIZh+FpEdETYQ4iSX9NT5vqbJKJ/c4QBAEcYs2nTpq1ldrhf32YcQZYsWTJn8eLFq4hoDxmIqIGIh/ULlMh3ReCxvYSZnJyM2u32QwBAMwUv9gTRWv96hwx7p0snIuKsmZIghnE8Q0Rt94VxxKnX6/eNjIzsYhgHz12soaGhY4jo+ekSACzhCn5F/J4EgAf2EiaOY9NqtZ7iEnupvyBuurRo0aIV+yymm4i4gAu44ufBESCin+wljGsGjaLoR2WdlpWKICtXrlw4Z84cN1Xau35YjoizpdBmNgJE9D+I2HJTMkSMtmzZsmHz5s3PliHqgRIkDMOjD9hdek0ZQBEfBosAEe0GgLHO4j+amJgY3bhx4/ZBeFUYQdwTZQsXLnz9PuuHJgC8ZBBBi02WCPznXsK4r8zo6OiPi4jCG0EajcaCWq2279nD6wf0NHIROIqN4hHYTkR7pmXuv61bt27yMS3LlSBKKff88RkAMISIRxePmVisOAJtIhqN4/j2vJ576Jsgy5cvnz9v3jzX3HYRIh5R8QRJ+CVBgIjGEPGafn9v3xdB3BcDEdcAwJElwUXcEAT2Q4CIRgDAPf3gtpZT/2UmiNb6VCK6BxGD1FZFQBAoEAEieiJJkhOzHFBmIkij0Vhar9cL2UUoEEcxNYMRcFMua63bKEr1l4kgSqmvI+LvpLIkgwWBASNARGdZa+9K40ZqgqxevfqVs2bN+mkaIzJWECgDAkR0n7W2kcaX1AQJw/DsIAhuT2NExgoCZUCAiJJ6vT5nZGTENVD29JeFIJ8OguATPWmXQYJAyRBIkmRpmgu7UxNEa/0hALiqZHGLO4JArwgcbox5utfBWQjiXlhKtdDp1RkZJwj4RsAYk6rmUw12zg8PD784juNtAFDzHYzoFwTyRICI1llr35lGZ2qCOOVKqW8houu7kj9BgA0CSZK8M4qidWkczkSQit8+mAZfGVsSBIjoIWvtsWndyUQQZyQMw4uDILgurUEZLwgMAIFfxXF8fKvVejSt7cwE6Uy1bkTEP0xrVMYLAkUiQERvtdZ+O4vNvgji7vbVWjvDp2cxLjKCQAEIXGiMuTmrnX4J4na15sZxvBEAUs/vsjotcoJAjwj0/XJW3wRxjrq7boMguB8RX9aj4zJMEPCKABH9k7X2Hf0ayYUgnUW7u6FkIyIe2q9TIi8I9IOAa0rcuXPn0NjYmLsdpa+/3AjSIYlGxPVy9WdfORHh/hDYXKvVVoyMjPyyPzXPSedKkA5JzgqC4M48nBMdgkAaBNwFdABwvLX252nkphqbO0GcMa31xwDgc3k5KXoEgekQIKIJd5uOtXbDdGPT/LsXgnRI8vcAcF4aZ2SsIJAFASIiRHyHMeYbWeQL/4J0DLr3/r6LiKfk7bToEwQOQOBSY8zVPlDx9gVxzro7s+bPn++e8JIzEh/ZE51ARDdZa711c3gliMufnJFIFXtE4B5jjOsq9/ailXeCOHA6t7jLGYnHSqmg6vvd3c++X68qhCAdksgZSQWr2EfIRPSziYmJ5UU8iVAYQTokkTMSHxVTIZ3urCNJkhVZWtezwFQoQTrbv1cAwGeyOCsy1UbA11nHoLZ5u9rVWssZSbVrPXX0Ps86SkcQAJAzktQlUm0BIrrMWntl0SgUPsXaG6CckRSdar72fJ91lPELsscnOSPhW7QFeu79rKO0BNl7RhIEgXvR9EUFgi6meCBQyFlHqQnS2dk6GQC+K5fR8ajaIrws8qyj9ARxDiql3ouIa4sAX2yUG4GizzpYEKQz3ZKb48tdu969G8RZBxuCdKZbXwOAd3nPhBgoHQKDOutgRZDh4eF6HMfr3a/DSpdBccg3Ah81xnzRt5E0+gd2DjKVk2EYHtK5IWVpmmBkLF8EBnnWweoLst
fZMAwXI6K7a+sIvmkXz3tEYKBnHSwJ4pxuNpu/WavV3C8S5Yykx0pjOGzgZx1sCdJZtMsZCcOq78Xlspx1sCaIc17OSHopN15jiGhHkiQnFvW7jqzolHKRfrBglFKfRcSPZw1U5MqDABHtJqKToygy5fHq4J6wIUhnuiVnJGWvqB78S5LkXVEUfb2HoQMfwoogckYy8Hrp2wEiutxa+4W+FRWkgBVBHCZyRlJQZfgxs9YYc74f1X60siNIhyRyRuKnHrxpJaLvWWtPBYDEmxEPilkSxOEgZyQeqsGfygfHx8dXj42Njfsz4UczW4I4OJRS9yLisB9oRGuOCHzEGPOlHPUVpoo1QbTWPwKAYwpDSwxlQoCIrrTWXpZJeMBCrAmilPoFIh42YAzF/PQI3G6Mec/0w8o3gi1BOlu+fb9BV76UzDyPOgv0N3KMjC1BVq1adeTs2bMf5wh6BX1+0BhzHMe42RJEKbUcEX/AEfSq+UxET1trD+cYN1uChGF4ehAEd3MEvWo+u5/SWmsDjnGzJYhS6n2IeAtH0Kvo865du166adOmrdxiZ0sQrfXlAPB5boBX1d84jk9otVo/5BY/W4Iopa5BxEu4AV5hf99kjHGXA7L6Y0sQrfVXAeB3WaFdYWeJ6Dxr7W3cIGBLEGkz4VZqwLLdhC1BpM2EF0GI6Cpr7Yd5eQ3AliBKqe2IuIAb4BX2l2W7CUuCSJsJP5pxbTdhSZBGo/Gyer3+X/zKpLoeE9FD1tpjuSHAkiDSZsKtzAC4tpuwJIi0mbAkCMt2E5YEUUr9HiLeyq9MKu/xYmPMk5xQ4EqQP0XEv+AEtPgKwLHdhCVBtNZXA8AHpejYIcCu3YQrQaTNhB039izU2bWbsCSIUur7iPgGhjVSdZdL94LUdAnhSpCHEfG10wUn/14uBDi2m7AkiNZ6GwAsLFf6xZseEGDXbsKVINRDMmRI+RBYb4w5pXxudfeIHUGkzYRTee3vK8d2E3YE0VqfCABjfMukup5zbDdhR5AwDE8LguCe6pYZ38g53m7CjiBaa/e+xN/xLZPKe86q3YQdQZRS0mbCmGPc2k3YEUTaTBiz4znXWbWbsCOIUuoORHw3+zKpaABJkpwfRdFaLuGzI4jWej0AnMQFYPHzBQiwajdhRxCllLSZMGYdt3YTdgSRNhPG7IA9Hb13WGvP4RIFO4IopRJEZOc3l4IowE9W7SasCi0Mw8VBEDxRQBLFhCcEuLWbsCJIs9k8vlarPeApd6VQS0SPIuKrS+GMByeI6Clr7REeVHtRyYogM7jNZDsR3RIEwfWjo6OPh2GoEfEDiHgmANS8ZH5ASrm1m7AiyAxsM3mAiK6v1+t3jIyM7DqwZt2UEhHfj4h/MMN+/8Km3YQVQZRSH0XELwzof365mCWi3Yi4DgCuM8ZEvSgdHh6eu3v37nOCIPgAALB8DHPfODm1m7AiiNb6KgD4UC9FVcIxW5MkuWliYmJNP0+Raa1/i4jc9OttXKdfRHSatfY7JczRC1xiRRCmbSbtJEmu37Vr111jY2O5ves+NDT08iRJLgaA3+d2yz2ndhNWBGHUZuLWE1+L4/ha3+/yuelXHMfnAsD7AYDL5dBs2k1YEUQp9RAiLivrp5mIfg4Aa+I4vqndbv+iaD/DMHyD2/0CgDMQsczPLl9tjLm0aHyy2GNFEK310wDwkiyB+pQhonvdblQURd8EgNinrV50u+mXW6cQ0QWIeFgvMkWO4dRuwoogJWsz+T8A+IckSa6JouiRIgusV1uNRmNeEATv7WwVl+nLy6bdhA1BVqxYsWju3LkDvxm8c9J97cTExNoNGzbs6LVYBz2u2WyeFATBBxHxrYP2BQAeNMaw2K5mQ5Bms/mKWq322CCS605/AeA7RHRdFEVue5LtvVwOR3eegojvA4AXDwJPANhsjPmNAdlOZZYNQYaGho4iop+kiq7/wc+4CyIcMay1Rdvu3/spNLjpV71edxdguN2vY7wae6
FyIUjegC9fvnz+/Pnz3bzf+x8RuR9lXT85Obm23W7v9G5wwAbCMHyjW6cAwJuL2P0ioshaqwYcdk/m2XxBXDRKqccQ8RU9RZZ+UExE/9yZRt2bXpy/RGcaewkAuOnXr/mKKEmSNVEU/bEv/XnqZUUQrfUaAPijPAEAgG1E9LcdYmzJWTdLdccdd9yLDj300PPc9AsRl3oI4i3GmLs96M1dJSuC5Px7kPuJ6Fpr7W25ozqDFLrpl9v9AoDTcwrrp8aYV+Wky7saVgRxaGitPwMAV2RBxnXSAsBdrjeq1Wq1s+ioqoxSyhW1O6V3D6gemgUHh3+SJCt9t99k8a2bDDuCdNYit3S2KXvF4kki+hs39221Wk/1KiTjXoiAm34dcsghjiRu+vWaXjFy5CCit0dRxOpeZZYE6ZDkk4j4qakSRESGiG6IoujOXhMp43pHQCn1JkR0069Tp8nDE0EQnD06Ojrau/ZyjGRLkM50yz3D9h4iWg0ArjlvHAAeB4CH4zj+x3a7LRc8FFBnq1atOnLWrFlnuWtFAWD2PiZ3ENG/TE5O3sap62BfyFgTpIDci4mKIyAEqXgBSPhTIyAEkQoRBKZAQAgi5SEICEGkBgSBbAjIFyQbbiJVEQSEIBVJtISZDQEhSDbcRKoiCAhBKpJoCTMbAkKQbLiJVEUQEIJUJNESZjYEhCDZcBOpiiAgBKlIoiXMbAgIQbLhJlIVQUAIUpFES5jZEPh/CZyaIxsyF20AAAAASUVORK5CYII="
            }
            var myChart = echarts.init(document.getElementById('main'));
            var ecConfig = echarts.config;
               myChart.on('click', eConsole);
               
            var maskImage = new Image();
            maskImage.src = data.image

            // Configure the chart only once the mask image has finished loading, because the
            // image object itself is passed to the series as its maskImage.
            maskImage.onload = function () {
                myChart.setOption({
                    backgroundColor: '#fff',
                    tooltip: {
                        show: false
                    },
                    series: [{
                            type: 'wordCloud',
                            gridSize: 1,
                            sizeRange: [12, 55],
                            // NOTE(review): four values are given here; confirm which of them the
                            // word-cloud extension actually reads.
                            rotationRange: [-45, 0, 45, 90],
                            maskImage: maskImage,
                            textStyle: {
                                normal: {
                                    // Each call returns a new random 'rgb(r, g, b)' colour string.
                                    color: function () {
                                        return 'rgb(' +
                                                Math.round(Math.random() * 255) +
                                                ', ' + Math.round(Math.random() * 255) +
                                                ', ' + Math.round(Math.random() * 255) + ')'
                                    }
                                }
                            },
                            left: 'center',
                            top: 'center',
                            // Disabled sizing options. The property name on the first line was
                            // missing in the original comment; presumably it was "width".
                            // width: '96%',
                            // height: '100%',
                            right: null,
                            bottom: null,
                            // Disabled alternative fixed-size layout (first property name likewise
                            // missing in the original comment; presumably "width").
                            // width: 300,
                            // height: 200,
                            // top: 20,
                            // Entries collected in gdata1 (exposed as data.value).
                            data: data.value
                        }]
                })
            }

        }
        /**
         * Chart click handler: copies the clicked word into the hidden query
         * field and submits the search form.
         *
         * Events without a seriesIndex, and events whose type is not 'click',
         * are ignored.
         */
        function eConsole(param) {
            var fromSeries = typeof param.seriesIndex != 'undefined';
            if (!fromSeries || param.type != 'click') {
                return;
            }
            var searchForm = document.getElementById('test_form');
            document.getElementById("txt").value = param.name;
            searchForm.submit();
        }
    </script>
    <form name= "f_checkbox" action="${pageContext.request.contextPath}/cloudServlet?method=pc" method="post" id="registForm" onsubmit="return check()">
       &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <button type="submit" >生成热词云</button>
    </form>
    <script>
        // Once the DOM is ready, hide the query text box and the Search button.
        $(function () {
            $("#txt, #Search").hide();
        });
    </script>
    <form id = 'test_form' action="http://openaccess.thecvf.com/ICCV2019_search.py" method="post" accept-charset="utf-8" target="_blank">
        <input type="text" id="txt" name="query">
        <input type="submit" id="Search" value="Search">
    </form>
</body>
</html>

 

原文地址:https://www.cnblogs.com/yangqqq/p/13068587.html