CVPR论文生成热点词汇云图

 

CVPR论文生成热点词汇云图

 

 一、使用 Python 爬取数据

import requests

import pymysql

from bs4 import BeautifulSoup

 

# Scrapes the CVPR 2019 open-access index, fetches each paper's abstract page
# and stores title / link / abstract / keywords rows in the `lunwen` table.

# Connection to the local MySQL database that receives the scraped rows.
# All settings are passed by keyword: recent PyMySQL releases reject a
# positional host argument.
db = pymysql.connect(host='127.0.0.1',
                     port=3306,
                     user='root',
                     password='123',
                     database='mytest',
                     charset='utf8')

# Browser-like User-Agent sent with every HTTP request.
headers={
        "User-Agent":"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
        }

# Seconds to wait for a response so a stalled server cannot hang the script.
REQUEST_TIMEOUT = 30

url="http://openaccess.thecvf.com/CVPR2019.py"
html=requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)

soup=BeautifulSoup(html.content,'html.parser')

# Select the anchors whose text is exactly "pdf"; their href carries the
# file name the paper title is derived from.
pdfs=soup.find_all(name="a",string="pdf")

lis = []
for i,pdf in enumerate(pdfs):
    pdf_name=pdf["href"].split('/')[-1]
    name=pdf_name.split('.')[0].replace("_CVPR_2019_paper","")
    link="http://openaccess.thecvf.com/content_CVPR_2019/html/"+name+"_CVPR_2019_paper.html"

    # Reset for every paper: otherwise a page without an abstract would be
    # stored with the abstract of the previously processed paper.
    jianjie=""
    try:
        html1 = requests.get(link, headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # Best effort: keep the paper, just without an abstract.
        print("abstract request failed for " + name + ": " + str(exc))
    else:
        soup1 = BeautifulSoup(html1.content, 'html.parser')
        weizhi = soup1.find('div', attrs={'id':'abstract'})
        if weizhi:
            jianjie = weizhi.get_text()
    print("这是第"+str(i)+"条数据")

    # The name is the title with words separated by "_"; the keyword list is
    # the same words separated by ",".
    keywords = ','.join(str(name).split('_'))

    lis.append({
        'title': name,
        'link': link,
        'abstract': jianjie,
        'keywords': keywords,
    })

# Every row has the same keys, so the INSERT statement is built once.
columns = ('title', 'link', 'abstract', 'keywords')
cols = ", ".join('`{}`'.format(k) for k in columns)
val_cols = ', '.join('%({})s'.format(k) for k in columns)
res_sql = "insert into lunwen(%s) values(%s)" % (cols, val_cols)
print(res_sql)

cursor = db.cursor()
try:
    for info in lis:
        # Values are bound by name from the row dictionary.
        cursor.execute(res_sql, info)
    # One commit for the whole batch.
    db.commit()
    print("成功")
finally:
    cursor.close()
    db.close()

 二、分析、查找关键词

 借助 Map 存储关键词:key 为关键词,value 为该关键词出现的次数。遍历所有论文的关键词,每遇到一次相同的关键词就将其 value 加 1,最后按 value 从大到小排序。

dao层:

package dao;

 

 

import java.sql.Connection;

import java.sql.PreparedStatement;

import java.sql.ResultSet;

import java.sql.SQLException;

import java.sql.Statement;

import java.util.ArrayList;

import java.util.HashMap;

import java.util.LinkedHashMap;

import java.util.Map;

import java.util.stream.Collectors;

import  Bean.copy.*;

import jdbc.Util;

import java.sql.Connection;

import java.sql.PreparedStatement;

import java.sql.ResultSet;

import java.sql.SQLException;

import java.sql.Statement;

import java.util.ArrayList;

import java.util.List;

 

import com.sun.xml.internal.ws.policy.privateutil.PolicyUtils.Collections;

 

/**
 * Data-access helpers for the {@code lunwen} table.
 */
public class Dao {

    /**
     * Counts how often each keyword occurs across all rows of {@code lunwen}.
     *
     * <p>The {@code keywords} column is split on {@code ","}; every token is
     * counted once per occurrence, starting at 1 for the first occurrence.
     * If the query fails, the stack trace is printed and whatever was counted
     * so far is returned.
     *
     * @return keyword-to-count map whose iteration order is by descending count
     */
    public static Map<String,Integer> getrc()
    {
        // Only the keywords column is read below.
        String sql = "select keywords from lunwen";
        Map<String, Integer> counts = new HashMap<String, Integer>();
        Map<String, Integer> results = new LinkedHashMap<String, Integer>();
        Connection con = null;
        Statement state = null;
        ResultSet rs = null;
        con = Util.getConn();
        try {
            state = con.createStatement();
            rs = state.executeQuery(sql);
            while (rs.next())
            {
                String keywords = rs.getString("keywords");
                if (keywords == null)
                {
                    // A NULL column has no keywords to count.
                    continue;
                }
                for (String word : keywords.split(","))
                {
                    if (word.isEmpty())
                    {
                        continue;
                    }
                    // First occurrence stores 1, later ones add 1.
                    counts.merge(word, 1, Integer::sum);
                }
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            // Runs on every exit path so the JDBC resources are always handed back.
            Util.close(rs, state, con);
        }

        // Copy into an insertion-ordered map, highest count first.
        counts.entrySet()
              .stream()
              .sorted((p1, p2) -> p2.getValue().compareTo(p1.getValue()))
              .forEachOrdered(ele -> results.put(ele.getKey(), ele.getValue()));

        return results;
    }

    /**
     * Finds the papers whose {@code keywords} column contains the given text.
     *
     * <p>If the query fails, the stack trace is printed and the rows read so
     * far are returned.
     *
     * @param keywords text to search for; bound as a statement parameter
     *                 between {@code %} wildcards
     * @return the matching rows, possibly empty, never {@code null}
     */
    public List<Data> list(String keywords) {
        List<Data> list = new ArrayList<Data>();
        // The search text is bound with "?" instead of being concatenated
        // into the statement, so it cannot change the SQL.
        String sql = "select * from lunwen where keywords like ?";
        Connection conn = Util.getConn();
        PreparedStatement pst = null;
        ResultSet rs = null;
        try {
            pst = conn.prepareStatement(sql);
            pst.setString(1, "%" + keywords + "%");
            rs = pst.executeQuery();
            while (rs.next()) {
                String title = rs.getString("title");
                String link = rs.getString("link");
                String abstractText = rs.getString("abstract");
                // NOTE(review): the search term is stored in the Data object,
                // not the row's own keywords column — confirm this is intended.
                list.add(new Data(title, link, abstractText, keywords));
            }
        } catch (SQLException e1) {
            e1.printStackTrace();
        } finally {
            // Also releases the connection on every exit path.
            Util.close(rs, pst, conn);
        }
        return list;
    }

}

  servlet层:

package servlet;

 

import java.io.IOException;

import java.util.Map;

 

import javax.servlet.ServletException;

import javax.servlet.annotation.WebServlet;

import javax.servlet.http.HttpServlet;

import javax.servlet.http.HttpServletRequest;

import javax.servlet.http.HttpServletResponse;

 

import dao.Dao;

import net.sf.json.JSONArray;

import net.sf.json.JSONObject;

 

@WebServlet("/RcServlet")

public class RcServlet extends HttpServlet {

    private static final long serialVersionUID = 1L;

       

    /**

     * @see HttpServlet#HttpServlet()

     */

    public RcServlet() {

        super();

        // TODO Auto-generated constructor stub

    }

 

    /**

     * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)

     */

    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {

 

        this.doPost(request, response);

    }

 

    /**

     * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)

     */

    protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {

        request.setCharacterEncoding("utf-8");

        Map<String, Integer>sortMap=Dao.getrc();

        JSONArray json =new JSONArray();

        int k=0;

        for (Map.Entry<String, Integer> entry : sortMap.entrySet())

        {

            JSONObject ob=new JSONObject();

            ob.put("name", entry.getKey());

            ob.put("value", entry.getValue());

            if(!(entry.getKey().equals("for")||entry.getKey().equals("and")||entry.getKey().equals("With")||entry.getKey().equals("of")||entry.getKey().equals("in")||entry.getKey().equals("From")||entry.getKey().equals("A")||entry.getKey().equals("to")||entry.getKey().equals("a")||entry.getKey().equals("the")||entry.getKey().equals("by")))

            {

                json.add(ob);

                k++;

            }

            if(k==10)

                break;

        }

        System.out.println(json.toString());

        

        response.getWriter().write(json.toString());

    

    }

 

}

  

  三、生成热点词汇云图

<%@ page language="java" contentType="text/html; charset=UTF-8"
    pageEncoding="UTF-8"%>
<%@taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
<%-- Hot-word page: a table of paper links on the left, a keyword cloud on the right. --%>

<%request.setCharacterEncoding("utf-8");
response.setCharacterEncoding("utf-8");%>
<!DOCTYPE html>
<html>
<head>
<%-- Single charset declaration, matching the page directive above. --%>
<meta charset="UTF-8">
<title>热词云</title>
<link type="text/css" rel="stylesheet" href="css/style.css">
<script src="js/jquery-3.4.1.min.js"></script>
<script src="js/echarts.min.js"></script>
<script src="js/echarts-cloud.js"></script>
<style>
    /* Container the word cloud is rendered into. */
    #main {
        /* Keep the border inside the 30% so both floats fit on one row. */
        box-sizing: border-box;
        width: 30%;
        height: 500px;
        border: 1px solid #ddd;
        float: right;
    }

    /* Scrollable container for the table of paper links. */
    #table {
        overflow-x: auto;
        overflow-y: auto;
        width: 70%;
        height: 500px;
        float: left;
        /* margin-top / padding-top were given as "100dp", which is not a CSS
           unit; the declarations are left out here. */
    }
</style>
</head>

 

<body >

<br>

<h1>热词云</h1>

<br>

<br>

<br>

 

<%-- One row per entry of the "list" attribute; c:out HTML-escapes the stored
     link and title before they are written into the page. --%>
<div id="table">
  <table id='gradient-style' >
    <tr>
      <th align="center">论文连接</th>
    </tr>
    <c:forEach var="item" items="${list}">
      <tr>
        <td><a href="<c:out value='${item.link}'/>"><c:out value='${item.title}'/></a></td>
      </tr>
    </c:forEach>
  </table>
</div>

 

 

  <div id="main">

  

  </div>

  <script type="text/javascript">

 

    var dt;

   

            $.ajax({

                url : "RcServlet",

                async : true,

                type : "POST",

                data : {        

                },

                dataType : "json",

                success : function(data) {

                    dt = data;

                    

                     var mydata = new Array(0);

                     for (var i = 0; i < dt.length; i++) {

                          var d = {};

                          

                          d["name"] = dt[i].name;

                         

                          d["value"] = dt[i].value;

                          mydata.push(d);

                      }

                     var myChart = echarts.init(document.getElementById('main'));

                     //设置点击效果

                    

                     

                     

                     myChart.setOption({

                         title: {

                             text: ''

                         },

                         tooltip: {},

                         series: [{

                             type : 'wordCloud',  //类型为字符云

                                 shape:'smooth',  //平滑

                                 gridSize : 8, //网格尺寸

                                 size : ['50%','50%'],

                                 //sizeRange : [ 50, 100 ],

                                 rotationRange : [-45, 0, 45, 90], //旋转范围

                                 textStyle : {

                                     normal : {

                                         fontFamily:'微软雅黑',

                                         color: function() {

                                             return 'rgb(' +

                                                 Math.round(Math.random() * 255) +

                                          ', ' + Math.round(Math.random() * 255) +

                                          ', ' + Math.round(Math.random() * 255) + ')'

                                                }

                                         },

                                     emphasis : {

                                         shadowBlur : 5,  //阴影距离

                                         shadowColor : '#333'  //阴影颜色

                                     }

                                 },

                                 left: 'center',

                                 top: 'center',

                                 right: null,

                                 bottom: null,

                                 '100%',

                                 height:'100%',

                                 data:mydata

                         }]

                     });

                     

                     myChart.on('click', function (params) {

                         var url = "ClickServlet?keywords=" + params.name;

                         window.location.href = url;

                       });

                     

                    alert("成功!");

                   

   

                },

                error : function() {

                    alert("请求失败");

                },

           });

    

         

       

 

 

</script>

    

 

</body>

</html>

原文地址:https://www.cnblogs.com/xiatian21/p/13086293.html