顶会热词统计

<%@page import="java.util.Iterator"%>
<%@page import="java.util.HashMap"%>
<%@page import="java.util.Map"%>
<%@page import="entity.Cvf"%>
<%@page import="java.util.List"%>
<%@ page language="java" contentType="text/html; charset=UTF-8"
    pageEncoding="UTF-8"%>
<%--
  Word-cloud page.
  Reads the request attribute "cvfs" (a List of Cvf), counts how many entries carry each
  keyword, and renders the counts as an ECharts word cloud. Clicking a word opens
  ListServlet?keyword=<word> in a new window.
--%>
<%!
    /**
     * Escapes text so it can be placed inside a double-quoted JavaScript string literal
     * that lives in an inline script element.
     */
    private static String jsString(String s) {
        return s.replace("\\", "\\\\")
                .replace("\"", "\\\"")
                .replace("\r", "\\r")
                .replace("\n", "\\n")
                .replace("<", "\\x3c");
    }
%>
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>CVPR词云</title>
    <script type="text/javascript" src="Echart/echarts.js"></script>
    <script type="text/javascript" src="Echart/echarts-wordcloud.min.js"></script>

</head>
<body>
<%
    request.setCharacterEncoding("utf-8");
    List<Cvf> cvfs = (List<Cvf>) request.getAttribute("cvfs");
    // keyword -> number of entries tagged with that keyword
    Map<String, Integer> hm = new HashMap<String, Integer>();
    if (cvfs != null) {
        for (Cvf cvf : cvfs) {
            String keyword = cvf.getCkeyword();
            // Entries without a keyword would otherwise show up as a word named "null".
            if (keyword == null || keyword.isEmpty()) {
                continue;
            }
            Integer counts = hm.get(keyword);
            hm.put(keyword, counts == null ? 1 : counts + 1);
        }
    }
%>
<div id="main" style="width: 800px; height: 600px"></div>
    <script>
            var myChart = echarts.init(document.getElementById('main'));
            var option = {
                    title: {
                        text: '词云', // chart title
                        x: 'center',
                        textStyle: {
                            fontSize: 23
                        }

                    },
                    backgroundColor: '#F7F7F7',
                    tooltip: {
                        show: true
                    },
                    series: [{
                        name: '热点分析', // heading of the tooltip
                        type: 'wordCloud',
                        sizeRange: [6, 66], // font size range; too large a maximum pushes words off the canvas
                        rotationRange: [-45, 90], // range of rotation angles
                        //shape: 'circle',
                        textPadding: 0,
                        autoSize: {
                            enable: true,
                            minSize: 6
                        },
                        drawOutOfBound: true, // also draw words that do not fit inside the canvas
                        textStyle: {
                            normal: {
                                // random dark-ish colour per word
                                color: function() {
                                    return 'rgb(' + [
                                        Math.round(Math.random() * 160),
                                        Math.round(Math.random() * 160),
                                        Math.round(Math.random() * 160)
                                    ].join(',') + ')';
                                }
                            },
                            emphasis: {
                                shadowBlur: 10,
                                shadowColor: '#333'
                            }
                        },
                        data:[
                            <%
                                // One {name, value} object per distinct keyword.
                                for (Map.Entry<String, Integer> entry : hm.entrySet()) {
                            %>
                         {name:"<%=jsString(entry.getKey())%>",value:<%=entry.getValue()%>},
                         <%
                                }
                         %>
                        ]
                    }]
                  };

        myChart.setOption(option,true);
        // Open the list of entries for the clicked keyword.
        myChart.on('click',function(param){
            var selected = param.name;
            if(selected){
            window.open("ListServlet?keyword="+encodeURIComponent(selected));
            }

            });
    </script>
</body>
</html>
View Code
<%@page import="entity.Cvf"%>
<%@page import="java.util.List"%>
<%@ page language="java" contentType="text/html; charset=UTF-8"
    pageEncoding="UTF-8"%>
<%--
  Link list page.
  Reads the request attribute "cvfs" (a List of Cvf) and renders one table row per entry:
  the title as a link to the entry's URL, followed by its keyword.
--%>
<%!
    /**
     * Escapes text for safe use in HTML element content and double-quoted attribute values.
     * A null input is rendered as the empty string.
     */
    private static String esc(String s) {
        if (s == null) {
            return "";
        }
        StringBuilder out = new StringBuilder(s.length() + 16);
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            switch (c) {
                case '&':
                    out.append("&amp;");
                    break;
                case '<':
                    out.append("&lt;");
                    break;
                case '>':
                    out.append("&gt;");
                    break;
                case '"':
                    out.append("&quot;");
                    break;
                case '\'':
                    out.append("&#39;");
                    break;
                default:
                    out.append(c);
            }
        }
        return out.toString();
    }
%>
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>链接地址</title>
</head>
<body>
<%
    request.setCharacterEncoding("utf-8");
    List<Cvf> cvfs = (List<Cvf>) request.getAttribute("cvfs");
%>

      <table >
      <thead >
         <tr>
         <th>标题</th>
         <th>关键词</th>
         </tr>
      </thead>
      <tbody class="htbody">
         <%
         if (cvfs != null) {
           for (Cvf cvf : cvfs) {
               %>
               <tr>
                   <td><a href="<%=esc(cvf.getChref()) %>"><%=esc(cvf.getCname()) %></a></td>
                   <td><%=esc(cvf.getCkeyword()) %></td>
               </tr>
               <%
           }
         }
         %>
         </tbody>
      </table>
</body>
</html>
View Code
package utils;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;

import org.apache.http.HttpEntity;
import org.apache.http.NameValuePair;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;

import com.alibaba.fastjson.JSONObject;

public class HttpClientPool {
/**
 * 这是httpClient连接池
 * @throws Exception 
 */
    public static void HttpClientPool() {
        //创建连接池管理器
        PoolingHttpClientConnectionManager cm =new  PoolingHttpClientConnectionManager();
        
        
        //设置最大连接数
        cm.setMaxTotal(100);
        //设置每个主机的最大连接数
        cm.setDefaultMaxPerRoute(10);
        //使用连接池管理器发起请求
//        doGet(cm);
//        doPost(cm);
    }

public static String doPost(PoolingHttpClientConnectionManager cm) throws Exception {
    //从连接池中获取httpClient对象
        CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();

          //2、输入网址,发起请求,创建httpPost对象
          HttpPost httpPost= new HttpPost("http://openaccess.thecvf.com/CVPR2019.py#");
          System.out.println("发起请求的信息:"+httpPost);
          
          //Post使用,声明List集合,封装表单中的参数
          List<NameValuePair> params= new ArrayList<NameValuePair>();
          params.add(new BasicNameValuePair("",""));
          
          //创建表单的Entity对象,第一个参数是封装好的参数,第二个是编码
          UrlEncodedFormEntity formEntity= new UrlEncodedFormEntity(params,"utf8");
          
          //设置表单的Entity对象到Post请求中
          httpPost.setEntity(formEntity);
          
          
        //配置请求信息
          RequestConfig config = RequestConfig.custom().setConnectTimeout(10000)//设置创建连接的最长时间,单位为毫秒
          .setConnectionRequestTimeout(50000)//设置获取连接的最长时间,单位为毫秒
          .setSocketTimeout(1000*1000)//设置传输数据的最长时间,单位为毫秒
          .build();
          //给请求设置请求信息
          httpPost.setConfig(config);
          
          CloseableHttpResponse response=null;
          String content=null;
          try {
          //3、按回车,发起请求,返回响应,使用httpClient对象发起请求
           response = httpClient.execute(httpPost);
          //解析响应,获取数据
          //判断状态码是否为两百
          if(response.getStatusLine().getStatusCode()==200) {
              HttpEntity httpEntity = response.getEntity();
              if(httpEntity!=null) {
                  content = EntityUtils.toString(httpEntity, "utf8");
                  System.out.println(content.length());
//                  System.out.println(content);
                  }
          }else {
              System.out.println("请求失败"+response);
          }
          }catch(Exception e) {
              e.printStackTrace();
          }finally {
    
              try {
                //关闭response
                  if(response!=null) {
                      //关闭response 
                      response.close();
                  }
                //不关闭httpClient
                //httpClient.close();
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
          }
          return content;              
}

public static String doGet(PoolingHttpClientConnectionManager cm) throws Exception {
    //从连接池中获取httpClient对象
    CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();

    //创建URIBuilder
      URIBuilder uribuilder= new URIBuilder("http://openaccess.thecvf.com/CVPR2019.py#");
      //设置参数:参数名+参数值,可设置多个
      
      //2、输入网址,发起请求,创建httpGet对象
      HttpGet httpGet= new HttpGet(uribuilder.build());
      System.out.println("发起请求的信息:"+httpGet);
      
      //配置请求信息
      RequestConfig config = RequestConfig.custom().setConnectTimeout(10000*10000)//设置创建连接的最长时间,单位为毫秒
      .setConnectionRequestTimeout(10000*10000)//设置获取连接的最长时间,单位为毫秒
      .setSocketTimeout(100000*1000000)//设置传输数据的最长时间,单位为毫秒
      .build();
      //给请求设置请求信息
      httpGet.setConfig(config);
      
      
      
      
      
      CloseableHttpResponse response=null;
      String content=null;
      try {
      //3、按回车,发起请求,返回响应,使用httpClient对象发起请求
       response = httpClient.execute(httpGet);
      //解析响应,获取数据
      //判断状态码是否为两百
      if(response.getStatusLine().getStatusCode()==200) {
          HttpEntity httpEntity = response.getEntity();
          if(httpEntity!=null) {
          content = EntityUtils.toString(httpEntity, "utf8");
//          System.out.println(content.length());
//          System.out.println(content);
          }
      }
      }catch(Exception e) {
          e.printStackTrace();
      }finally {

          try {
              if(response!=null) {
                  //关闭response 
                  response.close();
              }
            //不能关闭httpClient
            //httpClient.close();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
      }
      return content; 
}
}
View Code
package utils;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Set;
import java.util.HashMap;  
import java.util.Iterator;  
import java.util.Map;

import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import dao.dao;
import entity.Cvf;



/**
 * 这是使用Jsoup解析
 * @author 张志伟
 *
 */
public class Jsouputil {

    /**
     * 使用Selector选择器获取元素
     */
    public static void testSelector()throws Exception{
        //获取Document对象
        HttpClientPool httpClientPool =new HttpClientPool();
        //创建连接池管理器
        PoolingHttpClientConnectionManager cm =new  PoolingHttpClientConnectionManager();
        //获取网页HTML字符串
        String content=httpClientPool.doGet(cm);
                        
        //解析字符串
        Document doc = Jsoup.parse(content);
//        System.out.println(doc.toString());
    
        //[attr=value],利用属性获取
        Elements elements = doc.select("div[id=content]").select("dl").select("dt[class=ptitle]");
        System.out.println(elements.toString());
        
        Cvf cvf=new Cvf();
        dao dao=new dao();
        if(elements!=null) 
        {
        for(Element ele:elements)
        {
            String href="http://openaccess.thecvf.com/";
            String cname=ele.select("a").text();
            System.out.println(cname);
            String href2=ele.select("a").attr("href");
            String chref=href.concat(href2);
            System.out.println(chref);
            String cabstract =null;
            String ckeyword  =null;
                //获取title的内容
            CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();
                //创建URIBuilder
                  URIBuilder uribuilder= new URIBuilder(chref);
                  HttpGet httpGet= new HttpGet(uribuilder.build());
                  RequestConfig config = RequestConfig.custom().setConnectTimeout(10000*10000)//设置创建连接的最长时间,单位为毫秒
                          .setConnectionRequestTimeout(10000*10000)//设置获取连接的最长时间,单位为毫秒
                          .setSocketTimeout(100000*1000000)//设置传输数据的最长时间,单位为毫秒
                          .build();
                          //给请求设置请求信息
                          httpGet.setConfig(config);
                          CloseableHttpResponse response=null;
                          response = httpClient.execute(httpGet);
                  //解析响应,获取数据
                  //判断状态码是否为两百
                  if(response.getStatusLine().getStatusCode()==200||response.getStatusLine().getStatusCode()==302) {
                      Document document = Jsoup.parse(new URL(chref), 100000);
                      cabstract = document.select("div[id=abstract]").text();
                      System.out.println("已获取摘要");
                    String[] strs = strTostrArray(cname+cabstract);
                      
                     ckeyword=keyword(strs);
                     
                  }
                  else {
                      System.out.println(response.getStatusLine().getStatusCode());
                      cabstract =null;
                      ckeyword=null;
                  }
                  if(response!=null) {
                      //关闭response 
                      response.close();
                  }
                  
            cvf=new Cvf(cname,chref,cabstract,ckeyword);
            dao.add(cvf);

        }
        }
        
    }
    public static String[] strTostrArray(String str) {
          /*
           * 将非字母字符全部替换为空格字符" " 得到一个全小写的纯字母字符串包含有空格字符
           */
          str = str.toLowerCase();// 将字符串中的英文部分的字符全部变为小写
          String regex = "[\W]+";// 非字母的正则表达式 --W:表示任意一个非单词字符
          str = str.replaceAll(regex, " ");
          String[] strs = str.split(" "); // 以空格作为分隔符获得字符串数组
          return strs;
         }
         public static String keyword(String[] strs) {
          /*
           * 建立字符串(String)出现次数(Integer)的映射
           */
          HashMap<String, Integer> strhash = new HashMap<String, Integer>();
          Integer in = null;// 用于存放put操作的返回值
          for (String s : strs) {// 遍历数组 strs

                  in = strhash.put(s, 1);
                  if (in != null) {// 判断如果返回的不是null,则+1再放进去就是出现的次数
                      strhash.put(s, in + 1);
                  }

          }
          Set<java.util.Map.Entry<String, Integer>> entrySet = strhash.entrySet();
          String maxStr = null;// 用于存放出现最多的单词
          int maxValue = 0;// 用于存放出现最多的次数
          for (java.util.Map.Entry<String, Integer> e : entrySet) {
           String key = e.getKey();
           Integer value = e.getValue();
           if(key.equals("a")||key.equals("the")||key.equals("to")||key.equals("and")||key.equals("in")||key.equals("of")||key.equals("our")||key.equals("your")||key.equals("we")||key.equals("is")||key.equals("on")||key.equals("for")||key.equals("that")||key.equals("an")||key.equals("are")) {
               value=0;
           }
           if (value > maxValue) {
            maxValue = value;// 这里有自动拆装箱
            maxStr = key;
           }
          }
          System.out.println("出现最多的单词是:" + maxStr + "出现了" + maxValue + "次");
          return maxStr;
         }
}
View Code
package servlet;

import java.io.IOException;
import java.util.List;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import dao.dao;
import entity.Cvf;
import utils.Jsouputil;


public class QueryServlet extends HttpServlet {

    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        
        /**
         * 这里是设置编码集,以避免出现乱码问题
         */
        request.setCharacterEncoding("utf-8");
        response.setCharacterEncoding("utf-8");
        response.setContentType("text/html;charset=utf-8");
        /**
         * 这是爬取数据
         */
    //Jsouputil jsouptil=new Jsouputil();
    //try {
    //Jsouputil.testSelector();
    //    } catch (Exception e) {
            // TODO Auto-generated catch block
    //    e.printStackTrace();
    //}

        dao dao=new dao();
        
        List<Cvf> cvfs=dao.Query();
        System.out.println(cvfs);
        request.setAttribute("cvfs",cvfs);
        
        request.getRequestDispatcher("show.jsp").forward(request, response);
    }


    protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        // TODO Auto-generated method stub
        doGet(request, response);
    }

}
View Code
package servlet;

import java.io.IOException;
import java.util.List;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import dao.dao;
import entity.Cvf;


public class ListServlet extends HttpServlet {

    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        /**
         * 这里是设置编码集,以避免出现乱码问题
         */
        request.setCharacterEncoding("utf-8");
        String key= request.getParameter("keyword");
        response.setCharacterEncoding("utf-8");
        response.setContentType("text/html;charset=utf-8");
        dao dao=new dao();
        
        List<Cvf> cvfs=dao.Query(key);
        System.out.println(cvfs);
        request.setAttribute("cvfs",cvfs);
        
        request.getRequestDispatcher("list.jsp").forward(request, response);
    }


    protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        // TODO Auto-generated method stub
        doGet(request, response);
    }

}
View Code
package entity;

/**
 * Plain data holder for one stored entry: numeric id, name, link, abstract text and keyword.
 */
public class Cvf {
    private int id;
    private String cname;
    private String chref;
    private String cabstract;
    private String ckeyword;

    /** Creates an entry with id 0 and all text fields unset. */
    public Cvf() {
    }

    /** Creates an entry without an explicit id (the id stays 0). */
    public Cvf(String cname, String chref, String cabstract, String ckeyword) {
        this(0, cname, chref, cabstract, ckeyword);
    }

    /** Creates a fully populated entry. */
    public Cvf(int id, String cname, String chref, String cabstract, String ckeyword) {
        this.id = id;
        this.cname = cname;
        this.chref = chref;
        this.cabstract = cabstract;
        this.ckeyword = ckeyword;
    }

    public int getId() {
        return this.id;
    }

    public void setId(int id) {
        this.id = id;
    }

    public String getCname() {
        return this.cname;
    }

    public void setCname(String cname) {
        this.cname = cname;
    }

    public String getChref() {
        return this.chref;
    }

    public void setChref(String chref) {
        this.chref = chref;
    }

    public String getCabstract() {
        return this.cabstract;
    }

    public void setCabstract(String cabstract) {
        this.cabstract = cabstract;
    }

    public String getCkeyword() {
        return this.ckeyword;
    }

    public void setCkeyword(String ckeyword) {
        this.ckeyword = ckeyword;
    }

    /** Lists every field in the form {@code Cvf [id=..., cname=..., ...]}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Cvf [id=");
        text.append(id);
        text.append(", cname=").append(cname);
        text.append(", chref=").append(chref);
        text.append(", cabstract=").append(cabstract);
        text.append(", ckeyword=").append(ckeyword);
        text.append("]");
        return text.toString();
    }

}
View Code
package dao;

import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

import entity.Cvf;
import utils.DBUtil;

/**
 * Data access for the {@code cvpr} table: inserting entries and reading them back,
 * either all of them or those with a given keyword. All database work goes through
 * {@code DBUtil}.
 */
public class dao {

    /**
     * Inserts one entry (name, link, abstract, keyword).
     *
     * @param cvf entry to store
     * @return the result reported by {@code DBUtil.executeUpdate}
     */
    public boolean add(Cvf cvf) {
        String sql = "insert into cvpr(cname,chref,cabstract,ckeyword) values (?,?,?,?)";
        Object[] params = {cvf.getCname(), cvf.getChref(), cvf.getCabstract(), cvf.getCkeyword()};
        return DBUtil.executeUpdate(sql, params);
    }

    /**
     * Reads every row of the table.
     *
     * @return the stored entries; empty when there are none or the query fails
     */
    public List<Cvf> Query() {
        return runQuery("select * from cvpr  ", new Object[] {});
    }

    /**
     * Reads the rows whose keyword column equals {@code key}.
     *
     * @param key keyword to match
     * @return the matching entries; empty when there are none or the query fails
     */
    public List<Cvf> Query(String key) {
        return runQuery("select * from cvpr where ckeyword=? ", new Object[] {key});
    }

    /**
     * Runs the query and maps each row to a {@code Cvf}, taking all five fields
     * (including the keyword) from the row. Errors are printed and yield the rows
     * collected so far.
     */
    private List<Cvf> runQuery(String sql, Object[] params) {
        List<Cvf> cvfs = new ArrayList<Cvf>();
        ResultSet rs = null;
        try {
            rs = DBUtil.executeQuery(sql, params);
            while (rs != null && rs.next()) {
                cvfs.add(new Cvf(
                        rs.getInt("id"),
                        rs.getString("cname"),
                        rs.getString("chref"),
                        rs.getString("cabstract"),
                        rs.getString("ckeyword")));
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Close in reverse order of opening. Each close is guarded on its own so that
            // a failure in one does not leave the remaining resources open.
            try {
                if (rs != null) rs.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
            try {
                if (DBUtil.pstmt != null) DBUtil.pstmt.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
            try {
                if (DBUtil.connection != null) DBUtil.connection.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        return cvfs;
    }
}
View Code
原文地址:https://www.cnblogs.com/vvxvv/p/13085265.html