热词统计

DAO.java

package dao;

import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

import entity.Cvf;
import utils.DBUtil;

/**
 * Data-access object for the {@code cvpr} table, which holds one row per paper
 * (name, link, abstract and keyword).
 *
 * <p>All database work is delegated to {@link DBUtil}. The query methods never throw:
 * failures are printed and an empty (or partially filled) list is returned.
 */
public class dao {

	/**
	 * Inserts one paper record into {@code cvpr}.
	 *
	 * @param cvf record whose name, href, abstract and keyword are written
	 * @return the flag reported by {@code DBUtil.executeUpdate}
	 */
	public boolean add(Cvf cvf) {
		String sql = "insert into cvpr(cname,chref,cabstract,ckeyword) values (?,?,?,?)";
		Object[] params = {cvf.getCname(), cvf.getChref(), cvf.getCabstract(), cvf.getCkeyword()};
		return DBUtil.executeUpdate(sql, params);
	}

	/**
	 * Reads every row of {@code cvpr}.
	 *
	 * @return the rows as {@code Cvf} objects; empty when the query fails or finds nothing
	 */
	public List<Cvf> Query() {
		return runQuery("select * from cvpr  ", new Object[] {}, null);
	}

	/**
	 * Reads the rows of {@code cvpr} whose {@code ckeyword} equals the given key.
	 *
	 * @param key keyword to match; it is also stored as the keyword of every returned object
	 * @return the matching rows; empty when the query fails or finds nothing
	 */
	public List<Cvf> Query(String key) {
		return runQuery("select * from cvpr where ckeyword=? ", new Object[] {key}, key);
	}

	/**
	 * Runs a select statement and maps each row to a {@code Cvf}.
	 *
	 * @param sql             statement text with {@code ?} placeholders
	 * @param params          values bound to the placeholders, in order
	 * @param keywordOverride keyword to put on every result; when {@code null} the
	 *                        {@code ckeyword} column of the row is used instead
	 */
	private List<Cvf> runQuery(String sql, Object[] params, String keywordOverride) {
		List<Cvf> cvfs = new ArrayList<>();
		ResultSet rs = null;
		try {
			rs = DBUtil.executeQuery(sql, params);
			if (rs == null) {
				// executeQuery returns null after printing its own failure; nothing to read.
				return cvfs;
			}
			while (rs.next()) {
				int id = rs.getInt("id");
				String cname = rs.getString("cname");
				String chref = rs.getString("chref");
				String cabstract = rs.getString("cabstract");
				String ckeyword = keywordOverride != null ? keywordOverride : rs.getString("ckeyword");
				cvfs.add(new Cvf(id, cname, chref, cabstract, ckeyword));
			}
		} catch (SQLException | RuntimeException e) {
			e.printStackTrace();
		} finally {
			// Close in reverse order of opening. Each close is attempted independently so a
			// failure on the result set cannot leave the statement or connection open.
			closeQuietly(rs);
			closeQuietly(DBUtil.pstmt);
			closeQuietly(DBUtil.connection);
		}
		return cvfs;
	}

	/** Closes a resource if it is non-null, printing (not propagating) any failure. */
	private static void closeQuietly(AutoCloseable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
}

  Cvf.java(注:下方代码与上面的 DAO.java 完全相同,原文此处未给出 Cvf 实体类的源码)

package dao;

import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

import entity.Cvf;
import utils.DBUtil;

/**
 * Data-access object for the {@code cvpr} table, which holds one row per paper
 * (name, link, abstract and keyword).
 *
 * <p>All database work is delegated to {@link DBUtil}. The query methods never throw:
 * failures are printed and an empty (or partially filled) list is returned.
 */
public class dao {

	/**
	 * Inserts one paper record into {@code cvpr}.
	 *
	 * @param cvf record whose name, href, abstract and keyword are written
	 * @return the flag reported by {@code DBUtil.executeUpdate}
	 */
	public boolean add(Cvf cvf) {
		String sql = "insert into cvpr(cname,chref,cabstract,ckeyword) values (?,?,?,?)";
		Object[] params = {cvf.getCname(), cvf.getChref(), cvf.getCabstract(), cvf.getCkeyword()};
		return DBUtil.executeUpdate(sql, params);
	}

	/**
	 * Reads every row of {@code cvpr}.
	 *
	 * @return the rows as {@code Cvf} objects; empty when the query fails or finds nothing
	 */
	public List<Cvf> Query() {
		return runQuery("select * from cvpr  ", new Object[] {}, null);
	}

	/**
	 * Reads the rows of {@code cvpr} whose {@code ckeyword} equals the given key.
	 *
	 * @param key keyword to match; it is also stored as the keyword of every returned object
	 * @return the matching rows; empty when the query fails or finds nothing
	 */
	public List<Cvf> Query(String key) {
		return runQuery("select * from cvpr where ckeyword=? ", new Object[] {key}, key);
	}

	/**
	 * Runs a select statement and maps each row to a {@code Cvf}.
	 *
	 * @param sql             statement text with {@code ?} placeholders
	 * @param params          values bound to the placeholders, in order
	 * @param keywordOverride keyword to put on every result; when {@code null} the
	 *                        {@code ckeyword} column of the row is used instead
	 */
	private List<Cvf> runQuery(String sql, Object[] params, String keywordOverride) {
		List<Cvf> cvfs = new ArrayList<>();
		ResultSet rs = null;
		try {
			rs = DBUtil.executeQuery(sql, params);
			if (rs == null) {
				// executeQuery returns null after printing its own failure; nothing to read.
				return cvfs;
			}
			while (rs.next()) {
				int id = rs.getInt("id");
				String cname = rs.getString("cname");
				String chref = rs.getString("chref");
				String cabstract = rs.getString("cabstract");
				String ckeyword = keywordOverride != null ? keywordOverride : rs.getString("ckeyword");
				cvfs.add(new Cvf(id, cname, chref, cabstract, ckeyword));
			}
		} catch (SQLException | RuntimeException e) {
			e.printStackTrace();
		} finally {
			// Close in reverse order of opening. Each close is attempted independently so a
			// failure on the result set cannot leave the statement or connection open.
			closeQuietly(rs);
			closeQuietly(DBUtil.pstmt);
			closeQuietly(DBUtil.connection);
		}
		return cvfs;
	}

	/** Closes a resource if it is non-null, printing (not propagating) any failure. */
	private static void closeQuietly(AutoCloseable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
}

  ListServlet.java

package servlet;

import java.io.IOException;
import java.util.List;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import dao.dao;
import entity.Cvf;


/**
 * Servlet that looks up the papers stored under one keyword and hands them to
 * {@code list.jsp} for rendering.
 */
public class ListServlet extends HttpServlet {

	/**
	 * Reads the {@code keyword} request parameter, queries the matching records and forwards
	 * to {@code list.jsp} with the result in the {@code cvfs} request attribute.
	 */
	protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
		// Fix the character sets up front so that text is not garbled.
		request.setCharacterEncoding("utf-8");
		String keyword = request.getParameter("keyword");
		response.setCharacterEncoding("utf-8");
		response.setContentType("text/html;charset=utf-8");

		dao cvfDao = new dao();
		List<Cvf> matches = cvfDao.Query(keyword);
		System.out.println(matches);

		request.setAttribute("cvfs", matches);
		request.getRequestDispatcher("list.jsp").forward(request, response);
	}

	/** POST requests are served exactly like GET requests. */
	protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
		doGet(request, response);
	}

}

  QueryServlet.java

package servlet;

import java.io.IOException;
import java.util.List;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import dao.dao;
import entity.Cvf;
import utils.Jsouputil;


/**
 * Servlet that loads every stored paper record and hands the list to {@code show.jsp}
 * for rendering.
 */
public class QueryServlet extends HttpServlet {

	/**
	 * Queries all records and forwards to {@code show.jsp} with the result in the
	 * {@code cvfs} request attribute.
	 */
	protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
		// Fix the character sets up front so that text is not garbled.
		request.setCharacterEncoding("utf-8");
		response.setCharacterEncoding("utf-8");
		response.setContentType("text/html;charset=utf-8");

		// The crawl step is intentionally disabled here. To (re)populate the table, call
		// Jsouputil.testSelector() inside a try/catch (Exception) before running the query.

		dao cvfDao = new dao();
		List<Cvf> allRecords = cvfDao.Query();
		System.out.println(allRecords);

		request.setAttribute("cvfs", allRecords);
		request.getRequestDispatcher("show.jsp").forward(request, response);
	}

	/** POST requests are served exactly like GET requests. */
	protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
		doGet(request, response);
	}

}

  DBUtil.java

package utils;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;


/**
 * Minimal JDBC helper for the MySQL database used by this project.
 *
 * <p>The connection, statement and result set of the most recent call are kept in public
 * static fields because callers close them after reading a query result. That shared state
 * means the class must not be used from several threads at once.
 */
public class DBUtil {
	// Database URL; if loading the driver or connecting fails, this is usually the cause.
	public static String URL="jdbc:mysql://localhost:3306/test?useUnicode=true&characterEncoding=GB18030&useSSL=false&serverTimezone=GMT&allowPublicKeyRetrieval=true";

	// NOTE(review): credentials are hard-coded; move them to external configuration.
	private static final String UNAME="root";
	private static final String UPWD="1234";

	/** Statement created by the most recent call. */
	public static PreparedStatement  pstmt=null;
	/** Result set produced by the most recent {@link #executeQuery} call. */
	public static ResultSet  rs = null;
	/** Connection opened by the most recent call. */
	public static Connection  connection=null;

	/**
	 * Runs an insert, update or delete statement and closes its statement and connection.
	 *
	 * @param sql    statement text with {@code ?} placeholders
	 * @param params values bound to the placeholders in order; may be {@code null} when the
	 *               statement has none
	 * @return {@code true} when at least one row was affected; {@code false} otherwise,
	 *         including when an error occurred (the error is printed, not thrown)
	 */
	public static boolean executeUpdate(String sql,Object [] params) {
		boolean flag = false;
		try {
			prepare(sql, params);
			// The return value is the number of rows inserted, updated or deleted.
			int count = pstmt.executeUpdate();
			if (count > 0) {
				System.out.println("操作成功!!!");
				flag = true;
			}
		} catch (ClassNotFoundException | SQLException | RuntimeException e) {
			e.printStackTrace();
		} finally {
			// Close in reverse order of opening; each close is attempted independently so a
			// failing statement close cannot leave the connection open.
			closeQuietly(pstmt);
			closeQuietly(connection);
		}
		return flag;
	}

	/**
	 * Runs a select statement.
	 *
	 * <p>On success the statement and connection stay open so the result can be read; the
	 * caller must close the returned result set, {@link #pstmt} and {@link #connection}.
	 * On failure they are closed here.
	 *
	 * @param sql    statement text with {@code ?} placeholders
	 * @param params values bound to the placeholders in order; may be {@code null}
	 * @return the open result set, or {@code null} when an error occurred (the error is
	 *         printed, not thrown)
	 */
	public static ResultSet executeQuery(String sql,Object [] params) {
		try {
			prepare(sql, params);
			rs = pstmt.executeQuery();
			return rs;
		} catch (ClassNotFoundException | SQLException | RuntimeException e) {
			e.printStackTrace();
			// Nothing usable is handed back, so do not rely on the caller to clean up.
			closeQuietly(pstmt);
			closeQuietly(connection);
			return null;
		}
	}

	/** Loads the driver, opens a connection and prepares {@code sql} with its parameters bound. */
	private static void prepare(String sql, Object[] params) throws ClassNotFoundException, SQLException {
		Class.forName("com.mysql.cj.jdbc.Driver");
		connection = DriverManager.getConnection(URL, UNAME, UPWD);
		pstmt = connection.prepareStatement(sql);
		if (params != null) {
			for (int i = 0; i < params.length; i++) {
				// JDBC parameter indexes start at 1.
				pstmt.setObject(i + 1, params[i]);
			}
		}
	}

	/** Closes a resource if it is non-null, printing (not propagating) any failure. */
	private static void closeQuietly(AutoCloseable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
}

  HttpClientPool.java

package utils;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;

import org.apache.http.HttpEntity;
import org.apache.http.NameValuePair;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;

import com.alibaba.fastjson.JSONObject;

/**
 * HttpClient helpers that request the CVPR 2019 open-access listing page through a
 * caller-supplied pooling connection manager.
 */
public class HttpClientPool {

	/** Page requested by both {@link #doGet} and {@link #doPost}. */
	private static final String LISTING_URL = "http://openaccess.thecvf.com/CVPR2019.py#";

	/** Connect / connection-request timeout for GET, in milliseconds. */
	private static final int GET_CONNECT_TIMEOUT_MS = 10000 * 10000;

	/**
	 * Socket timeout for GET, in milliseconds. The previous expression
	 * {@code 100000*1000000} overflowed {@code int} and silently produced an unrelated
	 * value; the largest representable timeout is used instead.
	 */
	private static final int GET_SOCKET_TIMEOUT_MS = Integer.MAX_VALUE;

	/**
	 * Demonstrates how a pooling connection manager is configured (100 connections in
	 * total, 10 per route). The manager is neither returned nor used; callers create their
	 * own and pass it to {@link #doGet} or {@link #doPost}.
	 */
	public static void HttpClientPool() {
		PoolingHttpClientConnectionManager cm =new  PoolingHttpClientConnectionManager();
		// Maximum number of connections overall.
		cm.setMaxTotal(100);
		// Maximum number of connections per host.
		cm.setDefaultMaxPerRoute(10);
	}

	/**
	 * Sends a form POST to the listing page.
	 *
	 * @param cm connection manager the client borrows connections from; it is left open
	 * @return the response body decoded as UTF-8, or {@code null} when the status is not 200,
	 *         the response has no entity, or the request fails (the failure is printed)
	 * @throws Exception if the form entity cannot be encoded
	 */
	public static String doPost(PoolingHttpClientConnectionManager cm) throws Exception {
		// The client is not closed: closing it would shut down the shared connection manager.
		CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();

		HttpPost httpPost= new HttpPost(LISTING_URL);
		System.out.println("发起请求的信息:"+httpPost);

		// Form parameters carried by the POST body.
		List<NameValuePair> params= new ArrayList<NameValuePair>();
		params.add(new BasicNameValuePair("",""));
		httpPost.setEntity(new UrlEncodedFormEntity(params,"utf8"));

		RequestConfig config = RequestConfig.custom()
				.setConnectTimeout(10000)           // establishing the connection
				.setConnectionRequestTimeout(50000) // obtaining a connection from the pool
				.setSocketTimeout(1000*1000)        // waiting for data
				.build();
		httpPost.setConfig(config);

		String content = null;
		// try-with-resources releases the response (and its pooled connection) on every path.
		try (CloseableHttpResponse response = httpClient.execute(httpPost)) {
			content = readBody(response);
			if (content != null) {
				System.out.println(content.length());
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
		return content;
	}

	/**
	 * Sends a GET to the listing page.
	 *
	 * @param cm connection manager the client borrows connections from; it is left open
	 * @return the response body decoded as UTF-8, or {@code null} when the status is not 200,
	 *         the response has no entity, or the request fails (the failure is printed)
	 * @throws Exception if the request URI cannot be built
	 */
	public static String doGet(PoolingHttpClientConnectionManager cm) throws Exception {
		// The client is not closed: closing it would shut down the shared connection manager.
		CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();

		// Query parameters could be added on the builder before build().
		URIBuilder uribuilder= new URIBuilder(LISTING_URL);
		HttpGet httpGet= new HttpGet(uribuilder.build());
		System.out.println("发起请求的信息:"+httpGet);

		RequestConfig config = RequestConfig.custom()
				.setConnectTimeout(GET_CONNECT_TIMEOUT_MS)           // establishing the connection
				.setConnectionRequestTimeout(GET_CONNECT_TIMEOUT_MS) // obtaining a connection from the pool
				.setSocketTimeout(GET_SOCKET_TIMEOUT_MS)             // waiting for data
				.build();
		httpGet.setConfig(config);

		String content = null;
		// try-with-resources releases the response (and its pooled connection) on every path.
		try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
			content = readBody(response);
		} catch (Exception e) {
			e.printStackTrace();
		}
		return content;
	}

	/**
	 * Returns the UTF-8 body of a 200 response; prints a diagnostic and returns
	 * {@code null} for any other status, and returns {@code null} when there is no entity.
	 */
	private static String readBody(CloseableHttpResponse response) throws IOException {
		if (response.getStatusLine().getStatusCode() != 200) {
			System.out.println("请求失败"+response);
			return null;
		}
		HttpEntity httpEntity = response.getEntity();
		if (httpEntity == null) {
			return null;
		}
		return EntityUtils.toString(httpEntity, "utf8");
	}
}

  Jsouputil.java

package utils;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Set;
import java.util.HashMap;  
import java.util.Iterator;  
import java.util.Map;

import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import dao.dao;
import entity.Cvf;



/**
 * 这是使用Jsoup解析
 */
public class Jsouputil {

	/**
	 * 使用Selector选择器获取元素
	 */
	public static void testSelector()throws Exception{
		//获取Document对象
		HttpClientPool httpClientPool =new HttpClientPool();
		//创建连接池管理器
		PoolingHttpClientConnectionManager cm =new  PoolingHttpClientConnectionManager();
		//获取网页HTML字符串
		String content=httpClientPool.doGet(cm);
						
		//解析字符串
		Document doc = Jsoup.parse(content);
//		System.out.println(doc.toString());
	
		//[attr=value],利用属性获取
		Elements elements = doc.select("div[id=content]").select("dl").select("dt[class=ptitle]");
		System.out.println(elements.toString());
		
		Cvf cvf=new Cvf();
		dao dao=new dao();
		if(elements!=null) 
		{
		for(Element ele:elements)
		{
			String href="http://openaccess.thecvf.com/";
			String cname=ele.select("a").text();
			System.out.println(cname);
			String href2=ele.select("a").attr("href");
			String chref=href.concat(href2);
			System.out.println(chref);
			String cabstract =null;
			String ckeyword  =null;
				//获取title的内容
			CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(cm).build();
				//创建URIBuilder
				  URIBuilder uribuilder= new URIBuilder(chref);
				  HttpGet httpGet= new HttpGet(uribuilder.build());
				  RequestConfig config = RequestConfig.custom().setConnectTimeout(10000*10000)//设置创建连接的最长时间,单位为毫秒
						  .setConnectionRequestTimeout(10000*10000)//设置获取连接的最长时间,单位为毫秒
						  .setSocketTimeout(100000*1000000)//设置传输数据的最长时间,单位为毫秒
						  .build();
						  //给请求设置请求信息
						  httpGet.setConfig(config);
						  CloseableHttpResponse response=null;
						  response = httpClient.execute(httpGet);
		    	  //解析响应,获取数据
		    	  //判断状态码是否为两百
		    	  if(response.getStatusLine().getStatusCode()==200||response.getStatusLine().getStatusCode()==302) {
		    		  Document document = Jsoup.parse(new URL(chref), 100000);
				      cabstract = document.select("div[id=abstract]").text();
				      System.out.println("已获取摘要");
					String[] strs = strTostrArray(cname+cabstract);
					  
					 ckeyword=keyword(strs);
					 
		    	  }
		    	  else {
		    		  System.out.println(response.getStatusLine().getStatusCode());
		    		  cabstract =null;
		    		  ckeyword=null;
		    	  }
		    	  if(response!=null) {
					  //关闭response 
					  response.close();
				  }
		    	  
			cvf=new Cvf(cname,chref,cabstract,ckeyword);
			dao.add(cvf);

		}
		}
		
	}
	public static String[] strTostrArray(String str) {
		  /*
		   * 将非字母字符全部替换为空格字符" " 得到一个全小写的纯字母字符串包含有空格字符
		   */
		  str = str.toLowerCase();// 将字符串中的英文部分的字符全部变为小写
		  String regex = "[\W]+";// 非字母的正则表达式 --W:表示任意一个非单词字符
		  str = str.replaceAll(regex, " ");
		  String[] strs = str.split(" "); // 以空格作为分隔符获得字符串数组
		  return strs;
		 }
		 public static String keyword(String[] strs) {
		  /*
		   * 建立字符串(String)出现次数(Integer)的映射
		   */
		  HashMap<String, Integer> strhash = new HashMap<String, Integer>();
		  Integer in = null;// 用于存放put操作的返回值
		  for (String s : strs) {// 遍历数组 strs

				  in = strhash.put(s, 1);
				  if (in != null) {// 判断如果返回的不是null,则+1再放进去就是出现的次数
					  strhash.put(s, in + 1);
				  }

		  }
		  Set<java.util.Map.Entry<String, Integer>> entrySet = strhash.entrySet();
		  String maxStr = null;// 用于存放出现最多的单词
		  int maxValue = 0;// 用于存放出现最多的次数
		  for (java.util.Map.Entry<String, Integer> e : entrySet) {
		   String key = e.getKey();
		   Integer value = e.getValue();
		   if(key.equals("a")||key.equals("the")||key.equals("to")||key.equals("and")||key.equals("in")||key.equals("of")||key.equals("our")||key.equals("your")||key.equals("we")||key.equals("is")||key.equals("on")||key.equals("for")||key.equals("that")||key.equals("an")||key.equals("are")) {
			   value=0;
		   }
		   if (value > maxValue) {
		    maxValue = value;// 这里有自动拆装箱
		    maxStr = key;
		   }
		  }
		  System.out.println("出现最多的单词是:" + maxStr + "出现了" + maxValue + "次");
		  return maxStr;
		 }
}

  

原文地址:https://www.cnblogs.com/xk1013/p/13085854.html