爬虫效果--详细

项目简介:
本项目旨在帮助同学们练习爬虫的编写,涉及的知识点有:HTML、JavaScript、jQuery、
ECharts、Servlet 3.0、Jsoup、FastJson 以及 IO 流。
项目概要设计:
1、包结构的设计:必须满足 MVC 开发模式,以达到各层解耦的目的

 1 package com.hyxy.food.entity;
 2 public class Food {
 3     private int id;//区别不同的商店
 4     private String name;//名称
 5     private int num;//评论条数
 6     public int getId() {
 7         return id;
 8     }
 9     public void setId(int id) {
10         this.id = id;
11     }
12     public String getName() {
13         return name;
14     }
15     public void setName(String name) {
16         this.name = name;
17     }
18     public int getNum() {
19         return num;
20     }
21     public void setNum(int num) {
22         this.num = num;
23     }
24     public Food(int id, String name, int num) {
25         super();
26         this.id = id;
27         this.name = name;
28         this.num = num;
29     }
30     public Food() {
31         super();
32     }    
33 }
 1 package com.hyxy.food.util;
 2 
 3 import java.io.IOException;
 4 import java.io.InputStream;
 5 import java.sql.Connection;
 6 import java.sql.DriverManager;
 7 import java.util.Properties;
 8 public class ConnectMysqlDB {
 9     public static Connection getConnection() {
10         Connection conn=null;
11         Properties p=new Properties();
12         InputStream in=ConnectMysqlDB.class.getClassLoader().getResourceAsStream("jdbc.properties");
13         try {
14         p.load(in);
15             String url=p.getProperty("url");
16             String user=p.getProperty("user");
17             String password=p.getProperty("pwd");
18             String driver=p.getProperty("driver");
19             Class.forName(driver);
20             conn=DriverManager.getConnection(url, user, password);
21         } catch (Exception e) {
22             // TODO Auto-generated catch block
23             e.printStackTrace();
24         }
25 
26         return conn;
27     }
28     public static void main(String[] args) {
29         getConnection();
30     }
31 }
1 jdbc.properties(放在 src 根目录下)
2 driver=com.mysql.jdbc.Driver
3 url=jdbc:mysql://192.168.0.199:3306/spider?useUnicode=true&characterEncoding=utf-8
4 user=1805
5 pwd=123456
 1 package com.hyxy.food.dao;
 2 import java.sql.Connection;
 3 import com.hyxy.food.entity.Food;
 4 import com.hyxy.food.util.ConnectMysqlDB;
 5 import java.sql.PreparedStatement;
 6 import java.sql.ResultSet;
 7 import java.sql.SQLException;
 8 import java.util.ArrayList;
 9 import java.util.List;
10 
11 public class FoodDao {
12     private  Connection conn;
13     public FoodDao() {
14        if(conn==null) {
15         conn=ConnectMysqlDB.getConnection();
16     }
17      }
18     public boolean addFood(Food f) {
19         String sql="insert into food(name,num) value(?,?)";
20         PreparedStatement ps;
21         boolean flag=false;
22         try {
23             ps = conn.prepareStatement(sql);
24             ps.setString(1, f.getName());
25             ps.setInt(2, f.getNum());
26             ps.executeUpdate();
27             flag=true;
28         } catch (SQLException e) {
29             // TODO Auto-generated catch block
30             e.printStackTrace();
31         }
32         return flag;
33     }
34     public List<Food> list(){
35         String sql="select name,num from food";
36         List<Food> list=new ArrayList<Food>();
37         try {
38             PreparedStatement ps=conn.prepareStatement(sql);
39             ResultSet rs= ps.executeQuery();
40             while(rs.next()) {
41                 Food f=new Food();
42                 f.setName(rs.getString("name"));
43                 f.setNum(rs.getInt("num"));
44                 list.add(f);
45             }
46         } catch (SQLException e) {
47             // TODO Auto-generated catch block
48             e.printStackTrace();
49         }
50         return list;
51     }
52 }
 1 package com.hyxy.food.controller;
 2 
 3 import java.io.IOException;
 4 import javax.servlet.ServletException;
 5 import javax.servlet.annotation.WebServlet;
 6 import javax.servlet.http.HttpServlet;
 7 import javax.servlet.http.HttpServletRequest;
 8 import javax.servlet.http.HttpServletResponse;
 9 
10 import org.jsoup.Jsoup;
11 import org.jsoup.nodes.Document;
12 import org.jsoup.nodes.Element;
13 import org.jsoup.select.Elements;
14 
15 import com.hyxy.food.dao.FoodDao;
16 import com.hyxy.food.entity.Food;
17 @WebServlet("/food")
18 public class FoodServlet extends HttpServlet {
19     private static final long serialVersionUID = 1L;
20     public FoodServlet() {
21         super();
22         // TODO Auto-generated constructor stub
23     }
24     protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
25         doPost(request, response);
26     }
27     protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
28         String url=request.getParameter("url");//http://www.mafengwo.cn/cy/10035/0-0-0-0-0-
29         int sum=Integer.parseInt(request.getParameter("sum"));
30         //爬虫开始
31         for (int i = 1; i <= sum; i++) {
32             System.out.println("爬取第"+i+"页数据");
33             //爬虫第一步,获取一个爬虫的Document对象
34             Document d=Jsoup.connect(url+i+".html").get();
35             //输出Document对象
36     //        System.out.println(d.html());
37             //第二步观察网页具体信息,爬取想要的信息
38             Elements es= d.select("li[class=item clearfix]");
39             boolean flag=false;
40             for (Element element : es) {
41                 String title=element.select("div[class=title]").select("h3").select("a").first().text();
42                 int num=Integer.parseInt(element.select("div[class=grade]").select("p[class=rev-num]").select("em").text());
43                 Food food=new Food();
44                 food.setName(title);
45                 food.setNum(num);
46                 FoodDao dao=new FoodDao();
47                 flag = dao.addFood(food);
48             }
49             if(flag) {
50                 System.out.println("success!!!");
51             }else {
52                 System.out.println("false!!!");
53             }
54 //            request.getRequestDispatcher("list").forward(request, response);
55         }
56     }
57 
58 }
 1 package com.hyxy.food.test;
 2 
 3 import java.io.IOException;
 4 
 5 import org.jsoup.Jsoup;
 6 import org.jsoup.nodes.Document;
 7 import org.jsoup.nodes.Element;
 8 import org.jsoup.select.Elements;
 9 
10 public class FoodTest {
11     public static void main(String[] args) throws IOException {
12         int sum=20;
13         for (int i = 1; i <= sum; i++) {
14             System.out.println("爬取第"+i+"页数据");
15             String url="http://www.mafengwo.cn/cy/10035/0-0-0-0-0-"+i+".html";
16             //爬虫第一步,获取一个爬虫的Document对象
17             Document d=Jsoup.connect(url).get();
18             //输出Document对象
19     //        System.out.println(d.html());
20             //第二步观察网页具体信息,爬取想要的信息
21             Elements es= d.select("li[class=item clearfix]");
22             for (Element element : es) {
23                 String title=element.select("div[class=title]").select("h3").select("a").first().text();
24                 int num=Integer.parseInt(element.select("div[class=grade]").select("p[class=rev-num]").select("em").text());
25                 System.out.println(title+":"+num);
26             }
27         }            
28     }
29 }
原文地址:https://www.cnblogs.com/yangfanfan/p/11342017.html