java爬虫中jsoup的使用

jsoup可以用来解析HTML的内容,其功能非常强大,它可以像javascript那样直接从网页中提取有用的信息

例如1:

  •  从html字符串中解析数据
/**
 * Parses a fixed in-memory HTML string with jsoup and prints, for every
 * {@code <p>} element that carries a {@code class} attribute, its text
 * followed by its class name.
 */
    public static void getParByString()
    {
        String markup = "<html><head><title> 这里是字符串内容</title></head>"
                + "<body><p class='p1'> 这里是 jsoup 作用的相关演示</p></body></html>";
        Document parsed = Jsoup.parse(markup);
        // "p[class]" matches <p> elements that have a class attribute.
        Elements paragraphs = parsed.select("p[class]");
        for (int i = 0; i < paragraphs.size(); i++) {
            Element paragraph = paragraphs.get(i);
            System.out.println(paragraph.text());
            System.out.println(paragraph.className());
        }
    }
  •    从本地文件中解析数据
//从本地文件中获取
    public static void getHrefByLocal()
    {
        File input = new File("C:\Users\Idea\Desktop\html\Home.html");
        Document doc = null;
        try {
            doc = Jsoup.parse(input,"UTF-8","http://www.oschina.net/"); //这里后面加了网址是为了解决后面绝对路径和相对路径的问题
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        Elements links = doc.select("a[href]");
        for(Element link:links){
            String linkHref = link.attr("href");
            String linkText = link.text();
            System.out.println(linkText+":"+linkHref);
        }
        
    }
  • 直接从网络上解析数据
/**
 * Fetches a page over HTTP GET with jsoup and collects its links.
 *
 * A POST request could be issued instead by building the connection with
 * {@code Jsoup.connect(url).data(...).userAgent(...).cookie(...).timeout(...).post()}.
 *
 * @param url address of the page to fetch
 * @return a map from link text to absolute link URL; links sharing the same
 *         text overwrite each other, so only the last one is kept. If the page
 *         cannot be loaded, the map additionally contains the entry
 *         {@code "加载失败" -> "error"}.
 */
public static HashMap getHrefByNet(String url)
    {
        HashMap<String, String> hm = new HashMap<String, String>();
        try {
            Document doc = Jsoup.connect(url).get();
            for (Element link : doc.select("a[href]")) {
                // "abs:href" asks jsoup for the URL resolved against the page's base URI.
                hm.put(link.text(), link.attr("abs:href"));
            }
        } catch (IOException e) {
            e.printStackTrace();
            // Marker entry so callers can tell the load failed.
            hm.put("加载失败", "error");
        }
        return hm;
    }
     

注意:需要引用的jar为以下:

import org.jsoup.*;
import org.jsoup.nodes.*;
import org.jsoup.select.Elements;

最后附上jar包下载地址

http://jsoup.org/packages/jsoup-1.8.1.jar
具体的实际项目请看java爬虫实战项目

 循环遍历Hashtable中的键和值

/* Create a sample table. */
Hashtable h = new Hashtable();
/* Add an entry to it. */
h.put(key, value);
/* Walk every entry, printing the value first and then the key. */
for (Object item : h.entrySet())
{
    Map.Entry entry = (Map.Entry) item;
    System.out.println(entry.getValue());
    System.out.println(entry.getKey());
}

 java文件夹的创建(先判断是否存在,如果不存在就创建)

/**
 * Creates the directory D://home//Lucy (including missing parents) when
 * nothing exists at that path, and reports the outcome on standard output.
 * An existing path is left untouched.
 */
     public void makedir(){
         File dir = new File("D://home//Lucy");
         if(dir.exists() || dir.isDirectory())
         {
             System.out.println("文件已经存在!");
             return;
         }
         System.out.println("不存在");
         // mkdirs() also creates missing parent directories, unlike mkdir().
         dir.mkdirs();
         // Verify the result by re-checking the path rather than trusting the return value.
         boolean created = dir.exists() && dir.isDirectory();
         System.out.println(created ? "文件夹创建成功!" : "文件创建不成功!");
     }

 java文件的创建(先判断是否存在,如果不存在就创建)

/**
 * Creates the file D://file2.txt when nothing exists at that path.
 * Prints a success message when the file was created, or an
 * "already exists" message when the path is already taken.
 */
     public void makeFile()
     {
         File target = new File("D://file2.txt");
         if(target.exists() || target.isFile())
         {
             System.out.println("文件已经存在!");
             return;
         }
         try {
             // createNewFile() returns true only when this call actually created the file.
             boolean created = target.createNewFile();
             if(created)
             {
                 System.out.println("文件创建成功!");
             }
         } catch (IOException ioe) {
             ioe.printStackTrace();
         }
     }

在文件中写入内容

 /**
  * Writes the given text to D://file2.txt, replacing the file's previous
  * content. The file must already exist as a regular file; otherwise a
  * message is printed and nothing is written.
  *
  * The text is encoded with the platform default charset.
  *
  * @param s text to write
  */
     public void writeText(String s)
     {
         String fileName = "D://file2.txt";
         File file = new File(fileName);
         if(!(file.exists()&&file.isFile()))
         {
             System.out.println("文件不存在,不能写入内容");
             return;
         }
         // try-with-resources closes the stream on every path; if the open itself
         // fails, no write or close is attempted on a null stream.
         try (FileOutputStream fos = new FileOutputStream(fileName)) {
             fos.write(s.getBytes());
         } catch (IOException e) {
             // FileNotFoundException is an IOException, so open failures land here too.
             e.printStackTrace();
         }
     }

java获取系统时间:

/**
 * Prints the current system time three ways: as "yyyy-MM-dd HH:mm:ss",
 * in a Chinese date/time layout, and as the default Date string.
 */
public static void getTime()
    {
        Date now = new Date();
        String plainStamp = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(now);
        String chineseStamp = new SimpleDateFormat("yyyy年MM月dd日   HH时mm分ss秒").format(now);
        System.out.println(plainStamp);
        System.out.println(chineseStamp);
        System.out.println(now);
    }

java连接mysql数据库

   首先添加jar包:下载jar包

public class connectDoctorMySql {
        
    // Alternative connection settings, kept commented out.
    /*
        public static final String url = "jdbc:mysql://192.168.0.16/hive";  
        public static final String name = "com.mysql.jdbc.Driver";  
        public static final String user = "hive";  
        public static final String password = "hive";  
        public Connection conn = null;  
        public PreparedStatement pst = null; 
        public Statement stmt = null;
        ResultSet rs = null;*/
        // JDBC URL handed to DriverManager.getConnection in init().
        public static final String url = "jdbc:mysql://127.0.0.1/orcl?useUnicode=true&characterEncoding=utf-8&useSSL=false";  
        // Driver class name loaded through Class.forName in init().
        public static final String name = "com.mysql.jdbc.Driver";
        // NOTE(review): credentials are hard-coded in source; consider loading them from configuration.
        public static final String user = "root";  
        public static final String password = "China123";  
        // Connection opened by init().
        public Connection conn = null;  
        // Not assigned by any method visible here; only closed during cleanup.
        public PreparedStatement pst = null; 
        // Statement created by init() from conn.
        public Statement stmt = null;
        // Not assigned by any method visible here; only closed during cleanup.
        ResultSet rs = null;
   /**
    * Loads the JDBC driver, opens the connection and creates the shared
    * statement. Any failure is reported on standard output together with
    * the stack trace; no exception is propagated to the caller.
    */
     public void init(){
         try {
             // Load the driver class named by the `name` constant.
             Class.forName(name);
             // Open the connection with the configured URL and credentials.
             conn = DriverManager.getConnection(url, user, password);
             stmt = conn.createStatement();
         } catch (Exception failure) {
             System.out.println("数据库连接失败. . .");
             failure.printStackTrace();
         }
     }
        
   /**
    * Opens a connection via init(), runs the given update statement
    * (INSERT/UPDATE/DELETE/DDL) and releases all JDBC resources afterwards.
    *
    * Failures are reported on standard output; no exception is propagated.
    *
    * @param sql the SQL statement to execute
    */
    public void excute(String sql){
            init();
            try {
                if (stmt == null) {
                    // init() could not create a statement (it already reported why).
                    System.out.println("数据执行失败:"+sql);
                    return;
                }
                stmt.executeUpdate(sql);
            } catch (SQLException e) {
                System.out.println("数据执行失败:"+sql);// echo the failing statement
                e.printStackTrace();
            } finally {
                // Close every resource independently so that one failing close()
                // cannot prevent the others (notably the connection) from closing.
                for (AutoCloseable resource : new AutoCloseable[] {rs, pst, stmt, conn}) {
                    if (resource == null) {
                        continue;
                    }
                    try {
                        resource.close();
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }
            }
        }
/**
 * Opens a connection via init(), runs the given query and returns two
 * columns of every row.
 *
 * @param sql the SELECT statement to run
 * @param x   1-based index of the column stored at position 0 of each row array
 * @param y   1-based index of the column stored at position 1 of each row array
 * @return a list of {@code String[2]} arrays, one per row; empty when the
 *         connection could not be opened or the query failed before any row was read
 */
    public ArrayList select(String sql,int x,int y){
            init();
            ArrayList<String[]> result = new ArrayList<String[]>();
            // Kept in a local so the finally block closes the result set that was
            // actually opened here.
            ResultSet rows = null;
            try {
                // stmt is null when init() failed; it already reported the failure.
                if (stmt != null) {
                    rows = stmt.executeQuery(sql);
                    while (rows.next()) {
                        String[] str = new String[2];
                        str[0] = rows.getString(x);
                        str[1] = rows.getString(y);
                        result.add(str);
                    }
                }
            } catch (SQLException e) {
                e.printStackTrace();
            } finally {
                // Close every resource independently so that one failing close()
                // cannot prevent the others (notably the connection) from closing.
                for (AutoCloseable resource : new AutoCloseable[] {rows, rs, pst, stmt, conn}) {
                    if (resource == null) {
                        continue;
                    }
                    try {
                        resource.close();
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }
            }
            return result;
        }

java连接oracle数据库

public class connectDoctor {
      // Oracle connection settings.
        // JDBC thin-driver URL handed to DriverManager.getConnection in init().
        public static final String url = "jdbc:oracle:thin:@127.0.0.1:1521:orcl"; 
        //@127.0.0.1
        // Driver class name loaded through Class.forName in init().
        public static final String name = "oracle.jdbc.driver.OracleDriver";  
        // NOTE(review): credentials are hard-coded in source; consider loading them from configuration.
        public static final String user = "c238891";  
        public static final String password = "Rapid111";  
        // Connection opened by init().
        public Connection conn = null;  
        // Prepared statement assigned by start().
        public PreparedStatement pst = null; 
        // Statement created by init() from conn.
        public Statement stmt = null;
        // Result set assigned by start().
        ResultSet rs = null;  
        /**
         * Loads the Oracle JDBC driver, opens the connection and creates the
         * shared statement. Any failure is reported on standard output together
         * with the stack trace; no exception is propagated to the caller.
         */
        public void init(){
                 try {
                        Class.forName(name);// load the driver class
                         conn = DriverManager.getConnection(url, user, password);// open the connection
                         stmt = conn.createStatement();
                    } catch (Exception e) {
                        // This path is a connection/driver failure, so report it as one
                        // rather than as a failed insert.
                        System.out.println("数据库连接失败. . .");
                        e.printStackTrace();
                    }
          }
        
        /**
         * Connection smoke test: opens a connection via init(), runs
         * {@code select * from emp} and prints one line per row.
         *
         * All JDBC resources are released afterwards, whether or not the query
         * succeeded. Failures are printed; no exception is propagated.
         */
        public void start()
        {
            init();
            String sql = "select * from emp";
            try {
                if (conn == null) {
                    // init() could not connect (it already reported why).
                    return;
                }
                pst = conn.prepareStatement(sql);
                rs = pst.executeQuery();
                while (rs.next()) {
                    System.out.println("编号:" + rs.getString("empno")
                            + ";姓名:" + rs.getString("ename")
                            + "; 工作:" + rs.getString("job")
                            + "; 领导:" + rs.getString("mgr")
                            + "; 雇佣日期:" + rs.getString("hiredate")
                            + "; 工资:" + rs.getString("sal")
                            + "; 奖金:" + rs.getString("comm")
                            + "; 部门:" + rs.getString("deptno"));
                }
            } catch (SQLException e) {
                e.printStackTrace();
            } finally {
                // Close every resource independently of the others: the statement and
                // connection must be released even when no result set was obtained.
                for (AutoCloseable resource : new AutoCloseable[] {rs, pst, stmt, conn}) {
                    if (resource == null) {
                        continue;
                    }
                    try {
                        resource.close();
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }
            }
        }

  /**
   * Opens a connection via init(), runs the given update statement
   * (INSERT/UPDATE/DELETE/DDL) and releases all JDBC resources afterwards.
   *
   * On failure the statement text is echoed to standard output; no exception
   * is propagated.
   *
   * @param sql the SQL statement to execute
   */
        public void excute(String sql){
            init();
            try {
                if (stmt == null) {
                    // init() could not create a statement (it already reported why).
                    System.out.println(sql);
                    return;
                }
                stmt.executeUpdate(sql);
            } catch (SQLException e) {
                System.out.println(sql);
                e.printStackTrace();
            } finally {
                // Close every resource independently so that one failing close()
                // cannot prevent the others (notably the connection) from closing.
                for (AutoCloseable resource : new AutoCloseable[] {rs, pst, stmt, conn}) {
                    if (resource == null) {
                        continue;
                    }
                    try {
                        resource.close();
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }
            }
        }
   
  /**
   * Opens a connection via init(), runs the given query and returns two
   * columns of every row.
   *
   * @param sql the SELECT statement to run
   * @param x   1-based index of the column stored at position 0 of each row array
   * @param y   1-based index of the column stored at position 1 of each row array
   * @return a list of {@code String[2]} arrays, one per row; empty when the
   *         connection could not be opened or the query failed before any row was read
   */
        public ArrayList select(String sql,int x,int y){
            init();
            ArrayList<String[]> result = new ArrayList<String[]>();
            // Kept in a local so the finally block closes the result set that was
            // actually opened here.
            ResultSet rows = null;
            try {
                // stmt is null when init() failed; it already reported the failure.
                if (stmt != null) {
                    rows = stmt.executeQuery(sql);
                    while (rows.next()) {
                        String[] str = new String[2];
                        str[0] = rows.getString(x);
                        str[1] = rows.getString(y);
                        result.add(str);
                    }
                }
            } catch (SQLException e) {
                e.printStackTrace();
            } finally {
                // Close every resource independently so that one failing close()
                // cannot prevent the others (notably the connection) from closing.
                for (AutoCloseable resource : new AutoCloseable[] {rows, rs, pst, stmt, conn}) {
                    if (resource == null) {
                        continue;
                    }
                    try {
                        resource.close();
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }
            }
            return result;
        }
原文地址:https://www.cnblogs.com/Jims2016/p/5652493.html