获取网页上的所有QQ号码,并生成Excel报表

需要的jar如下:jsoup 和 Apache POI(代码中引用了 org.jsoup 与 org.apache.poi.hssf 包)

package jsoup.zr.com.utils;
/**
 * Configuration constants shared by the page scraper and the Excel report writer.
 *
 * <p>All values are compile-time constants; the class is not meant to be instantiated.
 *
 * @author LF
 */
public class Constant {

    /** Not instantiable: this class only holds constants. */
    private Constant() {
    }

    /**
     * Address of the web page to scan.
     */
    public static final String URL = "http://bbs.tianya.cn/post-enterprise-758850-1.shtml";

    /**
     * Directory in which the generated report is stored.
     * Use "/" as the path separator rather than a backslash. The file name is appended
     * directly to this value, so it has to end with a separator.
     */
    public static final String PATH = "/Users/apple/Desktop/";

    /**
     * Base name (without extension) of the generated Excel file.
     */
    public static final String NAME = "QQ";

    /**
     * Regular expression for a QQ number: one digit 1-9 followed by 4 to 14 digits 0-9,
     * i.e. 5 to 15 digits in total. The pattern is not anchored, so with
     * {@code Matcher.find()} it also matches inside longer text.
     */
    public static final String QQREGEX = "[1-9][0-9]{4,14}";

    /**
     * Regular expression for a QQ number, WeChat id or phone number: the whole input must
     * consist of 5 to 19 letters, digits, underscores or hyphens.
     */
    public static final String QQ_WEIXIN_PHONE = "^[a-zA-Z0-9_-]{5,19}$";
}
package jsoup.zr.com.utils;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

/**
 * Downloads a web resource and extracts every substring that matches a regular expression.
 *
 * @author LF
 */
public class ParseHTML {

    /**
     * Scans the page at {@code Constant.URL} for matches of {@code Constant.QQREGEX} and
     * passes them to {@code ProduceExel.writeByList} to produce the report.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<String> list = getInfoByURL(Constant.URL, Constant.QQREGEX);
        ProduceExel.writeByList(list, "QQ号码");
    }

    /**
     * Reads the resource at {@code urlString} line by line and collects every match of
     * {@code regexString}, in order of appearance. Duplicates are kept.
     *
     * <p>Failures are reported on standard error and never propagated as checked exceptions:
     * a malformed URL yields an empty list, and an I/O error yields the matches collected
     * before the error occurred.
     *
     * @param urlString   address of the resource to read
     * @param regexString regular expression applied to each line with {@code Matcher.find()}
     * @return the matched substrings; never {@code null}
     * @throws java.util.regex.PatternSyntaxException if {@code regexString} is not a valid pattern
     */
    public static List<String> getInfoByURL(String urlString, String regexString) {
        List<String> list = new ArrayList<String>();
        // Compile up front so an invalid pattern fails before any connection is opened.
        Pattern pattern = Pattern.compile(regexString);

        URL url;
        try {
            url = new URL(urlString);
        } catch (MalformedURLException e) {
            e.printStackTrace();
            // Without a valid URL there is nothing to read.
            return list;
        }

        BufferedReader bReader = null;
        try {
            URLConnection connection = url.openConnection();
            // NOTE(review): the response is decoded with the platform default charset;
            // confirm that this matches the encoding of the target page.
            bReader = new BufferedReader(new InputStreamReader(connection.getInputStream()));
            String line;
            while ((line = bReader.readLine()) != null) {
                Matcher matcher = pattern.matcher(line);
                // A single line may contain several matches.
                while (matcher.find()) {
                    list.add(matcher.group());
                }
            }
        } catch (IOException e) {
            // Keep whatever was collected before the failure.
            e.printStackTrace();
        } finally {
            if (bReader != null) {
                try {
                    bReader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return list;
    }
}
package jsoup.zr.com.utils;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRichTextString;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
/**
 * Writes collections of strings to a single-column Excel (.xls) report.
 *
 * <p>The report is always stored at {@code Constant.PATH + Constant.NAME + ".xls"}.
 * I/O failures are reported on standard error and are not propagated to the caller.
 *
 * @author LF
 */
public class ProduceExel {

    /**
     * Generates the report from a list: a header cell followed by one row per element.
     * The list size and every written value are echoed to standard output.
     * Nothing is written when the list is {@code null} or empty.
     *
     * @param list     values to write, one per row
     * @param lineName text of the header cell in the first row
     */
    public static void writeByList(List<String> list, String lineName) {
        // Check for null before touching the list; an empty list still reports its size.
        if (list == null) {
            return;
        }
        System.out.println(list.size());
        if (list.isEmpty()) {
            return;
        }
        writeColumn(list, lineName, true);
    }

    /**
     * Generates the report from the values of a map: a header cell followed by one row per
     * value, in the iteration order of {@code map.values()}. The keys are not written.
     * Nothing is written when the map is {@code null} or empty.
     *
     * @param map      map whose values are written, one per row
     * @param lineName text of the header cell in the first row
     */
    public static void write(Map<String, String> map, String lineName) {
        if (map == null || map.isEmpty()) {
            return;
        }
        writeColumn(map.values(), lineName, false);
    }

    /**
     * Builds a one-sheet workbook (header in row 0, values from row 1 on) and saves it.
     *
     * @param values     cell contents, one per row
     * @param lineName   text of the header cell
     * @param echoValues whether each value is also printed to standard output
     */
    private static void writeColumn(Iterable<String> values, String lineName, boolean echoValues) {
        String path = Constant.PATH + Constant.NAME + ".xls";
        System.out.println(path);

        HSSFWorkbook workbook = new HSSFWorkbook();
        HSSFSheet sheet = workbook.createSheet("lf");

        HSSFRow headerRow = sheet.createRow(0);
        HSSFCell headerCell = headerRow.createCell(0);
        headerCell.setCellValue(new HSSFRichTextString(lineName));

        int rowIndex = 1;
        for (String value : values) {
            if (echoValues) {
                System.out.println("=====" + value);
            }
            HSSFRow row = sheet.createRow(rowIndex);
            HSSFCell cell = row.createCell(0);
            cell.setCellValue(new HSSFRichTextString(value));
            rowIndex++;
        }

        // Open the file only once the workbook is complete, and always release it.
        OutputStream outputStream = null;
        try {
            outputStream = new FileOutputStream(new File(path));
            workbook.write(outputStream);
        } catch (IOException e) {
            // Also covers FileNotFoundException when the target cannot be opened.
            e.printStackTrace();
        } finally {
            if (outputStream != null) {
                try {
                    outputStream.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

}
package jsoup.zr.com.utils;
/**
 * 
 * @author LF
 *
 */
public class Verify {
    
    /**
     * 验证是否是QQ号码
     * @param QQNumber
     * @return
     */
    public static boolean verifyQQNumber(String QQNumber){
        System.out.println(QQNumber.matches(Constant.QQREGEX));
        return QQNumber.matches(Constant.QQREGEX);
    }
    
}
原文地址:https://www.cnblogs.com/lantu1989/p/6792831.html