java正则表达式

   11年前正抓紧备战高考,记得当时对计算机特别着迷,基本每周都要买一份《电脑报》,上面介绍计算机硬件、软件方面的东西,上课也偷偷地拿出来看。

无意中接触到了互联网开发语言Java,便下载了一些尚学堂的基础视频教程,主讲老师马士兵讲课特幽默,听他讲课也是一种享受,从那时起就走上了IT之路。

最近想做一个数据采集器,需要用到正则表达式,也想重温一下当年马士兵老师讲课的视频,便整理了如下Java正则表达式学习笔记:

1.正则表达式基础

2.邮件地址页面抓取

3.代码统计

正则表达式基础:

  1     public static void main(String[] args) {
  2         //简单认识java正则表达式
  3         p("abc".matches("..."));//一个"."表示一个字符
  4         p("a8729a".replaceAll("\d", "-"));//替换,java里面用两个\代表一个
  5         
  6         //编译后执行
  7         Pattern p = Pattern.compile("[a-z]{3}");
  8         Matcher m = p.matcher("fgh");
  9         p(m.matches());
 10         p("fgh".matches("[a-z]{3}"));//上面可以这样写
 11         
 12         
 13         //初步认识 . * + ?
 14         p("a".matches("."));//.表示一个字符
 15         p("aa".matches("aa"));
 16         p("aaaa".matches("a*"));//*表示0个或多个
 17         p("".matches("a*"));
 18         p("aaaa".matches("a+"));//+表示1个或多个
 19         p("aaaa".matches("a?"));//?表示0个或1个
 20         p("".matches("a?"));
 21         p("a".matches("a?"));
 22         p("214523145234532".matches("\d{3,100}"));//数字 3位至100位
 23         p("192.168.0.aaa".matches("\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}"));//ip地址验证
 24         p("192".matches("[0-2][0-9][0-9]"));
 25         
 26         
 27         //范围
 28         p("a".matches("[abc]"));//取其中abc里面的一个字符
 29         p("a".matches("[^abc]"));//非abc任意字符
 30         p("A".matches("[a-zA-Z]"));
 31         p("A".matches("[a-z]|[A-Z]"));//
 32         p("A".matches("[a-z[A-Z]]"));
 33         p("R".matches("[A-Z&&[RFG]]"));//取交集
 34         
 35         
 36         //类型  认识s w d  
 37         /**
 38          s:表示	
x0Bf
   and  S:表示非s
 39          w:表示 [a-zA-Z_0-9]构成单词字符      and W:表示非w
 40          d:表示0-9 and D:表示非0-9
 41          */
 42         p(" 

	".matches("\s{4}"));
 43         p(" ".matches("\S"));
 44         p("a_8".matches("\w{3}"));
 45         p("abc888&^%".matches("[a-z]{1,3}\d+[&^#%]+"));
 46         p("\".matches("\\"));
 47         
 48         
 49         //POSIX Style linux操作系统标准写法
 50         p("a".matches("\p{Lower}"));
 51         
 52         //边界处理 ^ $ :单词边界  (^位于[]是取反)
 53         p("hello sir".matches("^h.*"));//^ 开头的为h
 54         p("hello sir".matches(".*ir$"));//$ 前面有0-多个并且以ir结尾
 55         p("hello sir".matches("^h[a-z]{1,3}o\b.*"));
 56         p("hellosir".matches("^h[a-z]{1,3}o\b.*"));
 57         //whilte lines
 58         p(" 
".matches("^[\s&&[^\n]]*\n$"));
 59         p("aaa 8888c".matches(".*\d{4}."));
 60         p("aaa 8888c".matches(".*\b\d{4}."));
 61         p("aaa8888c".matches(".*\d{4}."));
 62         p("aaa8888c".matches(".*\b\d{4}."));
 63         
 64         
 65         //email
 66         p("asdfasdfsafsf@dsdfsdf.com".matches("[\w[.-]]+@[\w[.-]]+\.[\w]+"));
 67         
 68         /*//查找方式 matches find lookingAt
 69         Pattern p = Pattern.compile("\d{3,5}");
 70         String s = "123-34345-234-00";
 71         Matcher m = p.matcher(s);
 72         p(m.matches());
 73         m.reset();
 74         p(m.find());
 75         p(m.start() + "-" + m.end());
 76         p(m.find());
 77         p(m.start() + "-" + m.end());
 78         p(m.find());
 79         p(m.start() + "-" + m.end());
 80         p(m.find());
 81         //p(m.start() + "-" + m.end());
 82         p(m.lookingAt());
 83         p(m.lookingAt());
 84         p(m.lookingAt());
 85         p(m.lookingAt());*/
 86         
 87         
 88         //字符串替换 replacement
 89         /*
 90         Pattern p = Pattern.compile("java", Pattern.CASE_INSENSITIVE);
 91         Matcher m = p.matcher("java Java JAVa JaVa IloveJAVA you hateJava afasdfasdf");
 92         StringBuffer buf = new StringBuffer();
 93         int i=0;
 94         while(m.find()) {
 95             i++;
 96             if(i%2 == 0) {
 97                 m.appendReplacement(buf, "java");
 98             } else {
 99                 m.appendReplacement(buf, "JAVA");
100             }
101         }
102         m.appendTail(buf);
103         p(buf);
104         */
105         
106         //分组 group
107         /*
108         Pattern p = Pattern.compile("(\d{3,5})([a-z]{2})");
109         String s = "123aa-34345bb-234cc-00";
110         Matcher m = p.matcher(s);
111         while(m.find()) {
112             p(m.group());
113         }
114         */
115         
116         //qulifiers
117         /*
118         Pattern p = Pattern.compile(".{3,10}+[0-9]");
119         String s = "aaaa5bbbb68";
120         Matcher m = p.matcher(s);
121         if(m.find())
122             p(m.start() + "-" + m.end());
123         else 
124             p("not match!");
125         */
126         
127         //non-capturing groups
128         /*
129         Pattern p = Pattern.compile(".{3}(?=a)");
130         String s = "444a66b";
131         Matcher m = p.matcher(s);
132         while(m.find()) {
133             p(m.group());
134         }
135         */
136         
137         //back refenrences
138         /*
139         Pattern p = Pattern.compile("(\d(\d))\2");
140         String s = "122";
141         Matcher m = p.matcher(s);
142         p(m.matches());
143         */
144         
145         //flags简写
146         //Pattern p = Pattern.compile("java", Pattern.CASE_INSENSITIVE);
147 //        p("Java".matches("(?i)(java)"));
148     }
149     
150     public static void p(Object o) {
151         System.out.println(o);
152     }

邮件地址页面抓取

 1     public static void main(String[] args) {
 2         try {
 3             BufferedReader br = new BufferedReader(new FileReader("D:\share\courseware\1043633.html"));
 4             String line = "";
 5             while((line=br.readLine()) != null) {
 6                 parse(line);
 7             }
 8         } catch (FileNotFoundException e) {
 9             // TODO Auto-generated catch block
10             e.printStackTrace();
11         } catch (IOException e) {
12             // TODO Auto-generated catch block
13             e.printStackTrace();
14         }
15     }
16 
17     private static void parse(String line) {
18         Pattern p = Pattern.compile("[\w[.-]]+@[\w[.-]]+\.[\w]+");
19         Matcher m = p.matcher(line);
20         while(m.find()) {
21             System.out.println(m.group());
22         }
23     }

代码统计

 1     static long normalLines = 0;
 2     static long commentLines = 0;
 3     static long whiteLines = 0;
 4     
 5     public static void main(String[] args) {
 6         File f = new File("D:\share\JavaProjects\TankWar1.9.11\src");
 7         File[] codeFiles = f.listFiles();
 8         for(File child : codeFiles){
 9             if(child.getName().matches(".*\.java$")) {
10                 parse(child);
11             }
12         }
13         
14         System.out.println("normalLines:" + normalLines);
15         System.out.println("commentLines:" + commentLines);
16         System.out.println("whiteLines:" + whiteLines);
17         
18     }
19 
20     private static void parse(File f) {
21         BufferedReader br = null;
22         boolean comment = false;
23         try {
24             br = new BufferedReader(new FileReader(f));
25             String line = "";
26             while((line = br.readLine()) != null) {
27                 line = line.trim();
28                 if(line.matches("^[\s&&[^\n]]*$")) {
29                     whiteLines ++;
30                 } else if (line.startsWith("/*") && !line.endsWith("*/")) {
31                     commentLines ++;
32                     comment = true;    
33                 } else if (line.startsWith("/*") && line.endsWith("*/")) {
34                     commentLines ++;
35                 } else if (true == comment) {
36                     commentLines ++;
37                     if(line.endsWith("*/")) {
38                         comment = false;
39                     }
40                 } else if (line.startsWith("//")) {
41                     commentLines ++;
42                 } else {
43                     normalLines ++;
44                 }
45             }
46         } catch (FileNotFoundException e) {
47             e.printStackTrace();
48         } catch (IOException e) {
49             e.printStackTrace();
50         } finally {
51             if(br != null) {
52                 try {
53                     br.close();
54                     br = null;
55                 } catch (IOException e) {
56                     e.printStackTrace();
57                 }
58             }
59         }
60     }
关注我的微信共享学习,讨论更多技术知识
原文地址:https://www.cnblogs.com/liyuan3210/p/6510691.html