正则表达式

所有对于正则表达式的操作位于java.util.regex包下。

两个重要的类：Matcher 和 Pattern

package com.anllin.regex;

import java.util.regex.Matcher;

import java.util.regex.Pattern;

/**
 * Contrasts {@link Matcher#find()} with {@link Matcher#find(int)} on the
 * pattern {@code \w+} (one or more word characters).
 */
public class Matches
{
    /**
     * Prints every word found by successive {@code find()} calls, one per
     * line, then a separator line, then the match found by {@code find(i)}
     * for each start index {@code i}, all on a single line.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        Matcher matcher = Pattern.compile("\\w+").matcher(
                "this is a regex test, the fisrt program");

        // find() resumes right after the previous match, so each word is
        // reported exactly once.
        while (matcher.find())
        {
            System.out.println("[" + matcher.group() + "]");
        }

        System.out.println("---------------------------");

        // find(int) resets the matcher and searches from the given index, so
        // advancing the index one character at a time reports the same word
        // repeatedly (whole, then with leading characters cut off). The loop
        // stops at the first index from which no match can be found.
        for (int start = 0; matcher.find(start); start++)
        {
            System.out.print("[" + matcher.group() + "]");
        }
    }
}

输出结果　

[this]

[is]

[a]

[regex]

[test]

[the]

[fisrt]

[program]

---------------------------

[this][his][is][s][is][is][s][a][a][regex][regex][egex][gex][ex][x][test][test][est][st][t][the][the][the][he][e][fisrt][fisrt][isrt][srt][rt][t][program][program][rogram][ogram][gram][ram][am][m]

分组

import java.util.regex.Matcher;

import java.util.regex.Pattern;

/**
 * Demonstrates capturing groups: {@link Matcher#group(int)} and
 * {@link Matcher#groupCount()}.
 */
public class Group
{
    /**
     * For every run of three whitespace-separated words in the sample
     * sentence, prints the whole match (group 0) followed by each of the four
     * capturing groups, one per line.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        String statement = "this is a test about the regex group, the method groupCount is used";

        // Group 1: first word; group 2: second and third word together;
        // group 3: second word; group 4: third word.
        Matcher m = Pattern.compile("(?m)(\\S+)\\s+((\\S+)\\s+(\\S+))")
                .matcher(statement);

        while (m.find())
        {
            // groupCount() does not count group 0 (the whole match), so the
            // highest valid index is groupCount() itself; the bound must be
            // inclusive or the last capturing group is never printed.
            for (int i = 0; i <= m.groupCount(); i++)
            {
                System.out.println("[" + m.group(i) + "]");
            }
        }
    }
}

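输出结果（注意：该模式有 4 个捕获组，groupCount() 返回 4，且不包含第 0 组；以下列表每次匹配只列出第 0～3 组，若循环条件为 i <= m.groupCount()，每次匹配还会多输出第 4 组，依次为 [a]、[the]、[the]、[is]）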

[this is a]

[this]

[is a]

[is]

[test about the]

[test]

[about the]

[about]

[regex group, the]

[regex]

[group, the]

[group,]

[method groupCount is]

[method]

[groupCount is]

[groupCount]

start() 和 end() 的使用

import java.util.regex.Matcher;

import java.util.regex.Pattern;

/**
 * Shows what {@link Matcher#start()} and {@link Matcher#end()} report after
 * {@code find()}, {@code lookingAt()} and {@code matches()} succeed.
 */
public class StartEnd
{
    /**
     * Runs the patterns {@code re\w*} (m1) and {@code java.*} (m2) against
     * three sample sentences and prints, for every successful operation, the
     * matched text with its start and end offsets. A blank line follows the
     * report for each input.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        String[] input = {
            "java has regular expressing in 1.4",
            "regular expressing now expressing in java",
            "java represses oracular expressions"
        };

        Pattern p1 = Pattern.compile("re\\w*");
        Pattern p2 = Pattern.compile("java.*");

        for (int i = 0; i < input.length; i++)
        {
            System.out.println("input" + i + ":" + input[i]);

            Matcher m1 = p1.matcher(input[i]);
            Matcher m2 = p2.matcher(input[i]);

            // find(): every occurrence anywhere in the input.
            while (m1.find())
            {
                report("m1.find()", m1);
            }
            while (m2.find())
            {
                report("m2.find()", m2);
            }

            // lookingAt(): must match at the start of the input, but need not
            // consume all of it.
            if (m1.lookingAt())
            {
                report("m1.lookingAt()", m1);
            }
            if (m2.lookingAt())
            {
                report("m2.lookingAt()", m2);
            }

            // matches(): must match the entire input.
            if (m1.matches())
            {
                report("m1.matches()", m1);
            }
            if (m2.matches())
            {
                report("m2.matches()", m2);
            }

            System.out.println();
        }
    }

    /**
     * Prints the current match of {@code m} together with its offsets.
     *
     * @param label text identifying the matcher and operation, printed first
     * @param m     a matcher whose last match operation succeeded
     */
    private static void report(String label, Matcher m)
    {
        System.out.println(label + " [" + m.group() + "] start = "
                + m.start() + ",end = " + m.end());
    }
}

输出结果：

input0:java has regular expressing in 1.4

m1.find() [regular] start = 9,end = 16

m1.find() [ressing] start = 20,end = 27

m2.find() [java has regular expressing in 1.4] start = 0,end = 34

m2.lookingAt() [java has regular expressing in 1.4] start = 0,end = 34

m2.matches() [java has regular expressing in 1.4] start = 0,end = 34

input1:regular expressing now expressing in java

m1.find() [regular] start = 0,end = 7

m1.find() [ressing] start = 11,end = 18

m1.find() [ressing] start = 26,end = 33

m2.find() [java] start = 37,end = 41

m1.lookingAt() [regular] start = 0,end = 7

input2:java represses oracular expressions

m1.find() [represses] start = 5,end = 14

m1.find() [ressions] start = 27,end = 35

m2.find() [java represses oracular expressions] start = 0,end = 35

m2.lookingAt() [java represses oracular expressions] start = 0,end = 35

m2.matches() [java represses oracular expressions] start = 0,end = 35

模式标记

public static Pattern compile(String regex,int flags)

import java.util.regex.Matcher;

import java.util.regex.Pattern;

/**
 * Demonstrates combining pattern flags through
 * {@link Pattern#compile(String, int)}.
 */
public class Flag
{
    /**
     * Prints every occurrence of "java", in any letter case, that sits at the
     * beginning of a line of the sample text.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        // CASE_INSENSITIVE lets "java" match "JAVA"; MULTILINE lets '^' match
        // after each line terminator instead of only at the start of input.
        Pattern p = Pattern.compile("^java",
                Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);

        Matcher m = p.matcher("java has regex \njava has regex \n"
                + "JAVA has pretty good regular expressions\n"
                + "Regular expressions are in java");

        while (m.find())
        {
            System.out.println(m.group());
        }
    }
}

输出结果：

java

java

JAVA

split()方法的使用

public String[] split(CharSequence input,int limit)

public String[] split(CharSequence input)

import java.util.Arrays;

import java.util.regex.Pattern;

/**
 * Demonstrates {@link Pattern#split(CharSequence)},
 * {@link Pattern#split(CharSequence, int)} and {@link String#split(String)}.
 */
public class SplitDemo
{
    /**
     * Splits a sample string on "!!" without and with a limit, then splits a
     * second string on single spaces with {@code String.split}; each result
     * is printed as a list.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        String input = "This!!unusual use!!of exclamation!!points";

        // Compile the delimiter once and reuse it for both calls.
        Pattern delimiter = Pattern.compile("!!");

        System.out.println(Arrays.asList(delimiter.split(input)));

        // With a limit of 3 at most three pieces are produced; the last one
        // keeps the remaining delimiters unsplit.
        System.out.println(Arrays.asList(delimiter.split(input, 3)));

        System.out.println(Arrays.asList("Aha! String has a split() built in"
                .split(" ")));
    }
}

输出结果：

[This, unusual use, of exclamation, points]

[This, unusual use, of exclamation!!points]

[Aha!, String, has, a, split(), built, in]

替换操作

import java.io.BufferedReader;

import java.io.FileInputStream;

import java.io.InputStreamReader;

import java.util.regex.Matcher;

import java.util.regex.Pattern;

/**
 * Demonstrates the replacement operations {@link String#replaceAll},
 * {@link String#replaceFirst}, {@link Matcher#appendReplacement} and
 * {@link Matcher#appendTail} on the text of a source file.
 */
public class ReplaceTest
{
    /**
     * Reads {@code src/com/anllin/regex/ReplaceTest.java}, normalises its
     * spacing, prints the result, then prints a second version in which the
     * first lower-case vowel is replaced by "(VOWEL1)" and every remaining
     * lower-case vowel is upper-cased.
     *
     * @param args ignored
     * @throws Exception if the file cannot be read
     */
    public static void main(String[] args) throws Exception
    {
        String s = readAll("src/com/anllin/regex/ReplaceTest.java");

        // If the text contains a block delimited by "/*!" and "!*/", work on
        // the contents of that block only; otherwise keep the whole text.
        // DOTALL lets '.' cross line terminators so the block may span lines.
        Matcher mInput = Pattern.compile("/\\*!(.*)!\\*/", Pattern.DOTALL)
                .matcher(s);
        if (mInput.find())
        {
            // Captured by parentheses
            s = mInput.group(1);
        }

        // Replace two or more spaces with a single space:
        s = s.replaceAll(" {2,}", " ");

        // Remove one or more spaces at the beginning of each line. MULTILINE
        // mode is required so that '^' matches at every line start. The space
        // before '+' is essential: "^+" would only match empty strings.
        s = s.replaceAll("(?m)^ +", "");
        System.out.println(s);

        s = s.replaceFirst("[aeiou]", "(VOWEL1)");

        StringBuffer sbuf = new StringBuffer();
        Matcher m = Pattern.compile("[aeiou]").matcher(s);

        // Process the find information as you perform the replacements:
        while (m.find())
        {
            m.appendReplacement(sbuf, m.group().toUpperCase());
        }

        // Put in the remainder of the text:
        m.appendTail(sbuf);
        System.out.println(sbuf);
    }

    /**
     * Reads a whole text file into a string using the platform default
     * charset. Each line is terminated with a single '\n' so that the line
     * structure survives for the MULTILINE/DOTALL processing in main.
     *
     * @param path path of the file to read
     * @return the file's text, every line followed by '\n'
     * @throws java.io.IOException if the file cannot be opened or read
     */
    private static String readAll(String path) throws java.io.IOException
    {
        BufferedReader reader = new BufferedReader(new InputStreamReader(
                new FileInputStream(path)));
        try
        {
            StringBuilder text = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null)
            {
                // readLine() strips the terminator; put one back.
                text.append(line).append('\n');
            }
            return text.toString();
        }
        finally
        {
            // Always release the file handle, even if reading fails.
            reader.close();
        }
    }
}

输出结果（程序读取并处理的是自身的源文件，因此输出内容会随源文件内容而变化；以下为一次运行的结果，中间的横线仅用于分隔两次打印）

package com.anllin.regex;import java.io.BufferedReader;import java.io.FileInputStream;import java.io.InputStreamReader;import java.util.regex.Matcher;import java.util.regex.Pattern;public class ReplaceTest{ public static void main(String[] args) throws Exception { BufferedReader reader = new BufferedReader(new InputStreamReader( new FileInputStream("src/com/anllin/regex/ReplaceTest.java"))); String str = null; StringBuffer sb = new StringBuffer(); while (null != (str = reader.readLine())) { sb.append(str); } String s = sb.toString(); // Match the specially-commented block of text above: Matcher mInput = Pattern.compile("/\\*!(.*)!\\*/", Pattern.DOTALL) .matcher(s); if (mInput.find()) { // Captured by parentheses s = mInput.group(1); } // Replace two or more spaces with a single space: s = s.replaceAll(" {2,}", " "); // Replace one or more spaces at the beginning of each line with no // spaces.Must enable MULTILINE mode. s = s.replaceAll("(?m)^+", ""); System.out.println(s); s = s.replaceFirst("[aeiou]", "(VOWEL1)"); StringBuffer sbuf = new StringBuffer(); Pattern p = Pattern.compile("[aeiou]"); Matcher m = p.matcher(s); // Process the find information as you perform the replacements: while (m.find()) { m.appendReplacement(sbuf, m.group().toUpperCase()); } // Put in the remainder of ther text: m.appendTail(sbuf); System.out.println(sbuf); }}

--------------------------------------------------------------------

p(VOWEL1)ckAgE cOm.AnllIn.rEgEx;ImpOrt jAvA.IO.BUffErEdREAdEr;ImpOrt jAvA.IO.FIlEInpUtStrEAm;ImpOrt jAvA.IO.InpUtStrEAmREAdEr;ImpOrt jAvA.UtIl.rEgEx.MAtchEr;ImpOrt jAvA.UtIl.rEgEx.PAttErn;pUblIc clAss REplAcETEst{ pUblIc stAtIc vOId mAIn(StrIng[] Args) thrOws ExcEptIOn { BUffErEdREAdEr rEAdEr = nEw BUffErEdREAdEr(nEw InpUtStrEAmREAdEr( nEw FIlEInpUtStrEAm("src/cOm/AnllIn/rEgEx/REplAcETEst.jAvA"))); StrIng str = nUll; StrIngBUffEr sb = nEw StrIngBUffEr(); whIlE (nUll != (str = rEAdEr.rEAdLInE())) { sb.AppEnd(str); } StrIng s = sb.tOStrIng(); // MAtch thE spEcIAlly-cOmmEntEd blOck Of tExt AbOvE: MAtchEr mInpUt = PAttErn.cOmpIlE("/\\*!(.*)!\\*/", PAttErn.DOTALL) .mAtchEr(s); If (mInpUt.fInd()) { // CAptUrEd by pArEnthEsEs s = mInpUt.grOUp(1); } // REplAcE twO Or mOrE spAcEs wIth A sInglE spAcE: s = s.rEplAcEAll(" {2,}", " "); // REplAcE OnE Or mOrE spAcEs At thE bEgInnIng Of EAch lInE wIth nO // spAcEs.MUst EnAblE MULTILINE mOdE. s = s.rEplAcEAll("(?m)^+", ""); SystEm.OUt.prIntln(s); s = s.rEplAcEFIrst("[AEIOU]", "(VOWEL1)"); StrIngBUffEr sbUf = nEw StrIngBUffEr(); PAttErn p = PAttErn.cOmpIlE("[AEIOU]"); MAtchEr m = p.mAtchEr(s); // PrOcEss thE fInd InfOrmAtIOn As yOU pErfOrm thE rEplAcEmEnts: whIlE (m.fInd()) { m.AppEndREplAcEmEnt(sbUf, m.grOUp().tOUppErCAsE()); } // PUt In thE rEmAIndEr Of thEr tExt: m.AppEndTAIl(sbUf); SystEm.OUt.prIntln(sbUf); }}

reset()方法的使用

import java.util.regex.Matcher;

import java.util.regex.Pattern;

/**
 * Demonstrates {@link Matcher#reset(CharSequence)}: one matcher is reused on
 * a second input string.
 */
public class ResetTest
{
    /**
     * Prints all three-letter matches of {@code [frb][aiu][gx]} in a first
     * sentence, re-targets the same matcher at a second sentence, and prints
     * the matches found there.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        Matcher m = Pattern.compile("[frb][aiu][gx]").matcher(
                "fix the rug with bags");
        printMatches(m);

        // reset(CharSequence) swaps in new input, so the same matcher and
        // pattern can be reused from the start of the new string.
        m.reset("fix the rig with rags");
        printMatches(m);
    }

    /**
     * Prints each remaining match of {@code m} on its own line.
     *
     * @param m the matcher to drain
     */
    private static void printMatches(Matcher m)
    {
        while (m.find())
        {
            System.out.println(m.group());
        }
    }
}

Output:

fix

rug

bag

fix

rig

rag

Summary of regular-expression constructs

Construct	Matches

Characters
x	The character x
\\	The backslash character
\0n	The character with octal value 0n (0 <= n <= 7)
\0nn	The character with octal value 0nn (0 <= n <= 7)
\0mnn	The character with octal value 0mnn (0 <= m <= 3, 0 <= n <= 7)
\xhh	The character with hexadecimal value 0xhh
\uhhhh	The character with hexadecimal value 0xhhhh
\t	The tab character ('\u0009')
\n	The newline (line feed) character ('\u000A')
\r	The carriage-return character ('\u000D')
\f	The form-feed character ('\u000C')
\a	The alert (bell) character ('\u0007')
\e	The escape character ('\u001B')
\cx	The control character corresponding to x

Character classes
[abc]	a, b, or c (simple class)
[^abc]	Any character except a, b, or c (negation)
[a-zA-Z]	a through z or A through Z, inclusive (range)
[a-d[m-p]]	a through d, or m through p: [a-dm-p] (union)
[a-z&&[def]]	d, e, or f (intersection)
[a-z&&[^bc]]	a through z, except for b and c: [ad-z] (subtraction)
[a-z&&[^m-p]]	a through z, and not m through p: [a-lq-z] (subtraction)

Predefined character classes
.	Any character (may or may not match line terminators)
\d	A digit: [0-9]
\D	A non-digit: [^0-9]
\s	A whitespace character: [ \t\n\x0B\f\r]
\S	A non-whitespace character: [^\s]
\w	A word character: [a-zA-Z_0-9]
\W	A non-word character: [^\w]

POSIX character classes (US-ASCII only)
\p{Lower}	A lower-case alphabetic character: [a-z]
\p{Upper}	An upper-case alphabetic character:[A-Z]
\p{ASCII}	All ASCII:[\x00-\x7F]
\p{Alpha}	An alphabetic character:[\p{Lower}\p{Upper}]
\p{Digit}	A decimal digit: [0-9]
\p{Alnum}	An alphanumeric character:[\p{Alpha}\p{Digit}]
\p{Punct}	Punctuation: One of !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
\p{Graph}	A visible character: [\p{Alnum}\p{Punct}]
\p{Print}	A printable character: [\p{Graph}\x20]
\p{Blank}	A space or a tab: [ \t]
\p{Cntrl}	A control character: [\x00-\x1F\x7F]
\p{XDigit}	A hexadecimal digit: [0-9a-fA-F]
\p{Space}	A whitespace character: [ \t\n\x0B\f\r]

java.lang.Character classes (simple java character type)
\p{javaLowerCase}	Equivalent to java.lang.Character.isLowerCase()
\p{javaUpperCase}	Equivalent to java.lang.Character.isUpperCase()
\p{javaWhitespace}	Equivalent to java.lang.Character.isWhitespace()
\p{javaMirrored}	Equivalent to java.lang.Character.isMirrored()

Classes for Unicode blocks and categories
\p{InGreek}	A character in the Greek block (simple block)
\p{Lu}	An uppercase letter (simple category)
\p{Sc}	A currency symbol
\P{InGreek}	Any character except one in the Greek block (negation)
[\p{L}&&[^\p{Lu}]]	Any letter except an uppercase letter (subtraction)

Boundary matchers
^	The beginning of a line
$	The end of a line
\b	A word boundary
\B	A non-word boundary
\A	The beginning of the input
\G	The end of the previous match
\Z	The end of the input but for the final terminator, if any
\z	The end of the input

Greedy quantifiers
X?	X, once or not at all
X*	X, zero or more times
X+	X, one or more times
X{n}	X, exactly n times
X{n,}	X, at least n times
X{n,m}	X, at least n but not more than m times

Reluctant quantifiers
X??	X, once or not at all
X*?	X, zero or more times
X+?	X, one or more times
X{n}?	X, exactly n times
X{n,}?	X, at least n times
X{n,m}?	X, at least n but not more than m times

Possessive quantifiers
X?+	X, once or not at all
X*+	X, zero or more times
X++	X, one or more times
X{n}+	X, exactly n times
X{n,}+	X, at least n times
X{n,m}+	X, at least n but not more than m times

Logical operators
XY	X followed by Y
X|Y	Either X or Y
(X)	X, as a capturing group

Back references
\n	Whatever the nth capturing group matched

Quotation
\	Nothing, but quotes the following character
\Q	Nothing, but quotes all characters until \E
\E	Nothing, but ends quoting started by \Q

Special constructs (non-capturing)
(?:X)	X, as a non-capturing group
(?idmsux-idmsux)	Nothing, but turns match flags i d m s u x on - off
(?idmsux-idmsux:X)	X, as a non-capturing group with the given flags i d m s u x on - off
(?=X)	X, via zero-width positive lookahead
(?!X)	X, via zero-width negative lookahead
(?<=X)	X, via zero-width positive lookbehind
(?<!X)	X, via zero-width negative lookbehind
(?>X)	X, as an independent, non-capturing group