【正则表达式】正则表达式应用

JavaScript表单验证

<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Insert title here</title>
</head>
<body>
	<form action="#" method="post" id="frmInfo">
		<div id="err" style="color: red"></div>
		<div>
			姓名:<input id="name" name="name" />
		</div>
		<div>
			身份证:<input id="idno" name="idno" />
		</div>
		<div>
			<input type="submit" />
		</div>
	</form>
	<script type="text/javascript">
		// Validate the form when it is submitted; returning false cancels the submission.
		document.getElementById("frmInfo").onsubmit = function() {
			// In JS a regular-expression object is written as a literal between slashes: /pattern/
			// Whole-string match of 2-8 characters in the range U+4E00..U+9FA5 (Chinese characters).
			var regex1 = /^[\u4e00-\u9fa5]{2,8}$/;
			// Whole-string match of 18 characters: a first digit from 1-6 or 8,
			// then 16 digits, then a final digit or the letter x/X.
			var regex2 = /^[1234568]\d{16}[0-9xX]$/;
			var name = document.getElementById("name").value;
			var idno = document.getElementById("idno").value;
			var err = document.getElementById("err");
			// test() returns true when the value matches and false otherwise.
			if (!regex1.test(name)) {
				err.innerHTML = "无效姓名";
				return false; // do not submit the form data
			} else if (!regex2.test(idno)) {
				err.innerHTML = "无效身份证号";
				return false;
			} else {
				alert("验证通过,准备提交");
				return true;
			}
		}
	</script>
</body>
</html>

Java中Web页面信息提取

提取Web页面中的城市中文名和英文名

Web页面

<!DOCTYPE html>
<!-- Sample page: each list item is a Chinese city name immediately followed by its English name, with no separator. -->
<html>
<head>
<meta charset="UTF-8">
<title>国际主要城市</title>
</head>
<body>
	<h1>国际主要城市</h1>
	<ul>
		<li>纽约NewYork</li>
		<li>伦敦London</li>
		<li>东京Tokyo</li>
		<li>巴黎Paris</li>
		<li>香港HongKong</li>
		<li>新加坡Singapore</li>
		<li>悉尼Sydney</li>
		<li>米兰Milano</li>
		<li>上海Shanghai</li>
		<li>北京Beijing</li>
		<li>马德里Madrid</li>
		<li>莫斯科Moscow</li>
		<li>首尔Seoul</li>
		<li>曼谷Bangkok</li>
		<li>多伦多Toronto</li>
		<li>布鲁塞尔Brussels</li>
		<li>芝加哥Chicago</li>
		<li>吉隆坡KualaLumpur</li>
	</ul>
</body>
</html>

信息提取

package com.imooc.regex;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Reads an HTML page and prints every city found in its list items as
 * {@code <Chinese name>-<English name>}, one per line.
 */
public class RegexSample {
	/** Page that is read when no path is supplied on the command line. */
	private static final String DEFAULT_PAGE = "D:/workspace/regex/WebContent/sample.html";

	/**
	 * Matches one list item whose text is 2-10 Chinese characters (group 1)
	 * immediately followed by one or more ASCII letters (group 2).
	 * Compiled once because a Pattern is immutable and reusable.
	 */
	private static final Pattern CITY_ITEM = Pattern.compile("<li>([\u4e00-\u9fa5]{2,10})([a-zA-Z]+)</li>");

	/**
	 * Prints the page content followed by one {@code chinese-english} line per matched city.
	 *
	 * @param args optional; {@code args[0]} is the path of the UTF-8 encoded page to read.
	 *             When absent, the default sample page path is used.
	 */
	public static void main(String[] args) {
		String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PAGE;

		StringBuilder content = new StringBuilder();
		// try-with-resources closes the reader (and the wrapped streams) even when reading fails.
		try (BufferedReader reader = new BufferedReader(
				new InputStreamReader(new FileInputStream(path), StandardCharsets.UTF_8))) {
			String lineText;
			while ((lineText = reader.readLine()) != null) {
				// readLine() strips the line terminator, so put one back.
				content.append(lineText).append('\n');
			}
		} catch (IOException e) {
			// Nothing useful can be extracted from a page that could not be read.
			System.err.println("Cannot read " + path + ": " + e);
			return;
		}
		System.out.println(content);

		// Scan the whole page; every find() advances to the next list item that matches.
		Matcher m = CITY_ITEM.matcher(content);
		while (m.find()) {
			String chs = m.group(1); // Chinese name
			String eng = m.group(2); // English name
			System.out.println(chs + "-" + eng);
		}
	}
}

原文地址:https://www.cnblogs.com/huowuyan/p/11300745.html