使用正则进行HTML页面属性的替换

  使用正则表达式处理富文本内容中的图片地址:为 img 标签的 src 拼接域名,或去除 src 中的路径前缀

package com.goboosoft.common.utils;

import org.apache.commons.lang3.StringUtils;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Regex-based helpers for rewriting attribute values inside HTML fragments.
 *
 * <p>The helpers work on plain strings with regular expressions; they do not parse HTML.
 *
 * @author cy
 * @date 2019-04-01 17:35
 * version 1.0
 */
public class HtmlUtils {

    /**
     * Rewrites one attribute of every matching opening tag in {@code str}.
     *
     * <p>For each opening tag named {@code tag} that is followed by at least one whitespace
     * character, the first double-quoted, non-empty occurrence of {@code tagAttrib="value"} is
     * replaced by {@code startTag + value + endTag}. The tag is re-emitted as
     * {@code "<" + tag + " "} followed by its (rewritten) attribute text, so the tag name takes
     * the spelling of the {@code tag} argument. Matching is case-insensitive.
     *
     * <p>{@code tag} and {@code tagAttrib} are inserted into the regular expressions unescaped.
     * {@code startTag}, {@code endTag} and the attribute text are always treated literally.
     *
     * @param str       the text to process; {@code null} or empty input is returned unchanged
     * @param tag       the tag name to look for, e.g. {@code img}
     * @param tagAttrib the attribute name whose value is rewritten, e.g. {@code src}
     * @param startTag  text emitted in place of {@code tagAttrib="}, before the original value
     * @param endTag    text emitted after the original value, in place of the closing quote
     * @return the processed text
     * @author huweijun
     * @date 2016-07-13 19:15:32
     */
    public static String replaceHtmlTag(String str, String tag, String tagAttrib, String startTag, String endTag) {
        if (str == null || str.isEmpty()) {
            return str;
        }
        // Group 1 captures everything between the tag name and the closing '>'.
        String tagRegex = "<\\s*" + tag + "\\s+([^>]*)\\s*";
        // The lookbehind keeps e.g. "src" from matching inside "data-src" or "xlink:src".
        String attribRegex = "(?<![\\w:-])" + tagAttrib + "=\\s*\"([^\"]+)\"";
        Pattern tagPattern = Pattern.compile(tagRegex, Pattern.CASE_INSENSITIVE);
        Pattern attribPattern = Pattern.compile(attribRegex, Pattern.CASE_INSENSITIVE);

        Matcher tagMatcher = tagPattern.matcher(str);
        StringBuffer result = new StringBuffer();
        while (tagMatcher.find()) {
            StringBuffer rebuiltTag = new StringBuffer("<" + tag + " ");
            Matcher attribMatcher = attribPattern.matcher(tagMatcher.group(1));
            // Only the first occurrence of the attribute inside the tag is rewritten.
            if (attribMatcher.find()) {
                String value = attribMatcher.group(1);
                // quoteReplacement: '$' and '\' in the value must not be read as group references.
                attribMatcher.appendReplacement(rebuiltTag, Matcher.quoteReplacement(startTag + value + endTag));
            }
            attribMatcher.appendTail(rebuiltTag);
            tagMatcher.appendReplacement(result, Matcher.quoteReplacement(rebuiltTag.toString()));
        }
        tagMatcher.appendTail(result);
        return result.toString();
    }

    /**
     * Prepends {@code domain} to the {@code src} value of every {@code img} tag in
     * {@code content}.
     *
     * <p>The domain is prepended unconditionally, including to values that are already
     * absolute URLs.
     *
     * @param content the HTML fragment to process
     * @param domain  the prefix placed in front of each {@code src} value
     * @return the rewritten fragment, or {@code null} when {@code content} is null, empty or
     *         whitespace only
     */
    public static String replaceImgSrc(String content, String domain) {
        if (isBlank(content)) {
            return null;
        }
        String srcPrefix = "src=\"" + domain;
        return replaceHtmlTag(content, "img", "src", srcPrefix, "\"");
    }

    /**
     * Returns whether {@code text} is null, empty, or consists only of characters for which
     * {@link Character#isWhitespace(char)} is true. Kept local so this class needs only the JDK.
     */
    private static boolean isBlank(String text) {
        if (text == null) {
            return true;
        }
        for (int i = 0; i < text.length(); i++) {
            if (!Character.isWhitespace(text.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /** Small manual demo: prints a sample fragment before and after {@link #replaceImgSrc}. */
    public static void main(String[] args) {
        StringBuilder content = new StringBuilder();
        content.append("<ul class=\"imgBox\"><li><img id=\"160424\" src=\"uploads/allimg/160424/1-160424120T1-50.jpg\" class=\"src_class\"></li>");
        content.append("<li><img id=\"150628\" src=\"uploads/allimg/150628/1-15062Q12247.jpg\" class=\"src_class\"></li></ul>");
        System.out.println("原始字符串为:" + content.toString());
        String s = replaceImgSrc(content.toString(), "http://files.goboosoft.com/zwjm/");
        System.out.println("替换后为:" + s);
    }

}
/**
     * 去除图片中的domain
     * @param htmlStr html字符串
     * @return String
     */
    private static String deleteImgSrcDomain(String htmlStr) {
        List<String> pics = new ArrayList<String>();
        String img = "";
        String repimg = "";
        Pattern p_image;
        Matcher m_image;
        String regEx_img = "<img.*src\s*=\s*(.*?)[^>]*?>";
        p_image = Pattern.compile(regEx_img, Pattern.CASE_INSENSITIVE);
        m_image = p_image.matcher(htmlStr);
        while (m_image.find()) {
            // 得到<img />数据
            img = m_image.group();
            // 匹配<img>中的src数据
            Matcher m = Pattern.compile("src\s*=\s*"?(.*?)("|>|\s+)").matcher(img);
            while (m.find()) {
                String s = m.group(1);
                pics.add(s);
                // 处理图片信息
                String substring = s.substring(s.lastIndexOf("/") + 1, s.length());
                repimg = img.replace(s, substring);
                htmlStr = htmlStr.replace(img, repimg);
                img = repimg;
            }
        }
        return htmlStr;
    }
原文地址:https://www.cnblogs.com/chengyangyang/p/10640498.html