DFA敏感词过滤

import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang3.StringUtils;
import org.json.JSONException;
import org.json.JSONObject;
  
public class DFA {        
    /**  
     * 根节点  
     */    
    private static TreeNode rootNode = new TreeNode();    
    
    /**
     * 创建关键词树时的临时根节点
     */
    private static TreeNode rootNodeTemp = new TreeNode();
        
    /**  
     * 关键词缓存  
     */    
    private static ByteBuffer keywordBuffer = ByteBuffer.allocate(1024);       
        
    /**  
     * 关键词编码  
     */    
    private static String charset = "utf-8";    
    
    /**
     * 关键词树是否初始化
     */
    private static boolean treeIsInit = false;    
    
    /**  
     * 创建DFA树  
     * @param keywordList  
     * @throws UnsupportedEncodingException   
     */    
    public static void createKeywordTree(List<String> keywordList) throws UnsupportedEncodingException{    
        for (String keyword : keywordList) {    
            if(keyword == null) continue;    
            keyword = keyword.trim().toLowerCase();    
            byte[] bytes = keyword.getBytes(charset);    
            TreeNode tempNode = rootNodeTemp;    
            for (int i = 0; i < bytes.length; i++) {    
                int index = bytes[i] & 0xff;     
                TreeNode node = tempNode.getSubNode(index);    
                if(node == null){    
                    node = new TreeNode();    
                    tempNode.setSubNode(index, node);    
                }   
                tempNode = node;  
                if(i == bytes.length - 1){    
                    tempNode.setKeywordEnd(true);      
                }    
            }    
        }  
        synchronized (DFA.class)
        {
            rootNode = rootNodeTemp;
            if(!treeIsInit)treeIsInit = true;
        }
        
    }    
        
    /**
     * 检索关键词 
     * @param text
     * @return
     * @throws UnsupportedEncodingException
     */
    public static JSONObject searchKeyword(String text) throws UnsupportedEncodingException{
        if(StringUtils.isEmpty(text)){
            JSONObject json = new JSONObject();
            try
            {
                json.put("islegal", "1");
                json.put("deststr", "");
            }
            catch (JSONException e)
            {
                e.printStackTrace();
            }            
            return json;
        }
        if(!treeIsInit)createKeywordTree();
        System.out.println("searchKeyword:"+text);
        return searchKeyword(text.getBytes(charset),text.toLowerCase().getBytes(charset));    
    }    
     
    /**
     * 检索关键词
     * @param oldBytes
     * @param bytes
     * @return
     */
    public static JSONObject searchKeyword(byte[] oldBytes,byte[] bytes){  
        String is_legal = "1";   
        JSONObject json = new JSONObject();
        String result = null; 
        if(bytes == null || bytes.length == 0){    
            result = "";  
        }else{ 
            synchronized (DFA.class)
            {         
                TreeNode tempNode = rootNode;    
                int rollback = 0;     
                int position = 0;   
                while (position < bytes.length) {    
                    int index = bytes[position] & 0xFF;    
                    keywordBuffer.put(bytes[position]);   
                    tempNode = tempNode.getSubNode(index);   
                    if(tempNode == null){  
                        position = position - rollback;  
                        rollback = 0;    
                        tempNode = rootNode;        
                        keywordBuffer.clear();    
                    }    
                    else if(tempNode.isKeywordEnd()){    
                        keywordBuffer.flip();    
                        for (int i = 0; i <= rollback; i++) {  
                                bytes[position-i] = 42;  
                                oldBytes[position-i] = 42;
                        }  
                        keywordBuffer.limit(keywordBuffer.capacity());    
                        rollback = 1;   
                        is_legal = "0";
                    }else{     
                        rollback++;   
                    }    
                    position++;    
                } 
            }             
            try {  
                 result  =  new String(oldBytes,charset);    
                  
            } catch (Exception e) {  
                e.printStackTrace();  
            }  
             
        } 
        try
        {
            json.put("islegal", is_legal);
            json.put("deststr", result);
        }
        catch (JSONException e)
        {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }         
        return json;  
    }    
        
    public void setCharset(String charset) {    
        this.charset = charset;    
    } 
    
    public static void main(String[] args){
        try{
            List<String> keys = new ArrayList<String>();
            keys.add("TMD");
            keys.add("변형");
            String text ="你真TMD是个변형啊!";
            DFA.createKeywordTree(keys);
            JSONObject a = DFA.searchKeyword(text);
            System.out.println(a);
        }
        catch(Exception ex){
            ex.printStackTrace();
        }
    }
} 
import java.util.ArrayList;
import java.util.List;

public class TreeNode {  
    private static final int NODE_LEN = 256;  
      
    /** 
     * true 关键词的终结 ; false 继续 
     */  
    private boolean end = false;   
      
    private List<TreeNode> subNodes = new ArrayList<TreeNode>(NODE_LEN);  
      
    public TreeNode(){  
        for (int i = 0; i < NODE_LEN; i++) {  
            subNodes.add(i, null);  
        }  
    }  
      
    /** 
     * 向指定位置添加节点树 
     * @param index 
     * @param node 
     */  
    public void setSubNode(int index, TreeNode node){  
        subNodes.set(index, node);  
    }  
      
    public TreeNode getSubNode(int index){  
        //System.out.println("index:"+index+" node:"+this+"--subNodes:"+subNodes.get(index));
        return subNodes.get(index);  
    }  
      
  
    public boolean isKeywordEnd() {  
        return end;  
    }  
  
    public void setKeywordEnd(boolean end) {  
        this.end = end;  
    }  
} 
原文地址:https://www.cnblogs.com/ld-swust/p/6132861.html