XmlAnalyzer1.00 源码

此工程用途:将xml同级属性/子节点按字母序排列重新输出.

源码下载: https://files.cnblogs.com/files/heyang78/XmlAnalyzer-20200526-1.zip

核心类:

Token,此类用于将XML文件中的文本分类:

package com.heyang;

/**
 * One lexical unit of XML text: a piece of punctuation or a run of text,
 * tagged with one of the {@code TYPE_*} constants below.
 */
public class Token {
    public final static int TYPE_OPEN_ANGLEBRACKET=0; // <
    public final static int TYPE_CLOSE_ANGLEBRACKET=1;// >
    public final static int TYPE_slant =2;              // /
    public final static int TYPE_TEXT=3;              // text
    public final static int TYPE_EQUAL =4;              // =
    public final static int TYPE_EMPTY_CLOSE =5;      // />
    public final static int TYPE_END_OPEN =6;          // </

    /** One of the {@code TYPE_*} constants. */
    private int type;

    /** The characters this token was built from. */
    private String text;

    /** Location marker; stays 0 until {@link #setIndex(int)} is called. */
    private int index;

    /**
     * Creates a token from a multi-character string.
     *
     * @param word the token text
     * @param type one of the {@code TYPE_*} constants
     */
    public Token(String word, int type) {
        this.type = type;
        this.text = word;
    }

    /**
     * Creates a token from a single character.
     *
     * @param c    the character that makes up the token text
     * @param type one of the {@code TYPE_*} constants
     */
    public Token(char c, int type) {
        this(String.valueOf(c), type);
    }

    /** @return the token text */
    public String getText() {
        return text;
    }

    /** @param text the new token text */
    public void setText(String text) {
        this.text = text;
    }

    /** @return the token type, one of the {@code TYPE_*} constants */
    public int getType() {
        return type;
    }

    /** @param type the new token type */
    public void setType(int type) {
        this.type = type;
    }

    /** @return the location marker of this token */
    public int getIndex() {
        return index;
    }

    /** @param index the new location marker */
    public void setIndex(int index) {
        this.index = index;
    }

    /**
     * @return a debug representation of the form
     *         {@code token(text=..,type=..,index=..)}
     */
    @Override
    public String toString() {
        final String pattern = "token(text=%s,type=%d,index=%d)";
        return String.format(pattern, text, type, index);
    }
}

Lexer,此类用于分词:

package com.heyang;

import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang.StringUtils;

/**
 * Splits XML text into a flat list of {@link Token}s.
 *
 * <p>The scanner is context-free: whitespace separates text tokens, the
 * punctuation {@code <}, {@code </}, {@code >}, {@code />}, {@code /} and
 * {@code =} each become a token of their own, and a double-quoted string
 * becomes a single text token that still includes its quotes.
 */
public class Lexer {
    private List<Token> tokens;

    /**
     * Scans the given text and builds the token list.
     *
     * @param inputTxt the XML text to scan
     */
    public Lexer(String inputTxt) {
        tokens = new ArrayList<Token>();

        // Collects consecutive characters that do not form a token on their own.
        StringBuilder bundle = new StringBuilder();
        final int length = inputTxt.length();

        for (int i = 0; i < length; i++) {
            char c = inputTxt.charAt(i);

            if (Character.isWhitespace(c)) {
                // Whitespace only terminates the pending text; it is never kept.
                flushBundle(bundle);
            } else if (c == '<') {
                // Pending text must be emitted first so tokens keep source order.
                flushBundle(bundle);
                if (i + 1 < length && inputTxt.charAt(i + 1) == '/') {
                    tokens.add(new Token("</", Token.TYPE_END_OPEN));
                    i++; // consume the '/'
                } else {
                    tokens.add(new Token(c, Token.TYPE_OPEN_ANGLEBRACKET));
                }
            } else if (c == '>') {
                flushBundle(bundle);
                tokens.add(new Token(c, Token.TYPE_CLOSE_ANGLEBRACKET));
            } else if (c == '=') {
                flushBundle(bundle);
                tokens.add(new Token(c, Token.TYPE_EQUAL));
            } else if (c == '/') {
                flushBundle(bundle);
                if (i + 1 < length && inputTxt.charAt(i + 1) == '>') {
                    tokens.add(new Token("/>", Token.TYPE_EMPTY_CLOSE));
                    i++; // consume the '>'
                } else {
                    tokens.add(new Token(c, Token.TYPE_slant));
                }
            } else if (c == '"') {
                flushBundle(bundle);
                int closing = inputTxt.indexOf('"', i + 1);
                // An unterminated quote runs to the end of the input instead of
                // reading past it.
                int end = closing < 0 ? length : closing + 1;
                tokens.add(new Token(inputTxt.substring(i, end), Token.TYPE_TEXT));
                i = end - 1; // the loop increment moves past the closing quote
            } else {
                bundle.append(c);
            }
        }

        // Text at the very end of the input has no delimiter to trigger a flush.
        flushBundle(bundle);

        setTokenIndexes();
    }

    /**
     * Emits the pending text (if any) as a text token and clears the buffer.
     */
    private void flushBundle(StringBuilder bundle) {
        addText2Tokens(bundle.toString());
        bundle.setLength(0);
    }

    /**
     * Appends a text token unless the text is null or empty.
     *
     * @param text the candidate token text
     * @return true if a token was added
     */
    private boolean addText2Tokens(String text) {
        if (text == null || text.isEmpty()) {
            return false;
        }

        tokens.add(new Token(text, Token.TYPE_TEXT));
        return true;
    }

    /**
     * Numbers all tokens consecutively, starting at 1.
     */
    public void setTokenIndexes() {
        int idx = 0;
        for (Token t : tokens) {
            idx++;
            t.setIndex(idx);
        }
    }

    /**
     * Prints every token as {@code #index text} to standard output,
     * renumbering the tokens from 1 while doing so.
     */
    public void printTokens() {
        int idx = 0;
        for (Token t : tokens) {
            idx++;
            t.setIndex(idx);
            System.out.println("#" + idx + " " + t.getText());
        }
    }

    /**
     * @return all token texts in order, each followed by a single space
     */
    public String getCompactJsonTxt() {
        StringBuilder sb = new StringBuilder();

        for (Token t : tokens) {
            sb.append(t.getText()).append(' ');
        }

        return sb.toString();
    }

    /**
     * @return the token list built by the constructor (the internal list, not a copy)
     */
    public List<Token> getTokens() {
        return tokens;
    }
}

Node,此类代表一个xml节点:

package com.heyang;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class Node implements Comparable<Node>{
    private String text;
    private String name;
    private List<Node> children;
    private List<Property> proterties;
    private int depth=0;
    
    public int compareTo(Node another) {
        return this.name.compareTo(another.name);
    }
    
    public void addChild(Node n) {
        if(children==null) {
            children=new ArrayList<Node>();
        }
        
        children.add(n);
        adjustDepth();
    }
    
    private void adjustDepth() {
        if(children==null) {
            return;
        }
        for(Node json:children) {
            json.depth=this.depth+1;
            json.adjustDepth();
        }
    }
    
    public void addProperty(Property p) {
        if(proterties==null) {
            proterties=new ArrayList<Property>();
        }
        
        proterties.add(p);
    }
    
    public String toString() {
        String tabs=getIndentSpace();
        
        StringBuilder sb=new StringBuilder();
        sb.append(tabs);
        
        sb.append("<"+name);
        if(proterties!=null) {
            Collections.sort(proterties);
            
            for(Property p:proterties) {
                sb.append(" "+p.getName()+"="+p.getValue());
            }
        }
        
        if(text==null && children==null) {
            sb.append("/>");
            return sb.toString();
        }else {
            sb.append(">");
        }
        
        if(text!=null) {
            sb.append(text);
        }
        
        if(children!=null) {
            
            
            Collections.sort(children);
            for(Node child:children) {
                sb.append("
");
                sb.append(child);
            }
        }
        
        if(children!=null) {
            sb.append("
"+tabs+"</"+name+">");
        }else {
            sb.append("</"+name+">");
        }
        
        return sb.toString();
    }
    
    private String getIndentSpace() {
        return String.join("", Collections.nCopies(this.depth, "    "));
    }
    
    public String getText() {
        return text;
    }
    public void setText(String text) {
        this.text = text;
    }
    public String getName() {
        return name;
    }
    public void setName(String name) {
        this.name = name;
    }
    public List<Node> getChildren() {
        return children;
    }
    public void setChildren(List<Node> children) {
        this.children = children;
    }
    public List<Property> getProterties() {
        return proterties;
    }
    public void setProterties(List<Property> proterties) {
        this.proterties = proterties;
    }
}

Property,此类代表xml的属性:

package com.heyang;

public class Property implements Comparable<Property>{
    private String name;
    private String value;
    
     public int compareTo(Property another) {
        return this.name.compareTo(another.name);
    }
    
    public Property(String name,String value) {
        this.name=name;
        this.value=value;
    }
    
    public String getName() {
        return name;
    }
    public void setName(String name) {
        this.name = name;
    }
    public String getValue() {
        return value;
    }
    public void setValue(String value) {
        this.value = value;
    }
}

TreeBuilder,此类用于自顶向下构建一棵树:

package com.heyang;

import java.util.List;

/**
 * Builds a {@link Node} tree from a token list by recursive descent.
 *
 * <p>Parsing starts in the constructor; the resulting tree is available from
 * {@link #getRoot()}.
 */
public class TreeBuilder {
    private Node root;
    private List<Token> tokens;
    private int tokenIdx;

    /**
     * Parses the tokens into a tree.
     *
     * @param tokens the tokens of one XML element, in source order
     * @throws Exception if the tokens do not form a well-formed element or
     *                   end prematurely
     */
    public TreeBuilder(List<Token> tokens) throws Exception{
        this.tokens=tokens;
        this.tokenIdx=0;

        root=new Node();
        parseNode(root);
    }

    /**
     * Parses one element, starting at its opening '<', and fills
     * {@code parent} with its name, properties, text and children.
     */
    private void parseNode(Node parent) throws Exception{
        expect(Token.TYPE_OPEN_ANGLEBRACKET, "'<'");

        // get node name
        parent.setName(expect(Token.TYPE_TEXT, "text").getText());

        // get properties
        parseProperties(parent);

        Token token=fetchToken();
        if(token.getType()==Token.TYPE_EMPTY_CLOSE) {
            // Element ends here: no children, no text.
            return;
        }
        if(token.getType()!=Token.TYPE_CLOSE_ANGLEBRACKET) {
            // Start tag was not closed properly.
            throw unexpected("'>'", token);
        }

        // get text and/or child nodes
        parseContent(parent);

        // closing tag: '</' name '>'
        expect(Token.TYPE_END_OPEN, "'</'");

        Token closing=expect(Token.TYPE_TEXT, "text");
        String name=closing.getText();
        if(!name.equals(parent.getName())) {
            throw new Exception("Expected node name:"+parent.getName()+" actual:"+name+" "+closing);
        }

        expect(Token.TYPE_CLOSE_ANGLEBRACKET, "'>'");
    }

    /**
     * Reads zero or more {@code name = value} triples and adds each as a
     * property of {@code parent}. Stops at the first non-text token, which is
     * handed back to the token stream.
     */
    private void parseProperties(Node parent) throws Exception{
        for(;;) {
            Token token=fetchToken();
            if(token.getType()!=Token.TYPE_TEXT) {
                // Not the start of a property: hand the token back and stop.
                returnToken();
                return;
            }

            String name=token.getText();
            expect(Token.TYPE_EQUAL, "=");
            String value=expect(Token.TYPE_TEXT, "text").getText();

            parent.addProperty(new Property(name,value));
        }
    }

    /**
     * Reads the text and child elements between a start tag and its closing
     * tag. Stops at the first token that is neither text nor '<', which is
     * handed back to the token stream.
     */
    private void parseContent(Node parent) throws Exception{
        for(;;) {
            Token token=fetchToken();

            if(token.getType()==Token.TYPE_TEXT) {
                // Several text tokens can appear in a row; join them with a
                // single space so that earlier ones are not lost.
                String previous=parent.getText();
                parent.setText(previous==null ? token.getText() : previous+" "+token.getText());
            }else if(token.getType()==Token.TYPE_OPEN_ANGLEBRACKET)  {
                // Child element: attach it, then parse it recursively from its '<'.
                Node child=new Node();
                parent.addChild(child);

                returnToken();
                parseNode(child);
            }else {
                // Neither text nor a child: hand the token back and stop.
                returnToken();
                return;
            }
        }
    }

    /**
     * Fetches the next token and checks its type.
     *
     * @param type     the required {@code Token.TYPE_*} value
     * @param expected how the required token is described in the error message
     * @return the fetched token
     * @throws Exception if the token has a different type or input has ended
     */
    private Token expect(int type, String expected) throws Exception{
        Token token=fetchToken();
        if(token.getType()!=type) {
            throw unexpected(expected, token);
        }
        return token;
    }

    /**
     * Builds the exception reported when a token of the wrong type is found.
     */
    private Exception unexpected(String expected, Token actual) {
        return new Exception("Expected:"+expected+" actual:"+actual.getText()+" "+actual);
    }

    /**
     * Returns the next token and advances the cursor.
     *
     * @throws Exception if all tokens have already been consumed
     */
    private Token fetchToken() throws Exception{
        if(tokenIdx>=tokens.size()) {
            throw new Exception("Unexpected end of input after "+tokens.size()+" token(s)");
        }

        Token t=tokens.get(tokenIdx);
        tokenIdx++;
        return t;
    }

    /**
     * Moves the cursor back by one so the last fetched token is read again.
     */
    private void returnToken() {
        if(tokenIdx>0) {
            tokenIdx--;
        }
    }

    /**
     * @return the root node of the parsed tree
     */
    public Node getRoot() {
        return root;
    }
}

最后整合调用:

package com.heyang;

import com.heyang.util.BracketChecker;
import com.heyang.util.CommonUtil;
import com.heyang.util.Renderer;

public class EntryPoint {
    public static void main(String[] args) {
        try {
            // Read context from file
            String jsonTxt=CommonUtil.readTextFromFile("C:\hy\files\xml\01.xml");
            System.out.println("原文="+jsonTxt);
            
            // Is brackets balanced
            BracketChecker checker=new BracketChecker();
            boolean isBalanced=checker.isBalanced(jsonTxt);
            if(isBalanced==false) {
                System.out.println(Renderer.paintBrown(checker.getErrMsg()));
                return;
            }
            
            // Parse json to tokens
            Lexer lex=new Lexer(jsonTxt);
            //System.out.println("紧缩文本="+lex.getCompactJsonTxt());
            //lex.printTokens();
            
            // Build tree
            TreeBuilder builder=new TreeBuilder(lex.getTokens());
            Node root=builder.getRoot();
            System.out.println("整形后文本:
"+root);
        }catch(Exception ex) {
            System.out.println(Renderer.paintBrown(ex.getMessage()));
            ex.printStackTrace();
        }
    }
}

整形效果:

原文=<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">  <modelVersion>4.0.0</modelVersion>  <groupId>com.heyang</groupId>  <artifactId>XmlAnalyzer</artifactId>  <version>1.00</version>    <dependencies>        <dependency>            <groupId>ch.qos.logback</groupId>            <artifactId>logback-classic</artifactId>            <version>1.1.11</version>        </dependency>        <dependency>            <groupId>ch.qos.logback</groupId>            <artifactId>logback-core</artifactId>            <version>1.1.11</version>        </dependency>                <dependency>            <groupId>commons-lang</groupId>            <artifactId>commons-lang</artifactId>            <version>2.6</version>        </dependency>    </dependencies></project>
整形后文本:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <artifactId>XmlAnalyzer</artifactId>
    <dependencies>
        <dependency>
            <artifactId>logback-classic</artifactId>
            <groupId>ch.qos.logback</groupId>
            <version>1.1.11</version>
        </dependency>
        <dependency>
            <artifactId>logback-core</artifactId>
            <groupId>ch.qos.logback</groupId>
            <version>1.1.11</version>
        </dependency>
        <dependency>
            <artifactId>commons-lang</artifactId>
            <groupId>commons-lang</groupId>
            <version>2.6</version>
        </dependency>
    </dependencies>
    <groupId>com.heyang</groupId>
    <modelVersion>4.0.0</modelVersion>
    <version>1.00</version>
</project>

2020-5-22 解析算术表达式

2020-5-25 解析Json

2020-5-26 解析XML

感觉编译器/解释器的路越走越宽了.

--2020年5月26日--

原文地址:https://www.cnblogs.com/heyang78/p/12964733.html