【Json】Json分词器

package com.hy;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang.StringUtils;

/**
 * One lexical unit produced while scanning JSON text: a numeric type code
 * (one of the {@code TYPE_*} constants) paired with the matched text.
 */
class Token {
    /** Left curly brace. */
    static final int TYPE_LBRACE = 0;
    /** Right curly brace. */
    static final int TYPE_RBRACE = 1;
    /** Free text: anything that is not one of the structural characters. */
    static final int TYPE_TEXT = 2;
    /** Comma. */
    static final int TYPE_COMMA = 3;
    /** Colon. */
    static final int TYPE_COLON = 4;
    /** Left square bracket. */
    static final int TYPE_LBRACKET = 5;
    /** Right square bracket. */
    static final int TYPE_RBRACKET = 6;

    /** Type code of this token. */
    int type;
    /** Text of this token. */
    String text;

    /**
     * Builds a token whose text is a single character.
     *
     * @param c    the character that forms the token text
     * @param type the token type code
     */
    public Token(char c, int type) {
        this(String.valueOf(c), type);
    }

    /**
     * Builds a token from a piece of text.
     *
     * @param word the token text
     * @param type the token type code
     */
    public Token(String word, int type) {
        this.type = type;
        this.text = word;
    }
}
/**
 * Json文本分词器
 * @author 逆火
 *
 * 2019年12月1日 上午11:35:43
 */
public class Lexer {
    private List<Token> tokenList;

    /**
     * Contructor
     * @param jsonStr
     */
    public Lexer(String jsonStr) {
        tokenList=new ArrayList<Token>();
        
        String line="";
        for(int i=0;i<jsonStr.length();i++){
            char c=jsonStr.charAt(i);
            
            if(Character.isWhitespace(c)){
                continue;
            }else if(c=='{'){
                Token t=new Token(c,Token.TYPE_LBRACE);
                tokenList.add(t);
            }else if(c=='}'){
                if(StringUtils.isNotEmpty(line)) {
                    Token w=new Token(line,Token.TYPE_TEXT);
                    tokenList.add(w);
                    line="";
                }
                
                
                Token t=new Token(c,Token.TYPE_RBRACE);
                tokenList.add(t);
            }else if(c=='['){
                Token t=new Token(c,Token.TYPE_LBRACKET);
                tokenList.add(t);
            }else if(c==']'){
                Token t=new Token(c,Token.TYPE_RBRACKET);
                tokenList.add(t);
            }else if(c==',') {
                if(StringUtils.isNotEmpty(line)) {
                    Token w=new Token(line,Token.TYPE_TEXT);
                    tokenList.add(w);
                    line="";
                }
                
                Token t=new Token(c,Token.TYPE_COMMA);
                tokenList.add(t);
            }else if(c==':') {
                if(StringUtils.isNotEmpty(line)) {
                    Token w=new Token(line,Token.TYPE_TEXT);
                    tokenList.add(w);
                    line="";
                }
                
                Token t=new Token(c,Token.TYPE_COLON);
                tokenList.add(t);
            }else {
                line+=c;
            }
        }
    }
    
    public List<Token> getTokenList() {
        return tokenList;
    }
    
    public void printTokens() {
        int idx=0;
        for(Token t:tokenList) {
            idx++;
            System.out.println("#"+idx+" "+t.text);
        }
    }
    
    /**
     * Entry point
     */
    public static void main(String[] args) {
        String filePathname="D:\logs\1.json";
        try {
            StringBuilder sb=new StringBuilder();
            BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(filePathname), "UTF-8"));  
            String line = null;  
            while( ( line = br.readLine() ) != null ) {
                sb.append(line);
            }
            br.close();  
            
            String jsonStr=sb.toString();
            System.out.println("Raw json="+jsonStr);
            
            Lexer l=new Lexer(jsonStr);
            l.printTokens();
        } catch (FileNotFoundException ex) {
            ex.printStackTrace();
        } catch (IOException ex) {
            ex.printStackTrace();
        } 
    }
    
}

输出:

Raw json={    "status": "0000",    "message": "success",    "data": {        "title": {            "id": "001",            "name" : "白菜"        },        "content": [            {                "id": "001",                "value":"你好 白菜"            },            {                "id": "002",                 "value":"你好 萝卜"             }        ]    }}
#1 {
#2 "status"
#3 :
#4 "0000"
#5 ,
#6 "message"
#7 :
#8 "success"
#9 ,
#10 "data"
#11 :
#12 {
#13 "title"
#14 :
#15 {
#16 "id"
#17 :
#18 "001"
#19 ,
#20 "name"
#21 :
#22 "白菜"
#23 }
#24 ,
#25 "content"
#26 :
#27 [
#28 {
#29 "id"
#30 :
#31 "001"
#32 ,
#33 "value"
#34 :
#35 "你好白菜"
#36 }
#37 ,
#38 {
#39 "id"
#40 :
#41 "002"
#42 ,
#43 "value"
#44 :
#45 "你好萝卜"
#46 }
#47 ]
#48 }
#49 }

--END-- 2019年12月1日12:29:00

原文地址:https://www.cnblogs.com/heyang78/p/11965621.html