处理SQL及分词效果:
select * from ( select rownum as rn,tb1.stuid,tb1.summary from ( select stuid,sum(score) as summary from gk_score group by stuid order by summary desc ) tb1 order by tb1.summary desc ) tb2 where rn<11 Index Type No Text Type Desc ------------------------------------------------------------------------------------ 0 1 select KW:select 1 2 * Text 2 4 from KW:from 3 18 ( ( 4 1 select KW:select 5 2 rownum Text 6 13 as KW:as 7 2 rn Text 8 3 , Comma 9 2 tb1.stuid Text 10 3 , Comma 11 2 tb1.summary Text 12 4 from KW:from 13 18 ( ( 14 1 select KW:select 15 2 stuid Text 16 3 , Comma 17 2 sum Text 18 18 ( ( 19 2 score Text 20 19 ) ) 21 13 as KW:as 22 2 summary Text 23 4 from KW:from 24 2 gk_score Text 25 14 group KW:group 26 10 by KW:by 27 2 stuid Text 28 9 order KW:order 29 10 by KW:by 30 2 summary Text 31 11 desc KW:asc 32 19 ) ) 33 2 tb1 Text 34 9 order KW:order 35 10 by KW:by 36 2 tb1.summary Text 37 11 desc KW:asc 38 19 ) ) 39 2 tb2 Text 40 5 where KW:where 41 2 rn Text 42 16 < < 43 2 11 Text
程序:
package com.heyang.easysql.lex10; import java.io.BufferedReader; import java.io.FileInputStream; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.Collections; import java.util.List; class Token{ static final int TYPE_SELECT=1; static final int TYPE_TEXT=2; static final int TYPE_COMMA=3; static final int TYPE_FROM=4; static final int TYPE_WHERE=5; static final int TYPE_AND=6; static final int TYPE_EQUAL=7; static final int TYPE_OR=8; static final int TYPE_ORDER=9; static final int TYPE_BY=10; static final int TYPE_ASC=11; static final int TYPE_DESC=12; static final int TYPE_AS=13; static final int TYPE_GROUP=14; static final int TYPE_HAVING=15; static final int TYPE_LESSTHAN=16; static final int TYPE_GREATERTHAN=17; static final int TYPE_OPEN_PARENTHESIS=18; static final int TYPE_CLOSE_PARENTHESIS=19; static final int TYPE_CONNECT=20; static final int TYPE_LESSTHAN_OR_EQUAL=21; static final int TYPE_GREATERTHAN_OR_EQUAL=22; static final int TYPE_LESSTHAN_OR_GREATERTHAN=23; static final int TYPE_CASE=24; static final int TYPE_WHEN=25; static final int TYPE_THEN=26; static final int TYPE_ELSE=27; static final int TYPE_END=28; static final int TYPE_IS=29; static final int TYPE_NULL=30; static final int TYPE_TRUE=31; static final int TYPE_FALSE=32; static final int TYPE_PLUS=33; static final int TYPE_MINUS=34; //static final int TYPE_MULTI=35; static final int TYPE_DEVIDE=36; static final int TYPE_DISTINCT=37; static final int TYPE_OVER=38; static final int TYPE_STRING_CONCAT=39; static final int TYPE_ON=40; static final int TYPE_JOIN=41; static final int TYPE_INNER=42; static final int TYPE_LEFT=43; static final int TYPE_RIGHT=44; static final int TYPE_OUTER=45; static final int TYPE_FULL=46; static final int TYPE_WITHIN=47; static final int TYPE_PARTITION=48; int type; String text; public Token(char c,int type) { this.text=String.valueOf(c); this.type=type; } public Token(String word,int type) { this.text=word; this.type=type; } public String getTypeStr() { if(type==TYPE_SELECT) { return "KW:select"; }else if(type==TYPE_FROM) { return "KW:from"; }else if(type==TYPE_COMMA) { return "Comma"; }else if(type==TYPE_TEXT) { return "Text"; }else if(type==TYPE_WHERE) { return "KW:where"; }else if(type==TYPE_AND) { return "KW:and"; }else if(type==TYPE_EQUAL) { return "="; }else if(type==TYPE_OR) { return "KW:or"; }else if(type==TYPE_ORDER) { return "KW:order"; }else if(type==TYPE_BY) { return "KW:by"; }else if(type==TYPE_ASC) { return "KW:asc"; }else if(type==TYPE_DESC) { return "KW:desc"; }else if(type==TYPE_AS) { return "KW:as"; }else if(type==TYPE_GROUP) { return "KW:group"; }else if(type==TYPE_HAVING) { return "KW:having"; }else if(type==TYPE_LESSTHAN) { return "<"; }else if(type==TYPE_GREATERTHAN) { return ">"; }else if(type==TYPE_OPEN_PARENTHESIS) { return "("; }else if(type==TYPE_CLOSE_PARENTHESIS) { return ")"; }else if(type==TYPE_CONNECT) { return "KW:connect"; }else if(type==TYPE_LESSTHAN_OR_EQUAL) { return "<="; }else if(type==TYPE_GREATERTHAN_OR_EQUAL) { return ">="; }else if(type==TYPE_LESSTHAN_OR_GREATERTHAN) { return "<>"; }else if(type==TYPE_CASE) { return "KW:case"; }else if(type==TYPE_WHEN) { return "KW:when"; }else if(type==TYPE_THEN) { return "KW:then"; }else if(type==TYPE_ELSE) { return "KW:else"; }else if(type==TYPE_END) { return "KW:end"; }else if(type==TYPE_IS) { return "KW:is"; }else if(type==TYPE_NULL) { return "KW:null"; }else if(type==TYPE_TRUE) { return "KW:true"; }else if(type==TYPE_FALSE) { return "KW:false"; }else if(type==TYPE_PLUS) { return "+"; }else if(type==TYPE_MINUS) { return "-"; }else if(type==TYPE_DEVIDE) { return "/"; }else if(type==TYPE_DISTINCT) { return "KW:distinct"; }else if(type==TYPE_OVER) { return "KW:over"; }else if(type==TYPE_STRING_CONCAT) { return "||"; }else if(type==TYPE_ON) { return "KW:on"; }else if(type==TYPE_JOIN) { return "KW:join"; }else if(type==TYPE_INNER) { return "KW:inner"; }else if(type==TYPE_LEFT) { return "KW:left"; }else if(type==TYPE_RIGHT) { return "KW:right"; }else if(type==TYPE_OUTER) { return "KW:outer"; }else if(type==TYPE_FULL) { return "KW:full"; }else if(type==TYPE_WITHIN) { return "KW:within"; }else if(type==TYPE_PARTITION) { return "KW:partition"; } return null; } } public class Lexer { private List<Token> tokenList; public Lexer(String inputSql) { String sql=pretreat(inputSql); String swallowed=""; tokenList=new ArrayList<Token>(); for(int i=0;i<sql.length();i++){ char c=sql.charAt(i); if(Character.isWhitespace(c)){ addTextToList(swallowed); swallowed=""; }else if(c==','){ addTextToList(swallowed); swallowed=""; tokenList.add(new Token(c,Token.TYPE_COMMA)); }else if(c=='='){ addTextToList(swallowed); swallowed=""; tokenList.add(new Token(c,Token.TYPE_EQUAL)); }else if(c=='<'){ int next=i+1; if(next<sql.length() && sql.charAt(next)=='=') { addTextToList(swallowed); swallowed=""; tokenList.add(new Token("<=",Token.TYPE_LESSTHAN_OR_EQUAL)); i++; }else if(next<sql.length() && sql.charAt(next)=='>') { addTextToList(swallowed); swallowed=""; tokenList.add(new Token("<>",Token.TYPE_LESSTHAN_OR_GREATERTHAN)); i++; }else { addTextToList(swallowed); swallowed=""; tokenList.add(new Token(c,Token.TYPE_LESSTHAN)); } }else if(c=='>'){ int next=i+1; if(next<sql.length() && sql.charAt(next)=='=') { addTextToList(swallowed); swallowed=""; tokenList.add(new Token(">=",Token.TYPE_GREATERTHAN_OR_EQUAL)); i++; }else { addTextToList(swallowed); swallowed=""; tokenList.add(new Token(c,Token.TYPE_GREATERTHAN)); } }else if(c=='|'){ int next=i+1; if(next<sql.length() && sql.charAt(next)=='|') { addTextToList(swallowed); swallowed=""; tokenList.add(new Token("||",Token.TYPE_STRING_CONCAT)); i++; } }else if(c=='('){ addTextToList(swallowed); swallowed=""; tokenList.add(new Token(c,Token.TYPE_OPEN_PARENTHESIS)); }else if(c==')'){ addTextToList(swallowed); swallowed=""; tokenList.add(new Token(c,Token.TYPE_CLOSE_PARENTHESIS)); }else if(c=='+'){ addTextToList(swallowed); swallowed=""; tokenList.add(new Token(c,Token.TYPE_PLUS)); }else if(c=='-'){ addTextToList(swallowed); swallowed=""; tokenList.add(new Token(c,Token.TYPE_MINUS)); }else if(c=='/'){ addTextToList(swallowed); swallowed=""; tokenList.add(new Token(c,Token.TYPE_DEVIDE)); }else { swallowed+=c; } } } private int findTypeByText(String text) { Object[][] arr= { {"select", Token.TYPE_SELECT}, {"from", Token.TYPE_FROM}, {"where", Token.TYPE_WHERE}, {"and", Token.TYPE_AND}, {"or", Token.TYPE_OR}, {"order", Token.TYPE_ORDER}, {"by", Token.TYPE_BY}, {"asc", Token.TYPE_ASC}, {"desc", Token.TYPE_ASC}, {"asc", Token.TYPE_DESC}, {"as", Token.TYPE_AS}, {"group", Token.TYPE_GROUP}, {"having", Token.TYPE_HAVING}, {"connect", Token.TYPE_CONNECT}, {"case", Token.TYPE_CASE}, {"when", Token.TYPE_WHEN}, {"then", Token.TYPE_THEN}, {"else", Token.TYPE_ELSE}, {"end", Token.TYPE_END}, {"is", Token.TYPE_IS}, {"null", Token.TYPE_NULL}, {"true", Token.TYPE_TRUE}, {"false", Token.TYPE_FALSE}, {"distinct", Token.TYPE_DISTINCT}, {"over", Token.TYPE_OVER}, {"on", Token.TYPE_ON}, {"join", Token.TYPE_JOIN}, {"inner", Token.TYPE_INNER}, {"left", Token.TYPE_LEFT}, {"right", Token.TYPE_RIGHT}, {"outer", Token.TYPE_OUTER}, {"full", Token.TYPE_FULL}, {"within", Token.TYPE_WITHIN}, {"partition", Token.TYPE_PARTITION}, }; for(Object[] arrInner:arr) { String keyword=String.valueOf(arrInner[0]); if(keyword.equalsIgnoreCase(text)) { return Integer.parseInt(arrInner[1].toString()); } } return Token.TYPE_TEXT; } private void addTextToList(String text) { int type=findTypeByText(text); addToken2List(text,type); } private void addToken2List(String text,int type) { if(text.trim().length()>0) { tokenList.add(new Token(text,type)); } } public void printTokenList() { final String continuousStar = createRepeatedStr("-", 84); final String layout = "%-20s %-20s %-20s %-20s %s"; StringBuilder sb = new StringBuilder(); sb.append(String.format(layout, "Index", "Type No","Text","Type Desc"," ")); sb.append(continuousStar + " "); int index=0; for(Token token:tokenList) { sb.append(String.format(layout, String.valueOf(index),String.valueOf(token.type), token.text,token.getTypeStr()," ")); index++; } System.out.println(sb.toString()); } private static String createRepeatedStr(String seed, int n) { return String.join("", Collections.nCopies(n, seed)); } private String pretreat(String raw) { return raw.trim()+" "; } public static void main(String[] args) throws Exception{ String sql=removeExtraSpace(readSqlFromFile("c:\temp\12.sql")); System.out.println(sql); new Lexer(sql).printTokenList();; } private static String readSqlFromFile(String filePath) throws Exception{ StringBuilder sb=new StringBuilder(); BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(filePath), "UTF-8")); String line = null; while( ( line = br.readLine() ) != null ) { sb.append(line); } br.close(); return sb.toString(); } private static String removeExtraSpace(String raw) { return raw.replaceAll("\s{2,}", " "); } }
--2020年5月13日 16点07分--