Lexical Analyzer Generator

  This program generates a random Regular Expression in the form of Binary Tree, and repeatedly checks whether an input string match that Regex ("$" as the end of input):

  (1) Use McNaughton-Yamada-Thompson (MYT) Construction to generate the NFA of the given regex;

  (2) Use the Subset Construction to generate the DFA from the given NFA;

  (3) Implement the DFA to check whether an input string match the regex.

  1 import java.util.*;
  2 import java.io.*;
  3 
  4 class State {
  5     public boolean ac;
  6     public Edge next;
  7 }
  8 
  9 class Edge {
 10     public static Set<Character> digit;
 11     public static Set<Character> letter;
 12     public static Set<Character> binop;
 13     public static Set<Character> uniop;
 14     public static Set<Character> or;
 15     public static Set<Character> plus;
 16     public static Set<Character> star;
 17     static {
 18         digit = new HashSet<Character>();
 19         for (int i=0;i<10;i++) {
 20             digit.add(new Character((char)('0'+i)));
 21         }
 22         letter = new HashSet<Character>();
 23         for (int i=0;i<26;i++) {
 24             letter.add(new Character((char)('a'+i)));
 25             letter.add(new Character((char)('A'+i)));
 26         }
 27         binop = new HashSet<Character>();
 28         binop.add(new Character('|'));
 29         binop.add(new Character('^'));
 30         binop.add(new Character('&'));
 31         binop.add(new Character('+'));
 32         binop.add(new Character('-'));
 33         binop.add(new Character('*'));
 34         binop.add(new Character('/'));
 35         binop.add(new Character('%'));
 36         binop.add(new Character('<'));
 37         binop.add(new Character('>'));
 38         binop.add(new Character('='));
 39         uniop = new HashSet<Character>();
 40         uniop.add(new Character('~'));
 41         uniop.add(new Character('!'));
 42         uniop.add(new Character('-'));
 43         // or, plus and star are just sentinels
 44         or = new HashSet<Character>();
 45         plus = new HashSet<Character>();
 46         star = new HashSet<Character>();
 47     }
 48 
 49     public Set<Character> data;
 50     public Edge next;
 51     public int end;
 52 }
 53 
 54 class Node {
 55     public Set<Character> data;
 56     public Node left, right;
 57     
 58     public Node() {
 59         int rand = (new Random()).nextInt(16);
 60         if (rand<2) {
 61             data = Edge.digit;
 62         } else if (rand<5) {
 63             data = Edge.letter;
 64         } else if (rand<7) {
 65             data = Edge.binop;
 66         } else if (rand<8) {
 67             data = Edge.uniop;
 68         } else if (rand<11) {
 69             data = Edge.or;
 70             left = new Node();
 71             right = new Node();
 72         } else if (rand<14) {
 73             data = Edge.plus;
 74             left = new Node();
 75             right = new Node();
 76         } else {
 77             data = Edge.star;
 78             left = new Node();
 79         }
 80     }
 81     public String toString() {
 82         if (data==Edge.digit) {
 83             return "DIGIT";
 84         } else if (data==Edge.letter) {
 85             return "LETTER";
 86         } else if (data==Edge.uniop) {
 87             return "UNIOP";
 88         } else if (data==Edge.binop) {
 89             return "BINOP";
 90         }
 91         String leftStr="", rightStr="";
 92         if (left!=null) {
 93             leftStr = "("+left.toString()+")";
 94         }
 95         if (right!=null) {
 96             rightStr = "("+right.toString()+")";
 97         }
 98         if (data==Edge.or) {
 99             return leftStr+"|"+rightStr;
100         } else if (data==Edge.star) {
101             return leftStr+"*";
102         } else {
103             return leftStr+rightStr;
104         }
105     }
106 }
107 
108 class NFA {
109     private List<State> state;
110     
111     public NFA(Node root) {
112         state = new LinkedList<State>();
113         state.add(new State());    // start
114         state.add(new State());    // final
115         state.get(1).ac = true;
116         preOrder(root,0,1);
117     }
118     private void preOrder(Node sub,int from,int to) {
119         // MYT Construction: regex -> NFA
120         if (sub.data==Edge.or) {
121             preOrder(sub.left,from,to);
122             preOrder(sub.right,from,to);
123         } else if (sub.data==Edge.plus) {
124             int mid = insertState();
125             preOrder(sub.left,from,mid);
126             preOrder(sub.right,mid,to);
127         } else if (sub.data==Edge.star) {
128             int a = insertState();
129             int b = insertState();
130             insertEdge(from,null,a);
131             insertEdge(b,null,a);
132             insertEdge(b,null,to);
133             insertEdge(from,null,to);
134             preOrder(sub.left,a,b);
135         } else {
136             insertEdge(from,sub.data,to);
137         }
138     }
139     private int insertState() {
140         // Insert a new state and return its index:
141         state.add(new State());
142         return state.size()-1;
143     }
144     private void insertEdge(int idx,Set<Character> data,int end) {
145         // Insert an Edge: NTran[idx,end] = data
146         Edge e = new Edge();
147         e.data = data;
148         e.next = state.get(idx).next;
149         e.end = end;
150         state.get(idx).next = e;
151     }
152     public List<State> getStates() {
153         return state;
154     }
155 }
156 
157 class DFA {
158     private List<State> dState;
159     private List<State> nState;
160     private List<Set<State>> nSet;
161     private Queue<Integer> q;
162     private List<Boolean> vis;
163     
164     public DFA(Node root) {
165         nState = (new NFA(root)).getStates();
166         dState = new LinkedList<State>();
167         nSet = new LinkedList<Set<State>>();
168         q = new LinkedList<Integer>();
169         vis = new LinkedList<Boolean>();
170         bfs();
171     }
172     private void bfs() {
173         // Create the first dState:
174         dState.add(new State());
175         nSet.add(new HashSet<State>());
176         addClosure(nState.get(0),nSet.get(0));
177         // Breadth-First Search:
178         q.add(new Integer(0));
179         vis.add(new Boolean(false));
180         while (!q.isEmpty()) {
181             int idx = q.poll().intValue();
182             if (!vis.get(idx)) {
183                 vis.set(idx, new Boolean(true));
184                 addTrans(idx,Edge.digit);
185                 addTrans(idx,Edge.letter);
186                 addTrans(idx,Edge.binop);
187                 addTrans(idx,Edge.uniop);
188             }
189         }
190     }
191     private void addTrans(int idx,Set<Character> data) {
192         // Add to set all nStates that can be accessed from start through data:
193         Set<State> set = new HashSet<State>();
194         for (State nstate:nSet.get(idx)) {
195             Edge itr = nstate.next;
196             while (itr!=null) {
197                 if (itr.data==data) {
198                     addClosure(nState.get(itr.end),set);
199                 }
200                 itr = itr.next;
201             }
202         }
203         if (!set.isEmpty()) {
204             for (int i=0;i<nSet.size();i++) {
205                 // If set exists in nSet, only insert an edge:
206                 if (nSet.get(i).equals(set)) {
207                     insertEdge(idx,data,i);
208                     return;
209                 }
210             }
211             insertEdge(idx,data,dState.size());
212             dState.add(new State());
213             nSet.add(set);
214             for (State itr:set) {
215                 // Mark on accepting states:
216                 if (itr.ac) {
217                     dState.get(dState.size()-1).ac = true;
218                     break;
219                 }
220             }
221             q.add(new Integer(dState.size()-1));
222             vis.add(new Boolean(false));
223         }
224     }
225     private void addClosure(State start,Set<State> set) {
226         // Add the Epsilon-Closure of nState.get(start) to set:
227         set.add(start);
228         Edge itr = start.next;
229         while (itr!=null) {
230             if (itr.data==null && !set.contains(nState.get(itr.end))) {
231                 addClosure(nState.get(itr.end),set);
232             }
233             itr = itr.next;
234         }
235     }
236     private void insertEdge(int idx,Set<Character> data,int end) {
237         // Insert an Edge: DTran[idx,end] = data
238         Edge e = new Edge();
239         e.data = data;
240         e.next = dState.get(idx).next;
241         e.end = end;
242         dState.get(idx).next = e;
243     }
244     public boolean test(int pos,String expr,int i) {
245         // Test expr.char(i) at dState.get(pos):
246         if (i==expr.length()) {
247             return dState.get(pos).ac;
248         }
249         Edge itr = dState.get(pos).next;
250         while (itr!=null) {
251             if (itr.data.contains(new Character(expr.charAt(i)))) {
252                 return test(itr.end,expr,i+1);
253             }
254             itr = itr.next;
255         }
256         return false;
257     }
258 }
259 
260 class Regex {
261     private Node root;
262     private DFA dfa;
263     
264     public Regex() {
265         root = new Node();
266         dfa = new DFA(root);
267     }
268     public String toString() {
269         return root.toString();
270     }
271     public boolean test(String expr) {
272         return dfa.test(0,expr,0);
273     }
274 }
275 
276 public class LexicalAnalyzer {
277     
278     public static void main(String[] args) {
279         Regex regex = new Regex();
280         System.out.println("Regex:	"+regex);
281         Scanner in = new Scanner(System.in);
282         System.out.print("Test Expressions:
	");
283         String expr = in.nextLine();
284         while (!expr.equals("$")) {
285             if (regex.test(expr)) {
286                 System.out.print("	ACCEPTED
	");
287             } else {
288                 System.out.print("	WRONG
	");
289             }
290             expr = in.nextLine();
291         }
292         in.close();
293     }
294 }

  I have been determined to refactor this program (I wrote it last year) since I took part in the Touchpal Interview on Wednesday (April 1, 2015) and failed to give a correct answer at that moment.

 1     public static boolean match(String pattern,String expr) {
 2         if (pattern.length()==0) {
 3             return expr.length()==0;
 4         } else if (pattern.charAt(0)=='.') {
 5             return expr.length()>0 
 6                     && match(pattern.substring(1),expr.substring(1));
 7         } else if (pattern.length()==1) {
 8             return expr.equals(pattern);
 9         } else if (pattern.charAt(1)!='*') {
10             return pattern.charAt(0)==expr.charAt(0)
11                     && match(pattern.substring(1),expr.substring(1));
12         } else {
13             if (match(pattern.substring(2),expr)) {
14                 return true;
15             } else if (pattern.charAt(0)==expr.charAt(0)) {
16                 return match(pattern.substring(2),expr.substring(1))
17                         || match(pattern,expr.substring(1));
18             } else {
19                 return false;
20             }
21         }
22     }

References:

  1. Aho, Alfred V. et al. Compilers: Principles, Techniques, and Tools [M]. 北京:机械工业出版社, 2008-12

原文地址:https://www.cnblogs.com/DevinZ/p/4438319.html