编译器学习(二)非确定有穷自动机的整理

重写了上次的代码

1.将node分为三种,voidchar,char,manychars,分别表示空node,单字符node,多字符node(针对自定义的w, ,a);

2.顺序建树;

3.空节点的父子节点为非空节点,非空节点的父子节点为空节点;

4.空节点有多个子节点,非空节点只有一个子节点,根节点为空节点;

5.每次receive一个正则表达式,就在根节点建一子树;

6.转确定有穷自动机时,每次只需沿子节点前进两个节点(未实现)。

这样就清晰多了。

// main.cpp
#include <iostream>
#include <map>
#include <vector>
#include <string>
#include <cstdlib>
using namespace std;

#define DEBUG
const int MAXMATCHSIZE = 5;
const char *word = "\w";
const char *number = "\n";
const char *alnum = "\a";
const char *allchar = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

struct node {
    int type;
    vector<node*> voidnode;    //  type == 0
    char schar;                //  type == 1
    vector<char> mchar;        //  type == 2
    node *next;                //  type >= 1
    int number;
    node(int t = 0):next(NULL), type(t), voidnode(), mchar(), schar(0) { 
        static int count = 0;
        number = count++;
        #ifdef DEBUG 
            cout << "new node,count=" << count << ",type=" << type << endl;
        #endif
    }
};
class parser {
private:
    node *head;
    map< string, vector<node*> > endnode;
    node* addnode(const string& name,node *cur, char c, bool pack_flag, bool end_flag);// a letter
    node* addnode(const string& name,node *cur, string, bool pack_flag, bool end_flag);// or string
public:
    parser();
    void receive(string ,string);
};

int main() {
    parser p;
    p.receive("begin[\w]end","first");
    p.receive("123[word]456","second");
    return 0;
}

//  ensure that cur --> voidchar node
node* parser::addnode(const string& name,node *cur, char c, bool pack_flag, bool end_flag) {
    //cout <<c<<endl;
    node *p = new node(1);
    p->schar = c;
    p->next = new node;
    cur->voidnode.push_back(p);
    if (pack_flag) p->next->voidnode.push_back(p);
    if (end_flag) endnode[name].push_back(cur->next);
    return p->next;
}
node* parser::addnode(const string& name,node *cur, string s, bool pack_flag, bool end_flag) {
    //cout<<s<<endl;
    node *p = new node(2);
    p->next = new node;
    if (s[0] == '\') {
        int begin = 0, end = 0;
        if (s[1] == word[1])begin = 10, end = 10 + 26 + 26;
        else if (s[1] == alnum[1])end = 36 + 26;
        else end = 10;
        p->mchar = vector<char>(allchar + begin, allchar + end);
    } else {
        p->mchar = vector<char>(s.begin(),s.end());
    }
    cur->voidnode.push_back(p);
    if (pack_flag) p->next->voidnode.push_back(cur);
    if (end_flag) endnode[name].push_back(cur->next);
    return p->next;
}

parser::parser():head(NULL) {
    head = new node();
}

void parser::receive(string s, string name) {
    if (s.empty()) return;
    node *cur = head;
    bool pack_flag = false;
    int len = s.size();
    for (int i = 0; i < len; ) {
        if ( s[i] == '*' ) pack_flag = true, i++;
        else if (s[i] == '[') {
            int j;
            for (j = i + 1; j < len && s[j] != ']'; j++); 
            cur = addnode(name,cur ,s.substr(i + 1 , j - i - 1) , pack_flag, j == len - 1);
            pack_flag = false;
            i = j + 1;
        }
        else if ( isalnum(s[i]) ) {
            cur = addnode(name,cur, s[i], pack_flag, i == 0);
            pack_flag = false;
            i++;
        }
    }
}
原文地址:https://www.cnblogs.com/backinfile/p/5911191.html