正则表达式在c++中的实现

这个是最基础的解释器,它实现了串联、并联、克林闭包,字符集为除了()|*的ASCII字符,而且不能判断表达式合法,效率还很低,内存利用率低。

它只能判读输入的字符串是否符合表达式。

#include<bits/stdc++.h>
using namespace std;
namespace GDRegex
{
    const int EPSILON=-143165576;
    const int ISEND=-71582788;
    const int MAXN=1<<16;
    struct Node;
    struct Edge;
    struct Fa;
    int TI,c1[MAXN],c2[MAXN];
    struct Edge
    {
        Edge*next;
        Node*to;
        int w;
    };
    struct Node
    {
        Edge*head,*tail;
        int num;
        Node()
        {
            num=++TI;
            head=new Edge;
            head->w=ISEND;
            tail=head;
        }
        inline void add(Node*v,int w)
        {
            assert(w!=ISEND);
            tail->to=v;
            tail->w=w;
            tail->next=new Edge;
            tail=tail->next;
            tail->w=ISEND;
        }
    }*wait1[MAXN],*wait2[MAXN];
    struct FA
    {
        Node*start,*end;
        int endPos;
        FA()
        {
            start=new Node();
            end=new Node();
            start->add(end,EPSILON);
            endPos=end->num;
        };
        FA(int ch)
        {
            start=new Node();
            end=new Node();
            start->add(end,ch);
            endPos=end->num;
        }
        FA(string str)
        {
            start=new Node();
            end=new Node();
            Node*p=start;
            for(int i=0;i<str.size();++i)
            {
                Node*x=new Node;
                p->add(x,str[i]);
                p=x;
            }
            p->add(end,EPSILON);
            endPos=end->num;
        }
        bool match(string str)
        {
            int tot=0;
            map<pair<int,int>,bool>vis;
            int len1=0,len2=0,len=str.size();
            c1[len1]=0;
            wait1[len1++]=start;
            vis[make_pair(0,start->num)]=1;
            while(len1)
            {
                len2=0;
                for(int i=0;i<len1;++i)
                {
                    Node*u=wait1[i];
                    for(Edge*e=u->head;e->w!=ISEND;e=e->next)
                    {
                        Node*v=e->to;
//                        cout<<"NOW "<<u->num<<" "<<v->num<<" "<<(char)e->w<<" "<<str[c1[i]]<<" "<<c1[i]<<endl;
                        ++tot;
                        if(e->w==EPSILON)
                        {
                            if(c1[i]==len&&v->num==endPos)
                                return true;
                            if(vis[make_pair(v->num,c1[i])])
                                continue;
                            c2[len2]=c1[i];
                            wait2[len2]=v;
                            vis[make_pair(v->num,c2[len2++])]=1;
                        }
                        else if(e->w==str[c1[i]]&&!vis[make_pair(v->num,c1[i]+1)])
                        {
                            if(c1[i]+1==len&&v->num==endPos)
                                return true;
                            if(c1[i]+1>len)
                                continue;
                            c2[len2]=c1[i]+1;
                            wait2[len2]=v;
                            vis[make_pair(v->num,c2[len2++])]=1;
                        }
                    }
                }
                for(int i=0;i<max(len1,len2);++i)
                    swap(wait1[i],wait2[i]),swap(c1[i],c2[i]);
                swap(len1,len2);
            }
            return false;
        }
    };
    FA getSeries(const FA&A,const FA&B)
    {
        FA C=A;
        C.end->add(B.start,EPSILON);
        C.end=B.end;
        C.endPos=B.endPos;
        return C;
    }
    FA getParallel(const FA&A,const FA&B)
    {
        FA C,D=A,E=B;
        C.end->add(D.start,EPSILON);
        C.end->add(E.start,EPSILON);
        D.end->add(E.end,EPSILON);
        C.end=E.end;
        C.endPos=E.endPos;
        return C;
    }
    FA getKleene(const FA&A)
    {
        FA B,C=A;
        B.start->add(C.start,EPSILON);
        C.end->add(B.start,EPSILON);
        return B;
    }
    FA getFA(string str)
    {
        stack<pair<FA,int> >S;
        for(int i=0;i<str.size();++i)
        {
            if(str[i]=='(')
                S.push(make_pair(FA(),2));
            else if(str[i]==')')
            {
                pair<FA,int>A=S.top();
                S.pop();
                while(!S.empty())
                {
                    pair<FA,int>B=S.top();
                    if(B.second==2)
                        break;
                    else if(B.second==0)
                        A.first=getSeries(B.first,A.first);
                    else if(B.second==1)
                        A.first=getParallel(B.first,A.first);
                    S.pop();
                }
                A.second=0;
                S.push(A);
            }
            else if(str[i]=='*')
            {
                pair<FA,int>A=S.top();
                S.pop();
                A.first=getKleene(A.first);
                A.second=0;
                S.push(A);
            }
            else if(str[i]=='|')
            {
                pair<FA,int>A=S.top();
                S.pop();
                while(!S.empty())
                {
                    pair<FA,int>B=S.top();
                    if(B.second==1||B.second==2)
                        break;
                    A.first=getSeries(B.first,A.first);
                    S.pop();
                }
                A.second=1;
                S.push(A);
            }
            else
                S.push(make_pair(FA(str[i]),0));
        }
        FA A;
        while(!S.empty())
        {
            pair<FA,int>B=S.top();
            S.pop();
            if(B.second==0)
                A=getSeries(B.first,A);
            else if(B.second==1)
                A=getParallel(B.first,A);
        }
        return A;
    }
}
int main()
{
    ios::sync_with_stdio(false);
    using namespace GDRegex;
    FA A=getFA("(0|1|2|3|4|5|6|7|8|9)(0|1|2|3|4|5|6|7|8|9)(0|1|2|3|4|5|6|7|8|9)");
    while(true)
    {
        string str;
        cin>>str;
        cout<<A.match(str)<<endl;
    }
    return 0;
}
View Code

 2020_02_01

原文地址:https://www.cnblogs.com/GreenDuck/p/12247505.html