正规表达式 转 NFA C++

  今天来为大家分享一个编译原理中用正规表达式转NFA的小程序

正规表达式就是类似正则一样的式子,例如:(a|b)*abb,最后应该转化为:

大致的处理流程为:

例子中的表达式:(a|b)*abb,|和*都是运算法则,而且容易识别,但是处理abb就不是你那么方便了,所以我们在abb中间加上+号,就可以像|*那样识别了,所以处理后为(a|b)*a+b+b

 

 我们识别出来之后,首先根据书中提供的运算符->NFA部件的图转化为NFA部件,之后再根据优先级和各个部件组建NFA

 

 运算符对应NFA中的各个部件图为:

ε符:

        

φ符:

        

输入符号:

        

| 符:      

  

+符:

    

*符:

    

 

 有一个问题,NFA的开始和终止都是状态集合,但是整改了好多次,没能设计出来,所以该程序产生的NFA均只有一个开始状态和一个终止状态

 

代码注释挺清楚的,我就不过多描述了

 

G[S]->NFA代码如下:

#ifndef _GNFA_
#define _GNFA_

/*
@author:Lv
@time:2018-11
@title:正规式转NFA
*/

#include <iostream>
#include <string>
#include <vector>
#include <map>
#include <set>

namespace GNFAs
    {
#define stds std::

    /*
    @brief 记录NFA每一个状态
    @member 记录名称
    */
    struct state        
        {
        stds string _staName;
        //state(const stds string& name = "#") :_staName(name) {  }
        bool operator<(const state& b)const { return _staName < b._staName; }
        };

    /*
    @brief 记录状态之间转换的边信息
    @member 起始状态、终止状态、状态转换输入符号
    */
    struct edge            
        {
        state _edgStart;        
        state _edgEnd;    
        char _edgSymbol;    
        };

    /*
    NFA-class
    */
    class NFA
        {
    public:
        using ostream = stds ostream;
        using istream = stds istream;
        using exptype = stds string;
        using map_sta = stds vector<edge>;
        using container_sta = stds set<state>;
        using container_sym = stds set<char>;
    public:
        /*
        @brief nfaunit 用于记录NFA数据集合--NFA基本数据类型--NFA单元
        @member K 状态集合
                Σ 字母表
                f 状态映射        
                S 开始状态
                Z 终止状态集合
        */
        typedef struct NFAunit
            {
            container_sta K;
            container_sym Σ;
            map_sta f;
            state S;
            state Z;    

            NFAunit()
                {
                f.clear();
                K.clear();
                Σ.clear();
                Z = state();
                S = state();
                }

            NFAunit(const NFAunit& other)
                :K(other.K)
                ,Σ(other.Σ)
                ,f(other.f)
                ,S(other.S)
                ,Z(other.Z)
                {

                }

            NFAunit& operator=(const NFAunit& other)
                {
                if (this != &other)
                    {
                    K = other.K;
                    Σ = other.Σ;
                    f = other.f;
                    S = other.S;
                    Z = other.Z;
                    }
                return *this;
                }

            NFAunit& operator+=(const NFAunit& other)
                {
                K.insert(other.K.begin(), other.K.end());
                Σ.insert(other.Σ.begin(), other.Σ.end());
                f.insert(f.end(), other.f.begin(), other.f.end());
                return *this;
                }
            } _type_;
        
    public:
        NFA();

        NFA(const NFA&);

        NFA& operator=(const NFA&);

        _type_ getNFA()const { return _data; }

        exptype getExpression()const { return _expression; }


    public:
        /*
        @brief 输入正规式
        */
        void input();

        /*
        @brief 转成NFA
        */
        void toNFA();

        /*
        @brief 展示NFA
        */
        void show()const;

        /*
        @brief 刷新数据
        */
        void update();

        /*
        @brief 运行
        */
        void run();

    private:
        /*
        @brief 检查正规式是否合法
        @retur 是否合法
        */
        bool _checkExp()const;

        /*
        @brief 检查正规式字符是否合法
        @retur 是否合法
        */
        bool _checkSym()const;

        /*
        @brief 检查正规式语法,如:括号匹配
        @retur 是否合法
        */
        bool _checkSync()const;

        /*
        @brief 做一些处理便于表达式转换
        */
        void change();

        /*
        @brief 中缀转后缀
        */
        void postexp();

        /*
        @brief 栈内优先级
        */
        int inpriority(const char)const;

        /*
        @brief 栈外优先级
        */
        int outpriority(const char)const;

        /*
        @brief 整合a|b
        */
        _type_ _or(_type_, _type_);

        /*
        @brief 整合ab
        */
        _type_ _mul(_type_, _type_);

        /*
        @brief 整合a*
        */
        _type_ _star(_type_);

        /*
        @brief 整合单元
        */
        _type_ _unit(const char);

    private:
        int _staNum;
        ostream& _out;
        istream& _in;
        NFAunit _data;
        exptype _expression;
        };
    }

#endif //_GNFA_
#include "GNFA.h"
using namespace GNFAs;

#include <stack>

using stack_unit = stds stack<NFA::NFAunit>;
using stack_char = stds stack<char>;

#define enter stds endl

NFA::NFA()
    :_staNum(0)
    , _out(std::cout)
    , _in(std::cin)
    {    }

void NFA::input()
{
    _out << "请输入正规式:" << enter;

    while (!_checkExp())        _in >> _expression;
}

bool NFA::_checkExp()const
{
    if (!_checkSym())
    {
        _out << "含有非法字符!" << enter;
        return false;
    }
    if (!_checkSync())
    {
        _out << "含有语法错误!" << enter;
        return false;
    }
    return _expression != exptype();
}

bool NFA::_checkSym()const
{
    for (int i = 0; i < _expression.size(); ++i)
    {
        if (islower(_expression[i]))    continue;
        else if (_expression[i] == '(' || _expression[i] == ')' || _expression[i] == '*' || _expression[i] == '|')
            continue;
        else
            return false;
    }
    return true;
}

bool NFA::_checkSync()const
{
    stack_char stack;
    for (int i = 0; i < _expression.size(); ++i)
    {
        if (_expression[i] == '(')
            stack.push('(');
        else if (_expression[i] == ')')
        {
            if (stack.size() && stack.top() == '(')
                stack.pop();
            else return false;
        }
        if (_expression[i] == '*')
        {
            if (i &&_expression[i - 1] != '|')
                continue;
            else return false;
        }
    }
    if (stack.size())return false;
    return true;
}

void NFA::change()
{
    exptype t;

    char s, e;

    for (int i = 0; i < _expression.size(); ++i)
    {
        s = _expression[i];
        e = _expression[i + 1];
        t += s;

        if (s != '(' && s != '|' && islower(e))        t += '+';
        else if (e == '(' && s != '|' && s != '(')    t += '+';
    }
    t += e;
    _expression = t;
}

int NFA::inpriority(const char c)const
{
    switch (c)
    {
    case '#': return 0;
    case '(': return 1;
    case '*': return 7;
    case '|': return 5;
    case '+': return 3;
    case ')': return 8;
    }
    return -1;
}

int NFA::outpriority(const char c)const
{
    switch (c)
    {
    case '#': return 0;
    case '(': return 8;
    case '*': return 6;
    case '|': return 4;
    case '+': return 2;
    case ')': return 1;
    }
    return -1;
}

void NFA::postexp()
{
    _expression += '#';
    exptype t = "";
    stack_char s;
    char ch = '#', ch1, op;
    s.push(ch);

    //读一个字符
    int read_location = 0;
    ch = _expression.at(read_location++);
    while (!s.empty())
    {
        if (islower(ch))
        {
            t += ch;
            ch = _expression.at(read_location++);
        }
        else
        {
            ch1 = s.top();
            if (inpriority(ch1)<outpriority(ch))
            {
                s.push(ch);
                ch = _expression.at(read_location++);
            }
            else if (inpriority(ch1)>outpriority(ch))
            {
                op = s.top();
                s.pop();
                t += op;
            }
            else 
            {
                op = s.top();
                s.pop();

                if (op == '(')
                    ch = _expression.at(read_location++);
            }
        }
    }
    t.erase(t.end() - 1);
    _expression = t;
}

void NFA::toNFA()
{
    char item;
    _type_ left, right;
    stack_unit stack;

    for (int i = 0; i < _expression.size(); ++i)
    {
        item = _expression[i];
        switch (item)
        {
        case '|':
            right = stack.top();
            stack.pop();
            left = stack.top();
            stack.pop();
            _data = _or(left, right);
            stack.push(_data);
            break;
        case '*':
            left = stack.top();
            stack.pop();
            _data = _star(left);
            stack.push(_data);
            break;
        case '+':
            right = stack.top();
            stack.pop();
            left = stack.top();
            stack.pop();
            _data = _mul(left, right);
            stack.push(_data);
            break;
        default:
            _data = _unit(item);
            stack.push(_data);
        }
    }

    _data = stack.top();
    stack.pop();
}

NFA::_type_ NFA::_or(_type_ unitl, _type_ unitr)
{
    _type_ unit;
    exptype name;
    edge e1, e2, e3, e4;

    state start { name += _staNum++ + 'A' };
    name = "";
    state end{ name += _staNum++ + 'A' };

    e1._edgStart = start;
    e1._edgEnd = unitl.f[0]._edgStart;
    e1._edgSymbol = '#';

    e2._edgStart = start;
    e2._edgEnd = unitr.f[0]._edgStart;
    e2._edgSymbol = '#';

    e3._edgStart = unitl.f[unitl.f.size() - 1]._edgEnd;
    e3._edgEnd = end;
    e3._edgSymbol = '#';

    e4._edgStart = unitr.f[unitr.f.size() - 1]._edgEnd;
    e4._edgEnd = end;
    e4._edgSymbol = '#';

    unit = unitl;
    unit += unitr;
    unit.f.push_back(e1);
    unit.f.push_back(e2);
    unit.f.push_back(e3);
    unit.f.push_back(e4);

    unit.S = start;
    unit.Z = end;

    return unit;
}

NFA::_type_ NFA::_mul(_type_ unitl, _type_ unitr)
{
    for (auto &it : unitr.f)
    {
        if (it._edgStart._staName == unitr.S._staName)
        {
            it._edgStart = unitl.Z;
            _staNum--;
        }
        else if (it._edgEnd._staName == unitr.S._staName)
        {
            it._edgEnd = unitl.Z;
            _staNum--;
        }
    }
    unitr.S = unitl.Z;
    unitl += unitr;
    unitl.Z = unitr.Z;

    return unitl;
}

NFA::_type_ NFA::_star(_type_ u)
{
    _type_ unit;
    exptype name;
    edge e1, e2, e3, e4;

    state start{ name += _staNum++ + 'A' };
    name = "";
    state end{ name += _staNum++ + 'A' };

    e1._edgStart = start;
    e1._edgEnd = end;
    e1._edgSymbol = '#';

    e2._edgStart = u.Z;
    e2._edgEnd = u.S;
    e2._edgSymbol = '#';

    e3._edgStart = start;
    e3._edgEnd = u.Z;
    e3._edgSymbol = '#';

    e4._edgStart = u.Z;
    e4._edgEnd = start;
    e4._edgSymbol = '#';

    unit = u;

    unit.f.push_back(e1);
    unit.f.push_back(e2);
    unit.f.push_back(e3);
    unit.f.push_back(e4);

    unit.S = start;
    unit.Z = end;

    return unit;
}

NFA::_type_ NFA::_unit(const char ch)
{
    _type_ unit;
    exptype name;
    edge e;

    state start{  name += _staNum++ + 'A' };
    name = "";
    state end{ name += _staNum++ + 'A' };

    e._edgStart = start;
    e._edgEnd = end;
    e._edgSymbol = ch;

    unit.f.push_back(e);
    unit.S = start;
    unit.Z = end;

    return unit;
}

void NFA::show()const
{
    _out << "NFA 的起始状态:" << _data.S._staName << enter;
    _out << "NFA 的结束状态:" << _data.Z._staName << enter << enter;

    for (auto it : _data.f)
    {
        _out << "from state:  " << it._edgStart._staName
            << "	    to state: " << it._edgEnd._staName
            << "		by  ";
        if (it._edgSymbol == '#') _out << R"+(ε)+" << enter;
        else _out << it._edgSymbol << enter;
    }
    _out << enter;
}

void NFA::update()
{
    for (auto it : _data.f)
    {
        _data.K.insert(it._edgStart);
        _data.K.insert(it._edgEnd);
        _data.Σ.insert(it._edgSymbol);
    }
}

void NFA::run()
{
    input();

    change();

    postexp();

    toNFA();

    show();

    update();
}
#include "GNFA.h"
using namespace GNFAs;

int main()
{
    NFA nfa;
    nfa.run();
}

 测试结果:

 

 感谢您的阅读,生活愉快~

原文地址:https://www.cnblogs.com/lv-anchoret/p/10153745.html