最初步的正则表达式引擎:将显示的连接符改为了非显示的连接符

由于当前的连接符变为非显示,所以在有些时候需要考虑当前输入指针所指的位置是否缺少连接符,如果缺少,则将连接符入栈。

  1 #include <stdio.h>
  2 #include <malloc.h>
  3 #include <string.h>
  4 //这里默认是没有显示的连接运算符,运算符的优先级为括号、闭包*、连接.、并|
  5 //在括号后及闭包后可能缺少显示的连接符,因此需要考虑添加连接符,而由于并操作符的优先级比连接符低
  6 //所以就不需要在处理并操作符的时候去考虑是否缺少显示的连接符了
  7 int token[100];
  8 int token_pointer;
  9 char reg_operator[100];
 10 int reg_operator_pointer;
 11 int name_number;
 12 int input_pointer;
 13 char reg_input[20];
 14 int is_operator(char for_in)//判断输入字符是否是操作符
 15 {
 16     switch(for_in)
 17     {
 18     case'(':
 19     case')':
 20     case'*':
 21     case'|':
 22          return 1;
 23     default: return 0;
 24     }
 25 }
 26 void tackle_or()//处理并操作符
 27 {
 28     if(reg_operator_pointer!=0)//如果操作符栈中已经有操作符了
 29     {
 30         if(reg_operator[reg_operator_pointer-1]!='(')//括号另外说
 31         {
 32             name_number++;
 33             if(reg_operator[reg_operator_pointer-1]=='.')
 34                 //如果前面的优先级比当前的高,则处理前面的优先级
 35             {
 36                 printf("name%d is concat of name%d and name%d
",name_number,token[token_pointer-2],token[token_pointer-1]);

 37             }
 38             else
 39                 //这里处理的是相同优先级的情况,其实这里可以与前面的合并的,只不过打印信息不同
 40             {
 41                 printf("name%d is  name%d or name%d
",name_number,token[token_pointer-2],token[token_pointer-1]);
 42             }
 43             token[token_pointer-2]=name_number;
 44             token_pointer--;
 45             reg_operator[reg_operator_pointer-1]='|';
 46             input_pointer++;
 47         }
 48         else//对于括号,则直接入栈
 49         {
 50             reg_operator[reg_operator_pointer++]='|';
 51             input_pointer++;
 52         }
 53     }
 54     else//对于空操作符栈,也是直接入栈
 55     {
 56         reg_operator[reg_operator_pointer++]='|';
 57         input_pointer++;
 58     }
 59 }
 60 void tackle_cat()//处理连接符,事实上跟前面的or操作符讨论的差不多,差异就在优先级那
 61 {
 62     if(reg_operator_pointer!=0)//如果操作符栈不为空
 63     {
 64         if(reg_operator[reg_operator_pointer-1]=='(')//如果前面有括号,则直接入栈
 65         {
 66             reg_operator[reg_operator_pointer++]='.';
 67         }
 68         else//对于前面不是括号的情况下
 69         {
 70             if(reg_operator[reg_operator_pointer-1]=='.')//优先级相同则输出前面的那个
 71             {
 72                 name_number++;
 73                 printf("name%d is the concat of name%d and name%d
",name_number,token[token_pointer-2],token[token_pointer-1]);
 74                 token[token_pointer-1]=0;
 75                 token[token_pointer-2]=name_number;
 76                 token_pointer--;
 77             }
 78             else//否则的话,前面的优先级比当前优先级低,操作符入栈
 79             {
 80                 reg_operator[reg_operator_pointer++]='.';
 81             }
 82         }
 83     }
 84     else//如果操作符栈为空,则入栈
 85     {
 86         reg_operator[reg_operator_pointer++]='.';
 87     }
 88 }
 89 void tackle_parenthesis(void)//处理闭括号模式,这里有点复杂
 90 {
 91     if(reg_operator[reg_operator_pointer-1]=='(')//如果前面那个操作符为开括号,则匹配输出
 92     {
 93         name_number++;
 94         printf("name%d is (name%d)
",name_number,token[token_pointer-1]);
 95         token[token_pointer-1]=name_number;
 96         input_pointer++;
 97         reg_operator[--reg_operator_pointer]='';
 98         //这时候需要考虑后面的是否少了显示的连接符,如果判断缺少连接符,则需要加上去
 99         if(!is_operator(reg_input[input_pointer]))
100         {
101             if(reg_input[input_pointer]!='')
102             {
103                 tackle_cat();
104             }
105         }
106         else
107         {
108             if(reg_input[input_pointer]=='(')
109             {
110                 tackle_cat();
111             }
112         }
113     }
114     else//如果闭括号前面还有运算符,那么根据他们的优先级输出,这个时候输入指针是不变的,注意
115     {
116         name_number++;
117         if(reg_operator[reg_operator_pointer-1]=='.')
118         {
119             printf("name%d is the concat of name%d and name%d
",name_number,token[token_pointer-2],token[token_pointer-1]);
120         }
121         else
122         {
123             printf("name%d is  name%d or name%d
",name_number,token[token_pointer-2],token[token_pointer-1]);
124         }
125         token[token_pointer-1]=0;
126         token[token_pointer-2]=name_number;
127         token_pointer--;
128         reg_operator_pointer--;
129     }
130 }
131 int main(void)
132 {
133     reg_operator_pointer=name_number=token_pointer=0;
134     for(input_pointer=0;input_pointer<100;input_pointer++)//初始化栈 
135     {
136         reg_operator[input_pointer]='';
137         token[input_pointer]=0;
138     }
139     input_pointer=0;
140     printf("please  type in you regex short phrase
");
141     scanf("%s",reg_input);
142     while(reg_input[input_pointer]!='')
143     {
144         if(!is_operator(*(reg_input+input_pointer)))//对于操作符和非操作符分开讨论
145         {
146             name_number++;
147             token[token_pointer++]=name_number;
148             printf("name%d is %c
",name_number,*(reg_input+input_pointer));
149             input_pointer++;
150             //非操作符直接进字符栈,这个时候需要考虑后面是否缺少显示的连接符
151             if(!is_operator(reg_input[input_pointer]))
152             {
153                 if(reg_input[input_pointer]!='')
154                 {
155                     tackle_cat();
156                 }
157             }
158             else
159             {
160                 if(reg_input[input_pointer]=='(')
161                 {
162                     tackle_cat();
163                 }
164             }
165         }
166         else//通过多路选择来处理操作符的情况
167         {
168             switch(reg_input[input_pointer])
169             {
170             case '('://开括号直接入栈
171                 reg_operator[reg_operator_pointer++]='(';
172                 input_pointer++;
173                 break;
174             case ')'://闭括号专门处理
175                 tackle_parenthesis();
176                 break;
177             case '*'://由于*运算符的优先级第二高,只比括号低,所以可以直接输出
178                 name_number++;
179                 printf("name%d is multiple of name%d
",name_number,token[token_pointer-1]);
180                 token[token_pointer-1]=name_number;
181                 input_pointer++;
182                 //这个时候仍然需要考虑后面缺少显示连接符的情况
183                 if(!is_operator(reg_input[input_pointer])&&reg_input[input_pointer]!='')
184                 {
185                     tackle_cat();
186                 }
187                 else
188                 {
189                     if(reg_input[input_pointer]=='(')
190                     {
191                         tackle_cat();
192                     }
193                 }
194                 break;
195             case '|'://对于并操作符,调用函数处理
196                 tackle_or();
197                 break;
198             default: break;
199             }
200         }
201     }
202     while(reg_operator_pointer>=1)//如果全部的输入都弄完了,可是 操作符栈中还有数据,则输出 
203     {
204         name_number++;
205         if(reg_operator[reg_operator_pointer-1]=='.')
206         {
207             printf("name%d is concat of name%d and name%d
",name_number,token[token_pointer-2],token[token_pointer-1]);    
208         }
209         else
210         {
211             printf("name%d is name%d or name%d
",name_number,token[token_pointer-2],token[token_pointer-1]);
212         }
213         token[token_pointer-2]=name_number;
214         token_pointer--;
215         reg_operator_pointer--;
216     }
217 }
原文地址:https://www.cnblogs.com/huangfeidian/p/3149674.html