[源码]试写 XML 解析器

写了一个针对 XML 文件的词法分析器。程序的分析引擎是用 switch + state(状态机)实现的,主要是为了加深对词法分析的理解。




 
// Main lexer loop: keeps going until sr.Peek() returns -1, handling one character per iteration.
while (sr.Peek()!=-1)
            
{
                
// Disabled debug aid: would print each character, its numeric code and its 16-digit binary form.
//int c = sr.Read();
                
//Char ch = (char)c;
                
//Console.WriteLine("{0}\t {1}\t {2}",ch,c, Convert.ToString(c,2).PadLeft(16,'0') );
                ////switch
                // Consume the next character into c, then dispatch on the current lexer state.
                c = (char)sr.Read();
                
switch(state)
                
{
                    
case 0: // Plain content outside of any tag.
    // NOTE(review): jumpspace() is defined outside this fragment; presumably it skips whitespace - confirm.
    jumpspace();
    if (c == '<')
    {
        // A tag is opening; state 10 inspects the character that follows '<'.
        state = 10;
    }
    break;
                    
case 10: // Character immediately after '<': decides what kind of tag this is.
    switch (c)
    {
        case '?': // "<?"
            state = 11;
            break;

        case ' ':
        case '\t':
        case '\r':
        case '\n':
            // Whitespace is not allowed directly after '<'.
            throw new Exception("该位置上不允许有空白");

        case '/': // "</" - closing tag
            state = 50;
            break;

        default: // First character of an element name.
            state = 20;
            sb.Remove(0, sb.Length);
            sb.Append(c);

            // Consume the rest of the name by peeking ahead, so the character that
            // terminates the name is left in the reader for the next state.
            while (sr.Peek() != -1)
            {
                char nextchar = (char)sr.Peek();

                // Letters AND digits continue a name, matching what states 20 and 50
                // accept; with letters only, "<h1>" was recorded as "h" and could
                // never match its "</h1>" closing tag.
                if (char.IsLetterOrDigit(nextchar))
                {
                    sb.Append(nextchar);
                    c = (char)sr.Read();
                }
                else
                {
                    // Name complete: print it and push it as the currently open element.
                    Console.WriteLine(sb.ToString());
                    StID.Push(sb.ToString());
                    sb.Remove(0, sb.Length);

                    // State 15 expects '/', '>' or the start of an attribute.
                    state = 15;
                    // NOTE(review): jumpspace() is defined outside this fragment; presumably it skips whitespace - confirm.
                    jumpspace();
                    break;
                }
            }

            // If the input ended inside the name, state stays 20 and the outer loop terminates.
            break;
    }
    break;
                    
case 11: // Inside "<?...": c is the first character after "<?".
    // Skip the whole declaration / processing instruction, up to and including "?>",
    // then return to plain-content state. It is skipped here in one go because no
    // other state in this switch handles the body of a "<?...?>" construct.
    while (!(c == '?' && sr.Peek() == '>'))
    {
        if (sr.Peek() == -1)
        {
            // Input ended before the terminating "?>" was found.
            throw new Exception("处理指令缺少结束标记 '?>'");
        }
        c = (char)sr.Read();
    }
    sr.Read(); // consume the closing '>'
    state = 0;
    break;
                    
case 15: // After an element name or attribute value: expects "/>", '>' or an attribute name.
    // NOTE(review): jumpspace() is defined outside this fragment; presumably it skips whitespace - confirm.
    jumpspace();

    if (c == '>')
    {
        // End of the opening tag: back to the initial state.
        state = 0;
        break;
    }

    if (c == '/')
    {
        // Self-closing element: it is finished, so pop and print its name.
        Console.WriteLine(StID.Pop());

        // The very next character must close the tag.
        c = (char)sr.Read();
        if (c != '>')
        {
            throw new Exception("/>");
        }

        // What follows may be another node or text.
        state = 0;
        break;
    }

    if (!char.IsLetterOrDigit(c))
    {
        throw new Exception("错误的属性");
    }

    // Start of an attribute name: let state 30 process this same character.
    state = 30;
    goto case 30;
                    
case 20: // Continuation of an element name (second character onwards).
    if (char.IsLetterOrDigit(c))
    {
        sb.Append(c);
    }
    else if (char.IsWhiteSpace(c) || c == '>')
    {
        // Name complete: print it and push it as the currently open element.
        Console.WriteLine(sb.ToString());
        StID.Push(sb.ToString());
        sb.Remove(0, sb.Length);

        if (c == '>')
        {
            // The opening tag is closed, so element content follows. State 15 would
            // reject the '<' or text that comes next, so go back to plain content.
            state = 0;
        }
        else
        {
            // Skip the rest of the whitespace run, leaving the first non-blank
            // character unread for the next iteration.
            while (sr.Peek() != -1 && char.IsWhiteSpace((char)sr.Peek()))
            {
                sr.Read();
            }

            // State 15 accepts '/', '>' or an attribute name; going straight to the
            // attribute-name state rejected "<a >" and "<a />".
            state = 15;
        }
    }
    else
    {
        throw new Exception("无效的字符");
    }
    break;

                    
case 30: // Attribute name: first and following characters.
    if (!char.IsLetterOrDigit(c))
    {
        // Not a name character: hand this same character to state 31's logic.
        // Note that `state` itself is not changed on this path.
        goto case 31;
    }

    // Accumulate the attribute name.
    sb.Append(c);
    break;
                    
case 31: // Between attribute name and value: only '=' is accepted.
    // NOTE(review): jumpspace() is defined outside this fragment; presumably it skips whitespace - confirm.
    jumpspace();
    if (c != '=')
    {
        throw new Exception("属性无效字符!");
    }

    // '=' seen: the opening quote of the value is expected next.
    state = 32;
    break;
                    
case 32: // Opening quote of an attribute value.
    // NOTE(review): jumpspace() is defined outside this fragment; presumably it skips whitespace - confirm.
    jumpspace();
    if (c == '\'')
    {
        state = 36; // single-quoted value
    }
    else if (c == '\"')
    {
        state = 37; // double-quoted value
    }
    else
    {
        throw new Exception("应该是引号");
    }
    break;
                    
case 36: // Inside a single-quoted attribute value.
    if (c == '\'')
    {
        // Closing quote reached. The intermediate state 30 is kept as written because
        // jumpspace() is defined outside this fragment and may observe it - confirm.
        state = 30;
        jumpspace();
        state = 15;
    }
    else if (c == '>')
    {
        // A '>' before the closing quote is reported as a missing end quote.
        throw new Exception("没有结束引号");
    }

    // Any other character belongs to the value and is not stored.
    break;
                    
case 37: // Inside a double-quoted attribute value.
    if (c == '\"')
    {
        // Closing quote reached. The intermediate state 30 is kept as written because
        // jumpspace() is defined outside this fragment and may observe it - confirm.
        state = 30;
        jumpspace();
        state = 15;
    }
    else if (c == '>')
    {
        // A '>' before the closing quote is reported as a missing end quote.
        throw new Exception("没有结束引号");
    }

    // Any other character belongs to the value and is not stored.
    break;
                    
case 50: // After "</": read the closing tag name and match it against the open element.
    sb.Remove(0, sb.Length);
    if (!char.IsLetterOrDigit(c))
    {
        throw new Exception("错误的结束字符");
    }
    sb.Append(c);

    // Consume the rest of the name by peeking ahead, so the character that
    // terminates the name is left in the reader for state 51.
    while (sr.Peek() != -1)
    {
        tempc = (char)sr.Peek();
        if (char.IsLetterOrDigit(tempc))
        {
            sb.Append(tempc);
            c = (char)sr.Read();
            continue;
        }

        // A closing tag with no element open would otherwise surface as a generic
        // empty-stack error; report it as a markup error instead.
        if (StID.Count == 0)
        {
            throw new Exception(string.Format("结束标记 '{0}' 没有对应的开始标记", sb.ToString()));
        }

        // Pop the innermost open element once and compare it with the closing name.
        tempstr = StID.Pop();
        if (tempstr != sb.ToString())
        {
            throw new Exception(string.Format("结束标记 '{0}' 与开始标记 '{1}' 不匹配"
                , sb.ToString()
                , tempstr));
        }

        state = 51; // handle optional blanks and the final '>'
        break;
    }
    break;
                    
case 51: // Tail of a closing tag: only '>' is accepted here.
    // NOTE(review): jumpspace() is defined outside this fragment; presumably it skips whitespace - confirm.
    jumpspace();
    if (c != '>')
    {
        throw new Exception("名称包含无效字符");
    }

    // Closing tag complete: back to plain content.
    state = 0;
    break;
                        

 

                }





下载源码

原文地址:https://www.cnblogs.com/lxf120/p/1025756.html