10.10词法分析程序的设计与实现

词法分析程序(Lexical Analyzer)要求:

- 从左至右扫描构成源程序的字符流

-  识别出有词法意义的单词(Lexemes)

-  返回单词记录(单词类别,单词本身)

-  滤掉空格

-  跳过注释

-  发现词法错误

 

程序结构:

输入:字符流(什么输入方式,什么数据结构保存)

处理:

–遍历(什么遍历方式)

–词法规则

输出:单词流(什么输出形式)

–二元组

单词类别:

1.标识符(10)

2.无符号数(11)

3.保留字(一词一码)

4.运算符(一词一码)

5.界符(一词一码)

单词符号

种别码

单词符号

种别码

begin

1

:

17

if

2

:=

18

then

3

<

20

while

4

<=

21

do

5

<> 

22

end

6

>

23

l(l|d)*

10

>=

24

dd*

11

=

25

+

13

;

26

-

14

(

27

*

15

)

28

/

16

#

0

源代码如下:

#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <iostream>
using namespace std;

// Reserved words of the toy language. The token code of reserved[k] is k + 1
// (begin=1, if=2, then=3, while=4, do=5, end=6).
// Declared as pointers to const: string literals are not modifiable in C++.
const char *reserved[6] = {"begin","if","then","while","do","end"};
// input : raw source text read by main(), terminated by '#'.
// output: lexeme of the most recently scanned token (at most 7 chars + '\0').
char input[80],output[8]; 
// Most recently examined character.
char ch;
// syn: token code of the most recently scanned token.
// p  : index of the next unread character in input.
// i  : scratch index.
int syn,p,i;
// m, n : scratch indices.
// row  : number of newline tokens seen so far (zero-initialized, static storage).
// sum  : numeric value of the most recently scanned unsigned number.
// count: number of characters stored in input (including the final '#').
// NOTE(review): with `using namespace std;` in effect, the name `count` can
// become ambiguous if a standard header declares std::count - confirm on the
// target toolchain.
int m = 0,n,row,sum = 0,count=0; 
// Returns true when c is an ASCII letter.
static bool scan_is_letter(char c){
    return ( c >= 'a' && c <= 'z' ) || ( c >= 'A' && c <= 'Z' );
}

// Returns true when c is an ASCII decimal digit.
static bool scan_is_digit(char c){
    return c >= '0' && c <= '9';
}

/**
 * Scans the next token from the global buffer `input`, starting at index `p`.
 *
 * Results are delivered through globals:
 *   - syn    : token code
 *                1..6   reserved word (index in `reserved` + 1)
 *                10     identifier          11  unsigned number (value in `sum`)
 *                13 +   14 -   15 *   16 /
 *                17 :   18 :=  20 <   21 <=  22 <>  23 >   24 >=  25 =
 *                26 ;   27 (   28 )
 *                0      '#' / end of input
 *                99     newline
 *                100    block comment not closed before end of input
 *                -1     lexical error (unknown character or number too large)
 *   - output : NUL-terminated lexeme (empty for syn 11, 99, 100 and -1)
 *   - sum    : numeric value when syn == 11
 *   - p      : advanced past the consumed characters
 *
 * Blanks, tabs, carriage returns and both comment forms are skipped. Reading
 * never goes beyond `count` characters of `input`; running out of input is
 * reported as the '#' end marker so that callers always terminate.
 * Identifiers longer than the `output` buffer are truncated to fit (the
 * remaining characters are still consumed).
 */
void scanner(){
    memset(output, '\0', sizeof(output));   // reset the lexeme buffer

    // Skip whitespace and comments until the first character of a real token.
    for(;;){
        while( p < count && ( input[p] == ' ' || input[p] == '\t' || input[p] == '\r' ) ){
            p++;
        }
        if( p >= count ){
            // Input exhausted: behave as if the '#' end marker had been read.
            ch = '#';
            output[0] = ch;
            syn = 0;
            return;
        }
        ch = input[p++];
        if( ch != '/' ){
            break;
        }

        if( p < count && input[p] == '/' ){
            // Line comment: stop in front of the newline so the next token is
            // the newline itself and the caller can still count the line.
            while( p < count && input[p] != '\n' ){
                p++;
            }
            continue;
        }
        if( p < count && input[p] == '*' ){
            p++;                            // skip the '*' that opens the comment
            bool closed = false;
            while( p + 1 < count ){
                if( input[p] == '*' && input[p + 1] == '/' ){
                    p += 2;                 // skip the closing "*/"
                    closed = true;
                    break;
                }
                p++;
            }
            if( !closed ){
                p = count;                  // the next call reports end of input
                syn = 100;
                return;
            }
            continue;
        }

        // A lone '/' is the division operator.
        output[0] = ch;
        syn = 16;
        return;
    }

    if( scan_is_letter(ch) ){
        // Identifier or reserved word: l(l|d)*
        const int maxLen = static_cast<int>(sizeof(output)) - 1;
        int len = 0;
        for(;;){
            if( len < maxLen ){
                output[len++] = ch;         // excess characters are dropped
            }
            if( p >= count || !( scan_is_letter(input[p]) || scan_is_digit(input[p]) ) ){
                break;
            }
            ch = input[p++];
        }
        output[len] = '\0';
        syn = 10;
        for( int k = 0 ; k < 6 ; k++ ){
            if( strcmp(output, reserved[k]) == 0 ){
                syn = k + 1;
                break;
            }
        }
        return;
    }

    if( scan_is_digit(ch) ){
        // Unsigned number: dd*
        bool overflow = false;
        sum = 0;
        for(;;){
            const int digit = ch - '0';
            if( !overflow ){
                if( sum > ( INT_MAX - digit ) / 10 ){
                    overflow = true;        // value does not fit in an int
                }else{
                    sum = sum * 10 + digit;
                }
            }
            if( p >= count || !scan_is_digit(input[p]) ){
                break;
            }
            ch = input[p++];
        }
        syn = overflow ? -1 : 11;
        return;
    }

    switch(ch){
        case '+':
            output[0] = ch ;
            syn = 13;
            break;
        case '-':
            output[0] = ch ;
            syn = 14;
            break;
        case '*':
            output[0] = ch ;
            syn = 15 ;
            break;
        case ':':
            output[0] = ch ;
            if( p < count && input[p] == '=' ){
                output[1] = input[p++];
                syn = 18 ;
            }else{
                syn = 17;
            }
            break;
        case '<':
            output[0] = ch ;
            if( p < count && input[p] == '=' ){
                output[1] = input[p++];
                syn = 21 ;
            }else if( p < count && input[p] == '>' ){
                output[1] = input[p++];
                syn = 22 ;
            }else{
                syn = 20 ;
            }
            break;
        case '>':
            output[0] = ch ;
            if( p < count && input[p] == '=' ){
                output[1] = input[p++];
                syn = 24;
            }else{
                syn = 23;
            }
            break;
        case '=':
            output[0] = ch ;
            syn = 25 ;
            break;
        case ';':
            output[0] = ch ;
            syn = 26 ;
            break;
        case '(':
            output[0] = ch ;
            syn = 27 ;
            break;
        case ')':
            output[0] = ch ;
            syn = 28 ;
            break;
        case '#':
            output[0] = ch ;
            syn = 0;
            break;
        case '\n':
            syn = 99 ;
            break;
        default:
            syn = -1 ;
            break;
    }
}
int main() {
    p = 0;
    printf("请输入源程序:");
    do{
        ch = getchar();
        input[p++] = ch;
    }while(ch != '#');
    count = p;
    p = 0;
    
    printf("		单词符号  种别码
");
    do{
        scanner();
        switch(syn){
            case -1:
                printf("在第%d行有错误!",row+1);
                break;
            case 11:
                printf("		  %d   	   %d
",sum,syn);
                break;
            case 99:
                row+=1;
                break;
            case 100:
                printf("注释到尾结束!
");
                break;
            default:
                printf("		  %s   	   %d
",output,syn);
                break;
        }
    }while(syn != 0); 
    return 0;
}

运行结果如下:

 

原文地址:https://www.cnblogs.com/Azan1999/p/11650151.html