2016.9.30 编译原理词法分析器

词法分析程序的功能:

对用户输入的字符串从左到右进行扫描和分解,根据词法规则识别出一个个具有独立意义的单词符号,并输出每个单词对应的种别码;若发现词法错误,则输出出错信息并停止分析。

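符号与种别码对照表(与下面代码中的取值一致):

单词符号    种别码
#           0
begin       1
if          2
then        3
while       4
do          5
end         6
标识符      10
无符号整数  11
+           13
-           14
*           15
/           16
:           17
:=          18
<           20
<=          21
<>          22
>           23
>=          24
=           25
;           26
(           27
)           28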

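用文法描述词法规则(根据下面代码实际识别的单词整理):

<标识符>     → <字母> { <字母> | <数字> }
<无符号整数> → <数字> { <数字> }
<字母>       → a | … | z | A | … | Z
<数字>       → 0 | … | 9
<运算符>     → + | - | * | / | = | < | <= | <> | > | >=
<界符>       → : | := | ; | ( | ) | #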

代码如下:

#include<stdio.h>
#include<string.h>
/* Lexer state shared between main() and judge(). */
char str[300];   /* the input line being scanned */
char token[10];  /* text of the most recently recognized token */
char ch;         /* character currently being examined by judge() */
int i;           /* index into str of the next character to read */
int x;           /* write position inside token */
int n;           /* loop index used by the keyword lookup */
int typenum;     /* category code of the last token; -1 reports an error */

/* Reserved words; judge() gives word[k] the category code k + 1. */
char *word[6] = {
    "begin",
    "if",
    "then",
    "while",
    "do",
    "end"
};

/* Scans one token from str starting at i; results go to token and typenum. */
void judge();
/*
 * Entry point of the lexical analyzer.
 *
 * Reads one line from standard input into str, then calls judge()
 * repeatedly, printing "<category code>\t<token text>" for every token.
 * Scanning stops at the end of the line or as soon as judge() reports an
 * error by setting typenum to -1, in which case an error message is printed.
 *
 * Returns 1 if no input could be read, 0 otherwise (including after a
 * lexical error, as in the original program).
 */
int main()
{
    int length;

    printf("请输入字符串:");

    /* fgets() limits the read to the size of str; gets() cannot be bounded
       and was removed from the language in C11. */
    if (fgets(str, sizeof str, stdin) == NULL)
    {
        return 1;
    }

    /* fgets() keeps the line terminator; cut it off so judge() does not
       reject it as an unknown character. */
    str[strcspn(str, "\r\n")] = '\0';

    /* Ignore trailing spaces: judge() skips spaces before a token, so a run
       of spaces at the end would otherwise leave it looking at the string
       terminator and reporting an error. */
    length = (int)strlen(str);
    while (length > 0 && str[length - 1] == ' ')
    {
        length--;
    }
    str[length] = '\0';

    i = 0;
    /* A plain while loop (not do/while) so that an empty or all-blank line
       yields no tokens instead of a spurious error. */
    while (i < length)
    {
        judge();
        if (typenum == -1)
        {
            printf("错误!\n");
            return 0;
        }
        printf("%d\t%s\n", typenum, token);
    }
    return 0;
}
/* Returns nonzero when c is an ASCII letter (a-z or A-Z). */
static int judge_is_letter(char c)
{
    return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'));
}

/* Returns nonzero when c is an ASCII decimal digit. */
static int judge_is_digit(char c)
{
    return (c >= '0') && (c <= '9');
}

/*
 * Appends c to token at position x and advances x, always leaving room for
 * the terminating '\0'. Characters that do not fit are dropped, so an
 * over-long lexeme is stored truncated instead of overflowing the buffer.
 */
static void judge_append(char c)
{
    if (x < (int)sizeof token - 1)
    {
        token[x] = c;
        x++;
    }
}

/*
 * Scans the next token of str, starting at index i.
 *
 * On return:
 *   - token holds the token text (NUL-terminated, truncated to the buffer
 *     size if the lexeme is longer);
 *   - typenum holds the category code: 1..6 for the keywords in word[],
 *     10 for identifiers, 11 for unsigned integers, the operator/delimiter
 *     codes assigned in the switch below, or -1 for an unrecognized
 *     character or when there is no more input;
 *   - i indexes the first character after the token.
 *
 * The globals ch, x and n are used as scratch state.
 */
void judge()
{
    /* Clear the whole buffer so the token is always NUL-terminated. */
    memset(token, '\0', sizeof token);
    x = 0;

    /* Skip leading spaces. */
    ch = str[i];
    while (ch == ' ')
    {
        i++;
        ch = str[i];
    }

    /* Nothing left to scan: report an error without moving i past the
       string terminator. */
    if (ch == '\0')
    {
        typenum = -1;
        return;
    }

    /* From here on i is one ahead of ch (one-character lookahead). */
    i++;

    if (judge_is_letter(ch))                    /* identifier or keyword */
    {
        while (judge_is_letter(ch) || judge_is_digit(ch))
        {
            judge_append(ch);
            ch = str[i];
            i++;
        }
        i--;                                    /* give back the lookahead character */

        typenum = 10;
        for (n = 0; n < (int)(sizeof word / sizeof word[0]); n++)
        {
            if (strcmp(token, word[n]) == 0)
            {
                typenum = n + 1;                /* keyword codes follow table order */
                break;
            }
        }
    }
    else if (judge_is_digit(ch))                /* unsigned integer */
    {
        while (judge_is_digit(ch))
        {
            judge_append(ch);
            ch = str[i];
            i++;
        }
        i--;                                    /* give back the lookahead character */
        typenum = 11;
    }
    else
    {
        switch (ch)
        {
        case '<':                               /* "<", "<=" or "<>" */
            judge_append(ch);
            ch = str[i];
            i++;
            if (ch == '=')
            {
                typenum = 21;
                judge_append(ch);
            }
            else if (ch == '>')
            {
                typenum = 22;
                judge_append(ch);
            }
            else
            {
                typenum = 20;
                i--;                            /* lookahead is not part of this token */
            }
            break;
        case '>':                               /* ">" or ">=" */
            judge_append(ch);
            ch = str[i];
            i++;
            if (ch == '=')
            {
                typenum = 24;
                judge_append(ch);
            }
            else
            {
                typenum = 23;
                i--;                            /* lookahead is not part of this token */
            }
            break;
        case ':':                               /* ":" or ":=" */
            judge_append(ch);
            ch = str[i];
            i++;
            if (ch == '=')
            {
                typenum = 18;
                judge_append(ch);
            }
            else
            {
                typenum = 17;
                i--;                            /* lookahead is not part of this token */
            }
            break;
        case '+':
            typenum = 13;
            judge_append(ch);
            break;
        case '-':
            typenum = 14;
            judge_append(ch);
            break;
        case '*':
            typenum = 15;
            judge_append(ch);
            break;
        case '/':
            typenum = 16;
            judge_append(ch);
            break;
        case '=':
            typenum = 25;
            judge_append(ch);
            break;
        case ';':
            typenum = 26;
            judge_append(ch);
            break;
        case '(':
            typenum = 27;
            judge_append(ch);
            break;
        case ')':
            typenum = 28;
            judge_append(ch);
            break;
        case '#':
            typenum = 0;
            judge_append(ch);
            break;
        default:                                /* character outside the language */
            typenum = -1;
            break;
        }
    }
}
原文地址:https://www.cnblogs.com/131li/p/5924274.html