一个简易的词法分析器

设计思路:查看流程图(画得太丑)

内容:只实现了C++子集源程序的词法分析,没有实现出错处理。

源程序:

#include<bits/stdc++.h>
using namespace std;
// Reserved words: an identifier-shaped token equal to one of these 19 entries
// is reported as a keyword instead of an identifier.
const string KEYWORD[19]={
    "if","else","void","return","while","then","for","do",
    "int","char","double","float","case","cin","cout",
    "include","using","namespace","iostream"
};

// Single-character separators (punctuation that is emitted as its own token).
const char SEPARATER[8]={
    ';', ',',
    '{', '}',
    '[', ']',
    '(', ')'
};

// Characters that may start or continue an operator token.
const char OPERATOR[9]={
    '+', '-', '*', '/',
    '>', '<', '=', '!',
    '#'
};

// Multi-character operators accepted when a run of OPERATOR characters is read.
// NOTE(review): "!=" is not listed, so a "!=" run is not accepted by
// IsMuloperator even though '!' and '=' are both operator characters.
const string Muloperator[13] = {
    "+=", "-=", "*=", "/=",
    "<=", ">=", "==",
    "<<=", ">>=",
    "++", "--",
    "<<", ">>"
};
// Whitespace skipped between tokens: space, tab, line feed, carriage return.
// The control characters are written as escape sequences; a raw line break
// inside a character literal does not compile.
const char FILTER[4]={' ','\t','\n','\r'};

/** Returns true when `word` is exactly equal to one of the KEYWORD entries. **/
bool IsKeyword(string word){
    return find(begin(KEYWORD), end(KEYWORD), word) != end(KEYWORD);
}
/** Returns true when `ch` is one of the SEPARATER characters. **/
bool IsSeparater(char ch){
    for(const char candidate : SEPARATER){
        if(candidate==ch) return true;
    }
    return false;
}
/** Returns true when `ch` is one of the OPERATOR characters. **/
bool IsOperator(char ch){
    return any_of(begin(OPERATOR), end(OPERATOR),
                  [ch](char candidate){ return candidate==ch; });
}
/** Returns true when `str` is exactly equal to one of the Muloperator entries. **/
bool IsMuloperator(string str){
    bool matched=false;
    for(const string& candidate : Muloperator){
        if(candidate==str){
            matched=true;
            break;
        }
    }
    return matched;
}
/** Returns true when `ch` is one of the FILTER (skipped) characters. **/
bool IsFilter(char ch){
    const char* const last=FILTER+4;
    return find(FILTER, last, ch) != last;
}
/** Returns true when `ch` lies in 'A'..'Z' or in 'a'..'z'. **/
bool IsUpAndLowLetter(char ch){
    const bool upper = ('A'<=ch) && (ch<='Z');
    const bool lower = ('a'<=ch) && (ch<='z');
    return upper || lower;
}

/** Returns true when `ch` lies in '0'..'9'. **/
bool IsDigit(char ch){
    return ('0'<=ch) && (ch<='9');
}

/**
 * Lexical analysis.
 *
 * Reads `fpin` one byte at a time until end of file and prints one line per
 * token to standard output in the form "<line>   <category>   <text>".
 * Categories are keyword, identifier, integer, floating-point number,
 * operator, separator and "unrecognized".
 *
 * @param fpin  Input stream opened for reading. A null pointer is tolerated:
 *              the function returns without printing anything.
 */
void analyse(FILE * fpin){
    if(fpin==nullptr) return;

    int raw=0;      // last fgetc() result; kept as int so EOF is distinguishable from every byte
    char ch=' ';    // `raw` narrowed to char for classification and output
    int line=1;     // current 1-based line number, advanced on each '\n'

    // Reads the next byte into raw/ch; returns false once end of file is reached.
    auto advance=[&]()->bool{
        raw=fgetc(fpin);
        ch=static_cast<char>(raw);
        return raw!=EOF;
    };
    // Pushes the look-ahead byte back so the main loop sees it again.
    // Nothing is pushed back at EOF; otherwise the last byte would be re-read forever.
    auto unread=[&](){
        if(raw!=EOF) ungetc(raw,fpin);
    };

    while(advance()){
        string arr;     // text of the token being collected
        if(IsFilter(ch)){   // whitespace: skipped, only newlines are counted
            if(ch=='\n') line++;
        }
        else if(IsUpAndLowLetter(ch)||ch=='_'){   // identifier or keyword
            do{
                arr+=ch;
            }while(advance() && (IsUpAndLowLetter(ch)||ch=='_'||IsDigit(ch)));
            unread();

            if(IsKeyword(arr)) cout<<line<<"   关键字   "<<arr<<'\n';
            else cout<<line<<"   标识符   "<<arr<<'\n';
        }
        else if(IsDigit(ch)){   // integer or floating-point number; "2b"-style text is unrecognized
            bool dian=false;    // a '.' was seen
            bool worry=false;   // a letter directly followed the digits
            while(raw!=EOF && (IsDigit(ch)||ch=='.')){
                arr+=ch;
                if(!advance()) break;
                if(ch=='.') dian=true;
                if(IsUpAndLowLetter(ch)){
                    // Swallow the whole malformed run so it is reported as a single token.
                    worry=true;
                    do{
                        arr+=ch;
                    }while(advance() && (IsUpAndLowLetter(ch)||IsDigit(ch)));
                    break;
                }
            }
            unread();

            if(worry) cout<<line<<"   不可识别符   "<<arr<<'\n';
            else if(!dian) cout<<line<<"   整型数   "<<arr<<'\n';
            else cout<<line<<"   浮点数   "<<arr<<'\n';
        }
        else if(IsOperator(ch)){   // run of operator characters
            do{
                arr+=ch;
            }while(advance() && IsOperator(ch));
            unread();

            // A single operator character is always valid; longer runs must be a known multi-character operator.
            if(arr.length()==1 || IsMuloperator(arr)) cout<<line<<"   运算符   "<<arr<<'\n';
            else cout<<line<<"   不可识别符   "<<arr<<'\n';
        }
        else if(IsSeparater(ch)){
            cout<<line<<"   分隔符   "<<ch<<'\n';
        }
        else cout<<line<<"   不可识别符   "<<ch<<'\n';
    }
}
/**
 * Entry point: opens "test.cpp" in the current directory, prints a header and
 * runs the lexer over the file.
 *
 * @return 0 on success, 1 when the input file cannot be opened.
 */
int main()
{
    const char *path="test.cpp";
    FILE *fpin=fopen(path,"r");
    if(fpin==nullptr){
        // Report why the open failed instead of handing a null stream to analyse().
        perror(path);
        return 1;
    }
    cout<<"------词法分析如下------"<<endl;
    analyse(fpin);
    fclose(fpin);
    return 0;
}

测试程序:

#include<iostream>
using namespace std;

int main()
{
    cout<<"Hello World!"<<endl;
    return 0;
}

结果:

参考链接:https://wenku.baidu.com/view/43cd3c29e009581b6ad9eb4b.html

原文地址:https://www.cnblogs.com/lfri/p/11865134.html