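/*
 * Lexical analyzer
 *
 * Identifiers consist only of lowercase English letters; the set of operators
 * and the set of keywords are both finite.
 */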

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* One lexical token, stored as a node of a singly linked list. */
struct wordClass
{
    char word[10];           /* token text */
    int class;               /* numeric class code of the token */
    struct wordClass *next;  /* following node, or NULL at the end of the list */
};

typedef struct wordClass link;

/* Scanner state shared by main, find_known and find_keyword. */
int flag=13;    // class of the token being scanned: 1 operator/delimiter, 12 constant, 13 identifier
int bit;        // index in name where the next character will be stored
char name[10];  // text of the token being scanned (declared once; the duplicate declaration was removed)
int keyword;    // set to 1 by find_keyword on a match, reset to 0 by main

link *known;       // predefined list: one class code per keyword/operator/delimiter
link *identifier;  // list of identifiers seen so far
link *constant;    // list of numeric constants seen so far

// Allocates the known, constant and identifier lists and fills known with the
// predefined keywords, operators and delimiters.
void setKnown();
// Looks ch up in list p; when it is absent and flag is 12 or 13, searches the
// constant or identifier list instead and appends ch there if it is missing.
// Reports to fout.
link* find_known(char* ch, link *p, FILE *fout);
// Searches list p for an exact match of ch; on a match writes a line to fout
// and sets the global keyword to 1.
link* find_keyword(char* ch, link *p, FILE *fout);



/* Looks up the pending token held in name (if there is one) and empties the
 * buffer. Call this BEFORE changing flag: find_known reads flag to decide
 * which list an unknown token belongs to. */
static void flush_token(FILE *fout){
    if(bit>0){  // nothing is pending at start of input or right after a keyword match
        find_known(name,known,fout);
    }
    memset(name,0,sizeof name);
    bit=0;
}

/* Appends c to the pending token. Characters that would overflow name (one
 * byte is kept for the terminating NUL) are dropped with a warning. */
static void append_char(int c){
    if(bit<(int)sizeof name-1){
        name[bit++]=(char)c;
    }
    else{
        fprintf(stderr,"token too long, dropping '%c'\n",c);
    }
}

/*
 * Reads the file "source_code" character by character until '#' or end of
 * file, groups the characters into operator/delimiter, constant and
 * identifier tokens, and appends the classification of each token to the
 * file "result".
 *
 * Spaces and newlines are skipped and do not terminate a token; a token ends
 * when a character of a different class is read. Keywords are matched eagerly:
 * as soon as the letters read so far spell a keyword it is emitted.
 *
 * Returns 0 on success; exits with status 1 when a file cannot be opened.
 */
int main(){
    setKnown();

    FILE *fp=fopen("source_code","r");
    if(fp==NULL){
        printf("file cannot be opened\n");
        exit(1);
    }
    FILE *fout=fopen("result","a+");
    if(fout==NULL){
        printf("result file cannot be opened\n");
        fclose(fp);
        exit(1);
    }

    int ch;  // int rather than char so that EOF is distinguishable from data
    while((ch=fgetc(fp))!=EOF&&ch!='#'){
        if(ch==' '||ch=='\n'){
            continue;
        }
        if(ch=='='||ch=='+'||ch=='*'||ch=='-'||ch=='>'||ch==';'){  // operator or delimiter
            // NOTE(review): consecutive operator characters accumulate into one
            // token, which never matches the single-character table entries.
            if(flag!=1){
                flush_token(fout);
            }
            flag=1;
            append_char(ch);
        }
        else if(ch>='0'&&ch<='9'){  // digit
            if(flag!=12){
                flush_token(fout);
            }
            flag=12;
            append_char(ch);
        }
        else if(ch>='a'&&ch<='z'){  // lowercase letter
            if(flag!=13){
                flush_token(fout);
            }
            flag=13;
            append_char(ch);
            find_keyword(name,known,fout);  // eager keyword match
            if(keyword==1){
                memset(name,0,sizeof name);
                bit=0;
                keyword=0;
            }
        }
        // any other character is ignored
    }
    flush_token(fout);  // emit the token that was still pending at '#'/EOF

    printf("\n");
    fclose(fp);
    if(fclose(fout)!=0){
        printf("result file cannot be written\n");
        return 1;
    }
    return 0;
}

void setKnown(){
    known = (link *)malloc(sizeof(link));
    known->next=NULL;
    constant=(link *)malloc(sizeof(link));
    constant->next=NULL;
    identifier=(link *)malloc(sizeof(link));
    identifier->next=NULL;

    link *p=(link *)malloc(sizeof(link));
    link *r=known;
   
   strcpy(p->word,"if");    p->class=1;    p->next=NULL;
    r->next = p;  r=p;
    
    p=(link *)malloc(sizeof(link));
    strcpy(p->word,"else");    p->class=2;    p->next=NULL;
    r->next = p;  r=p;
    
    p=(link *)malloc(sizeof(link));
    strcpy(p->word,"then");    p->class=3;    p->next=NULL;
    r->next=p; r=p;
    
    p=(link *)malloc(sizeof(link));
    strcpy(p->word,"endif");    p->class=4;    p->next=NULL;
    r->next=p; r=p;
    
    p=(link *)malloc(sizeof(link));
    strcpy(p->word,"int");    p->class=5;    p->next=NULL;
    r->next=p; r=p;
   
    p=(link *)malloc(sizeof(link));
    strcpy(p->word,"=");    p->class=6;    p->next=NULL;
    r->next=p; r=p;

    p=(link *)malloc(sizeof(link));
    strcpy(p->word,"+");    p->class=7;    p->next=NULL;
    r->next=p; r=p;
   
    p=(link *)malloc(sizeof(link));
    strcpy(p->word,"*");    p->class=8;    p->next=NULL;
    r->next=p; r=p;

    p=(link *)malloc(sizeof(link));
    strcpy(p->word,"-");    p->class=9;    p->next=NULL;
    r->next=p; r=p;

    p=(link *)malloc(sizeof(link));
    strcpy(p->word,">");    p->class=10;    p->next=NULL;
    r->next=p; r=p;

    p=(link *)malloc(sizeof(link));
    strcpy(p->word,";");    p->class=11;    p->next=NULL;
    r->next=p; r=p;
}

/* Returns the first node after the sentinel head whose word equals text, or NULL. */
static link *search_list(link *head,const char *text){
    for(link *node=head->next;node!=NULL;node=node->next){
        if(strcmp(node->word,text)==0){
            return node;
        }
    }
    return NULL;
}

/* Appends a new node (text, class) at the end of the list.
 * Returns the new node, or NULL when memory is exhausted. */
static link *append_entry(link *head,const char *text,int class){
    link *node=malloc(sizeof *node);
    if(node==NULL){
        return NULL;
    }
    snprintf(node->word,sizeof node->word,"%s",text);  // bounded copy, always NUL-terminated
    node->class=class;
    node->next=NULL;

    link *tail=head;
    while(tail->next!=NULL){
        tail=tail->next;
    }
    tail->next=node;
    return node;
}

/*
 * Classifies the token ch.
 *
 * ch   - NUL-terminated token text
 * p    - sentinel head of the predefined list
 * fout - stream that receives the report lines
 *
 * The token is first searched in p. If it is not there, the global flag
 * selects the fallback list: 12 -> constant, 13 -> identifier. A token missing
 * from the fallback list is appended to it.
 *
 * Returns the matching or newly created node, or NULL when the token is
 * unknown and flag is neither 12 nor 13, or when allocation fails.
 */
link* find_known(char* ch, link *p, FILE *fout){
    link *hit=search_list(p,ch);
    if(hit!=NULL){
        fprintf(fout,"find %s in known[class:%d addr:%p] \n",ch,hit->class,(void *)hit);
        return hit;
    }

    link *table;
    const char *kind;
    if(flag==12){
        table=constant;
        kind="constant";
    }
    else if(flag==13){
        table=identifier;
        kind="identifier";
    }
    else{
        return NULL;  // unknown operator sequence: nothing to record
    }

    hit=search_list(table,ch);
    if(hit!=NULL){
        // repeated constants are returned silently; only identifiers are reported
        if(flag==13){
            fprintf(fout,"find %s in identifier[class:%d addr:%p]\n",ch,hit->class,(void *)hit);
        }
        return hit;
    }

    hit=append_entry(table,ch,flag);
    if(hit==NULL){
        fprintf(stderr,"out of memory while recording %s\n",ch);
        return NULL;
    }
    fprintf(fout,"new %s %s[class:%d addr:%p]\n",kind,ch,hit->class,(void *)hit);
    return hit;
}

/*
 * Eager keyword match: searches the list headed by the sentinel p for an
 * entry whose word equals ch exactly.
 *
 * On a match, writes a "keyword:" line to fout, sets the global keyword to 1
 * and returns the matching node. Returns NULL (keyword left unchanged) when
 * there is no match.
 */
link* find_keyword(char* ch, link *p, FILE *fout){
    for(link *q=p->next;q!=NULL;q=q->next){
        if(strcmp(ch,q->word)==0){
            fprintf(fout,"keyword:%s[class:%d addr:%p]\n",ch,q->class,(void *)q);
            keyword=1;
            return q;
        }
    }
    return NULL;
}
// Original article: https://www.cnblogs.com/waynelin/p/6138749.html