简单的词法分析和语法分析(C++实现,CodeBlocks+GCC编译)

说明:

  分析的语言是SNL语言,详见《编译程序的设计与实现》( 刘磊、金英、张晶、张荷花、单郸编著

  词法分析就是实现了词法分析的自动机

  语法分析使用递归下降法

运行结果:

  词法分析 得到TokenList

  语法分析 输出语法树

  

  运行输出:

    

代码:

main.cpp

   1 #include<stdio.h>
   2 #include<stdlib.h>
   3 #include<vector>
   4 #include<string.h>
   5 using namespace std;
   6 #define NUMOFRESERVED 22
   7 #define NUMOFSYMBOLS 20
   8 #define RES 0
   9 #define ID 1
  10 #define NUM 2
  11 #define SYM 3
  12 #define STR 4
  13 
  14 #define PLUS 100
  15 #define SUB 101
  16 #define MUL 102
  17 #define DIV 103
  18 #define LT 104
  19 #define LBRACK 105
  20 #define RBRACK 106
  21 #define LSQUBRACK 107
  22 #define RSQUBRACK 108
  23 #define POINT 109
  24 #define SEMI 110
  25 #define LBRACE 111
  26 #define RBRACE 112
  27 #define EOFF 113
  28 #define BLANK 114
  29 #define QUO 115
  30 #define EQU 116
  31 #define INDEX 117
  32 #define ASSI 118
  33 #define COM 119
  34 
  35 
  36 #define uint unsigned
  37 
  38 typedef struct Token {
  39     int id;
  40     int ptr;
  41     int linenum;
  42 } Token;
  43 typedef struct Node {
  44     char *desc;
  45     vector<Node*> children;
  46     Node(const char* str) {
  47         desc=new char[30];
  48         strcpy(desc,str);
  49     }
  50 } Node;
  51 
  52 vector<Token>* tokens;
  53 int Token_n=0;
  54 const char* reserved_words[NUMOFRESERVED]= {"begin","end","program","var",
  55                                             "type","procedure","while","endwh",
  56                                             "integer","char","array","of",
  57                                             "intc","record","if","then",
  58                                             "else","fi","do","write",
  59                                             "read","return"
  60                                            };
  61 
  62 static const char* symbol_table[NUMOFSYMBOLS]= {"+","-","*","/","<",
  63                                                 "(",")","[","]",".",
  64                                                 ";","{","}","EOF","BLANK",
  65                                                 "\'","=","..",":=",","
  66                                                };
  67 void syntaxError(const char* message);
  68 Node* MatchRES(const char* expected);
  69 Node* MatchSYM(const int expected);
  70 Node* MatchID();
  71 Node* MatchNUM();
  72 bool isRES(const char* res);
  73 bool isSYM(const int res);
  74 bool isID();
  75 bool isNUM();
  76 bool isLineEnd();
  77 Node* Program();
  78 Node* ProgramHead();
  79 Node* DeclarePart();
  80 Node* TypeDec();
  81 Node* TypeDeclaration();
  82 Node* TypeDecList();
  83 Node* TypeDecMore();
  84 Node* TypeId();
  85 Node* TypeName();
  86 Node* BaseType();
  87 Node* StructureType();
  88 Node* ArrayType();
  89 Node* RecType();
  90 Node* FieldDecList();
  91 Node* FieldDecMore();
  92 Node* IdList();
  93 Node* IdMore();
  94 Node* VarDec();
  95 Node* VarDeclaration();
  96 Node* VarDecList();
  97 Node* VarDecMore();
  98 Node* VarIdList();
  99 Node* VarIdMore();
 100 Node* ProcDec();
 101 Node* ProcDeclaration();
 102 Node* ParamList();
 103 Node* ParamDecList();
 104 Node* ParamMore();
 105 Node* Param();
 106 Node* FormList();
 107 Node* FidMore();
 108 Node* ProcDecPart();
 109 Node* ProcBody();
 110 Node* ProgramBody();
 111 Node* StmList();
 112 Node* StmMore();
 113 Node* Stm();
 114 Node* AssCall();
 115 Node* AssignmentRest();
 116 Node* ConditionalStm();
 117 Node* LoopStm();
 118 Node* InputStm();
 119 Node* OutputStm();
 120 Node* ReturnStm();
 121 Node* CallStmRest();
 122 Node* ActParamList();
 123 Node* ActParamMore();
 124 Node* Exp();
 125 Node* Term();
 126 Node* OtherFactor();
 127 Node* OtherTerm();
 128 Node* Factor();
 129 Node* Variable();
 130 Node* VariMore();
 131 Node* FieldVar();
 132 Node* FieldVarMore();
 133 Node* Parse();
 134 Node* CmpOp();
 135 Node* AddOp();
 136 Node* MultOp();
 137 
 138 void show_tree(Node* root,int depth,vector<int>* v,bool islast);
 139 
 140 int add(vector<Node* >& children,Node* node) {
 141     if(node!=NULL) {
 142         children.push_back(node);
 143     }
 144     return 0;
 145 }
 146 #define COMMENT_ERROR -1000
 147 #define STR_ERROR -1001
 148 #define SYM_ERROR -1002
 149 #define NOT_REC_SYMBOL -1003
 150 vector<char*> ID_table;
 151 vector<uint> NUM_table;
 152 vector<char*> STR_table;
 153 int ID_n=0,NUM_n=0,STR_n=0;
 154 int num_of_lines=1;
 155 FILE * file;
 156 
 157 
 158 
 159 /***************************************************************************************/
 160 /*****************                   词法分析                          *****************/
 161 /***************************************************************************************/
 162 
 163 void handle_error(int error_num,const char* str) {
 164     printf("行号:%d \t",num_of_lines);
 165     switch(error_num) {
 166     case COMMENT_ERROR:
 167         printf("注释未结束 \t");
 168         break;
 169     case SYM_ERROR:
 170         printf("出现未定义的符号,您想输入的是\":=\"吗? \t");
 171         break;
 172     case STR_ERROR:
 173         printf("字符串未结束 \t");
 174         break;
 175     case NOT_REC_SYMBOL:
 176         printf("出现未定义的符号 \t");
 177         break;
 178     }
 179     printf("%s\n",str);
 180     exit(-1);
 181 }
 182 char getNextChar() {
 183     char c;
 184     c=fgetc(file);
 185     return c;
 186 }
 187 
 188 void ungetNextChar() {
 189     fseek(file,-1,SEEK_CUR);
 190 }
 191 
 192 int reservedLookup(char* str) {
 193     for(int i=0; i<NUMOFRESERVED; i++) {
 194         if(strcmp(str,reserved_words[i])==0)return i;
 195     }
 196     return -1;
 197 }
 198 int str_to_num(char* str) {
 199     int len=strlen(str);
 200     int n=0;
 201     for(int i=0; i<len; i++) {
 202         n=n*10+str[i]-'0';
 203     }
 204     return n;
 205 }
 206 int addIDTable(char* str) {
 207     ID_table.push_back(str);
 208     return ID_n++;
 209 }
 210 int addNUMTable(int num) {
 211     NUM_table.push_back(num);
 212     return NUM_n++;
 213 }
 214 int addSTRTable(char* str ) {
 215     STR_table.push_back(str);
 216     return STR_n++;
 217 }
 218 Token scan() {
 219     static const char symbols[12]= {'+','-','*','/','<','(',')','[',']',';','=',','};
 220     static const int symbols_n[12]= {PLUS,SUB,MUL,DIV,LT,LBRACK,RBRACK,LSQUBRACK,RSQUBRACK,SEMI,EQU,COM};
 221     Token t;
 222     t.linenum=num_of_lines;
 223     char* str=new char[256];
 224     strcpy(str,"");//字符串要初始化,不然有乱码
 225     char c[2]= {'a','\0'};
 226     c[0]=getNextChar();
 227 LS0:
 228     if(c[0]>='a'&&c[0]<='z')goto LS1;
 229     if(c[0]>='A'&&c[0]<='Z')goto LS1;
 230     if(c[0]>='0'&&c[0]<='9')goto LS2;
 231     int indexofsym;
 232     for(indexofsym=0; indexofsym<12; indexofsym++) {
 233         if(c[0]==symbols[indexofsym]) {
 234             goto LS3;
 235         }
 236     }
 237     if(c[0]=='.')goto LS4;
 238     if(c[0]=='{')goto LS5;
 239     if(c[0]==':')goto LS6;
 240     if(c[0]=='\'')goto LS7;
 241     if(c[0]=='\n')goto LS8;
 242     if(c[0]==EOF)goto LS9;
 243     if(c[0]=='\t'||c[0]==' ')goto LS10;
 244     goto OTHER;
 245 LS1:
 246     strcat(str,c);
 247     c[0]=getNextChar();
 248     if(c[0]>='a'&&c[0]<='z')goto LS1;
 249     else if(c[0]>='A'&&c[0]<='Z')goto LS1;
 250     else if(c[0]>='0'&&c[0]<='9')goto LS1;
 251     ungetNextChar();
 252     int nres;
 253     if(nres=reservedLookup(str),nres!=-1) {
 254         t.id=RES;
 255         t.ptr=nres;
 256         return t;
 257     } else {
 258         t.id=ID;
 259         t.ptr=addIDTable(str);
 260         return t;
 261     }
 262 LS2:
 263     strcat(str,c);
 264     c[0]=getNextChar();
 265     if(c[0]>='0'&&c[0]<='9')goto LS2;
 266     ungetNextChar();
 267     t.id=NUM;
 268     t.ptr=addNUMTable(str_to_num(str));
 269     return t;
 270 LS3:
 271     t.ptr=symbols_n[indexofsym];
 272     t.id=SYM;
 273     return t;
 274 LS4:
 275     strcat(str,c);
 276     c[0]=getNextChar();
 277     if(c[0]=='.') {
 278         t.id=SYM;
 279         t.ptr=INDEX;
 280         return t;
 281     }
 282     else {
 283         t.id=SYM;
 284         t.ptr=POINT;
 285         ungetNextChar();
 286         return t;
 287     }
 288 LS5:
 289     while(c[0]=getNextChar(),c[0]!=EOF&&c[0]!='}') {
 290         if(c[0]=='\n'||c[0]=='\r')num_of_lines++;
 291     }
 292     if(c[0]=='}') {
 293         c[0]=getNextChar();
 294         goto LS0;
 295     } else {
 296         handle_error(COMMENT_ERROR,"");
 297     }
 298 LS6:
 299     strcat(str,c);
 300     c[0]=getNextChar();
 301     if(c[0]=='=') {
 302         t.id=SYM;
 303         t.ptr=ASSI;
 304         return t;
 305     } else {
 306         handle_error(SYM_ERROR,"符号为 \':\'");
 307     }
 308 LS7:
 309     while(c[0]=getNextChar(),c[0]!=EOF&&c[0]!='\'') {
 310         strcat(str,c);
 311     }
 312     if(c[0]=='\'') {
 313         t.id=STR;
 314         t.ptr=addSTRTable(str);
 315         return t;
 316     } else {
 317         handle_error(STR_ERROR,"");
 318     }
 319 LS8:
 320     t.id=-1;
 321     num_of_lines++;
 322     return t;
 323 LS9:
 324     t.id=SYM;
 325     t.ptr=EOFF;
 326     return t;
 327 LS10:
 328     t.id=-1;
 329     return t;
 330 OTHER:
 331     handle_error(NOT_REC_SYMBOL,c);
 332     t.id=-1;
 333     return t;
 334 }
 335 void show_token(Token t) {
 336     if(t.id==STR) {
 337         printf("<%d %s %s> \n",t.linenum,"STR",STR_table[t.ptr]);
 338         return ;
 339     }
 340     if(t.id==NUM) {
 341         printf("<%d %s %d> \n",t.linenum,"NUM",NUM_table[t.ptr]);
 342         return ;
 343     }
 344     if(t.id==ID) {
 345         printf("<%d %s %s> \n",t.linenum,"ID",ID_table[t.ptr]);
 346         return ;
 347     }
 348     if(t.id==RES) {
 349         printf("<%d %s %s> \n",t.linenum,"RES",reserved_words[t.ptr]);
 350         return ;
 351     }
 352     if(t.id==SYM) {
 353         printf("<%d %s %s> \n",t.linenum,"SYM",symbol_table[t.ptr-100]);
 354         return ;
 355     }
 356     return ;
 357 }
 358 vector<Token>*  getTokenlist(const char* filename) {
 359     tokens->clear();
 360     file=fopen(filename,"r");
 361     Token temp;
 362     while(temp=scan(),!(temp.id==SYM&&temp.ptr==EOFF)) {
 363         if(temp.id!=-1) {
 364             tokens->push_back(temp);
 365             if(temp.id==RES&&strcmp("end",reserved_words[temp.ptr])==0) {
 366                 char c;
 367                 if(c=getNextChar(),c=='.') {
 368                     break;
 369                 } else {
 370                     ungetNextChar();
 371                 }
 372             }
 373         }
 374     }
 375     fclose(file);
 376     return tokens;
 377 }
 378 void show_Token_list() {
 379     int len=tokens->size();
 380     for(int i=0; i<len; i++) {
 381         show_token(tokens->at(i));
 382     }
 383 }
 384 
 385 void write_token(FILE* file,Token t) {
 386     if(t.id==STR) {
 387         fprintf(file,"<%s %s> \n","STR",STR_table[t.ptr]);
 388         return ;
 389     }
 390     if(t.id==NUM) {
 391         fprintf(file,"<%s %d> \n","NUM",NUM_table[t.ptr]);
 392         return ;
 393     }
 394     if(t.id==ID) {
 395         fprintf(file,"<%s %s> \n","ID",ID_table[t.ptr]);
 396         return ;
 397     }
 398     if(t.id==RES) {
 399         fprintf(file,"<%s %s> \n","RES",reserved_words[t.ptr]);
 400         return ;
 401     }
 402     if(t.id==SYM) {
 403         fprintf(file,"<%s %s> \n","SYM",symbol_table[t.ptr-100]);
 404         return ;
 405     }
 406     return ;
 407 }
 408 void save_Token_list(const char* filename) {
 409     FILE* f=fopen(filename,"w");
 410     int len=tokens->size();
 411     for(int i=0; i<len; i++) {
 412         write_token(f,tokens->at(i));
 413     }
 414     fclose(f);
 415 }
 416 
 417 
 418 
 419 
 420 
 421 
 422 /***************************************************************************************/
 423 /*****************                   语法分析                          *****************/
 424 /***************************************************************************************/
 425 
 426 void syntaxError(const char* message) {
 427     if(strcmp(message,"outofrange")==0) {
 428         printf("出现越界错误");
 429         printf("   Token_n:%d    tokens->size:%d\n",Token_n,tokens->size());
 430         exit(-1);
 431     }
 432     printf("行号:%d    %s",tokens->at(Token_n).linenum,message);
 433     printf("当前token:");
 434     show_token(tokens->at(Token_n));
 435     exit(-1);
 436 }
 437 Node* MatchRES(const char* expected) {
 438     if(Token_n>=(int)tokens->size())syntaxError("outofrange");
 439     int id=tokens->at(Token_n).id,ptr=tokens->at(Token_n).ptr;
 440     if(id==RES&&strcmp(expected,reserved_words[ptr])==0) {
 441         Node*t =new Node(expected);
 442         Token_n++;
 443         return t;
 444     }
 445     char* message=new char[100];
 446     strcpy(message,"match res error.\nexpect: ");
 447     strcat(message,expected);
 448     strcat(message,"\n");
 449     syntaxError(message);
 450     delete message;
 451     return NULL;
 452 }
 453 Node* MatchNUM() {
 454     if(Token_n>=(int)tokens->size())syntaxError("outofrange");
 455     int id=tokens->at(Token_n).id;
 456     if(id==NUM) {
 457         Node* t= new Node("NUM");
 458         Token_n++;
 459         return t;
 460     }
 461     syntaxError("match num error\n");
 462     return NULL;
 463 }
 464 Node* MatchSTR() {
 465     if(Token_n>=(int)tokens->size())syntaxError("outofrange");
 466     int id=tokens->at(Token_n).id;
 467     if(id==STR) {
 468         Node* t= new Node("STR");
 469         Token_n++;
 470         return t;
 471     }
 472     syntaxError("match str error\n");
 473     return NULL;
 474 }
 475 Node* MatchID() {
 476     if(Token_n>=(int)tokens->size())syntaxError("outofrange");
 477     int id=tokens->at(Token_n).id;
 478     if(id==ID) {
 479         Node* t =new Node("ID");
 480         Token_n++;
 481         return t;
 482     }
 483     syntaxError("match id error\n");
 484     return NULL;
 485 }
 486 Node* MatchSYM(const int expected) {
 487     if(Token_n>=(int)tokens->size())syntaxError("outofrange");
 488     int id=tokens->at(Token_n).id,ptr=tokens->at(Token_n).ptr;
 489     if(id==SYM&&ptr==expected) {
 490         Node* t =new Node(symbol_table[ptr-100]);
 491         Token_n++;
 492         return t;
 493     }
 494     char* message=new char[100];
 495     strcpy(message,"match symbol error.\nexpect: ");
 496     strcat(message,symbol_table[expected-100]);
 497     strcat(message,"\n");
 498     syntaxError(message);
 499     delete message;
 500     return NULL;
 501 }
 502 bool isRES(const char* res) {
 503     if(Token_n>=(int)tokens->size())syntaxError("outofrange");
 504     int id=tokens->at(Token_n).id,ptr=tokens->at(Token_n).ptr;
 505     if(id==RES&&strcmp(reserved_words[ptr],res)==0)return true;
 506     return false;
 507 }
 508 bool isSYM(const int res) {
 509     if(Token_n>=(int)tokens->size())syntaxError("outofrange");
 510     int id=tokens->at(Token_n).id,ptr=tokens->at(Token_n).ptr;
 511     if(id==SYM&&ptr==res)return true;
 512     return false;
 513 }
 514 bool isID() {
 515     if(Token_n>=(int)tokens->size())syntaxError("outofrange");
 516     int id=tokens->at(Token_n).id;
 517     if(id==ID)return true;
 518     return false;
 519 }
 520 bool isNUM() {
 521     if(Token_n>=(int)tokens->size())syntaxError("outofrange");
 522     int id=tokens->at(Token_n).id;
 523     if(id==NUM)return true;
 524     return false;
 525 }
 526 bool isSTR() {
 527     if(Token_n>=(int)tokens->size())syntaxError("outofrange");
 528     int id=tokens->at(Token_n).id;
 529     if(id==STR)return true;
 530     return false;
 531 }
 532 Node* Program() {
 533     Node* t=new Node("Program");
 534     add(t->children,ProgramHead());
 535     add(t->children,DeclarePart());
 536     add(t->children,ProgramBody());
 537     return t;
 538 }
 539 Node* ProgramHead() {
 540     Node* t=new Node("ProgramHead");
 541     add(t->children,MatchRES("program"));
 542     add(t->children,MatchID());
 543     return t;
 544 }
 545 
 546 Node* DeclarePart() {
 547     Node* t=new Node("DeclarePart");
 548     add(t->children,TypeDeclaration());
 549     add(t->children,VarDeclaration());
 550     add(t->children,ProcDeclaration());
 551     return t;
 552 }
 553 Node* TypeDec() {
 554     Node* t=new Node("TypeDec");
 555     add(t->children,MatchRES("type"));
 556     add(t->children,TypeDecList());
 557     return t;
 558 }
 559 Node* TypeDeclaration() {
 560     Node*t=NULL;
 561     if(isRES("type")) {
 562         t=new Node("TypeDeclaration");
 563         add(t->children,TypeDec());
 564     }
 565     return t;
 566 }
 567 Node* TypeDecList() {
 568     Node* t=new Node("TypeDecList");
 569     add(t->children,TypeId());
 570     add(t->children,MatchSYM(EQU));
 571     add(t->children,TypeName());
 572     add(t->children,MatchSYM(SEMI));
 573     add(t->children,TypeDecMore());
 574     return t;
 575 }
 576 Node* TypeDecMore() {
 577     Node*t=NULL;
 578     if(isID()) {
 579         t=new Node("TypeDecMore");
 580         add(t->children,TypeDecList());
 581     }
 582     return t;
 583 }
 584 Node* TypeId() {
 585     Node* t=new Node("TypeId");
 586     add(t->children,MatchID());
 587     return t;
 588 }
 589 Node* TypeName() {
 590     Node* t=new Node("TypeName");
 591     if(isRES("integer")||isRES("char")) {
 592         add(t->children,BaseType());
 593         return t;
 594     }
 595     if(isRES("array")||isRES("record")) {
 596         add(t->children,StructureType());
 597         return t;
 598     }
 599     add(t->children,MatchID());
 600     return t;
 601 }
 602 Node* BaseType() {
 603     Node*t=NULL;
 604     if(isRES("integer")) {
 605         t=new Node("BaseType");
 606         add(t->children,MatchRES("integer"));
 607         return t;
 608     }
 609     if(isRES("char")) {
 610         t=new Node("BaseType");
 611         add(t->children,MatchRES("char"));
 612         return t;
 613     }
 614     syntaxError("not a base type\n");
 615     return t;
 616 }
 617 Node* StructureType() {
 618     Node*t=NULL;
 619     if(isRES("array")) {
 620         t=new Node("StructureType");
 621         add(t->children,ArrayType());
 622         return t;
 623     }
 624     if(isRES("record")) {
 625         t=new Node("StructureType");
 626         add(t->children,RecType());
 627         return t;
 628     }
 629     return t;
 630 }
 631 Node* ArrayType() {
 632     Node* t=new Node("ArrayType");
 633     add(t->children,MatchRES("array"));
 634     add(t->children,MatchSYM(LSQUBRACK));
 635     add(t->children,MatchNUM());
 636     add(t->children,MatchSYM(INDEX));
 637     add(t->children,MatchNUM());
 638     add(t->children,MatchSYM(RSQUBRACK));
 639     add(t->children,MatchRES("of"));
 640     add(t->children,BaseType());
 641     return t;
 642 }
 643 Node* RecType() {
 644     Node* t=new Node("RecType");
 645     add(t->children,MatchRES("record"));
 646     add(t->children,FieldDecList());
 647     add(t->children,MatchRES("end"));
 648     return t;
 649 }
 650 Node* FieldDecList() {
 651     Node*t=NULL;
 652     if(isRES("integer")||isRES("char")) {
 653         t=new Node("FieldDecList");
 654         add(t->children,BaseType());
 655         add(t->children,IdList());
 656         add(t->children,MatchSYM(SEMI));
 657         add(t->children,FieldDecMore());
 658         return t;
 659     }
 660     if(isRES("array")) {
 661         t=new Node("FieldDecList");
 662         add(t->children,ArrayType());
 663         add(t->children,IdList());
 664         add(t->children,MatchSYM(SEMI));
 665         add(t->children,FieldDecMore());
 666         return t;
 667     }
 668     syntaxError("field declare list error\n");
 669     return t;
 670 }
 671 Node* FieldDecMore() {
 672     Node*t=NULL;
 673     if(isRES("integer")||isRES("char")||isRES("array")) {
 674         t=new Node("FieldDecMore");
 675         add(t->children,FieldDecList());
 676     }
 677     return t;
 678 }
 679 Node* IdList() {
 680     Node* t=new Node("IdList");
 681     add(t->children,MatchID());
 682     add(t->children,IdMore());
 683     return t;
 684 }
 685 Node* IdMore() {
 686     Node*t=NULL;
 687     if(isSYM(COM)) {
 688         t=new Node("IdMore");
 689         add(t->children,MatchSYM(COM));
 690         add(t->children,IdList());
 691     }
 692     return t;
 693 }
 694 Node* VarDec() {
 695     Node* t=new Node("VarDec");
 696     add(t->children,MatchRES("var"));
 697     add(t->children,VarDecList());
 698     return t;
 699 }
 700 Node* VarDeclaration() {
 701     Node*t=NULL;
 702     if(isRES("var")) {
 703         t=new Node("VarDeclaration");
 704         add(t->children,VarDec());
 705     }
 706     return t;
 707 }
 708 Node* VarDecList() {
 709     Node* t=new Node("VarDecList");
 710     add(t->children,TypeName());
 711     add(t->children,VarIdList());
 712     add(t->children,MatchSYM(SEMI));
 713     add(t->children,VarDecMore());
 714     return t;
 715 }
 716 Node* VarDecMore() {
 717     Node*t=NULL;
 718     if(isRES("integer")||isRES("char")||isRES("array")||isRES("record")||isID()) {
 719         t=new Node("VarDecMore");
 720         add(t->children,VarDecList());
 721     }
 722     return t;
 723 }
 724 Node* VarIdList() {
 725     Node* t=new Node("VarIdList");
 726     add(t->children,MatchID());
 727     add(t->children,VarIdMore());
 728     return t;
 729 }
 730 Node* VarIdMore() {
 731     Node*t=NULL;
 732     if(isSYM(COM)) {
 733         t=new Node("VarIdMore");
 734         add(t->children,MatchSYM(COM));
 735         add(t->children,VarIdList());
 736     }
 737     return t;
 738 }
 739 Node* ProcDec() {
 740     Node* t=new Node("ProcDec");
 741     add(t->children,MatchRES("procedure"));
 742     add(t->children,MatchID());
 743     add(t->children,MatchSYM(LBRACK));
 744     add(t->children,ParamList());
 745     add(t->children,MatchSYM(RBRACK));
 746     add(t->children,MatchSYM(SEMI));
 747     add(t->children,DeclarePart());
 748     add(t->children,ProcBody());
 749     add(t->children,ProcDeclaration());
 750     return t;
 751 }
 752 Node* ProcDeclaration() {
 753     Node*t=NULL;
 754     if(isRES("procedure")) {
 755         t=new Node("ProcDeclaration");
 756         add(t->children,ProcDec());
 757     }
 758     return t;
 759 }
 760 Node* ParamList() {
 761     Node*t=NULL;
 762     if(isRES("integer")||isRES("char")||isRES("array")||isRES("record")||isID()||isRES("var")) {
 763         t=new Node("ParamList");
 764         add(t->children,ParamDecList());
 765     }
 766     return t;
 767 }
 768 Node* ParamDecList() {
 769     Node* t=new Node("ParamDecList");
 770     add(t->children,Param());
 771     add(t->children,ParamMore());
 772     return t;
 773 }
 774 Node* ParamMore() {
 775     Node*t=NULL;
 776     if(isSYM(SEMI)) {
 777         t=new Node("ParamMore");
 778         add(t->children,MatchSYM(SEMI));
 779         add(t->children,ParamDecList());
 780     }
 781     return t;
 782 }
 783 Node* Param() {
 784     Node*t=NULL;
 785     if(isRES("integer")||isRES("char")||isRES("array")||isRES("record")||isID()) {
 786         t=new Node("Param");
 787         add(t->children,TypeName());
 788         add(t->children,FormList());
 789         return t;
 790     }
 791     if(isRES("var")) {
 792         t=new Node("Param");
 793         add(t->children,MatchRES("var"));
 794         add(t->children,TypeName());
 795         add(t->children,FormList());
 796         return t;
 797     }
 798     syntaxError("param error\n");
 799     return t;
 800 }
 801 Node* FormList() {
 802     Node* t=new Node("FormList");
 803     add(t->children,MatchID());
 804     add(t->children,FidMore());
 805     return t;
 806 }
 807 Node* FidMore() {
 808     Node*t=NULL;
 809     if(isSYM(COM)) {
 810         t=new Node("FidMore");
 811         add(t->children,MatchSYM(COM));
 812         add(t->children,FormList());
 813     }
 814     return t;
 815 }
 816 Node* ProcDecPart() {
 817     Node* t=new Node("ProcDecPart");
 818     add(t->children,DeclarePart());
 819     return t;
 820 }
 821 Node* ProcBody() {
 822     Node* t=new Node("ProcBody");
 823     add(t->children,ProgramBody());
 824     return t;
 825 }
 826 Node* ProgramBody() {
 827     Node* t=new Node("ProgramBody");
 828     add(t->children,MatchRES("begin"));
 829     add(t->children,StmList());
 830     add(t->children,MatchRES("end"));
 831     return t;
 832 }
 833 Node* StmList() {
 834     Node* t=new Node("StmList");
 835     add(t->children,Stm());
 836     add(t->children,StmMore());
 837     return t;
 838 }
 839 Node* StmMore() {
 840     Node*t=NULL;
 841     if(isSYM(SEMI)) {
 842         t=new Node("StmMore");
 843         add(t->children,MatchSYM(SEMI));
 844         add(t->children,StmList());
 845     }
 846     return t;
 847 }
 848 Node* Stm() {
 849     Node* t=new Node("Stm");
 850     if(isRES("if")) {
 851         add(t->children,ConditionalStm());
 852         return t;
 853     }
 854     if(isRES("while")) {
 855         add(t->children,LoopStm());
 856         return t;
 857     }
 858     if(isRES("read")) {
 859         add(t->children,InputStm());
 860         return t;
 861     }
 862     if(isRES("write")) {
 863         add(t->children,OutputStm());
 864         return t;
 865     }
 866     if(isRES("return")) {
 867         add(t->children,ReturnStm());
 868         return t;
 869     }
 870     if(isID()) {
 871         add(t->children,MatchID());
 872         add(t->children,AssCall());
 873         return t;
 874     }
 875     delete t;
 876     return NULL;
 877 }
 878 Node* AssCall() {
 879     Node*t=NULL;
 880     if(isSYM(LSQUBRACK)||isSYM(POINT)||isSYM(ASSI)) {
 881         t=new Node("AssCall");
 882         add(t->children,AssignmentRest());
 883         return t;
 884     }
 885     if(isSYM(LBRACK)) {
 886         t=new Node("AssCall");
 887         add(t->children,CallStmRest());
 888         return t;
 889     }
 890     syntaxError("ass call error\n");
 891     return t;
 892 }
 893 Node* AssignmentRest() {
 894     Node* t=new Node("AssignmentRest");
 895     if(isSYM(LSQUBRACK)||isSYM(POINT)) {
 896         add(t->children,VariMore());
 897     }
 898     add(t->children,MatchSYM(ASSI));
 899     add(t->children,Exp());
 900     return t;
 901 }
 902 Node* ConditionalStm() {
 903     Node* t=new Node("ConditionalStm");
 904     add(t->children,MatchRES("if"));
 905     add(t->children,Exp());
 906     if(isSYM(LT))add(t->children,MatchSYM(LT));
 907     else if(isSYM(EQU))add(t->children,MatchSYM(EQU));
 908     else syntaxError("condition error\n");
 909     add(t->children,Exp());
 910     add(t->children,MatchRES("then"));
 911     add(t->children,StmList());
 912     add(t->children,MatchRES("else"));
 913     add(t->children,StmList());
 914     add(t->children,MatchRES("fi"));
 915     return t;
 916 }
 917 Node* LoopStm() {
 918     Node* t=new Node("LoopStm");
 919     add(t->children,MatchRES("while"));
 920     add(t->children,Exp());
 921     if(isSYM(LT))add(t->children,MatchSYM(LT));
 922     else if(isSYM(EQU))add(t->children,MatchSYM(EQU));
 923     else syntaxError("condition error\n");
 924     add(t->children,Exp());
 925     add(t->children,MatchRES("do"));
 926     add(t->children,StmList());
 927     add(t->children,MatchRES("endwh"));
 928     return t;
 929 }
 930 Node* InputStm() {
 931     Node* t=new Node("InputStm");
 932     add(t->children,MatchRES("read"));
 933     add(t->children,MatchSYM(LBRACK));
 934     add(t->children,MatchID());
 935     add(t->children,MatchSYM(RBRACK));
 936     return t;
 937 }
 938 Node* OutputStm() {
 939     Node* t=new Node("OutputStm");
 940     add(t->children,MatchRES("write"));
 941     add(t->children,MatchSYM(LBRACK));
 942     add(t->children,Exp());
 943     add(t->children,MatchSYM(RBRACK));
 944     return t;
 945 }
 946 Node* ReturnStm() {
 947     Node* t=new Node("ReturnStm");
 948     add(t->children,MatchRES("return"));
 949     return t;
 950 }
 951 Node* CallStmRest() {
 952     Node* t=new Node("CallStmRest");
 953     add(t->children,MatchSYM(LBRACK));
 954     add(t->children,ActParamList());
 955     add(t->children,MatchSYM(RBRACK));
 956     return t;
 957 }
 958 Node* ActParamList() {
 959     Node*t=NULL;
 960     if(isSYM(LBRACK)||isNUM()||isID()) {
 961         t=new Node("ActParamList");
 962         add(t->children,Exp());
 963         add(t->children,ActParamMore());
 964     }
 965     return t;
 966 }
 967 Node* ActParamMore() {
 968     Node*t=NULL;
 969     if(isSYM(COM)) {
 970         t=new Node("ActParamMore");
 971         add(t->children,MatchSYM(COM));
 972         add(t->children,ActParamList());
 973     }
 974     return t;
 975 }
 976 Node* Exp() {
 977     Node* t=new Node("Exp");
 978     add(t->children,Term());
 979     add(t->children,OtherTerm());
 980     return t;
 981 }
 982 Node* OtherTerm() {
 983     Node*t=NULL;
 984     if(isSYM(PLUS)||isSYM(SUB)) {
 985         t=new Node("OtherTerm");
 986         add(t->children,AddOp());
 987         add(t->children,Exp());
 988     }
 989     return t;
 990 }
 991 Node* Term() {
 992     Node* t=new Node("Term");
 993     add(t->children,Factor());
 994     add(t->children,OtherFactor());
 995     return t;
 996 }
 997 Node* OtherFactor() {
 998     Node*t=NULL;
 999     if(isSYM(MUL)||isSYM(DIV)) {
1000         t=new Node("OtherFactor");
1001         add(t->children,MultOp());
1002         add(t->children,Term());
1003     }
1004     return t;
1005 }
1006 Node* Factor() {
1007     Node* t=new Node("Factor");
1008     if(isNUM()) {
1009         add(t->children,MatchNUM());
1010         return t;
1011     }
1012     if(isSTR()) {
1013         add(t->children,MatchSTR());
1014         return t;
1015     }
1016     if(isSYM(LBRACK)) {
1017         add(t->children,MatchSYM(LBRACK));
1018         add(t->children,Exp());
1019         add(t->children,MatchSYM(RBRACK));
1020         return t;
1021     }
1022     if(isID()) {
1023         add(t->children,Variable());
1024         return t;
1025     }
1026     delete t;
1027     syntaxError("factor error\n");
1028     return NULL;
1029 }
1030 Node* Variable() {
1031     Node* t=new Node("Variable");
1032     add(t->children,MatchID());
1033     add(t->children,VariMore());
1034     return t;
1035 }
1036 Node* VariMore() {
1037     Node*t=NULL;
1038     if(isSYM(LSQUBRACK)) {
1039         t=new Node("VariMore");
1040         add(t->children,MatchSYM(LSQUBRACK));
1041         add(t->children,Exp());
1042         add(t->children,MatchSYM(RSQUBRACK));
1043     }
1044     if(isSYM(POINT)) {
1045         t=new Node("VariMore");
1046         add(t->children,MatchSYM(POINT));
1047         add(t->children,FieldVar());
1048     }
1049     return t;
1050 }
1051 Node* FieldVar() {
1052     Node* t=new Node("FieldVar");
1053     add(t->children,MatchID());
1054     add(t->children,FieldVarMore());
1055     return t;
1056 }
1057 Node* FieldVarMore() {
1058     Node*t=NULL;
1059     if(isSYM(LSQUBRACK)) {
1060         t=new Node("FieldVarMore");
1061         add(t->children,MatchSYM(LSQUBRACK));
1062         add(t->children,Exp());
1063         add(t->children,MatchSYM(RSQUBRACK));
1064     }
1065     return t;
1066 }
1067 Node* CmpOp() {
1068     Node*t=NULL;
1069     if(isSYM(LT)) {
1070         t=new Node("CmpOp");
1071         add(t->children,MatchSYM(LT));
1072         return t;
1073     }
1074     if(isSYM(EQU)) {
1075         t=new Node("CmpOp");
1076         add(t->children,MatchSYM(EQU));
1077         return t;
1078     }
1079     syntaxError("cmpop error\n");
1080     return t;
1081 }
1082 Node* AddOp() {
1083     Node*t=NULL;
1084     if(isSYM(PLUS)) {
1085         t=new Node("AddOp");
1086         add(t->children,MatchSYM(PLUS));
1087         return t;
1088     }
1089     if(isSYM(SUB)) {
1090         t=new Node("AddOp");
1091         add(t->children,MatchSYM(SUB));
1092         return t;
1093     }
1094     syntaxError("cmpop error\n");
1095     return t;
1096 }
1097 Node* MultOp() {
1098     Node*t=NULL;
1099     if(isSYM(MUL)) {
1100         t=new Node("MultOp");
1101         add(t->children,MatchSYM(MUL));
1102         return t;
1103     }
1104     if(isSYM(DIV)) {
1105         t=new Node("MultOp");
1106         add(t->children,MatchSYM(DIV));
1107         return t;
1108     }
1109     syntaxError("cmpop error\n");
1110     return t;
1111 }
1112 Node* Parse() {
1113     num_of_lines=1;
1114     Node* root=Program();
1115     return root;
1116 }
1117 void show_tree(Node* root,int depth,vector<int>* v,bool islast) {
1118     if(root==NULL)return ;
1119     printf("\n");
1120     for(int i=0; i<depth; i++) {
1121         if(v->at(i)==1)printf("");
1122         else printf("  ");
1123     }
1124     if(islast) {
1125         printf(" └─%s",root->desc);
1126         v->at(depth)=0;
1127     } else printf(" ├─%s",root->desc);
1128     if(depth+1==(int)v->size())v->push_back(1);
1129     v->at(depth+1)=1;
1130     int len=root->children.size();
1131     for(int i=0; i<len; i++) {
1132         if(i==len-1)show_tree(root->children.at(i),depth+1,v,true);
1133         else show_tree(root->children.at(i),depth+1,v,false);
1134     }
1135 }
1136 
1137 int main() {
1138     tokens=new vector<Token>();
1139     getTokenlist("cs1.txt");
1140     save_Token_list("tokens.txt");
1141     //打印显示
1142     printf("TOKENS:\n\n");
1143     show_Token_list();
1144     printf("\n\n\n\n语法树:\n\n");
1145     vector<int>* v=new vector<int>();
1146     v->push_back(0);
1147     show_tree(Parse(),0,v,true);
1148     return 0;
1149 }

cs1.txt

 1 {冒泡排序}
 2 {输入m,表示要排序的数的个数;接着输入m个整数;
 3  输出从小到大排序后的结果}
 4 
 5 program  p
 6 var  integer  i,j,num;
 7       array[1..20] of  integer a;
 8 
 9 procedure  q(integer num);
10 var  integer i,j,k;
11      integer t;
12 begin
13   i:='lklklk';
14   j:=1;
15   while i < num do
16      j:=num-i+1;
17      k:=1;
18      while k<j  do
19          if a[k+1] < a[k]  
20          then  t:=a[k];
21            a[k]:=a[k+1];
22            a[k+1]:=t
23          else  t:=0
24          fi;   
25      k:=k+1
26      endwh;
27   i:=i+1
28   endwh
29 end
30 
31 begin
32    read(num);
33    i:=1;
34    while i<(num+1)  do
35      read(j);
36      a[i]:=j;
37      i:=i+1
38    endwh;
39    q(num);
40    i:=1;
41    while  i<(num+1) do 
42        write(a[i]);
43        i:=i+1
44    endwh
45 end.
46   

END

代码写于大三下6月份,编译原理课程设计

随笔写于2016.7.13

 

原文地址:https://www.cnblogs.com/maxuewei2/p/5666289.html