简单的C语言编译器--语法分析器

  语法分析算是最难的一部分了。总而言之,语法分析就是先设计一系列语法,然后再用设计好的语法去归约词法分析中的结果。最后将归约过程打印出来,或者生成抽象语法树。

1. 设计文法

以下是我的文法(引入的M和N是方便以后的语义分析):

1.1、基本框架

Program -> Type main() Block
Type -> int | bool
Block -> { Stmts return Num ; }
Decl -> Type Array ;
Array -> Identifier [ Num ] | Identifier [ Identifier ] | Identifier
Stmts -> Stmts M Stmt | Stmt

1.2、标识符和常数

Letter -> A | B | C | D | E | F | G | H | I | J | K | L | M | N 
    | O | P | Q | R | S | T | U | V | W | X | Y | Z | a | b
    | c | d | e | f | g | h | i | j | k | l | m | n | o | p
    | q | r | s | t | u | v | w | x | y | z | _
Digit -> 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
Non_digit -> 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
Num -> Num Digit | Non_digit
Identifier -> Identifier Digit | Identifier Letter | Letter
Bool_value -> true | false

1.3、运算符

Self_op -> ++ | --
HLogic_op -> &&
LLogic_op -> ||
HMath_op -> * | / | %
LMath_op -> + | - 
Judge_op -> == | != | >= | <= | > | <

1.4、语句块框架

Stmt -> Assignment ;
	| Decl
    | if ( Bool ) M Stmt
    | if ( Bool ) M Stmt N else M Stmt
    | while M ( Bool ) M Stmt
    | for ( Fora ; M Forb ; Forc ) M Stmt
    | { Stmts }
Fora -> Assignment | ε
Forb -> Bool | ε
Forc -> ForAssignment | ε

1.5、赋值语句

Assignment -> LArray = Bool ; | LArray Self_op ; | Self_op LArray ; 

ForAssignment -> LArray = Bool ; | LArray Self_op ; | Self_op LArray ; 

1.6、条件语句

Factor -> ( Bool ) | Array | Num | Bool_value | ! ( Bool )
HExpr -> HExpr HMath_op Factor | Factor
LExpr -> LExpr LMath_op HExpr | HExpr
Rel -> Rel Judge_op LExpr | LExpr
HRel -> HRel HLogic_op M Rel | Rel
Bool -> Bool LLogic_op M HRel | HRel

1.7、辅助

M -> ε 
N -> ε	

注意,标识符和常数那块是为了词法分析构造DFA,实际语法分析的时候将所有标识符当做Identifier,把所有的常数当成Num即可。

2. 代码部分

  代码的关键是用getTable.h/cpp实现语法分析表,然后用状态栈分析测试的语法是否能够被归约成功,并且得到归约的顺序,即语法分析树。myParser.h/cpp便是利用得到的分析表进行栈分析,但是由于是制导翻译,所以分析这块和语义翻译一起做,所以下一节再介绍。

getTable.h

#ifndef MYPARSER_H
#define MYPARSER_H

#include <fstream>
#include <unordered_map>
#include <unordered_set>
#include <string>
#include <vector>
#include <cstddef>
#include <functional>


struct Project {
	int pro_num;  //表示第pro_num个产生式,0表示拓展的产生式
	int dot_position;  //当前点的位置,位置的取值比产生式右端长度大1
	std::unordered_set<std::string> successors;  //后继符
	bool operator==(Project &right);
};

struct Grammer {
	std::string left;
	std::vector<std::string> right;
};	

/*
 * hash函数返回哈希值
 * eq则为set提供相等操作
 * 这样就可以把Project作为set的关键词
 */


size_t hasher(const Project &p);
bool eq(const Project &p1, const Project &p2);


class GetTable {
	public:
		typedef std::unordered_set<Project, decltype(hasher) *, decltype(eq) *> Status; 
		typedef std::unordered_map<int, std::unordered_map<std::string, std::string>> Atable;
		typedef std::unordered_map<int, std::unordered_map<std::string, int>> Gtable;
		GetTable();
		void get_grammer(std::ifstream &in); //从文本中读取文法规则
		void get_first();	//获得first集合
		void get_follow(); //获得follow集合
		bool inVT(std::string s); //判断在V还是T里,1为V
		void judge_first(std::vector<std::string> s , std::unordered_set<std::string> & result); //判断串s的first集合是否包含ε
		void get_closure(Status &p); //求项目闭包
		bool judge_repeat(Status s1, Status s2); //判断两个状态是否重复
		bool judge_conflict(Status & s, std::unordered_set<std::string> &result); //判断一个状态是否有移进-归约冲突
		void get_status(); //获得状态集合
		void print(std::ofstream &o1, std::ofstream &o2); //打印归约产生式子
	private:
		std::unordered_map<std::string,std::unordered_set<int>> index; //存储各非终结符的定义的序列号集合
		std::vector<Grammer> G;  //存储文法
		std::vector<std::string> V, T; //存储非终结符和终结符
		std::unordered_map<std::string, std::unordered_set<std::string>> first, follow;  //存储first和follow集合
		std::unordered_map<int, Status> statuses; //规范状态集
		Atable action;
		Gtable goTo;

};

#endif

getTable.cpp

#include <iostream>
#include "getTable.h"


bool Project::operator==(Project &right) {
	if(this->pro_num == right.pro_num && this->dot_position == right.dot_position && this->successors.size() == right.successors.size()) {
		for(auto t : right.successors) {
			if(this->successors.find(t) == this->successors.end())
				return false;
		}

		return true;
	}

	return false;
}


size_t hasher(const Project &p) {
	return std::hash<int>() (p.pro_num*p.dot_position);
}


bool eq(const Project &p1, const Project &p2) {
	if(p1.pro_num == p2.pro_num && p1.dot_position == p2.dot_position) 
		return true;
	return false;
}


GetTable::GetTable(){
	//初始化非终结符和终结符
	V = {"S","Program","Type","Block","Stmts","Decl","Stmt","ForAssignment","Assignment","Bool", "Rel", "LExpr","HExpr","Factor","Self_op","HLogic_op","LLogic_op","HMath_op","LMath_op","Judge_op","Bool_value","Array","Fora","Forb","Forc","HRel","LArray","M","N"};

	T = {"(" ,")","main","int","bool","return",";","{","}","if","else","while","for","Identifier","Num","[","]","true","false","==","!=",">=","<=",">","<","+","-","*","/","%","||","&&","++","--","!","-",";","=","ε"};

	std::unordered_set<std::string> empty_set;
	std::unordered_set<int> empty_set_int;

	//初始化first和follow集
	for(auto i : V) { 
		first.insert({i, empty_set});
		follow.insert({i, empty_set});
		index.insert({i,empty_set_int});
	}


	follow["Program"].insert("#");
}


void GetTable::get_grammer(std::ifstream &in) {
	std::string tmp;
	Grammer gtmp;
	gtmp.left = "S";
	gtmp.right = {"Program"};
	G.push_back(gtmp);

	while(in >> tmp) {
		gtmp.left = tmp;
		gtmp.right.clear();
		in >> tmp >> tmp;
		while(tmp != "#") {
			gtmp.right.push_back(tmp);	
			in >> tmp;
		}
		G.push_back(gtmp);
	}
	
	for(int i = 0; i < static_cast<int>(G.size()); i++) 
		index[G[i].left].insert(i);	
}

bool GetTable::inVT(std::string s) {
	for(auto i : V) {
		if(i == s)
			return true;
	}

	return false;
}


void GetTable::get_first() {
	bool change = true; //表示若改动了一处,则需要重新便利
	bool is_empty; //表示产生式右端为空串
	int t; 
	//循环,直到没有改动为止,即change = false
	while(change) {
		change = false;
		//循环每个文法
		for(auto &g : G) {
			is_empty = true;
			t = 0;
			while(is_empty && t < static_cast<int>(g.right.size())) {
				is_empty = false;
				if(!inVT(g.right[t])) {
					if(first[g.left].find(g.right[t]) == first[g.left].end()) {
						first[g.left].insert(g.right[t]);
						change = true;
					}
					continue;
				}  
				for(auto i : first[g.right[t]]) {
					if(first[g.left].find(i) == first[g.left].end()) {
						first[g.left].insert(i);
						change = true;
					}
				}

				if(first[g.right[t]].find("ε") != first[g.right[t]].end()) {
					is_empty = true;
					t++;
				}
			}
			if(t == static_cast<int>(g.right.size()) && first[g.left].find("ε") == first[g.left].end()) {
				first[g.left].insert("ε");
				change = true;
			}
		}
	}

	first.erase("S");
	
}

void GetTable::judge_first(std::vector<std::string> s, std::unordered_set<std::string> &result) {
	int count = 0;
	for(auto i : s) {
		if(!inVT(i)) {
			result.insert(i);
			break;
		}
		if(first[i].find("ε") == first[i].end()) {
			result.insert(first[i].begin(),first[i].end());
			break;
		} 

		result.insert(first[i].begin(), first[i].end());
		result.erase("ε");
		count++;
	
	}

	if(count == static_cast<int>(s.size()))
		result.insert("ε");
}

void GetTable::get_follow() {
	//与求first集类似,设置change
	bool change = true;
	while(change) {
		change = false;
		//循环每个文法
		std::vector<std::string>::iterator search_next; //向后扫描指针
		for(auto &g : G) {
			for(auto it = g.right.begin(); it != g.right.end(); it++) {
				if(!inVT(*it)) 
					continue;
				auto original = follow[*it].size();
				search_next = it + 1;
				if(search_next != g.right.end()) {
					std::vector<std::string> tmp(search_next,g.right.end());	
					std::unordered_set<std::string> stmp;
					judge_first(tmp,stmp);
					follow[*it].insert(stmp.begin(),stmp.end());
					if(stmp.find("ε") != stmp.end()) {
						follow[*it].erase("ε");
						follow[*it].insert(follow[g.left].begin(),follow[g.left].end());
					} 
					if(original < follow[*it].size())
						change = true;
				} else {
					follow[*it].insert(follow[g.left].begin(), follow[g.left].end());
					if(original < follow[*it].size())
						change = true;
				}
			}
		}
	}

	follow.erase("S");
}

void GetTable::get_closure(Status &p) {
	bool change = true;
	Status pptmp;
	while(change) {
		change = false;
		pptmp = p;
		for(auto &pro : pptmp) {
			if(pro.dot_position == static_cast<int>(G[pro.pro_num].right.size()))
				continue;
			std::string V = G[pro.pro_num].right[pro.dot_position];
			if(!inVT(V)) 
				continue;
			std::unordered_set<std::string> new_successor;

			//求新项目的后继符集
			if(pro.dot_position == static_cast<int>(G[pro.pro_num].right.size()) - 1)
				new_successor = pro.successors;
			else {
				std::vector<std::string> vtmp(G[pro.pro_num].right.begin()+pro.dot_position+1,G[pro.pro_num].right.end()); 
				judge_first(vtmp, new_successor);
				if(new_successor.find("ε") != new_successor.end()) {
					new_successor.insert(pro.successors.begin(),pro.successors.end());
					new_successor.erase("ε");
				}
			}
			Project ptmp;
			for(auto &i : index[V]) {
				ptmp.pro_num = i;
				ptmp.dot_position = 0;
				ptmp.successors = new_successor;
				if(p.find(ptmp) == p.end()) {
					p.insert(ptmp);
					change = true;
				}
				else {
					auto original = p.find(ptmp)->successors.size(); 
					ptmp.successors.insert(p.find(ptmp)->successors.begin(),p.find(ptmp)->successors.end());
					p.erase(*p.find(ptmp));
					p.insert(ptmp);

					if(original < ptmp.successors.size()) 
						change = true;
				}	
			}
		}
	}
}

bool GetTable::judge_repeat(Status s1, Status s2) {
	if(s1.size() == s2.size()) {
		for(auto pros : s1) {
			if(s2.find(pros) == s2.end())
				return false;	
			else {
				Project tmp = *(s2.find(pros));
				if(!(tmp == pros))
					return false;
			}
		}
		return true;
	}
	return false;
}


bool GetTable::judge_conflict(Status & s, std::unordered_set<std::string> &result) {
	bool flag = false;
	std::unordered_set<std::string> tmp;
	for(auto pro : s) {
		if(pro.dot_position == static_cast<int>(G[pro.pro_num].right.size())) 
			tmp.insert(pro.successors.begin(),pro.successors.end());
	}

	for(auto pro : s) {
		if(pro.dot_position < static_cast<int>(G[pro.pro_num].right.size())) {
			std::string next = G[pro.pro_num].right[pro.dot_position];
			if(tmp.find(next) != tmp.end()) {
				result.insert(next);
				flag = true;
			}
		}
	}

	return flag;

}
	

void GetTable::get_status() {
	//初始化第一个状态0
	int t = 0;
	Project ptmp;
	ptmp.dot_position = 0;
	ptmp.pro_num = 0;
	ptmp.successors = {"#"};
	Status tmp_status(10,hasher,eq);
	tmp_status.insert(ptmp);
	get_closure(tmp_status);

	statuses.insert({t,tmp_status});

	//获取状态集合
	bool change = true;
	std::unordered_set<int> record; //记录已经求过的状态
	std::unordered_map<int, Status> sstmp;
	std::unordered_set<std::string> conflict;

	while(change) {
		change = false;
		sstmp = statuses;
		
		for(auto &sta : sstmp) {
			if(record.find(sta.first) != record.end()) 
				continue;
			record.insert(sta.first);
			//judge_conflict(sta.second,conflict);
			/*
			 * 开始球状态转移,用一个set<string>记录被转移过的状态
			 */
			std::unordered_set<std::string> record_status;
			for(auto pros : sta.second) {
				if(G[pros.pro_num].right[0] == "ε" || pros.dot_position == static_cast<int>(G[pros.pro_num].right.size())) {
					for(auto sucess : pros.successors) {  
						if(action[sta.first].find(sucess) == action[sta.first].end())
						action[sta.first][sucess] = "r" + std::to_string(pros.pro_num);
					}
					continue;
				}
				std::string trans = G[pros.pro_num].right[pros.dot_position];
				if(record_status.find(trans) != record_status.end()) 
					continue;
				record_status.insert(trans);
				tmp_status.clear();
				ptmp.pro_num = pros.pro_num;
				ptmp.dot_position = pros.dot_position + 1;
				ptmp.successors = pros.successors;
				tmp_status.insert(ptmp);
				for(auto protmp : sta.second) {
					if(G[protmp.pro_num].right[protmp.dot_position] == trans && !(protmp == pros)) {
						ptmp.pro_num = protmp.pro_num;
						ptmp.dot_position = protmp.dot_position + 1;
						ptmp.successors = protmp.successors;
						tmp_status.insert(ptmp);
					}
				}
				get_closure(tmp_status);
				bool flag = true;
				for(auto s : sstmp) {
					if(judge_repeat(s.second,tmp_status)) {
						if(inVT(trans))
							goTo[sta.first][trans] = s.first;
						else
							action[sta.first][trans] = "s" + std::to_string(s.first);
						flag = false;
						break;
					}							
				}
				if(!flag) 
					continue;
				statuses.insert({++t, tmp_status});
				change = true;

				//填写goTo表和action表
				if(inVT(trans)) 
					goTo[sta.first][trans] = t;
				else 
					action[sta.first][trans] = "s" + std::to_string(t);
			}
		}
	}
	//添加acc
	action[goTo[0]["Program"]]["#"] = "acc";

}


void GetTable::print(std::ofstream &o1, std::ofstream &o2) {
	for(auto f1 : action) {
		for(auto f2 : f1.second)
			o1 << f1.first << " " << f2.first << " " << f2.second << std::endl;
	}

	for(auto f1 : goTo) {
		for(auto f2 : f1.second)
			o2 << f1.first << " " << f2.first << " " << f2.second << std::endl;
	}
}


int main() {
	GetTable test;
	std::ifstream gin("grammer.txt");
	std::ifstream lin("lexer.out");
	std::ofstream action("action.out");
	std::ofstream goTo("goto.out");
	test.get_grammer(gin);
	test.get_first();
	test.get_follow();
	test.get_status();
	test.print(action,goTo);



	gin.close();
	lin.close();
	action.close();
	goTo.close();
	return 0;
}
原文地址:https://www.cnblogs.com/vachester/p/6884350.html