编译原理之(1)C的Lex词法文件和yacc语法文件

C的Lex词法文件
发信站: 瀚海星云 (2005年11月15日18:02:26 星期二), 站内信件 WWWPOST

/* Named sub-patterns used by the rules below.  Each definition must sit */
/* on a single line: the name, white space, then the pattern.            */
/*   D  one decimal digit                                                */
/*   L  one letter or underscore (first character of an identifier)      */
/*   H  one hexadecimal digit                                            */
/*   E  exponent part of a floating constant                             */
/*   FS one floating-constant suffix character                           */
/*   IS any run of integer-constant suffix characters                    */
D            [0-9]
L            [a-zA-Z_]
H            [a-fA-F0-9]
E            [Ee][+-]?{D}+
FS            (f|F|l|L)
IS            (u|U|l|L)*

%{
/*
 * C prologue copied verbatim into the generated scanner.
 * y.tab.h supplies the token codes that the rule actions return.
 */
#include <stdio.h>
#include "y.tab.h"

/*
 * Helpers defined in the user-code section at the end of this file.
 * They are called from rule actions, so they are declared here first.
 * The declarations are deliberately unprototyped so they stay compatible
 * with the old-style definitions further down.
 */
void count();
int comment();
int check_type();
%}


%%
"/*"          { /* the rest of the comment is consumed by comment() */ comment(); }

"auto"        { count(); return AUTO; }
"break"       { count(); return BREAK; }
"case"        { count(); return CASE; }
"char"        { count(); return CHAR; }
"const"       { count(); return CONST; }
"continue"    { count(); return CONTINUE; }
"default"     { count(); return DEFAULT; }
"do"          { count(); return DO; }
"double"      { count(); return DOUBLE; }
"else"        { count(); return ELSE; }
"enum"        { count(); return ENUM; }
"extern"      { count(); return EXTERN; }
"float"       { count(); return FLOAT; }
"for"         { count(); return FOR; }
"goto"        { count(); return GOTO; }
"if"          { count(); return IF; }
"int"         { count(); return INT; }
"long"        { count(); return LONG; }
"register"    { count(); return REGISTER; }
"return"      { count(); return RETURN; }
"short"       { count(); return SHORT; }
"signed"      { count(); return SIGNED; }
"sizeof"      { count(); return SIZEOF; }
"static"      { count(); return STATIC; }
"struct"      { count(); return STRUCT; }
"switch"      { count(); return SWITCH; }
"typedef"     { count(); return TYPEDEF; }
"union"       { count(); return UNION; }
"unsigned"    { count(); return UNSIGNED; }
"void"        { count(); return VOID; }
"volatile"    { count(); return VOLATILE; }
"while"       { count(); return WHILE; }

{L}({L}|{D})*           { /* identifier; check_type() picks the token code */ count(); return(check_type()); }

0[xX]{H}+{IS}?          { /* hexadecimal integer constant */ count(); return(CONSTANT); }
0{D}+{IS}?              { /* integer constant with a leading zero */ count(); return(CONSTANT); }
{D}+{IS}?               { /* decimal integer constant */ count(); return(CONSTANT); }
L?'(\\.|[^\\'])+'       { /* character constant, optional L prefix */ count(); return(CONSTANT); }

{D}+{E}{FS}?            { /* floating constant: digits plus exponent */ count(); return(CONSTANT); }
{D}*"."{D}+({E})?{FS}?  { /* floating constant: digits after the point */ count(); return(CONSTANT); }
{D}+"."{D}*({E})?{FS}?  { /* floating constant: digits before the point */ count(); return(CONSTANT); }

L?\"(\\.|[^\\"])*\"     { /* string literal, optional L prefix */ count(); return(STRING_LITERAL); }

"..."                   { /* three dots; a quoted empty string is not a valid pattern */ count(); return(ELLIPSIS); }
">>="                   { count(); return(RIGHT_ASSIGN); }
"<<="                   { count(); return(LEFT_ASSIGN); }
"+="                    { count(); return(ADD_ASSIGN); }
"-="                    { count(); return(SUB_ASSIGN); }
"*="                    { count(); return(MUL_ASSIGN); }
"/="                    { count(); return(DIV_ASSIGN); }
"%="                    { count(); return(MOD_ASSIGN); }
"&="                    { count(); return(AND_ASSIGN); }
"^="                    { count(); return(XOR_ASSIGN); }
"|="                    { count(); return(OR_ASSIGN); }
">>"                    { count(); return(RIGHT_OP); }
"<<"                    { count(); return(LEFT_OP); }
"++"                    { count(); return(INC_OP); }
"--"                    { count(); return(DEC_OP); }
"->"                    { count(); return(PTR_OP); }
"&&"                    { count(); return(AND_OP); }
"||"                    { count(); return(OR_OP); }
"<="                    { count(); return(LE_OP); }
">="                    { count(); return(GE_OP); }
"=="                    { count(); return(EQ_OP); }
"!="                    { count(); return(NE_OP); }
";"                     { count(); return(';'); }
("{"|"<%")              { /* brace or its two-character alternative spelling */ count(); return('{'); }
("}"|"%>")              { count(); return('}'); }
","                     { count(); return(','); }
":"                     { count(); return(':'); }
"="                     { count(); return('='); }
"("                     { count(); return('('); }
")"                     { count(); return(')'); }
("["|"<:")              { /* bracket or its two-character alternative spelling */ count(); return('['); }
("]"|":>")              { count(); return(']'); }
"."                     { count(); return('.'); }
"&"                     { count(); return('&'); }
"!"                     { count(); return('!'); }
"~"                     { count(); return('~'); }
"-"                     { count(); return('-'); }
"+"                     { count(); return('+'); }
"*"                     { count(); return('*'); }
"/"                     { count(); return('/'); }
"%"                     { count(); return('%'); }
"<"                     { count(); return('<'); }
">"                     { count(); return('>'); }
"^"                     { count(); return('^'); }
"|"                     { count(); return('|'); }
"?"                     { count(); return('?'); }

[ \t\v\n\f]             { /* white space: echoed and counted, no token returned */ count(); }
.                       { /* ignore bad characters */ }

%%

/*
 * yywrap - end-of-input hook called by the generated scanner.
 *
 * Always returns 1, which by lex convention means there is no further
 * input file to switch to, so scanning ends.
 */
int yywrap(void)
{
    return 1;
}



/*
 * comment - consume the body of a C comment once the scanner rule has
 * matched the opening delimiter.
 *
 * Characters are read one at a time with input().  Every character other
 * than '*' is echoed to stdout with putchar(); '*' characters are
 * swallowed, and the '/' that closes the comment is echoed as well.
 *
 * Scanning stops at the closing delimiter or at end of input.  End of
 * input is reported by input() as 0 or as EOF depending on the lex
 * implementation, so both are checked, and the character is held in an
 * int so that EOF stays distinguishable from every real character.
 *
 * Returns 0 in all cases; the rule action ignores the value.
 */
int comment(void)
{
    int c;

    for (;;) {
        /* Echo the comment text up to the next '*'. */
        while ((c = input()) != '*' && c != 0 && c != EOF)
            putchar(c);

        if (c != '*')
            return 0;           /* input ended inside the comment */

        c = input();
        if (c == '/') {
            putchar(c);         /* closing delimiter reached */
            return 0;
        }
        if (c == 0 || c == EOF)
            return 0;           /* input ended right after a '*' */

        /* Not the end yet: rescan this character, it may itself be '*'. */
        unput(c);
    }
}



int column = 0;

void count()
{
    
int i;

    
for (i = 0; yytext[i] != '\0'; i++)
        
if (yytext[i] == '\n')
            column 
= 0;
        
else if (yytext[i] == '\t')
            column 
+= 8 - (column % 8);
        
else
            column
++;

    ECHO;
}



/*
 * check_type - choose the token code for the identifier held in yytext.
 *
 * Intended behaviour (pseudo code, not implemented):
 *
 *    if (yytext == type_name)
 *        return(TYPE_NAME);
 *
 *    return(IDENTIFIER);
 *
 * As written, the function performs no lookup and reports every name as
 * IDENTIFIER.
 */
int check_type()
{
    return IDENTIFIER;
}



C的Yacc语法文件
发信站: 瀚海星云 (2005年11月15日18:02:44 星期二), 站内信件 WWWPOST

%{
/*
 * Routines the generated parser calls.  They are declared here so the
 * calls are not implicit declarations.  The declarations are left
 * unprototyped so they agree with either an old-style or a prototyped
 * definition of yyerror in the user-code section.
 */
int yylex();
int yyerror();
%}

/* Identifiers, literals, sizeof, and the multi-character operators. */
%token IDENTIFIER CONSTANT STRING_LITERAL SIZEOF
%token PTR_OP INC_OP DEC_OP LEFT_OP RIGHT_OP LE_OP GE_OP EQ_OP NE_OP
%token AND_OP OR_OP MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN
%token SUB_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN
%token XOR_ASSIGN OR_ASSIGN TYPE_NAME

/* Storage classes, type specifiers and qualifiers, aggregate keywords. */
%token TYPEDEF EXTERN STATIC AUTO REGISTER
%token CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE CONST VOLATILE VOID
%token STRUCT UNION ENUM ELLIPSIS

/* Statement keywords. */
%token CASE DEFAULT IF ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN

/* The grammar's goal symbol. */
%start translation_unit
%%

/*
 * Expressions.  The rules run from the tightest-binding forms (primary,
 * postfix, unary) to the loosest (assignment, comma); each binary level
 * is left-recursive over the level defined just before it.
 */
primary_expression:
	  IDENTIFIER
	| CONSTANT
	| STRING_LITERAL
	| '(' expression ')'
	;

postfix_expression:
	  primary_expression
	| postfix_expression '[' expression ']'
	| postfix_expression '(' ')'
	| postfix_expression '(' argument_expression_list ')'
	| postfix_expression '.' IDENTIFIER
	| postfix_expression PTR_OP IDENTIFIER
	| postfix_expression INC_OP
	| postfix_expression DEC_OP
	;

argument_expression_list:
	  assignment_expression
	| argument_expression_list ',' assignment_expression
	;

unary_expression:
	  postfix_expression
	| INC_OP unary_expression
	| DEC_OP unary_expression
	| unary_operator cast_expression
	| SIZEOF unary_expression
	| SIZEOF '(' type_name ')'
	;

unary_operator:
	  '&' | '*' | '+' | '-' | '~' | '!'
	;

cast_expression:
	  unary_expression
	| '(' type_name ')' cast_expression
	;

multiplicative_expression:
	  cast_expression
	| multiplicative_expression '*' cast_expression
	| multiplicative_expression '/' cast_expression
	| multiplicative_expression '%' cast_expression
	;

additive_expression:
	  multiplicative_expression
	| additive_expression '+' multiplicative_expression
	| additive_expression '-' multiplicative_expression
	;

shift_expression:
	  additive_expression
	| shift_expression LEFT_OP additive_expression
	| shift_expression RIGHT_OP additive_expression
	;

relational_expression:
	  shift_expression
	| relational_expression '<' shift_expression
	| relational_expression '>' shift_expression
	| relational_expression LE_OP shift_expression
	| relational_expression GE_OP shift_expression
	;

equality_expression:
	  relational_expression
	| equality_expression EQ_OP relational_expression
	| equality_expression NE_OP relational_expression
	;

and_expression:
	  equality_expression
	| and_expression '&' equality_expression
	;

exclusive_or_expression:
	  and_expression
	| exclusive_or_expression '^' and_expression
	;

inclusive_or_expression:
	  exclusive_or_expression
	| inclusive_or_expression '|' exclusive_or_expression
	;

logical_and_expression:
	  inclusive_or_expression
	| logical_and_expression AND_OP inclusive_or_expression
	;

logical_or_expression:
	  logical_and_expression
	| logical_or_expression OR_OP logical_and_expression
	;

conditional_expression:
	  logical_or_expression
	| logical_or_expression '?' expression ':' conditional_expression
	;

assignment_expression:
	  conditional_expression
	| unary_expression assignment_operator assignment_expression
	;

assignment_operator:
	  '='
	| MUL_ASSIGN | DIV_ASSIGN | MOD_ASSIGN
	| ADD_ASSIGN | SUB_ASSIGN
	| LEFT_ASSIGN | RIGHT_ASSIGN
	| AND_ASSIGN | XOR_ASSIGN | OR_ASSIGN
	;

expression:
	  assignment_expression
	| expression ',' assignment_expression
	;

constant_expression:
	  conditional_expression
	;

/*
 * Declarations: specifiers, struct/union/enum bodies, declarators,
 * parameter lists, type names and initializers.
 */
declaration:
	  declaration_specifiers ';'
	| declaration_specifiers init_declarator_list ';'
	;

declaration_specifiers:
	  storage_class_specifier
	| storage_class_specifier declaration_specifiers
	| type_specifier
	| type_specifier declaration_specifiers
	| type_qualifier
	| type_qualifier declaration_specifiers
	;

init_declarator_list:
	  init_declarator
	| init_declarator_list ',' init_declarator
	;

init_declarator:
	  declarator
	| declarator '=' initializer
	;

storage_class_specifier:
	  TYPEDEF | EXTERN | STATIC | AUTO | REGISTER
	;

type_specifier:
	  VOID | CHAR | SHORT | INT | LONG | FLOAT | DOUBLE | SIGNED | UNSIGNED
	| struct_or_union_specifier
	| enum_specifier
	| TYPE_NAME
	;

struct_or_union_specifier:
	  struct_or_union IDENTIFIER '{' struct_declaration_list '}'
	| struct_or_union '{' struct_declaration_list '}'
	| struct_or_union IDENTIFIER
	;

struct_or_union:
	  STRUCT | UNION
	;

struct_declaration_list:
	  struct_declaration
	| struct_declaration_list struct_declaration
	;

struct_declaration:
	  specifier_qualifier_list struct_declarator_list ';'
	;

specifier_qualifier_list:
	  type_specifier specifier_qualifier_list
	| type_specifier
	| type_qualifier specifier_qualifier_list
	| type_qualifier
	;

struct_declarator_list:
	  struct_declarator
	| struct_declarator_list ',' struct_declarator
	;

struct_declarator:
	  declarator
	| ':' constant_expression
	| declarator ':' constant_expression
	;

enum_specifier:
	  ENUM '{' enumerator_list '}'
	| ENUM IDENTIFIER '{' enumerator_list '}'
	| ENUM IDENTIFIER
	;

enumerator_list:
	  enumerator
	| enumerator_list ',' enumerator
	;

enumerator:
	  IDENTIFIER
	| IDENTIFIER '=' constant_expression
	;

type_qualifier:
	  CONST | VOLATILE
	;

declarator:
	  pointer direct_declarator
	| direct_declarator
	;

direct_declarator:
	  IDENTIFIER
	| '(' declarator ')'
	| direct_declarator '[' constant_expression ']'
	| direct_declarator '[' ']'
	| direct_declarator '(' parameter_type_list ')'
	| direct_declarator '(' identifier_list ')'
	| direct_declarator '(' ')'
	;

pointer:
	  '*'
	| '*' type_qualifier_list
	| '*' pointer
	| '*' type_qualifier_list pointer
	;

type_qualifier_list:
	  type_qualifier
	| type_qualifier_list type_qualifier
	;


parameter_type_list:
	  parameter_list
	| parameter_list ',' ELLIPSIS
	;

parameter_list:
	  parameter_declaration
	| parameter_list ',' parameter_declaration
	;

parameter_declaration:
	  declaration_specifiers declarator
	| declaration_specifiers abstract_declarator
	| declaration_specifiers
	;

identifier_list:
	  IDENTIFIER
	| identifier_list ',' IDENTIFIER
	;

type_name:
	  specifier_qualifier_list
	| specifier_qualifier_list abstract_declarator
	;

abstract_declarator:
	  pointer
	| direct_abstract_declarator
	| pointer direct_abstract_declarator
	;

direct_abstract_declarator:
	  '(' abstract_declarator ')'
	| '[' ']'
	| '[' constant_expression ']'
	| direct_abstract_declarator '[' ']'
	| direct_abstract_declarator '[' constant_expression ']'
	| '(' ')'
	| '(' parameter_type_list ')'
	| direct_abstract_declarator '(' ')'
	| direct_abstract_declarator '(' parameter_type_list ')'
	;

initializer:
	  assignment_expression
	| '{' initializer_list '}'
	| '{' initializer_list ',' '}'
	;

initializer_list:
	  initializer
	| initializer_list ',' initializer
	;

/*
 * Statements.  In a compound statement all declarations come before
 * the first statement.  The two IF alternatives share a prefix, so an
 * ELSE after a nested IF can attach to either one (the usual
 * dangling-else ambiguity).
 */
statement:
	  labeled_statement
	| compound_statement
	| expression_statement
	| selection_statement
	| iteration_statement
	| jump_statement
	;

labeled_statement:
	  IDENTIFIER ':' statement
	| CASE constant_expression ':' statement
	| DEFAULT ':' statement
	;

compound_statement:
	  '{' '}'
	| '{' statement_list '}'
	| '{' declaration_list '}'
	| '{' declaration_list statement_list '}'
	;

declaration_list:
	  declaration
	| declaration_list declaration
	;

statement_list:
	  statement
	| statement_list statement
	;

expression_statement:
	  ';'
	| expression ';'
	;

selection_statement:
	  IF '(' expression ')' statement
	| IF '(' expression ')' statement ELSE statement
	| SWITCH '(' expression ')' statement
	;

iteration_statement:
	  WHILE '(' expression ')' statement
	| DO statement WHILE '(' expression ')' ';'
	| FOR '(' expression_statement expression_statement ')' statement
	| FOR '(' expression_statement expression_statement expression ')' statement
	;

jump_statement:
	  GOTO IDENTIFIER ';'
	| CONTINUE ';'
	| BREAK ';'
	| RETURN ';'
	| RETURN expression ';'
	;

/*
 * Top level: a translation unit is one or more external declarations.
 * A function definition may omit its declaration specifiers and may
 * carry a declaration list between the declarator and the body.
 */
translation_unit:
	  external_declaration
	| translation_unit external_declaration
	;

external_declaration:
	  function_definition
	| declaration
	;

function_definition:
	  declaration_specifiers declarator declaration_list compound_statement
	| declaration_specifiers declarator compound_statement
	| declarator declaration_list compound_statement
	| declarator compound_statement
	;

%%
#include <stdio.h>

/* NOTE(review): yytext is not used below, and some scanner generators
 * define it as a pointer rather than an array - confirm before using it. */
extern char yytext[];
/* Current column of the echoed input; updated by the scanner's count(). */
extern int column;

/*
 * yyerror - report a parse error under the echoed input line.
 *
 * Flushes stdout, then prints a '^' right-aligned in a field `column`
 * wide, followed on the next line by the message `s` right-aligned in a
 * field of the same width.
 *
 * s: message text supplied by the parser.
 * Returns 0; the value is not meaningful.
 */
int yyerror(const char *s)
{
    fflush(stdout);
    printf("\n%*s\n%*s\n", column, "^", column, s);
    return 0;
}



--
※ 来源:·瀚海星云 bbs.ustc.edu.cn·[FROM: 61.191.193.133]
原文地址:https://www.cnblogs.com/cutepig/p/813638.html