编译原理作业五

词法分析程序（Lexical Analyzer）要求：

- 从左至右扫描构成源程序的字符流

- 识别出有词法意义的单词（Lexemes）

- 返回单词记录（单词类别，单词本身）

- 滤掉空格

- 跳过注释

- 发现词法错误

程序结构：

输入：字符流（什么输入方式，什么数据结构保存）

处理：

–遍历（什么遍历方式）

–词法规则

输出：单词流（什么输出形式）

–二元组

单词类别：

1.标识符（10）

2.无符号数（11）

3.保留字（一词一码）

4.运算符（一词一码）

5.界符（一词一码）

单词符号	种别码	单词符号	种别码
begin	1	:	17
if	2	:=	18
then	3	<	20
while	4	<=	21
do	5	<>	22
end	6	>	23
l(l|d)*	10	>=	24
dd*	11	=	25
+	13	;	26
-	14	(	27
*	15	)	28
/	16	#	0

word类：

/**
 * A single scanner result: the pair (type code, lexeme text).
 *
 * <p>Both properties are plain mutable values; a freshly constructed instance
 * has type code {@code 0} and a {@code null} lexeme until the setters are called.
 */
public class word {

    /** Text of the lexeme as reported by the scanner. */
    private String word;

    /** Numeric category (type code) of the lexeme. */
    private int typenum;

    /**
     * @return the text stored for this lexeme
     */
    public String getWord() {
        return this.word;
    }

    /**
     * @param word the text to store for this lexeme
     */
    public void setWord(String word) {
        this.word = word;
    }

    /**
     * @return the numeric type code of this lexeme
     */
    public int getTypenum() {
        return this.typenum;
    }

    /**
     * @param typenum the numeric type code to store
     */
    public void setTypenum(int typenum) {
        this.typenum = typenum;
    }
}

keywords类：

public class keywords {
    private static String KEYWORDS = "关键字";//关键字判断
    private word words;
    private char[] input = new char[255];
    private char[] token = new char[255];
    private int p_input=0;//用于输入字符记数
    private int p_token=0;//用于数字字符记数
    private char ch;    //获取单个字符

    private String[] rwtab = {"begin", "if", "then","while", "do", "end", KEYWORDS};

    public keywords(char [] input){
    this.input = input;

    }

    public char ch_getCh(){         //获取下一个字符，用于多位符号判断
    if (p_input<input.length){
    ch=input[p_input];
    p_input++;
    }
        return  ch;
    }

    public void getbch(){       //获取标识符或空格的下一位字符
        while ((ch == ' '||ch =='	')&&(p_input<input.length)){
        ch = input[p_input];
        p_input++;

        }

    }

    public void concat(){   //连接字符
        token[p_token] = ch;
        p_token++;
        token[p_token]='';

    }

    public boolean letter() {       //判断字符是否为字母

        if(ch>='a'&&ch<='z'||ch>='A'&&ch<='Z')

            return true;

        else

            return false;

    }

    public boolean num() {      //      判断字符是否为数字

        if(ch>='0'&&ch<='9')

            return true;

        else

            return false;

    }


    public void re() {//回退一个字符

        p_input--;

    }

    public String dtb() {
        int num = token[0] - 48;
        for(int i = 1; i < p_token; i++) {
            num = num * 10 + token[i] - 48;
        }
        StringBuilder result = new StringBuilder(); //字符串连接
        while(num>0) {
            int r = num % 2;
            int s = num / 2;
            result.append(r);
            num = s;
        }
        return result.reverse().toString();
    }
    public int reserve() {  //   用于检查token字符串内是否存在关键字
        int  i=0;
        while(rwtab[i].compareTo(KEYWORDS)!=0) {
            if(rwtab[i].compareTo(new String(token).trim()) == 0) {//移除字符串两侧的空白字符或其他预定义字符
                return i+1;
            }
            i++;
        }
        return 10;
    }

    public word scan()  //开始扫描的大哥函数
    {
        token = new char[255];
        word myWord = new word();
        myWord.setTypenum(10);
        myWord.setWord("");
        p_token=0;
        ch_getCh();
        getbch();

        if(letter()) {  //找出标识符
            while(letter()||num()) {
                concat();
                ch_getCh();
            }
            re();
            myWord.setTypenum(reserve());
            myWord.setWord(new String(token).trim());
            return myWord;
        }
        else if(num()) {    
            while(num()) {
                concat();
                ch_getCh();
            }
            re();
            myWord.setTypenum(11);
            myWord.setWord(new String(token).trim());  //输出token中的数字串字符形式//
             myWord.setWord(dtb());                   //输出token中的数字串10进制值的二进制字符串形式
            return myWord;
        }
        else
            switch (ch) {
            case '=':
                myWord.setTypenum(25);
                myWord.setWord("=");
                return myWord;
            case '+':
                myWord.setTypenum(13);
                myWord.setWord("+");
                return myWord;
            case '-':
                myWord.setTypenum(14);
                myWord.setWord("-");
                return myWord;
            case '*':
                    myWord.setTypenum(15);
                    myWord.setWord("*");
                    return myWord;
            case '/':
                ch_getCh();             //识别单行注释，个人用了30备注
                if (ch == '/') {
                    while(ch_getCh() != '
');
                    myWord.setTypenum(30);
                    myWord.setWord("\n");
                    return myWord;
                }
                //识别多行注释，个人用31来表示
                if(ch=='*') {
                    String string = "";
                    while(true) {
                        if (ch == '*') {
                            if (ch_getCh() == '/') {    //找到多行注释结尾
                                myWord.setTypenum(31);
                                myWord.setWord(string);
                                return myWord;
                            }
                            re();
                        }
                        if (ch_getCh() == '
') {   //找到多行注释的下一行
                            string += "\n";
                        }
                    }
                }
                re();
                myWord.setTypenum(16);
                myWord.setWord("/");
                return myWord;
            case ':':
                ch_getCh();
                if(ch=='=') {
                    myWord.setTypenum(18);
                    myWord.setWord(":=");
                    return myWord;
                }
                re();
                myWord.setTypenum(17);
                myWord.setWord(":");
                return myWord;
            case '<':
                ch_getCh();
                if(ch=='=') {
                    myWord.setTypenum(21);
                    myWord.setWord("<=");
                    return myWord;
                }
                else if (ch == '>') {
                    myWord.setTypenum(22);
                    myWord.setWord("<>");
                    return myWord;
                }
                re();
                myWord.setTypenum(20);
                myWord.setWord("<");
                return myWord;
            case '>':
                    ch_getCh();
                    if(ch=='=') {
                        myWord.setTypenum(24);
                        myWord.setWord(">=");
                        return myWord;
                    }
                    re();
                    myWord.setTypenum(23);
                    myWord.setWord(">");
                    return myWord;
            case ';':
                myWord.setTypenum(26);
                myWord.setWord(";");
                return myWord;
            case '(':
                myWord.setTypenum(27);
                myWord.setWord("(");
                return myWord;
            case ')':
                myWord.setTypenum(28);
                myWord.setWord(")");
                return myWord;
            case '
':
                myWord.setTypenum(30);
                myWord.setWord("\n");
                return myWord;
            case '#':
                myWord.setTypenum(0);
                myWord.setWord("#");
                return myWord;
            default:
                concat();
                myWord.setTypenum(-1);
                myWord.setWord("其他字符 "" + new String(token).trim() + """);
                return myWord;
        }
    }

}

main类：
import java.io.*;
import java.util.ArrayList;
import java.util.Scanner;

public class main {
    private File inputFile;
    private File outputFile;
    private String fileContent;
    private ArrayList<word> list = new ArrayList<>();
    public main(String input,String output) {
        inputFile = new File(input);
        outputFile = new File(output);
    }

    public String getContent() {
        StringBuilder stringBuilder = new StringBuilder();
        try(Scanner reader = new Scanner(inputFile)) {
            while (reader.hasNextLine()) {
                String line = reader.nextLine();
                stringBuilder.append(line + "
");
            }
        } catch (FileNotFoundException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        return fileContent = stringBuilder.toString();
    }

    public void analyze(String fileContent) {
        int over = 1;
        word word = new word();
        keywords scanner = new keywords(fileContent.toCharArray());       
        while (over != 0) {          
            word = scanner.scan();
            list.add(word);          
            over = word.getTypenum();     
        }     saveResult();  
    }

    public void saveResult() {
        if (!outputFile.exists())
            try {
                outputFile.createNewFile();
            } catch (IOException e1) {
                // TODO Auto-generated catch block
                e1.printStackTrace();
            }
        try(Writer writer = new FileWriter(outputFile)){
                for (word word : list) {
                    writer.write("(" + word.getTypenum() + " ," + word.getWord() + ")
");
                }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        main Mains = new main("C:\Users\曾梓楷\Desktop\1.txt","C:\Users\曾梓楷\Desktop\output.txt");
        Mains.analyze(Mains.getContent());
    }
}

1.txt：
int main(){
 int n,i,j,k;
 printf("请输入班级人数：");
 scanf("%d",&n);
 struct student Stu[n];
 
 for(i=0;i<n;i++){
  printf("请按顺序输入第%d位学生姓名,学号和成绩:\n",i+1);
  printf("姓名 学号 成绩\n");
  scanf("%s %d %d",Stu[i].name,&Stu[i].num,&Stu[i].cj);
 }
 for(i=0;i<n-1;i++)
  for(j=0;j<n-i-1;j++)
   if(Stu[j].cj<Stu[j+1].cj)                   
   {
    t=Stu[j];
    Stu[j]=Stu[j+1];
    Stu[j+1]=t;
   }
   printf("他们的排名如下:\n");
   for(i=0;i<n;i++){ 
    printf("%s %d %d\n",Stu[i].name,Stu[i].num,Stu[i].cj);
   } 
 #
output.txt文件:
(10 ,int)
(10 ,main)
(27 ,()
(28 ,))
(32 ,{)
(30 ,
)
(10 ,int)
(10 ,n)
(-1 ,其他字符 ",")
(10 ,i)
(-1 ,其他字符 ",")
(10 ,j)
(-1 ,其他字符 ",")
(10 ,k)
(26 ,;)
(30 ,
)
(10 ,printf)
(27 ,()
(-1 ,其他字符 """)
(-1 ,其他字符 "请")
(-1 ,其他字符 "输")
(-1 ,其他字符 "入")
(-1 ,其他字符 "班")
(-1 ,其他字符 "级")
(-1 ,其他字符 "人")
(-1 ,其他字符 "数")
(-1 ,其他字符 "：")
(-1 ,其他字符 """)
(28 ,))
(26 ,;)
(30 ,
)
(10 ,scanf)
(27 ,()
(-1 ,其他字符 """)
(-1 ,其他字符 "%")
(10 ,d)
(-1 ,其他字符 """)
(-1 ,其他字符 ",")
(-1 ,其他字符 "&")
(10 ,n)
(28 ,))
(26 ,;)
(30 ,
)
(10 ,struct)
(10 ,student)
(10 ,Stu)
(-1 ,其他字符 "[")
(10 ,n)
(-1 ,其他字符 "]")
(26 ,;)
(30 ,
)
(30 ,
)
(10 ,for)
(27 ,()
(10 ,i)
(25 ,=)
(11 ,)
(26 ,;)
(10 ,i)
(20 ,<)
(10 ,n)
(26 ,;)
(10 ,i)
(13 ,+)
(13 ,+)
(28 ,))
(32 ,{)
(30 ,
)
(10 ,printf)
(27 ,()
(-1 ,其他字符 """)
(-1 ,其他字符 "请")
(-1 ,其他字符 "按")
(-1 ,其他字符 "顺")
(-1 ,其他字符 "序")
(-1 ,其他字符 "输")
(-1 ,其他字符 "入")
(-1 ,其他字符 "第")
(-1 ,其他字符 "%")
(10 ,d)
(-1 ,其他字符 "位")
(-1 ,其他字符 "学")
(-1 ,其他字符 "生")
(-1 ,其他字符 "姓")
(-1 ,其他字符 "名")
(-1 ,其他字符 ",")
(-1 ,其他字符 "学")
(-1 ,其他字符 "号")
(-1 ,其他字符 "和")
(-1 ,其他字符 "成")
(-1 ,其他字符 "绩")
(17 ,:)
(-1 ,其他字符 "\")
(10 ,n)
(-1 ,其他字符 """)
(-1 ,其他字符 ",")
(10 ,i)
(13 ,+)
(11 ,1)
(28 ,))
(26 ,;)
(30 ,
)
(10 ,printf)
(27 ,()
(-1 ,其他字符 """)
(-1 ,其他字符 "姓")
(-1 ,其他字符 "名")
(-1 ,其他字符 "学")
(-1 ,其他字符 "号")
(-1 ,其他字符 "成")
(-1 ,其他字符 "绩")
(-1 ,其他字符 "\")
(10 ,n)
(-1 ,其他字符 """)
(28 ,))
(26 ,;)
(30 ,
)
(10 ,scanf)
(27 ,()
(-1 ,其他字符 """)
(-1 ,其他字符 "%")
(10 ,s)
(-1 ,其他字符 "%")
(10 ,d)
(-1 ,其他字符 "%")
(10 ,d)
(-1 ,其他字符 """)
(-1 ,其他字符 ",")
(10 ,Stu)
(-1 ,其他字符 "[")
(10 ,i)
(-1 ,其他字符 "]")
(-1 ,其他字符 ".")
(10 ,name)
(-1 ,其他字符 ",")
(-1 ,其他字符 "&")
(10 ,Stu)
(-1 ,其他字符 "[")
(10 ,i)
(-1 ,其他字符 "]")
(-1 ,其他字符 ".")
(10 ,num)
(-1 ,其他字符 ",")
(-1 ,其他字符 "&")
(10 ,Stu)
(-1 ,其他字符 "[")
(10 ,i)
(-1 ,其他字符 "]")
(-1 ,其他字符 ".")
(10 ,cj)
(28 ,))
(26 ,;)
(30 ,
)
(33 ,})
(30 ,
)
(10 ,for)
(27 ,()
(10 ,i)
(25 ,=)
(11 ,)
(26 ,;)
(10 ,i)
(20 ,<)
(10 ,n)
(14 ,-)
(11 ,1)
(26 ,;)
(10 ,i)
(13 ,+)
(13 ,+)
(28 ,))
(30 ,
)
(10 ,for)
(27 ,()
(10 ,j)
(25 ,=)
(11 ,)
(26 ,;)
(10 ,j)
(20 ,<)
(10 ,n)
(14 ,-)
(10 ,i)
(14 ,-)
(11 ,1)
(26 ,;)
(10 ,j)
(13 ,+)
(13 ,+)
(28 ,))
(30 ,
)
(2 ,if)
(27 ,()
(10 ,Stu)
(-1 ,其他字符 "[")
(10 ,j)
(-1 ,其他字符 "]")
(-1 ,其他字符 ".")
(10 ,cj)
(20 ,<)
(10 ,Stu)
(-1 ,其他字符 "[")
(10 ,j)
(13 ,+)
(11 ,1)
(-1 ,其他字符 "]")
(-1 ,其他字符 ".")
(10 ,cj)
(28 ,))
(30 ,
)
(32 ,{)
(30 ,
)
(10 ,t)
(25 ,=)
(10 ,Stu)
(-1 ,其他字符 "[")
(10 ,j)
(-1 ,其他字符 "]")
(26 ,;)
(30 ,
)
(10 ,Stu)
(-1 ,其他字符 "[")
(10 ,j)
(-1 ,其他字符 "]")
(25 ,=)
(10 ,Stu)
(-1 ,其他字符 "[")
(10 ,j)
(13 ,+)
(11 ,1)
(-1 ,其他字符 "]")
(26 ,;)
(30 ,
)
(10 ,Stu)
(-1 ,其他字符 "[")
(10 ,j)
(13 ,+)
(11 ,1)
(-1 ,其他字符 "]")
(25 ,=)
(10 ,t)
(26 ,;)
(30 ,
)
(33 ,})
(30 ,
)
(10 ,printf)
(27 ,()
(-1 ,其他字符 """)
(-1 ,其他字符 "他")
(-1 ,其他字符 "们")
(-1 ,其他字符 "的")
(-1 ,其他字符 "排")
(-1 ,其他字符 "名")
(-1 ,其他字符 "如")
(-1 ,其他字符 "下")
(17 ,:)
(-1 ,其他字符 "\")
(10 ,n)
(-1 ,其他字符 """)
(28 ,))
(26 ,;)
(30 ,
)
(10 ,for)
(27 ,()
(10 ,i)
(25 ,=)
(11 ,)
(26 ,;)
(10 ,i)
(20 ,<)
(10 ,n)
(26 ,;)
(10 ,i)
(13 ,+)
(13 ,+)
(28 ,))
(32 ,{)
(30 ,
)
(10 ,printf)
(27 ,()
(-1 ,其他字符 """)
(-1 ,其他字符 "%")
(10 ,s)
(-1 ,其他字符 "%")
(10 ,d)
(-1 ,其他字符 "%")
(10 ,d)
(-1 ,其他字符 "\")
(10 ,n)
(-1 ,其他字符 """)
(-1 ,其他字符 ",")
(10 ,Stu)
(-1 ,其他字符 "[")
(10 ,i)
(-1 ,其他字符 "]")
(-1 ,其他字符 ".")
(10 ,name)
(-1 ,其他字符 ",")
(10 ,Stu)
(-1 ,其他字符 "[")
(10 ,i)
(-1 ,其他字符 "]")
(-1 ,其他字符 ".")
(10 ,num)
(-1 ,其他字符 ",")
(10 ,Stu)
(-1 ,其他字符 "[")
(10 ,i)
(-1 ,其他字符 "]")
(-1 ,其他字符 ".")
(10 ,cj)
(28 ,))
(26 ,;)
(30 ,
)
(33 ,})
(30 ,
)
(0 ,#)
由于选取的测试代码为C语言代码，其中的一些界符和字符（如逗号、方括号、引号、%、&、中文字符等）不在本词法规则的单词表内，因此被识别为“其他字符”（种别码 -1）。

编译原理 作业五

编译原理作业五