词法分析器实验报告(JAVA)

实验一、词法分析实验

商业软件工程专业   张树恒  201506110093

一、        实验目的

通过设计一个词法分析程序,对词法进行分析,加强对词法的理解,掌握对程序设计语言的分解和理解。

二、        实验内容和要求

在源程序中输入源代码

  • 对字符串表示的源程序 
  • 从左到右进行扫描和分解
  • 根据词法规则
  • 识别出一个一个具有独立意义的单词符号
  • 以供语法分析之用
  • 发现词法错误,则返回出错信息

在源程序中,自动识别单词,把单词分为五种,并输出对应的单词种别码。

  1. 识别关键字:main if int for while do return break continue,该类的单词码为1.
  2. 识别标识符:表示各种名字,如变量名、数组名、函数名等,如char ch, int syn, token,sum,该类的单词码为2.
  3. 运算符:+、-、*、/、=、>、<、>=、<=、!=
  4.  分隔符:,、;、{、}、(、)
  5. 常数,如123,4587

各种单词符号对应的种别码。

 

输出形式:

  • 二元式

–     (单词种别,单词自身的值)

  • 单词种别,表明单词的种类,语法分析需要的重要信息

–     整数码

  • 关键字、运算符、界符:一符一码
  • 标识符:10, 常数:11
  • 单词自身的值

–     标识符token、常数sum

–     关键字、运算符、界符token

三、        实验方法、步骤及结果测试

  1. 源程序名:编译原理实验报告中源程序名算法分析.c

可执行程序名:编译原理实验报告.exe

  2. 原理分析及流程图

      

  3. 主要程序段及其解释:

import java.awt.*;

import java.util.Scanner;

/**

 * Created by s2002 on 2016/9/30.

 */

public class com {

    // A small hand-written lexical analyser for a Pascal-like toy language.
    //
    // Token codes produced by this class:
    //   1..6  keywords, in the order of KEY_WORDS (begin, end, if, then, while, do)
    //   10    identifier           11    unsigned integer constant
    //   13 *  14 /  15 +  16 -     17 :  18 :=
    //   20 >  21 <>  22 <=  23 <   24 >=  25 =
    //   26 ;  27 (  28 )
    //   0     '#' (end-of-input marker)
    //   -1    any character that is not part of the language

    /** Reserved words; the token code of a keyword is its position in this array plus one. */
    private static final String[] KEY_WORDS = {"begin", "end", "if", "then", "while", "do"};

    private static final int CODE_END = 0;
    private static final int CODE_UNKNOWN = -1;
    private static final int CODE_IDENTIFIER = 10;
    private static final int CODE_NUMBER = 11;

    /** One recognised lexeme together with the position at which scanning must resume. */
    private static final class Token {
        /** Index of the first character after this lexeme. */
        final int nextIndex;
        /** Token code, see the table at the top of the class. */
        final int code;
        /** The lexeme exactly as it appeared in the input. */
        final String text;

        Token(int nextIndex, int code, String text) {
            this.nextIndex = nextIndex;
            this.code = code;
            this.text = text;
        }
    }

    /**
     * Reads one line from standard input and prints every token as {@code < code ,lexeme >}.
     *
     * <p>Scanning stops after the {@code '#'} marker has been printed, or when the end of the
     * line is reached (so a missing {@code '#'} or an empty line no longer raises an exception).
     * Spaces between tokens, including leading spaces, are skipped.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Deliberately not closed: closing the Scanner would also close System.in.
        Scanner input = new Scanner(System.in);
        System.out.print("Please input a string <end with '#'>:");
        String uString = input.hasNextLine() ? input.nextLine() : "";

        int index = 0;
        while (index < uString.length()) {
            char current = uString.charAt(index);
            if (current == ' ') {
                index++;
                continue;
            }
            Token token = scan(current, uString, index, KEY_WORDS);
            index = token.nextIndex;
            System.out.println("< " + token.code + " ," + token.text + " >");
            if (token.code == CODE_END) {
                break;
            }
        }
    }

    /**
     * Legacy entry point: recognises the token that starts at {@code index} and returns it in a
     * {@link java.awt.List} whose items are, in order, the index at which to resume scanning,
     * the token code, and the lexeme. An empty list is returned when {@code temp} is a space.
     *
     * <p>NOTE(review): the return type is an AWT component, kept only for backward
     * compatibility; per the JDK documentation its constructor throws
     * {@code HeadlessException} when no display is available. {@link #main} therefore no
     * longer goes through this method.
     *
     * @param temp        the character at {@code index}; decides which kind of token is scanned
     * @param analyseDate scratch buffer that receives the lexeme's characters from position 0
     *                    (truncated to the buffer length); may be {@code null}
     * @param keywords    reserved words; a match at position {@code i} yields code {@code i + 1}
     * @param uString     the complete input line
     * @param index       position of {@code temp} inside {@code uString}
     * @param compareStr  ignored; retained so existing callers keep compiling
     * @return a three-item list as described above, or an empty list for a space
     */
    public static List extactCharacters(char temp, char[] analyseDate, String[] keywords, String uString, int index,
                                        String compareStr) {
        List list = new List();
        if (temp == ' ') {
            return list;
        }

        Token token = scan(temp, uString, index, keywords);
        if (analyseDate != null) {
            // Bounded copy so that an over-long lexeme cannot overflow the caller's buffer.
            int copied = Math.min(token.text.length(), analyseDate.length);
            token.text.getChars(0, copied, analyseDate, 0);
        }

        list.add(String.valueOf(token.nextIndex));
        list.add(String.valueOf(token.code));
        list.add(token.text);
        return list;
    }

    /**
     * Recognises exactly one token.
     *
     * @param first    the first character of the token (normally {@code source.charAt(index)})
     * @param source   the complete input line
     * @param index    position of {@code first} inside {@code source}
     * @param keywords reserved words used to tell keywords from identifiers
     * @return the token; its {@code nextIndex} is always greater than {@code index}
     */
    private static Token scan(char first, String source, int index, String[] keywords) {
        int next = index + 1;

        // Identifier or keyword: a letter followed by any number of letters and digits.
        if (isLetter(first)) {
            int end = next;
            while (end < source.length() && isLetterOrDigit(source.charAt(end))) {
                end++;
            }
            String word = first + source.substring(next, end);
            return new Token(end, keywordCode(word, keywords), word);
        }

        // Unsigned integer constant: a run of digits.
        if (isDigit(first)) {
            int end = next;
            while (end < source.length() && isDigit(source.charAt(end))) {
                end++;
            }
            return new Token(end, CODE_NUMBER, first + source.substring(next, end));
        }

        // One character of lookahead, read once; '\0' stands for "end of input".
        char following = next < source.length() ? source.charAt(next) : '\0';
        switch (first) {
            case '<':
                if (following == '=') {
                    return new Token(next + 1, 22, "<=");
                }
                if (following == '>') {
                    return new Token(next + 1, 21, "<>");
                }
                return new Token(next, 23, "<");
            case '>':
                if (following == '=') {
                    return new Token(next + 1, 24, ">=");
                }
                return new Token(next, 20, ">");
            case ':':
                if (following == '=') {
                    return new Token(next + 1, 18, ":=");
                }
                return new Token(next, 17, ":");
            default:
                return new Token(next, singleCharCode(first), String.valueOf(first));
        }
    }

    /** Returns the keyword code of {@code word}, or the identifier code if it is not reserved. */
    private static int keywordCode(String word, String[] keywords) {
        if (keywords != null) {
            for (int i = 0; i < keywords.length; i++) {
                if (word.equals(keywords[i])) {
                    return i + 1;
                }
            }
        }
        return CODE_IDENTIFIER;
    }

    /** Maps a one-character operator or delimiter to its token code. */
    private static int singleCharCode(char c) {
        switch (c) {
            case '*':
                return 13;
            case '/':
                return 14;
            case '+':
                return 15;
            case '-':
                return 16;
            case '=':
                return 25;
            case ';':
                return 26;
            case '(':
                return 27;
            case ')':
                return 28;
            case '#':
                return CODE_END;
            default:
                return CODE_UNKNOWN;
        }
    }

    /** ASCII letters only; the toy language has no non-ASCII identifiers. */
    private static boolean isLetter(char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }

    private static boolean isDigit(char c) {
        return c >= '0' && c <= '9';
    }

    private static boolean isLetterOrDigit(char c) {
        return isLetter(c) || isDigit(c);
    }

}

  4. 运行结果及分析

输入源代码:begin x:=0; end#

输出结果符合各字符对应的单词码。

 

四、        实验总结

学会了编写一个简单的词法分析程序,并利用它对源代码逐个单词地进行拆分和分析,区分标识符与保留字,同时能够识别空格,并能把数据从文件中读出来,对代码的理解也更加深刻。

原文地址:https://www.cnblogs.com/Zhang-Shuheng/p/5961175.html