游戏脚本编程 文本token解析

一个数字由以下几类字符组成

正负号 + -   小数点 .   数字 0-9

比如

3

-3

3.13

-34.2234

但是正负号和小数点最多各出现一次,并且正负号只能出现在数字的最前面

那么识别流程用图来表示 则是

整数

浮点数

一个读取C++源文件 将内容解析成一个个单独的TOKEN的代码

代码1

#include <iostream>
#include <fstream>
#include <cctype>
#include <cstring>
#include <string>
#include <exception>

using namespace std;

// Path of the input file that main() opens for reading and splits into tokens.
#define SOURCE_FILE_NAME    "sourcefile.cpp"
// Path of the output file that main() opens for writing; nothing in this listing writes to it.
#define DEST_FILE_NAME      "destfile.cpp"

// The input and output file streams.
std::ifstream fin;
std::ofstream fout;

// Appends the remainder of a numeric literal to `token`: consecutive digits
// and at most one decimal point, starting with `ch` (already read from `fin`).
// Returns the first character that was read but not consumed (may be EOF).
static int ReadNumberTail(std::string& token, int ch){
    bool seenDot = false;
    while(ch != EOF && (std::isdigit(ch) || (ch == '.' && !seenDot))){
        if(ch == '.'){
            seenDot = true;
        }
        token += static_cast<char>(ch);
        ch = fin.get();
    }
    return ch;
}

// Gives a look-ahead character back to `fin`. The EOF marker is not a real
// character and is therefore never pushed back.
static void UngetChar(int ch){
    if(ch != EOF){
        fin.putback(static_cast<char>(ch));
    }
}

// Reads the next token from the global stream `fin` and appends it to `token`.
//
// `token` is appended to, not cleared; callers clear it between calls.
// Returns false when no character could be read (end of input), true otherwise.
//
// Token classes, tried in this order:
//   * a run of whitespace characters
//   * a run of alphabetic characters
//   * a number: digits with at most one decimal point
//   * a sign ('+' or '-') followed by an optional number
//   * '<' or '>' optionally followed by a second '<' or '>'
//   * any other single character
bool GetToken(std::string& token){
    // get() returns an int so that EOF stays distinguishable from every real
    // character; the value is also directly valid for the <cctype> classifiers.
    int ch = fin.get();
    if(ch == EOF){
        return false;
    }

    if(std::isspace(ch)){
        // Collect consecutive whitespace characters (' ', '\t', '\n', ...).
        while(ch != EOF && std::isspace(ch)){
            token += static_cast<char>(ch);
            ch = fin.get();
        }
        UngetChar(ch);
        return true;
    }

    if(std::isalpha(ch)){
        while(ch != EOF && std::isalpha(ch)){
            token += static_cast<char>(ch);
            ch = fin.get();
        }
        UngetChar(ch);
        return true;
    }

    if(std::isdigit(ch)){
        UngetChar(ReadNumberTail(token, ch));
        return true;
    }

    if(ch == '-' || ch == '+'){
        // The sign is kept even when no number follows it.
        token += static_cast<char>(ch);
        UngetChar(ReadNumberTail(token, fin.get()));
        return true;
    }

    if(ch == '<' || ch == '>'){
        token += static_cast<char>(ch);
        const int next = fin.get();
        if(next == '<' || next == '>'){
            token += static_cast<char>(next);
        }else{
            UngetChar(next);
        }
        return true;
    }

    // Anything else is a one-character token.
    token += static_cast<char>(ch);
    return true;
}


int main(int argc, char *argv[])
{
    fin.open(SOURCE_FILE_NAME);
    if(!fin){
        cout << "Open source file error.Exit!!" << endl;
        return -1;
    }

    fout.open(DEST_FILE_NAME);
    if(!fout){
        cout << "Open destinaton  file error.Exit!!" << endl;
        return -1;
    }

    try{
        string token;
        while(GetToken(token)){
            cout << token ;//<< endl;
            token.clear();
        }


    }catch(exception& e){
        cerr << e.what() << endl;
    }







    fin.close();
    fout.close();
    cout << "Hello World!"<<endl;
    return 0;
}

 测试文件

293048 24 895523
3.14159 
235
		253
		  52435 345

		   459245

 22 .5 .35 2.0
 
1
 0.0
  1.0
   0

	02345
		
	63246 0.2346
	34.0

 代码2

#include <iostream>
#include <fstream>
#include <exception>
#include <queue>
using namespace std;

// Path main() passes to FileParse as the input file to read lines from.
#define IN_FILE_NAME    "SourceFile.cpp"
// Path main() passes to FileParse as the output file; it is opened but nothing in this listing writes to it.
#define OUT_FILE_NAME    "DestinationFile.cpp"

// States of the per-line number recognizer used by FileParse::ParseToTokens().
enum STATE{
    state_init = 0,   // between tokens; whitespace is skipped
    state_int,        // inside a run of digits, no decimal point seen yet
    state_float,      // a decimal point has been consumed
    state_error       // unexpected character seen; the rest of the line is ignored
};

// Reads a text file line by line and prints every whitespace-terminated
// integer or floating point literal it recognizes to standard output,
// one token per line.
//
// The file streams close themselves when the object is destroyed, so no
// user-written destructor is needed.
class FileParse{
public:
    // Opens `infileName` for reading and `outfileName` for writing.
    // Open failures are not reported here; run() checks both streams.
    FileParse(const std::string& infileName,const std::string& outfileName){
        fin_.open(infileName);
        fout_.open(outfileName);
    }

    // Scans the current line (linestr_) and prints each completed number.
    //
    // A token is completed only by a whitespace character, which is why run()
    // terminates every line with '\n'. When an unexpected character is met the
    // recognizer enters state_error and silently drops the rest of the line,
    // including the token being built.
    //
    // Returns false when the line buffer is empty, true otherwise.
    bool ParseToTokens(){
        if(linestr_.empty())
            return false;

        STATE state = state_init;
        std::string token;

        for(std::size_t i = 0; i < linestr_.size(); ++i){
            const char currentChar = linestr_[i];
            if(currentChar == '\0')
                break;

            // <cctype> classifiers require a value representable as unsigned char.
            const unsigned char uc = static_cast<unsigned char>(currentChar);
            const bool isSpace = std::isspace(uc) != 0;
            const bool isDigit = std::isdigit(uc) != 0;
            bool isFinish = false;

            switch(state){
            case state_init:
                if(isSpace){
                    // Skip whitespace between tokens.
                }else if(isDigit){
                    state = state_int;
                    token += currentChar;
                }else if(currentChar == '.'){
                    state = state_float;
                    token += currentChar;
                }else{
                    state = state_error;
                }
                break;

            case state_int:
                if(isDigit){
                    token += currentChar;
                }else if(currentChar == '.'){
                    state = state_float;
                    token += currentChar;
                }else if(isSpace){
                    isFinish = true;
                }else{
                    state = state_error;
                }
                break;

            case state_float:
                if(isDigit){
                    // Stay in state_float: going back to state_int would allow
                    // a second decimal point ("3.1.4").
                    token += currentChar;
                }else if(isSpace){
                    isFinish = true;
                }else{
                    state = state_error;
                }
                break;

            case state_error:
                break;
            }

            if(isFinish){
                std::cout << token << '\n';
                token.clear();
                state = state_init;
            }
        }

        return true;
    }

    // Parses the whole input file.
    //
    // Returns false (after writing a message to std::cerr) when either file
    // could not be opened or an exception escapes the parsing loop; returns
    // true once every line has been processed.
    bool run(){
        if(!fin_.is_open() || !fout_.is_open()) {
            std::cerr << "open file is null" << std::endl;
            return false;
        }

        try{
            // Looping on getline() itself stops on end of file as well as on
            // read errors; a final line without '\n' is still processed.
            while(std::getline(fin_, linestr_)){
                linestr_ += '\n';
                ParseToTokens();
            }
        }catch(const std::exception& e){
            std::cerr << e.what() << std::endl;
            return false;
        }

        return true;
    }

private:
    std::string linestr_;                // line currently being scanned, '\n'-terminated by run()
    std::queue<std::string> vecToken_;   // declared but not used by any member of this class
    std::ifstream fin_;                  // input file
    std::ofstream fout_;                 // output file; opened but never written to by this class
};


int main(int argc, char *argv[])
{
    FileParse a(IN_FILE_NAME,OUT_FILE_NAME);
    a.run();
    return 0;
}

  显示结果

代码3 新增标识符(由字母、数字、下划线组成的单词)的识别解析

#include <iostream>
#include <fstream>
#include <exception>
#include <queue>
using namespace std;

// Path main() passes to FileParse as the input file to read lines from.
#define IN_FILE_NAME    "SourceFile.cpp"
// Path main() passes to FileParse as the output file; it is opened but nothing in this listing writes to it.
#define OUT_FILE_NAME    "DestinationFile.cpp"

// States of the per-line recognizer used by FileParse::ParseToTokens().
enum STATE{
    state_init = 0,   // between tokens; whitespace is skipped
    state_int,        // inside a run of digits, no decimal point seen yet
    state_float,      // a decimal point has been consumed
    state_word,       // inside an identifier (letters, digits, underscores)
    state_error       // unexpected character seen; the rest of the line is ignored
};

// Reads a text file line by line and prints every whitespace-terminated
// number or identifier it recognizes to standard output, one token per line.
//
// The file streams close themselves when the object is destroyed, so no
// user-written destructor is needed.
class FileParse{
public:
    // Opens `infileName` for reading and `outfileName` for writing.
    // Open failures are not reported here; run() checks both streams.
    FileParse(const std::string& infileName,const std::string& outfileName){
        fin_.open(infileName);
        fout_.open(outfileName);
    }

    // Scans the current line (linestr_) and prints each completed token.
    //
    // Recognized tokens:
    //   * integers        - digits only
    //   * floating point  - digits with exactly one decimal point
    //   * identifiers     - start with a letter or '_', continue with
    //                       letters, digits or '_'
    //
    // A token is completed only by a whitespace character, which is why run()
    // terminates every line with '\n'. When an unexpected character is met the
    // recognizer enters state_error and silently drops the rest of the line,
    // including the token being built.
    //
    // Returns false when the line buffer is empty, true otherwise.
    bool ParseToTokens(){
        if(linestr_.empty())
            return false;

        STATE state = state_init;
        std::string token;

        for(std::size_t i = 0; i < linestr_.size(); ++i){
            const char currentChar = linestr_[i];
            if(currentChar == '\0')
                break;

            // <cctype> classifiers require a value representable as unsigned char.
            const unsigned char uc = static_cast<unsigned char>(currentChar);
            const bool isSpace = std::isspace(uc) != 0;
            const bool isDigit = std::isdigit(uc) != 0;
            const bool isWordStart = std::isalpha(uc) != 0 || currentChar == '_';
            bool isFinish = false;

            switch(state){
            case state_init:
                if(isSpace){
                    // Skip whitespace between tokens.
                }else if(isDigit){
                    state = state_int;
                    token += currentChar;
                }else if(currentChar == '.'){
                    state = state_float;
                    token += currentChar;
                }else if(isWordStart){
                    state = state_word;
                    token += currentChar;
                }else{
                    state = state_error;
                }
                break;

            case state_word:
                if(isWordStart || isDigit){
                    token += currentChar;
                }else if(isSpace){
                    isFinish = true;
                }else{
                    state = state_error;
                }
                break;

            case state_int:
                if(isDigit){
                    token += currentChar;
                }else if(currentChar == '.'){
                    state = state_float;
                    token += currentChar;
                }else if(isSpace){
                    isFinish = true;
                }else{
                    state = state_error;
                }
                break;

            case state_float:
                if(isDigit){
                    // Stay in state_float: going back to state_int would allow
                    // a second decimal point ("3.1.4").
                    token += currentChar;
                }else if(isSpace){
                    isFinish = true;
                }else{
                    state = state_error;
                }
                break;

            case state_error:
                break;
            }

            if(isFinish){
                std::cout << token << '\n';
                token.clear();
                state = state_init;
            }
        }

        return true;
    }

    // Parses the whole input file.
    //
    // Returns false (after writing a message to std::cerr) when either file
    // could not be opened or an exception escapes the parsing loop; returns
    // true once every line has been processed.
    bool run(){
        if(!fin_.is_open() || !fout_.is_open()) {
            std::cerr << "open file is null" << std::endl;
            return false;
        }

        try{
            // Looping on getline() itself stops on end of file as well as on
            // read errors; a final line without '\n' is still processed.
            while(std::getline(fin_, linestr_)){
                linestr_ += '\n';
                ParseToTokens();
            }
        }catch(const std::exception& e){
            std::cerr << e.what() << std::endl;
            return false;
        }

        return true;
    }

private:
    std::string linestr_;                // line currently being scanned, '\n'-terminated by run()
    std::queue<std::string> vecToken_;   // declared but not used by any member of this class
    std::ifstream fin_;                  // input file
    std::ofstream fout_;                 // output file; opened but never written to by this class
};


int main(int argc, char *argv[])
{
    FileParse a(IN_FILE_NAME,OUT_FILE_NAME);
    a.run();
    return 0;
}

  测试文本

293048 24 895523
3.14159 
235
		253
		  52435 345

MyVar0 MyVar1 MyVar2
		   459245

	rEtUrN
	
	
TRUE false

 22 .5 .35 2.0
 
while

1
 0.0 var
  1.0 var
   0
   
   	This_is_an_identifier

	02345

		_so_is_this___
		
	63246 0.2346
	34.0

  显示结果

作 者: itdef
欢迎转帖 请保持文本完整并注明出处
技术博客 http://www.cnblogs.com/itdef/
B站算法视频题解
https://space.bilibili.com/18508846
qq 151435887
gitee https://gitee.com/def/
欢迎c c++ 算法爱好者 windows驱动爱好者 服务器程序员沟通交流
如果觉得不错,欢迎点赞,你的鼓励就是我的动力
阿里打赏 微信打赏
原文地址:https://www.cnblogs.com/itdef/p/6875089.html