【Word Ladder II】cpp

题目：

Given two words (start and end), and a dictionary, find all shortest transformation sequence(s) from start to end, such that:

Only one letter can be changed at a time
Each intermediate word must exist in the dictionary

For example,

Given:
start = "hit"
end = "cog"
dict = ["hot","dot","dog","lot","log"]

Return

  [
    ["hit","hot","dot","dog","cog"],
    ["hit","hot","lot","log","cog"]
  ]

Note:

All words have the same length.
All words contain only lowercase alphabetic characters.

代码：

class Solution {
public:
    /**
     * Returns every shortest transformation sequence from `start` to `end`.
     *
     * Consecutive words of a sequence differ in exactly one lowercase letter
     * and every intermediate word must be contained in `dict`. `end` itself
     * does not have to be in `dict`.
     *
     * This variant stores the complete path of every frontier word in the BFS
     * queue, so no separate back-tracking pass is required.
     *
     * @param start first word of every returned sequence
     * @param end   last word of every returned sequence
     * @param dict  allowed intermediate words; left untouched (the search
     *              works on a private copy, so repeated calls are safe)
     * @return all shortest sequences, each beginning with `start` and ending
     *         with `end`; empty when `end` is unreachable. When
     *         `start == end` the single sequence `{start}` is returned.
     */
    std::vector<std::vector<std::string>> findLadders(std::string start, std::string end, std::unordered_set<std::string> &dict) {
        std::vector<std::vector<std::string> > ret;

        // Zero steps are needed: the shortest sequence is the word itself.
        if (start == end) {
            ret.push_back(std::vector<std::string>(1, start));
            return ret;
        }

        // Words that have not been reached on an earlier BFS level. Working on
        // a copy keeps the caller's dictionary intact.
        std::unordered_set<std::string> unvisited(dict);
        // A shortest path never returns to its own starting point.
        unvisited.erase(start);

        // Each queue entry is a full path; its last element is the frontier word.
        std::queue<std::vector<std::string> > paths;
        paths.push(std::vector<std::string>(1, start));

        bool found = false;
        while (!found && !paths.empty()) {
            // Words reached on this level. They stay usable until the level is
            // finished so that several paths may share the same word.
            std::unordered_set<std::string> reached;

            // Levels are delimited by the queue size at level start, so no
            // sentinel value is needed (a sentinel "" would clash with an
            // empty `start`).
            for (std::size_t pending = paths.size(); pending > 0; --pending) {
                std::vector<std::string> path = std::move(paths.front());
                paths.pop();
                std::string word = path.back();

                for (std::size_t i = 0; i < word.size(); ++i) {
                    const char original = word[i];
                    for (char c = 'a'; c <= 'z'; ++c) {
                        if (c == original) continue;
                        word[i] = c;
                        if (word == end) {
                            found = true;
                            ret.push_back(path);
                            ret.back().push_back(word);
                        } else if (!found && unvisited.find(word) != unvisited.end()) {
                            // Once a ladder is known the next level is never
                            // expanded, so nothing more has to be queued.
                            paths.push(path);
                            paths.back().push_back(word);
                            reached.insert(word);
                        }
                    }
                    word[i] = original;
                }
            }

            // A word used on this level must not be reused on a deeper level:
            // any such path would be longer than the one already found.
            for (const std::string& w : reached) {
                unvisited.erase(w);
            }
        }
        return ret;
    }
};

tips：

主要思路是对Word Ladder这道题（http://www.cnblogs.com/xbf9xbf/p/4527302.html）扩展。

麻烦之处在于要找到所有的最短路径，关键在于BFS方法搜寻到最短路径后，如何回溯发现路径上所有的前驱节点。

1. 队列中存放每个word及其前驱节点（存成一个vector）

2. BFS的同一层中，dict中的word可以复用；但dict的一个单词不能在不同层中都使用，否则就死循环了；记录每层用到过哪些word, 这层结束后从dict中删除

3. 所有最短的路径一定存在于BFS的同一层；因此设置一个标记变量ifFind，用于标记是否在某一层找到了匹配的路径，如果找到就不往下找了

没想到第一次提交就AC了，就是代码效率太低了。

上述代码相对来说比较简洁，但是queue中要维护每个节点的所有前驱路径vector<string>，整个路径跟着出队入队太耗时了。

网上有一种用hashmap维护每个节点前驱节点的做法，前驱路径不用入队，可能会节省不少时间，再研究一下这个实现思路。

==========================================

实现了利用hashmap保存每条路径上前驱节点的算法，代码如下：

class Solution {
public:
    /**
     * Returns every shortest transformation sequence from `start` to `end`.
     *
     * Consecutive words of a sequence differ in exactly one lowercase letter
     * and every intermediate word must be contained in `dict`. `end` itself
     * does not have to be in `dict`.
     *
     * The BFS only queues words and records, for each word, the words of the
     * previous level it was reached from. The sequences are rebuilt afterwards
     * by walking those predecessor lists back from `end`.
     *
     * @param start first word of every returned sequence
     * @param end   last word of every returned sequence
     * @param dict  allowed intermediate words; left untouched (the search
     *              works on a private copy, so repeated calls are safe)
     * @return all shortest sequences; empty when `end` is unreachable. When
     *         `start == end` the single sequence `{start}` is returned.
     */
    std::vector<std::vector<std::string>> findLadders(std::string start, std::string end, std::unordered_set<std::string> &dict) {
        std::vector<std::vector<std::string> > ret;

        // Zero steps are needed: the shortest sequence is the word itself.
        if (start == end) {
            ret.push_back(std::vector<std::string>(1, start));
            return ret;
        }

        // Words not yet reached on an earlier level; a copy keeps the caller's
        // dictionary intact.
        std::unordered_set<std::string> unvisited(dict);
        // A shortest path never returns to its own starting point.
        unvisited.erase(start);

        // word -> words of the previous BFS level that lead to it
        std::map<std::string, std::vector<std::string> > wordPre;

        std::queue<std::string> que;
        que.push(start);

        bool ifFind = false;
        while (!ifFind && !que.empty()) {
            // Dictionary words reached on the current level.
            std::set<std::string> used;

            // Levels are delimited by the queue size at level start, so no
            // sentinel value is needed (a sentinel "" would clash with an
            // empty `start`).
            for (std::size_t pending = que.size(); pending > 0; --pending) {
                const std::string ori = que.front();
                que.pop();
                std::string curr = ori;

                for (std::size_t i = 0; i < curr.size(); ++i) {
                    const char curr_c = curr[i];
                    for (char c = 'a'; c <= 'z'; ++c) {
                        if (c == curr_c) continue;
                        curr[i] = c;
                        if (curr == end) {
                            ifFind = true;
                            wordPre[end].push_back(ori);
                            continue;
                        }
                        if (unvisited.find(curr) != unvisited.end()) {
                            // Every word of the current level that reaches
                            // `curr` is a valid predecessor ...
                            wordPre[curr].push_back(ori);
                            // ... but `curr` itself is queued only once,
                            // otherwise its successors would be recorded
                            // repeatedly and paths would be duplicated.
                            if (used.insert(curr).second) {
                                que.push(curr);
                            }
                        }
                    }
                    curr[i] = curr_c;
                }
            }

            // A word reached on this level must not be reached again from a
            // deeper level.
            for (const std::string& w : used) {
                unvisited.erase(w);
            }
        }

        // Rebuild all shortest paths from the predecessor lists.
        if (ifFind) {
            std::vector<std::string> tmp(1, end);
            Solution::backTracingPaths(ret, wordPre, start, end, tmp);
        }
        return ret;
    }

    /**
     * Depth-first walk over the predecessor lists, from `curr` back to `start`.
     *
     * @param ret     receives one start-to-end sequence per completed walk
     * @param wordPre word -> predecessor words; only read
     * @param start   word at which a walk is complete
     * @param curr    word the walk currently stands on
     * @param tmp     words visited so far in end-to-start order, `curr` last;
     *                restored to its incoming content before returning
     */
    static void backTracingPaths(
        std::vector<std::vector<std::string> >& ret,
        std::map<std::string, std::vector<std::string> >& wordPre,
        std::string start,
        std::string curr,
        std::vector<std::string>& tmp)
    {
        if (curr == start) {
            // `tmp` runs from end to start; store a reversed copy so that
            // `tmp` itself stays valid for the remaining walks.
            ret.push_back(std::vector<std::string>(tmp.rbegin(), tmp.rend()));
            return;
        }
        // find() instead of operator[]: no empty entry is inserted for a word
        // without predecessors and the list is not copied.
        const auto it = wordPre.find(curr);
        if (it == wordPre.end()) {
            return;
        }
        const std::vector<std::string>& pre = it->second;
        for (std::size_t i = 0; i < pre.size(); ++i) {
            tmp.push_back(pre[i]);
            Solution::backTracingPaths(ret, wordPre, start, pre[i], tmp);
            tmp.pop_back();
        }
    }
};

tips：

1. 利用hashmap保存bfs过程中dict中每个word的前驱节点；这样queue中只需要保存bfs的每一层访问的节点即可，省去了vector入队出队的大量耗时。

2. 在获得所有最短路径之后，可以从hashmap中的end节点出发往前回溯；回溯的思路是dfs，终止条件是当前节点等于start，此时就发现了一条完整路径（这里有个地方需要注意：从end出发dfs得到的路径是倒着的，因此在加入ret时需要reverse一次；同时为了不影响其余的回溯，tmp在返回时必须保持原来的顺序——要么reverse、加入ret之后再reverse回来，要么复制一份再reverse）

3. 这里有一个细节需要注意：在queue.push(curr)的时候，需要注意去重，不要把当前层的节点重复加入到队列中。第一次没有注意这个问题，一直报超时。在自己第一个解法中，queue中保存的是节点和前面所有的路径，即使curr相同，但是其之前的path一定是不同的，所以都加入queue中也无妨。这种方法其实就把第二种方法的dfs过程省了，但是中间vector<string>耗费了大量的入队出队时间。

之前学习的时候，实现BFS的过程有一种双队列的方法，换这种方式实现这道题一下。

======================================================

改用双队列实现BFS的方法又完成一次AC，代码如下：

class Solution {
public:
    /**
     * Returns every shortest transformation sequence from `start` to `end`.
     *
     * Consecutive words of a sequence differ in exactly one lowercase letter
     * and every intermediate word must be contained in `dict`. `end` itself
     * does not have to be in `dict`.
     *
     * The BFS alternates between two queues (`curr` holds the level being
     * expanded, `next` collects the following level) and records for each
     * word the words of the previous level it was reached from. The sequences
     * are rebuilt afterwards by `dfs`.
     *
     * @param start first word of every returned sequence
     * @param end   last word of every returned sequence
     * @param dict  allowed intermediate words; left untouched (the search
     *              works on a private copy, so repeated calls are safe)
     * @return all shortest sequences; empty when `end` is unreachable. When
     *         `start == end` the single sequence `{start}` is returned.
     */
    std::vector<std::vector<std::string>> findLadders(std::string start, std::string end, std::unordered_set<std::string> &dict) {
        std::vector<std::vector<std::string> > ret;

        // Zero steps are needed: the shortest sequence is the word itself.
        if (start == end) {
            ret.push_back(std::vector<std::string>(1, start));
            return ret;
        }

        // Words not yet reached on an earlier level; a copy keeps the caller's
        // dictionary intact.
        std::unordered_set<std::string> unvisited(dict);
        // A shortest path never returns to its own starting point.
        unvisited.erase(start);

        // BFS driven by two queues
        std::queue<std::string> curr;
        curr.push(start);
        std::queue<std::string> next;

        // word -> words of the previous BFS level that lead to it
        std::map<std::string, std::vector<std::string> > wordPre;
        bool ifFind = false;
        // dictionary words reached on the level being expanded
        std::set<std::string> used;

        while (!curr.empty()) {
            while (!curr.empty()) {
                const std::string word = curr.front();
                curr.pop();
                std::string tmp = word;
                for (std::size_t i = 0; i < tmp.size(); ++i) {
                    const char ori = tmp[i];
                    for (char c = 'a'; c <= 'z'; ++c) {
                        // Compare with the saved letter: tmp[i] has already
                        // been overwritten by earlier iterations, so testing
                        // it would let the unchanged word through.
                        if (c == ori) continue;
                        tmp[i] = c;
                        if (tmp == end) {
                            ifFind = true;
                            wordPre[end].push_back(word);
                            continue;
                        }
                        if (unvisited.find(tmp) != unvisited.end()) {
                            // Several words of this level may lead to `tmp`;
                            // all of them are predecessors, but `tmp` is
                            // queued only once.
                            wordPre[tmp].push_back(word);
                            if (used.insert(tmp).second) {
                                next.push(tmp);
                            }
                        }
                    }
                    tmp[i] = ori;
                }
            }

            // All shortest ladders end on the same level; stop after it.
            if (ifFind) break;

            std::swap(next, curr);
            // A word reached on this level must not be reached again from a
            // deeper level.
            for (const std::string& w : used) {
                unvisited.erase(w);
            }
            used.clear();
        }

        // Rebuild all shortest paths from the predecessor lists.
        if (ifFind) {
            std::vector<std::string> tmp(1, end);
            Solution::dfs(ret, tmp, start, end, wordPre);
        }
        return ret;
    }

    /**
     * Depth-first walk over the predecessor lists, from `curr` back to `start`.
     *
     * @param ret     receives one start-to-end sequence per completed walk
     * @param tmp     words visited so far in end-to-start order, `curr` last;
     *                restored to its incoming content before returning
     * @param start   word at which a walk is complete
     * @param curr    word the walk currently stands on
     * @param wordPre word -> predecessor words; only read
     */
    static void dfs(
            std::vector<std::vector<std::string> >& ret,
            std::vector<std::string>& tmp,
            std::string start,
            std::string curr,
            std::map<std::string, std::vector<std::string>>& wordPre)
    {
        if (curr == start) {
            // `tmp` runs from end to start; store a reversed copy so that
            // `tmp` itself stays valid for the remaining walks.
            ret.push_back(std::vector<std::string>(tmp.rbegin(), tmp.rend()));
            return;
        }
        // find() instead of operator[]: no empty entry is inserted for a word
        // without predecessors and the list is not copied.
        const auto it = wordPre.find(curr);
        if (it == wordPre.end()) {
            return;
        }
        const std::vector<std::string>& pre = it->second;
        for (std::size_t i = 0; i < pre.size(); ++i) {
            tmp.push_back(pre[i]);
            Solution::dfs(ret, tmp, start, pre[i], wordPre);
            tmp.pop_back();
        }
    }
};

tips:

双队列的方式实现BFS主要的好处是省去了判断每一层结束的代码，且只需要swap(next,curr)即可（交换指针操作O(1)）

代码整洁的同时，并没有影响时间复杂度和空间复杂度，应该说是优于第二种实现的。

重写这部分代码的时候，突然对如下的代码有些疑问：

                            if ( dict.find(tmp)!=dict.end() )
                            {    
                                wordPre[tmp].push_back(word);
                                if ( used.find(tmp)==used.end())
                                {
                                    next.push(tmp);
                                    used.insert(tmp);
                                }
                            }

为什么每次wordPre[tmp].push_back(word)就不用检查是否重复？而加入next队列的时候就需要检查重复呢?

这是一个思维误区，因为这两个说的不是一个事情。

1. word是每次从队列头部弹出来的，加入队列前要查重的，因此不可能有两个相同的word；而tmp，是由word的某个位置变化一个字母得来的。

比如，队列curr中有{"hot" "cot" }，而字典dict中有{“got”}。

　当word为“hot”时，tmp会取到“got” → wordPre["got"].push_back("hot")

当word为“cot”时，tmp也会取到“got” → wordPre["got"].push_back("cot")

　到此为止，可以看到由于队列中word不会重复，wordPre[tmp].push_back(word)是没问题的。

2. 再看next.push(tmp)：used的作用是记录在BFS某一层的过程中，dict中出现过的单词。显然，在上例中"got"这个单词出现了两次，都作为tmp。

如果不加区分，把两个got都加入了next队列中，那么在最后dfs回溯的过程中必然会输出重复的路径，并且如果这种got出现了很多次，会大大影响迭代效率。因此无论从结果正确性还是代码效率考虑，入队前都需要查重，不能让队列中有重复的元素。

总结起来，就是dict中的一个单词，可以有多个不同的前驱；但是每个单词不能在同一层队列中出现多次，更不能在BFS的不同层中出现。

存几个参考过的blog

http://www.cnblogs.com/TenosDoIt/p/3443512.html

主要参考上面的思路，做了一些细节的处理，还是要感谢作者share solution。

完毕。

====================================================

第二次过这道题，还是很复杂，很多细节要注意。

class Solution {
public:
        /**
         * Returns every shortest transformation sequence from `start` to `end`.
         *
         * Consecutive words of a sequence differ in exactly one lowercase
         * letter and every intermediate word must be contained in `dict`.
         * `end` itself does not have to be in `dict`.
         *
         * The BFS expands one level at a time, records for each word the words
         * of the previous level it was reached from, and rebuilds the
         * sequences from those predecessor lists once `end` has been reached.
         *
         * @param start first word of every returned sequence
         * @param end   last word of every returned sequence
         * @param dict  allowed intermediate words; left untouched (the search
         *              works on a private copy, so repeated calls are safe)
         * @return all shortest sequences; empty when `end` is unreachable.
         *         When `start == end` the single sequence `{start}` is returned.
         */
        std::vector<std::vector<std::string> > findLadders(
            std::string start, std::string end, std::unordered_set<std::string> &dict)
        {
            std::vector<std::vector<std::string> > ret;

            // Zero steps are needed: the shortest sequence is the word itself.
            if (start == end) {
                ret.push_back(std::vector<std::string>(1, start));
                return ret;
            }

            // Words not yet reached on an earlier level; a copy keeps the
            // caller's dictionary intact.
            std::unordered_set<std::string> unvisited(dict);
            // A shortest path never returns to its own starting point.
            unvisited.erase(start);

            // word -> words of the previous BFS level that lead to it
            std::map<std::string, std::vector<std::string> > preWords;
            bool ladderFind = false;
            std::queue<std::string> curr;
            curr.push(start);
            // dictionary words reached on the level being expanded; being a
            // set, it also removes duplicates before they are queued
            std::set<std::string> used;

            while (!curr.empty()) {
                while (!curr.empty()) {
                    const std::string pre = curr.front();
                    curr.pop();
                    std::string word = pre;
                    for (std::size_t i = 0; i < word.size(); ++i) {
                        const char ori = word[i];
                        for (char c = 'a'; c <= 'z'; ++c) {
                            // Compare with the saved letter: word[i] has
                            // already been overwritten by earlier iterations,
                            // so testing it would let the unchanged word
                            // through.
                            if (c == ori) continue;
                            word[i] = c;
                            if (word == end) {
                                ladderFind = true;
                                preWords[end].push_back(pre);
                                // Other letters at this position could only
                                // yield words of a level that is never
                                // expanded once the target has been hit.
                                break;
                            }
                            if (unvisited.find(word) != unvisited.end()) {
                                used.insert(word);
                                preWords[word].push_back(pre);
                            }
                        }
                        word[i] = ori;
                    }
                }

                // All shortest ladders end on the same level; stop after it.
                if (ladderFind) break;

                // The words reached on this level form the next level and must
                // not be reached again from a deeper one.
                for (const std::string& w : used) {
                    curr.push(w);
                    unvisited.erase(w);
                }
                used.clear();
            }

            if (ladderFind) {
                std::vector<std::string> tmp(1, end);
                Solution::walkBack(ret, start, end, tmp, preWords);
            }
            return ret;
        }

        /**
         * Depth-first walk over the predecessor lists, from `curr` back to
         * `start`.
         *
         * @param ret      receives one start-to-end sequence per completed walk
         * @param start    word at which a walk is complete
         * @param curr     word the walk currently stands on
         * @param tmp      words visited so far in end-to-start order, `curr`
         *                 last; taken by value, the caller's vector is never
         *                 touched
         * @param preWords word -> predecessor words; only read
         */
        static void dfs(
            std::vector<std::vector<std::string> >& ret,
            std::string start,
            std::string curr,
            std::vector<std::string> tmp,
            std::map<std::string, std::vector<std::string> >& preWords
            )
        {
            // The by-value signature is kept for existing callers; the
            // recursion itself shares one path buffer instead of copying the
            // path at every level.
            Solution::walkBack(ret, start, curr, tmp, preWords);
        }

private:
        // Recursive worker behind dfs(): extends `trail` with each predecessor
        // of `curr` in turn and emits the reversed trail on reaching `start`.
        static void walkBack(
            std::vector<std::vector<std::string> >& ret,
            const std::string& start,
            const std::string& curr,
            std::vector<std::string>& trail,
            const std::map<std::string, std::vector<std::string> >& preWords
            )
        {
            if (curr == start) {
                // `trail` runs from end to start; store a reversed copy.
                ret.push_back(std::vector<std::string>(trail.rbegin(), trail.rend()));
                return;
            }
            // One lookup per call; find() inserts nothing for unknown words.
            const auto it = preWords.find(curr);
            if (it == preWords.end()) {
                return;
            }
            const std::vector<std::string>& pre = it->second;
            for (std::size_t i = 0; i < pre.size(); ++i) {
                trail.push_back(pre[i]);
                // pre[i] lives in the map, so the reference stays valid even
                // if `trail` reallocates.
                Solution::walkBack(ret, start, pre[i], trail, preWords);
                trail.pop_back();
            }
        }
};