Leetcode 140 单词拆分II: 字符串s在字典wordDict中有多少种拆分方法。

本文持续更新地址:https://haoqchen.site/2018/11/08/LeetCode140/

题目描述

给定一个非空字符串 s 和一个包含非空单词列表的字典 wordDict,在字符串中增加空格来构建一个句子,使得句子中所有的单词都在词典中。返回所有这些可能的句子。

说明:

  • 分隔时可以重复使用字典中的单词。
  • 你可以假设字典中没有重复的单词。

示例

(1)

输入: s = "catsanddog" wordDict = ["cat", "cats", "and", "sand", "dog"] 输出:
[
"cats and dog",
"cat sand dog"
]

(2)

输入:
s = "pineapplepenapple"
wordDict = ["apple", "pen", "applepen", "pine", "pineapple"]
输出:
[
  "pine apple pen apple",
  "pineapple pen apple",
  "pine applepen apple"
]
解释: 注意你可以重复使用字典中的单词。

(3)

输入:
s = "catsandog"
wordDict = ["cats", "dog", "sand", "and", "cat"]
输出:
[]

代码

#include <iostream>
#include <map>
#include <vector>
#include <string>
#include <list>
#include <algorithm>

using namespace std;

class Solution {
public:
    vector<string> wordBreak(string s, vector<string>& wordDict)
    {
        bool temp;
        vector<vector<int>> words_map;
        list<int> solution;
        words_map.resize(52);
        solution_after_index_.resize(s.size()+1);
        cannot_break_ = new bool[s.size()+1];//创建一个与s同长度的bool类型变量,用于存储以前在某个位置上是否已经进行过分割,比如在[5]这里进行过分割,然后[5]后面没能成功分割,那么以后遇到在[5]这里的分割就可以直接跳过了,没有这个会超时。
        memset(cannot_break_, false, sizeof(bool)*(s.size()+1));
        for (int i = 0; i < wordDict.size(); ++i){//以首字母为键值构建自己的map
            char word = wordDict[i][0];
            words_map[letter2int(word)].push_back(i);
        }
        for (int i = 0; i < 52; ++i){//对map按照字符串大小进行从大到小排序,目的是想先用长的字符串进行分割,可以一定程度上节省时间,后来加入了cannot_break其实这里不用也行
            sort(words_map[i].begin(), words_map[i].end(), [&wordDict](int a, int b)->bool{ return (wordDict[a].size() > wordDict[b].size()); });
        }
        bfs(s, wordDict, words_map, solution, 0, &temp);
        delete[] cannot_break_;
        return solutions_;
    }
private:
    vector<string> solutions_;
    vector<vector<pair<int, int>>> solution_after_index_;
    bool* cannot_break_;
    int letter2int(char _letter)//字母转map键值,先'a-z'再'A-Z'
    {
        if (_letter >= 'a' && _letter <= 'z')
            return (_letter - 'a');
        return (_letter - 'A' + 26);
    }

    bool bfs(const string& _s, const vector<string>& _words, vector<vector<int>>& _words_map, list<int>& _solution, int _deal_size, bool* _has_solution)
    {
        if (_s.size() == 0){
            *_has_solution = true;
            return true;
        }
        bool can_break = true;
        bool has_solution = false;
        for (auto index : _words_map[letter2int(_s[0])]){
            int i = 0;
            if (_words[index].size() > _s.size() || cannot_break_[_words[index].size() + _deal_size])//如果字典字符串比原字符串大,以及已经在该位置进行过分割,直接跳过
                continue;
            for (; i < _words[index].size(); ++i){//比较所有首字母相同的words
                if (_s[i] ^ _words[index][i]){//利用异或比较首字母相同的word与s是否相同
                    can_break = false;
                    break;
                }
            }
            if (can_break){
                _solution.push_back(index);
                int curr_deal_size = _deal_size + _words[index].size();
                if (solution_after_index_[curr_deal_size].size() > 0){
                    has_solution = true;
                    int solu_size = solutions_.size();
                    for (int i = 0; i < solution_after_index_[curr_deal_size].size(); ++i){
                        string str;
                        int count = 0;
                        for (auto val : _solution){
                            count += _words[val].size();
                            str += _words[val];
                            str += ' ';
                            pair<int, int> p;
                            p.first = solu_size;
                            p.second = str.size();
                            solution_after_index_[count].push_back(p);
                        }
                        solution_after_index_[count].pop_back();
                        str += solutions_[solution_after_index_[curr_deal_size][i].first].substr(solution_after_index_[curr_deal_size][i].second);
                        solutions_.push_back(str);
                        ++solu_size;
                    }
                }else{
                    string substr = _s.substr(_words[index].size());
                    if (bfs(substr, _words, _words_map, _solution, _deal_size + _words[index].size(), &has_solution)){//如果可以切割则bfs迭代
                        string str;
                        int count = 0;
                        for (auto val : _solution){
                            count += _words[val].size();
                            str += _words[val];
                            str += ' ';
                            pair<int, int> p;
                            p.first = solutions_.size();
                            p.second = str.size();
                            solution_after_index_[count].push_back(p);
                        }
                        solution_after_index_[count].pop_back();
                        str = str.substr(0, str.size() - 1);
                        solutions_.push_back(str);
                    }
                }
                _solution.pop_back();
            }
            can_break = true;
        }
        if (has_solution)
            *_has_solution = true;
        else{
            cannot_break_[_deal_size] = true;
        }
        return false;
    }
};
Solution obj;
int main(int argc, char** argv)
{
    //  string s = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
    //  vector<string> words = { "a", "aa", "aaa", "aaaa", "aaaaa", "aaaaaa", "aaaaaaa", "aaaaaaaa", "aaaaaaaaa", "aaaaaaaaaa" };
    string s = "catsanddog";
    vector<string> words = {"cat", "cats", "and", "sand", "dog"};
    vector<string> result = obj.wordBreak(s, words);
    return 0;
}

思路

与单词拆分中的方法类似,详情请参考之前写的LeetCode 139 单词拆分:字符串s能否分割为字符串数组words(wordDict)中字符串的组合?(某未来公司面试题目)。有一点区别的是,这里要用BFS而不是DFS,因为要找出所有的结果。用一个list<int>& _solution队列来实现栈的效果,暂存方案,然后当找到最后确定是正确的solution时再从头到尾将其放入私有成员vector<string> solutions_;中。同样为了避免超时问题,用一个bool* cannot_break_;记录是否在该位置无法进行切割,判断依据是下一层全部不能有solution。然后,考虑到从上到下会出现很多反复的情况,用一个vector<vector<pair<int, int>>> solution_after_index_;记录下,某个位置有多少种实现情况。比如在[10]这里分割的话,可以有3种情况,那么solution_after_index_[10]将会push_back3个pair,每个pair的first和second表示的是solutions_[first][second]及其以后的子字符串就是在[10]这里中断的所有结果。以后要是再遇到在[10]这里截断,直接遍历加到后面即可。

原文地址:https://www.cnblogs.com/HaoQChen/p/11048601.html