UvaLive 4917 Abstract Extract （模拟）

题意：

给定一篇文章，文章由若干段落组成，段落由若干句子组成，句子只会以 '!'、'.'、'?' 结尾。对每个段落，求出这样一个句子：它所含的不同单词中，同时出现在同一段落内其后任意句子里的单词数最多（若有多个这样的句子，取最靠前的一个）。注意：单词忽略大小写，重复的单词只算一个。

题目中关键段：

A topic sentence for a paragraph is the single sentence in the paragraph that best describes the paragraph’s content. For our purposes, we will select the earliest sentence in the paragraph that maximizes the number of distinct words in S that also occur in any following sentence within the same paragraph.

分析：

英文模拟题需要多读几次题目才能明白意思。段中“下面（following）的句子”是指：如果一段有四句，第 1 句要和第 2、3、4 句比，第 2 句要和第 3、4 句比，第 3 句要和第 4 句比；最后一句后面没有句子，所以答案不可能是最后一句。

我们先把每篇文章逐行记录下来，处理某个段落时再用一个 string 把该段的多行拼接成一行；因为 gets 不会读入换行符，所以拼接时要在每行末尾补上换行符。之后把各个段落分开处理即可。

每个段落我们如果要处理句子只要3个数据，句子的头，尾，不同的（distinct）单词数

句子的头尾用两个变量在 string 里扫描查找即可，然后用 set<string> 记录每个句子中出现的不同单词。

然后比较输出即可。

  1 #include <bits/stdc++.h>
  2 using namespace std;
  3 char a[512][256]; // Input lines of the article currently being read, one line per row; main() fills it and restarts at row 0 for each article.
  4 bool para(int st, int ed){
  5     string s;
  6     set<string> dic;
  7     for(int i = st; i < ed; i++){//将很多行的段落 转换为只有一行的string
  8         s += a[i], s += '
';
  9     }
 10     int len = s.size();
 11     vector<int> sen_be; //标记句子开头
 12     vector<int> sen_ed; //句子结尾
 13     vector<set<string> > sen_word;// 每个句子的distinct单词
 14     int k = 0, l = 0;
 15     //闭区间[k,l] 为句子
 16     int once = 0, t = 3;
 17     for(;;){
 18 
 19         while(s[k] == ' ' || s[k] == '
') {
 20             k++;
 21             if(k >= len) break;
 22         }
 23         if(k >= len) break;
 24         while(s[l] != '?' && s[l] != '!' && s[l] != '.'){
 25             l++;
 26             if(l >= len) break;
 27         }
 28         if(l >= len) break;
 29         sen_be.push_back(k);
 30         sen_ed.push_back(l);
 31         l++;
 32         k = l;
 33     }
 34 
 35     /*-----------下面处理这段中所有的句子-----------*/
 36 
 37     int n_sen = sen_be.size();
 38     if(n_sen < 3){
 39         return false;
 40     }
 41     for(int i = 0; i < n_sen; i++){
 42         int k1, l1;
 43         k1 = sen_be[i], l1 = sen_ed[i];
 44         string temp = s.substr(k1,l1-k1);
 45         set<string> temp1;
 46         stringstream ss(temp);
 47         string temp_word;
 48         while(ss >> temp_word){
 49             string word = "";
 50             for(int i = 0; i < temp_word.size(); i++){
 51                 if(isalnum(temp_word[i]))
 52                     word += tolower(temp_word[i]);
 53             }
 54             temp1.insert(word);
 55         }
 56         sen_word.push_back(temp1);
 57     }
 58     int mcnt = 0, k2 = sen_be[0], l2 = sen_ed[0];
 59     for(int i = 0; i < n_sen; i++){
 60         int cnt = 0;
 61         for(set<string> :: iterator it = sen_word[i].begin(); it != sen_word[i].end(); it ++){
 62             for(int j = i + 1; j < n_sen; j++){
 63                 if(sen_word[j].count(*it)){
 64                     cnt++;
 65                     break;
 66                 }
 67             }
 68         }
 69         if(cnt > mcnt){
 70             mcnt = cnt;
 71             k2 = sen_be[i];
 72             l2 = sen_ed[i];
 73         }
 74     }
 75 
 76     for(int i = k2; i <= l2; i++){
 77         putchar(s[i]);
 78     }
 79     printf("
");
 80     return true;
 81 }
 82 void art(int n){
 83     int last = 0;
 84     bool flag = false; //标记是否所有段落都小于3个句子， 如果是要输出空行
 85     for(int i = 0; i < n; i++){
 86         if(a[i][0] == 0){
 87             if(para(last,i))
 88                 flag = true;
 89             last = i + 1;
 90         }
 91     }
 92     if(last != n)
 93         if(para(last, n))
 94             flag = true;
 95     if(!flag) printf("
");
 96     puts("======");
 97 }
 98 int main(){
 99     int line = 0;
100     while(gets(a[line])){//gets是不会读入回车符的， 但fgets会
101         if(strcmp(a[line],"***") == 0){ // article
102             art(line);
103             line = 0;//每读完一篇文章就让line 归0
104         }
105         else if(strcmp(a[line], "******") == 0){
106             art(line);
107             break;
108         }
109         else line++;
110     }
111 }