面试题:一千万条短信中,找到重复次数最多的10条

  1 #include<iostream>
  2 #include<fstream>
  3 #include <string>
  4 #include <unordered_map>
  5 #include <vector>
  6 #include <algorithm>
  7 #include <queue>
  8 
  9 using namespace std;
 10 
 11 typedef unsigned long int uint64_t;
 12 
 13 //获取字符串的hash值
 14 uint64_t getHash(const string& s) {
 15     uint64_t hashVal = 0;
 16     uint64_t a = 3589;
 17     uint64_t b = 81369;
 18     for (char ch : s) {
 19         hashVal = hashVal * a + ch;
 20         a = a * b;
 21     }
 22     return hashVal;
 23 }
 24 
 25 struct posAndCount {
 26     int pos;
 27     int count;
 28     posAndCount(int a = 0, int b = 0) :
 29             pos(a), count(b) {
 30     };
 31 };
 32 
 33 struct NameHashPosAndCount {
 34     uint64_t namehash;
 35     int pos;
 36     int count;
 37 
 38     friend bool operator <(const NameHashPosAndCount& a,
 39             const NameHashPosAndCount& b) {
 40         return a.count > b.count;
 41     }
 42 };
 43 
 44 
 45 //从N个数据中找出出现次数最多的N个
 46 void MfromN() {
 47     unordered_map<uint64_t, posAndCount> names;
 48     unordered_map<uint64_t, posAndCount>::iterator it;
 50     posAndCount pc;
 51     int pos = 0;
52 ifstream fin("message.txt"); 53 string l; 54 while (fin >> l) { 55 uint64_t hashVal = getHash(l); 56 it = names.find(hashVal); 57 if (it == names.end()) { 58 pc.pos = pos; 59 pc.count = 0; 60 names[hashVal] = pc; 61 } else { 62 names[hashVal].count++; 63 } 64 pos++; 65 } 66 fin.close(); 67 68 69 //优先队列寻找出现次数最多的10条 70 priority_queue<NameHashPosAndCount> name10; 71 NameHashPosAndCount n; 72 for (it = names.begin(); it != names.end(); it++) { 73 if (name10.size() < 10) { 74 n.namehash = it->first; 75 n.pos = it->second.pos; 76 n.count = it->second.count; 77 name10.push(n); 78 } else { 79 if (name10.top().count < it->second.count) { 80 name10.pop(); 81 n.namehash = it->first; 82 n.pos = it->second.pos; 83 n.count = it->second.count; 84 name10.push(n); 85 } 86 } 87 } 88 vector<int> index; 89 while (!name10.empty()) { 90 index.push_back(name10.top().pos); 91 name10.pop(); 92 } 93 sort(index.begin(),index.end()); 94 95 ifstream fin2("message.txt"); 96 int i = 0; 97 int k = 0; 98 while(k < 10 && fin2 >> l){ 99 if(i == index[k]){ 100 cout << l << endl; 101 k++; 102 } 103 i++; 104 } 105 fin2.close(); 106 }
原文地址:https://www.cnblogs.com/wxquare/p/6478774.html