A typical solution is heap based - "top K". Complexity is O(nlgk).
typedef pair<int, unsigned> Rec; struct Comp { bool operator()(const Rec &r1, const Rec &r2) { return r1.second > r2.second; } }; class Solution { public: vector<int> topKFrequent(vector<int>& nums, int k) { unordered_map<int, unsigned> hm; for(auto&v : nums) hm[v]++; priority_queue<Rec, vector<Rec>, Comp> q; for(auto &kv : hm) { Rec r(kv.first, kv.second); q.push(r); if(q.size() > k) q.pop(); } vector<int> ret; while(!q.empty()) { ret.push_back(q.top().first); q.pop(); } return ret; } };
There is a O(n) one indeed - bucketing the frequencies.
https://leetcode.com/discuss/100636/c-o-nlogk-and-o-n-solutions