从N个数中取M个最小的有序数

最近工作碰到类似问题,存储的元数据有100w个块,存储使用容量超过95%触发紧急覆盖需要删除超出的空间,所有需要找到最早使用的块。

假设N=1000000, M=10000,从N个0~1000000的随机数中,找有顺序的M个数。

方法一、通过维护M个数的大根堆,然后对M个数调整小根堆遍历。

#include<iostream>
#include<Queue>
#include<functional>
#include<ctime>
using namespace std;
const int N = 1000000;
const int M = 10000;
int a[N] = { 0 };

int main()
{
    for (int i = 0; i < N; i++) {
        a[i] = N - i;
    }
    time_t t = time(NULL);
    priority_queue<int> pq; // 默认是大根堆, priority_queue<int, vector<int>, less<int> >
    for (int i = 0; i < M; i++) {
        pq.push(a[i]);
    }
    for (int i = M; i < N; i++) {
        if (pq.top() > a[i]) {
            pq.pop();
            pq.push(a[i]);
        }
    }
    priority_queue<int, vector<int>, greater<int> > qp1; //创建小根堆
    for (int i = 0; i < M; i++) {
        qp1.push(pq.top());
        pq.pop();
    }
    for (int i = 0; i < M; i++) {
        cout << qp1.top() << " ";
        qp1.pop();
    }
    cout << endl << "consume time:" << time(NULL) - t << "s" << endl;
    return 0;
}

最坏情况耗时4s,无法满足业务需求。

方法二   维护M空间的数组,并把哈希到对应下标的值缓存起来,通过趟数来控制,直到找到M个数。

#include<iostream>
#include<vector>
#include<ctime>
using namespace std;
const int N = 1000000;
const int M = 10000;
int a[N] = { 0 };

int main()
{
    for (int i = 0; i < N; i++) {
        a[i] = N - i;
    }
    time_t t = time(NULL);
    vector<vector<int> > v;
    v.resize(M);
    int minv = INT_MAX;
    int maxv = INT_MIN;
    for (int i = 0; i < N; i++) {
        v[a[i] % M].push_back(a[i]);
        if (minv > a[i]) {
            minv = a[i];
        }
        if (maxv < a[i]) {
            maxv = a[i];
        }
    }
    int count = 0;
    int i = 0;
    bool flag = true;
    for (int k = minv; flag && k < maxv + M; k+= M) {
        if (k != minv) {
            i = 0;
        }
        else {
            i = minv % M;
        }
        for (; flag && i < M; i++) {
            for (int j = 0; j < (int)v[i].size(); j++) {
                int& val = v[i][j];
                if (val >= k / M * M && val < k / M * M + M) {
                    count++;
                    //cout << val << " ";
                    if (count == M){
                        flag = false;
                        break;
                    }
                }
            }
        }
    }
    cout << endl << "consume time:" << time(NULL) - t << "s" << endl;
    return 0;
}

该方法不管什么情况耗时1s, 满足业务需求。

原文地址:https://www.cnblogs.com/shao-qi/p/13172670.html