python中的快速排序

在工程实际中，经常需要将python代码转化成c++代码，为了获得一样的结果，需要保证算法的一致性。最近在目标检测的算法中，发现python默认排序算法为改进版的快速排序，描述如下：

* Quick sort is usually the fastest, but the worst case scenario is O(N^2) so
* the code switches to the O(NlogN) worst case heapsort if not enough progress
* is made on the large side of the two quicksort partitions. This improves the
* worst case while still retaining the speed of quicksort for the common case.
* This is variant known as introsort.
*
*
* def introsort(lower, higher, recursion_limit=log2(higher - lower + 1) * 2):
*   # sort remainder with heapsort if we are not making enough progress
*   # we arbitrarily choose 2 * log(n) as the cutoff point
*   if recursion_limit < 0:
*       heapsort(lower, higher)
*       return
*
*   if lower < higher:
*      pivot_pos = partition(lower, higher)
*      # recurse into smaller first and leave larger on stack
*      # this limits the required stack space
*      if (pivot_pos - lower > higher - pivot_pos):
*          quicksort(pivot_pos + 1, higher, recursion_limit - 1)
*          quicksort(lower, pivot_pos, recursion_limit - 1)
*      else:
*          quicksort(lower, pivot_pos, recursion_limit - 1)
*          quicksort(pivot_pos + 1, higher, recursion_limit - 1)
*
*
* the below code implements this converted to an iteration and as an
* additional minor optimization skips the recursion depth checking on the
* smaller partition as it is always less than half of the remaining data and
* will thus terminate fast enough

从上面的内容可以看到，改进的算法结合了快速排序和堆排序，如果我们不按照这个算法去实现c++版的排序算法，很难保证结果一致性。

以下是c++代码，复现了python语言numpy库中的argsort的快速排序算法

#ifndef __myself__sort__h__h__
#define __myself__sort__h__h__

#define PYA_QS_STACK 100
#define SMALL_QUICKSORT 15
#define SMALL_MERGESORT 20

template<typename T>
void TYPE_SWAP(T* a, T* b)
{
    T t = *a;
    *a = *b;
    *b = t;
}

template<typename T>
bool TYPE_LT(T a, T b)
{
    return a<b;
}

int npy_get_msb(int n)
{
    int k;  
    for(k=0;n>1;n>>=1) 
        ++k;  
    return k; 
}

template<typename T>
int heap_sort(T *start, int n)
{
    T tmp, *a;
    int i,j,l;

    /* The array needs to be offset by one for heapsort indexing */
    a = start - 1;
    //先建立大顶堆
    for (l = n>>1; l > 0; --l) 
    {
        tmp = a[l];
        for (i = l, j = l<<1; j <= n;) 
        {
            //因为假设根结点的序号是1，所以i结点左孩子和右孩子分别为2j和2i+1
            if (j < n && TYPE_LT(a[j], a[j+1])) //左右孩子的比较
            {
                    j += 1;//j为较大的记录的下标
            }
            if (TYPE_LT(tmp, a[j])) 
            {
                //将孩子结点上位，则以孩子结点的位置进行下一轮的筛选
                    a[i] = a[j];
                    i = j;
                    j += j;
            }
            else 
            {
                    break;
            }
        }
        a[i] = tmp;//插入最开始不和谐的元素
    }
    //进行排序
    for (; n > 1;) 
    {
        //最后一个元素和第一元素进行交换
        tmp = a[n];
        a[n] = a[1];
        n -= 1;
        //然后将剩下的无序元素继续调整为大顶堆
        for (i = 1, j = 2; j <= n;) 
        {
            if (j < n && TYPE_LT(a[j], a[j+1])) 
            {
                j++;
            }
            if (TYPE_LT(tmp, a[j])) 
            {
                a[i] = a[j];
                i = j;
                j += j;
            }
            else 
            {
                break;
            }
        }
        a[i] = tmp;
    }    
    return 0;
}

template<typename T>
int quick_sort(T *start, int num)
{
    T vp;
    T *pl = start;
    T *pr = pl + num - 1;
    T *stack[PYA_QS_STACK];
    T **sptr = stack;
    T *pm, *pi, *pj, *pk;
    int depth[PYA_QS_STACK];
    int * psdepth = depth;
    int cdepth = npy_get_msb(num) * 2;

    for (;;) 
    {
        if (cdepth < 0) 
        {
            heap_sort(pl, pr - pl + 1);
            goto stack_pop;
        }
        while ((pr - pl) > SMALL_QUICKSORT) 
        {
            /* quicksort partition */
            pm = pl + ((pr - pl) >> 1);
            if (TYPE_LT(*pm, *pl)) TYPE_SWAP(pm, pl);
            if (TYPE_LT(*pr, *pm)) TYPE_SWAP(pr, pm);
            if (TYPE_LT(*pm, *pl)) TYPE_SWAP(pm, pl);
            vp = *pm;
            pi = pl;
            pj = pr - 1;
            TYPE_SWAP(pm, pj);
            for (;;) 
            {
                do ++pi; while (TYPE_LT(*pi, vp));
                do --pj; while (TYPE_LT(vp, *pj));
                if (pi >= pj) 
                {
                        break;
                }
                TYPE_SWAP(pi,pj);
            }
            pk = pr - 1;
            TYPE_SWAP(pi, pk);
            /* push largest partition on stack */
            if (pi - pl < pr - pi) 
            {
                *sptr++ = pi + 1;
                *sptr++ = pr;
                pr = pi - 1;
            }
            else 
            {
                *sptr++ = pl;
                *sptr++ = pi - 1;
                pl = pi + 1;
            }
            *psdepth++ = --cdepth;
        }

        /* insertion sort */
        for (pi = pl + 1; pi <= pr; ++pi) 
        {
            vp = *pi;
            pj = pi;
            pk = pi - 1;
            while (pj > pl && TYPE_LT(vp, *pk)) 
            {
                *pj-- = *pk--;
            }
            *pj = vp;
        }
stack_pop:
        if (sptr == stack) 
        {
            break;
        }
        pr = *(--sptr);
        pl = *(--sptr);
        cdepth = *(--psdepth);
    }
    return 0;
}
#endif

使用方法：

#include <iostream>
#include <fstream>
#include <cstdlib>
#include <vector>
#include <map>
#include "mysort.h"
using namespace std;

typedef struct tagELEM
{
    double value;
    int index;
    bool operator < (const tagELEM& e)
    {
        return value<e.value;
    }
}ELEM, *PELEM;

int main(int argc, char **argv) 
{
    fstream fdata("data.txt", std::ios::in);
    if(!fdata.is_open())
        return -1;
    fstream fresult("result.txt", std::ios::in);
        if(!fresult.is_open())
        return -1;
    char data_buffer[255] ={0};
    char result_buffer[255] ={0};
    multimap<double, int> mapScore;
    vector<int> vecIdx;
    int idx = 0;
    
    ELEM elems[15280];
    while(!fresult.eof())
    {
        fdata.getline(data_buffer, 255);
        fresult.getline(result_buffer, 255);
        if(data_buffer[0]==0)
            continue;
        elems[idx].index = idx;
        elems[idx].value = atof(data_buffer);
        idx++;
         vecIdx.push_back(atoi(result_buffer));
    }
    
    quick_sort(elems, 15280);// 0, 15279);//, 15280);
    for(int i=0; i<15280; i++)
    {
        if(vecIdx[i] != elems[i].index)
        {
            std::cout<<i<<"	"<<elems[i].index<<"	"<<vecIdx[i]<<"	---------------------"<<elems[i].value<<std::endl;
        }
        else
        {
            std::cout<<i<<"	"<<elems[i].index<<"	"<<vecIdx[i]<<"	"<<elems[i].value<<std::endl;
        }
    }
    fdata.close();
    fresult.close();
    return 0;
}

data.txt为保存score的文件，result.tx保存了python排序结果

参考：

堆排序：http://www.cnblogs.com/mengdd/archive/2012/11/30/2796845.html

快速排序：http://blog.csdn.net/morewindows/article/details/6684558

python排序代码：https://github.com/numpy/numpy/tree/master/numpy/core/src/npysort