implement the bucket sort and some analysis

Here I give two insertion sort, one use array, another use list. I also get a conclusion that nothing better in insertion_sort2 than insertion_sort, maybe the size is too large and too random which will cause much more operation of linked list and memory malloc.

”即使数组接触的数据比链表要多,他们也会比链表更快一些,这是因为他们的顺序内存访问和系统的高速缓存之间交互作用时效率很高。“《编程珠玑》如是说

the first version codes here:

   1:  #include <stdio.h>
   2:  #include <stdlib.h>
   3:  #define MAX_LENGTH 99999
   4:   
   5:  struct node;
   6:  typedef struct node * ptrtonode;
   7:  typedef ptrtonode pnode;
   8:  struct node
   9:  {
  10:      int value;
  11:      pnode next;
  12:  };
  13:   
  14:  void empty_list(pnode l)
  15:  {
  16:      l->value=0;
  17:      l->next=NULL;
  18:  }
  19:   
  20:  pnode create_new()
  21:  {
  22:      pnode pn=malloc(sizeof(struct node));
  23:      empty_list(pn);
  24:      return pn;
  25:  }
  26:   
  27:  void insert(pnode l,int value)
  28:  {
  29:      pnode previous=l;
  30:      pnode current=l->next;
  31:      pnode newnode=create_new();
  32:      newnode->value=value;
  33:      while(current!=NULL)
  34:      {
  35:          if(value<current->value)
  36:          {
  37:              previous->next=newnode;
  38:              newnode->next=current;
  39:              return;
  40:          }
  41:          previous=current;
  42:          current=current->next;
  43:      }
  44:   
  45:      previous->next=newnode;
  46:      newnode->next=NULL;
  47:  }
  48:   
  49:  void bucket_sort(const int a[],int b[])
  50:  {
  51:      int i=0;
  52:      pnode pn[MAX_LENGTH];
  53:      for(i=0;i<MAX_LENGTH;i++)
  54:      {
  55:          pn[i]=create_new();
  56:          b[i]=0;
  57:      }
  58:   
  59:      for(i=0;i<MAX_LENGTH;i++)
  60:      {
  61:          insert(pn[a[i]],a[i]);
  62:      }
  63:   
  64:      //concatenate
  65:      pnode position;
  66:      pnode temp;
  67:      int j=0;
  68:      for(i=0;i<MAX_LENGTH;i++)
  69:      {
  70:          position=pn[i]->next;
  71:          while(position!=NULL)
  72:          {
  73:              b[j++]=position->value;
  74:              temp=position;
  75:              position=position->next;
  76:              free(temp);
  77:          }
  78:          free(pn[i]);
  79:      }
  80:   
  81:  }
  82:   
  83:  void insertion_sort(const int a[],int b[])
  84:  {
  85:      int i,j,m,n;
  86:      for(i=0;i<MAX_LENGTH;i++)
  87:      {
  88:          b[i]=a[i];
  89:      }
  90:   
  91:      for(i=1;i<MAX_LENGTH;i++)
  92:      {
  93:          m=b[i];
  94:          for(j=i-1;j>=0;j--)
  95:          {
  96:              if(m<b[j])
  97:              {
  98:                  n=b[j];
  99:                  b[j]=b[j+1];
 100:                  b[j+1]=n;
 101:              }
 102:          }
 103:      }
 104:  }
 105:   
 106:  void insertion_sort2(const int a[],int b[])
 107:  {
 108:      int i,j,m,n;
 109:      for(i=0;i<MAX_LENGTH;i++)
 110:      {
 111:          b[i]=0;
 112:      }
 113:   
 114:      pnode pn=create_new();
 115:      for(i=0;i<MAX_LENGTH;i++)
 116:      {
 117:          insert(pn,a[i]);
 118:      }
 119:   
 120:      pnode c=pn->next;
 121:      pnode temp;
 122:      i=0;
 123:      while(c!=NULL)
 124:      {
 125:          b[i++]=c->value;
 126:          temp=c;
 127:          c=c->next;
 128:          free(temp);
 129:      }
 130:  }
 131:   
 132:  void print_array(const int a[],int size)
 133:  {
 134:      int i=0;
 135:      for(i=0;i<size;i++)
 136:      {
 137:          printf("%d,",a[i]);
 138:      }
 139:      printf("\n");
 140:  }
 141:   
 142:  int main()
 143:  {
 144:      int a[MAX_LENGTH];
 145:      int b[MAX_LENGTH];
 146:      int i=0;
 147:      for(i=0;i<MAX_LENGTH;i++)
 148:      {
 149:          a[i]=rand();
 150:          if(a[i]<0)
 151:          {
 152:              printf("fail");
 153:              exit(-1);
 154:          }
 155:   
 156:          b[i]=0;
 157:      }
 158:      bucket_sort(a,b);
 159:      insertion_sort(a,b);
 160:      insertion_sort2(a,b);
 161:   
 162:      print_array(a,MAX_LENGTH);
 163:      print_array(b,MAX_LENGTH);
 164:      return 0;
 165:  }

but when i change the MAX_LENGTH from 99999 to 999999, then cause stack overflow exception, let’s check the default stack size via VMMap:

image

and we found the default stack size of the current thread is 2M, but the number of pn array at line 50 is 999999, that need more that 3.5M memory, so we need refine this issue:

using static variable or malloc dynamic memory.

then the line 50 should be changed to:

static pnode pn[MAX_LENGTH];

or

pnode *pn=malloc(sizeof(pnode)*MAX_LENGTH); //don’t forget free this big array

Ok, again, let’s build and run, but still stack overflow! oh, yeah, the line 144 and 145 also need be changed:

static int a[MAX_LENGTH];
static int b[MAX_LENGTH];

微笑

performance of running time:

when length is 99999:

image

when length is 9999:

image

when length is 999:

image

when length is 99:

image

take care when the length is small the insertion sort will perform better than bucket sort.

最后留两个问题:

修改程序使之适配任意或自定义桶的数量,(不过我们知道在内存足够大的情况下,桶越多,序越快);

malloc的多次调用会导致性能下降,如何解决这个问题呢?

原文地址:https://www.cnblogs.com/dancewithautomation/p/2569299.html