查找，比较查找，散列(哈希)查找

　　查找，在一组记录集合中，找到关键码值等于给定值的某个记录，或找到关键码值符合特定条件的某些记录的过程，也叫检索。

　　一、查找表，是由同一类型的数据元素(或记录)构成的集合。

　　二、提高查找效率的方法：1、预排序；2、建立索引；3、散列技术（不允许出现重复关键码，不适合进行范围查询）；

　　三、平均查找长度（也叫平均检索长度），检索过程中对关键码的比较次数。（ASL，Average Search Length）

　　四、只对查找表进行查询、检索操作的查找称为静态查找；在查找过程中还要进行插入、删除操作的查找称为动态查找；

　　五、比较式查找，特点是，记录在表中的位置和其关键字间不存在确定的关系，查找过程为给定值依次和各个关键字进行比较，查找的效率取决于进行比较的关键字个数。

　　1、顺序查找，等概率下，查找成功时的平均查找长度为 (n+1)/2；设置监视哨（哨兵）的情况下，查找失败时需要比较 n+1 次；因此平均查找长度满足 (n+1)/2 < ASL < n+1

 1 #include <stdio.h>
 2 
 3 /* 定义数组 */
 4 #define MAXSIZE 100
 5 typedef int ElementType;
 6 typedef struct LNode *List;
 7 
 8 struct LNode{
 9     ElementType Element[MAXSIZE];
10     int Length;
11 };
12 
13 /* 顺序查找 */
14 int SequentialSearch (List Tbl, ElementType K)
15 { /*在表Tbl[1]~Tbl[n]中查找关键字为K的数据元素*/
16     int i;
17     Tbl->Element[0] = K; /*建立哨兵*/
18     for(i = Tbl->Length; Tbl->Element[i]!= K; i--);
19     return i; /*查找成功返回所在单元下标；不成功返回0*/
20 }
21 
22 int main()
23 {    /* 定义静态表变量及指针 */
24     struct LNode L;
25     List Tbl = &L;
26     
27     /* 静态表赋值 */
28     int N = 7;
29     for(int i=1; i<=N; ++i){
30         Tbl->Element[i] = i;
31     }
32     Tbl->Length = N;
33     
34     /* 查找是否成功, 成功返回下标 */
35     printf("%d", SequentialSearch(Tbl, 6));
36     return 0; 
37 }

顺序查找代码

　　2、二分法查找，数据元素的关键字满足有序（比如：小到大）并且是连续存放（即顺序存储，数组），那么可以进行二分查找；查找成功时查找次数不会超过判定树的深度，n个结点的判定树的深度为⌊log₂n⌋+1；二分查找只适合关键字有序并且顺序存储的查找表；

 1 #include <stdio.h>
 2 
 3 /* 定义数组 */
 4 #define MAXSIZE 100
 5 typedef int ElementType;
 6 typedef struct LNode *List;
 7 
 8 struct LNode{
 9     ElementType Element[MAXSIZE];
10     int Length;
11 };
12 
13 /* 二分查找 */
14 int BinarySearch ( List Tbl, ElementType K)
15 { /*在表Tbl中查找关键字为K的数据元素*/
16     int left, right, mid, NotFound = -1;
17     left = 1; /*初始左边界*/
18     right = Tbl->Length; /*初始右边界*/
19     while ( left <= right )
20     {
21         mid = (left+right)/2; /*计算中间元素坐标*/
22         if( K < Tbl->Element[mid]) right = mid-1; /*调整右边界*/
23         else if( K > Tbl->Element[mid]) left = mid+1; /*调整左边界*/
24         else return mid; /*查找成功，返回数据元素的下标*/
25     }
26     return NotFound; /*查找不成功，返回-1*/
27 }
28 
29 int main()
30 {    /* 定义静态表变量及指针 */
31     struct LNode L;
32     List Tbl = &L;
33     
34     /* 静态表赋值 */
35     int N = 7;
36     for(int i=1; i<=N; ++i){
37         Tbl->Element[i] = i;
38     }
39     Tbl->Length = N;
40     
41     /* 查找是否成功, 成功返回下标 */
42     printf("%d", BinarySearch(Tbl, 7));
43     return 0; 
44 }

二分查找代码

　　3、二叉查找树（又叫二叉排序树，二叉搜索树）；

　　六、散列查找，也叫哈希查找，https://blog.csdn.net/qq_25579889/article/details/50466204

　　1、哈希函数，预先知道所查关键字在表中的位置，记录表中位置和关键字之间的确定关系，可以直接找到该结点；一般情况下，在关键字与记录在表中的存储位置之间建立一个函数关系，以h(key)作为关键字为key的记录在表中的位置，通常称为这个函数h(key)为哈希函数；散列函数一般应考虑两个因素，一个是简单，一个是关键字对应的地址空间分布均匀，以尽量减少冲突；装填因子，设散列表空间大小为m，填入表中元素个数为n，则称n/m为散列表的装填因子；散列方法的存储对关键字是随机的，不便于顺序查找关键字，也不适合于范围查找，或最大值最小值查找；

　　2、数字关键字的散列函数构造：直接定址法( h(key) = key - 1990 ，h(key) = a*key + b)，除留余数法( h(key) = key mod p，p ≤ TableSize，p 一般取不大于 TableSize 的最大素数 )，数字分析法( h(key) = atoi(key+7) ， char *key )，折叠法，平方取中法；

　　字符关键字的散列函数构造：ASCII码加和法，前3位字符移位法( h(key) = (key[0]*27^2+key[1]*27+key[2]） mod TableSize)，移位法；

typedef int Index;  /* hash address type; C11 allows an identical typedef to be repeated */

/*
 * Shift-based string hash.
 *
 * Folds each character of the NUL-terminated string Key into an unsigned
 * accumulator (h = h*32 + character) and reduces the result modulo
 * TableSize.
 *
 * Returns an address in 0..TableSize-1. Returns 0 for a null Key or a
 * non-positive TableSize, for which the modulo would be undefined or
 * meaningless.
 */
Index Hash(const char *Key, int TableSize)
{
    unsigned int h = 0;   /* running hash value */

    if (!Key || TableSize <= 0)
        return 0;

    while (*Key != '\0')
        h = (h << 5) + *Key++;  /* shift in the next character */
    return (Index)(h % (unsigned int)TableSize);
}

　　3、冲突处理方法，开放地址法，链地址法；

　　(1)、开放地址法，h_i(key) = ( h(key) + d_i ) mod TableSize，方案，线性探测 d_i = i ；平方探测 d_i = ±i²（Quadratic probing，也叫二次探测），如果散列表长度TableSize是某个4k+3(k是正整数)形式的素数，平方探测法就可以探查到整个散列表空间；双散列 d_i = i*h₂(key) ，h₂(key) = p - (key mod p)；再散列；

　　(2)、链地址法，分离链接法，将相应位置上冲突的所有关键词存储在同一个链表中；

 1 #define MAXTABLESIZE 100000 /* 允许开辟的最大散列表长度 */
 2 typedef int ElementType;    /* 关键词类型用整型 */
 3 typedef int Index;          /* 散列地址类型 */
 4 typedef Index Position;     /* 数据所在位置与散列地址是同一类型 */
 5 /* 散列单元状态类型，分别对应：有合法元素、空单元、有已删除元素 */
 6 typedef enum { Legitimate, Empty, Deleted } EntryType;
 7  
 8 typedef struct HashEntry Cell; /* 散列表单元类型 */
 9 struct HashEntry{
10     ElementType Data; /* 存放元素 */
11     EntryType Info;   /* 单元状态 */
12 };
13  
14 typedef struct TblNode *HashTable; /* 散列表类型 */
15 struct TblNode {   /* 散列表结点定义 */
16     int TableSize; /* 表的最大长度 */
17     Cell *Cells;   /* 存放散列单元数据的数组 */
18 };
19  
#ifndef MAXTABLESIZE
#define MAXTABLESIZE 100000 /* fallback when the table limit is not already defined */
#endif

/*
 * Returns the smallest odd prime strictly greater than N.
 *
 * Only odd candidates are examined, so 2 is never returned; any N below 2
 * yields 3. The search stops once the candidate exceeds MAXTABLESIZE; in
 * that case the first odd candidate above the limit is returned and it is
 * not guaranteed to be prime.
 *
 * Primality is tested by integer trial division (i <= p/i), so no
 * floating-point square root is involved.
 */
int NextPrime( int N )
{
    int i, p;

    if ( N < 2 )
        return 3;

    p = (N%2)? N+2 : N+1; /* first odd number above N */

    while( p <= MAXTABLESIZE ) {
        for( i=3; i <= p/i; i+=2 )
            if ( !(p%i) ) break; /* found a divisor: p is composite */
        if ( i > p/i ) break;    /* loop ran to completion: p is prime */
        p += 2;                  /* try the next odd number */
    }
    return p;
}
32  
33 HashTable CreateTable( int TableSize )
34 {
35     HashTable H;
36     int i;
37  
38     H = (HashTable)malloc(sizeof(struct TblNode));
39     /* 保证散列表最大长度是素数 */
40     H->TableSize = NextPrime(TableSize);
41     /* 声明单元数组 */
42     H->Cells = (Cell *)malloc(H->TableSize*sizeof(Cell));
43     /* 初始化单元状态为“空单元” */
44     for( i=0; i<H->TableSize; i++ )
45         H->Cells[i].Info = Empty;
46  
47     return H;
48 }
49 Position Find( HashTable H, ElementType Key )
50 {
51     Position CurrentPos, NewPos;
52     int CNum = 0; /* 记录冲突次数 */
53  
54     NewPos = CurrentPos = Hash( Key, H->TableSize ); /* 初始散列位置 */
55     /* 当该位置的单元非空，并且不是要找的元素时，发生冲突 */
56     while( H->Cells[NewPos].Info!=Empty && H->Cells[NewPos].Data!=Key ) {
57         /* 字符串类型的关键词需要 strcmp 函数!! */
58         /* 统计1次冲突，并判断奇偶次 */
59         if( ++CNum%2 ){ /* 奇数次冲突 */
60             NewPos = CurrentPos + (CNum+1)*(CNum+1)/4; /* 增量为+[(CNum+1)/2]^2*/
61             if ( NewPos >= H->TableSize )
62                 NewPos = NewPos % H->TableSize; /* 调整为合法地址 */
63         }
64         else { /* 偶数次冲突 */
65             NewPos = CurrentPos - CNum*CNum/4; /* 增量为-(CNum/2)^2 */
66             while( NewPos < 0 )
67                 NewPos += H->TableSize; /* 调整为合法地址 */
68         }
69     }
70     return NewPos; /* 此时NewPos或者是Key的位置，或者是一个空单元的位置（表示找不到）*/
71 }
72  
73 bool Insert( HashTable H, ElementType Key )
74 {
75     Position Pos = Find( H, Key ); /* 先检查Key是否已经存在 */
76  
77     if( H->Cells[Pos].Info != Legitimate ) { 
78     /* 如果这个单元没有被占，说明Key可以插入在此*/
79         H->Cells[Pos].Info = Legitimate;
80         H->Cells[Pos].Data = Key;
81         /*字符串类型的关键词需要 strcpy 函数!! */
82         return true;
83     }
84     else {
85         printf("键值已存在");
86         return false;
87     }
88 }

开放地址法代码

 1 #define KEYLENGTH 15                   /* 关键词字符串的最大长度 */
 2 typedef char ElementType[KEYLENGTH+1]; /* 关键词类型用字符串 */
 3 typedef int Index;                     /* 散列地址类型 */
 4 /******** 以下是单链表的定义 ********/
 5 typedef struct LNode *PtrToLNode;
 6 struct LNode {
 7     ElementType Data;
 8     PtrToLNode Next;
 9 };
10 typedef PtrToLNode Position;
11 typedef PtrToLNode List;
12 /******** 以上是单链表的定义 ********/
13  
14 typedef struct TblNode *HashTable; /* 散列表类型 */
15 struct TblNode {   /* 散列表结点定义 */
16     int TableSize; /* 表的最大长度 */
17     List Heads;    /* 指向链表头结点的数组 */
18 };
19  
20 HashTable CreateTable( int TableSize )
21 {
22     HashTable H;
23     int i;
24  
25     H = (HashTable)malloc(sizeof(struct TblNode));
26     /* 保证散列表最大长度是素数，具体见代码5.3 */
27     H->TableSize = NextPrime(TableSize);
28  
29     /* 以下分配链表头结点数组 */
30     H->Heads = (List)malloc(H->TableSize*sizeof(struct LNode));
31     /* 初始化表头结点 */
32     for( i=0; i<H->TableSize; i++ ) {
33          H->Heads[i].Data[0] = '';
34          H->Heads[i].Next = NULL;
35     }
36  
37     return H;
38 }
39  
40 Position Find( HashTable H, ElementType Key )
41 {
42     Position P;
43     Index Pos;
44      
45     Pos = Hash( Key, H->TableSize ); /* 初始散列位置 */
46     P = H->Heads[Pos].Next; /* 从该链表的第1个结点开始 */
47     /* 当未到表尾，并且Key未找到时 */ 
48     while( P && strcmp(P->Data, Key) )
49         P = P->Next;
50  
51     return P; /* 此时P或者指向找到的结点，或者为NULL */
52 }
53  
54 bool Insert( HashTable H, ElementType Key )
55 {
56     Position P, NewCell;
57     Index Pos;
58      
59     P = Find( H, Key );
60     if ( !P ) { /* 关键词未找到，可以插入 */
61         NewCell = (Position)malloc(sizeof(struct LNode));
62         strcpy(NewCell->Data, Key);
63         Pos = Hash( Key, H->TableSize ); /* 初始散列位置 */
64         /* 将NewCell插入为H->Heads[Pos]链表的第1个结点 */
65         NewCell->Next = H->Heads[Pos].Next;
66         H->Heads[Pos].Next = NewCell; 
67         return true;
68     }
69     else { /* 关键词已存在 */
70         printf("键值已存在");
71         return false;
72     }
73 }
74  
75 void DestroyTable( HashTable H )
76 {
77     int i;
78     Position P, Tmp;
79      
80     /* 释放每个链表的结点 */
81     for( i=0; i<H->TableSize; i++ ) {
82         P = H->Heads[i].Next;
83         while( P ) {
84             Tmp = P->Next;
85             free( P );
86             P = Tmp;
87         }
88     }
89     free( H->Heads ); /* 释放头结点数组 */
90     free( H );        /* 释放散列表结点 */
91 }

链地址法代码

　　4、关键词的比较次数，取决于产生冲突的多少，影响产生冲突的多少的三个因素：

　　(1)、散列函数是否均匀；

　　(2)、处理冲突的方法；

　　(3)、散列表的装填因子；

　　*西安邮电大学数据结构mooc作业*

 1 #include <stdio.h>
 2 #include <stdlib.h>
 3 
 4 struct HashTableNode{
 5     int Data;
 6     int Info;    
 7 };
 8 
 9 int Insert( struct HashTableNode* H, int TableSize, int Key )
10 {
11     int CurrentPos;
12     int CNum = 0; /* 记录次数 */
13  
14     CurrentPos = Key % TableSize; /* 初始散列位置 */
15 
16     /* 当该位置的单元非空，并且不是要找的元素时，发生冲突 */
17     while( H[CurrentPos].Info)// && H[CurrentPos].Data != Key ) 
18     {       
19         CNum++;
20         CurrentPos = (CurrentPos + 1) % TableSize; /* 调整为合法地址 */
21     }
22     
23     H[CurrentPos].Data = Key; /* 插入 */
24     H[CurrentPos].Info = 1;
25     
26     printf("Key=%2d,Index=%d
",Key,CurrentPos);
27     //printf("比较次数:%d
", CNum+1);    
28     
29     return CNum + 1;
30 }
31 
32 /* 
33 13 
34 10
35 5 88 12 56 71 28 33 43 93 17 
36 */
37 
38 int main()
39 {
40     int i, N, TableSize, Key, Sum = 0;/* 哈希表长度 */
41     printf("请输入哈希表的长度
");
42     scanf("%d", &TableSize);
43     /* 创建哈希表 */
44     HashTableNode* H = (HashTableNode*)malloc(sizeof(HashTableNode)*TableSize);
45     for(i=0; i<TableSize; ++i)
46     {
47         H[i].Data = 0;
48         H[i].Info = 0;    
49     }    
50     printf("请输入关键字个数
");
51     scanf("%d", &N);
52     printf("请输入%d个关键字
",N);
53     for(i=0; i<N; ++i)
54     {
55         scanf("%d",&Key);
56         Sum += Insert(H,TableSize,Key);
57     }
58     printf("ASL:%d/%d
", Sum,N);    
59     free(H);
60     return 0;
61 }

线性探测，测试代码

 1 /* 二次探测再散列 */
 2 int Insert( struct HashTableNode* H, int TableSize, int Key )
 3 {
 4     int CurrentPos,NewPos,sign=1;
 5     int CNum = 0; /* 记录次数 */
 6  
 7     NewPos = CurrentPos = Key % TableSize; /* 初始散列位置 */
 8 
 9     /* 当该位置的单元非空，并且不是要找的元素时，发生冲突 */
10     while( H[NewPos].Info)// && H[CurrentPos].Data != Key ) 
11     {       
12         CNum++;
13         NewPos = (CurrentPos + CNum*CNum*sign) % TableSize; /* 调整为合法地址 */
14         sign = -sign;        
15     }
16     
17     H[NewPos].Data = Key; /* 插入 */
18     H[NewPos].Info = 1;
19     
20     printf("Key=%2d,Index=%d
",Key,NewPos);
21     /* printf("比较次数:%d
", CNum+1); */    
22     
23     return CNum + 1;
24 }

平方探测，Insert函数

#include <math.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
  4 
  5 int Sum = 0;
  6 #define MAXTABLESIZE 65535
  7 typedef int ElementType; /* 关键词类型用字符串 */
  8 typedef int Index;       /* 散列地址类型 */
  9 
 10 /******** 以下是单链表的定义 ********/
 11 typedef struct LNode *PtrToLNode;
 12 struct LNode {
 13     ElementType Data;
 14     PtrToLNode Next;
 15 };
 16 typedef PtrToLNode Position;
 17 typedef PtrToLNode List;
 18 /******** 以上是单链表的定义 ********/
 19  
 20 typedef struct TblNode *HashTable; /* 散列表类型 */
 21 struct TblNode {   /* 散列表结点定义 */
 22     int TableSize; /* 表的最大长度 */
 23     List Heads;    /* 指向链表头结点的数组 */
 24 };
 25 
 26 #ifdef N
 27 #define N
 28 int NextPrime( int N )
 29 { /* 返回大于N且不超过MAXTABLESIZE的最小素数 */
 30     int i, p = (N%2)? N+2 : N+1; /*从大于N的下一个奇数开始 */
 31  
 32     while( p <= MAXTABLESIZE ) {
 33         for( i=(int)sqrt(p); i>2; i-- )
 34             if ( !(p%i) ) break; /* p不是素数 */
 35         if ( i==2 ) break; /* for正常结束，说明p是素数 */
 36         else  p += 2; /* 否则试探下一个奇数 */
 37     }
 38     return p;
 39 }
 40 #endif
 41 
 42 HashTable CreateTable( int TableSize )
 43 {
 44     HashTable H;
 45     int i;
 46  
 47     H = (HashTable)malloc(sizeof(struct TblNode));
 48     /* 保证散列表最大长度是素数 */
 49     /* H->TableSize = NextPrime(TableSize); */
 50     H->TableSize = TableSize;
 51  
 52     /* 以下分配链表头结点数组 */
 53     H->Heads = (List)malloc(H->TableSize*sizeof(struct LNode));
 54     /* 初始化表头结点 */
 55     for( i=0; i<H->TableSize; i++ ) {
 56          H->Heads[i].Data = 0;
 57          H->Heads[i].Next = NULL;
 58     }
 59  
 60     return H;
 61 }
 62  
 63 Position Find( HashTable H, ElementType Key )
 64 {
 65     Position P;
 66     Index Pos;
 67      
 68     Pos =  Key % (H->TableSize) ; /* 初始散列位置 */
 69     Sum++;
 70     
 71     P = H->Heads[Pos].Next; /* 从该链表的第1个结点开始 */
 72     /* 当未到表尾，并且Key未找到时 */ 
 73     while( P && P->Data!=Key) 
 74     {
 75         P = P->Next;
 76         Sum++;
 77     }
 78         
 79     return P; /* 此时P或者指向找到的结点，或者为NULL */
 80 }
 81  
 82 bool Insert( HashTable H, ElementType Key )
 83 {
 84     Position P, NewCell;
 85     Index Pos;
 86      
 87     P = Find( H, Key );
 88     if ( !P ) { /* 关键词未找到，可以插入 */
 89         NewCell = (Position)malloc(sizeof(struct LNode));
 90         NewCell->Data = Key;
 91         Pos =  Key % (H->TableSize) ; /* 初始散列位置 */
 92         
 93         /* 将NewCell插入为H->Heads[Pos]链表的第1个结点 */
 94         NewCell->Next = H->Heads[Pos].Next;
 95         H->Heads[Pos].Next = NewCell; 
 96         return true;
 97     }
 98     else { /* 关键词已存在 */
 99         printf("键值已存在");
100         return false;
101     }
102 }
103  
104 void DestroyTable( HashTable H )
105 {
106     int i;
107     Position P, Tmp;
108      
109     /* 释放每个链表的结点 */
110     for( i=0; i<H->TableSize; i++ ) {
111         P = H->Heads[i].Next;
112         while( P ) {
113             Tmp = P->Next;
114             free( P );
115             P = Tmp;
116         }
117     }
118     free( H->Heads ); /* 释放头结点数组 */
119     free( H );        /* 释放散列表结点 */
120 }
121 
122 /* 
123 13 
124 10
125 5 88 12 56 71 28 33 43 93 17 
126 */
127 
128 int main()
129 {
130     int i, N, TableSize, Key;/* 哈希表长度 */
131     printf("请输入哈希表的长度
");
132     scanf("%d", &TableSize);
133     /* 创建哈希表 */
134     HashTable H = CreateTable(TableSize);
135     
136     printf("请输入关键字个数
");
137     scanf("%d", &N);
138     printf("请输入%d个关键字
",N);
139     for(i=0; i<N; ++i)
140     {
141         scanf("%d",&Key);
142         Insert(H, Key);
143     }
144     
145     for(i=0; i<H->TableSize; ++i)
146     {
147         Position P = H->Heads[i].Next;
148         while(P)
149         {
150             printf("Key=%2d,Index=%2d
",P->Data, i);
151             P = P->Next;
152         }
153     }
154     printf("ASL:%d/%d
", Sum,N);    
155     DestroyTable(H);    
156     return 0;
157 }

分离链接法，测试代码