php扩展开发实现一个简易的哈希表

从一个简易的哈希表入手,会让你更好地理解 PHP 的哈希表。它们的本质是一样的,只是 PHP 的哈希表做了更多的功能扩展。哈希表是 PHP 语言的一个重要核心,大量的内核代码都使用到了哈希表。

  1 #include <string.h>
  2 #include <stdio.h>
  3 #include <stdlib.h>
  4 #define HASH_ADD 0
  5 #define HASH_UPDATE 1
  6 
  7 typedef unsigned long ulong;
  8 typedef unsigned int uint;
  9 
 10 typedef struct bucket {
 11     ulong h;                        //字符数组的key哈希值或者数字数组的key值
 12     uint nKeyLength;//字符数组的key长度,数字数组为0
 13     void *pData;//存储的数据
 14     struct bucket *pListNext;//在哈希表中的下一个元素
 15     struct bucket *pListLast;//在哈希表中的下一个元素
 16     struct bucket *pNext;//哈希值冲突时,同一哈希值链表的下一个元素
 17     struct bucket *pLast;//哈希值冲突时,同一哈希值链表的上一个元素
 18     const char *arKey;//字符数组的key值,数字数组为NULL
 19 } Bucket;
 20 
 21 typedef struct _hashtable {
 22     uint nTableSize;//哈希表的大小
 23     uint nTableMask;//用来计算哈希值所在当前哈希表的位置 
 24     uint nNumOfElements;//哈希表的元素数量
 25     ulong nNextFreeElement;//下一个自动插入的位置
 26     Bucket *pListHead;//哈希表的第一个元素
 27     Bucket *pListTail;//哈希表的最后一个元素
 28     Bucket **arBuckets;//哈希表存储数据的数组
 29 } HashTable;
 30 
 31 int hash_init(HashTable *ht, uint nSize);
 32 int hash_add(HashTable *ht, const char *arKey, uint nKeyLength, void *pData);
 33 int hash_update(HashTable *ht, const char *arKey, uint nKeyLength, void *pData);
 34 int _hash_add_update(HashTable *ht, const char *arKey, uint nKeyLength, void *pData, int flag);
 35 int hash_index_add(HashTable *ht, ulong h, void *pData);
 36 int hash_index_update(HashTable *ht, ulong h, void *pData);
 37 int hash_foreach(HashTable *ht);
 38 static int hash_resize_if_full(HashTable *ht);
 39 int hash_resize(HashTable *ht);
 40 char * hash_find(HashTable *ht,const char *arKey);
 41 char * hash_index_find(HashTable *ht,ulong h);
 42 
 43 int hash_init(HashTable *ht, uint nSize){
 44     uint i = 1;
 45 
 46     if (nSize >= 0x80000000) {
 47         /* prevent overflow */
 48         ht->nTableSize = 0x80000000;
 49     } else {
 50         while ((1U << i) < nSize) {
 51             i++;
 52         }
 53         ht->nTableSize = 1 << i;
 54     }
 55     ht->nTableMask = ht->nTableSize - 1;
 56     ht->nNumOfElements = 0;
 57     ht->nNextFreeElement = 0;
 58     ht->pListHead = NULL;
 59     ht->pListTail = NULL;
 60     ht->arBuckets = (Bucket **)calloc(ht->nTableSize,sizeof(Bucket *));  
 61     return 0;
 62 }
 63 
 64 static inline ulong hash_func(const char *arKey, uint nKeyLength)
 65 {
 66     register ulong hash = 5381;
 67 
 68     /* variant with the hash unrolled eight times */
 69     for (; nKeyLength >= 8; nKeyLength -= 8) {
 70         hash = ((hash << 5) + hash) + *arKey++;
 71         hash = ((hash << 5) + hash) + *arKey++;
 72         hash = ((hash << 5) + hash) + *arKey++;
 73         hash = ((hash << 5) + hash) + *arKey++;
 74         hash = ((hash << 5) + hash) + *arKey++;
 75         hash = ((hash << 5) + hash) + *arKey++;
 76         hash = ((hash << 5) + hash) + *arKey++;
 77         hash = ((hash << 5) + hash) + *arKey++;
 78     }
 79     switch (nKeyLength) {
 80         case 7: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */
 81         case 6: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */
 82         case 5: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */
 83         case 4: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */
 84         case 3: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */
 85         case 2: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */
 86         case 1: hash = ((hash << 5) + hash) + *arKey++; break;
 87         case 0: break;
 88     }
 89     return hash;
 90 }
 91 
 92 int hash_add(HashTable *ht, const char *arKey, uint nKeyLength, void *pData){
 93     return _hash_add_update(ht, arKey, nKeyLength, pData, HASH_ADD);
 94 }
 95 
 96 int hash_update(HashTable *ht, const char *arKey, uint nKeyLength, void *pData){
 97     return _hash_add_update(ht, arKey, nKeyLength, pData, HASH_UPDATE);
 98 }
 99 
100 int _hash_add_update(HashTable *ht, const char *arKey, uint nKeyLength, void *pData, int flag){
101     Bucket *p;
102     ulong  h;
103     ulong nIndex;
104     h = hash_func(arKey, nKeyLength);
105     nIndex = h & ht->nTableMask;
106     p = ht->arBuckets[nIndex];
107     while (p != NULL) {
108         //printf("nIndex=>%d,arKey=>%s,p=>%s,%s\n",nIndex,arKey,p->arKey,p->pData);
109         if (p->arKey == arKey){
110             if(flag == HASH_ADD){
111                 //已经存在同样的key
112                 return -1; 
113             }else{
114                 p->pData = pData; 
115                 return 0;
116             }
117         }
118         p = p->pNext;
119     }
120     p = (Bucket *)malloc(sizeof(Bucket));
121     p->h = h;
122     p->nKeyLength = nKeyLength;
123     p->pData = pData;
124     p->pNext = ht->arBuckets[nIndex];
125     p->pLast = NULL;
126     if(p->pNext != NULL){
127         p->pNext->pLast = p; 
128     }
129     p->arKey = arKey;
130     ht->arBuckets[nIndex] = p;
131     ht->nNumOfElements++;
132     if(ht->pListHead == NULL){
133         ht->pListHead = p; 
134         p->pListNext = NULL;
135         p->pListLast = NULL; 
136         ht->pListTail = p;
137     }else{
138         p->pListLast = ht->pListTail;
139         p->pListLast->pListNext = p;
140         p->pListNext = NULL;
141         ht->pListTail = p;
142     }
143     hash_resize_if_full(ht);
144     return 0;
145 }
146 
147 int hash_index_add(HashTable *ht, ulong h, void *pData){
148     return _hash_index_add_update(ht,h,pData,HASH_ADD);
149 }
150 
151 int hash_index_update(HashTable *ht, ulong h, void *pData){
152     return _hash_index_add_update(ht,h,pData,HASH_UPDATE);
153 }
154 
155 int hash_next_add(HashTable *ht,void *pData){
156     ulong h = ht->nNextFreeElement;
157     return _hash_index_add_update(ht,h,pData,HASH_ADD);
158 }
159 
160 int _hash_index_add_update(HashTable *ht, ulong h,void *pData,int flag){
161     Bucket *p;
162     ulong nIndex;
163     nIndex = h & ht->nTableMask;
164     p = ht->arBuckets[nIndex];
165     while (p != NULL) {
166         if (p->arKey == NULL && p->nKeyLength == 0){
167             if(flag == HASH_ADD){
168                 //已经存在同样的key
169                 return -1; 
170             }else{
171                 p->pData = pData; 
172                 return 0;
173             }
174         }
175         p = p->pNext;
176     }
177     p = (Bucket *)malloc(sizeof(Bucket));
178     p->h = h;
179     p->nKeyLength = 0;
180     p->pData = pData;
181     p->pNext = ht->arBuckets[nIndex];
182     p->pLast = NULL;
183     if(p->pNext != NULL){
184         p->pNext->pLast = p; 
185     }
186     p->arKey = NULL;
187     ht->arBuckets[nIndex] = p;
188     ht->nNumOfElements++;
189     if(h >= ht->nNextFreeElement){
190         ht->nNextFreeElement = h + 1;
191     }
192     if(ht->pListHead == NULL){
193         ht->pListHead = p; 
194         p->pListNext = NULL;
195         p->pListLast = NULL; 
196         ht->pListTail = p;
197     }else{
198         p->pListLast = ht->pListTail;
199         p->pListLast->pListNext = p;
200         p->pListNext = NULL;
201         ht->pListTail = p;
202     }
203     hash_resize_if_full(ht);
204     return 0;
205 }
206 
207 int hash_foreach(HashTable *ht){
208     if(ht->pListHead == NULL){
209         return ; 
210     }
211     Bucket *p;
212     p = ht->pListHead;
213     while(p != NULL){
214         if(p->nKeyLength > 0){
215             printf("h=>%ld,index=>%ld,%s=>%s\n",p->h,p->h & ht->nTableMask,p->arKey,p->pData); 
216         }else{
217             printf("h=>%ld,index=>%ld,%d=>%s\n",p->h,p->h & ht->nTableMask,p->h,p->pData); 
218         }
219         p=p->pListNext;
220     }
221 }
222 
223 static int hash_resize_if_full(HashTable *ht){
224     //printf("if_null,num=>%d,size=>%d\n",ht->nNumOfElements,ht->nTableSize);
225     if(ht->nNumOfElements >= ht->nTableSize){
226         return hash_resize(ht);
227     }
228 }
229 
230 int hash_resize(HashTable *ht){
231     printf("before resize:%d\n",ht->nTableSize);
232     hash_foreach(ht);
233     ht->nTableSize = ht->nTableSize << 1;
234     ht->nTableMask = ht->nTableSize - 1;
235     Bucket **t;
236     t = realloc(ht->arBuckets,ht->nTableSize * sizeof(Bucket*));
237     memset(t,0,ht->nTableSize * sizeof(Bucket *));
238     ht->arBuckets = t;
239     Bucket *p;
240     ulong nIndex;
241     for(p=ht->pListHead;p!=NULL;p=p->pListNext){
242         nIndex = p->h & ht->nTableMask; 
243         p->pNext = ht->arBuckets[nIndex];
244         if(p->pNext != NULL){
245             p->pNext->pLast = p; 
246         }
247         ht->arBuckets[nIndex] = p;
248     }
249     printf("after resize:%d\n",ht->nTableSize);
250     hash_foreach(ht);
251     return 0;
252 }
253 
254 char * hash_find(HashTable *ht,const char *arKey){
255     ulong h;
256     int nKeyLength = strlen(arKey);
257     h = hash_func(arKey, nKeyLength);
258     ulong nIndex = h & ht->nTableMask;
259     Bucket *p;
260     for(p=ht->arBuckets[nIndex];p!=NULL;p=p->pNext){
261         if(strcmp(p->arKey,arKey) == 0 && p->nKeyLength == nKeyLength){
262             return p->pData; 
263         } 
264         printf("hash_find,arKey=>%s,nKeyLength=>%d,pData=>%s\n",p->arKey,p->nKeyLength,p->pData);
265     }
266     return NULL;
267 }
268 
269 char * hash_index_find(HashTable *ht,ulong h){
270     ulong nIndex = h & ht->nTableMask;
271     Bucket *p;
272     for(p=ht->arBuckets[nIndex];p!=NULL;p=p->pNext){
273         if(p->nKeyLength == 0 && p->arKey == NULL){
274             return p->pData;
275         }
276     }
277 }
278 
279 int main(){
280     HashTable *ht;
281     ht = (HashTable *)malloc(sizeof(HashTable));
282     hash_init(ht,1);//初始化哈希表,nTableSize=1
283     char *arKey = "keya";
284     int len = strlen(arKey);
285     char *pData = "valuea";
286     hash_add(ht,arKey,len,pData);//插入字符串数据
287     arKey = "keyb";
288     len = strlen(arKey);
289     pData = "valueb";
290     hash_add(ht,arKey,len,pData);
291     arKey = "keyb";
292     len = strlen(arKey);
293     pData = "valueb";
294     int ret = hash_add(ht,arKey,len,pData);//插入失败,key已经存在
295     printf("ret=>%d\n",ret);
296     char c;
297     //char buffer[100];
298     char *buffer;
299     for(c='c';c<='z';c++){
300         buffer = (char *)malloc(100);
301         sprintf(buffer,"key%c",c); 
302         arKey = buffer;
303         len = strlen(arKey);
304         buffer = (char *)malloc(100);
305         sprintf(buffer,"value%c",c);
306         pData = buffer;
307         printf("%s,%s\n",arKey,pData);
308         hash_add(ht,arKey,len,pData);//批量插入
309     }
310     for(c='A';c<='Z';c++){
311         buffer = (char *)malloc(100);
312         sprintf(buffer,"value%c",c);
313         pData = buffer;
314         hash_next_add(ht,pData);//使用数字数组,自动插入元素,不需要指定key
315     }
316     hash_index_add(ht,100,"index3");//使用数字数组,指定索引位置。
317     hash_next_add(ht,"101");
318     hash_next_add(ht,"102");
319     hash_index_update(ht,100,"100 -> 102");//更新指定位置的值
320     hash_foreach(ht);//遍历数组
321     char *finda = hash_find(ht,"keya");//查找指定字符串key的值
322     printf("finda=>%s\n",finda);
323     char *findb = hash_index_find(ht,100);//查找指定索引的值
324     printf("findb=>%s\n",findb);
325 }
原文地址:https://www.cnblogs.com/wuhen781/p/6184970.html