《算法导论》第11章散列表 (2)散列表

用散列表来解决直接寻址表的那两个问题。但由此带来的散列值的碰撞问题。

最简单的解决方法是链接法，以及下一节介绍的开放寻址法。

链接法，即把散列到同一槽中的所有元素都放在一个链表中。

链表是无序的，在查找一个元素时需要遍历链表。

对于删除函数，假如参数是要删除的结点，那么如果链表是双向的，删除操作可以O(1)内完成。

在下面的删除函数中，参数是关键字，这样更为方便。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define SIZE 20

// 链表结点的定义
typedef struct _ListNode {     
     struct _ListNode *prev, *next;
     char *key;
     char *val;
} ListNode;

// 定义全局的各个槽链表的指针数组
ListNode *hashmap[SIZE];

// 这里的散列函数与Java中String及HashMap中的散列相同
// 注意为了保证向右逻辑运算（用0而不是符号位补高位）
// 要将h声明为无符号的
unsigned hashcode(char *key)
{
     // Ensure >> is logical shift
     unsigned h = 0;

     // String.hashcode()
     do h = 31 * h + *key++;
     while (*key != '\0');     

     // HashMap.hash()
     h ^= (h >> 20) ^ (h >> 12);
     return h ^ (h >> 7) ^ (h >> 4);
}

ListNode * hashmap_search(char *key)
{
     unsigned h = hashcode(key) % SIZE;
     ListNode *node = hashmap[h];
     while (node != NULL) {
          // 当相同时，strcmp返回0
          if (strcmp(node->key, key) == 0) {
               return node;
          }
          node = node->next;
     }
     return NULL;
}

char * hashmap_insert(char *key, char *val)
{
     unsigned h = hashcode(key) % SIZE;
     printf("Insert %s - %s to bucket %d\n", key, val, h);
     
     // Find duplicate key, replace it then return old value
     ListNode *node = hashmap_search(key);
     if (node != NULL) {
          char *oldVal = node->val;
          node->val = val;
          return oldVal;
     }
     
     // Not found, create new node to save key&val pair
     ListNode *newNode = malloc(sizeof(ListNode));
     newNode->prev = NULL;
     newNode->key = key;
     newNode->val = val;
     if (hashmap[h] == NULL) {
          hashmap[h] = newNode;
     } else {
          hashmap[h]->prev = newNode;
          newNode->next = hashmap[h];
          hashmap[h] = newNode;
     }
     return val;
}

char * hashmap_delete(char *key)
{
     ListNode *node = hashmap_search(key);
     if (node != NULL) {
          // Set prev node's next to node's next
          if (node->prev == NULL) {
               unsigned h = hashcode(key) % SIZE;
               hashmap[h] = node->next;
          } else {
               node->prev->next = node->next;
          }

          // Set next node's prev to node's prev
          if (node->next != NULL)
               node->next->prev = node->prev;

          return node->val;
     }
     return NULL;
}

void hashmap_print()
{
     int i;
     for (i = 0; i < SIZE; i++) {
          ListNode *node = hashmap[i];          
          while (node != NULL) {
               printf("%s - %s, ", node->key, node->val);
               node = node->next;
          }
          if (hashmap[i] != NULL)
               printf("%d bucket\n", i);
     }
     printf("\n");     
}

int main(void)
{
     // Compare to String.hashcode() in JDK
     printf("%d\n", hashcode("helloworld"));

     hashmap_insert("aabb", "value1");
     hashmap_insert("ccdd", "value2");
     hashmap_insert("i'mcdai", "value3");

     int i;
     for (i = 0; i < 2 * SIZE + 5; i++) {
          char *key = calloc(sizeof(char), 10);
          char *val = calloc(sizeof(char), 10);
          sprintf(key, "%s%d", "aabbcc", i);
          sprintf(val, "%s%d", "val ", i);
          hashmap_insert(key, val);
     }

     // Insert duplicate key
     printf("%s\n", hashmap_insert("i'mcdai", "dupdup"));

     hashmap_print();

     printf("%s\n", hashmap_search("i'mcdai")->val);
     printf("%s\n", hashmap_search("aabbcc18")->val);
     //printf("%s\n", hashmap_search("NOTFOUND")->val);
     
     // All in bucket 18: aabbcc10 => aabbcc0 => i'mcdai
     printf("%s\n", hashmap_delete("aabbcc10"));
     printf("%s\n", hashmap_delete("i'mcdai"));
     
     hashmap_print();

     return 1;
}

《算法导论》第11章 散列表 (2)散列表

《算法导论》第11章散列表 (2)散列表