散列

一、散列简介

1、散列技术：允许以常数平均时间执行插入、删除、查找的技术

2、散列表：包含关键字，具有固定大小的数组。（散列表的实现叫做散列）

3、散列函数：每个关键字被映射到0到size-1中的某个位置，这个映射叫做散列函数。（散列表的大小一般为素数，减少冲突）

4、冲突：当两个关键字同时映射到一个位置时，就发生了冲突。

二、解决冲突的方法

（一）分离链接法

1、方法：将散列到同一个值的所有元素保留到一个链表中。

2、散列函数：H(X)=X mod size;

3、图示：

4、代码实现：

#include<iostream>
#include<cstdio>
#include<cstdlib>
using namespace std;
// Largest table size that Init() accepts; larger requests are rejected.
const int MAXSIZE = 12000;
// One node of a singly linked bucket chain.
struct Node{
    int data;           // key stored in this node
    struct Node *next;  // next node in the same bucket, or NULL at the end
}; 
// A List points at a bucket's header node; the header's `next` is the first real element.
typedef struct Node* List;
// A Position points at a node inside a bucket (NULL means "not found" in Find()).
typedef List Position;

// Separate-chaining hash table: an array of bucket lists.
struct HNode{
    int TableSize;   // number of buckets (set by Init() from NextPrim())
    List *TheLists;  // array of TableSize pointers, each to a bucket's header node
};
// Handle through which all table operations access the table.
typedef struct HNode* HashTable;

/*
 * Trial-division primality test.
 *
 * Returns true when x is a prime number, false otherwise.
 * Values below 2 (0, 1 and all negatives) are not prime.
 */
bool Is_Prim(int x)
{
    if(x<2) return false;
    /* "i<=x/i" is the overflow-safe form of "i*i<=x". */
    for(int i=2;i<=x/i;i++)
    {
        if(x%i==0) return false;
    }
    return true;
}
/*
 * Returns the first value >= x that Is_Prim() accepts
 * (x itself when it already passes the test).
 */
int NextPrim(int x)
{
    for(;;x++)
    {
        if(Is_Prim(x)) return x;
    }
}

/*
 * Creates an empty separate-chaining hash table.
 *
 * size: requested number of buckets. Requests above MAXSIZE are rejected;
 *       requests below 2 are raised to 2 so the bucket count is always a
 *       positive prime (a zero bucket count would make Hash() divide by zero).
 *
 * Returns the new table, or NULL if the request is too large or memory runs
 * out. On failure everything allocated so far is released. The caller owns
 * the returned table.
 */
HashTable Init(int size)
{
    HashTable H;
    int i;

    if(size>MAXSIZE)
    {
        printf("Table size is too large!\n");
        return NULL;
    }
    if(size<2) size=2;

    H=(HashTable)malloc(sizeof(struct HNode));
    if(H==NULL)
    {
        printf("Out of Space!!!");
        return NULL;
    }
    H->TableSize=NextPrim(size);

    /* The array holds pointers to header nodes, so size it by List, not by Node. */
    H->TheLists=(List*)malloc(sizeof(List)*H->TableSize);
    if(H->TheLists==NULL)
    {
        printf("Out of Space!!!");
        free(H);
        return NULL;
    }

    /* Every bucket gets a dummy header node; real elements hang off header->next. */
    for(i=0;i<H->TableSize;i++)
    {
        H->TheLists[i]=(List)malloc(sizeof(struct Node));
        if(H->TheLists[i]==NULL)
        {
            printf("Out of Space!!!");
            while(i-->0) free(H->TheLists[i]);
            free(H->TheLists);
            free(H);
            return NULL;
        }
        H->TheLists[i]->next=NULL;
    }
    return H;
}

/*
 * Maps key to a bucket index in [0, size-1].
 *
 * key:  value to hash (may be negative).
 * size: number of buckets; must be positive.
 *
 * The % operator yields a negative remainder for a negative key, which would
 * index before the start of the bucket array, so the remainder is shifted
 * back into range.
 */
int Hash(int key,int size)
{
    int r=key%size;
    if(r<0) r+=size;
    return r;
}
/*
 * Looks up key in the table.
 *
 * Returns the node holding key, or NULL when the key is not present.
 */
Position Find(int key,HashTable H)
{
    /* Header node of the bucket the key hashes to. */
    List bucket=H->TheLists[Hash(key,H->TableSize)];
    Position cur;

    /* Walk the chain behind the header until the key or the end is reached. */
    for(cur=bucket->next;cur!=NULL;cur=cur->next)
    {
        if(cur->data==key) break;
    }
    return cur;
}
void Insert(int key,HashTable H)
{
    Position Pos,Cell;
    List L;
    Pos=Find(key,H);
    if(Pos==NULL)
    {
        Cell=(List)malloc(sizeof(struct Node));
        if(Cell==NULL) printf("Out of Space!!!");
        else
        {
            L=H->TheLists[Hash(key,H->TableSize)];
            Cell->next=L->next;
            Cell->data=key;
            L->next=Cell; 
        }
    } 
} 
/*
 * Prints the table to stdout, one line per bucket.
 *
 * Each line lists the keys of that bucket separated by spaces;
 * an empty bucket produces an empty line.
 */
void Print(HashTable H)
{
    Position P;
    for(int i=0;i<H->TableSize;i++)
    {
        /* Skip the header node; elements start at header->next. */
        P=H->TheLists[i]->next;
        while(P!=NULL)
        {
            printf("%d ",P->data);
            P=P->next;
        }
        printf("\n");
    }
}
/*
 * Demo driver: reads a count n followed by n integers from stdin,
 * inserts them into a separate-chaining table and prints every bucket.
 *
 * Returns 0 on success, 1 when the count is unreadable/negative or the
 * table cannot be created.
 */
int main(void)
{
    HashTable H;
    int n,i,x;

    if(!(cin>>n)||n<0)
    {
        printf("Invalid element count\n");
        return 1;
    }
    H=Init(n);
    /* Init() reports its own error; do not touch a NULL table. */
    if(H==NULL) return 1;

    for(i=0;i<n;i++)
    {
        /* Stop on bad or short input instead of inserting an uninitialized value. */
        if(!(cin>>x)) break;
        Insert(x,H);
    }
    Print(H);
    return 0;
}

View Code

（二）开放定址法

1、相比于分离链接法的优点：不用指针，提高算法运行速度。

2、原理：如果有冲突发生，尝试选择另外的单元，直到找到空单元为止。

H(X)=(Hash(X)+F(i)) mod Size，并且F(0) = 0;

注意：开放定址法中不能直接删除元素（否则会截断后续元素的探测序列），要用懒惰删除。

3、线性探测法

（1）冲突解决函数：F(i)=i；

（2）缺点：如果表中有多于一半的单元被填满，线性探测就不是个好办法，会产生一次聚集。

4、平方探测法

（1）冲突解决函数：F(i)=i^2；

（2）代码中使用快速平方法F(i)=F(i-1)+2*i-1;

优点：解决线性探测法中的一次聚集问题。

#include<iostream>
#include<cstdio>
#include<cstring>
#include<cstdlib>
using namespace std;
// Largest table size that Init() accepts; larger requests are rejected.
const int MAXSIZE = 12000;
// Index of a cell in the table's cell array.
typedef unsigned int Index;
// Cell index returned by Find().
typedef Index Position;
// State of one table cell.
enum Kind{
    // Legitimate: cell holds a key (set by Insert(), the only state Print() shows).
    // Empty:      cell has never held a key (initial state set by Init()).
    // Deleted:    not assigned anywhere in the visible code; presumably marks a
    //             lazily deleted cell -- confirm against the delete routine, if any.
    Legitimate,Empty,Deleted
};

// One slot of the open-addressing table.
struct Node{
    int data;        // stored key; only meaningful when Info is not Empty
    enum Kind Info;  // occupancy state of this slot
}; 
typedef struct Node Cell;

// Open-addressing hash table: a flat array of cells.
struct HNode{
    int TableSize;   // number of cells (set by Init() from NextPrim())
    Cell *TheCells;  // array of TableSize cells
};
// Handle through which all table operations access the table.
typedef struct HNode* HashTable;
/*
 * Trial-division primality test.
 *
 * Returns true when x is a prime number, false otherwise.
 * Values below 2 (0, 1 and all negatives) are not prime.
 */
bool IsPrim(int x)
{
    if(x<2) return false;
    /* "i<=x/i" is the overflow-safe form of "i*i<=x". */
    for(int i=2;i<=x/i;i++)
    {
        if(x%i==0) return false;
    }
    return true;
}
/*
 * Returns the first value >= x that IsPrim() accepts
 * (x itself when it already passes the test).
 */
int NextPrim(int x)
{
    for(;;x++)
    {
        if(IsPrim(x)) return x;
    }
}
/*
 * Creates an empty open-addressing hash table.
 *
 * size: requested number of cells. Requests above MAXSIZE are rejected;
 *       requests below 2 are raised to 2 so the cell count is always a
 *       positive prime (a zero cell count would make Hash() divide by zero).
 *
 * Returns the new table with every cell marked Empty, or NULL if the request
 * is too large or memory runs out. On failure everything allocated so far is
 * released. The caller owns the returned table.
 */
HashTable Init(int size)
{
    HashTable H;
    int i;

    if(size>MAXSIZE)
    {
        printf("Table size is too large\n");
        return NULL;
    }
    if(size<2) size=2;

    H=(HashTable)malloc(sizeof(struct HNode));
    if(H==NULL)
    {
        printf("Out of Space");
        return NULL;
    }
    H->TableSize=NextPrim(size);

    H->TheCells=(Cell*)malloc(sizeof(Cell)*H->TableSize);
    if(H->TheCells==NULL)
    {
        printf("Out of Space");
        free(H);
        return NULL;
    }

    /* Only Info is initialised; data is never read while a cell is Empty. */
    for(i=0;i<H->TableSize;i++)
    {
        H->TheCells[i].Info=Empty;
    }
    return H;
}
/*
 * Maps x to a cell index in [0, size-1].
 *
 * x:    value to hash (may be negative).
 * size: number of cells; must be positive.
 *
 * The % operator yields a negative remainder for a negative x; converted to
 * the unsigned Index type that would become a huge out-of-range index, so the
 * remainder is shifted back into range before the conversion.
 */
Index Hash(int x,int size)
{
    int r=x%size;
    if(r<0) r+=size;
    return (Index)r;
}
/*
 * Locates key using quadratic probing.
 *
 * Returns the index of the cell that holds key, or of the first Empty cell
 * on key's probe sequence (the place where key would be inserted). Callers
 * must inspect the cell's Info to tell the two cases apart.
 *
 * The i-th probe is at (Hash(key) + i*i) mod TableSize. It is computed
 * incrementally: consecutive squares differ by the odd numbers 1, 3, 5, ...
 *
 * NOTE: the search only terminates if the probe sequence reaches the key or
 * an Empty cell. With a prime TableSize that is guaranteed while the table is
 * at most half full; callers must keep the load factor within that bound.
 */
Position Find(int key,HashTable H)
{
    const Position size=(Position)H->TableSize;
    Position Pos=Hash(key,H->TableSize);
    Position step=1%size;   /* next odd increment, kept reduced modulo size */

    while(H->TheCells[Pos].Info!=Empty&&H->TheCells[Pos].data!=key)
    {
        /* A full modulo is needed: once the step exceeds the table size a
           single subtraction no longer brings the index back into range. */
        Pos=(Pos+step)%size;
        step=(step+2)%size;
    }
    return Pos;
}
void Insert(int key,HashTable H)
{
    Position Pos=Find(key,H);
    if(H->TheCells[Pos].Info!=Legitimate)
    {
        H->TheCells[Pos].Info=Legitimate;
        H->TheCells[Pos].data=key;
    }
}
/*
 * Prints every live key to stdout on a single line, in cell-index order,
 * each followed by a space, then a newline.
 */
void Print(HashTable H)
{
    for(int i=0;i<H->TableSize;i++)
    {
        /* Empty and Deleted cells are skipped. */
        if(H->TheCells[i].Info==Legitimate)
        {
            printf("%d ",H->TheCells[i].data);
        }
    }
    printf("\n");
}
int main(void)
{
    int n,i,x;
    HashTable H;
    cin>>n;
    H=Init(n);
    for(i=1;i<=n;i++)
    {
        cin>>x;
        Insert(x,H);
    }
    Print(H);
    return 0;
}

View Code

5、双散列

（1）冲突解决函数：F(i)=i*Hash2(X)，其中Hash2是第二个散列函数；

（2）Hash2(X)的选取很重要：它的值不能为0，并且应保证所有单元都能被探测到。

6、再散列

如果表填得过满（例如插入后装填因子超过某个阈值，常取一半），就再建立一个大约两倍大的新表，并把原表中的元素逐个重新散列到新表中（原表和新表的大小都是素数）。