字符串hash函数

本文搜集了一些字符串的常用hash函数。

范例1：判断两个单词是否含有相同的字母，此时我们可以用hash做。例如，“aaabb”与"aabb"含有相同的单词。（参考：http://kmplayer.iteye.com/blog/656782）

#include <iostream>  
using namespace std;  

int hash(const char* a)  //hash函数设计到位，节约了空间，当然我们也可以用bitset
{  
	int tmp=0;  
	while(*a)  
	{  
		tmp|=1<<(*a-'a');  
		a++;  
	}  
	return tmp;  
}  

int main()  
{  
	char* a="abc";  
	char* b="acb";  
	cout<<hash(a)<<endl;  
	cout<<hash(b)<<endl;  
	return 0;  
}

范例2：判断兄弟单词，兄弟单词定义为两个单词含有的所有字母相同，但是位置不同。例如“aaccdd”和“acdacd”。何海涛在《剑指offer》上的解法，定义bitcnt[26]，用一个字符串加计数，另一个字符串减计数。

我们这里为了素数的hash构造方法。特意用素数来hash，例如，a=2，b=3，c=5，然后整个单词的hash就是其乘积。

#include <iostream>
#include <cctype>
using namespace std;  

const int MAX = 200;  
int prime[MAX] = {2,3,5};  

//产生小于num的所有素数,返回值为产生素数的个数  
int GeneratePrime(int num)  
{  
	int curPossibleNum = 5;  
	int gap = 2;  
	int count = 3;  

	while(curPossibleNum <= num){  
		curPossibleNum += gap;  
		bool flag = true;  

		for(int j=0; prime[j]*prime[j]<=curPossibleNum; j++){  
			if(curPossibleNum % prime[j] == 0)  
				flag = false;  
		}  

		if(flag == true)  
			prime[count++] = curPossibleNum;  

		gap = 6 - gap;   
	}  

	return count;  
} 

long long Hash(char str[]){ //在这里我们简单的将大小写统一
	long long hashValue = 1;
	while(*str != ''){
		hashValue = hashValue * prime[tolower(*str) - 'a'];
		++str;
	}
	return hashValue;
}

int main()  
{  
	char* str="abdc";  
	GeneratePrime(26);
	cout<<Hash(str)<<endl;
	system("pause");
	return 0;  
}

范例3：统计单词的个数，C++ Primer中采用map的方法。本文采用介绍采用hash的方法。（参考：http://kmplayer.iteye.com/blog/647471）

#include <stdio.h>  
#include <stdlib.h>  
#include <string.h>  

#define WORDLENGTH 30  
#define NHASH 300  

typedef struct node* nodeptr;  
typedef struct node  
{  
	char* word;  
	int cnt;  
	nodeptr next;  
} node;  

int hash(char* buf)  //其实，我没懂这里为什么是31
{  
	unsigned n=0;  
	char* p;  
	for(p=buf;*p;p++)  
		n=31*n+(*p);  
	return n%NHASH;  
}  

nodeptr hashTable[NHASH];  

//链表法,解决hash的冲突.  
void incword(char* buf)  
{  
	int n=hash(buf);  
	nodeptr p;  
	for(p=hashTable[n];p;p=p->next)  
	{  
		if(strcmp(p->word,buf)==0)  
		{  
			p->cnt++;  
			return;  
		}  
	}  
	p=(nodeptr)malloc(sizeof(node));  
	p->word=(char*)malloc(strlen(buf)+1);  
	strcpy(p->word,buf);  
	p->cnt=1;  
	p->next=hashTable[n];  
	hashTable[n]=p;  
}  

int main ()  
{  
	freopen("genetic.txt","r",stdin);  
	char buf[WORDLENGTH];  
	int i;  
	while( scanf("%s",buf)!=EOF )  
		incword(buf);  
	for(i=0;i<NHASH;i++)  
	{  
		nodeptr p=hashTable[i];  
		for(;p;p=p->next)  
			printf("%s %d
",p->word,p->cnt);  
	}  
	return 0 ;  
}

有好的字符串hash，欢迎告之。