STL标准库-一个万用的hash function

技术在于交流与沟通。本文为博主原创文章,转载请注明出处并保持作品的完整性。

在前面我介绍过hash的使用,本次主要介绍一下Hash Function

Hash function 即计算 hash code 的函数:容器根据元素的 hash code 把元素放入对应的 bucket 中。为了保证 hash 容器的效率,应尽量避免碰撞,因此 hash function 所产生的 hash code 应当足够"乱",即分布足够分散。

下面介绍一个万用的hash function及其测试代码

首先我们创建一个客户类,它有三个成员变量:名(mFirstName)、姓(mLastName)、年龄(mAge)

// A customer record made of a first name, a last name and an age.
// All three fields are kept as strings and are publicly accessible.
class Customer
{
public:
    std::string mFirstName;
    std::string mLastName;
    std::string mAge;

    // Takes the three fields by value and moves them into the members,
    // so a caller passing temporaries pays for no extra string copy.
    Customer(std::string firstName, std::string lastName, std::string age)
        : mFirstName(std::move(firstName)),
          mLastName(std::move(lastName)),
          mAge(std::move(age))
    {
    }

    // Two customers are equal when all three fields match.
    // Unordered containers keyed on Customer compare elements with this
    // operator (through the default std::equal_to).
    bool operator==(const Customer& c) const
    {
        return mFirstName == c.mFirstName
            && mLastName == c.mLastName
            && mAge == c.mAge;
    }
};

下面我们来创建hash function

// Hash functor for Customer. It folds each field of the customer into one
// running seed, and that seed is returned as the hash code.
class CustomerHash
{
public:
    // Mixes the std::hash of one value into the running seed.
    // 0x9e3779b9 is the constant the author associates with the golden ratio;
    // the two shifted seed terms feed the previous state back into the mix.
    template<typename T>
    void hash_combine(size_t& seed,
                      const T& val) const
    {
        const size_t valueHash = std::hash<T>()(val);
        const size_t mixed = valueHash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
        seed ^= mixed;
    }

    // End of the recursion: exactly one value is left to combine.
    template <typename T>
    void hash_value(size_t& seed,
                    const T& val) const
    {
        hash_combine(seed, val);
    }

    // Recursive step: combine the first value, then hand the remaining ones
    // to hash_value again (C++11 variadic templates; the author introduces them in
    // http://www.cnblogs.com/LearningTheLoad/p/7208680.html).
    template <typename T, typename... Types>
    void hash_value(size_t& seed,
                    const T& firstArg,
                    const Types&... args) const
    {
        hash_combine(seed, firstArg);
        hash_value(seed, args...);
    }

    // Entry point of the generic hash: starts from a zero seed, combines
    // every argument in order and returns the resulting hash code.
    template <typename... Types>
    size_t hash_val(const Types&... args) const
    {
        size_t code = 0;
        hash_value(code, args...);
        return code;
    }

    // Hashes a customer from its first name, last name and age.
    std::size_t operator()(const Customer& c) const
    {
        return hash_val(c.mFirstName, c.mLastName, c.mAge);
    }
};

 测试代码

int main(int argc, char *argv[])
{
    unordered_multiset<Customer, CustomerHash> set;

    set.insert(Customer("a", "b", "1"));
    set.insert(Customer("c", "d", "2"));
    set.insert(Customer("e", "f", "3"));
    set.insert(Customer("g", "h", "4"));

    int myBucket_count = set.bucket_count();//返回有多少个篮子
    cout << set.bucket_count() << endl;

    CustomerHash hh;
    cout << "bucket postion of " << hh(Customer("a", "b", "1")) %myBucket_count << endl;//取余后 得出落在哪个篮子上
    cout << "bucket postion of " << hh(Customer("c", "d", "2")) %myBucket_count << endl;
    cout << "bucket postion of " << hh(Customer("e", "f", "3")) %myBucket_count << endl;
    cout << "bucket postion of " << hh(Customer("a", "b", "4")) %myBucket_count << endl;

    for (int i = 0; i< myBucket_count; i++)
    {//检测落在哪个篮子上
        cout << "bucket at #: " << i << "has: " << set.bucket_size(i) << endl;
    }
    return 0;
}

测试结果

  

全部测试代码

#include <cstddef>
#include <functional>
#include <iostream>
#include <string>
#include <unordered_set>
#include <utility>

using namespace std;

// A customer record made of a first name, a last name and an age.
// All three fields are kept as strings and are publicly accessible.
class Customer
{
public:
    std::string mFirstName;
    std::string mLastName;
    std::string mAge;

    // Takes the three fields by value and moves them into the members,
    // so a caller passing temporaries pays for no extra string copy.
    Customer(std::string firstName, std::string lastName, std::string age)
        : mFirstName(std::move(firstName)),
          mLastName(std::move(lastName)),
          mAge(std::move(age))
    {
    }

    // Two customers are equal when all three fields match.
    // The explicit bool return type is required: a declaration without a
    // return type is not valid ISO C++.
    bool operator==(const Customer& c) const
    {
        return mFirstName == c.mFirstName
            && mLastName == c.mLastName
            && mAge == c.mAge;
    }
};

// Hash functor for Customer. It folds each field of the customer into one
// running seed, and that seed is returned as the hash code.
class CustomerHash
{
public:
    // Mixes the std::hash of one value into the running seed.
    // The two shifted seed terms feed the previous state back into the mix;
    // 0x9e3779b9 is a fixed additive constant.
    template<typename T>
    void hash_combine(size_t& seed,
                      const T& val) const
    {
        const size_t valueHash = std::hash<T>()(val);
        const size_t mixed = valueHash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
        seed ^= mixed;
    }

    // End of the recursion: exactly one value is left to combine.
    template <typename T>
    void hash_value(size_t& seed,
                    const T& val) const
    {
        hash_combine(seed, val);
    }

    // Recursive step: combine the first value, then hand the remaining ones
    // to hash_value again.
    template <typename T, typename... Types>
    void hash_value(size_t& seed,
                    const T& firstArg,
                    const Types&... args) const
    {
        hash_combine(seed, firstArg);
        hash_value(seed, args...);
    }

    // Entry point of the generic hash: starts from a zero seed, combines
    // every argument in order and returns the resulting hash code.
    template <typename... Types>
    size_t hash_val(const Types&... args) const
    {
        size_t code = 0;
        hash_value(code, args...);
        return code;
    }

    // Hashes a customer from its first name, last name and age.
    std::size_t operator()(const Customer& c) const
    {
        return hash_val(c.mFirstName, c.mLastName, c.mAge);
    }
};

int main(int argc, char *argv[])
{
    unordered_multiset<Customer, CustomerHash> set;

    set.insert(Customer("a", "b", "1"));
    set.insert(Customer("c", "d", "2"));
    set.insert(Customer("e", "f", "3"));
    set.insert(Customer("g", "h", "4"));

    int myBucket_count = set.bucket_count();
    cout << set.bucket_count() << endl;

    CustomerHash hh;
    cout << "bucket postion of " << hh(Customer("a", "b", "1")) %myBucket_count << endl;
    cout << "bucket postion of " << hh(Customer("c", "d", "2")) %myBucket_count << endl;
    cout << "bucket postion of " << hh(Customer("e", "f", "3")) %myBucket_count << endl;
    cout << "bucket postion of " << hh(Customer("a", "b", "4")) %myBucket_count << endl;

    for (int i = 0; i< myBucket_count; i++)
    {
        cout << "bucket at #: " << i << "has: " << set.bucket_size(i) << endl;
    }
    return 0;
}
View Code

这是一个万用的 hash function:为自定义类型编写 hash function 时,可以直接套用上面的写法,只需在 operator() 中把该类型的各个成员变量传给 hash_val 即可。

参考:侯捷《STL源码剖析》

原文地址:https://www.cnblogs.com/LearningTheLoad/p/7667029.html