unordered_map和unordered_set特性以及解决哈希冲突
目录
1. 哈希表的特性及核心概念
1. 哈希表的特性及核心概念
哈希表(Hash Table)是一种高效的键值对存储结构,通过哈希函数建立键与存储位置的映射关系,实现平均 O (1) 时间复杂度的插入、查找和删除操作。
-
哈希函数(Hash Function)
将任意大小的输入(键值)映射到固定范围的输出(哈希地址)的函数。理想的哈希函数应具有:
- 确定性:同一键值始终映射到同一地址
- 均匀性:键值分布均匀,减少冲突
- 高效性:计算快速,时间复杂度为 O (1)
-
哈希冲突(Hash Collision)
不同键值通过哈希函数得到相同哈希地址的现象。解决方法主要有:
- 链地址法(Separate Chaining):每个哈希地址对应一个链表 / 红黑树,冲突元素依次存储在链表中
- 开放定址法:冲突发生时,通过线性探测、二次探测等方式寻找下一个空闲位置
- 再哈希法:使用多个哈希函数,冲突时切换函数重新计算地址
-
负载因子(Load Factor)
哈希表中元素数量与桶(Bucket)数量的比值,计算公式:负载因子 = 元素数 / 桶数。负载因子过大会导致冲突率上升,通常当负载因子超过阈值(如 0.7)时触发扩容。
-
扩容(Resizing)
当负载因子超标时,创建更大的桶数组(通常为原大小的 2 倍或下一个更大的素数),并将所有元素重新哈希到新桶中,以降低冲突率。
-
桶(Bucket)
哈希表的基本存储单元,每个桶对应一个哈希地址,可存放单个元素或冲突元素组成的链表 / 树。
1.1 开放定址法的代码实现
#pragma once
#include <algorithm>
#include <cstddef>
#include <string>
#include <utility>
#include <vector>

// Occupancy state of one slot in an open-addressing table.
enum State
{
    EXIST,  // slot holds a live element
    EMPTY,  // slot has never been used; a probe sequence may stop here
    DELETE  // tombstone left by Erase; a probe sequence must continue past it
};

// One slot of the open-addressing table: the stored pair plus its state.
template<class K, class V>
struct HashData
{
    std::pair<K, V> _kv;
    State _state = EMPTY;
};

// Default hash functor: converts the key itself to size_t.
template<class K>
struct HashFunc
{
    size_t operator()(const K& key) const
    {
        // C-style cast kept on purpose: it is the only cast that accepts every
        // key type the original accepted (integers, enums and pointers alike).
        return (size_t)key;
    }
};

// String specialisation: BKDR-style multiplicative hash with seed 131.
template<>
struct HashFunc<std::string>
{
    size_t operator()(const std::string& s) const
    {
        size_t hash = 0;
        for (auto ch : s)
        {
            hash += ch;
            hash *= 131;
        }
        return hash;
    }
};

// Returns the smallest prime in the built-in table that is >= n, or the
// largest table entry when n exceeds all of them.
// Note: assumes long is at least 32 bits.
inline unsigned long __stl_next_prime(unsigned long n)
{
    static const int kPrimeCount = 28;
    static const unsigned long kPrimes[kPrimeCount] = {
        53ul, 97ul, 193ul, 389ul, 769ul,
        1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
        49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
        1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
        50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
        1610612741ul, 3221225473ul, 4294967291ul
    };

    const unsigned long* first = kPrimes;
    const unsigned long* last = kPrimes + kPrimeCount;
    const unsigned long* pos = std::lower_bound(first, last, n);
    return pos == last ? *(last - 1) : *pos;
}

namespace open_address
{
    // Hash table that resolves collisions by linear probing.
    //
    // K    key type, must be comparable with ==
    // V    mapped type
    // Hash functor mapping a key to size_t
    template<class K, class V, class Hash = HashFunc<K>>
    class HashTable
    {
    public:
        HashTable()
            : _tables(__stl_next_prime(0))
        {}

        // Inserts kv. Returns false when the key is already present (or, in
        // the degenerate case of a table that cannot grow, when it is full).
        bool Insert(const std::pair<K, V>& kv)
        {
            if (Find(kv.first))
                return false;

            // Rebuild once live elements plus tombstones reach 70% of the
            // slots. Grow only when live elements alone justify it; otherwise
            // rebuilding at the same size just sweeps the tombstones away.
            if (_used * 10 / _tables.size() >= 7)
            {
                const bool grow = _n * 10 / _tables.size() >= 7;
                Rehash(grow ? __stl_next_prime(_tables.size() + 1) : _tables.size());
            }

            Hash hash;
            const size_t size = _tables.size();
            const size_t hash0 = hash(kv.first) % size;
            for (size_t i = 0; i < size; ++i)
            {
                HashData<K, V>& slot = _tables[(hash0 + i) % size];
                if (slot._state == EXIST)
                    continue;

                // A tombstone is already counted in _used; only a fresh slot
                // adds to it.
                if (slot._state == EMPTY)
                    ++_used;
                slot._kv = kv;
                slot._state = EXIST;
                ++_n;
                return true;
            }
            return false;
        }

        // Returns the slot holding key, or nullptr when the key is absent.
        HashData<K, V>* Find(const K& key)
        {
            Hash hash;
            const size_t size = _tables.size();
            const size_t hash0 = hash(key) % size;
            // The probe is bounded by the slot count so that it terminates
            // even when no EMPTY slot is left.
            for (size_t i = 0; i < size; ++i)
            {
                HashData<K, V>& slot = _tables[(hash0 + i) % size];
                if (slot._state == EMPTY)
                    break;
                if (slot._state == EXIST && slot._kv.first == key)
                    return &slot;
            }
            return nullptr;
        }

        // Marks the slot holding key as a tombstone. Returns whether the key
        // was present.
        bool Erase(const K& key)
        {
            HashData<K, V>* ret = Find(key);
            if (ret == nullptr)
                return false;

            ret->_state = DELETE;
            --_n;  // the slot stays counted in _used until the next rebuild
            return true;
        }

        // Number of live elements.
        size_t Size() const
        {
            return _n;
        }

    private:
        // Moves every live element into a fresh table of newSize slots,
        // discarding all tombstones.
        void Rehash(size_t newSize)
        {
            std::vector<HashData<K, V>> fresh(newSize);
            Hash hash;
            for (auto& data : _tables)
            {
                if (data._state != EXIST)
                    continue;

                size_t hashi = hash(data._kv.first) % newSize;
                while (fresh[hashi]._state == EXIST)
                    hashi = (hashi + 1) % newSize;
                fresh[hashi]._kv = std::move(data._kv);
                fresh[hashi]._state = EXIST;
            }
            _tables.swap(fresh);
            _used = _n;
        }

        std::vector<HashData<K, V>> _tables;
        size_t _n = 0;     // number of live (EXIST) elements
        size_t _used = 0;  // number of non-EMPTY slots (EXIST + DELETE)
    };
}
1.2 链地址法的代码实现
#pragma once
#include <algorithm>
#include <cstddef>
#include <string>
#include <utility>
#include <vector>
// NOTE(review): a using-directive at header scope leaks into every includer.
// It is retained only because code including this header may rely on it;
// nothing below depends on it.
using namespace std;

// Occupancy state of a slot in an open-addressing table (not used by the
// chained table below).
enum State
{
    EXIST,
    EMPTY,
    DELETE
};

// Slot type for an open-addressing table (not used by the chained table below).
template<class K, class V>
struct HashData
{
    std::pair<K, V> _kv;
    State _state = EMPTY;
};

// Default hash functor: converts the key itself to size_t.
template<class K>
struct HashFunc
{
    size_t operator()(const K& key) const
    {
        // C-style cast kept on purpose so that every key type the original
        // accepted (integers, enums, pointers) still compiles.
        return (size_t)key;
    }
};

// String specialisation: BKDR-style multiplicative hash with seed 131.
template<>
struct HashFunc<std::string>
{
    size_t operator()(const std::string& s) const
    {
        size_t hash = 0;
        for (auto ch : s)
        {
            hash += ch;
            hash *= 131;
        }
        return hash;
    }
};

// Returns the smallest prime in the built-in table that is >= n, or the
// largest table entry when n exceeds all of them.
// Note: assumes long is at least 32 bits.
inline unsigned long __stl_next_prime(unsigned long n)
{
    static const int kPrimeCount = 28;
    static const unsigned long kPrimes[kPrimeCount] = {
        53ul, 97ul, 193ul, 389ul, 769ul,
        1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
        49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
        1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
        50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
        1610612741ul, 3221225473ul, 4294967291ul
    };

    const unsigned long* first = kPrimes;
    const unsigned long* last = kPrimes + kPrimeCount;
    const unsigned long* pos = std::lower_bound(first, last, n);
    return pos == last ? *(last - 1) : *pos;
}

namespace hash_bucket
{
    // Singly linked node of one bucket chain.
    template<class K, class V>
    struct HashNode
    {
        std::pair<K, V> _kv;
        HashNode<K, V>* _next;

        HashNode(const std::pair<K, V>& kv)
            : _kv(kv)
            , _next(nullptr)
        {}
    };

    // Hash table that resolves collisions by separate chaining.
    //
    // K    key type, must be comparable with ==
    // V    mapped type
    // Hash functor mapping a key to size_t
    template<class K, class V, class Hash = HashFunc<K>>
    class HashTable
    {
        typedef HashNode<K, V> Node;
    public:
        HashTable()
            : _tables(11)
        {}

        // Deep copy: every chain is duplicated node by node, in order.
        HashTable(const HashTable& other)
            : _tables(other._tables.size())
        {
            try
            {
                for (size_t i = 0; i < other._tables.size(); ++i)
                {
                    Node** tail = &_tables[i];
                    for (const Node* cur = other._tables[i]; cur; cur = cur->_next)
                    {
                        *tail = new Node(cur->_kv);
                        tail = &(*tail)->_next;
                        ++_n;
                    }
                }
            }
            catch (...)
            {
                // The destructor does not run for a half-built object, so
                // release what was allocated before propagating.
                Clear();
                throw;
            }
        }

        // Copy-and-swap: the by-value parameter is the copy.
        HashTable& operator=(HashTable other)
        {
            _tables.swap(other._tables);
            std::swap(_n, other._n);
            return *this;
        }

        ~HashTable()
        {
            Clear();
        }

        // Inserts kv at the head of its bucket. Returns false when the key is
        // already present.
        bool Insert(const std::pair<K, V>& kv)
        {
            if (Find(kv.first))
                return false;

            Hash hash;

            // Load factor 1: relink the existing nodes into a larger bucket
            // array instead of reallocating them.
            if (_n == _tables.size())
            {
                std::vector<Node*> newTable(__stl_next_prime(_tables.size() + 1));
                for (size_t i = 0; i < _tables.size(); ++i)
                {
                    Node* cur = _tables[i];
                    while (cur)
                    {
                        Node* next = cur->_next;
                        const size_t slot = hash(cur->_kv.first) % newTable.size();
                        cur->_next = newTable[slot];
                        newTable[slot] = cur;
                        cur = next;
                    }
                    _tables[i] = nullptr;
                }
                _tables.swap(newTable);
            }

            const size_t hashi = hash(kv.first) % _tables.size();
            Node* newnode = new Node(kv);
            newnode->_next = _tables[hashi];
            _tables[hashi] = newnode;
            ++_n;
            return true;
        }

        // Returns the node holding key, or nullptr when the key is absent.
        Node* Find(const K& key)
        {
            Hash hash;
            Node* cur = _tables[hash(key) % _tables.size()];
            while (cur && !(cur->_kv.first == key))
                cur = cur->_next;
            return cur;
        }

    private:
        // Frees every node and leaves all buckets empty.
        void Clear()
        {
            for (size_t i = 0; i < _tables.size(); ++i)
            {
                Node* cur = _tables[i];
                while (cur)
                {
                    Node* next = cur->_next;
                    delete cur;
                    cur = next;
                }
                _tables[i] = nullptr;
            }
            _n = 0;
        }

        std::vector<Node*> _tables;  // bucket array: head pointer of each chain
        size_t _n = 0;               // number of stored elements
    };
}
2. 模拟实现 unordered_map 和 unordered_set 的要点分析
底层哈希表设计
unordered_map
和unordered_set
底层均依赖哈希表实现,区别在于存储的数据类型:
unordered_set
存储单个键值(键即值)unordered_map
存储键值对(pair<Key, Value>
)
核心设计要点:
- 桶数组:使用动态数组存储桶,每个桶为链表头指针(链地址法解决冲突)
- 节点结构:包含键(或键值对)和指向下一节点的指针
- 迭代器:需支持遍历桶内链表和跨桶移动,重载
++
、*
、->
等操作符
hash_bucket.h 头文件
#pragma once
#include <algorithm>
#include <cstddef>
#include <string>
#include <utility>
#include <vector>

// Occupancy state of a slot in an open-addressing table (not used by the
// chained table below).
enum State
{
    EXIST,
    EMPTY,
    DELETE
};

// Slot type for an open-addressing table (not used by the chained table below).
template<class K, class V>
struct HashData
{
    std::pair<K, V> _kv;
    State _state = EMPTY;
};

// Default hash functor: converts the key itself to size_t.
template<class K>
struct HashFunc
{
    size_t operator()(const K& key) const
    {
        // C-style cast kept on purpose so that every key type the original
        // accepted (integers, enums, pointers) still compiles.
        return (size_t)key;
    }
};

// String specialisation: BKDR-style multiplicative hash with seed 131.
template<>
struct HashFunc<std::string>
{
    size_t operator()(const std::string& s) const
    {
        size_t hash = 0;
        for (auto ch : s)
        {
            hash += ch;
            hash *= 131;
        }
        return hash;
    }
};

// Returns the smallest prime in the built-in table that is >= n, or the
// largest table entry when n exceeds all of them.
// Note: assumes long is at least 32 bits.
inline unsigned long __stl_next_prime(unsigned long n)
{
    static const int kPrimeCount = 28;
    static const unsigned long kPrimes[kPrimeCount] = {
        53ul, 97ul, 193ul, 389ul, 769ul,
        1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
        49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
        1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
        50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
        1610612741ul, 3221225473ul, 4294967291ul
    };

    const unsigned long* first = kPrimes;
    const unsigned long* last = kPrimes + kPrimeCount;
    const unsigned long* pos = std::lower_bound(first, last, n);
    return pos == last ? *(last - 1) : *pos;
}

namespace hash_bucket
{
    // Singly linked node of one bucket chain; T is the stored element type.
    template<class T>
    struct HashNode
    {
        T _data;
        HashNode<T>* _next;

        HashNode(const T& data)
            : _data(data)
            , _next(nullptr)
        {}
    };

    // Declared ahead of the iterator, which needs a pointer to its table.
    template<class K, class T, class KeyOfT, class Hash>
    class HashTable;

    // Forward iterator over all elements of a HashTable.
    //
    // Ref / Ptr select between the mutable (T&, T*) and the read-only
    // (const T&, const T*) flavour.
    template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
    struct HTIterator
    {
        typedef HashNode<T> Node;
        typedef HashTable<K, T, KeyOfT, Hash> HT;
        typedef HTIterator<K, T, Ref, Ptr, KeyOfT, Hash> Self;

        Node* _node;     // current element, nullptr for the end iterator
        const HT* _ht;   // owning table, needed to step to the next bucket

        HTIterator(Node* node, const HT* ht)
            : _node(node)
            , _ht(ht)
        {}

        Ref operator*() const
        {
            return _node->_data;
        }

        Ptr operator->() const
        {
            return &_node->_data;
        }

        bool operator!=(const Self& s) const
        {
            return _node != s._node;
        }

        bool operator==(const Self& s) const
        {
            return _node == s._node;
        }

        // Advances within the current chain, or to the head of the next
        // non-empty bucket; becomes the end iterator when none is left.
        Self& operator++()
        {
            if (_node->_next)
            {
                _node = _node->_next;
                return *this;
            }

            KeyOfT kot;
            Hash hash;
            size_t hashi = hash(kot(_node->_data)) % _ht->_tables.size();
            _node = nullptr;
            for (++hashi; hashi < _ht->_tables.size(); ++hashi)
            {
                if (_ht->_tables[hashi])
                {
                    _node = _ht->_tables[hashi];
                    break;
                }
            }
            return *this;
        }
    };

    // Separate-chaining hash table shared by the set and map adapters.
    //
    // K       key type, must be comparable with ==
    // T       stored element type
    // KeyOfT  functor extracting the key from a T
    // Hash    functor mapping a key to size_t
    template<class K, class T, class KeyOfT, class Hash>
    class HashTable
    {
        // The iterator reads _tables directly. The parameter names differ
        // from the enclosing template's because shadowing them is ill-formed.
        template<class K2, class T2, class Ref2, class Ptr2, class KeyOfT2, class Hash2>
        friend struct HTIterator;

        typedef HashNode<T> Node;
    public:
        typedef HTIterator<K, T, T&, T*, KeyOfT, Hash> Iterator;
        typedef HTIterator<K, T, const T&, const T*, KeyOfT, Hash> ConstIterator;

        Iterator Begin()
        {
            return Iterator(FirstNode(), this);
        }

        Iterator End()
        {
            return Iterator(nullptr, this);
        }

        ConstIterator Begin() const
        {
            return ConstIterator(FirstNode(), this);
        }

        ConstIterator End() const
        {
            return ConstIterator(nullptr, this);
        }

        HashTable()
            : _tables(__stl_next_prime(0))
            , _n(0)
        {}

        // Deep copy: every chain is duplicated node by node, in order.
        HashTable(const HashTable& other)
            : _tables(other._tables.size())
            , _n(0)
        {
            try
            {
                for (size_t i = 0; i < other._tables.size(); ++i)
                {
                    Node** tail = &_tables[i];
                    for (const Node* cur = other._tables[i]; cur; cur = cur->_next)
                    {
                        *tail = new Node(cur->_data);
                        tail = &(*tail)->_next;
                        ++_n;
                    }
                }
            }
            catch (...)
            {
                // The destructor does not run for a half-built object, so
                // release what was allocated before propagating.
                Clear();
                throw;
            }
        }

        // Copy-and-swap: the by-value parameter is the copy.
        HashTable& operator=(HashTable other)
        {
            _tables.swap(other._tables);
            std::swap(_n, other._n);
            return *this;
        }

        ~HashTable()
        {
            Clear();
        }

        // Inserts data at the head of its bucket. Returns the element's
        // iterator and true, or the existing element's iterator and false
        // when an element with the same key is already stored.
        std::pair<Iterator, bool> Insert(const T& data)
        {
            KeyOfT kot;
            Iterator it = Find(kot(data));
            if (it != End())
                return { it, false };

            Hash hash;

            // Load factor 1: relink the existing nodes into a larger bucket
            // array instead of reallocating them.
            if (_n == _tables.size())
            {
                std::vector<Node*> newTable(__stl_next_prime(_tables.size() + 1));
                for (size_t i = 0; i < _tables.size(); ++i)
                {
                    Node* cur = _tables[i];
                    while (cur)
                    {
                        Node* next = cur->_next;
                        const size_t slot = hash(kot(cur->_data)) % newTable.size();
                        cur->_next = newTable[slot];
                        newTable[slot] = cur;
                        cur = next;
                    }
                    _tables[i] = nullptr;
                }
                _tables.swap(newTable);
            }

            const size_t hashi = hash(kot(data)) % _tables.size();
            Node* newnode = new Node(data);
            newnode->_next = _tables[hashi];
            _tables[hashi] = newnode;
            ++_n;
            return { Iterator(newnode, this), true };
        }

        // Returns an iterator to the element with the given key, or End().
        Iterator Find(const K& key)
        {
            return Iterator(FindNode(key), this);
        }

        ConstIterator Find(const K& key) const
        {
            return ConstIterator(FindNode(key), this);
        }

        // Removes the element with the given key. Returns whether one existed.
        bool Erase(const K& key)
        {
            KeyOfT kot;
            Hash hash;
            // `link` is the pointer that refers to the current node: either
            // the bucket head or the previous node's _next.
            Node** link = &_tables[hash(key) % _tables.size()];
            while (*link)
            {
                Node* cur = *link;
                if (kot(cur->_data) == key)
                {
                    *link = cur->_next;
                    delete cur;
                    --_n;
                    return true;
                }
                link = &cur->_next;
            }
            return false;
        }

        // Number of stored elements.
        size_t Size() const
        {
            return _n;
        }

    private:
        // Head of the first non-empty bucket, or nullptr for an empty table.
        Node* FirstNode() const
        {
            if (_n == 0)
                return nullptr;
            for (Node* head : _tables)
            {
                if (head)
                    return head;
            }
            return nullptr;
        }

        // Node holding key, or nullptr when the key is absent.
        Node* FindNode(const K& key) const
        {
            KeyOfT kot;
            Hash hash;
            Node* cur = _tables[hash(key) % _tables.size()];
            while (cur && !(kot(cur->_data) == key))
                cur = cur->_next;
            return cur;
        }

        // Frees every node and leaves all buckets empty.
        void Clear()
        {
            for (size_t i = 0; i < _tables.size(); ++i)
            {
                Node* cur = _tables[i];
                while (cur)
                {
                    Node* next = cur->_next;
                    delete cur;
                    cur = next;
                }
                _tables[i] = nullptr;
            }
            _n = 0;
        }

        std::vector<Node*> _tables;  // bucket array: head pointer of each chain
        size_t _n = 0;               // number of stored elements
    };
}
unordered_set的模拟实现
#pragma once
#include <utility>
#include"hash_bucket.h"

namespace aaa
{
    // Set of unique keys layered on hash_bucket::HashTable.
    //
    // K    key type
    // Hash functor mapping a key to size_t
    template<class K, class Hash = HashFunc<K>>
    class unordered_set
    {
        // The stored element is the key itself.
        struct SetKeyOfT
        {
            const K& operator()(const K& key) const
            {
                return key;
            }
        };

        // Elements are stored as const K so they cannot be modified through
        // an iterator.
        typedef hash_bucket::HashTable<K, const K, SetKeyOfT, Hash> Table;

    public:
        typedef typename Table::Iterator iterator;
        typedef typename Table::ConstIterator const_iterator;

        iterator begin()
        {
            return _ht.Begin();
        }

        iterator end()
        {
            return _ht.End();
        }

        const_iterator begin() const
        {
            return _ht.Begin();
        }

        const_iterator end() const
        {
            return _ht.End();
        }

        // Forwards to the table's Insert and returns its (iterator, bool)
        // result unchanged.
        std::pair<iterator, bool> insert(const K& key)
        {
            return _ht.Insert(key);
        }

        // Returns an iterator to key, or end() when it is absent.
        iterator Find(const K& key)
        {
            return _ht.Find(key);
        }

        // Removes key. Returns whether it was present.
        bool Erase(const K& key)
        {
            return _ht.Erase(key);
        }

        // Lower-case spellings matching insert/begin/end.
        iterator find(const K& key)
        {
            return Find(key);
        }

        bool erase(const K& key)
        {
            return Erase(key);
        }

    private:
        Table _ht;
    };
}
unordered_map的模拟实现
#pragma once
#include <utility>
#include"hash_bucket.h"

namespace aaa
{
    // Key/value map with unique keys layered on hash_bucket::HashTable.
    //
    // K    key type
    // V    mapped type
    // Hash functor mapping a key to size_t
    template<class K, class V, class Hash = HashFunc<K>>
    class unordered_map
    {
        // Extracts the key from a stored element. The parameter type matches
        // the stored type exactly, so the returned reference points into the
        // element itself rather than into a converted temporary pair.
        struct MapKeyOfT
        {
            const K& operator()(const std::pair<const K, V>& kv) const
            {
                return kv.first;
            }
        };

        // The key half is const so it cannot be modified through an iterator.
        typedef hash_bucket::HashTable<K, std::pair<const K, V>, MapKeyOfT, Hash> Table;

    public:
        typedef typename Table::Iterator iterator;
        typedef typename Table::ConstIterator const_iterator;

        iterator begin()
        {
            return _ht.Begin();
        }

        iterator end()
        {
            return _ht.End();
        }

        const_iterator begin() const
        {
            return _ht.Begin();
        }

        const_iterator end() const
        {
            return _ht.End();
        }

        // Returns the value mapped to key, first inserting a
        // value-initialised V when the key is absent.
        V& operator[](const K& key)
        {
            std::pair<iterator, bool> ret = insert({ key, V() });
            return ret.first->second;
        }

        // Forwards to the table's Insert and returns its (iterator, bool)
        // result unchanged.
        std::pair<iterator, bool> insert(const std::pair<K, V>& kv)
        {
            return _ht.Insert(kv);
        }

        // Returns an iterator to the element with key, or end().
        iterator Find(const K& key)
        {
            return _ht.Find(key);
        }

        // Removes the element with key. Returns whether it was present.
        bool Erase(const K& key)
        {
            return _ht.Erase(key);
        }

        // Lower-case spellings matching insert/begin/end.
        iterator find(const K& key)
        {
            return Find(key);
        }

        bool erase(const K& key)
        {
            return Erase(key);
        }

    private:
        Table _ht;
    };
}
总的来说,unordered_map
和unordered_set
的模拟实现核心在于:
- 基于链地址法的哈希表设计,解决哈希冲突
- 仿函数处理键的哈希计算(Hash)以及从存储元素中提取键(KeyOfT),键的相等性比较直接使用 ==
- 动态扩容机制维持低负载因子,保证效率
- 针对 set 和 map 的存储特性设计差异化接口和迭代器