当前位置：首页 > news >正文

数据结构与算法分析：哈希表简述（一）

news 2025/7/9 3:24:49

遇到的问题，都有解决方案，希望我的博客能为你提供一点帮助。

一、哈希表概述

1.核心原理

哈希表（hash table），又称散列表，是一种高效的数据结构，用于存储键值对（Key-Value Pairs）。

数据结构：散列表是一个固定大小的数组（TableSize），用于存储键值对（item），其中每个项包含关键字（key）和关联数据（如字符串、数值等）。
索引范围：表的索引从 0 到 TableSize-1，所有关键字通过散列函数映射到这一范围内。
哈希函数：将任意大小的输入（键）转换为固定范围的索引值（通常为数组下标）（如key=1的下标为0）。理想情况下，哈希函数应满足：
- 均匀性：键尽可能均匀分布，减少冲突。
- 高效性：计算速度快。
- 确定性：同一键始终映射到同一索引。
- 理想情况：每个关键字映射到唯一单元

比如：输入1得到A的时间复杂度为 $O(1)$ （相当于数组下标访问）

2.哈希表高效的原因

	数组	链表	哈希表
查找元素	O(n)	O(n)	O(1)
添加元素	O(1)	O(1)	O(1)
删除元素	O(n)	O(n)	O(1)

数组

查找元素O(n)：数组元素连续存储，查找需遍历数组，最坏情况检查所有 n 个元素，时间复杂度与元素数量正相关。
添加元素O(1)：若仅在数组末尾添加（不考虑扩容），直接操作尾位置，时间固定。
删除元素O(n)：删除元素后，需将后续元素前移填补空位，移动次数与元素数量 n 相关。

链表

查找元素O(n)：链表通过指针连接，元素内存不连续，查找需从表头逐节点遍历，最坏遍历整个链表（n 个节点）。
添加元素O(1)：若已知插入位置（如指定节点后），仅修改指针指向，时间固定。
删除元素O(n)：删除前需先遍历找到目标节点，再修改指针，遍历过程导致 $O(n)$ 复杂度。

哈希表

查找 / 添加 / 删除（均为 O(1)）：通过哈希函数将键映射到固定位置（桶），理想无冲突时，直接定位元素，操作时间恒定。实际虽可能有冲突（如链地址法处理），但平均复杂度仍接近 O(1)。

3. 哈希表的简单实现

基于数组的哈希表实现：

#include <iostream>
#include <string>
#include <vector>
using namespace std;
/// Minimal direct-addressed hash table mapping int keys to string values.
///
/// Every key owns exactly one bucket, chosen by `key mod bucket-count`. There
/// is no collision resolution: inserting a key whose bucket is already held by
/// a different key overwrites that entry.
class HashTable {
private:
    // One bucket. Occupancy is tracked explicitly so that 0 is an ordinary key
    // instead of doubling as the "empty" marker.
    struct Slot {
        int key = 0;
        std::string value;
        bool used = false;
    };

    std::vector<Slot> hash_table;
    int size;  // bucket count; always >= 1 and equal to hash_table.size()

public:
    /// Creates a table with `size` empty buckets.
    /// A non-positive `size` is clamped to 1 so hash() never divides by zero.
    HashTable(int size) : size(size > 0 ? size : 1) {
        hash_table.resize(this->size);
    }

    /// Maps `key` to a bucket index in [0, size).
    int hash(int key) const {
        // In C++ the sign of `%` follows the dividend, so shift negative
        // remainders back into range.
        const int remainder = key % size;
        return remainder < 0 ? remainder + size : remainder;
    }

    /// Stores `value` under `key`, replacing whatever the bucket held before
    /// (including an entry for a different key that maps to the same bucket).
    void insert(int key, std::string value) {
        Slot& slot = hash_table[hash(key)];
        slot.key = key;
        slot.value = value;
        slot.used = true;
    }

    /// Returns the value stored under `key`, or the literal "Not Found" when
    /// the bucket is empty or holds a different key.
    std::string search(int key) const {
        const Slot& slot = hash_table[hash(key)];
        if (slot.used && slot.key == key) {
            return slot.value;
        }
        return "Not Found";
    }

    /// Removes `key` if it is present and reports success on stdout.
    /// Does nothing when the bucket is empty or belongs to another key.
    void remove(int key) {
        Slot& slot = hash_table[hash(key)];
        if (slot.used && slot.key == key) {
            slot = Slot();
            std::cout << "删除成功" << '\n';
        }
    }

    /// Prints every occupied bucket, in bucket order.
    void print() const {
        for (const Slot& slot : hash_table) {
            if (slot.used) {
                std::cout << "key:" << slot.key << "value:" << slot.value << '\n';
            }
        }
    }
};

int main(){
    HashTable ht(100);
    ht.insert(12345, "A");
    ht.insert(12355, "B");
    ht.insert(12365, "C");
    ht.insert(12376, "D");
    ht.insert(12387, "E");
    cout << ht.search(12345) << endl; // 输出: A
    ht.remove(12345);
    cout<<ht.search(12345)<<endl;
    ht.print();
}

结果如下：

如何去用python实现呢？

class Pair:
    """Plain holder for one key and the value stored under it."""

    def __init__(self, key, value):
        # Both attributes are public and mutable.
        self.key, self.value = key, value
class HashTable:
    """A fixed-capacity hash table holding at most one entry per slot.

    Keys are mapped to a slot with ``key % capacity``. Collisions are detected
    but NOT resolved: inserting a key whose slot is already held by a
    different key prints ``Collision!`` and leaves the table unchanged.
    """

    def __init__(self, capacity=100):
        """Create an empty table with ``capacity`` slots.

        Raises:
            ValueError: if ``capacity`` is not positive.
        """
        if capacity <= 0:
            raise ValueError("capacity must be a positive integer")
        self.capacity = capacity
        self.table: list[Pair | None] = [None] * capacity

    def hash_function(self, key):
        """Return the slot index for ``key`` (``key % capacity``)."""
        return key % self.capacity

    def insert(self, key, value):
        """Store ``value`` under ``key``.

        An existing entry with the same key is updated in place. If the slot
        is held by a different key, ``Collision!`` is printed and nothing is
        stored.
        """
        index = self.hash_function(key)
        entry = self.table[index]
        if entry is None:
            self.table[index] = Pair(key, value)
        elif entry.key == key:
            # Same key again is an update, not a collision.
            entry.value = value
        else:
            print("Collision!")

    def search(self, key):
        """Return the value stored under ``key``.

        Raises:
            KeyError: if ``key`` is not in the table.
        """
        entry = self.table[self.hash_function(key)]
        if entry is not None and entry.key == key:
            return entry.value
        raise KeyError("Key not found.")

    def remove(self, key):
        """Delete the entry stored under ``key``.

        Raises:
            KeyError: if ``key`` is not in the table.
        """
        index = self.hash_function(key)
        entry = self.table[index]
        if entry is not None and entry.key == key:
            self.table[index] = None
        else:
            raise KeyError("Key not found.")

    def display(self):
        """Print every occupied slot as ``Index i: key -> value``."""
        for i, entry in enumerate(self.table):
            if entry is not None:
                print(f"Index {i}: {entry.key} -> {entry.value}")
if __name__ == "__main__":
    # Demo: fill a 100-slot table, show it, look one key up, delete that key,
    # then show the table again.
    table = HashTable(100)
    for key, value in (
        (12345, "A"),
        (12355, "B"),
        (12365, "C"),
        (12376, "D"),
        (12387, "E"),
    ):
        table.insert(key, value)

    table.display()
    print(table.search(12365))
    table.remove(12365)
    table.display()

输出结果：

4.如果两个KEY映射到同一个数组下标了呢？（哈希冲突）

如图23455与33355均映射到了下标为55的桶内，也就是说同一个桶（下标）对应了两个Key。该如何去解决这个问题呢？

4.1 哈希表的扩容

如果存放桶的数组足够的大，是不是就可以解决这个问题了呢？

像这样，如果数组扩容到156，那么key=33355（33355 % 156 = 127）和key=23455（23455 % 156 = 55）是不是就不会再冲突了？

ok,下一个问题：扩容触发的阈值？

4.2扩容触发的条件

4.2.1. 什么时候需要扩容（一个可观测的指标）？

遇到哈希冲突时就扩容（很不好，只是一种想法）
负载因子（Load Factor）：已存储键值对数量 / 数组容量（类似预防手段）。
- 示例：数组大小为 10，已存 7 个元素，负载因子为 0.7。
阈值设定：当负载因子超过预设阈值（如 0.75）时，哈希冲突概率显著增加，操作效率（插入、查找）可能退化为 O(n)。
扩容目的：通过扩大数组容量，降低负载因子，减少哈希冲突。

4.2.2.如何去扩容呢？

(1) 创建新数组

新数组大小通常为原数组的 2倍（或其他策略，如质数扩容）。
- 示例：原数组大小为 10，扩容后为 20。

(2) 重新哈希（Rehashing）

遍历原数组中的所有键值对。
重新计算哈希值：根据新数组大小，重新应用哈希函数 index = hash(key) % new_size。
插入新数组：将键值对放入新数组的对应位置（需重新处理冲突）。

(3) 替换旧数组

释放旧数组内存，将哈希表的底层数组指向新数组。

代码实现：（在原代码的基础上加上扩容的逻辑）（仅仅只是演示哦，请不要这样写）

#include <iostream>
#include <limits>
#include <stdexcept>
#include <string>
#include <vector>
using namespace std;
/// Direct-addressed hash table (int key -> string value) that grows instead of
/// resolving collisions.
///
/// Every key owns exactly one bucket, chosen by `key mod bucket-count`. The
/// table doubles when the load factor reaches 0.75, and grows tenfold for as
/// long as a newly inserted key lands on a bucket owned by a different key.
/// This is a teaching demo: growth on collision can make the bucket array far
/// larger than the number of stored entries.
class HashTable {
private:
    // One bucket. Occupancy is tracked explicitly so that 0 is an ordinary key
    // instead of doubling as the "empty" marker.
    struct Slot {
        int key = 0;
        std::string value;
        bool used = false;
    };

    std::vector<Slot> hash_table;
    int size;            // bucket count; always equal to hash_table.size()
    int used_slots = 0;  // occupied buckets, maintained by insert/remove

    // Maps `key` into [0, bucket_count). In C++ the sign of `%` follows the
    // dividend, so negative remainders are shifted back into range.
    static int bucket_of(int key, int bucket_count) {
        const int remainder = key % bucket_count;
        return remainder < 0 ? remainder + bucket_count : remainder;
    }

    // Multiplies the bucket count by `factor`, refusing to overflow int.
    void grow(int factor) {
        if (size > std::numeric_limits<int>::max() / factor) {
            throw std::length_error("HashTable: bucket count would overflow int");
        }
        resize(size * factor);
    }

public:
    /// Creates a table with `size` empty buckets.
    /// @throws std::invalid_argument if `size` is not positive.
    HashTable(int size) : size(size) {
        if (size <= 0) {
            throw std::invalid_argument("HashTable: size must be positive");
        }
        hash_table.resize(size);
    }

    /// Maps `key` to a bucket index in [0, getSize()).
    int hash(int key) const {
        return bucket_of(key, size);
    }

    /// Stores `value` under `key`; an existing entry for `key` is updated in
    /// place.
    /// @throws std::length_error if the table would have to grow past the
    ///         range of int to separate `key` from a colliding key.
    void insert(int key, std::string value) {
        if (load_factor() >= 0.75) {
            grow(2);
        }
        // Growing by a whole multiple never makes two resident keys collide
        // (keys equal modulo k*m are also equal modulo m), so only the new key
        // can still clash. Keep growing until its bucket is free or its own.
        int index = hash(key);
        while (hash_table[index].used && hash_table[index].key != key) {
            grow(10);
            index = hash(key);
        }
        Slot& slot = hash_table[index];
        if (!slot.used) {
            slot.used = true;
            slot.key = key;
            ++used_slots;
        }
        slot.value = value;
    }

    /// Returns the value stored under `key`, or the literal "Not Found" when
    /// the bucket is empty or holds a different key.
    std::string search(int key) const {
        const Slot& slot = hash_table[hash(key)];
        if (slot.used && slot.key == key) {
            return slot.value;
        }
        return "Not Found";
    }

    /// Removes `key` if it is present and reports success on stdout.
    /// Does nothing when the bucket is empty or belongs to another key.
    void remove(int key) {
        Slot& slot = hash_table[hash(key)];
        if (slot.used && slot.key == key) {
            slot = Slot();
            --used_slots;
            std::cout << "删除成功" << '\n';
        }
    }

    /// Prints every occupied bucket, in bucket order.
    void print() const {
        for (const Slot& slot : hash_table) {
            if (slot.used) {
                std::cout << "key:" << slot.key << "value:" << slot.value << '\n';
            }
        }
    }

    /// Rehashes every entry into a table of `new_size` buckets.
    /// The table is left untouched if this throws.
    /// @throws std::invalid_argument if `new_size` is not positive.
    /// @throws std::runtime_error if two stored keys would share a bucket at
    ///         `new_size` (possible when it is not a multiple of the current
    ///         size).
    void resize(int new_size) {
        if (new_size <= 0) {
            throw std::invalid_argument("HashTable::resize: size must be positive");
        }
        std::vector<Slot> new_table(new_size);
        for (const Slot& entry : hash_table) {
            if (!entry.used) {
                continue;
            }
            // Index with the NEW bucket count; lookups after the swap use it too.
            Slot& target = new_table[bucket_of(entry.key, new_size)];
            if (target.used) {
                throw std::runtime_error(
                    "HashTable::resize: stored keys collide at the requested size");
            }
            target = entry;
        }
        hash_table.swap(new_table);
        size = new_size;
    }

    /// Fraction of buckets that are occupied.
    double load_factor() const {
        return static_cast<double>(used_slots) / hash_table.size();
    }

    /// Changes the bucket count to `size`, rehashing the stored entries so the
    /// hash modulus and the bucket array stay in sync. See resize() for the
    /// exceptions this can throw.
    void setSize(int size) {
        if (size != this->size) {
            resize(size);
        }
    }

    /// Current number of buckets.
    int getSize() const {
        return static_cast<int>(hash_table.size());
    }
};

int main(){
    HashTable ht(100);
    ht.insert(12345, "A");
    ht.insert(12355, "B");
    ht.insert(12365, "C");
    ht.insert(12376, "D");
    ht.insert(12387, "E");
    cout << ht.search(12345) << endl; // 输出: A
    ht.remove(12345);
    cout<<ht.search(12345)<<endl;
    ht.print();
    ht.insert(33355, "X"); // 插入一个新的键值对
    cout<<"扩容后的大小为："<<ht.getSize()<<endl;
    ht.print(); // 打印哈希表
}

输出结果：