算法导论第三章:数据结构艺术与高效实现
算法导论第三章:数据结构艺术与高效实现
本文是《算法导论》精讲专栏第三章,通过动态操作可视化、内存布局图解和性能对比实验,结合完整C语言实现,深入解析数据结构核心原理。包含栈、队列、链表、散列表、二叉树等完整实现,以及工程实践中的优化技巧。
1. 动态集合:算法操作的基石
1.1 基本操作与数据结构选择
不同数据结构操作复杂度对比:
数据结构 | Search | Insert | Delete | Minimum | Successor |
---|---|---|---|---|---|
无序数组 | O(n) | O(1) | O(n) | O(n) | O(n) |
有序数组 | O(log n) | O(n) | O(n) | O(1) | O(1) |
链表 | O(n) | O(1) | O(1)* | O(n) | O(1) |
二叉搜索树 | O(h) | O(h) | O(h) | O(h) | O(h) |
散列表 | O(1) | O(1) | O(1) | O(n) | O(n) |
*注:链表删除操作在已知节点位置时为O(1)
2. 栈与队列:线性结构的经典应用
2.1 栈:LIFO(后进先出)结构
操作原理:
push -> | | | | <- pop+---+---+---+| 3 | 2 | 1 |+---+---+---+栈顶↑ 栈底
C语言实现:
#define MAX_SIZE 100typedef struct {int data[MAX_SIZE];int top;
} Stack;void init_stack(Stack *s) {s->top = -1;
}int is_empty(Stack *s) {return s->top == -1;
}int is_full(Stack *s) {return s->top == MAX_SIZE - 1;
}void push(Stack *s, int item) {if (is_full(s)) {printf("Stack Overflow\n");return;}s->data[++s->top] = item;
}int pop(Stack *s) {if (is_empty(s)) {printf("Stack Underflow\n");return -1;}return s->data[s->top--];
}// 应用:括号匹配检查
int is_balanced(char *expr) {Stack s;init_stack(&s);for (int i = 0; expr[i]; i++) {if (expr[i] == '(' || expr[i] == '[' || expr[i] == '{') {push(&s, expr[i]);} else if (expr[i] == ')' || expr[i] == ']' || expr[i] == '}') {if (is_empty(&s)) return 0;char top = pop(&s);if ((expr[i] == ')' && top != '(') ||(expr[i] == ']' && top != '[') ||(expr[i] == '}' && top != '{')) {return 0;}}}return is_empty(&s);
}
2.2 队列:FIFO(先进先出)结构
循环队列实现:
typedef struct {int data[MAX_SIZE];int front;int rear;int size;
} Queue;void init_queue(Queue *q) {q->front = 0;q->rear = -1;q->size = 0;
}int is_empty(Queue *q) {return q->size == 0;
}int is_full(Queue *q) {return q->size == MAX_SIZE;
}void enqueue(Queue *q, int item) {if (is_full(q)) {printf("Queue Overflow\n");return;}q->rear = (q->rear + 1) % MAX_SIZE;q->data[q->rear] = item;q->size++;
}int dequeue(Queue *q) {if (is_empty(q)) {printf("Queue Underflow\n");return -1;}int item = q->data[q->front];q->front = (q->front + 1) % MAX_SIZE;q->size--;return item;
}// 应用:BFS广度优先搜索
void bfs(int graph[][5], int start, int n) {int visited[5] = {0};Queue q;init_queue(&q);visited[start] = 1;enqueue(&q, start);while (!is_empty(&q)) {int node = dequeue(&q);printf("%d ", node);for (int i = 0; i < n; i++) {if (graph[node][i] && !visited[i]) {visited[i] = 1;enqueue(&q, i);}}}
}
3. 链表:动态内存的优雅组织
3.1 链表类型与性能对比
类型 | 优点 | 缺点 | 应用场景 |
---|---|---|---|
单向链表 | 插入/删除快,内存小 | 只能单向遍历 | 简单列表,LRU缓存 |
双向链表 | 双向遍历,删除高效 | 内存占用大 | 浏览器历史记录 |
循环链表 | 循环访问,无边界 | 实现复杂 | 轮询调度,约瑟夫问题 |
跳跃链表 | 快速查找(O(log n)) | 实现复杂,维护成本高 | Redis有序集合 |
3.2 双向链表完整实现
typedef struct Node {int data;struct Node *prev;struct Node *next;
} Node;typedef struct {Node *head;Node *tail;int size;
} DoublyLinkedList;Node *create_node(int data) {Node *new_node = (Node *)malloc(sizeof(Node));new_node->data = data;new_node->prev = NULL;new_node->next = NULL;return new_node;
}void init_list(DoublyLinkedList *list) {list->head = NULL;list->tail = NULL;list->size = 0;
}void insert_front(DoublyLinkedList *list, int data) {Node *new_node = create_node(data);if (list->head == NULL) {list->head = list->tail = new_node;} else {new_node->next = list->head;list->head->prev = new_node;list->head = new_node;}list->size++;
}void insert_end(DoublyLinkedList *list, int data) {Node *new_node = create_node(data);if (list->tail == NULL) {list->head = list->tail = new_node;} else {list->tail->next = new_node;new_node->prev = list->tail;list->tail = new_node;}list->size++;
}void delete_node(DoublyLinkedList *list, Node *node) {if (node == NULL) return;if (node == list->head) {list->head = node->next;if (list->head) list->head->prev = NULL;}if (node == list->tail) {list->tail = node->prev;if (list->tail) list->tail->next = NULL;}if (node->prev) node->prev->next = node->next;if (node->next) node->next->prev = node->prev;free(node);list->size--;
}// 应用:LRU缓存实现
typedef struct {int capacity;DoublyLinkedList list;Node **hash_table; // 简化版哈希表
} LRUCache;LRUCache *create_cache(int capacity) {LRUCache *cache = (LRUCache *)malloc(sizeof(LRUCache));cache->capacity = capacity;init_list(&cache->list);cache->hash_table = (Node **)calloc(1000, sizeof(Node *));return cache;
}int get(LRUCache *cache, int key) {if (cache->hash_table[key] == NULL) return -1;// 移动到链表头部Node *node = cache->hash_table[key];delete_node(&cache->list, node);insert_front(&cache->list, key);cache->hash_table[key] = cache->list.head;return node->data;
}void put(LRUCache *cache, int key, int value) {if (cache->hash_table[key] != NULL) {// 更新现有值Node *node = cache->hash_table[key];node->data = value;get(cache, key); // 触发访问更新return;}if (cache->list.size >= cache->capacity) {// 淘汰最久未使用int key_to_remove = cache->list.tail->data;delete_node(&cache->list, cache->list.tail);cache->hash_table[key_to_remove] = NULL;}// 插入新节点insert_front(&cache->list, key);cache->list.head->data = value;cache->hash_table[key] = cache->list.head;
}
4. 内存中的指针与对象
4.1 三种内存表示方式
数组实现图示:
索引: 0 1 2 3 4 5+-----+-----+-----+-----+-----+-----+
key: | 10 | 20 | 30 | | | |
next:| 2 | 3 | -1 | | | |
prev:| -1 | 0 | 1 | | | |+-----+-----+-----+-----+-----+-----+头节点↑
C语言实现:
#define MAX_SIZE 100typedef struct {int key;int next;int prev;
} ListNode;typedef struct {ListNode nodes[MAX_SIZE];int free_list;int head;int size;
} ArrayLinkedList;void init_array_list(ArrayLinkedList *list) {// 初始化空闲列表for (int i = 0; i < MAX_SIZE - 1; i++) {list->nodes[i].next = i + 1;}list->nodes[MAX_SIZE - 1].next = -1;list->free_list = 0;list->head = -1;list->size = 0;
}int allocate_object(ArrayLinkedList *list) {if (list->free_list == -1) {printf("Out of space\n");return -1;}int index = list->free_list;list->free_list = list->nodes[index].next;return index;
}void free_object(ArrayLinkedList *list, int index) {list->nodes[index].next = list->free_list;list->free_list = index;
}void insert_sorted(ArrayLinkedList *list, int key) {int new_index = allocate_object(list);if (new_index == -1) return;list->nodes[new_index].key = key;list->nodes[new_index].next = -1;list->nodes[new_index].prev = -1;// 查找插入位置int curr = list->head;int prev = -1;while (curr != -1 && list->nodes[curr].key < key) {prev = curr;curr = list->nodes[curr].next;}if (prev == -1) {// 插入头部list->nodes[new_index].next = list->head;if (list->head != -1) {list->nodes[list->head].prev = new_index;}list->head = new_index;} else {// 插入中间list->nodes[new_index].next = curr;list->nodes[new_index].prev = prev;list->nodes[prev].next = new_index;if (curr != -1) {list->nodes[curr].prev = new_index;}}list->size++;
}
5. 树结构:层次化数据组织
5.1 二叉树表示法
三种表示法对比:
表示法 | 存储结构 | 优点 | 缺点 |
---|---|---|---|
左右链表示法 | 节点存储左右子节点指针 | 结构灵活,操作高效 | 内存占用大 |
数组表示法 | 按层级顺序存储在数组中 | 内存紧凑,访问快速 | 不适合动态变化 |
父亲链表示法 | 节点存储父节点指针 | 适合找父节点操作 | 查找子节点效率低 |
C语言实现(左右链法):
typedef struct TreeNode {int data;struct TreeNode *left;struct TreeNode *right;
} TreeNode;TreeNode *create_node(int data) {TreeNode *node = (TreeNode *)malloc(sizeof(TreeNode));node->data = data;node->left = NULL;node->right = NULL;return node;
}// 二叉搜索树插入
TreeNode *insert(TreeNode *root, int data) {if (root == NULL) {return create_node(data);}if (data < root->data) {root->left = insert(root->left, data);} else if (data > root->data) {root->right = insert(root->right, data);}return root;
}// 三种遍历方式
void preorder(TreeNode *root) {if (root) {printf("%d ", root->data);preorder(root->left);preorder(root->right);}
}void inorder(TreeNode *root) {if (root) {inorder(root->left);printf("%d ", root->data);inorder(root->right);}
}void postorder(TreeNode *root) {if (root) {postorder(root->left);postorder(root->right);printf("%d ", root->data);}
}
5.2 树的遍历应用
表达式树求值:
*/ \+ -/ \ / \2 3 5 1
int evaluate_expression(TreeNode *root) {if (root == NULL) return 0;// 叶子节点为操作数if (!root->left && !root->right) {return root->data;}int left_val = evaluate_expression(root->left);int right_val = evaluate_expression(root->right);switch (root->data) {case '+': return left_val + right_val;case '-': return left_val - right_val;case '*': return left_val * right_val;case '/': return left_val / right_val;}return 0;
}
6. 散列表:快速访问的魔法
6.1 散列函数设计原则
- 均匀性:键值均匀分布到各个槽
- 高效性:计算速度快
- 确定性:相同键产生相同散列值
常见散列函数:
// 除法散列法:h(k) = k mod m
int division_hash(int key, int m) {return key % m;
}// 乘法散列法:h(k) = floor(m * (k * A mod 1))
int multiplication_hash(int key, int m) {double A = 0.6180339887; // 黄金分割double val = key * A;return (int)(m * (val - (int)val));
}// 全域散列法(避免最坏情况)
int universal_hash(int key, int a, int b, int p, int m) {return ((a * key + b) % p) % m;
}
6.2 冲突解决策略
6.2.1 链地址法
C语言实现:
#define TABLE_SIZE 10typedef struct HashNode {int key;int value;struct HashNode *next;
} HashNode;typedef struct {HashNode **buckets;int size;
} HashMap;HashMap *create_hash_map() {HashMap *map = (HashMap *)malloc(sizeof(HashMap));map->size = TABLE_SIZE;map->buckets = (HashNode **)calloc(TABLE_SIZE, sizeof(HashNode *));return map;
}int hash(int key) {return key % TABLE_SIZE;
}void put(HashMap *map, int key, int value) {int index = hash(key);HashNode *new_node = (HashNode *)malloc(sizeof(HashNode));new_node->key = key;new_node->value = value;new_node->next = NULL;if (map->buckets[index] == NULL) {map->buckets[index] = new_node;} else {HashNode *current = map->buckets[index];while (current->next != NULL) {if (current->key == key) {current->value = value; // 更新现有键free(new_node);return;}current = current->next;}current->next = new_node;}
}int get(HashMap *map, int key) {int index = hash(key);HashNode *current = map->buckets[index];while (current != NULL) {if (current->key == key) {return current->value;}current = current->next;}return -1; // 未找到
}
6.2.2 开放寻址法
探查序列比较:
方法 | 探查序列 | 优点 | 缺点 |
---|---|---|---|
线性探查 | h(k,i) = (h’(k) + i) mod m | 实现简单 | 聚集现象严重 |
二次探查 | h(k,i) = (h’(k)+c₁i+c₂i²) mod m | 减少聚集 | 可能无法探查所有槽 |
双重散列 | h(k,i) = (h₁(k)+i*h₂(k)) mod m | 最接近均匀探查 | 计算复杂 |
双重散列实现:
#define TABLE_SIZE 10
#define EMPTY -1
#define DELETED -2int hash1(int key) {return key % TABLE_SIZE;
}int hash2(int key) {return 7 - (key % 7); // 确保不为0
}int double_hash(int key, int i) {return (hash1(key) + i * hash2(key)) % TABLE_SIZE;
}void insert_open_addressing(int table[], int key) {int i = 0;do {int j = double_hash(key, i);if (table[j] == EMPTY || table[j] == DELETED) {table[j] = key;return;}i++;} while (i < TABLE_SIZE);printf("Hash table overflow\n");
}int search_open_addressing(int table[], int key) {int i = 0;do {int j = double_hash(key, i);if (table[j] == EMPTY) return -1;if (table[j] == key) return j;i++;} while (i < TABLE_SIZE);return -1;
}
7. 工程优化与性能分析
7.1 动态数组:STL vector 原理
typedef struct {int *data;int size;int capacity;
} DynamicArray;void init_dynamic_array(DynamicArray *arr, int capacity) {arr->data = (int *)malloc(capacity * sizeof(int));arr->size = 0;arr->capacity = capacity;
}void push_back(DynamicArray *arr, int value) {if (arr->size >= arr->capacity) {// 扩容策略:倍增容量int new_capacity = arr->capacity * 2;int *new_data = (int *)realloc(arr->data, new_capacity * sizeof(int));if (new_data) {arr->data = new_data;arr->capacity = new_capacity;} else {printf("Memory allocation failed\n");return;}}arr->data[arr->size++] = value;
}// 时间复杂度分析:均摊O(1)
// 数学证明:考虑连续插入n个元素的总成本
// T(n) = n + (1 + 2 + 4 + ... + 2^{k}) < n + 2^{k+1} < 3n
// ∴ 单次操作均摊成本 = T(n)/n < 3
7.2 内存池优化
#define POOL_SIZE 1000typedef struct {Node *nodes[POOL_SIZE];int free_index;
} MemoryPool;MemoryPool *create_memory_pool() {MemoryPool *pool = (MemoryPool *)malloc(sizeof(MemoryPool));for (int i = 0; i < POOL_SIZE - 1; i++) {pool->nodes[i] = (Node *)malloc(sizeof(Node));pool->nodes[i]->next = i + 1; // 使用next指针连接空闲节点}pool->nodes[POOL_SIZE - 1] = (Node *)malloc(sizeof(Node));pool->nodes[POOL_SIZE - 1]->next = -1;pool->free_index = 0;return pool;
}Node *allocate_node(MemoryPool *pool) {if (pool->free_index == -1) {printf("Memory pool exhausted\n");return NULL;}Node *node = pool->nodes[pool->free_index];pool->free_index = (int)(node->next); // 转换指针为索引return node;
}void free_node(MemoryPool *pool, Node *node) {node->next = (Node *)(pool->free_index);pool->free_index = (int)(node - pool->nodes[0]); // 计算索引
}
总结与思考
本章深入探讨了数据结构的设计与实现:
- 基础结构:栈、队列、链表的实现与应用
- 内存管理:指针与数组表示的权衡
- 树结构:二叉树表示与遍历算法
- 散列表:冲突解决策略与性能优化
- 工程实践:动态数组、内存池等优化技术
关键洞见:没有完美的数据结构,只有最适合特定场景的选择。理解操作特性和性能瓶颈是设计高效系统的关键。
下章预告:第四章《高级设计与分析技术》将探讨:
- 分治策略的数学证明
- 动态规划的原理与应用
- 贪心算法的正确性证明
- 摊还分析的三种方法
本文完整代码已上传至GitHub仓库:Algorithm-Implementations
思考题:
- 在LRU缓存实现中,为什么使用双向链表而不是单向链表?
- 当散列表的装载因子超过阈值时,如何实现高效的重散列(rehashing)操作?
- 对比链式散列和开放寻址法,各自适合什么应用场景?
- 如何扩展二叉树结构以支持区间查询操作?