当前位置：首页 > news >正文

算法导论第三章：数据结构艺术与高效实现

news 来源：原创 2025/6/13 7:10:57

算法导论第三章：数据结构艺术与高效实现

本文是《算法导论》精讲专栏第三章，通过动态操作可视化、内存布局图解和性能对比实验，结合完整C语言实现，深入解析数据结构核心原理。包含栈、队列、链表、散列表、二叉树等完整实现，以及工程实践中的优化技巧。

1. 动态集合：算法操作的基石

1.1 基本操作与数据结构选择

不同数据结构操作复杂度对比：

数据结构	Search	Insert	Delete	Minimum	Successor
无序数组	O(n)	O(1)	O(n)	O(n)	O(n)
有序数组	O(log n)	O(n)	O(n)	O(1)	O(1)
链表	O(n)	O(1)	O(1)*	O(n)	O(1)
二叉搜索树	O(h)	O(h)	O(h)	O(h)	O(h)
散列表	O(1)	O(1)	O(1)	O(n)	O(n)

*注：链表删除操作在已知节点位置时为O(1)（单向链表还需要已知其前驱节点）。散列表的O(1)为平均情况，最坏情况下为O(n)。

2. 栈与队列：线性结构的经典应用

2.1 栈：LIFO（后进先出）结构

操作原理：

      push -> |   |   |   | <- pop
              +---+---+---+
              | 3 | 2 | 1 |
              +---+---+---+
              栈顶↑     栈底

C语言实现：

#include <stdio.h>

#define MAX_SIZE 100

/*
 * Fixed-capacity LIFO stack of ints.
 * `top` is the index of the most recently pushed element, or -1 when empty.
 */
typedef struct {
    int data[MAX_SIZE];
    int top;
} Stack;

/* Reset the stack to the empty state. */
void init_stack(Stack *s) {
    s->top = -1;
}

/* Return non-zero when the stack holds no elements. */
int is_empty(const Stack *s) {
    return s->top == -1;
}

/* Return non-zero when the stack already holds MAX_SIZE elements. */
int is_full(const Stack *s) {
    return s->top == MAX_SIZE - 1;
}

/* Push `item`. On a full stack, print "Stack Overflow" and drop the item. */
void push(Stack *s, int item) {
    if (is_full(s)) {
        printf("Stack Overflow\n");
        return;
    }
    s->data[++s->top] = item;
}

/*
 * Pop and return the top element.
 * On an empty stack, print "Stack Underflow" and return -1. The -1 is an
 * in-band sentinel: callers that may store -1 should test is_empty() first.
 */
int pop(Stack *s) {
    if (is_empty(s)) {
        printf("Stack Underflow\n");
        return -1;
    }
    return s->data[s->top--];
}

/*
 * Application: bracket matching.
 *
 * Returns 1 when every '(', '[' and '{' in the NUL-terminated string `expr`
 * is closed by the matching bracket in the right order, 0 otherwise.
 * Characters that are not brackets are ignored.
 *
 * Nesting deeper than MAX_SIZE cannot be tracked by the fixed-size stack;
 * such input is reported as not balanced (0) instead of silently dropping
 * the extra opening brackets, which could make unbalanced input look
 * balanced.
 */
int is_balanced(const char *expr) {
    Stack s;
    init_stack(&s);

    for (int i = 0; expr[i]; i++) {
        char c = expr[i];

        if (c == '(' || c == '[' || c == '{') {
            if (is_full(&s)) {
                return 0; /* nesting exceeds what the stack can record */
            }
            push(&s, c);
        } else if (c == ')' || c == ']' || c == '}') {
            if (is_empty(&s)) {
                return 0; /* closing bracket with nothing open */
            }
            char open = (char)pop(&s);
            if ((c == ')' && open != '(') ||
                (c == ']' && open != '[') ||
                (c == '}' && open != '{')) {
                return 0;
            }
        }
    }

    /* Anything still on the stack is an unclosed opening bracket. */
    return is_empty(&s);
}

2.2 队列：FIFO（先进先出）结构

循环队列实现：

#include <stdio.h>

/* Capacity of the queue; keeps this listing usable on its own. */
#ifndef MAX_SIZE
#define MAX_SIZE 100
#endif

/*
 * Circular FIFO queue of ints.
 * `front` is the index of the next element to dequeue, `rear` the index of
 * the most recently enqueued element, `size` the number of stored elements.
 *
 * NOTE: is_empty/is_full below take a Queue; C has no overloading, so they
 * cannot share a translation unit with same-named helpers for another type.
 */
typedef struct {
    int data[MAX_SIZE];
    int front;
    int rear;
    int size;
} Queue;

/* Reset the queue to the empty state. */
void init_queue(Queue *q) {
    q->front = 0;
    q->rear = -1; /* first enqueue advances this to slot 0 */
    q->size = 0;
}

/* Return non-zero when the queue holds no elements. */
int is_empty(const Queue *q) {
    return q->size == 0;
}

/* Return non-zero when the queue holds MAX_SIZE elements. */
int is_full(const Queue *q) {
    return q->size == MAX_SIZE;
}

/* Append `item`. On a full queue, print "Queue Overflow" and drop the item. */
void enqueue(Queue *q, int item) {
    if (is_full(q)) {
        printf("Queue Overflow\n");
        return;
    }
    q->rear = (q->rear + 1) % MAX_SIZE;
    q->data[q->rear] = item;
    q->size++;
}

/*
 * Remove and return the oldest element.
 * On an empty queue, print "Queue Underflow" and return -1 (in-band sentinel).
 */
int dequeue(Queue *q) {
    if (is_empty(q)) {
        printf("Queue Underflow\n");
        return -1;
    }
    int item = q->data[q->front];
    q->front = (q->front + 1) % MAX_SIZE;
    q->size--;
    return item;
}

/*
 * Application: breadth-first search.
 *
 * Prints the vertices reachable from `start` in BFS order, each followed by
 * a space. `graph` is an adjacency matrix with 5 columns per row; a non-zero
 * graph[u][v] means there is an edge u -> v. `n` is the number of vertices.
 *
 * The matrix type fixes the column count at 5, so `n` is clamped to 5 and an
 * out-of-range `start` prints nothing; both would otherwise index past the
 * end of `visited`.
 */
void bfs(int graph[][5], int start, int n) {
    int visited[5] = {0};
    const int max_nodes = (int)(sizeof visited / sizeof visited[0]);

    if (n > max_nodes) {
        n = max_nodes;
    }
    if (start < 0 || start >= n) {
        return;
    }

    Queue q;
    init_queue(&q);

    /* Mark on enqueue so each vertex enters the queue at most once. */
    visited[start] = 1;
    enqueue(&q, start);

    while (!is_empty(&q)) {
        int node = dequeue(&q);
        printf("%d ", node);

        for (int i = 0; i < n; i++) {
            if (graph[node][i] && !visited[i]) {
                visited[i] = 1;
                enqueue(&q, i);
            }
        }
    }
}

3. 链表：动态内存的优雅组织

3.1 链表类型与性能对比

类型	优点	缺点	应用场景
单向链表	插入/删除快，内存小	只能单向遍历	简单列表，LRU缓存
双向链表	双向遍历，删除高效	内存占用大	浏览器历史记录
循环链表	循环访问，无边界	实现复杂	轮询调度，约瑟夫问题
跳跃链表	快速查找(O(log n))	实现复杂，维护成本高	Redis有序集合

3.2 双向链表完整实现

#include <stdlib.h>

/* Node of a doubly linked list carrying one int payload. */
typedef struct Node {
    int data;
    struct Node *prev;
    struct Node *next;
} Node;

/* Doubly linked list with O(1) access to both ends. */
typedef struct {
    Node *head;
    Node *tail;
    int size;
} DoublyLinkedList;

/*
 * Allocate a detached node holding `data`.
 * Returns NULL when the allocation fails; the caller owns the node.
 */
Node *create_node(int data) {
    Node *new_node = malloc(sizeof *new_node);
    if (new_node == NULL) {
        return NULL;
    }
    new_node->data = data;
    new_node->prev = NULL;
    new_node->next = NULL;
    return new_node;
}

/* Reset `list` to the empty state (does not free existing nodes). */
void init_list(DoublyLinkedList *list) {
    list->head = NULL;
    list->tail = NULL;
    list->size = 0;
}

/* Link an already allocated, detached node in as the new head. */
static void link_front(DoublyLinkedList *list, Node *node) {
    node->prev = NULL;
    node->next = list->head;
    if (list->head != NULL) {
        list->head->prev = node;
    } else {
        list->tail = node; /* list was empty */
    }
    list->head = node;
    list->size++;
}

/* Detach `node` (which must belong to `list`) without freeing it. */
static void unlink_node(DoublyLinkedList *list, Node *node) {
    if (node->prev != NULL) {
        node->prev->next = node->next;
    } else {
        list->head = node->next;
    }
    if (node->next != NULL) {
        node->next->prev = node->prev;
    } else {
        list->tail = node->prev;
    }
    node->prev = NULL;
    node->next = NULL;
    list->size--;
}

/* Insert `data` at the head. The list is left unchanged if allocation fails. */
void insert_front(DoublyLinkedList *list, int data) {
    Node *new_node = create_node(data);
    if (new_node == NULL) {
        return;
    }
    link_front(list, new_node);
}

/* Insert `data` at the tail. The list is left unchanged if allocation fails. */
void insert_end(DoublyLinkedList *list, int data) {
    Node *new_node = create_node(data);
    if (new_node == NULL) {
        return;
    }
    new_node->prev = list->tail;
    if (list->tail != NULL) {
        list->tail->next = new_node;
    } else {
        list->head = new_node; /* list was empty */
    }
    list->tail = new_node;
    list->size++;
}

/*
 * Unlink `node` from `list` and free it. A NULL node is ignored.
 * `node` must currently be a member of `list`.
 */
void delete_node(DoublyLinkedList *list, Node *node) {
    if (node == NULL) {
        return;
    }
    unlink_node(list, node);
    free(node);
}

/* Application: LRU cache. */

/* Keys are used directly as table indices and must lie in [0, LRU_KEY_LIMIT). */
enum { LRU_KEY_LIMIT = 1000 };

/*
 * LRU cache for int keys and int values.
 *
 * `list` holds one node per cached key, most recently used at the head; each
 * node's `data` is the KEY, so the eviction victim (the tail) identifies
 * which table entries to clear. `hash_table` is a simplified, direct-indexed
 * key -> node map, and `values` is the matching key -> value array.
 */
typedef struct {
    int capacity;
    DoublyLinkedList list;
    Node **hash_table;
    int *values;
} LRUCache;

/* Return non-zero when `key` can index the cache tables. */
static int lru_key_in_range(int key) {
    return key >= 0 && key < LRU_KEY_LIMIT;
}

/*
 * Create a cache holding at most `capacity` entries.
 * Returns NULL when any allocation fails.
 */
LRUCache *create_cache(int capacity) {
    LRUCache *cache = malloc(sizeof *cache);
    if (cache == NULL) {
        return NULL;
    }
    cache->capacity = capacity;
    init_list(&cache->list);
    cache->hash_table = calloc(LRU_KEY_LIMIT, sizeof *cache->hash_table);
    cache->values = calloc(LRU_KEY_LIMIT, sizeof *cache->values);
    if (cache->hash_table == NULL || cache->values == NULL) {
        free(cache->hash_table);
        free(cache->values);
        free(cache);
        return NULL;
    }
    return cache;
}

/*
 * Look up `key` and mark it as most recently used.
 * Returns the stored value, or -1 when the key is absent or out of range
 * (-1 is an in-band sentinel, so a stored -1 is indistinguishable from a miss).
 */
int get(LRUCache *cache, int key) {
    if (cache == NULL || !lru_key_in_range(key)) {
        return -1;
    }
    Node *node = cache->hash_table[key];
    if (node == NULL) {
        return -1;
    }
    /* Relink the same node; it is never freed, so the table entry stays valid. */
    unlink_node(&cache->list, node);
    link_front(&cache->list, node);
    return cache->values[key];
}

/*
 * Insert or update `key` -> `value` and mark it as most recently used.
 * When the cache is full, the least recently used key is evicted first.
 * Does nothing for an out-of-range key, a non-positive capacity, or when the
 * new node cannot be allocated.
 */
void put(LRUCache *cache, int key, int value) {
    if (cache == NULL || cache->capacity <= 0 || !lru_key_in_range(key)) {
        return;
    }

    Node *node = cache->hash_table[key];
    if (node != NULL) {
        /* Existing key: refresh the value and its recency. */
        cache->values[key] = value;
        unlink_node(&cache->list, node);
        link_front(&cache->list, node);
        return;
    }

    /* Allocate before evicting so a failed allocation loses no entry. */
    node = create_node(key);
    if (node == NULL) {
        return;
    }

    if (cache->list.size >= cache->capacity) {
        Node *victim = cache->list.tail;
        cache->hash_table[victim->data] = NULL;
        delete_node(&cache->list, victim);
    }

    link_front(&cache->list, node);
    cache->hash_table[key] = node;
    cache->values[key] = value;
}

4. 内存中的指针与对象

4.1 三种内存表示方式

数组实现图示：

索引:   0     1     2     3     4     5
     +-----+-----+-----+-----+-----+-----+
key: |  10 |  20 |  30 |     |     |     |
next:|  1  |  2  | -1  |     |     |     |
prev:| -1  |  0  |  1  |     |     |     |
     +-----+-----+-----+-----+-----+-----+
        ↑ 头节点

C语言实现：

#include <stdio.h>

#define MAX_SIZE 100

/* List node stored in an array; links are array indices, -1 means "none". */
typedef struct {
    int key;
    int next;
    int prev;
} ListNode;

/*
 * Doubly linked list implemented on top of a fixed array.
 * `free_list` heads a singly linked chain (through `next`) of unused slots,
 * `head` is the index of the first list element (-1 when empty), and `size`
 * counts the elements currently in the list.
 */
typedef struct {
    ListNode nodes[MAX_SIZE];
    int free_list;
    int head;
    int size;
} ArrayLinkedList;

/* Put every slot on the free chain and make the list empty. */
void init_array_list(ArrayLinkedList *list) {
    for (int i = 0; i < MAX_SIZE; i++) {
        list->nodes[i].next = (i + 1 < MAX_SIZE) ? i + 1 : -1;
        list->nodes[i].prev = -1;
    }
    list->free_list = 0;
    list->head = -1;
    list->size = 0;
}

/*
 * Take one slot off the free chain.
 * Returns its index, or -1 after printing "Out of space" when none is left.
 */
int allocate_object(ArrayLinkedList *list) {
    if (list->free_list == -1) {
        printf("Out of space\n");
        return -1;
    }
    int index = list->free_list;
    list->free_list = list->nodes[index].next;
    return index;
}

/*
 * Return slot `index` to the free chain. Out-of-range indices are ignored.
 * This does not unlink the slot from the list or adjust `size`; the caller
 * must do that first.
 */
void free_object(ArrayLinkedList *list, int index) {
    if (index < 0 || index >= MAX_SIZE) {
        return;
    }
    list->nodes[index].next = list->free_list;
    list->free_list = index;
}

/*
 * Insert `key` so that the list stays in ascending key order.
 * A key equal to existing ones is placed before them. Nothing is inserted
 * when no free slot is available.
 */
void insert_sorted(ArrayLinkedList *list, int key) {
    int new_index = allocate_object(list);
    if (new_index == -1) {
        return;
    }

    /* Find the first node whose key is >= key; `prev` trails one behind. */
    int curr = list->head;
    int prev = -1;
    while (curr != -1 && list->nodes[curr].key < key) {
        prev = curr;
        curr = list->nodes[curr].next;
    }

    list->nodes[new_index].key = key;
    list->nodes[new_index].next = curr;
    list->nodes[new_index].prev = prev;

    if (prev == -1) {
        list->head = new_index; /* new smallest element (or first element) */
    } else {
        list->nodes[prev].next = new_index;
    }
    if (curr != -1) {
        list->nodes[curr].prev = new_index;
    }
    list->size++;
}

5. 树结构：层次化数据组织

5.1 二叉树表示法

三种表示法对比：

表示法	存储结构	优点	缺点
左右链表示法	节点存储左右子节点指针	结构灵活，操作高效	内存占用大
数组表示法	按层级顺序存储在数组中	内存紧凑，访问快速	不适合动态变化
父亲链表示法	节点存储父节点指针	适合找父节点操作	查找子节点效率低

C语言实现（左右链法）：

#include <stdio.h>
#include <stdlib.h>

/* Binary tree node in left/right-child representation. */
typedef struct TreeNode {
    int data;
    struct TreeNode *left;
    struct TreeNode *right;
} TreeNode;

/*
 * Allocate a leaf holding `data`.
 * Returns NULL when the allocation fails; the caller owns the node.
 */
TreeNode *create_node(int data) {
    TreeNode *node = malloc(sizeof *node);
    if (node == NULL) {
        return NULL;
    }
    node->data = data;
    node->left = NULL;
    node->right = NULL;
    return node;
}

/*
 * Binary search tree insertion.
 *
 * Inserts `data` into the tree rooted at `root` and returns the (possibly
 * new) root. Values already present are not inserted again. If the new node
 * cannot be allocated, the tree is returned unchanged.
 *
 * The descent is iterative, so the depth of the tree does not translate into
 * call-stack depth (e.g. when keys arrive in sorted order).
 */
TreeNode *insert(TreeNode *root, int data) {
    /* `slot` always points at the child link where `data` would belong. */
    TreeNode **slot = &root;

    while (*slot != NULL) {
        if (data < (*slot)->data) {
            slot = &(*slot)->left;
        } else if (data > (*slot)->data) {
            slot = &(*slot)->right;
        } else {
            return root; /* duplicate: nothing to do */
        }
    }

    *slot = create_node(data);
    return root;
}

/* The three depth-first traversals; each prints a value followed by a space. */

/* Pre-order: node, left subtree, right subtree. */
void preorder(TreeNode *root) {
    if (root == NULL) {
        return;
    }
    printf("%d ", root->data);
    preorder(root->left);
    preorder(root->right);
}

/* In-order: left subtree, node, right subtree. */
void inorder(TreeNode *root) {
    if (root == NULL) {
        return;
    }
    inorder(root->left);
    printf("%d ", root->data);
    inorder(root->right);
}

/* Post-order: left subtree, right subtree, node. */
void postorder(TreeNode *root) {
    if (root == NULL) {
        return;
    }
    postorder(root->left);
    postorder(root->right);
    printf("%d ", root->data);
}

5.2 树的遍历应用

表达式树求值：

        *
       / \
      +   -
     / \ / \
    2  3 5  1

/*
 * Evaluate an expression tree.
 *
 * A node without children is an operand and its `data` is the value. Any
 * other node is an operator whose `data` is one of the characters '+', '-',
 * '*', '/', applied to the values of its left and right subtrees.
 *
 * Returns the value of the expression. Returns 0 for a NULL tree, for an
 * unrecognised operator, and for a division whose right operand is 0
 * (integer division by zero is undefined behaviour in C, so it is never
 * executed).
 */
int evaluate_expression(TreeNode *root) {
    if (root == NULL) {
        return 0;
    }

    /* Leaf node: operand. */
    if (!root->left && !root->right) {
        return root->data;
    }

    int left_val = evaluate_expression(root->left);
    int right_val = evaluate_expression(root->right);

    switch (root->data) {
    case '+':
        return left_val + right_val;
    case '-':
        return left_val - right_val;
    case '*':
        return left_val * right_val;
    case '/':
        if (right_val == 0) {
            return 0;
        }
        return left_val / right_val;
    default:
        return 0;
    }
}

6. 散列表：快速访问的魔法

6.1 散列函数设计原则

均匀性：键值均匀分布到各个槽
高效性：计算速度快
确定性：相同键产生相同散列值

常见散列函数：

/*
 * Division method: h(k) = k mod m.
 *
 * Returns a slot in [0, m). C's % keeps the sign of the dividend, so a
 * negative remainder is shifted back into range. Returns -1 when m <= 0.
 */
int division_hash(int key, int m) {
    if (m <= 0) {
        return -1;
    }
    int slot = key % m;
    return slot < 0 ? slot + m : slot;
}

/*
 * Multiplication method: h(k) = floor(m * (k * A mod 1)).
 *
 * Returns a slot in [0, m), or -1 when m <= 0.
 */
int multiplication_hash(int key, int m) {
    if (m <= 0) {
        return -1;
    }
    const double A = 0.6180339887; /* golden ratio conjugate */
    double val = key * A;

    /* Fractional part of val; truncation rounds toward zero, so fix up
     * negative products to get the floor-based "mod 1". */
    double frac = val - (double)(long long)val;
    if (frac < 0.0) {
        frac += 1.0;
    }

    int slot = (int)(m * frac);
    if (slot >= m) {
        slot = m - 1; /* guards against frac rounding up to exactly 1.0 */
    }
    return slot;
}

/*
 * Universal hashing (avoids a fixed worst case): h(k) = ((a*k + b) mod p) mod m.
 *
 * The product is computed in long long so a*k + b cannot overflow int, and a
 * negative remainder is shifted into [0, p) before the final reduction.
 * Returns a slot in [0, m), or -1 when p <= 0 or m <= 0.
 */
int universal_hash(int key, int a, int b, int p, int m) {
    if (p <= 0 || m <= 0) {
        return -1;
    }
    long long r = ((long long)a * key + b) % p;
    if (r < 0) {
        r += p;
    }
    return (int)(r % m);
}

6.2 冲突解决策略

6.2.1 链地址法

C语言实现：

#include <stdlib.h>

#define TABLE_SIZE 10

/* Entry in a bucket's singly linked chain. */
typedef struct HashNode {
    int key;
    int value;
    struct HashNode *next;
} HashNode;

/* Hash map with separate chaining; `buckets` has `size` chain heads. */
typedef struct {
    HashNode **buckets;
    int size;
} HashMap;

/*
 * Create an empty map with TABLE_SIZE buckets.
 * Returns NULL when an allocation fails.
 */
HashMap *create_hash_map(void) {
    HashMap *map = malloc(sizeof *map);
    if (map == NULL) {
        return NULL;
    }
    map->size = TABLE_SIZE;
    map->buckets = calloc(TABLE_SIZE, sizeof *map->buckets);
    if (map->buckets == NULL) {
        free(map);
        return NULL;
    }
    return map;
}

/*
 * Map `key` to a bucket index in [0, TABLE_SIZE).
 * C's % keeps the sign of the dividend, so negative remainders are shifted
 * back into range.
 */
int hash(int key) {
    int index = key % TABLE_SIZE;
    return index < 0 ? index + TABLE_SIZE : index;
}

/*
 * Insert `key` -> `value`, or overwrite the value when the key already exists.
 * Every node of the chain is compared, including the last one, so a key is
 * never stored twice. The map is left unchanged if a new node cannot be
 * allocated.
 */
void put(HashMap *map, int key, int value) {
    /* `link` points at the pointer that would have to reference a new node. */
    HashNode **link = &map->buckets[hash(key)];

    while (*link != NULL) {
        if ((*link)->key == key) {
            (*link)->value = value; /* update existing key */
            return;
        }
        link = &(*link)->next;
    }

    HashNode *new_node = malloc(sizeof *new_node);
    if (new_node == NULL) {
        return;
    }
    new_node->key = key;
    new_node->value = value;
    new_node->next = NULL;
    *link = new_node; /* append at the end of the chain */
}

/*
 * Return the value stored for `key`, or -1 when the key is not present
 * (-1 is an in-band sentinel, so a stored -1 looks like a miss).
 */
int get(HashMap *map, int key) {
    for (HashNode *current = map->buckets[hash(key)]; current != NULL;
         current = current->next) {
        if (current->key == key) {
            return current->value;
        }
    }
    return -1; /* not found */
}

6.2.2 开放寻址法

探查序列比较：

方法	探查序列	优点	缺点
线性探查	h(k,i) = (h’(k) + i) mod m	实现简单	聚集现象严重
二次探查	h(k,i) = (h’(k)+c₁i+c₂i²) mod m	减少聚集	可能无法探查所有槽
双重散列	h(k,i) = (h₁(k)+i*h₂(k)) mod m	最接近均匀探查	计算复杂

双重散列实现：

#include <stdio.h>

#define TABLE_SIZE 10
#define EMPTY (-1)   /* slot has never held a key */
#define DELETED (-2) /* slot held a key that was removed (tombstone) */

/* Greatest common divisor of two positive ints. */
static int probe_gcd(int a, int b) {
    while (b != 0) {
        int t = a % b;
        a = b;
        b = t;
    }
    return a;
}

/* EMPTY and DELETED mark slot states and therefore cannot be stored as keys. */
static int is_reserved_key(int key) {
    return key == EMPTY || key == DELETED;
}

/* Primary hash: start slot in [0, TABLE_SIZE), also for negative keys. */
int hash1(int key) {
    int slot = key % TABLE_SIZE;
    return slot < 0 ? slot + TABLE_SIZE : slot;
}

/*
 * Secondary hash: probe step, never 0.
 *
 * Starts from the usual 7 - (key mod 7), then advances to the next value that
 * is coprime to TABLE_SIZE. A step sharing a factor with the table size only
 * visits a fraction of the slots, which would make inserts fail while free
 * slots remain.
 */
int hash2(int key) {
    int rem = key % 7;
    if (rem < 0) {
        rem += 7;
    }
    int step = 7 - rem; /* 1..7 */
    while (probe_gcd(step, TABLE_SIZE) != 1) {
        step++;
    }
    return step;
}

/* Slot examined on the i-th probe (i >= 0) for `key`. */
int double_hash(int key, int i) {
    return (hash1(key) + i * hash2(key)) % TABLE_SIZE;
}

/*
 * Insert `key` into `table` (TABLE_SIZE slots, unused ones set to EMPTY).
 * The first EMPTY or DELETED slot on the probe sequence is used.
 * Prints "Hash table overflow" when no slot is free, and "Reserved key" when
 * `key` equals one of the EMPTY/DELETED markers (the table is not modified).
 */
void insert_open_addressing(int table[], int key) {
    if (is_reserved_key(key)) {
        printf("Reserved key\n");
        return;
    }
    for (int i = 0; i < TABLE_SIZE; i++) {
        int j = double_hash(key, i);
        if (table[j] == EMPTY || table[j] == DELETED) {
            table[j] = key;
            return;
        }
    }
    printf("Hash table overflow\n");
}

/*
 * Return the slot index holding `key`, or -1 when it is not in the table.
 * Probing stops at the first EMPTY slot; DELETED slots are skipped so keys
 * inserted past a tombstone are still found.
 */
int search_open_addressing(int table[], int key) {
    if (is_reserved_key(key)) {
        return -1; /* would otherwise "match" a marker slot */
    }
    for (int i = 0; i < TABLE_SIZE; i++) {
        int j = double_hash(key, i);
        if (table[j] == EMPTY) {
            return -1;
        }
        if (table[j] == key) {
            return j;
        }
    }
    return -1;
}

7. 工程优化与性能分析

7.1 动态数组：STL vector 原理

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Growable int array.
 * `data` points to `capacity` allocated slots, of which the first `size`
 * hold elements.
 */
typedef struct {
    int *data;
    int size;
    int capacity;
} DynamicArray;

/*
 * Initialise `arr` with room for `capacity` elements.
 * A non-positive capacity, or a failed allocation (reported with
 * "Memory allocation failed"), leaves the array empty with capacity 0;
 * push_back can still grow it later.
 */
void init_dynamic_array(DynamicArray *arr, int capacity) {
    arr->data = NULL;
    arr->size = 0;
    arr->capacity = 0;

    if (capacity <= 0) {
        return;
    }
    int *buf = malloc((size_t)capacity * sizeof *buf);
    if (buf == NULL) {
        printf("Memory allocation failed\n");
        return;
    }
    arr->data = buf;
    arr->capacity = capacity;
}

/*
 * Append `value`, growing the storage when it is full.
 * Growth strategy: double the capacity (an empty array grows to 1, since
 * doubling 0 would never make room). If the storage cannot be grown, prints
 * "Memory allocation failed" and leaves the array unchanged.
 */
void push_back(DynamicArray *arr, int value) {
    if (arr->size >= arr->capacity) {
        if (arr->capacity > INT_MAX / 2) {
            /* Doubling would overflow int. */
            printf("Memory allocation failed\n");
            return;
        }
        int new_capacity = arr->capacity > 0 ? arr->capacity * 2 : 1;

        /* Keep the old pointer until realloc succeeds so nothing leaks. */
        int *new_data = realloc(arr->data, (size_t)new_capacity * sizeof *new_data);
        if (new_data == NULL) {
            printf("Memory allocation failed\n");
            return;
        }
        arr->data = new_data;
        arr->capacity = new_capacity;
    }
    arr->data[arr->size++] = value;
}

// Time complexity: amortized O(1) per push_back.
// Sketch: consider the total cost of n consecutive insertions (taking the
// initial capacity as 1). Besides the n writes, each doubling copies the
// current elements, and the last doubling copies 2^k < n of them:
// T(n) = n + (1 + 2 + 4 + ... + 2^k) < n + 2^(k+1) < 3n
// => amortized cost per operation = T(n)/n < 3

7.2 内存池优化

#define POOL_SIZE 1000typedef struct {Node *nodes[POOL_SIZE];int free_index;
} MemoryPool;MemoryPool *create_memory_pool() {MemoryPool *pool = (MemoryPool *)malloc(sizeof(MemoryPool));for (int i = 0; i < POOL_SIZE - 1; i++) {pool->nodes[i] = (Node *)malloc(sizeof(Node));pool->nodes[i]->next = i + 1; // 使用next指针连接空闲节点}pool->nodes[POOL_SIZE - 1] = (Node *)malloc(sizeof(Node));pool->nodes[POOL_SIZE - 1]->next = -1;pool->free_index = 0;return pool;
}Node *allocate_node(MemoryPool *pool) {if (pool->free_index == -1) {printf("Memory pool exhausted\n");return NULL;}Node *node = pool->nodes[pool->free_index];pool->free_index = (int)(node->next); // 转换指针为索引return node;
}void free_node(MemoryPool *pool, Node *node) {node->next = (Node *)(pool->free_index);pool->free_index = (int)(node - pool->nodes[0]); // 计算索引
}