算法导论第十章:基本数据结构的艺术与科学
本文是《算法导论》精讲专栏第十章,通过动态操作图解、内存结构可视化和性能对比实验,结合完整C语言实现,深入解析基本数据结构的精髓。包含栈、队列、链表、哈希表、二叉树等核心内容,提供15个以上完整代码实现。
一、数据结构:计算世界的基石
1.1 数据结构核心概念
1.2 数据结构操作复杂度
数据结构 | 插入 | 删除 | 查找 | 访问 | 空间复杂度 |
---|---|---|---|---|---|
数组 | O(n) | O(n) | O(n) | O(1) | O(n) |
链表 | O(1) | O(1) | O(n) | O(n) | O(n) |
栈 | O(1) | O(1) | O(n) | O(1) | O(n) |
队列 | O(1) | O(1) | O(n) | O(1) | O(n) |
哈希表(平均情况) | O(1) | O(1) | O(1) | O(1) | O(n) |
二叉搜索树 | O(h) | O(h) | O(h) | O(h) | O(n) |
堆 | O(log n) | O(log n) | O(n) | O(1) | O(n) |
二、栈:后进先出(LIFO)的魔力
2.1 栈的基本操作
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
/* Fixed element capacity of the array-backed containers in this file. */
#define MAX_SIZE 100
/* Array-backed stack of ints. */
typedef struct {
    int data[MAX_SIZE]; /* element storage; slots 0..top hold live values */
    int top;            /* index of the top element; stack_init sets it to -1 (empty) */
} Stack;
/* Put the stack into its empty state (top index -1). */
void stack_init(Stack *s) {
    const int empty_top = -1;
    s->top = empty_top;
}
/* Return 1 when the stack holds no elements (top index is -1), otherwise 0. */
int stack_empty(Stack *s) {
    int no_elements = (-1 == s->top);
    return no_elements;
}
/* Return 1 when the top index sits on the last array slot, otherwise 0. */
int stack_full(Stack *s) {
    const int last_slot = MAX_SIZE - 1;
    return last_slot == s->top;
}
/*
 * Push `value` onto the stack.
 * When the stack is already full, a message is printed and the value is dropped.
 */
void stack_push(Stack *s, int value) {
    if (!stack_full(s)) {
        s->top += 1;
        s->data[s->top] = value;
        return;
    }
    printf("栈溢出\n");
}
/*
 * Remove and return the top element.
 * On an empty stack a message is printed and -1 is returned; callers cannot
 * tell that apart from a stored -1.
 */
int stack_pop(Stack *s) {
    if (!stack_empty(s)) {
        int popped = s->data[s->top];
        s->top -= 1;
        return popped;
    }
    printf("栈下溢\n");
    return -1;
}
/* Return the top element without removing it, or -1 when the stack is empty. */
int stack_peek(Stack *s) {
    return stack_empty(s) ? -1 : s->data[s->top];
}
/*
 * Print the stack contents from bottom to top, prefixed by the name of the
 * operation that was just performed, followed by the current top index.
 */
void visualize_stack(Stack *s, const char* operation) {
    printf("%s后: [", operation);
    int pos = 0;
    while (pos <= s->top) {
        printf("%d", s->data[pos]);
        if (pos != s->top) {
            printf(", ");
        }
        pos++;
    }
    printf("] 栈顶=%d\n", s->top);
}
2.2 栈的应用场景
- 函数调用栈:程序执行时的函数调用管理
- 表达式求值:中缀表达式转后缀表达式
- 括号匹配:检查代码中的括号是否合法
- 浏览器历史:网页的前进后退功能
- 撤销机制:编辑器的撤销操作实现
括号匹配算法:
/*
 * Check whether the brackets (), [] and {} in `expr` are balanced and
 * properly nested.
 *
 * Characters that are not brackets are ignored, so ordinary code or
 * expressions such as "(a + b) * c[0]" can be checked.
 *
 * Returns 1 when balanced, 0 otherwise. Nesting deeper than the stack
 * capacity cannot be tracked and is reported as unbalanced.
 */
int is_balanced(char *expr) {
    Stack s;
    stack_init(&s);
    for (int i = 0; expr[i]; i++) {
        char expected_open;
        switch (expr[i]) {
        case '(':
        case '[':
        case '{':
            if (stack_full(&s)) {
                return 0; /* too deep to verify */
            }
            stack_push(&s, expr[i]);
            continue;
        case ')':
            expected_open = '(';
            break;
        case ']':
            expected_open = '[';
            break;
        case '}':
            expected_open = '{';
            break;
        default:
            continue; /* not a bracket: irrelevant to balance */
        }
        /* A closer must match the most recent unmatched opener. */
        if (stack_empty(&s) || stack_peek(&s) != expected_open) {
            return 0;
        }
        stack_pop(&s);
    }
    /* Any opener still on the stack was never closed. */
    return stack_empty(&s);
}
三、队列:先进先出(FIFO)的智慧
3.1 循环队列实现
/* Circular-buffer queue of ints with capacity MAX_SIZE. */
typedef struct {
    int data[MAX_SIZE]; /* ring buffer storage */
    int front;          /* index of the oldest element (next to dequeue) */
    int rear;           /* index of the newest element; queue_init sets it to -1 */
    int size;           /* number of elements currently stored */
} Queue;
/*
 * Reset the queue to empty: no elements, front at slot 0, and rear one step
 * "before" slot 0 so the first enqueue lands on slot 0.
 */
void queue_init(Queue *q) {
    q->size = 0;
    q->rear = -1;
    q->front = 0;
}
/* Return 1 when the queue holds no elements, otherwise 0. */
int queue_empty(Queue *q) {
    int nothing_stored = (0 == q->size);
    return nothing_stored;
}
/* Return 1 when the queue holds MAX_SIZE elements, otherwise 0. */
int queue_full(Queue *q) {
    int at_capacity = (MAX_SIZE == q->size);
    return at_capacity;
}
/*
 * Append `value` at the rear of the queue.
 * When the queue is full, a message is printed and the value is dropped.
 */
void enqueue(Queue *q, int value) {
    if (!queue_full(q)) {
        int slot = (q->rear + 1) % MAX_SIZE; /* wrap around the ring */
        q->data[slot] = value;
        q->rear = slot;
        q->size += 1;
        return;
    }
    printf("队列已满\n");
}
/*
 * Remove and return the element at the front of the queue.
 * On an empty queue a message is printed and -1 is returned; callers cannot
 * tell that apart from a stored -1.
 */
int dequeue(Queue *q) {
    if (queue_empty(q)) {
        printf("队列为空\n");
        return -1;
    }
    int head_slot = q->front;
    q->front = (head_slot + 1) % MAX_SIZE; /* wrap around the ring */
    q->size -= 1;
    return q->data[head_slot];
}
/*
 * Print the queue contents from front to rear, prefixed by the name of the
 * operation that was just performed, followed by the element count.
 */
void visualize_queue(Queue *q, const char* operation) {
    printf("%s后: [", operation);
    for (int shown = 0, slot = q->front; shown < q->size;
         shown++, slot = (slot + 1) % MAX_SIZE) {
        printf("%d", q->data[slot]);
        if (shown < q->size - 1) {
            printf(", ");
        }
    }
    printf("] 大小=%d\n", q->size);
}
3.2 队列应用场景
- BFS算法:图的广度优先搜索
- 任务调度:操作系统进程调度
- 消息队列:分布式系统通信
- 打印队列:打印机任务管理
- 实时系统:事件处理队列
BFS算法实现:
/*
 * Breadth-first traversal of a graph stored as an adjacency matrix with at
 * most 5 vertices (the matrix row width), printing vertices in visit order.
 *
 * graph: adjacency matrix; graph[u][v] != 0 means an edge u -> v.
 * start: vertex to start from, must be in [0, n).
 * n:     number of vertices in use, must be in [1, 5].
 *
 * Invalid arguments are reported and nothing is traversed, because the
 * visited array and the matrix rows only have room for 5 vertices.
 */
void bfs(int graph[][5], int start, int n) {
    enum { MAX_VERTICES = 5 };
    if (n < 1 || n > MAX_VERTICES || start < 0 || start >= n) {
        printf("BFS参数无效\n");
        return;
    }
    int visited[MAX_VERTICES] = {0};
    Queue q;
    queue_init(&q);
    /* Mark on enqueue so each vertex enters the queue at most once. */
    visited[start] = 1;
    enqueue(&q, start);
    printf("BFS遍历顺序: ");
    while (!queue_empty(&q)) {
        int node = dequeue(&q);
        printf("%d ", node);
        for (int i = 0; i < n; i++) {
            if (graph[node][i] && !visited[i]) {
                visited[i] = 1;
                enqueue(&q, i);
            }
        }
    }
    printf("\n");
}
四、链表:动态连接的优雅
4.1 链表类型对比
类型 | 优点 | 缺点 | 应用场景 |
---|---|---|---|
单向链表 | 插入/删除快,内存小 | 只能单向遍历 | 简单列表 |
双向链表 | 双向遍历,删除高效 | 内存占用大 | 浏览器历史记录,LRU缓存 |
循环链表 | 循环访问,无边界 | 实现复杂 | 轮询调度,约瑟夫问题 |
跳跃链表 | 快速查找(O(log n)) | 实现复杂,维护成本高 | Redis有序集合 |
4.2 双向链表实现
/* Node of a doubly linked list of ints. */
typedef struct Node {
    int data;          /* payload */
    struct Node *prev; /* previous node, NULL at the head */
    struct Node *next; /* next node, NULL at the tail */
} Node;
/* Doubly linked list handle tracking both ends and the element count. */
typedef struct {
    Node *head; /* first node, NULL when the list is empty */
    Node *tail; /* last node, NULL when the list is empty */
    int size;   /* number of nodes in the list */
} DoublyLinkedList;
/*
 * Allocate an unlinked list node holding `data`.
 *
 * Returns the new node with prev/next set to NULL, or NULL when memory
 * allocation fails. The caller owns the node and must release it with free().
 */
Node *create_node(int data) {
    Node *new_node = malloc(sizeof *new_node);
    if (new_node == NULL) {
        return NULL;
    }
    new_node->data = data;
    new_node->prev = NULL;
    new_node->next = NULL;
    return new_node;
}
/* Reset the list handle to the empty state: no nodes at either end, size 0. */
void dll_init(DoublyLinkedList *list) {
    list->size = 0;
    list->head = list->tail = NULL;
}
/*
 * Insert a new node holding `data` at the head of the list.
 * If the node cannot be allocated the list is left unchanged.
 */
void dll_insert_front(DoublyLinkedList *list, int data) {
    Node *new_node = create_node(data);
    if (new_node == NULL) {
        return; /* allocation failed: do not touch the list or its size */
    }
    if (list->head == NULL) {
        /* First node is both head and tail. */
        list->head = list->tail = new_node;
    } else {
        new_node->next = list->head;
        list->head->prev = new_node;
        list->head = new_node;
    }
    list->size++;
}
/*
 * Append a new node holding `data` at the tail of the list.
 * If the node cannot be allocated the list is left unchanged.
 */
void dll_insert_end(DoublyLinkedList *list, int data) {
    Node *new_node = create_node(data);
    if (new_node == NULL) {
        return; /* allocation failed: do not touch the list or its size */
    }
    if (list->tail == NULL) {
        /* First node is both head and tail. */
        list->head = list->tail = new_node;
    } else {
        list->tail->next = new_node;
        new_node->prev = list->tail;
        list->tail = new_node;
    }
    list->size++;
}
/*
 * Unlink `node` from the list, free it, and decrement the size.
 * A NULL `node` is ignored. The node is expected to belong to `list`.
 */
void dll_delete_node(DoublyLinkedList *list, Node *node) {
    if (!node) {
        return;
    }
    Node *before = node->prev;
    Node *after = node->next;
    if (list->head == node) {
        list->head = after;
        if (after) {
            after->prev = NULL;
        }
    }
    if (list->tail == node) {
        list->tail = before;
        if (before) {
            before->next = NULL;
        }
    }
    /* Bridge the neighbours over the removed node. */
    if (before) {
        before->next = after;
    }
    if (after) {
        after->prev = before;
    }
    free(node);
    list->size -= 1;
}
/*
 * Print the list head-to-tail, then tail-to-head, then its size.
 * Walking both directions makes broken prev/next links visible.
 */
void visualize_dll(DoublyLinkedList *list) {
    printf("链表: ");
    for (Node *walker = list->head; walker != NULL; walker = walker->next) {
        printf("%d", walker->data);
        if (walker->next != NULL) {
            printf(" ⇄ ");
        }
    }
    printf("\n");
    printf("反向: ");
    for (Node *walker = list->tail; walker != NULL; walker = walker->prev) {
        printf("%d", walker->data);
        if (walker->prev != NULL) {
            printf(" ⇄ ");
        }
    }
    printf("\n大小: %d\n", list->size);
}
4.3 链表高级应用:LRU缓存
/* Keys index hash_table directly, so valid keys are 0 .. LRU_KEY_LIMIT - 1. */
#define LRU_KEY_LIMIT 1000

/*
 * LRU cache mapping small non-negative int keys to int values.
 * The list is ordered by recency: head = most recently used, tail = least.
 */
typedef struct {
    int capacity;          /* maximum number of cached entries */
    DoublyLinkedList list; /* recency order; node->data holds the value */
    Node **hash_table;     /* simplified hash table: key -> list node, NULL when absent */
} LRUCache;

/*
 * List node extended with the key it is stored under, so eviction can clear
 * the correct hash_table slot. `link` must remain the first member: every
 * node in cache->list is the `link` of an LRUEntry and is cast back to it.
 */
typedef struct {
    Node link;
    int key;
} LRUEntry;

/* Return 1 when `key` is a valid hash_table index. */
static int lru_key_valid(int key) {
    return key >= 0 && key < LRU_KEY_LIMIT;
}

/* Detach `node` from `list` without freeing it. */
static void lru_unlink(DoublyLinkedList *list, Node *node) {
    if (node->prev) {
        node->prev->next = node->next;
    } else {
        list->head = node->next;
    }
    if (node->next) {
        node->next->prev = node->prev;
    } else {
        list->tail = node->prev;
    }
    node->prev = NULL;
    node->next = NULL;
    list->size--;
}

/* Link an already-allocated, detached `node` at the head of `list`. */
static void lru_push_front(DoublyLinkedList *list, Node *node) {
    node->prev = NULL;
    node->next = list->head;
    if (list->head) {
        list->head->prev = node;
    } else {
        list->tail = node;
    }
    list->head = node;
    list->size++;
}

/*
 * Create an empty cache holding at most `capacity` entries.
 * Returns NULL when memory allocation fails.
 */
LRUCache *create_cache(int capacity) {
    LRUCache *cache = malloc(sizeof *cache);
    if (cache == NULL) {
        return NULL;
    }
    cache->hash_table = calloc(LRU_KEY_LIMIT, sizeof *cache->hash_table);
    if (cache->hash_table == NULL) {
        free(cache);
        return NULL;
    }
    cache->capacity = capacity;
    dll_init(&cache->list);
    return cache;
}

/*
 * Look up `key` and mark it as most recently used.
 * Returns the cached value, or -1 when the key is absent or out of range
 * (indistinguishable from a cached value of -1).
 */
int cache_get(LRUCache *cache, int key) {
    if (!lru_key_valid(key)) {
        return -1;
    }
    Node *node = cache->hash_table[key];
    if (node == NULL) {
        return -1;
    }
    /* Relink the existing node instead of free + malloc: the value and the
     * hash_table pointer stay valid and no allocation can fail here. */
    if (node != cache->list.head) {
        lru_unlink(&cache->list, node);
        lru_push_front(&cache->list, node);
    }
    return node->data;
}

/*
 * Insert or update `key` -> `value` and mark it as most recently used.
 * When the cache is full, the least recently used entry is evicted first.
 * Out-of-range keys, a non-positive capacity, or an allocation failure
 * leave the cache unchanged.
 */
void cache_put(LRUCache *cache, int key, int value) {
    if (!lru_key_valid(key) || cache->capacity <= 0) {
        return;
    }
    Node *existing = cache->hash_table[key];
    if (existing != NULL) {
        /* Update the existing value. */
        existing->data = value;
        cache_get(cache, key); /* refresh recency */
        return;
    }
    /* Allocate before evicting so a failed allocation loses nothing. */
    LRUEntry *entry = malloc(sizeof *entry);
    if (entry == NULL) {
        return;
    }
    if (cache->list.size >= cache->capacity) {
        /* Evict the least recently used entry, using its stored key. */
        Node *victim = cache->list.tail;
        cache->hash_table[((LRUEntry *)victim)->key] = NULL;
        lru_unlink(&cache->list, victim);
        free(victim);
    }
    /* Insert the new entry at the head. */
    entry->key = key;
    entry->link.data = value;
    lru_push_front(&cache->list, &entry->link);
    cache->hash_table[key] = &entry->link;
}
五、哈希表:快速访问的魔法
5.1 哈希函数设计
/*
 * Division-method hash: map `key` to a bucket index in [0, size).
 *
 * C's % keeps the sign of the dividend, so the remainder of a negative key
 * is shifted back into range. A non-positive `size` has no valid bucket;
 * 0 is returned instead of dividing by zero.
 */
int division_hash(int key, int size) {
    if (size <= 0) {
        return 0;
    }
    int remainder = key % size;
    if (remainder < 0) {
        remainder += size;
    }
    return remainder;
}
/*
 * Multiplication-method hash: take the fractional part of key * A and scale
 * it to a bucket index in [0, size).
 *
 * For negative keys the truncated fractional part is negative, so it is
 * shifted into [0, 1) before scaling. A non-positive `size` yields 0.
 */
int multiplication_hash(int key, int size) {
    if (size <= 0) {
        return 0;
    }
    double A = 0.6180339887; /* golden ratio conjugate */
    double val = key * A;
    double frac = val - (int)val; /* |val| < INT_MAX because A < 1 */
    if (frac < 0.0) {
        frac += 1.0;
    }
    int index = (int)(size * frac);
    if (index >= size) {
        index = size - 1; /* guard against rounding up to exactly `size` */
    }
    return index;
}
/*
 * Universal hash: ((a * key + b) mod prime) mod size, in [0, size).
 *
 * The product is computed in long long because a * key overflows int for
 * realistic parameters, and a negative remainder is shifted back into
 * [0, prime). A non-positive `prime` or `size` yields 0 instead of dividing
 * by zero.
 */
int universal_hash(int key, int a, int b, int prime, int size) {
    if (prime <= 0 || size <= 0) {
        return 0;
    }
    long long mixed = ((long long)a * key + b) % prime;
    if (mixed < 0) {
        mixed += prime;
    }
    return (int)(mixed % size);
}
5.2 冲突解决方案
5.2.1 链地址法实现
/* Number of buckets / slots used by the hash tables in this section. */
#define TABLE_SIZE 10
/* One key/value entry in a bucket's singly linked collision chain. */
typedef struct HashNode {
    int key;               /* lookup key */
    int value;             /* value stored for the key */
    struct HashNode *next; /* next entry in the same bucket, NULL at the end */
} HashNode;
/* Hash map from int to int using separate chaining. */
typedef struct {
    HashNode **buckets; /* array of `size` chain heads; NULL means an empty bucket */
    int size;           /* number of buckets */
} HashMap;
/*
 * Create an empty hash map with TABLE_SIZE buckets.
 * Returns NULL when memory allocation fails.
 */
HashMap *create_hash_map(void) {
    HashMap *map = malloc(sizeof *map);
    if (map == NULL) {
        return NULL;
    }
    /* calloc leaves every bucket NULL, i.e. empty. */
    map->buckets = calloc(TABLE_SIZE, sizeof *map->buckets);
    if (map->buckets == NULL) {
        free(map);
        return NULL;
    }
    map->size = TABLE_SIZE;
    return map;
}
/*
 * Insert `key` -> `value`, or overwrite the value when the key already
 * exists. New entries are appended at the end of their bucket's chain.
 * If the new entry cannot be allocated the map is left unchanged.
 */
void hash_map_put(HashMap *map, int key, int value) {
    int index = division_hash(key, map->size);
    if (index < 0) {
        index += map->size; /* remainder of a negative key */
    }
    /* Walk the whole chain, including its last node, so an existing key is
     * always updated in place rather than duplicated. `link` ends up
     * pointing at the NULL pointer where a new node belongs. */
    HashNode **link = &map->buckets[index];
    while (*link != NULL) {
        if ((*link)->key == key) {
            (*link)->value = value; /* update existing key */
            return;
        }
        link = &(*link)->next;
    }
    /* Allocate only once we know an insert is needed. */
    HashNode *new_node = malloc(sizeof *new_node);
    if (new_node == NULL) {
        return;
    }
    new_node->key = key;
    new_node->value = value;
    new_node->next = NULL;
    *link = new_node;
}
/*
 * Look up `key`.
 * Returns the stored value, or -1 when the key is absent (indistinguishable
 * from a stored value of -1).
 */
int hash_map_get(HashMap *map, int key) {
    int index = division_hash(key, map->size);
    if (index < 0) {
        index += map->size; /* remainder of a negative key */
    }
    for (HashNode *current = map->buckets[index]; current != NULL;
         current = current->next) {
        if (current->key == key) {
            return current->value;
        }
    }
    return -1; /* not found */
}
5.2.2 开放寻址法实现
/* Number of slots in the open-addressing table. */
#define TABLE_SIZE 10
/*
 * Slot markers for the open-addressing table. They share the int key space,
 * so -1 and -2 cannot be stored as real keys. Parenthesized so the negative
 * values expand safely inside larger expressions.
 */
#define EMPTY (-1)   /* slot has never held a key */
#define DELETED (-2) /* slot held a key that was removed */
/* Linear probing: the i-th probe is i slots past the home slot, wrapping. */
int linear_probing(int key, int i, int size) {
    int home = division_hash(key, size);
    int probe = home + i;
    return probe % size;
}
/* Quadratic probing: the i-th probe is i*i slots past the home slot, wrapping. */
int quadratic_probing(int key, int i, int size) {
    int home = division_hash(key, size);
    int offset = i * i;
    return (home + offset) % size;
}
/* Greatest common divisor of two non-negative ints (Euclid). */
static int probe_gcd(int a, int b) {
    while (b != 0) {
        int r = a % b;
        a = b;
        b = r;
    }
    return a;
}

/*
 * Double hashing: the i-th probe is (h1 + i * h2) mod size, in [0, size).
 *
 * h1 is the home slot. h2 is the step, in [1, size - 1]; it is bumped to
 * the next value coprime with `size` so that probes i = 0 .. size-1 visit
 * every slot exactly once even when `size` is not prime.
 *
 * A table with at most one slot has only index 0.
 */
int double_hashing(int key, int i, int size) {
    if (size <= 1) {
        return 0; /* also avoids key % 0 below */
    }
    int h1 = division_hash(key, size);
    if (h1 < 0) {
        h1 += size; /* remainder of a negative key */
    }
    int h2 = key % (size - 1);
    if (h2 < 0) {
        h2 += size - 1;
    }
    h2 += 1;
    /* Terminates: size - 1 is always coprime with size. */
    while (probe_gcd(h2, size) != 1) {
        h2++;
    }
    /* Widen before multiplying so i * h2 cannot overflow int. */
    return (int)((h1 + (long long)i * h2) % size);
}
/*
 * Insert `key` into an open-addressing table of `size` slots using double
 * hashing. Free slots are those marked EMPTY or DELETED.
 *
 * Keys equal to EMPTY or DELETED are rejected because they would be
 * indistinguishable from the slot markers. When no free slot is found
 * within `size` probes, a "table full" message is printed.
 */
void hash_table_insert(int table[], int key, int size) {
    if (key == EMPTY || key == DELETED) {
        printf("无效的键: %d\n", key);
        return;
    }
    for (int i = 0; i < size; i++) {
        int index = double_hashing(key, i, size);
        if (index < 0) {
            index += size; /* remainder of a negative key */
        }
        if (table[index] == EMPTY || table[index] == DELETED) {
            table[index] = key;
            return;
        }
    }
    printf("哈希表已满\n");
}
六、二叉树:层次化数据组织
6.1 二叉树表示法
/* Binary tree node holding an int. */
typedef struct TreeNode {
    int data;               /* payload */
    struct TreeNode *left;  /* left child, NULL when absent */
    struct TreeNode *right; /* right child, NULL when absent */
} TreeNode;
/*
 * Allocate a leaf node holding `data`.
 *
 * Returns the new node with both children NULL, or NULL when memory
 * allocation fails. The caller owns the node and must release it with free().
 */
TreeNode *create_tree_node(int data) {
    TreeNode *node = malloc(sizeof *node);
    if (node == NULL) {
        return NULL;
    }
    node->data = data;
    node->left = NULL;
    node->right = NULL;
    return node;
}
/* Binary tree traversals. */
/* Pre-order: print the node, then its left subtree, then its right subtree. */
void preorder(TreeNode *root) {
    if (root == NULL) {
        return;
    }
    printf("%d ", root->data);
    preorder(root->left);
    preorder(root->right);
}
/* In-order: print the left subtree, then the node, then the right subtree. */
void inorder(TreeNode *root) {
    if (root == NULL) {
        return;
    }
    inorder(root->left);
    printf("%d ", root->data);
    inorder(root->right);
}
/* Post-order: print the left subtree, then the right subtree, then the node. */
void postorder(TreeNode *root) {
    if (root == NULL) {
        return;
    }
    postorder(root->left);
    postorder(root->right);
    printf("%d ", root->data);
}
/*
 * Print the tree rotated 90 degrees: the right subtree is printed first and
 * each node is preceded by one padding string per level of depth.
 */
void print_tree(TreeNode *root, int level) {
    if (!root) {
        return;
    }
    print_tree(root->right, level + 1);
    int pad = level;
    while (pad-- > 0) {
        printf(" ");
    }
    printf("%d\n", root->data);
    print_tree(root->left, level + 1);
}
6.2 二叉搜索树
/*
 * Insert `data` into the binary search tree rooted at `root` and return the
 * (possibly new) root. Values already present are not inserted again.
 */
TreeNode *bst_insert(TreeNode *root, int data) {
    /* `slot` tracks the pointer that should receive the new node. */
    TreeNode **slot = &root;
    while (*slot != NULL) {
        TreeNode *here = *slot;
        if (data < here->data) {
            slot = &here->left;
        } else if (data > here->data) {
            slot = &here->right;
        } else {
            return root; /* duplicate: tree unchanged */
        }
    }
    *slot = create_tree_node(data);
    return root;
}
/*
 * Find the node holding `data` in the binary search tree rooted at `root`.
 * Returns the node, or NULL when the value is not present.
 */
TreeNode *bst_search(TreeNode *root, int data) {
    TreeNode *cursor = root;
    while (cursor != NULL && cursor->data != data) {
        cursor = (data < cursor->data) ? cursor->left : cursor->right;
    }
    return cursor;
}
/*
 * Remove `data` from the binary search tree rooted at `root` and return the
 * new root of that subtree. The tree is unchanged when the value is absent.
 */
TreeNode *bst_delete(TreeNode *root, int data) {
    if (!root) {
        return NULL;
    }
    if (data != root->data) {
        /* Keep descending towards where the value would be. */
        if (data < root->data) {
            root->left = bst_delete(root->left, data);
        } else {
            root->right = bst_delete(root->right, data);
        }
        return root;
    }
    /* Found the node to delete. */
    if (!root->left || !root->right) {
        /* At most one child: that child (or NULL) takes the node's place. */
        TreeNode *replacement = root->left ? root->left : root->right;
        free(root);
        return replacement;
    }
    /* Two children: copy the in-order successor (leftmost node of the right
     * subtree) into this node, then delete the successor from that subtree. */
    TreeNode *succ = root->right;
    for (; succ->left; succ = succ->left) {
    }
    root->data = succ->data;
    root->right = bst_delete(root->right, succ->data);
    return root;
}
七、数据结构工程优化
7.1 内存池管理
#define POOL_SIZE 1000
typedef struct {
Node *nodes[POOL_SIZE];
int free_index;
} MemoryPool;
MemoryPool *create_memory_pool() {
MemoryPool *pool = (MemoryPool *)malloc(sizeof(MemoryPool));
for (int i = 0; i < POOL_SIZE - 1; i++) {
pool->nodes[i] = (Node *)malloc(sizeof(Node));
pool->nodes[i]->next = i + 1; // 使用next指针连接空闲节点
}
pool->nodes[POOL_SIZE - 1] = NULL;
pool->free_index = 0;
return pool;
}
Node *pool_allocate_node(MemoryPool *pool) {
if (pool->free_index == -1) return NULL;
Node *node = pool->nodes[pool->free_index];
pool->free_index = node->next;
return node;
}
void pool_free_node(MemoryPool *pool, Node *node) {
node->next = pool->free_index;
pool->free_index = (int)(node - pool->nodes[0]); // 计算索引
}
7.2 数据结构选择指南
应用场景 | 推荐数据结构 | 理由 |
---|---|---|
高频插入删除 | 链表 | O(1)插入删除 |
高频随机访问 | 数组 | O(1)访问 |
后进先出需求 | 栈 | LIFO语义 |
先进先出需求 | 队列 | FIFO语义 |
快速查找 | 哈希表 | O(1)查找 |
有序数据 | 二叉搜索树 | 自动排序 |
优先级管理 | 堆 | 高效获取最大/最小值 |
键值关联 | 哈希表 | 直接映射 |
层次关系 | 树 | 天然层次结构 |
网络关系 | 图 | 节点和边 |
八、综合应用:表达式求值
/* Defined after this function in the file; declared here before first use. */
int apply_op(int a, int b, char op);
int precedence(char op);

/* Return 1 when `c` is one of the binary operators + - * /. */
static int expr_is_operator(char c) {
    return c == '+' || c == '-' || c == '*' || c == '/';
}

/* Pop one operator and two operands, apply it, and push the result. */
static void expr_reduce_top(Stack *values, Stack *operators) {
    int val2 = stack_pop(values); /* right operand is on top */
    int val1 = stack_pop(values);
    char op = (char)stack_pop(operators);
    stack_push(values, apply_op(val1, val2, op));
}

/*
 * Evaluate an infix integer expression using an operand stack and an
 * operator stack.
 *
 * Supports non-negative decimal literals, + - * /, parentheses and spaces;
 * other characters are skipped. Returns the value left on the operand
 * stack.
 *
 * Unmatched parentheses are tolerated: a stray ')' reduces whatever is
 * pending, and a stray '(' is discarded at the end.
 */
int evaluate_expression(char *expression) {
    Stack values;
    Stack operators;
    stack_init(&values);
    stack_init(&operators);
    for (int i = 0; expression[i]; i++) {
        char c = expression[i];
        if (c == ' ') {
            continue;
        }
        /* <ctype.h> functions require a value representable as unsigned char. */
        if (isdigit((unsigned char)c)) {
            int num = 0;
            while (isdigit((unsigned char)expression[i])) {
                num = num * 10 + (expression[i] - '0');
                i++;
            }
            i--; /* the for loop's i++ will step past the last digit */
            stack_push(&values, num);
        } else if (c == '(') {
            stack_push(&operators, c);
        } else if (c == ')') {
            /* Stop on an empty stack too, so an unmatched ')' cannot spin
             * forever on the -1 that stack_peek returns when empty. */
            while (!stack_empty(&operators) && stack_peek(&operators) != '(') {
                expr_reduce_top(&values, &operators);
            }
            if (!stack_empty(&operators)) {
                stack_pop(&operators); /* pop the '(' */
            }
        } else if (expr_is_operator(c)) {
            /* Left-associative: reduce operators of equal or higher
             * precedence first. '(' has precedence 0 and stops the loop. */
            while (!stack_empty(&operators) &&
                   precedence((char)stack_peek(&operators)) >= precedence(c)) {
                expr_reduce_top(&values, &operators);
            }
            stack_push(&operators, c);
        }
    }
    while (!stack_empty(&operators)) {
        if (stack_peek(&operators) == '(') {
            stack_pop(&operators); /* unmatched '(' is not an operator */
            continue;
        }
        expr_reduce_top(&values, &operators);
    }
    return stack_pop(&values);
}
/*
 * Apply the binary operator `op` to `a` and `b` and return the result.
 *
 * Division by zero prints an error and yields 0 instead of invoking
 * undefined behaviour. An unrecognized operator also yields 0.
 */
int apply_op(int a, int b, char op) {
    switch (op) {
    case '+':
        return a + b;
    case '-':
        return a - b;
    case '*':
        return a * b;
    case '/':
        if (b == 0) {
            printf("除零错误\n");
            return 0;
        }
        return a / b;
    default:
        return 0;
    }
}
/*
 * Binding strength of an operator: 2 for * and /, 1 for + and -, and 0 for
 * anything else (including parentheses).
 */
int precedence(char op) {
    switch (op) {
    case '*':
    case '/':
        return 2;
    case '+':
    case '-':
        return 1;
    default:
        return 0;
    }
}
九、总结与展望
9.1 关键知识点回顾
- 栈和队列:LIFO和FIFO的经典实现
- 链表:动态内存管理的优雅解决方案
- 哈希表:快速访问的工程实践
- 二叉树:层次化数据组织的核心结构
- 工程优化:内存池、混合数据结构等技巧
9.2 数据结构选择矩阵
操作需求 | 数组 | 链表 | 哈希表 | 二叉搜索树 | 堆 |
---|---|---|---|---|---|
插入效率 | 差 | 优 | 优 | 良 | 优 |
删除效率 | 差 | 优 | 优 | 良 | 良 |
查找效率 | 差 | 差 | 优 | 良 | 差 |
随机访问 | 优 | 差 | 优 | 差 | 差 |
内存效率 | 优 | 良 | 良 | 良 | 优 |
有序访问 | 差 | 差 | 差 | 优 | 差 |
空间复杂度 | O(n) | O(n) | O(n) | O(n) | O(n) |
“数据结构是算法的基石,选择合适的数据结构往往比设计精巧的算法更重要。在工程实践中,90%的性能问题可以通过选择更优的数据结构来解决。”
—— Niklaus Wirth,Pascal语言发明者
下章预告:第十一章《散列》将深入探讨:
- 散列函数的数学原理
- 动态散列表的扩展策略
- 完美散列与全域散列
- 布隆过滤器的高级应用
本文完整代码已上传至GitHub仓库:Data-Structures-Implementations
思考题:
- 如何实现一个支持O(1)时间复杂度的最小值栈?
- 在内存受限环境中,如何优化二叉搜索树的内存占用?
- 哈希表在动态扩容时如何保证性能?
- 如何设计线程安全的数据结构?