Linux 内核锁机制：从自旋锁到 RCU 的并发控制演进-港品优选

Linux 内核锁机制：从自旋锁到 RCU 的并发控制演进

一、并发控制的"锁焦虑"：选择错误的锁等于选择错误的性能

Linux 内核中，锁的选择直接影响系统吞吐和延迟。某数据库系统在高并发场景下使用自旋锁保护共享链表，CPU 利用率飙升至 95% 但有效吞吐仅 12%——大量 CPU 时间消耗在自旋等待上。将自旋锁替换为互斥锁后，CPU 利用率降至 45%，吞吐提升 3 倍。但在另一个中断处理场景中，互斥锁导致睡眠而触发死锁，必须使用自旋锁。锁的选择不是"哪个更好"，而是"哪个场景用哪个"。

Linux 内核提供了从轻量到重量的多种锁机制，理解它们的底层实现和适用场景，是内核开发的必备能力。

二、Linux 内核锁机制的层级与适用场景

flowchart TB
    subgraph 锁层级["锁机制层级（从轻到重）"]
        direction TB
        L1["原子操作<br/>atomic_t<br/>无锁，单条指令<br/>适用：计数器、标志位"]
        L2["自旋锁<br/>spinlock_t<br/>忙等，不睡眠<br/>适用：中断上下文、短临界区"]
        L3["互斥锁<br/>mutex<br/>可睡眠，开销较大<br/>适用：长临界区、可睡眠上下文"]
        L4["读写锁<br/>rwlock_t / rw_semaphore<br/>读共享、写独占<br/>适用：读多写少"]
        L5["RCU<br/>Read-Copy-Update<br/>读无锁，写延迟回收<br/>适用：读远多于写"]
    end
    L1 -->|竞争加剧| L2
    L2 -->|临界区变长| L3
    L3 -->|读多写少| L4
    L4 -->|读远多于写| L5
    style 锁层级 fill:#eef,stroke:#333

三、内核锁机制的代码实现与分析

#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/rwlock.h>	/* keep after spinlock.h, which provides rwlock_t */
#include <linux/rwsem.h>
#include <linux/rcupdate.h>
#include <linux/atomic.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/errno.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/rculist.h>
#include <linux/string.h>
#include <linux/types.h>

/* ============ 1. Atomic operations ============ */

/*
 * Atomic operations are the lightest form of concurrency control and map
 * onto the CPU's atomic instructions. They suit simple counters and flags.
 */
struct ref_counter {
	atomic_t count;
};

/* Atomically increment the counter and return the new value. */
static inline int ref_counter_inc(struct ref_counter *rc)
{
	return atomic_inc_return(&rc->count);
}

/* Atomically decrement the counter; returns non-zero when it reached zero. */
static inline int ref_counter_dec_and_test(struct ref_counter *rc)
{
	return atomic_dec_and_test(&rc->count);
}

/* ============ 2. Spinlock ============ */

/*
 * A spinlock busy-waits and never yields the CPU.
 * Use it in interrupt context or for very short critical sections.
 * Never sleep (i.e. never call anything that may schedule) while holding it.
 */
struct spinlock_protected_list {
	spinlock_t lock;
	struct list_head head;
	int count;
};

/* Initialise the lock, the empty list and the element counter. */
static void spl_init(struct spinlock_protected_list *spl)
{
	spin_lock_init(&spl->lock);
	INIT_LIST_HEAD(&spl->head);
	spl->count = 0;
}

/*
 * Add a node to the list; safe to call from any context, including
 * interrupt handlers.
 */
static void spl_add_irqsafe(struct spinlock_protected_list *spl,
			    struct list_head *node)
{
	unsigned long flags;

	/*
	 * spin_lock_irqsave saves the interrupt state and disables local
	 * interrupts, so an interrupt handler on this CPU cannot try to take
	 * the same lock and deadlock against us.
	 */
	spin_lock_irqsave(&spl->lock, flags);
	list_add(node, &spl->head);
	spl->count++;
	spin_unlock_irqrestore(&spl->lock, flags);
}

/*
 * Add a node to the list from ordinary (non-interrupt) context.
 *
 * NOTE: interrupts stay enabled here. Only use this variant on a list that
 * is never touched from interrupt context; if any interrupt handler takes
 * spl->lock, every user must go through spl_add_irqsafe() instead.
 */
static void spl_add(struct spinlock_protected_list *spl,
		    struct list_head *node)
{
	spin_lock(&spl->lock);
	list_add(node, &spl->head);
	spl->count++;
	spin_unlock(&spl->lock);
}

/* ============ 3. Mutex ============ */

/*
 * A contended mutex puts the waiter to sleep instead of spinning.
 * Use it for longer critical sections in process context, where sleeping
 * is allowed. Contended lock/unlock involves a context switch.
 */
struct mutex_protected_buffer {
	struct mutex lock;
	void *data;
	size_t size;
	size_t used;
};

/*
 * Initialise the buffer with a zeroed backing store of @size bytes.
 * Returns 0 on success or -ENOMEM; on failure the buffer is left empty
 * (size == 0) so a later mpb_write() fails cleanly with -ENOSPC.
 */
static int mpb_init(struct mutex_protected_buffer *mpb, size_t size)
{
	mutex_init(&mpb->lock);
	mpb->size = 0;
	mpb->used = 0;

	mpb->data = kzalloc(size, GFP_KERNEL);
	if (!mpb->data)
		return -ENOMEM;

	mpb->size = size;
	return 0;
}

/*
 * Append @len bytes from @buf to the buffer. May block.
 * Returns the number of bytes written, or -ENOSPC if they do not fit.
 */
static ssize_t mpb_write(struct mutex_protected_buffer *mpb,
			 const void *buf, size_t len)
{
	ssize_t written;

	/*
	 * mutex_lock may sleep, so this must not be called from interrupt
	 * context.
	 */
	mutex_lock(&mpb->lock);

	/*
	 * Compare against the remaining space rather than computing
	 * used + len, which could wrap around size_t and wrongly pass.
	 */
	if (len > mpb->size - mpb->used) {
		written = -ENOSPC;
		goto out;
	}

	memcpy((char *)mpb->data + mpb->used, buf, len);
	mpb->used += len;
	written = len;

out:
	mutex_unlock(&mpb->lock);
	return written;
}

/* ============ 4. Read-write semaphore ============ */

/*
 * Read-write semaphore: readers share, writers are exclusive.
 * Suits read-mostly structures such as configuration or routing tables.
 * rw_semaphore is heavier than rwlock_t but lets readers sleep.
 */
struct rwsem_config_table {
	struct rw_semaphore rwsem;
	struct hlist_head *buckets;
	int bucket_count;
};

/*
 * Initialise the table with @buckets zeroed hash buckets.
 * The function returns nothing, so failure is reported through the
 * structure itself: on a non-positive @buckets or an allocation failure,
 * rct->buckets is NULL and rct->bucket_count is 0.
 */
static void rct_init(struct rwsem_config_table *rct, int buckets)
{
	init_rwsem(&rct->rwsem);
	rct->buckets = NULL;
	rct->bucket_count = 0;

	if (buckets <= 0)
		return;

	/* kcalloc zeroes the array and checks the n * size multiplication. */
	rct->buckets = kcalloc(buckets, sizeof(*rct->buckets), GFP_KERNEL);
	if (!rct->buckets)
		return;

	rct->bucket_count = buckets;
}

/* Read side: any number of readers may run concurrently. */
static void *rct_lookup(struct rwsem_config_table *rct, int key)
{
	void *result = NULL;

	down_read(&rct->rwsem);		/* take the read lock */
	/* lookup logic omitted in this example; always yields NULL */
	result = NULL;
	up_read(&rct->rwsem);		/* drop the read lock */

	return result;
}

/* Write side: exclusive access. */
static void rct_update(struct rwsem_config_table *rct, int key, void *value)
{
	down_write(&rct->rwsem);	/* take the write lock; waits for readers */
	/* update logic omitted in this example */
	up_write(&rct->rwsem);		/* drop the write lock */
}

/* ============ 5. RCU (Read-Copy-Update) ============ */

/*
 * Core idea: readers access the data without taking a lock, a writer
 * publishes a modified copy, and the old version is reclaimed after a
 * grace period. Suits data that is read far more often than written.
 */
struct rcu_protected_entry {
	int key;
	int value;
	struct rcu_head rcu;		/* used by call_rcu() for reclamation */
	struct list_head node;
};

struct rcu_protected_list {
	struct list_head head;
	spinlock_t update_lock;		/* serialises writers */
};

/*
 * RCU callback: runs after the grace period and frees the old entry.
 * Defined before its users so that call_rcu() sees a declaration.
 */
static void rpl_free_callback(struct rcu_head *head)
{
	struct rcu_protected_entry *entry =
		container_of(head, struct rcu_protected_entry, rcu);

	kfree(entry);
}

/*
 * Writer-side lookup. The caller must hold rpl->update_lock, which keeps
 * the list stable, so a plain list walk is sufficient.
 */
static struct rcu_protected_entry *
rpl_find_locked(struct rcu_protected_list *rpl, int key)
{
	struct rcu_protected_entry *entry;

	lockdep_assert_held(&rpl->update_lock);

	list_for_each_entry(entry, &rpl->head, node) {
		if (entry->key == key)
			return entry;
	}

	return NULL;
}

/*
 * Reader-side lookup: lock-free traversal of the list.
 *
 * The walk itself runs inside an RCU read-side critical section, but the
 * returned pointer is handed back after rcu_read_unlock(). To dereference
 * the result safely, the caller must itself be inside rcu_read_lock()
 * (nesting is allowed) or hold rpl->update_lock; otherwise the entry may be
 * freed by a concurrent rpl_update()/rpl_delete() once the grace period ends.
 */
static struct rcu_protected_entry *
rpl_lookup(struct rcu_protected_list *rpl, int key)
{
	struct rcu_protected_entry *entry;
	struct rcu_protected_entry *found = NULL;

	rcu_read_lock();
	list_for_each_entry_rcu(entry, &rpl->head, node) {
		if (entry->key == key) {
			found = entry;
			break;
		}
	}
	rcu_read_unlock();

	return found;
}

/*
 * RCU write: build a new entry, publish it, reclaim the old one later.
 * Allocates with GFP_KERNEL, so it must run in a context that may sleep.
 * If the allocation fails the list is left unchanged.
 */
static void rpl_update(struct rcu_protected_list *rpl, int key, int new_value)
{
	struct rcu_protected_entry *old, *new_entry;

	/* Allocate outside the lock to keep the critical section short. */
	new_entry = kmalloc(sizeof(*new_entry), GFP_KERNEL);
	if (!new_entry)
		return;

	new_entry->key = key;
	new_entry->value = new_value;

	spin_lock(&rpl->update_lock);

	old = rpl_find_locked(rpl, key);
	if (old) {
		/* Swap the new entry in place of the old one. */
		list_replace_rcu(&old->node, &new_entry->node);
		/*
		 * call_rcu registers a callback that frees the old entry
		 * once all pre-existing readers have left their read-side
		 * critical sections.
		 */
		call_rcu(&old->rcu, rpl_free_callback);
	} else {
		/* No entry with this key yet: insert a new one. */
		list_add_rcu(&new_entry->node, &rpl->head);
	}

	spin_unlock(&rpl->update_lock);
}

/* RCU delete: unlink the entry now, free it after the grace period. */
static void rpl_delete(struct rcu_protected_list *rpl, int key)
{
	struct rcu_protected_entry *entry;

	spin_lock(&rpl->update_lock);

	entry = rpl_find_locked(rpl, key);
	if (entry) {
		list_del_rcu(&entry->node);
		/* Deferred reclamation: wait for all readers to finish. */
		call_rcu(&entry->rcu, rpl_free_callback);
	}

	spin_unlock(&rpl->update_lock);
}

四、内核锁机制的 Trade-offs

自旋锁的 CPU 浪费。自旋锁在竞争时忙等，CPU 时间被空转消耗。临界区越长、竞争越激烈，浪费越严重。经验法则：临界区超过 10 微秒时应考虑互斥锁。在中断上下文中别无选择时，必须将临界区压缩到最短。

互斥锁的优先级反转。低优先级任务持有互斥锁时，高优先级任务被阻塞，而中等优先级任务可以抢占低优先级任务，导致高优先级任务间接被中等优先级任务阻塞。Linux 内核通过 rt_mutex 提供的优先级继承（PI）机制缓解此问题（普通 mutex 本身不做优先级继承，只有在 PREEMPT_RT 内核中 mutex 才基于 rt_mutex 实现），但 PI 本身增加了锁的开销。

读写锁的写饥饿。读操作持续到来时，写操作可能被无限期阻塞。rw_semaphore 的实现中，写者等待期间新的读者仍可获取读锁。解决方案是在写者等待时阻止新读者，但这又降低了读吞吐。

RCU 的内存开销与延迟。RCU 读操作零开销的代价是：写操作需要分配副本（双倍内存），旧数据的回收延迟到宽限期结束（通常数十毫秒）。在写操作频繁的场景下，内存占用会显著增加。RCU 不适用于写多读少的场景。

五、总结

Linux 内核锁机制从轻到重分为原子操作、自旋锁、互斥锁、读写锁和 RCU 五个层级，每个层级针对不同的并发场景。原子操作适用于简单计数器，自旋锁适用于中断上下文和极短临界区，互斥锁适用于可睡眠的长临界区，读写锁适用于读多写少场景，RCU 适用于读远多于写的极致性能场景。选择锁的关键是匹配场景：错误的选择要么导致 CPU 浪费（长临界区用自旋锁），要么导致死锁（中断上下文用互斥锁），要么导致写饥饿（高写频率用读写锁）。

企业官网建设流程全解析