【Linux驱动开发】Linux块设备驱动开发详解
Linux块设备驱动开发详解
概述
Linux块设备驱动是Linux内核中最复杂和重要的驱动类型之一。块设备以固定大小的块为单位进行数据读写,与字符设备相比,它们具有缓冲、缓存和复杂的I/O调度机制。
1. 块设备基础概念
1.1 块设备vs字符设备
块设备特点:
- 以固定大小的块为单位进行数据传输(通常为512字节或4KB)
- 支持随机访问,可以寻址到任意块
- 具有缓冲和缓存机制
- 支持复杂的I/O调度和重排序
- 典型例子:硬盘、SSD、USB存储设备、RAM磁盘
字符设备特点:
- 以字节流形式进行数据传输
- 通常不支持随机访问
- 无缓冲机制(除非应用层实现)
- 典型例子:串口、键盘、鼠标
1.2 块设备核心数据结构
1.2.1 gendisk结构体
/* Excerpt of struct gendisk as presented in this article; remaining fields are elided. */
struct gendisk {
	int major; /* major device number */
	int first_minor; /* first minor device number */
	int minors; /* number of minor device numbers */
	char disk_name[32]; /* device name */
	struct disk_part_tbl *part_tbl; /* partition table */
	struct hd_struct part0; /* partition 0 */
	struct block_device_operations *fops; /* block device operations */
	struct request_queue *queue; /* request queue */
	void *private_data; /* private data */
	int flags; /* device flags */
	// ... other fields
};
1.2.2 block_device_operations结构体
/*
 * Excerpt of struct block_device_operations: a table of function pointers
 * for a block device. Remaining members are elided.
 */
struct block_device_operations {
	int (*open) (struct block_device *, fmode_t);
	void (*release) (struct gendisk *, fmode_t);
	int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
	int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
	int (*direct_access) (struct block_device *, sector_t, void __pmem **, unsigned long *);
	unsigned int (*check_events) (struct gendisk *disk, unsigned int clearing);
	// ... other function pointers
};
1.2.3 request结构体
/* Excerpt of struct request as presented in this article; remaining fields are elided. */
struct request {
	struct request_queue *q; /* request queue this request belongs to */
	sector_t sector; /* starting sector number */
	unsigned int nr_sectors; /* number of sectors */
	unsigned int current_nr_sectors; /* current number of sectors */
	struct bio *bio; /* associated bio */
	struct bio *biotail; /* tail of the bio list */
	// ... other fields
};
1.2.4 bio结构体
/*
 * Excerpt of struct bio as presented in this article; remaining fields are
 * elided.
 * NOTE(review): the later snippets access bio->bi_iter.bi_sector, which this
 * layout does not have -- confirm which kernel version this excerpt matches.
 */
struct bio {
	sector_t bi_sector; /* starting sector number */
	struct bio *bi_next; /* next bio */
	struct block_device *bi_bdev; /* block device */
	unsigned long bi_flags; /* bio flags */
	unsigned long bi_rw; /* read/write flags */
	unsigned short bi_vcnt; /* number of bio_vecs */
	unsigned short bi_idx; /* current bio_vec index */
	unsigned short bi_phys_segments; /* number of physical segments */
	unsigned short bi_hw_segments; /* number of hardware segments */
	unsigned int bi_size; /* data size in bytes */
	unsigned int bi_hw_front_size; /* hardware front padding size */
	unsigned int bi_hw_back_size; /* hardware back padding size */
	unsigned int bi_max_vecs; /* maximum number of bio_vecs */
	struct bio_vec *bi_io_vec; /* bio_vec array */
	// ... other fields
};
2. 块设备驱动架构
2.1 整体架构
Linux块设备驱动采用分层架构:
用户空间应用程序
↓
VFS(虚拟文件系统)
↓
块设备层(Block Layer)
↓
I/O调度器
↓
块设备驱动
↓
硬件设备
2.2 关键组件
2.2.1 请求队列(Request Queue)
- 管理待处理的I/O请求
- 实现I/O调度算法
- 处理请求合并和重排序
2.2.2 I/O调度器
- noop:最简单的调度器,只做基本合并
- deadline:防止请求饥饿,保证服务质量
- cfq(Completely Fair Queueing):完全公平队列调度
- mq-deadline:多队列版本的deadline调度器
2.2.3 bio(Block I/O)
- 描述块I/O操作的数据结构
- 支持分散-聚集(scatter-gather)I/O
- 可以链接多个bio形成bio链表
3. 块设备注册和初始化
3.1 基本步骤
- 分配gendisk结构体
- 设置设备参数
- 初始化请求队列
- 注册块设备
- 添加磁盘设备
3.2 代码示例
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/bio.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>

#define DEVICE_NAME "myblock"
/*
 * Logical sector size in bytes. Guarded because newer <linux/blkdev.h>
 * versions provide their own SECTOR_SIZE definition.
 */
#ifndef SECTOR_SIZE
#define SECTOR_SIZE 512
#endif
#define DEVICE_SIZE (16 * 1024 * 1024) /* 16MB */

static int major;			/* major number returned by register_blkdev() */
static struct gendisk *disk;		/* the single disk exposed by this driver */
static struct request_queue *queue;	/* request queue attached to the disk */
static spinlock_t lock;			/* lock handed to blk_init_queue() */
static unsigned char *device_data;	/* DEVICE_SIZE bytes of backing store */

/* Block device operations */
/*
 * myblock_open - .open callback of myblock_fops.
 * Logs that the device was opened and always returns 0.
 */
static int myblock_open(struct block_device *bdev, fmode_t mode)
{
	pr_info("myblock: device opened\n");

	return 0;
}

/*
 * myblock_release - .release callback of myblock_fops.
 * Only logs that the device was released.
 */
static void myblock_release(struct gendisk *gd, fmode_t mode)
{
	pr_info("myblock: device released\n");
}

/* Operations table installed into disk->fops by myblock_init(). */
static const struct block_device_operations myblock_fops = {
	.owner   = THIS_MODULE,
	.release = myblock_release,
	.open    = myblock_open,
};

/* Request handling function */
static void myblock_request(struct request_queue *q)
{struct request *req;while ((req = blk_fetch_request(q)) != NULL) {struct bio_vec bvec;struct req_iterator iter;sector_t sector = blk_rq_pos(req);unsigned long offset;void *buffer;/* 检查请求有效性 */if (blk_rq_sectors(req) * SECTOR_SIZE + sector * SECTOR_SIZE > DEVICE_SIZE) {printk(KERN_ERR "myblock: request beyond device size\n");__blk_end_request_all(req, -EIO);continue;}/* 处理请求 */rq_for_each_segment(bvec, req, iter) {offset = sector * SECTOR_SIZE + iter.iter.bi_sector * SECTOR_SIZE;buffer = page_address(bvec.bv_page) + bvec.bv_offset;if (rq_data_dir(req) == READ) {memcpy(buffer, device_data + offset, bvec.bv_len);} else {memcpy(device_data + offset, buffer, bvec.bv_len);}}__blk_end_request_all(req, 0);}
}static int __init myblock_init(void)
{/* 1. 注册块设备 */major = register_blkdev(0, DEVICE_NAME);if (major < 0) {printk(KERN_ERR "myblock: failed to register block device\n");return major;}/* 2. 分配设备内存 */device_data = vmalloc(DEVICE_SIZE);if (!device_data) {unregister_blkdev(major, DEVICE_NAME);return -ENOMEM;}/* 3. 初始化自旋锁 */spin_lock_init(&lock);/* 4. 创建请求队列 */queue = blk_init_queue(myblock_request, &lock);if (!queue) {vfree(device_data);unregister_blkdev(major, DEVICE_NAME);return -ENOMEM;}/* 5. 设置队列参数 */blk_queue_logical_block_size(queue, SECTOR_SIZE);blk_queue_physical_block_size(queue, SECTOR_SIZE);/* 6. 分配gendisk结构体 */disk = alloc_disk(1);if (!disk) {blk_cleanup_queue(queue);vfree(device_data);unregister_blkdev(major, DEVICE_NAME);return -ENOMEM;}/* 7. 设置gendisk参数 */disk->major = major;disk->first_minor = 0;disk->minors = 1;strcpy(disk->disk_name, DEVICE_NAME);disk->fops = &myblock_fops;disk->queue = queue;set_capacity(disk, DEVICE_SIZE / SECTOR_SIZE);/* 8. 添加磁盘设备 */add_disk(disk);printk(KERN_INFO "myblock: block device initialized successfully\n");return 0;
}static void __exit myblock_exit(void)
{/* 1. 删除磁盘设备 */del_gendisk(disk);/* 2. 清理gendisk结构体 */put_disk(disk);/* 3. 清理请求队列 */blk_cleanup_queue(queue);/* 4. 释放设备内存 */vfree(device_data);/* 5. 注销块设备 */unregister_blkdev(major, DEVICE_NAME);printk(KERN_INFO "myblock: block device removed\n");
}module_init(myblock_init);
module_exit(myblock_exit);MODULE_LICENSE("GPL");
MODULE_AUTHOR("Linux Driver Developer");
MODULE_DESCRIPTION("A simple block device driver");
4. 请求处理机制
4.1 请求处理流程
- 请求接收:内核通过make_request_fn接收bio请求
- 请求合并:尝试将新请求与队列中的现有请求合并
- 请求调度:根据I/O调度算法对请求进行排序和调度
- 请求派发:将请求派发给驱动的请求处理函数
- 请求完成:驱动处理完成后通知内核
4.2 bio处理
对于RAM盘这类不需要请求合并和I/O调度的设备,驱动可以注册make_request_fn直接处理bio,绕过传统的request队列(较新的内核中该接口已由submit_bio回调取代):
static void myblock_make_request(struct request_queue *q, struct bio *bio)
{struct bio_vec bvec;struct bvec_iter iter;sector_t sector = bio->bi_iter.bi_sector;unsigned long offset;void *buffer;/* 检查bio有效性 */if (bio_sectors(bio) * SECTOR_SIZE + sector * SECTOR_SIZE > DEVICE_SIZE) {printk(KERN_ERR "myblock: bio beyond device size\n");bio_io_error(bio);return;}/* 处理bio */bio_for_each_segment(bvec, bio, iter) {offset = sector * SECTOR_SIZE + iter.bi_sector * SECTOR_SIZE;buffer = kmap(bvec.bv_page) + bvec.bv_offset;if (bio_data_dir(bio) == READ) {memcpy(buffer, device_data + offset, bvec.bv_len);} else {memcpy(device_data + offset, buffer, bvec.bv_len);}kunmap(bvec.bv_page);}bio_endio(bio);
}
5. 高级特性
5.1 多队列支持
现代存储设备支持多队列,提高并发性能:
static int myblock_init(void)
{/* 创建多队列 */queue = blk_mq_init_sq_queue(&myblock_tag_set, &myblock_mq_ops, 128, BLK_MQ_F_SHOULD_MERGE);if (!queue) {return -ENOMEM;}/* 设置队列参数 */blk_queue_logical_block_size(queue, SECTOR_SIZE);blk_queue_physical_block_size(queue, SECTOR_SIZE);blk_queue_max_hw_sectors(queue, 256);/* 其他初始化代码 */// ...
}
5.2 多队列操作函数
static const struct blk_mq_ops myblock_mq_ops = {.queue_rq = myblock_queue_rq,.map_queue = blk_mq_map_queue,
};static int myblock_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd)
{struct request *req = bd->rq;struct myblock_dev *dev = hctx->queue->queuedata;/* 处理请求 */if (blk_rq_is_passthrough(req)) {return BLK_MQ_RQ_QUEUE_ERROR;}/* 执行实际的I/O操作 */myblock_execute_rq(dev, req);return BLK_MQ_RQ_QUEUE_OK;
}
5.3 电源管理
/*
 * myblock_suspend - .suspend callback of myblock_pm_ops.
 * Stops the hardware queues first, then saves the device state.
 * Always returns 0.
 */
static int myblock_suspend(struct device *dev)
{
	struct myblock_dev *blk = dev_get_drvdata(dev);

	blk_mq_stop_hw_queues(blk->queue);
	myblock_save_device_state(blk);

	return 0;
}

/*
 * myblock_resume - .resume callback of myblock_pm_ops.
 * Restores the device state first, then restarts the stopped hardware queues.
 * Always returns 0.
 */
static int myblock_resume(struct device *dev)
{
	struct myblock_dev *blk = dev_get_drvdata(dev);

	myblock_restore_device_state(blk);
	blk_mq_start_stopped_hw_queues(blk->queue, true);

	return 0;
}

/* Power-management callbacks for the device. */
static const struct dev_pm_ops myblock_pm_ops = {
	.resume  = myblock_resume,
	.suspend = myblock_suspend,
};
6. 错误处理和调试
6.1 错误处理
/*
 * myblock_handle_error - account for an I/O error and stop the device when
 * too many have accumulated.
 * @dev:   device whose statistics are updated.
 * @error: negative errno describing the failure.
 *
 * Increments the per-category counter (-EIO, -ENOMEM, -ETIME, or "other") and
 * the total. Once the total exceeds MAX_ALLOWED_ERRORS the device is marked
 * DEVICE_STATE_ERROR and its hardware queues are stopped.
 */
static void myblock_handle_error(struct myblock_dev *dev, int error)
{
	switch (error) {
	case -EIO:
		dev->stats.io_errors++;
		break;
	case -ENOMEM:
		dev->stats.memory_errors++;
		break;
	case -ETIME:
		dev->stats.timeout_errors++;
		break;
	default:
		dev->stats.other_errors++;
		break;
	}

	/*
	 * Pre-increment so the comparison sees the count that includes this
	 * error; a post-increment would tolerate one error more than
	 * MAX_ALLOWED_ERRORS before reacting.
	 */
	if (++dev->stats.total_errors > MAX_ALLOWED_ERRORS) {
		dev->state = DEVICE_STATE_ERROR;
		/* Stop the device */
		blk_mq_stop_hw_queues(dev->queue);
	}
}
6.2 调试技巧
- 使用内核调试工具:
  - blktrace:跟踪块设备I/O
  - blkparse:解析blktrace输出
  - btt:分析I/O性能
-
添加调试信息:
/* Define MYBLOCK_DEBUG to compile the driver's debug messages in. */
#define MYBLOCK_DEBUG

#ifdef MYBLOCK_DEBUG
#define myblock_dbg(fmt, args...) \
	printk(KERN_DEBUG "myblock: " fmt "\n", ##args)
#else
/* no_printk() emits nothing but still type-checks the format arguments. */
#define myblock_dbg(fmt, args...) \
	no_printk(KERN_DEBUG "myblock: " fmt "\n", ##args)
#endif
- 使用动态调试:
/* Add a dynamic debug point in the code */
pr_debug("myblock: processing request sector=%llu nr_sectors=%u\n",
	 (unsigned long long)sector, nr_sectors);
7. 性能优化
7.1 I/O合并优化
/*
 * myblock_merge_bvec - cap the length of a bio_vec that is about to be merged.
 * @q:    request queue; the driver data is read from q->queuedata.
 * @bmd:  merge data; only bmd->bi_sector is read.
 * @bvec: bio_vec whose bv_len may be shortened.
 *
 * Computes a sector limit from dev->max_hw_sectors and the distance implied
 * by dev->cache_size / dev->cache_sectors, and truncates bvec->bv_len to that
 * limit (in bytes) when it is larger.
 *
 * NOTE(review): the mask with (cache_sectors - 1) presumably assumes
 * cache_sectors is a power of two -- confirm.
 * NOTE(review): cache_size is subtracted from in sector units; verify that it
 * is not a byte count.
 * NOTE(review): the kernel's merge_bvec_fn hook returned int and was removed
 * from later kernels -- confirm the target kernel version before use.
 */
static void myblock_merge_bvec(struct request_queue *q,
			       struct bvec_merge_data *bmd,
			       struct bio_vec *bvec)
{
	struct myblock_dev *dev = q->queuedata;
	sector_t sector = bmd->bi_sector;
	unsigned short max_sectors;

	/* Compute the maximum number of sectors that may be merged */
	max_sectors = min_t(unsigned short, dev->max_hw_sectors,
			    (dev->cache_size - (sector & (dev->cache_sectors - 1))));

	/* << 9 converts sectors to bytes */
	if (bvec->bv_len > (max_sectors << 9)) {
		bvec->bv_len = max_sectors << 9;
	}
}
7.2 预读优化
static void myblock_end_io(struct bio *bio)
{struct myblock_dev *dev = bio->bi_private;if (bio_data_dir(bio) == READ && bio->bi_iter.bi_size > 0) {/* 触发预读 */myblock_trigger_readahead(dev, bio->bi_iter.bi_sector + bio_sectors(bio));}bio_endio(bio);
}
7.3 多队列负载均衡
/*
 * myblock_map_queues - lay out the queue maps of a tag set.
 * @set: tag set; the device is taken from set->driver_data.
 *
 * Gives every map dev->nr_queues queues at consecutive offsets and lets
 * blk_mq_map_queues() fill in the CPU-to-queue table.
 * Returns 0 on success or the error returned by blk_mq_map_queues().
 */
static int myblock_map_queues(struct blk_mq_tag_set *set)
{
	struct myblock_dev *dev = set->driver_data;
	int i;

	for (i = 0; i < set->nr_maps; i++) {
		struct blk_mq_queue_map *map = &set->map[i];
		int ret;

		map->nr_queues = dev->nr_queues;
		map->queue_offset = i * dev->nr_queues;

		/*
		 * mq_map is an array of queue indices indexed by CPU, not a
		 * cpumask, so it must not be passed to cpumask_setall().
		 * blk_mq_map_queues() spreads the CPUs over this map's queues.
		 */
		ret = blk_mq_map_queues(map);
		if (ret)
			return ret;
	}

	return 0;
}
8. 最佳实践
8.1 内存管理
- 使用适当的内存分配函数:
  - kmalloc():小内存分配
  - vmalloc():大内存分配
  - dma_alloc_coherent():DMA缓冲区
-
避免内存泄漏:
static int myblock_init_memory(struct myblock_dev *dev)
{dev->data_buf = kmalloc(DATA_BUFFER_SIZE, GFP_KERNEL);if (!dev->data_buf)return -ENOMEM;dev->meta_buf = kmalloc(META_BUFFER_SIZE, GFP_KERNEL);if (!dev->meta_buf) {kfree(dev->data_buf);return -ENOMEM;}return 0;
}static void myblock_cleanup_memory(struct myblock_dev *dev)
{kfree(dev->meta_buf);kfree(dev->data_buf);
}
8.2 并发控制
-
使用适当的锁机制:
- 自旋锁:短时间锁定
- 互斥锁:长时间锁定
- RCU:读多写少场景
-
避免死锁:
/*
 * myblock_lock_queues - take the spinlock of every queue of the device.
 * @dev: device whose dev->queues[0 .. nr_queues-1] locks are acquired.
 *
 * Locks are always taken in ascending index order to avoid deadlock.
 */
static void myblock_lock_queues(struct myblock_dev *dev)
{
	int idx = 0;

	while (idx < dev->nr_queues) {
		spin_lock(&dev->queues[idx].lock);
		idx++;
	}
}

/*
 * myblock_unlock_queues - release the locks taken by myblock_lock_queues().
 * @dev: device whose queue locks are released, highest index first.
 */
static void myblock_unlock_queues(struct myblock_dev *dev)
{
	int idx = dev->nr_queues;

	/* Walk from nr_queues - 1 down to 0 */
	while (idx-- > 0)
		spin_unlock(&dev->queues[idx].lock);
}
8.3 错误恢复
- 实现超时机制:
/*
 * myblock_timeout_handler - timeout callback.
 * @data: the struct myblock_dev pointer, passed as an unsigned long.
 *
 * Does nothing unless the device is in DEVICE_STATE_BUSY. Otherwise it counts
 * the timeout, and once the count exceeds MAX_TIMEOUT_COUNT it marks the
 * device DEVICE_STATE_ERROR and resets it.
 */
static void myblock_timeout_handler(unsigned long data)
{
	struct myblock_dev *mydev = (struct myblock_dev *)data;

	if (mydev->state != DEVICE_STATE_BUSY)
		return;

	mydev->timeout_count++;
	if (mydev->timeout_count <= MAX_TIMEOUT_COUNT)
		return;

	mydev->state = DEVICE_STATE_ERROR;
	/* Reset the device */
	myblock_reset_device(mydev);
}
- 实现重试机制:
/*
 * myblock_retry_request - execute a request, retrying on failure.
 * @dev: device to execute on.
 * @req: request to execute.
 *
 * Makes at least one and at most MAX_RETRIES attempts, sleeping
 * RETRY_DELAY_MS between attempts. Returns 0 as soon as an attempt succeeds,
 * otherwise the error of the last attempt.
 */
static int myblock_retry_request(struct myblock_dev *dev, struct request *req)
{
	int ret;
	int retries = 0;

	for (;;) {
		ret = myblock_execute_request(dev, req);
		if (ret == 0)
			return 0;

		/* Give up without sleeping: no further attempt follows. */
		if (++retries >= MAX_RETRIES)
			break;

		msleep(RETRY_DELAY_MS);
	}

	return ret;
}
9. 总结
Linux块设备驱动开发涉及内核多个子系统,需要深入理解:
- 内核块设备子系统架构
- 请求队列和I/O调度机制
- bio和request的处理
- 多队列和电源管理
- 错误处理和性能优化
开发高质量的块设备驱动需要:
- 扎实的内核编程基础
- 对硬件特性的深入理解
- 充分的测试和调试
- 持续的性能优化
通过遵循最佳实践和不断学习,可以开发出稳定、高效的块设备驱动程序。
