GPU虚拟化实现(六)
- 章节回顾
- 初步介绍
- 详细介绍
- 共享内存和初始化
- GPU 内存管理
- GPU 利用率管理
- 锁机制
- 总结
章节回顾
在上一章,基本上介绍了项目的allocator模块(负责显存分配释放)的运行流程,在这一章,将会介绍项目的multiprocess模块,这个模块主要负责算力资源的分配。
初步介绍
项目的multiprocess模块代码管理多个进程对 GPU 资源(主要是内存和计算单元)的共享使用。它通过创建一个共享内存区域(shared_region_t)来记录每个进程的 GPU 内存使用情况、计算资源(SM 利用率)以及进程状态等信息。同时,它提供了锁机制(信号量和文件锁)来避免多进程冲突。此外,它还实现了对 GPU 资源利用率的监控和限制功能,确保单个进程不会过度占用资源。
详细介绍
项目中的multiprocess模块主要有两个代码文件
multiprocess_memory_limit.c
这个代码文件定义了共享内存结构,负责 GPU 内存和利用率的记录与管理,同时提供获取和设置 GPU 内存使用量、SM(流式多处理器)利用率限制的接口。
multiprocess_utilization_watcher.c
这个代码文件实现了 GPU 利用率的监控和动态调整机制:通过 NVML(NVIDIA 管理库)接口获取当前 GPU 的使用情况,同时启动一个后台线程(utilization_watcher),周期性地检查 GPU 利用率,并根据预设的限制动态调整资源分配。
共享内存和初始化
void ensure_initialized() {(void) pthread_once(®ion_info.init_status, initialized);
}
void initialized() {pthread_mutex_init(&_kernel_mutex, NULL);char* _record_kernel_interval_env = getenv("RECORD_KERNEL_INTERVAL");if (_record_kernel_interval_env) {_record_kernel_interval = atoi(_record_kernel_interval_env);}try_create_shrreg();init_proc_slot_withlock();
}
void try_create_shrreg() {LOG_DEBUG("Try create shrreg");if (region_info.fd == -1) {// use .fd to indicate whether a reinit after fork happen// no need to register exit handler after forkif (0 != atexit(exit_handler)) {LOG_ERROR("Register exit handler failed: %d", errno);}}enable_active_oom_killer = set_active_oom_killer();env_utilization_switch = set_env_utilization_switch();pthread_atfork(NULL, NULL, child_reinit_flag);region_info.pid = getpid();region_info.fd = -1;region_info.last_kernel_time = time(NULL);umask(0);char* shr_reg_file = getenv(MULTIPROCESS_SHARED_REGION_CACHE_ENV);if (shr_reg_file == NULL) {shr_reg_file = MULTIPROCESS_SHARED_REGION_CACHE_DEFAULT;}// Initialize NVML BEFORE!! open it//nvmlInit();/* If you need sm modification, do it here *//* ... set_sm_scale */int fd = open(shr_reg_file, O_RDWR | O_CREAT, 0666);if (fd == -1) {LOG_ERROR("Fail to open shrreg %s: errno=%d", shr_reg_file, errno);}region_info.fd = fd;size_t offset = lseek(fd, SHARED_REGION_SIZE_MAGIC, SEEK_SET);if (offset != SHARED_REGION_SIZE_MAGIC) {LOG_ERROR("Fail to init shrreg %s: errno=%d", shr_reg_file, errno);}size_t check_bytes = write(fd, "\0", 1);if (check_bytes != 1) {LOG_ERROR("Fail to write shrreg %s: errno=%d", shr_reg_file, errno);}if (lseek(fd, 0, SEEK_SET) != 0) {LOG_ERROR("Fail to reseek shrreg %s: errno=%d", shr_reg_file, errno);}region_info.shared_region = (shared_region_t*) mmap(NULL, SHARED_REGION_SIZE_MAGIC, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0);shared_region_t* region = region_info.shared_region;if (region == NULL) {LOG_ERROR("Fail to map shrreg %s: errno=%d", shr_reg_file, errno);}if (lockf(fd, F_LOCK, SHARED_REGION_SIZE_MAGIC) != 0) {LOG_ERROR("Fail to lock shrreg %s: errno=%d", shr_reg_file, errno);}//put_device_info();if (region->initialized_flag != MULTIPROCESS_SHARED_REGION_MAGIC_FLAG) {region->major_version = MAJOR_VERSION;region->minor_version = MINOR_VERSION;//init_device_info();do_init_device_memory_limits(region->limit, 
CUDA_DEVICE_MAX_COUNT);do_init_device_sm_limits(region->sm_limit,CUDA_DEVICE_MAX_COUNT);if (sem_init(®ion->sem, 1, 1) != 0) {LOG_ERROR("Fail to init sem %s: errno=%d", shr_reg_file, errno);}__sync_synchronize();region->sm_init_flag = 0;region->utilization_switch = 1;region->recent_kernel = 2;region->priority = 1;if (getenv(CUDA_TASK_PRIORITY_ENV)!=NULL)region->priority = atoi(getenv(CUDA_TASK_PRIORITY_ENV));region->initialized_flag = MULTIPROCESS_SHARED_REGION_MAGIC_FLAG;} else {if (region->major_version != MAJOR_VERSION || region->minor_version != MINOR_VERSION) {LOG_ERROR("The current version number %d.%d"" is different from the file's version number %d.%d",MAJOR_VERSION, MINOR_VERSION,region->major_version, region->minor_version);}uint64_t local_limits