llama.cpp学习笔记:后端加载
单例
// Handle types: a registered backend and a backend device are both referred
// to through a pointer to their (separately defined) struct.
typedef struct ggml_backend_reg * ggml_backend_reg_t;
typedef struct ggml_backend_device * ggml_backend_dev_t;

// One entry of the registry: a backend registration handle together with the
// dl_handle_ptr stored next to it.
// NOTE(review): dl_handle_ptr is declared elsewhere; presumably it owns the
// dynamic-library handle the backend was loaded from - confirm.
struct ggml_backend_reg_entry {
    ggml_backend_reg_t reg;
    dl_handle_ptr handle;
};

// Registry object holding the list of backend entries and the list of device
// handles. It is declared after the types it stores so that every name is
// visible at its point of use.
struct ggml_backend_registry {
    std::vector<ggml_backend_reg_entry> backends;
    std::vector<ggml_backend_dev_t> devices;
    // ... (remaining members are omitted in these notes)
};
// A backend device, described by three members:
//   iface   - the device interface table (struct ggml_backend_device_i)
//   reg     - handle of the backend registration this device belongs to
//   context - untyped pointer to device-specific data
struct ggml_backend_device {
    struct ggml_backend_device_i iface;
    ggml_backend_reg_t           reg;
    void *                       context;
};
每种后端一个单例:cuda、cpu等
后端注册信息:版本号、接口和上下文信息
struct ggml_backend_reg {int api_version; // initialize to GGML_BACKEND_API_VERSIONstruct ggml_backend_reg_i iface;void * context;};
后端对应的设备:接口、所属后端和上下文信息
// Device handle: a pointer to struct ggml_backend_device.
typedef struct ggml_backend_device * ggml_backend_dev_t;

// Device record.
struct ggml_backend_device {
    // Interface table for this device.
    struct ggml_backend_device_i iface;
    // Registration handle of the backend that this device belongs to.
    ggml_backend_reg_t reg;
    // Untyped pointer to device-specific data.
    void * context;
};
cuda后端
cuda后端iface
// Registration interface table for the CUDA backend.
// Entries are given positionally; the comment in front of each entry names
// the ggml_backend_reg_i slot it fills.
static const ggml_backend_reg_i ggml_backend_cuda_reg_interface = {
    /* .get_name         = */ ggml_backend_cuda_reg_get_name,
    /* .get_device_count = */ ggml_backend_cuda_reg_get_device_count,
    /* .get_device       = */ ggml_backend_cuda_reg_get_device,
    /* .get_proc_address = */ ggml_backend_cuda_reg_get_proc_address,
};
cuda后端context,存储多个cuda设备(GPU)的接口、所属后端和上下文等信息
// Context data of the CUDA backend registration: the list of device handles
// that belong to it.
struct ggml_backend_cuda_reg_context {
    std::vector<ggml_backend_dev_t> devices;
};
cuda设备GPU的接口iface
// Device interface table for a CUDA device.
// Entries are given positionally; the comment in front of each entry names
// the ggml_backend_device_i slot it fills. The buffer_from_host_ptr slot is
// left NULL.
static const ggml_backend_device_i ggml_backend_cuda_device_interface = {
    /* .get_name             = */ ggml_backend_cuda_device_get_name,
    /* .get_description      = */ ggml_backend_cuda_device_get_description,
    /* .get_memory           = */ ggml_backend_cuda_device_get_memory,
    /* .get_type             = */ ggml_backend_cuda_device_get_type,
    /* .get_props            = */ ggml_backend_cuda_device_get_props,
    /* .init_backend         = */ ggml_backend_cuda_device_init_backend,
    /* .get_buffer_type      = */ ggml_backend_cuda_device_get_buffer_type,
    /* .get_host_buffer_type = */ ggml_backend_cuda_device_get_host_buffer_type,
    /* .buffer_from_host_ptr = */ NULL,
    /* .supports_op          = */ ggml_backend_cuda_device_supports_op,
    /* .supports_buft        = */ ggml_backend_cuda_device_supports_buft,
    /* .offload_op           = */ ggml_backend_cuda_device_offload_op,
    /* .event_new            = */ ggml_backend_cuda_device_event_new,
    /* .event_free           = */ ggml_backend_cuda_device_event_free,
    /* .event_synchronize    = */ ggml_backend_cuda_device_event_synchronize,
};
cuda设备GPU的context
// Context data of a single CUDA device: an integer device value plus a name
// string and a description string.
struct ggml_backend_cuda_device_context {
    int         device;
    std::string name;
    std::string description;
};
CPU后端
CPU后端iface
// Registration interface table for the CPU backend.
// Entries are given positionally; the comment in front of each entry names
// the ggml_backend_reg_i slot it fills.
static const struct ggml_backend_reg_i ggml_backend_cpu_reg_i = {
    /* .get_name         = */ ggml_backend_cpu_reg_get_name,
    /* .get_device_count = */ ggml_backend_cpu_reg_get_device_count,
    /* .get_device       = */ ggml_backend_cpu_reg_get_device,
    /* .get_proc_address = */ ggml_backend_cpu_get_proc_address,
};
CPU后端无context
CPU设备接口iface
static const struct ggml_backend_device_i ggml_backend_cpu_device_i = {/* .get_name = */ ggml_backend_cpu_device_get_name,/* .get_description = */ ggml_backend_cpu_device_get_description,/* .get_memory = */ ggml_backend_cpu_device_get_memory,/* .get_type = */ ggml_backend_cpu_device_get_type,/* .get_props = */ ggml_backend_cpu_device_get_props,/* .init_backend = */ ggml_backend_cpu_device_init_backend,/* .get_buffer_type = */ ggml_backend_cpu_device_get_buffer_type,/* .get_host_buffer_type = */ NULL,/* .buffer_from_host_ptr = */ ggml_backend_cpu_device_buffer_from_host_ptr,/* .supports_op = */ ggml_backend_cpu_device_supports_op,/* .supports_buft = */ ggml_backend_cpu_device_supports_buft,/* .offload_op = */ NULL,/* .event_new = */ NULL,/* .event_free = */ NULL,/* .event_synchronize = */ NULL,
};
CPU设备的context
// Context data of the CPU device. The description string defaults to "CPU".
// The struct definition is terminated with `;` so that it is a complete
// declaration.
struct ggml_backend_cpu_device_context {
    std::string description = "CPU";
    // ... (remaining members are omitted in these notes)
};