AF3 OpenFoldMultimerDataModule类解读
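AlphaFold3 data_modules 模块中的 OpenFoldMultimerDataModule 类继承自 OpenFoldDataModule 类,并且针对多聚体(multimer)蛋白质训练做了特化。在 OpenFoldMultimerDataModule 中,除了继承自 OpenFoldDataModule 的所有功能外,还额外要求提供与多聚体相关的数据缓存路径(mmcif_data_cache_path,即构造函数中的 train_mmcif_data_cache_path 与 val_mmcif_data_cache_path 两个参数)。该缓存文件由 scripts/generate_mmcif_cache.py 生成,用于记录每个 mmCIF 文件包含哪些链(chain)。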
源代码:
class OpenFoldMultimerDataModule(OpenFoldDataModule):
"""
Create a datamodule specifically for multimer training
Compared to OpenFoldDataModule, OpenFoldMultimerDataModule
requires mmcif_data_cache_path which is the product of
scripts/generate_mmcif_cache.py mmcif_data_cache_path should be
a file that record what chain(s) each mmcif file has
"""
def __init__(
    self,
    config: mlc.ConfigDict,
    template_mmcif_dir: str,
    max_template_date: str,
    train_data_dir: Optional[str] = None,
    train_mmcif_data_cache_path: Optional[str] = None,
    val_mmcif_data_cache_path: Optional[str] = None,
    **kwargs,
):
    """Initialise the multimer data module.

    The first four arguments, plus any extra keyword arguments, are
    forwarded unchanged to the parent ``OpenFoldDataModule`` constructor.
    The two mmCIF cache paths are specific to this subclass and are only
    stored here for later use.

    Args:
        config: Configuration object handed to the parent constructor.
        template_mmcif_dir: Forwarded to the parent constructor.
        max_template_date: Forwarded to the parent constructor.
        train_data_dir: Forwarded to the parent constructor; may be None.
        train_mmcif_data_cache_path: Optional path to the mmCIF cache
            used for training (presumably the file produced by
            scripts/generate_mmcif_cache.py -- confirm against callers).
        val_mmcif_data_cache_path: Optional path to the mmCIF cache used
            for validation (same assumption as above).
        **kwargs: Any remaining options accepted by the parent class.
    """
    parent_args = (
        config,
        template_mmcif_dir,
        max_template_date,
        train_data_dir,
    )
    super().__init__(*parent_args, **kwargs)

    self.train_mmcif_data_cache_path = train_mmcif_data_cache_path
    # Training mode is on exactly when a training data directory is set.
    # NOTE(review): self.train_data_dir is presumably assigned by the
    # parent constructor above -- confirm.
    self.training_mode = self.train_data_dir is not None
    self.val_mmcif_data_cache_path = val_mmcif_data_cache_path
def setup(self, setup=None):
# Most of the arguments are the same for the three datasets
dataset_gen = partial(OpenFoldSingleMultimerDataset,
template_mmcif_dir=self.template_mmcif_dir,
max_template_date=self.max_template_date,
config=self.config,
kalign_binary_path=self.kalign_binary_path,
template_release_dates_cache_path=self.template_release_dates_cache_path,
obsolete_pdbs_f