From 01ea27d405aafdfabafc2b7366dba88aa506bd19 Mon Sep 17 00:00:00 2001
From: Dario Coscia
Date: Mon, 17 Mar 2025 12:29:40 +0100
Subject: [PATCH] modify automatic batching doc

---
 pina/data/data_module.py | 11 ++++++++++-
 pina/trainer.py          | 10 +++++++++-
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/pina/data/data_module.py b/pina/data/data_module.py
index cc09425..9ef419f 100644
--- a/pina/data/data_module.py
+++ b/pina/data/data_module.py
@@ -81,7 +81,16 @@ class Collator:
     :param dict max_conditions_lengths: ``dict`` containing the maximum
         number of data points to consider in a single batch for each
         condition.
-    :param bool automatic_batching: Whether to enable automatic batching.
+    :param bool automatic_batching: Whether to enable automatic batching.
+        If ``True``, standard PyTorch automatic batching is performed:
+        the elements are extracted from the dataset one at a time and
+        then collated into a batch. This is useful when the dataset is
+        too large to fit into memory. If ``False``, all the items in a
+        batch are retrieved from the dataset in a single call, avoiding
+        the overhead of collating them one by one and reducing the
+        number of ``__getitem__`` calls to the dataset. This is useful
+        when the dataset fits into memory. Avoid automatic batching
+        when ``batch_size`` is large. Default is ``False``.
     :param PinaDataset dataset: The dataset where the data is stored.
     """
 
diff --git a/pina/trainer.py b/pina/trainer.py
index c76c9f7..a29152c 100644
--- a/pina/trainer.py
+++ b/pina/trainer.py
@@ -170,7 +170,15 @@ class Trainer(lightning.pytorch.Trainer):
         validation dataset.
     :param int batch_size: The number of samples per batch to load.
     :param bool automatic_batching: Whether to perform automatic batching
-        with PyTorch.
+        with PyTorch. If ``True``, standard PyTorch automatic batching
+        is performed: the elements are extracted from the dataset one at
+        a time and then collated into a batch. This is useful when the
+        dataset is too large to fit into memory. If ``False``, all the
+        items in a batch are retrieved from the dataset in a single call,
+        avoiding the overhead of collating them one by one and reducing
+        the number of ``__getitem__`` calls to the dataset. This is useful
+        when the dataset fits into memory. Avoid automatic batching when
+        ``batch_size`` is large. Default is ``False``.
     :param bool pin_memory: Whether to use pinned memory for faster data
         transfer to GPU.
     :param int num_workers: The number of worker threads for data loading.
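
For reference, a minimal usage sketch of the ``automatic_batching`` flag documented above. This is illustrative only and not part of the patch: ``solver`` stands for any already-built PINA solver, ``max_epochs`` is a standard Lightning argument forwarded by ``Trainer``, and the ``trainer.train()`` call assumes the usual PINA training entry point; only ``batch_size`` and ``automatic_batching`` come from the signature touched by the hunks above.

    # Hypothetical setup: `solver` is an already-initialized PINA solver.
    from pina.trainer import Trainer

    trainer = Trainer(
        solver,
        batch_size=64,
        # False: each batch is fetched from the dataset in a single call,
        # avoiding per-item __getitem__ and collate overhead. Prefer this
        # when the dataset fits in memory or batch_size is large.
        automatic_batching=False,
        max_epochs=100,
    )
    trainer.train()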