fix collect_data

2025-11-18 17:08:22 +01:00
10 changed files with 579 additions and 383 deletions
--- a/README.md
+++ b/README.md
@@ -7,8 +7,8 @@ SPDX-License-Identifier: Apache-2.0
 <table>
  <tr>
    <td>
-      <a href="readme/pina_logo.png">
+      <a href="https://github.com/mathLab/PINA/raw/master/readme/pina_logo.png">
-        <img src="readme/pina_logo.png"
+        <img src="https://github.com/mathLab/PINA/raw/master/readme/pina_logo.png"
             alt="PINA logo"
             style="width: 220px; aspect-ratio: 1 / 1; object-fit: contain;">
      </a>
--- a/docs/source/index_files/PINA_logo.png
+++ b/docs/source/index_files/PINA_logo.png
--- a/pina/data/init.py
+++ b/pina/data/init.py
@@ -4,3 +4,4 @@ __all__ = ["PinaDataModule", "PinaDataset"]
 from .data_module import PinaDataModule
 from .dataset import PinaDataset
--- a/pina/data/data_module.py
+++ b/pina/data/data_module.py
@@ -7,9 +7,232 @@ different types of Datasets defined in PINA.
 import warnings
 from lightning.pytorch import LightningDataModule
 import torch
 from torch_geometric.data import Data
 from torch.utils.data import DataLoader, SequentialSampler, RandomSampler
 from torch.utils.data.distributed import DistributedSampler
 from ..label_tensor import LabelTensor
-from .dataset import PinaDatasetFactory
+from .dataset import PinaDatasetFactory, PinaTensorDataset
-from .dataloader import PinaDataLoader
+
 class DummyDataloader:
    def __init__(self, dataset):
        """
        Prepare a dataloader object that returns the entire dataset in a single
        batch. Depending on the number of GPUs, the dataset is managed
        as follows:
        - **Distributed Environment** (multiple GPUs): Divides dataset across
            processes using the rank and world size. Fetches only portion of
            data corresponding to the current process.
        - **Non-Distributed Environment** (single GPU): Fetches the entire
            dataset.
        :param PinaDataset dataset: The dataset object to be processed.
        .. note::
           This dataloader is used when the batch size is ``None``.
        """
        if (
            torch.distributed.is_available()
            and torch.distributed.is_initialized()
        ):
            rank = torch.distributed.get_rank()
            world_size = torch.distributed.get_world_size()
            if len(dataset) < world_size:
                raise RuntimeError(
                    "Dimension of the dataset smaller than world size."
                    " Increase the size of the partition or use a single GPU"
                )
            idx, i = [], rank
            while i < len(dataset):
                idx.append(i)
                i += world_size
            self.dataset = dataset.fetch_from_idx_list(idx)
        else:
            self.dataset = dataset.get_all_data()
    def __iter__(self):
        return self
    def __len__(self):
        return 1
    def __next__(self):
        return self.dataset
 class Collator:
    """
    Callable used as ``collate_fn`` to build a batch out of what the
    dataloader fetched from the dataset. Two choices are made once, at
    construction time: the batching strategy (automatic batching or not) and
    the way single elements are merged (tensor dataset or graph dataset).
    """

    def __init__(
        self, max_conditions_lengths, automatic_batching, dataset=None
    ):
        """
        Store the configuration and select the collate functions.

        :param dict max_conditions_lengths: ``dict`` containing the maximum
            number of data points to consider in a single batch for
            each condition.
        :param bool automatic_batching: Whether automatic PyTorch batching is
            enabled or not. For more information, see the
            :class:`~pina.data.data_module.PinaDataModule` class.
        :param PinaDataset dataset: The dataset where the data is stored.
        """
        self.max_conditions_lengths = max_conditions_lengths
        self.dataset = dataset

        # Batch-level strategy: with automatic batching the batch is a list
        # of samples to merge, otherwise it is a list of indices to fetch
        if automatic_batching:
            self.callable_function = self._collate_torch_dataloader
        else:
            self.callable_function = self._collate_pina_dataloader

        # Element-level merge: tensors are stacked for a PinaTensorDataset,
        # any other dataset goes through the graph collation
        tensor_backed = isinstance(self.dataset, PinaTensorDataset)
        self._collate = (
            self._collate_tensor_dataset
            if tensor_backed
            else self._collate_graph_dataset
        )

    def _collate_pina_dataloader(self, batch):
        """
        Build the batch when automatic batching is disabled, by asking the
        dataset for the sampled indices all at once.

        :param list[int] batch: Indices of the data points to be fetched.
        :return: Dictionary containing the data points fetched from the dataset.
        :rtype: dict
        """
        return self.dataset.fetch_from_idx_list(batch)

    def _collate_torch_dataloader(self, batch):
        """
        Build the batch when automatic batching is enabled, merging the
        samples condition by condition and field by field.

        :param list[dict] batch: List of retrieved data. If a ``dict`` is
            received instead, it is returned untouched.
        :return: Dictionary containing the data points fetched from the dataset,
            collated.
        :rtype: dict
        """
        if isinstance(batch, dict):
            return batch

        first_sample = batch[0]
        collated = {}
        for condition_name in first_sample.keys():
            fields = {}
            for arg in first_sample[condition_name].keys():
                # Use at most the maximum number of points of the condition
                n_used = min(
                    len(batch), self.max_conditions_lengths[condition_name]
                )
                fields[arg] = self._collate(
                    [batch[pos][condition_name][arg] for pos in range(n_used)]
                )
            collated[condition_name] = fields
        return collated

    @staticmethod
    def _collate_tensor_dataset(data_list):
        """
        Merge the elements when the dataset is a
        :class:`~pina.data.dataset.PinaTensorDataset`, stacking them along a
        new leading dimension.

        :param data_list: Elements to be collated.
        :type data_list: list[torch.Tensor] | list[LabelTensor]
        :return: Batch of data.
        :rtype: torch.Tensor | LabelTensor
        :raises RuntimeError: If the data is not a :class:`torch.Tensor` or a
            :class:`~pina.label_tensor.LabelTensor`.
        """
        # The type of the first element decides how the list is merged
        head = data_list[0]
        if isinstance(head, LabelTensor):
            return LabelTensor.stack(data_list)
        if isinstance(head, torch.Tensor):
            return torch.stack(data_list)
        raise RuntimeError("Data must be Tensors or LabelTensor ")

    def _collate_graph_dataset(self, data_list):
        """
        Merge the elements when the dataset is a
        :class:`~pina.data.dataset.PinaGraphDataset`. Tensors are
        concatenated, graphs are batched by the dataset itself.

        :param data_list: Elements to be collated.
        :type data_list: list[torch.Tensor] | list[LabelTensor] | list[Data]
        :return: Batch of data.
        :raises RuntimeError: If the data is not a :class:`torch.Tensor`, a
            :class:`~pina.label_tensor.LabelTensor` or a
            :class:`~torch_geometric.data.Data`.
        """
        # The type of the first element decides how the list is merged
        head = data_list[0]
        if isinstance(head, LabelTensor):
            return LabelTensor.cat(data_list)
        if isinstance(head, torch.Tensor):
            return torch.cat(data_list)
        if isinstance(head, Data):
            return self.dataset.create_batch(data_list)
        raise RuntimeError(
            "Data must be Tensors or LabelTensor or pyG "
            "torch_geometric.data.Data"
        )

    def __call__(self, batch):
        """
        Collate the data fetched from the dataset, using the batching
        strategy selected during class initialization.

        :param batch: List of retrieved data or sampled indices.
        :type batch: list[int] | list[dict]
        :return: Dictionary containing collated data fetched from the dataset.
        :rtype: dict
        """
        return self.callable_function(batch)
 class PinaSampler:
    """
    Factory for the sampler used by the dataloaders. The kind of sampler
    returned depends only on whether a distributed process group is running.
    """

    def __new__(cls, dataset):
        """
        Instantiate the sampler: a
        :class:`~torch.utils.data.distributed.DistributedSampler` when
        ``torch.distributed`` is available and initialized, a
        :class:`~torch.utils.data.SequentialSampler` otherwise.

        :param PinaDataset dataset: The dataset from which to sample.
        :return: The sampler instance.
        :rtype: :class:`torch.utils.data.Sampler`
        """
        distributed = (
            torch.distributed.is_available()
            and torch.distributed.is_initialized()
        )
        if distributed:
            return DistributedSampler(dataset)
        return SequentialSampler(dataset)
 class PinaDataModule(LightningDataModule):
@@ -27,8 +250,7 @@ class PinaDataModule(LightningDataModule):
        val_size=0.1,
        batch_size=None,
        shuffle=True,
-        common_batch_size=True,
+        repeat=False,
        separate_conditions=False,
        automatic_batching=None,
        num_workers=0,
        pin_memory=False,
@@ -49,12 +271,11 @@ class PinaDataModule(LightningDataModule):
            Default is ``None``.
        :param bool shuffle: Whether to shuffle the dataset before splitting.
            Default ``True``.
-        :param bool common_batch_size: If ``True``, the same batch size is used
+        :param bool repeat: If ``True``, in case of batch size larger than the
-            for all conditions. If ``False``, each condition can have its own
+            number of elements in a specific condition, the elements are
-            batch size, proportional to the size of the dataset in that
+            repeated until the batch size is reached. If ``False``, the number
-            condition. Default is ``True``.
+            of elements in the batch is the minimum between the batch size and
-        :param bool separate_conditions: If ``True``, dataloaders for each
+            the number of elements in the condition. Default is ``False``.
            condition are iterated separately. Default is ``False``.
        :param automatic_batching: If ``True``, automatic PyTorch batching
            is performed, which consists of extracting one element at a time
            from the dataset and collating them into a batch. This is useful
@@ -84,8 +305,7 @@ class PinaDataModule(LightningDataModule):
        # Store fixed attributes
        self.batch_size = batch_size
        self.shuffle = shuffle
-        self.common_batch_size = common_batch_size
+        self.repeat = repeat
        self.separate_conditions = separate_conditions
        self.automatic_batching = automatic_batching
        # If batch size is None, num_workers has no effect
@@ -156,16 +376,23 @@ class PinaDataModule(LightningDataModule):
        if stage == "fit" or stage is None:
            self.train_dataset = PinaDatasetFactory(
                self.data_splits["train"],
                max_conditions_lengths=self.find_max_conditions_lengths(
                    "train"
                ),
                automatic_batching=self.automatic_batching,
            )
            if "val" in self.data_splits.keys():
                self.val_dataset = PinaDatasetFactory(
                    self.data_splits["val"],
                    max_conditions_lengths=self.find_max_conditions_lengths(
                        "val"
                    ),
                    automatic_batching=self.automatic_batching,
                )
        elif stage == "test":
            self.test_dataset = PinaDatasetFactory(
                self.data_splits["test"],
                max_conditions_lengths=self.find_max_conditions_lengths("test"),
                automatic_batching=self.automatic_batching,
            )
        else:
@@ -275,15 +502,53 @@ class PinaDataModule(LightningDataModule):
            ),
            module="lightning.pytorch.trainer.connectors.data_connector",
        )
-        return PinaDataLoader(
+        # Use custom batching (good if batch size is large)
-            dataset,
+        if self.batch_size is not None:
-            batch_size=self.batch_size,
+            sampler = PinaSampler(dataset)
-            shuffle=self.shuffle,
+            if self.automatic_batching:
-            num_workers=self.num_workers,
+                collate = Collator(
-            collate_fn=None,
+                    self.find_max_conditions_lengths(split),
-            common_batch_size=self.common_batch_size,
+                    self.automatic_batching,
-            separate_conditions=self.separate_conditions,
+                    dataset=dataset,
                )
            else:
                collate = Collator(
                    None, self.automatic_batching, dataset=dataset
                )
            return DataLoader(
                dataset,
                self.batch_size,
                collate_fn=collate,
                sampler=sampler,
                num_workers=self.num_workers,
            )
        dataloader = DummyDataloader(dataset)
        dataloader.dataset = self._transfer_batch_to_device(
            dataloader.dataset, self.trainer.strategy.root_device, 0
        )
        self.transfer_batch_to_device = self._transfer_batch_to_device_dummy
        return dataloader
    def find_max_conditions_lengths(self, split):
        """
        Compute, for each condition of a split, the maximum number of points
        that can enter a single batch.

        :param str split: Key of ``self.data_splits`` selecting the split.
        :return: The maximum length per condition.
        :rtype: dict
        """

        def _cap(condition_data):
            # No batching: the whole condition is used
            if self.batch_size is None:
                return len(condition_data["input"])
            # With repetition, every condition fills the batch size
            if self.repeat:
                return self.batch_size
            # Otherwise a condition cannot give more points than it has
            return min(len(condition_data["input"]), self.batch_size)

        return {
            name: _cap(condition_data)
            for name, condition_data in self.data_splits[split].items()
        }
    def val_dataloader(self):
        """
--- a/pina/data/dataloader.py
+++ b/pina/data/dataloader.py
@@ -1,242 +0,0 @@
 from torch.utils.data import DataLoader
 from functools import partial
 from torch.utils.data.distributed import DistributedSampler
 from torch.utils.data.sampler import SequentialSampler
 import torch
 class DummyDataloader:
    """
    Dataloader-like object that serves the same pre-fetched data as a single
    batch at every iteration.

    .. note::
       This dataloader is used when the batch size is ``None``.
    """

    def __init__(self, dataset):
        """
        Fetch the data that will be returned at every iteration. Depending on
        the environment, the dataset is managed as follows:

        - **Distributed Environment** (process group initialized): the
            current process only fetches the indices ``rank``,
            ``rank + world_size``, ``rank + 2 * world_size``, ...
        - **Non-Distributed Environment**: every index of the dataset is
            fetched.

        :param PinaDataset dataset: The dataset object to be processed.
        :raises RuntimeError: If, in a distributed environment, the dataset
            has fewer elements than the world size.
        """
        n_items = len(dataset)
        if (
            torch.distributed.is_available()
            and torch.distributed.is_initialized()
        ):
            rank = torch.distributed.get_rank()
            world_size = torch.distributed.get_world_size()
            if n_items < world_size:
                raise RuntimeError(
                    "Dimension of the dataset smaller than world size."
                    " Increase the size of the partition or use a single GPU"
                )
            # Strided partition of the indices across the processes
            idx = list(range(rank, n_items, world_size))
        else:
            idx = list(range(n_items))
        self.dataset = dataset._getitem_from_list(idx)

    def __iter__(self):
        """
        Return the object itself, which acts as its own iterator.
        """
        return self

    def __len__(self):
        """
        Return the number of batches, which is always one.
        """
        return 1

    def __next__(self):
        """
        Return the pre-fetched data. ``StopIteration`` is never raised here.
        """
        return self.dataset
 class PinaSampler:
    """
    Factory for the sampler used by the dataloaders. The kind of sampler
    returned depends on the shuffle flag and on whether a distributed
    process group is running.
    """

    def __new__(cls, dataset, shuffle=True):
        """
        Instantiate the sampler.

        :param PinaDataset dataset: The dataset from which to sample.
        :param bool shuffle: Whether the indices are shuffled. Default is
            ``True``.
        :return: The sampler instance.
        :rtype: :class:`torch.utils.data.Sampler`
        """
        distributed = (
            torch.distributed.is_available()
            and torch.distributed.is_initialized()
        )
        if distributed:
            return DistributedSampler(dataset, shuffle=shuffle)
        if shuffle:
            return torch.utils.data.RandomSampler(dataset)
        return SequentialSampler(dataset)
 def _collect_items(batch):
    """
    Regroup a list of sample dictionaries into a dictionary of lists.

    The output keys are the keys of the first sample; each value lists, in
    batch order, what the samples hold under that key.

    :param list[dict] batch: List of data samples.
    :return: Dictionary mapping each key to the list of collected values.
    :rtype: dict
    """
    grouped = {}
    for field in batch[0].keys():
        grouped[field] = []
    for sample in batch:
        for field, value in sample.items():
            grouped[field].append(value)
    return grouped
 def collate_fn_custom(batch, dataset):
    """
    Collate function for datasets without automatic batching: the sampled
    indices are handed to the dataset, which returns the batch.

    :param batch: List of indices from the dataset.
    :param dataset: The PinaDataset instance (must be provided).
    :return: Whatever ``dataset._getitem_from_list`` returns for ``batch``.
    """
    fetch_many = dataset._getitem_from_list
    return fetch_many(batch)
 def collate_fn_default(batch, stack_fn):
    """
    Collate function for automatic batching: the samples are regrouped key
    by key and each group is merged with the stacking function registered
    for that key.

    :param list[dict] batch: List of data samples.
    :param dict stack_fn: Dictionary mapping each key of the samples to the
        callable used to merge the list of values collected for that key.
    :return: Dictionary mapping each key to the merged values.
    :rtype: dict
    """
    # No printing here: this function runs once per batch
    collected = _collect_items(batch)
    return {key: stack_fn[key](values) for key, values in collected.items()}
 class PinaDataLoader:
    """
    Custom DataLoader for PinaDataset.

    It builds one dataloader per entry of ``dataset_dict`` and, when
    iterated, yields dictionaries mapping entry names to batches.
    """

    def __init__(
        self,
        dataset_dict,
        batch_size,
        shuffle=False,
        num_workers=0,
        collate_fn=None,
        common_batch_size=True,
        separate_conditions=False,
    ):
        """
        Store the configuration and create one dataloader per dataset.

        :param dict dataset_dict: Dictionary mapping names to datasets.
        :param int batch_size: Batch size. If ``None``, every dataset is
            served as a single batch.
        :param bool shuffle: Whether the samplers shuffle the indices.
            Default is ``False``.
        :param int num_workers: Number of workers passed to the underlying
            dataloaders. Default is ``0``.
        :param collate_fn: Stored as an attribute; the collate function of
            each dataloader is chosen from the dataset batching mode.
        :param bool common_batch_size: If ``True``, ``batch_size`` is used
            for every dataset. If ``False``, ``batch_size`` is split across
            the datasets proportionally to their length. Default is ``True``.
        :param bool separate_conditions: If ``True``, the dataloaders are
            iterated one after the other. Default is ``False``.
        """
        self.dataset_dict = dataset_dict
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.num_workers = num_workers
        self.collate_fn = collate_fn
        self.separate_conditions = separate_conditions

        if batch_size is None or common_batch_size:
            # Same value (possibly None) for every dataset
            batch_size_per_dataset = {
                split: batch_size for split in dataset_dict.keys()
            }
        else:
            batch_size_per_dataset = self._compute_batch_size()

        self.dataloaders = {
            split: self._create_dataloader(
                dataset, batch_size_per_dataset[split]
            )
            for split, dataset in dataset_dict.items()
        }

    def _compute_batch_size(self):
        """
        Split ``self.batch_size`` across the datasets proportionally to their
        length, giving at least one element to each dataset.

        The per-dataset values sum to ``self.batch_size`` whenever that is
        possible. With more datasets than ``self.batch_size`` each dataset
        keeps one element, so the sum exceeds ``self.batch_size``.

        :return: Dictionary mapping each dataset name to its batch size.
        :rtype: dict
        """
        elements_per_dataset = {
            name: len(dataset) for name, dataset in self.dataset_dict.items()
        }
        total_elements = sum(elements_per_dataset.values())
        batch_size_per_dataset = {
            name: max(1, int((el / total_elements) * self.batch_size))
            for name, el in elements_per_dataset.items()
        }
        difference = self.batch_size - sum(batch_size_per_dataset.values())

        if difference > 0:
            # Rounding down left some elements unassigned. Hand them out
            # round-robin, first to the datasets already above one element;
            # if there is none, fall back to all the datasets so that the
            # loop is guaranteed to terminate.
            receivers = [
                name for name, v in batch_size_per_dataset.items() if v > 1
            ] or list(batch_size_per_dataset)
            while difference > 0 and receivers:
                for name in receivers:
                    if difference == 0:
                        break
                    batch_size_per_dataset[name] += 1
                    difference -= 1
        elif difference < 0:
            # The minimum of one element per dataset overshot the batch
            # size. Shrink round-robin the datasets above one element, and
            # stop as soon as nothing can be shrunk anymore.
            excess = -difference
            while excess > 0:
                shrunk = False
                for name in batch_size_per_dataset:
                    if excess == 0:
                        break
                    if batch_size_per_dataset[name] > 1:
                        batch_size_per_dataset[name] -= 1
                        excess -= 1
                        shrunk = True
                if not shrunk:
                    break
        return batch_size_per_dataset

    def _create_dataloader(self, dataset, batch_size):
        """
        Create the dataloader of a single dataset.

        :param dataset: The dataset to be loaded.
        :param int batch_size: Batch size for this dataset. If ``None``, a
            ``DummyDataloader`` is returned.
        :return: The dataloader for the dataset.
        """
        if batch_size is None:
            return DummyDataloader(dataset)

        # Without automatic batching the dataset items are indices, which
        # are resolved in one go by the dataset itself
        if not dataset.automatic_batching:
            collate_fn = partial(collate_fn_custom, dataset=dataset)
        else:
            collate_fn = partial(collate_fn_default, stack_fn=dataset.stack_fn)
        return DataLoader(
            dataset,
            batch_size=batch_size,
            num_workers=self.num_workers,
            collate_fn=collate_fn,
            sampler=PinaSampler(dataset, shuffle=self.shuffle),
        )

    def __len__(self):
        """
        Return the number of iterations: the sum of the dataloader lengths
        when the conditions are separated, the length of the longest
        dataloader otherwise (zero if there are no dataloaders).
        """
        lengths = [len(dl) for dl in self.dataloaders.values()]
        if self.separate_conditions:
            return sum(lengths)
        return max(lengths, default=0)

    def __iter__(self):
        """
        Return an iterator producing dictionaries of batches.

        When the conditions are separated, the dataloaders are exhausted one
        after the other and every dictionary holds a single entry. Otherwise
        the iteration takes as many steps as the longest dataloader (see
        ``__len__``) and the shorter dataloaders are restarted when they run
        out.
        """
        if self.separate_conditions:
            for split, dl in self.dataloaders.items():
                for batch in dl:
                    yield {split: batch}
            return

        iterators = {split: iter(dl) for split, dl in self.dataloaders.items()}
        for _ in range(len(self)):
            batch_dict = {}
            for split, it in iterators.items():
                try:
                    batch = next(it)
                except StopIteration:
                    # Shorter dataloader exhausted: start it over
                    new_it = iter(self.dataloaders[split])
                    iterators[split] = new_it
                    batch = next(new_it)
                batch_dict[split] = batch
            yield batch_dict
--- a/pina/data/dataset.py
+++ b/pina/data/dataset.py
@@ -1,158 +1,326 @@
 """Module for the PINA dataset classes."""
-import torch
+from abc import abstractmethod, ABC
 from torch.utils.data import Dataset
 from torch_geometric.data import Data
 from ..graph import Graph, LabelBatch
 from ..label_tensor import LabelTensor
 class PinaDatasetFactory:
    """
-    TODO: Update docstring
+    Factory class for the PINA dataset.
    Depending on the data type inside the conditions, it instanciate an object
    belonging to the appropriate subclass of 
    :class:`~pina.data.dataset.PinaDataset`. The possible subclasses are:
    - :class:`~pina.data.dataset.PinaTensorDataset`, for handling \
        :class:`torch.Tensor` and :class:`~pina.label_tensor.LabelTensor` data.
    - :class:`~pina.data.dataset.PinaGraphDataset`, for handling \
        :class:`~pina.graph.Graph` and :class:`~torch_geometric.data.Data` data.
    """
    def __new__(cls, conditions_dict, **kwargs):
        """
-        TODO: Update docstring
+        Instantiate the appropriate subclass of
        :class:`~pina.data.dataset.PinaDataset`.
        If a graph is present in the conditions, returns a
        :class:`~pina.data.dataset.PinaGraphDataset`, otherwise returns a
        :class:`~pina.data.dataset.PinaTensorDataset`.
        :param dict conditions_dict: Dictionary containing all the conditions
            to be included in the dataset instance.
        :return: A subclass of :class:`~pina.data.dataset.PinaDataset`.
        :rtype: PinaTensorDataset | PinaGraphDataset
        :raises ValueError: If an empty dictionary is provided.
        """
        # Check if conditions_dict is empty
        if len(conditions_dict) == 0:
            raise ValueError("No conditions provided")
        dataset_dict = {}
        # Check is a Graph is present in the conditions
-        for name, data in conditions_dict.items():
+        is_graph = cls._is_graph_dataset(conditions_dict)
-            if not isinstance(data, dict):
+        if is_graph:
-                raise ValueError(
+            # If a Graph is present, return a PinaGraphDataset
-                    f"Condition '{name}' data must be a dictionary"
+            return PinaGraphDataset(conditions_dict, **kwargs)
-                )
+        # If no Graph is present, return a PinaTensorDataset
-            dataset_dict[name] = PinaDataset(data, **kwargs)
+        return PinaTensorDataset(conditions_dict, **kwargs)
-        return dataset_dict
+
    @staticmethod
    def _is_graph_dataset(conditions_dict):
        """
        Tell whether the conditions hold graph data, i.e. whether at least
        one value of at least one condition is a
        :class:`~torch_geometric.data.Data`, a :class:`~pina.graph.Graph`, a
        ``list`` or a ``tuple``.

        :param conditions_dict: Dictionary containing the conditions.
        :type conditions_dict: dict
        :return: True if a graph is present in the conditions, False otherwise.
        :rtype: bool
        """
        # Lists and tuples count as graph data, whatever they contain
        return any(
            isinstance(item, (Data, Graph, list, tuple))
            for condition in conditions_dict.values()
            for item in condition.values()
        )
-class PinaDataset(Dataset):
+class PinaDataset(Dataset, ABC):
    """
    Abstract class for the PINA dataset which extends the PyTorch
    :class:`~torch.utils.data.Dataset` class. It defines the common interface
    for :class:`~pina.data.dataset.PinaTensorDataset` and
    :class:`~pina.data.dataset.PinaGraphDataset` classes.
    """
    def __init__(
        self, conditions_dict, max_conditions_lengths, automatic_batching
    ):
        """
        Store the conditions, the per-condition batch limits and the lengths
        derived from the data, then select how items are retrieved.

        :param dict conditions_dict: A dictionary mapping condition names to
            their respective data. Each key represents a condition name, and the
            corresponding value is a dictionary containing the associated data.
        :param dict max_conditions_lengths: Maximum number of data points that
            can be included in a single batch per condition.
        :param bool automatic_batching: Indicates whether PyTorch automatic
            batching is enabled in
            :class:`~pina.data.data_module.PinaDataModule`.
        """
        self.conditions_dict = conditions_dict
        self.max_conditions_lengths = max_conditions_lengths

        # Number of input points of each condition
        lengths = {}
        for name, data in self.conditions_dict.items():
            lengths[name] = len(data["input"])
        self.conditions_length = lengths

        # The dataset is as long as its longest condition
        self.length = max(lengths.values())

        # With automatic batching items are actual data; without it they
        # are plain indices, resolved later by the dataloader
        self._getitem_func = (
            self._getitem_int if automatic_batching else self._getitem_dummy
        )
    def _get_max_len(self):
        """
        Compute the number of input points of the longest condition.

        :return: Length of the longest condition in the dataset, zero if
            there are no conditions.
        :rtype: int
        """
        return max(
            (len(condition["input"]) for condition in self.conditions_dict.values()),
            default=0,
        )
    def __len__(self):
        """
        Return the length of the dataset, as stored in ``self.length``.

        :return: Length of the dataset.
        :rtype: int
        """
        return self.length
    def __getitem__(self, idx):
        """
        Return the item at the given index, delegating to the retrieval
        function stored in ``self._getitem_func``.

        :param int idx: Index.
        :return: Whatever ``self._getitem_func`` returns for ``idx``.
        """
        return self._getitem_func(idx)
    def _getitem_dummy(self, idx):
        """
        Return the index itself. This is used when automatic batching is
        disabled to postpone the data retrieval to the dataloader.

        :param int idx: Index.
        :return: Index.
        :rtype: int
        """
        # No data is read here: the index is handed back unchanged
        return idx
    def _getitem_int(self, idx):
        """
        Retrieve one data point per condition. This is used when automatic
        batching is enabled.

        :param int idx: Index.
        :return: A dictionary containing the data at the given index.
        :rtype: dict
        """
        sample = {}
        for name, data in self.conditions_dict.items():
            # Wrap the index around the number of input points, so that
            # conditions shorter than the index are read cyclically
            position = idx % len(data["input"])
            sample[name] = {field: data[field][position] for field in data.keys()}
        return sample
    def get_all_data(self):
        """
        Retrieve every data point of every condition.

        :return: A dictionary containing all the data in the dataset.
        :rtype: dict
        """
        # For each condition, ask for all the indices of its input points
        return {
            name: self._retrive_data(data, list(range(len(data["input"]))))
            for name, data in self.conditions_dict.items()
        }
    def fetch_from_idx_list(self, idx):
        """
        Retrieve, for every condition, the data at the given indices.

        For each condition only the first ``max_conditions_lengths`` indices
        are used. If the condition is shorter than the dataset, the indices
        are wrapped around the length of the condition.

        :param list[int] idx: List of indices.
        :return: A dictionary containing the data at the given indices.
        :rtype: dict
        """
        batch = {}
        for name, data in self.conditions_dict.items():
            # Keep at most the maximum number of points of this condition
            selected = idx[: self.max_conditions_lengths[name]]
            n_points = self.conditions_length[name]
            # Shorter conditions are read cyclically
            if self.length > n_points:
                selected = [position % n_points for position in selected]
            batch[name] = self._retrive_data(data, selected)
        return batch
    @abstractmethod
    def _retrive_data(self, data, idx_list):
        """
        Abstract method to retrieve data from the dataset given a list of
        indices. It is called once per condition by ``get_all_data`` and
        ``fetch_from_idx_list``.

        :param dict data: Data of a single condition.
        :param list[int] idx_list: Indices to retrieve.
        :return: The data of the condition at the given indices.
        """
 class PinaTensorDataset(PinaDataset):
    """
    Dataset class for the PINA dataset with :class:`torch.Tensor` and
    :class:`~pina.label_tensor.LabelTensor` data.
    """
-    def __init__(self, data_dict, automatic_batching=None):
+    # Override _retrive_data method for torch.Tensor data
    def _retrive_data(self, data, idx_list):
        """
-        Initialize the instance by storing the conditions dictionary.
+        Retrieve data from the dataset given a list of indices.
-        :param dict conditions_dict: A dictionary mapping condition names to
+        :param dict data: Dictionary containing the data
-            their respective data. Each key represents a condition name, and the
+            (only :class:`torch.Tensor` or
-            corresponding value is a dictionary containing the associated data.
+            :class:`~pina.label_tensor.LabelTensor`).
        :param list[int] idx_list: indices to retrieve.
        :return: Dictionary containing the data at the given indices.
        :rtype: dict
        """
-        # Store the conditions dictionary
+        return {k: v[idx_list] for k, v in data.items()}
-        self.data = data_dict
+
-        self.automatic_batching = (
+    @property
-            automatic_batching if automatic_batching is not None else True
+    def input(self):
-        )
+        """
-        self.stack_fn = {}
+        Return the input data for the dataset.
-        # Determine stacking functions for each data type (used in collate_fn)
+
-        for k, v in data_dict.items():
+        :return: Dictionary containing the input points.
-            if isinstance(v, LabelTensor):
+        :rtype: dict
-                self.stack_fn[k] = LabelTensor.stack
+        """
-            elif isinstance(v, torch.Tensor):
+        return {k: v["input"] for k, v in self.conditions_dict.items()}
-                self.stack_fn[k] = torch.stack
+
-            elif isinstance(v, list) and all(
+    def update_data(self, new_conditions_dict):
-                isinstance(item, (Data, Graph)) for item in v
+        """
-            ):
+        Update the dataset with new data.
-                self.stack_fn[k] = LabelBatch.from_data_list
+        This method is used to update the dataset with new data. It replaces
        the current data with the new data provided in the new_conditions_dict
        parameter.
        :param dict new_conditions_dict: Dictionary containing the new data.
        :return: None
        """
        for condition, data in new_conditions_dict.items():
            if condition in self.conditions_dict:
                self.conditions_dict[condition].update(data)
            else:
-                raise ValueError(
+                self.conditions_dict[condition] = data
                    f"Unsupported data type for stacking: {type(v)}"
                )
    def __len__(self):
        return len(next(iter(self.data.values())))
-    def __getitem__(self, idx):
+class PinaGraphDataset(PinaDataset):
    """
    Dataset class for the PINA dataset with :class:`~torch_geometric.data.Data`
    and :class:`~pina.graph.Graph` data.
    """
    def _create_graph_batch(self, data):
        """
-        Return the data at the given index in the dataset.
+        Create a LabelBatch object from a list of
        :class:`~torch_geometric.data.Data` objects.
-        :param int idx: Index.
+        :param data: List of items to collate in a single batch.
-        :return: A dictionary containing the data at the given index.
+        :type data: list[Data] | list[Graph]
-        :rtype: dict
+        :return: LabelBatch object all the graph collated in a single batch
-        """
+            disconnected graphs.
-
+        :rtype: LabelBatch
-        if self.automatic_batching:
+        """
-            # Return the data at the given index
+        batch = LabelBatch.from_data_list(data)
-            return {
+        return batch
-                field_name: data[idx] for field_name, data in self.data.items()
+
-            }
+    def create_batch(self, data):
-        return idx
+        """
-
+        Create a Batch object from a list of :class:`~torch_geometric.data.Data`
-    def _getitem_from_list(self, idx_list):
+        objects.
-        """
+
-        Return data from the dataset given a list of indices.
+        :param data: List of items to collate in a single batch.
-
+        :type data: list[Data] | list[Graph]
-        :param list[int] idx_list: List of indices.
+        :return: Batch object.
-        :return: A dictionary containing the data at the given indices.
+        :rtype: :class:`~torch_geometric.data.Batch`
-        :rtype: dict
+            | :class:`~pina.graph.LabelBatch`
-        """
+        """
-
+
-        to_return = {}
+        if isinstance(data[0], Data):
-        for field_name, data in self.data.items():
+            return self._create_graph_batch(data)
-            if self.stack_fn[field_name] == LabelBatch.from_data_list:
+        return self._create_tensor_batch(data)
-                to_return[field_name] = self.stack_fn[field_name](
+
-                    [data[i] for i in idx_list]
+    # Override _retrive_data method for graph handling
-                )
+    def _retrive_data(self, data, idx_list):
-            else:
+        """
-                to_return[field_name] = data[idx_list]
+        Retrieve data from the dataset given a list of indices.
-        return to_return
+
-
+        :param dict data: Dictionary containing the data.
-
+        :param list[int] idx_list: List of indices to retrieve.
-class PinaGraphDataset(Dataset):
+        :return: Dictionary containing the data at the given indices.
    def __init__(self, data_dict, automatic_batching=None):
        """
        Initialize the instance by storing the conditions dictionary.
        :param dict conditions_dict: A dictionary mapping condition names to
            their respective data. Each key represents a condition name, and the
            corresponding value is a dictionary containing the associated data.
        """
        # Store the conditions dictionary
        self.data = data_dict
        self.automatic_batching = (
            automatic_batching if automatic_batching is not None else True
        )
    def __len__(self):
        return len(next(iter(self.data.values())))
    def __getitem__(self, idx):
        """
        Return the data at the given index in the dataset.
        :param int idx: Index.
        :return: A dictionary containing the data at the given index.
        :rtype: dict
        """
        if self.automatic_batching:
            # Return the data at the given index
            return {
                field_name: data[idx] for field_name, data in self.data.items()
            }
        return idx
    def _getitem_from_list(self, idx_list):
        """
        Return data from the dataset given a list of indices.
        :param list[int] idx_list: List of indices.
        :return: A dictionary containing the data at the given indices.
        :rtype: dict
        """
        # Return the data from the current condition
        # If the data is a list of Data objects, create a Batch object
        # If the data is a list of torch.Tensor objects, create a torch.Tensor
        return {
-            field_name: [data[i] for i in idx_list]
+            k: (
-            for field_name, data in self.data.items()
+                self._create_graph_batch([v[i] for i in idx_list])
                if isinstance(v, list)
                else v[idx_list]
            )
            for k, v in data.items()
        }
    @property
    def input(self):
        """
        Return the input data for the dataset.
        :return: Dictionary containing the input points.
        :rtype: dict
        """
        return {k: v["input"] for k, v in self.conditions_dict.items()}
--- a/pina/problem/abstract_problem.py
+++ b/pina/problem/abstract_problem.py
@@ -337,6 +337,10 @@ class AbstractProblem(metaclass=ABCMeta):
                # If the condition does not have a domain attribute, store
                # the input and target points
                keys = condition.__slots__
-                values = [getattr(condition, name) for name in keys]
+                values = [
                    getattr(condition, name)
                    for name in keys
                    if getattr(condition, name) is not None
                ]
                data[condition_name] = dict(zip(keys, values))
        self._collected_data = data
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "pina-mathlab"
-version = "0.2.5"
+version = "0.2.4"
 description = "Physic Informed Neural networks for Advance modeling."
 readme = "README.md"
 authors = [
--- a/readme/pina_logo.png
+++ b/readme/pina_logo.png
--- a/tutorials/static/pina_logo.png
+++ b/tutorials/static/pina_logo.png
`@@ -4,3 +4,4 @@ __all__ = ["PinaDataModule", "PinaDataset"]`


	`from .data_module import PinaDataModule`	`from .data_module import PinaDataModule`
		`from .dataset import PinaDataset`