Add functionality to DataModule and data loaders, plus tests for datasets and DataModule (#453)
* Add num_workers and pin_memory arguments to DataLoader and DataModule tests
This commit is contained in:
committed by
Nicola Demo
parent
9cae9a438f
commit
571ef7f9e2
178
tests/test_data/test_datamodule.py
Normal file
178
tests/test_data/test_datamodule.py
Normal file
@@ -0,0 +1,178 @@
|
||||
import torch
|
||||
import pytest
|
||||
from pina.data import PinaDataModule
|
||||
from pina.data.dataset import PinaTensorDataset, PinaGraphDataset
|
||||
from pina.problem.zoo import SupervisedProblem
|
||||
from pina.graph import RadiusGraph
|
||||
from pina.data.data_module import DummyDataloader
|
||||
from pina import Trainer
|
||||
from pina.solvers import SupervisedSolver
|
||||
from torch_geometric.data import Batch
|
||||
from torch.utils.data import DataLoader
|
||||
|
||||
# Tensor fixtures shared by the parametrized tests below.
# NOTE(review): shapes are presumably (samples, features) -- confirm.
input_tensor = torch.rand(100, 10)
output_tensor = torch.rand(100, 2)

# Graph fixtures shared by the parametrized tests below.
# NOTE(review): shapes are presumably (graphs, nodes, features) for ``x`` and
# (graphs, nodes, coordinates) for ``pos`` -- confirm against RadiusGraph.
x = torch.rand(100, 50, 10)
pos = torch.rand(100, 50, 2)
input_graph = RadiusGraph(x, pos, r=0.1, build_edge_attr=True)
output_graph = torch.rand(100, 50, 10)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "input_, output_",
    [
        (input_tensor, output_tensor),
        (input_graph, output_graph),
    ],
)
def test_constructor(input_, output_):
    """Check that a ``PinaDataModule`` can be built from a supervised problem.

    Runs once with the tensor fixtures and once with the graph fixtures; the
    test passes when construction raises no exception.
    """
    supervised_problem = SupervisedProblem(input_=input_, output_=output_)
    PinaDataModule(supervised_problem)
|
||||
|
||||
@pytest.mark.parametrize(
    "input_, output_",
    [
        (input_tensor, output_tensor),
        (input_graph, output_graph),
    ],
)
@pytest.mark.parametrize(
    "train_size, val_size, test_size",
    [
        (0.7, 0.2, 0.1),
        (0.7, 0.3, 0),
    ],
)
def test_setup_train(input_, output_, train_size, val_size, test_size):
    """Check the datasets exposed by ``PinaDataModule`` after ``setup()``.

    ``setup()`` is called without a stage. The test expects the train and
    validation datasets to be instances of the dataset class matching the
    input type, and the test dataset to be ``None`` when ``test_size > 0``
    or not present as an attribute otherwise.
    """
    problem = SupervisedProblem(input_=input_, output_=output_)
    dm = PinaDataModule(
        problem,
        train_size=train_size,
        val_size=val_size,
        test_size=test_size,
    )
    dm.setup()

    # Tensor inputs must yield a tensor dataset, anything else a graph dataset.
    if isinstance(input_, torch.Tensor):
        expected_cls = PinaTensorDataset
    else:
        expected_cls = PinaGraphDataset

    assert hasattr(dm, "train_dataset")
    assert isinstance(dm.train_dataset, expected_cls)
    # NOTE(review): split-size check is disabled; reason not visible here.
    # assert len(dm.train_dataset) == int(len(input_) * train_size)

    if test_size > 0:
        assert hasattr(dm, "test_dataset")
        assert dm.test_dataset is None
    else:
        assert not hasattr(dm, "test_dataset")

    assert hasattr(dm, "val_dataset")
    assert isinstance(dm.val_dataset, expected_cls)
    # NOTE(review): split-size check is disabled; reason not visible here.
    # assert len(dm.val_dataset) == int(len(input_) * val_size)
|
||||
|
||||
@pytest.mark.parametrize(
    "input_, output_",
    [
        (input_tensor, output_tensor),
        (input_graph, output_graph),
    ],
)
@pytest.mark.parametrize(
    "train_size, val_size, test_size",
    [
        (0.7, 0.2, 0.1),
        (0.0, 0.0, 1.0),
    ],
)
def test_setup_test(input_, output_, train_size, val_size, test_size):
    """Check the datasets exposed by ``PinaDataModule`` after ``setup('test')``.

    The test expects the train and validation datasets to be ``None`` when
    their split size is positive and not present as attributes otherwise,
    while the test dataset must be an instance of the dataset class matching
    the input type.
    """
    problem = SupervisedProblem(input_=input_, output_=output_)
    dm = PinaDataModule(
        problem,
        train_size=train_size,
        val_size=val_size,
        test_size=test_size,
    )
    dm.setup(stage='test')

    # Train and validation splits follow the same rule, checked in that order.
    for attr_name, split_size in (
        ("train_dataset", train_size),
        ("val_dataset", val_size),
    ):
        if split_size > 0:
            assert hasattr(dm, attr_name)
            assert getattr(dm, attr_name) is None
        else:
            assert not hasattr(dm, attr_name)

    assert hasattr(dm, "test_dataset")
    if isinstance(input_, torch.Tensor):
        expected_cls = PinaTensorDataset
    else:
        expected_cls = PinaGraphDataset
    assert isinstance(dm.test_dataset, expected_cls)
    # NOTE(review): split-size check is disabled; reason not visible here.
    # assert len(dm.test_dataset) == int(len(input_) * test_size)
|
||||
|
||||
@pytest.mark.parametrize(
    "input_, output_",
    [
        (input_tensor, output_tensor),
        (input_graph, output_graph),
    ],
)
def test_dummy_dataloader(input_, output_):
    """Check the loaders returned when the trainer uses ``batch_size=None``.

    Both the train and the validation loader are expected to be a
    ``DummyDataloader`` of length 1 whose ``next()`` item is a list of
    tuples; the second element of the first tuple must map
    ``'input_points'`` to a ``Batch`` (graph input) or a tensor (tensor
    input) and ``'output_points'`` to a tensor.
    """
    problem = SupervisedProblem(input_=input_, output_=output_)
    solver = SupervisedSolver(problem=problem, model=torch.nn.Linear(10, 10))
    trainer = Trainer(
        solver,
        batch_size=None,
        train_size=0.7,
        val_size=0.3,
        test_size=0.0,
    )
    dm = trainer.data_module
    dm.setup()
    dm.trainer = trainer

    expected_input_cls = (
        Batch if isinstance(input_, RadiusGraph) else torch.Tensor
    )

    # Resolve each loader lazily so the train loader is fully checked before
    # the validation loader is requested.
    for loader_name in ("train_dataloader", "val_dataloader"):
        dataloader = getattr(dm, loader_name)()
        assert isinstance(dataloader, DummyDataloader)
        assert len(dataloader) == 1

        data = next(dataloader)
        assert isinstance(data, list)
        assert isinstance(data[0], tuple)

        condition_data = data[0][1]
        assert isinstance(condition_data['input_points'], expected_input_cls)
        assert isinstance(condition_data['output_points'], torch.Tensor)
|
||||
|
||||
@pytest.mark.parametrize(
    "input_, output_",
    [
        (input_tensor, output_tensor),
        (input_graph, output_graph),
    ],
)
def test_dataloader(input_, output_):
    """Check the loaders returned when the trainer uses ``batch_size=10``.

    The train and validation loaders are expected to be ``DataLoader``
    instances of length 7 and 3 respectively. The first batch of each must be
    a dict whose ``'data'`` entry maps ``'input_points'`` to a ``Batch``
    (graph input) or a tensor (tensor input) and ``'output_points'`` to a
    tensor.
    """
    problem = SupervisedProblem(input_=input_, output_=output_)
    solver = SupervisedSolver(problem=problem, model=torch.nn.Linear(10, 10))
    trainer = Trainer(
        solver,
        batch_size=10,
        train_size=0.7,
        val_size=0.3,
        test_size=0.0,
    )
    dm = trainer.data_module
    dm.setup()
    dm.trainer = trainer

    expected_input_cls = (
        Batch if isinstance(input_, RadiusGraph) else torch.Tensor
    )

    # NOTE(review): 7 and 3 presumably come from a 70/30 split of 100 samples
    # in batches of 10 -- confirm against the fixtures.
    for loader_name, expected_batches in (
        ("train_dataloader", 7),
        ("val_dataloader", 3),
    ):
        dataloader = getattr(dm, loader_name)()
        assert isinstance(dataloader, DataLoader)
        assert len(dataloader) == expected_batches

        data = next(iter(dataloader))
        assert isinstance(data, dict)

        condition_data = data['data']
        assert isinstance(condition_data['input_points'], expected_input_cls)
        assert isinstance(condition_data['output_points'], torch.Tensor)
|
||||
|
||||
101
tests/test_data/test_graph_dataset.py
Normal file
101
tests/test_data/test_graph_dataset.py
Normal file
@@ -0,0 +1,101 @@
|
||||
import torch
|
||||
import pytest
|
||||
from pina.data.dataset import PinaDatasetFactory, PinaGraphDataset
|
||||
from pina.graph import KNNGraph
|
||||
from torch_geometric.data import Data
|
||||
|
||||
# First graph fixture.
# NOTE(review): shapes are presumably (graphs, nodes, features) for ``x`` and
# (graphs, nodes, coordinates) for ``pos`` -- confirm against KNNGraph.
x = torch.rand(100, 20, 10)
pos = torch.rand(100, 20, 2)
input_ = KNNGraph(x=x, pos=pos, k=3, build_edge_attr=True)
output_ = torch.rand(100, 20, 10)

# Second, smaller graph fixture used by the multi-condition case.
x_2 = torch.rand(50, 20, 10)
pos_2 = torch.rand(50, 20, 2)
input_2_ = KNNGraph(x=x_2, pos=pos_2, k=3, build_edge_attr=True)
output_2_ = torch.rand(50, 20, 10)

# Problem with a single condition
conditions_dict_single = {
    'data': {'input_points': input_.data, 'output_points': output_},
}
max_conditions_lengths_single = {'data': 100}

# Problem with multiple conditions
conditions_dict_single_multi = {
    'data_1': {'input_points': input_.data, 'output_points': output_},
    'data_2': {'input_points': input_2_.data, 'output_points': output_2_},
}
max_conditions_lengths_multi = {'data_1': 100, 'data_2': 50}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "conditions_dict, max_conditions_lengths",
    [
        (conditions_dict_single, max_conditions_lengths_single),
        (conditions_dict_single_multi, max_conditions_lengths_multi),
    ],
)
def test_constructor(conditions_dict, max_conditions_lengths):
    """Check that the factory builds a ``PinaGraphDataset`` of length 100.

    Runs for both the single-condition and the multi-condition fixtures; the
    same length of 100 is expected in both cases.
    """
    factory_kwargs = {
        'max_conditions_lengths': max_conditions_lengths,
        'automatic_batching': True,
    }
    dataset = PinaDatasetFactory(conditions_dict, **factory_kwargs)
    assert isinstance(dataset, PinaGraphDataset)
    assert len(dataset) == 100
|
||||
|
||||
|
||||
def _assert_graph_conditions(data, n_nodes, n_edges):
    """Assert each condition in ``data`` holds a graph/tensor pair of the given size.

    :param data: Value returned by the dataset; must be a dict whose values
        expose ``'input_points'`` and ``'output_points'``.
    :param int n_nodes: Expected first dimension of the node features and of
        the output tensor.
    :param int n_edges: Expected number of columns of ``edge_index`` and of
        rows of ``edge_attr``.
    """
    assert isinstance(data, dict)
    # One assert per property, so a failure names the property that broke
    # instead of reporting an opaque ``all([...])`` as False.
    for condition in data.values():
        graph = condition['input_points']
        target = condition['output_points']
        assert isinstance(graph, Data)
        assert isinstance(target, torch.Tensor)
        assert graph.x.shape == torch.Size((n_nodes, 10))
        assert target.shape == torch.Size((n_nodes, 10))
        assert graph.edge_index.shape == torch.Size((2, n_edges))
        assert graph.edge_attr.shape[0] == n_edges


@pytest.mark.parametrize(
    "conditions_dict, max_conditions_lengths",
    [
        (conditions_dict_single, max_conditions_lengths_single),
        (conditions_dict_single_multi, max_conditions_lengths_multi),
    ],
)
def test_getitem(conditions_dict, max_conditions_lengths):
    """Check single-item indexing and ``fetch_from_idx_list`` on a graph dataset.

    Indexing one element is expected to return, per condition, a ``Data``
    graph with 20 nodes and 60 edges. Fetching 20 indices is expected to
    return a ``Data`` graph with 400 nodes and 1200 edges.
    """
    dataset = PinaDatasetFactory(
        conditions_dict,
        max_conditions_lengths=max_conditions_lengths,
        automatic_batching=True,
    )

    # NOTE(review): 60 is presumably 20 nodes x k=3 neighbours -- confirm.
    _assert_graph_conditions(dataset[50], n_nodes=20, n_edges=60)

    # Fetching 20 graphs at once: the expected sizes are 20x the single-item
    # ones (400 nodes, 1200 edges).
    batch = dataset.fetch_from_idx_list(list(range(20)))
    _assert_graph_conditions(batch, n_nodes=400, n_edges=1200)
|
||||
88
tests/test_data/test_tensor_dataset.py
Normal file
88
tests/test_data/test_tensor_dataset.py
Normal file
@@ -0,0 +1,88 @@
|
||||
import torch
|
||||
import pytest
|
||||
from pina.data.dataset import PinaDatasetFactory, PinaTensorDataset
|
||||
|
||||
# Random fixtures shared by the tests below.
# NOTE(review): shapes are presumably (samples, features) -- confirm.
input_tensor = torch.rand(100, 10)
output_tensor = torch.rand(100, 2)

# A second, shorter pair used as the extra condition in the multi case.
input_tensor_2 = torch.rand(50, 10)
output_tensor_2 = torch.rand(50, 2)

# Problem with a single condition.
conditions_dict_single = {
    'data': {
        'input_points': input_tensor,
        'output_points': output_tensor,
    },
}

# Problem with multiple conditions of different lengths.
conditions_dict_single_multi = {
    'data_1': {
        'input_points': input_tensor,
        'output_points': output_tensor,
    },
    'data_2': {
        'input_points': input_tensor_2,
        'output_points': output_tensor_2,
    },
}

# Per-condition lengths, derived from the tensors so that the tables cannot
# drift from the fixtures above ({'data': 100} and
# {'data_1': 100, 'data_2': 50} with the current shapes).
max_conditions_lengths_single = {
    name: len(condition['input_points'])
    for name, condition in conditions_dict_single.items()
}

max_conditions_lengths_multi = {
    name: len(condition['input_points'])
    for name, condition in conditions_dict_single_multi.items()
}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "conditions_dict, max_conditions_lengths",
    [
        (conditions_dict_single, max_conditions_lengths_single),
        (conditions_dict_single_multi, max_conditions_lengths_multi),
    ],
)
def test_constructor_tensor(conditions_dict, max_conditions_lengths):
    """Check that the factory builds a ``PinaTensorDataset`` from tensor conditions.

    Runs for both the single-condition and the multi-condition fixtures.
    """
    factory_kwargs = {
        'max_conditions_lengths': max_conditions_lengths,
        'automatic_batching': True,
    }
    dataset = PinaDatasetFactory(conditions_dict, **factory_kwargs)
    assert isinstance(dataset, PinaTensorDataset)
|
||||
|
||||
|
||||
def test_getitem_single():
    """Check ``fetch_from_idx_list`` on a single-condition tensor dataset.

    Fetching the first 70 indices is expected to return a dict with the single
    key ``'data'``, holding ``'input_points'`` of shape (70, 10) and
    ``'output_points'`` of shape (70, 2).
    """
    dataset = PinaDatasetFactory(
        conditions_dict_single,
        max_conditions_lengths=max_conditions_lengths_single,
        automatic_batching=False,
    )

    tensors = dataset.fetch_from_idx_list(list(range(70)))
    assert isinstance(tensors, dict)
    assert list(tensors.keys()) == ['data']

    condition = tensors['data']
    assert sorted(condition.keys()) == ['input_points', 'output_points']
    assert isinstance(condition['input_points'], torch.Tensor)
    assert condition['input_points'].shape == torch.Size((70, 10))
    assert isinstance(condition['output_points'], torch.Tensor)
    assert condition['output_points'].shape == torch.Size((70, 2))
|
||||
|
||||
|
||||
def test_getitem_multi():
    """Check ``fetch_from_idx_list`` on a two-condition tensor dataset.

    Fetching the first 70 indices is expected to return a dict with keys
    ``'data_1'`` and ``'data_2'`` in that order. ``'data_1'`` must hold 70
    rows, while ``'data_2'`` must hold only 50 rows.
    """
    dataset = PinaDatasetFactory(
        conditions_dict_single_multi,
        max_conditions_lengths=max_conditions_lengths_multi,
        automatic_batching=False,
    )

    tensors = dataset.fetch_from_idx_list(list(range(70)))
    assert isinstance(tensors, dict)
    assert list(tensors.keys()) == ['data_1', 'data_2']

    # NOTE(review): 'data_2' presumably returns 50 rows because that condition
    # only has 50 samples -- confirm against the dataset implementation.
    for name, expected_rows in (('data_1', 70), ('data_2', 50)):
        condition = tensors[name]
        assert sorted(condition.keys()) == ['input_points', 'output_points']
        assert isinstance(condition['input_points'], torch.Tensor)
        assert condition['input_points'].shape == torch.Size(
            (expected_rows, 10))
        assert isinstance(condition['output_points'], torch.Tensor)
        assert condition['output_points'].shape == torch.Size(
            (expected_rows, 2))
|
||||
Reference in New Issue
Block a user