Implementation of DataLoader and DataModule (#383)

Refactoring for 0.2
* Data module, data loader and dataset
* Refactor LabelTensor
* Refactor solvers

Co-authored-by: dario-coscia <dariocos99@gmail.com>
This commit is contained in:
Filippo Olivo
2024-11-27 16:01:39 +01:00
committed by Nicola Demo
parent dd43c8304c
commit a27bd35443
34 changed files with 827 additions and 1349 deletions

View File

@@ -1,227 +0,0 @@
import math
import torch
from pina.data import SamplePointDataset, SupervisedDataset, PinaDataModule, \
UnsupervisedDataset
from pina.data import PinaDataLoader
from pina import LabelTensor, Condition
from pina.equation import Equation
from pina.domain import CartesianDomain
from pina.problem import SpatialProblem, AbstractProblem
from pina.operators import laplacian
from pina.equation.equation_factory import FixedValue
from pina.graph import Graph
def laplace_equation(input_, output_):
    """Return the residual ``laplacian(u) - sin(pi*x) * sin(pi*y)``.

    :param input_: labelled points exposing the ``'x'`` and ``'y'`` columns.
    :param output_: labelled model output exposing the ``'u'`` column.
    :return: the difference between the Laplacian of ``u`` with respect to
        ``input_`` and the sinusoidal forcing term.
    """
    # Forcing term, built one coordinate at a time.
    sin_x = torch.sin(input_.extract(['x']) * torch.pi)
    sin_y = torch.sin(input_.extract(['y']) * torch.pi)
    source = sin_x * sin_y
    return laplacian(output_.extract(['u']), input_) - source
# Wrap the residual function so it can be attached to a Condition.
my_laplace = Equation(laplace_equation)
# Single labelled input/output pair (one point) used by the 'data' condition.
in_ = LabelTensor(torch.tensor([[0., 1.]]), ['x', 'y'])
out_ = LabelTensor(torch.tensor([[0.]]), ['u'])
# Sixty random labelled input/output pairs used by the 'data2' condition.
in2_ = LabelTensor(torch.rand(60, 2), ['x', 'y'])
out2_ = LabelTensor(torch.rand(60, 1), ['u'])
class Poisson(SpatialProblem):
    """Poisson problem on the unit square used as a shared test fixture.

    The ``conditions`` mapping mixes every kind of condition exercised by
    the tests in this module: four domain/equation boundary conditions,
    one input-points/equation condition (``'D'``), two input/output data
    conditions and two input/conditional-variable conditions.
    """
    output_variables = ['u']
    spatial_domain = CartesianDomain({'x': [0, 1], 'y': [0, 1]})
    conditions = {
        # The four edges of the square share the same fixed value of 0.0;
        # each edge still gets its own domain and equation object.
        **{
            edge: Condition(domain=CartesianDomain(bounds),
                            equation=FixedValue(0.0))
            for edge, bounds in (
                ('gamma1', {'x': [0, 1], 'y': 1}),
                ('gamma2', {'x': [0, 1], 'y': 0}),
                ('gamma3', {'x': 1, 'y': [0, 1]}),
                ('gamma4', {'x': 0, 'y': [0, 1]}),
            )
        },
        # Interior residual evaluated on 100 random points.
        'D': Condition(
            input_points=LabelTensor(torch.rand(size=(100, 2)), ['x', 'y']),
            equation=my_laplace,
        ),
        # Input/output pairs: 1 point and 60 points respectively.
        'data': Condition(input_points=in_, output_points=out_),
        'data2': Condition(input_points=in2_, output_points=out2_),
        # Inputs paired with a conditional variable 'alpha' of ones.
        'unsupervised': Condition(
            input_points=LabelTensor(torch.rand(size=(45, 2)), ['x', 'y']),
            conditional_variables=LabelTensor(torch.ones(size=(45, 1)),
                                              ['alpha']),
        ),
        'unsupervised2': Condition(
            input_points=LabelTensor(torch.rand(size=(90, 2)), ['x', 'y']),
            conditional_variables=LabelTensor(torch.ones(size=(90, 1)),
                                              ['alpha']),
        ),
    }
# Names of the four boundary conditions declared in Poisson.conditions.
boundaries = ['gamma1', 'gamma2', 'gamma3', 'gamma4']
# Shared problem instance used by every test below.
poisson = Poisson()
# Discretise each boundary with n=10 in 'grid' mode; the tests' expected
# sizes (e.g. 140 sampled points) presumably rely on this yielding 10 points
# per boundary — TODO confirm.
poisson.discretise_domain(10, 'grid', locations=boundaries)
def test_sample():
    """Check size, labels and condition indices of ``SamplePointDataset``.

    The expected 140 points presumably come from the four discretised
    boundaries plus the 100 points of condition ``'D'``, i.e. five
    conditions indexed 0 to 4.
    """
    dataset = SamplePointDataset(poisson, device='cpu')
    assert len(dataset) == 140

    points = dataset.input_points
    assert points.shape == (140, 2)
    assert points.labels == ['x', 'y']

    indices = dataset.condition_indices
    assert indices.dtype == torch.uint8
    assert indices.max() == torch.tensor(4)
    assert indices.min() == torch.tensor(0)
def test_data():
    """Check shapes, labels and condition indices of ``SupervisedDataset``.

    The expected 61 samples match the 1 + 60 points of the ``'data'`` and
    ``'data2'`` conditions of the shared ``poisson`` problem.
    """
    supervised = SupervisedDataset(poisson, device='cpu')
    assert len(supervised) == 61

    # Inputs are reachable both by key and by attribute.
    assert supervised['input_points'].shape == (61, 2)
    assert supervised.input_points.shape == (61, 2)
    assert supervised['input_points'].labels == ['x', 'y']
    assert supervised.input_points.labels == ['x', 'y']
    assert supervised.input_points[3:].shape == (58, 2)

    # Slicing the outputs must keep their labels.
    assert supervised.output_points[:3].labels == ['u']
    assert supervised.output_points.shape == (61, 1)
    assert supervised.output_points.labels == ['u']

    indices = supervised.condition_indices
    assert indices.dtype == torch.uint8
    assert indices.max() == torch.tensor(1)
    assert indices.min() == torch.tensor(0)
def test_unsupervised():
    """Check shapes, labels and condition indices of ``UnsupervisedDataset``.

    The expected 135 samples match the 45 + 90 points of the
    ``'unsupervised'`` and ``'unsupervised2'`` conditions.
    """
    unsupervised = UnsupervisedDataset(poisson, device='cpu')
    assert len(unsupervised) == 135

    points = unsupervised.input_points
    assert points.shape == (135, 2)
    assert points.labels == ['x', 'y']
    assert unsupervised.input_points[3:].shape == (132, 2)

    variables = unsupervised.conditional_variables
    assert variables.shape == (135, 1)
    assert variables.labels == ['alpha']

    indices = unsupervised.condition_indices
    assert indices.dtype == torch.uint8
    assert indices.max() == torch.tensor(1)
    assert indices.min() == torch.tensor(0)
def test_data_module():
    """Exercise ``PinaDataModule`` with default and explicit datasets.

    Three scenarios are covered:

    1. default construction returns a ``PinaDataLoader``;
    2. with ``batch_size=10`` the train loader has 24 batches of at most
       10 items, and the train split sizes equal ``ceil(0.7 * len)`` of
       each dataset (the 0.7 fraction is what this test encodes —
       presumably the module's default train size);
    3. a module built from a single explicit dataset of each kind still
       produces batches of at most 10 items.
    """
    # 1. Default configuration.
    data_module = PinaDataModule(poisson, device='cpu')
    data_module.setup()
    assert isinstance(data_module.train_dataloader(), PinaDataLoader)

    # 2. Explicit batch size, no shuffling.
    data_module = PinaDataModule(poisson,
                                 device='cpu',
                                 batch_size=10,
                                 shuffle=False)
    data_module.setup()
    loader = data_module.train_dataloader()
    assert len(loader) == 24
    for batch in loader:
        assert len(batch) <= 10
    len_ref = sum(
        math.ceil(len(dataset) * 0.7) for dataset in data_module.datasets)
    len_real = sum(
        len(dataset) for dataset in data_module.splits['train'].values())
    assert len_ref == len_real

    # 3. One explicit dataset at a time, in the original order:
    #    supervised, physics (sample points), unsupervised.
    for dataset_cls in (SupervisedDataset, SamplePointDataset,
                        UnsupervisedDataset):
        single_dataset = dataset_cls(poisson, device='cpu')
        data_module = PinaDataModule(poisson,
                                     device='cpu',
                                     batch_size=10,
                                     shuffle=False,
                                     datasets=[single_dataset])
        data_module.setup()
        for batch in data_module.train_dataloader():
            assert len(batch) <= 10
def test_loader():
    """Check the batches produced by the train ``PinaDataLoader``.

    Every batch must hold at most 10 items, and the input points of its
    ``supervised``, ``physics`` and ``unsupervised`` groups must be
    labelled ``['x', 'y']`` and require gradients.
    """
    data_module = PinaDataModule(poisson, device='cpu', batch_size=10)
    data_module.setup()
    loader = data_module.train_dataloader()
    assert isinstance(loader, PinaDataLoader)
    assert len(loader) == 24
    for batch in loader:
        assert len(batch) <= 10
        for group in (batch.supervised, batch.physics, batch.unsupervised):
            assert group.input_points.labels == ['x', 'y']
            # Assert truthiness directly instead of comparing with ``True``.
            assert group.input_points.requires_grad
# Random node coordinates, shape (100, 100, 2), last axis labelled 'x', 'y'.
coordinates = LabelTensor(torch.rand((100, 100, 2)), labels=['x', 'y'])
# Random node data, shape (100, 100, 3), last axis labelled 'ux', 'uy', 'p'.
data = LabelTensor(torch.rand((100, 100, 3)), labels=['ux', 'uy', 'p'])
class GraphProblem(AbstractProblem):
    """Problem with one input/output condition whose inputs are graphs."""
    # Random targets: 100 rows with columns labelled 'ux', 'uy', 'p'.
    output = LabelTensor(torch.rand((100, 3)), labels=['ux', 'uy', 'p'])
    # One 'radius' graph (radius=0.2) per leading index of the module-level
    # `coordinates` and `data` tensors, 100 graphs in total.
    input = [
        Graph.build('radius',
                    nodes_coordinates=coordinates[i, :, :],
                    nodes_data=data[i, :, :],
                    radius=0.2) for i in range(100)
    ]
    # NOTE(review): declared as ['u'] while `output` is labelled
    # ['ux', 'uy', 'p'] — confirm whether this mismatch is intentional.
    output_variables = ['u']
    conditions = {
        'graph_data': Condition(input_points=input, output_points=output)
    }
# Shared graph problem instance used by test_loader_graph.
graph_problem = GraphProblem()
def test_loader_graph():
    """Check that graph inputs are batched as plain lists of ``Graph``.

    Each batch from the train loader must hold at most 10 items, and its
    supervised input points must be a ``list`` containing only ``Graph``
    objects.
    """
    module = PinaDataModule(graph_problem, device='cpu', batch_size=10)
    module.setup()
    for batch in module.train_dataloader():
        assert len(batch) <= 10
        graphs = batch.supervised.input_points
        assert isinstance(graphs, list)
        assert all(isinstance(graph, Graph) for graph in graphs)

View File

@@ -114,5 +114,5 @@ def test_slice():
assert torch.allclose(tensor_view2, data[3])
tensor_view3 = tensor[:, 2]
assert tensor_view3.labels == labels[2]
assert tensor_view3.labels == [labels[2]]
assert torch.allclose(tensor_view3, data[:, 2].reshape(-1, 1))

View File

@@ -1,5 +1,4 @@
import torch
from pina.problem import SpatialProblem, InverseProblem
from pina.operators import laplacian
from pina.domain import CartesianDomain
@@ -9,7 +8,7 @@ from pina.trainer import Trainer
from pina.model import FeedForward
from pina.equation.equation import Equation
from pina.equation.equation_factory import FixedValue
from pina.loss.loss_interface import LpLoss
from pina.loss import LpLoss
def laplace_equation(input_, output_):
@@ -54,22 +53,22 @@ class InversePoisson(SpatialProblem, InverseProblem):
# define the conditions for the loss (boundary conditions, equation, data)
conditions = {
'gamma1': Condition(location=CartesianDomain({'x': [x_min, x_max],
'gamma1': Condition(domain=CartesianDomain({'x': [x_min, x_max],
'y': y_max}),
equation=FixedValue(0.0, components=['u'])),
'gamma2': Condition(location=CartesianDomain(
'gamma2': Condition(domain=CartesianDomain(
{'x': [x_min, x_max], 'y': y_min
}),
equation=FixedValue(0.0, components=['u'])),
'gamma3': Condition(location=CartesianDomain(
'gamma3': Condition(domain=CartesianDomain(
{'x': x_max, 'y': [y_min, y_max]
}),
equation=FixedValue(0.0, components=['u'])),
'gamma4': Condition(location=CartesianDomain(
'gamma4': Condition(domain=CartesianDomain(
{'x': x_min, 'y': [y_min, y_max]
}),
equation=FixedValue(0.0, components=['u'])),
'D': Condition(location=CartesianDomain(
'D': Condition(domain=CartesianDomain(
{'x': [x_min, x_max], 'y': [y_min, y_max]
}),
equation=Equation(laplace_equation)),
@@ -84,16 +83,16 @@ class Poisson(SpatialProblem):
conditions = {
'gamma1': Condition(
location=CartesianDomain({'x': [0, 1], 'y': 1}),
domain=CartesianDomain({'x': [0, 1], 'y': 1}),
equation=FixedValue(0.0)),
'gamma2': Condition(
location=CartesianDomain({'x': [0, 1], 'y': 0}),
domain=CartesianDomain({'x': [0, 1], 'y': 0}),
equation=FixedValue(0.0)),
'gamma3': Condition(
location=CartesianDomain({'x': 1, 'y': [0, 1]}),
domain=CartesianDomain({'x': 1, 'y': [0, 1]}),
equation=FixedValue(0.0)),
'gamma4': Condition(
location=CartesianDomain({'x': 0, 'y': [0, 1]}),
domain=CartesianDomain({'x': 0, 'y': [0, 1]}),
equation=FixedValue(0.0)),
'D': Condition(
input_points=LabelTensor(torch.rand(size=(100, 2)), ['x', 'y']),
@@ -112,7 +111,6 @@ class Poisson(SpatialProblem):
truth_solution = poisson_sol
class myFeature(torch.nn.Module):
"""
Feature: sin(x)
@@ -158,21 +156,35 @@ def test_train_cpu():
pinn = PINN(problem = poisson_problem, model=model,
extra_features=None, loss=LpLoss())
trainer = Trainer(solver=pinn, max_epochs=1,
accelerator='cpu', batch_size=20)
trainer.train()
accelerator='cpu', batch_size=20, val_size=0., train_size=1., test_size=0.)
def test_log():
poisson_problem.discretise_domain(100)
solver = PINN(problem = poisson_problem, model=model,
extra_features=None, loss=LpLoss())
trainer = Trainer(solver, max_epochs=2, accelerator='cpu')
def test_train_load():
tmpdir = "tests/tmp_load"
poisson_problem = Poisson()
boundaries = ['gamma1', 'gamma2', 'gamma3', 'gamma4']
n = 10
poisson_problem.discretise_domain(n, 'grid', locations=boundaries)
pinn = PINN(problem=poisson_problem,
model=model,
extra_features=None,
loss=LpLoss())
trainer = Trainer(solver=pinn,
max_epochs=15,
accelerator='cpu',
default_root_dir=tmpdir)
trainer.train()
# assert the logged metrics are correct
logged_metrics = sorted(list(trainer.logged_metrics.keys()))
total_metrics = sorted(
list([key + '_loss' for key in poisson_problem.conditions.keys()])
+ ['mean_loss'])
assert logged_metrics == total_metrics
new_pinn = PINN.load_from_checkpoint(
f'{tmpdir}/lightning_logs/version_0/checkpoints/epoch=14-step=15.ckpt',
problem = poisson_problem, model=model)
test_pts = CartesianDomain({'x': [0, 1], 'y': [0, 1]}).sample(10)
assert new_pinn.forward(test_pts).extract(['u']).shape == (10, 1)
assert new_pinn.forward(test_pts).extract(
['u']).shape == pinn.forward(test_pts).extract(['u']).shape
torch.testing.assert_close(
new_pinn.forward(test_pts).extract(['u']),
pinn.forward(test_pts).extract(['u']))
import shutil
shutil.rmtree(tmpdir)
def test_train_restore():
tmpdir = "tests/tmp_restore"
@@ -192,36 +204,7 @@ def test_train_restore():
ntrainer = Trainer(solver=pinn, max_epochs=15, accelerator='cpu')
t = ntrainer.train(
ckpt_path=f'{tmpdir}/lightning_logs/version_0/'
'checkpoints/epoch=4-step=10.ckpt')
import shutil
shutil.rmtree(tmpdir)
def test_train_load():
tmpdir = "tests/tmp_load"
poisson_problem = Poisson()
boundaries = ['gamma1', 'gamma2', 'gamma3', 'gamma4']
n = 10
poisson_problem.discretise_domain(n, 'grid', locations=boundaries)
pinn = PINN(problem=poisson_problem,
model=model,
extra_features=None,
loss=LpLoss())
trainer = Trainer(solver=pinn,
max_epochs=15,
accelerator='cpu',
default_root_dir=tmpdir)
trainer.train()
new_pinn = PINN.load_from_checkpoint(
f'{tmpdir}/lightning_logs/version_0/checkpoints/epoch=14-step=30.ckpt',
problem = poisson_problem, model=model)
test_pts = CartesianDomain({'x': [0, 1], 'y': [0, 1]}).sample(10)
assert new_pinn.forward(test_pts).extract(['u']).shape == (10, 1)
assert new_pinn.forward(test_pts).extract(
['u']).shape == pinn.forward(test_pts).extract(['u']).shape
torch.testing.assert_close(
new_pinn.forward(test_pts).extract(['u']),
pinn.forward(test_pts).extract(['u']))
'checkpoints/epoch=4-step=5.ckpt')
import shutil
shutil.rmtree(tmpdir)
@@ -229,36 +212,24 @@ def test_train_inverse_problem_cpu():
poisson_problem = InversePoisson()
boundaries = ['gamma1', 'gamma2', 'gamma3', 'gamma4', 'D']
n = 100
poisson_problem.discretise_domain(n, 'random', locations=boundaries)
poisson_problem.discretise_domain(n, 'random', locations=boundaries,
variables=['x', 'y'])
pinn = PINN(problem = poisson_problem, model=model,
extra_features=None, loss=LpLoss())
trainer = Trainer(solver=pinn, max_epochs=1,
accelerator='cpu', batch_size=20)
trainer.train()
# # TODO does not currently work
# def test_train_inverse_problem_restore():
# tmpdir = "tests/tmp_restore_inv"
# poisson_problem = InversePoisson()
# boundaries = ['gamma1', 'gamma2', 'gamma3', 'gamma4', 'D']
# n = 100
# poisson_problem.discretise_domain(n, 'random', locations=boundaries)
# pinn = PINN(problem=poisson_problem,
# model=model,
# extra_features=None,
# loss=LpLoss())
# trainer = Trainer(solver=pinn,
# max_epochs=5,
# accelerator='cpu',
# default_root_dir=tmpdir)
# trainer.train()
# ntrainer = Trainer(solver=pinn, max_epochs=5, accelerator='cpu')
# t = ntrainer.train(
# ckpt_path=f'{tmpdir}/lightning_logs/version_0/checkpoints/epoch=4-step=10.ckpt')
# import shutil
# shutil.rmtree(tmpdir)
def test_train_extra_feats_cpu():
poisson_problem = Poisson()
boundaries = ['gamma1', 'gamma2', 'gamma3', 'gamma4']
n = 10
poisson_problem.discretise_domain(n, 'grid', locations=boundaries)
pinn = PINN(problem=poisson_problem,
model=model_extra_feats,
extra_features=extra_feats)
trainer = Trainer(solver=pinn, max_epochs=5, accelerator='cpu')
trainer.train()
def test_train_inverse_problem_load():
tmpdir = "tests/tmp_load_inv"
@@ -276,7 +247,7 @@ def test_train_inverse_problem_load():
default_root_dir=tmpdir)
trainer.train()
new_pinn = PINN.load_from_checkpoint(
f'{tmpdir}/lightning_logs/version_0/checkpoints/epoch=14-step=30.ckpt',
f'{tmpdir}/lightning_logs/version_0/checkpoints/epoch=14-step=15.ckpt',
problem = poisson_problem, model=model)
test_pts = CartesianDomain({'x': [0, 1], 'y': [0, 1]}).sample(10)
assert new_pinn.forward(test_pts).extract(['u']).shape == (10, 1)
@@ -286,160 +257,4 @@ def test_train_inverse_problem_load():
new_pinn.forward(test_pts).extract(['u']),
pinn.forward(test_pts).extract(['u']))
import shutil
shutil.rmtree(tmpdir)
# # TODO: fix as soon as possible. Sampling only a subset of the variables
# # works only when every variable is defined over a range;
# # if one variable is fixed and another is not, sampling
# # fails. This test also needs to be fixed and
# # moved into the problem tests rather than the PINN tests.
# def test_train_cpu_sampling_few_vars():
# poisson_problem = Poisson()
# boundaries = ['gamma1', 'gamma2', 'gamma3']
# n = 10
# poisson_problem.discretise_domain(n, 'grid', locations=boundaries)
# poisson_problem.discretise_domain(n, 'random', locations=['gamma4'], variables=['x'])
# poisson_problem.discretise_domain(n, 'random', locations=['gamma4'], variables=['y'])
# pinn = PINN(problem = poisson_problem, model=model, extra_features=None, loss=LpLoss())
# trainer = Trainer(solver=pinn, kwargs={'max_epochs' : 5, 'accelerator':'cpu'})
# trainer.train()
def test_train_extra_feats_cpu():
poisson_problem = Poisson()
boundaries = ['gamma1', 'gamma2', 'gamma3', 'gamma4']
n = 10
poisson_problem.discretise_domain(n, 'grid', locations=boundaries)
pinn = PINN(problem=poisson_problem,
model=model_extra_feats,
extra_features=extra_feats)
trainer = Trainer(solver=pinn, max_epochs=5, accelerator='cpu')
trainer.train()
# TODO, fix GitHub actions to run also on GPU
# def test_train_gpu():
# poisson_problem = Poisson()
# boundaries = ['gamma1', 'gamma2', 'gamma3', 'gamma4']
# n = 10
# poisson_problem.discretise_domain(n, 'grid', locations=boundaries)
# pinn = PINN(problem = poisson_problem, model=model, extra_features=None, loss=LpLoss())
# trainer = Trainer(solver=pinn, kwargs={'max_epochs' : 5, 'accelerator':'gpu'})
# trainer.train()
# def test_train_gpu(): #TODO fix ASAP
# poisson_problem = Poisson()
# boundaries = ['gamma1', 'gamma2', 'gamma3', 'gamma4']
# n = 10
# poisson_problem.discretise_domain(n, 'grid', locations=boundaries)
# poisson_problem.conditions.pop('data') # The input/output pts are allocated on cpu
# pinn = PINN(problem = poisson_problem, model=model, extra_features=None, loss=LpLoss())
# trainer = Trainer(solver=pinn, kwargs={'max_epochs' : 5, 'accelerator':'gpu'})
# trainer.train()
# def test_train_2():
# boundaries = ['gamma1', 'gamma2', 'gamma3', 'gamma4']
# n = 10
# expected_keys = [[], list(range(0, 50, 3))]
# param = [0, 3]
# for i, truth_key in zip(param, expected_keys):
# pinn = PINN(problem, model)
# pinn.discretise_domain(n, 'grid', locations=boundaries)
# pinn.discretise_domain(n, 'grid', locations=['D'])
# pinn.train(50, save_loss=i)
# assert list(pinn.history_loss.keys()) == truth_key
# def test_train_extra_feats():
# pinn = PINN(problem, model_extra_feat, [myFeature()])
# boundaries = ['gamma1', 'gamma2', 'gamma3', 'gamma4']
# n = 10
# pinn.discretise_domain(n, 'grid', locations=boundaries)
# pinn.discretise_domain(n, 'grid', locations=['D'])
# pinn.train(5)
# def test_train_2_extra_feats():
# boundaries = ['gamma1', 'gamma2', 'gamma3', 'gamma4']
# n = 10
# expected_keys = [[], list(range(0, 50, 3))]
# param = [0, 3]
# for i, truth_key in zip(param, expected_keys):
# pinn = PINN(problem, model_extra_feat, [myFeature()])
# pinn.discretise_domain(n, 'grid', locations=boundaries)
# pinn.discretise_domain(n, 'grid', locations=['D'])
# pinn.train(50, save_loss=i)
# assert list(pinn.history_loss.keys()) == truth_key
# def test_train_with_optimizer_kwargs():
# boundaries = ['gamma1', 'gamma2', 'gamma3', 'gamma4']
# n = 10
# expected_keys = [[], list(range(0, 50, 3))]
# param = [0, 3]
# for i, truth_key in zip(param, expected_keys):
# pinn = PINN(problem, model, optimizer_kwargs={'lr' : 0.3})
# pinn.discretise_domain(n, 'grid', locations=boundaries)
# pinn.discretise_domain(n, 'grid', locations=['D'])
# pinn.train(50, save_loss=i)
# assert list(pinn.history_loss.keys()) == truth_key
# def test_train_with_lr_scheduler():
# boundaries = ['gamma1', 'gamma2', 'gamma3', 'gamma4']
# n = 10
# expected_keys = [[], list(range(0, 50, 3))]
# param = [0, 3]
# for i, truth_key in zip(param, expected_keys):
# pinn = PINN(
# problem,
# model,
# lr_scheduler_type=torch.optim.lr_scheduler.CyclicLR,
# lr_scheduler_kwargs={'base_lr' : 0.1, 'max_lr' : 0.3, 'cycle_momentum': False}
# )
# pinn.discretise_domain(n, 'grid', locations=boundaries)
# pinn.discretise_domain(n, 'grid', locations=['D'])
# pinn.train(50, save_loss=i)
# assert list(pinn.history_loss.keys()) == truth_key
# # def test_train_batch():
# # pinn = PINN(problem, model, batch_size=6)
# # boundaries = ['gamma1', 'gamma2', 'gamma3', 'gamma4']
# # n = 10
# # pinn.discretise_domain(n, 'grid', locations=boundaries)
# # pinn.discretise_domain(n, 'grid', locations=['D'])
# # pinn.train(5)
# # def test_train_batch_2():
# # boundaries = ['gamma1', 'gamma2', 'gamma3', 'gamma4']
# # n = 10
# # expected_keys = [[], list(range(0, 50, 3))]
# # param = [0, 3]
# # for i, truth_key in zip(param, expected_keys):
# # pinn = PINN(problem, model, batch_size=6)
# # pinn.discretise_domain(n, 'grid', locations=boundaries)
# # pinn.discretise_domain(n, 'grid', locations=['D'])
# # pinn.train(50, save_loss=i)
# # assert list(pinn.history_loss.keys()) == truth_key
# if torch.cuda.is_available():
# # def test_gpu_train():
# # pinn = PINN(problem, model, batch_size=20, device='cuda')
# # boundaries = ['gamma1', 'gamma2', 'gamma3', 'gamma4']
# # n = 100
# # pinn.discretise_domain(n, 'grid', locations=boundaries)
# # pinn.discretise_domain(n, 'grid', locations=['D'])
# # pinn.train(5)
# def test_gpu_train_nobatch():
# pinn = PINN(problem, model, batch_size=None, device='cuda')
# boundaries = ['gamma1', 'gamma2', 'gamma3', 'gamma4']
# n = 100
# pinn.discretise_domain(n, 'grid', locations=boundaries)
# pinn.discretise_domain(n, 'grid', locations=['D'])
# pinn.train(5)
shutil.rmtree(tmpdir)

View File

@@ -121,7 +121,7 @@ def test_train_cpu():
batch_size=5,
train_size=1,
test_size=0.,
eval_size=0.)
val_size=0.)
trainer.train()
test_train_cpu()