Adaptive Refinement and Multiple Optimizer callbacks
* Implementing a callback to switch between optimizers during training
* Implementing the R3Refinement callback for collocation points
* Modifying the trainer: the dataloader is created or updated by calling `_create_or_update_loader`
* Adding an `add_points` routine to AbstractProblem so that new points can be added without resampling from scratch
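A minimal usage sketch of the two new callbacks follows. It assumes an already defined PINA problem and solver (here called `problem` and `solver`); the import paths, optimizer choice, and hyperparameters are illustrative assumptions, not part of this commit.

    import torch
    from pina.callbacks import SwitchOptimizer, R3Refinement
    from pina.trainer import Trainer  # assumed import path

    # `solver` is assumed to be an already instantiated PINA solver (e.g. a PINN)
    callbacks = [
        # switch to LBFGS after 1000 epochs of the original optimizer
        SwitchOptimizer(new_optimizers=torch.optim.LBFGS,
                        new_optimizers_kargs={'lr': 1.0},
                        epoch_switch=1000),
        # run the R3 retain-resample-release routine every 100 epochs
        R3Refinement(sample_every=100),
    ]

    trainer = Trainer(solver, callbacks=callbacks, max_epochs=2000)
    trainer.train()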
committed by Nicola Demo
parent 5a4c114d48
commit 4d1187898f

pina/callbacks.py (new file, 190 lines)
@@ -0,0 +1,190 @@
'''PINA Callbacks Implementations'''

from lightning.pytorch.callbacks import Callback
import torch

from .utils import check_consistency


class SwitchOptimizer(Callback):
    """
    PINA implementation of a Lightning Callback to switch
    optimizer during training. The routine can be used to
    try multiple optimizers during training, without the
    need to stop and restart the run.
    """

    def __init__(self, new_optimizers, new_optimizers_kargs, epoch_switch):
        """
        SwitchOptimizer is a routine for switching optimizer during training.

        :param torch.optim.Optimizer | list new_optimizers: The model
            optimizers to switch to. It must be a single
            :class:`torch.optim.Optimizer`, or a list of
            :class:`torch.optim.Optimizer` for solvers with multiple models.
        :param dict | list new_optimizers_kargs: The keyword arguments of the
            optimizers to switch to. It must be a dict, or a list of dicts
            for multiple optimizers.
        :param int epoch_switch: Epoch at which the optimizer is switched.
        """
        super().__init__()

        # check type consistency
        check_consistency(new_optimizers, torch.optim.Optimizer, subclass=True)
        check_consistency(new_optimizers_kargs, dict)
        check_consistency(epoch_switch, int)

        if epoch_switch < 1:
            raise ValueError('epoch_switch must be greater than or equal to one.')

        # wrap a single optimizer and its kwargs in lists for uniform handling
        if not isinstance(new_optimizers, list):
            new_optimizers = [new_optimizers]
            new_optimizers_kargs = [new_optimizers_kargs]
        len_optimizer = len(new_optimizers)
        len_optimizer_kwargs = len(new_optimizers_kargs)

        if len_optimizer_kwargs != len_optimizer:
            raise ValueError('You must define one dictionary of keyword'
                             ' arguments for each optimizer.'
                             f' Got {len_optimizer} optimizers and'
                             f' {len_optimizer_kwargs} dictionaries.')

        # save new optimizers
        self._new_optimizers = new_optimizers
        self._new_optimizers_kwargs = new_optimizers_kargs
        self._epoch_switch = epoch_switch

    def on_train_epoch_start(self, trainer, __):
        if trainer.current_epoch == self._epoch_switch:
            # build the new optimizers, one per model of the solver
            optims = []
            for idx, (optim, optim_kwargs) in enumerate(
                    zip(self._new_optimizers,
                        self._new_optimizers_kwargs)):
                optims.append(
                    optim(trainer._model.models[idx].parameters(),
                          **optim_kwargs))

            trainer.optimizers = optims


class R3Refinement(Callback):
    """
    PINA implementation of an R3 Refinement Callback.

    .. seealso::

        **Original reference**: Daw, Arka, et al. "Mitigating Propagation
        Failures in Physics-informed Neural Networks using
        Retain-Resample-Release (R3) Sampling." (2023).
        DOI: `10.48550/arXiv.2207.02338
        <https://doi.org/10.48550/arXiv.2207.02338>`_
    """

    def __init__(self, sample_every):
        """
        R3 routine for sampling new points based on an adaptive search.
        The algorithm incrementally accumulates collocation points in
        regions of high PDE residual and releases those with low residual.
        New points are sampled uniformly in every region where sampling
        is needed.

        :param int sample_every: Frequency (in epochs) for resampling.
        """
        super().__init__()

        # sampling frequency
        check_consistency(sample_every, int)
        self._sample_every = sample_every

    def _compute_residual(self, trainer):
        """
        Computes the residuals for a PINN object.

        :return: the total loss, and the pointwise loss.
        :rtype: tuple
        """
        # extract the solver and device from trainer
        solver = trainer._model
        device = trainer._accelerator_connector._accelerator_flag

        # compute residual
        res_loss = {}
        tot_loss = []
        for location in self._sampling_locations:
            condition = solver.problem.conditions[location]
            pts = solver.problem.input_pts[location]
            # send points to the correct device
            pts = pts.to(device)
            pts = pts.requires_grad_(True)
            pts.retain_grad()
            # PINN loss: equation evaluated only on the locations where
            # sampling is needed
            target = condition.equation.residual(pts, solver.forward(pts))
            res_loss[location] = torch.abs(target)
            tot_loss.append(torch.abs(target))

        return torch.vstack(tot_loss), res_loss

    def _r3_routine(self, trainer):
        """
        R3 refinement main routine.

        :param Trainer trainer: PINA Trainer.
        """
        # compute residual (on whatever device is available)
        tot_loss, res_loss = self._compute_residual(trainer)

        # NOTE: from now on everything is performed on CPU

        # average loss
        avg = (tot_loss.mean()).to('cpu')

        # points to keep
        old_pts = {}
        tot_points = 0
        for location in self._sampling_locations:
            pts = trainer._model.problem.input_pts[location]
            labels = pts.labels
            pts = pts.cpu().detach()
            residuals = res_loss[location].cpu()
            mask = (residuals > avg).flatten()
            # TODO: masking currently drops the labels, so they are restored
            pts = pts[mask]
            pts.labels = labels
            old_pts[location] = pts
            tot_points += len(pts)

        # number of new points to sample uniformly for each location
        n_points = (self._tot_pop_numb - tot_points) // len(self._sampling_locations)
        remainder = (self._tot_pop_numb - tot_points) % len(self._sampling_locations)
        n_uniform_points = [n_points] * len(self._sampling_locations)
        n_uniform_points[-1] += remainder

        # sample new points
        for numb_pts, loc in zip(n_uniform_points, self._sampling_locations):
            trainer._model.problem.discretise_domain(numb_pts,
                                                     'random',
                                                     locations=[loc])
        # add the retained points from the previous population
        trainer._model.problem.add_points(old_pts)

        # update dataloader
        trainer._create_or_update_loader()

    def on_train_start(self, trainer, _):
        # extract locations for sampling
        problem = trainer._model.problem
        locations = []
        for condition_name in problem.conditions:
            condition = problem.conditions[condition_name]
            if hasattr(condition, 'location'):
                locations.append(condition_name)
        self._sampling_locations = locations

        # extract total population size
        total_population = 0
        for location in self._sampling_locations:
            pts = trainer._model.problem.input_pts[location]
            total_population += len(pts)
        self._tot_pop_numb = total_population

    def on_train_epoch_end(self, trainer, __):
        if trainer.current_epoch % self._sample_every == 0:
            self._r3_routine(trainer)
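The retain/release step of `_r3_routine` keeps only the points whose absolute residual exceeds the mean residual and refills the population up to its original size with uniform samples. The following standalone sketch illustrates just that selection logic on plain tensors; the residual values are made up for the example and this is not code from the commit.

    import torch

    # hypothetical residuals for a population of 10 collocation points in 2D
    residuals = torch.tensor([0.1, 2.3, 0.05, 1.7, 0.4, 0.9, 3.1, 0.2, 0.6, 1.2])
    pts = torch.rand(10, 2)

    # retain: keep points with above-average residual
    avg = residuals.mean()
    mask = (residuals > avg).flatten()
    retained = pts[mask]

    # release: refill up to the original population size with uniform samples
    n_new = len(pts) - len(retained)
    new_pts = torch.rand(n_new, 2)

    population = torch.vstack([retained, new_pts])
    assert len(population) == len(pts)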
@@ -1,6 +1,7 @@
 """ Module for AbstractProblem class """
 from abc import ABCMeta, abstractmethod
 from ..utils import merge_tensors, check_consistency
+import torch


 class AbstractProblem(metaclass=ABCMeta):
@@ -201,6 +202,36 @@ class AbstractProblem(metaclass=ABCMeta):
         if sorted(self.input_pts[location].labels) == sorted(self.input_variables):
             self._have_sampled_points[location] = True

+    def add_points(self, new_points):
+        """
+        Add new points to the already sampled ones.
+
+        :param dict new_points: a dictionary whose keys are the locations
+            where points are added and whose values are the torch.Tensor
+            points to add.
+        """
+        if sorted(new_points.keys()) != sorted(self.conditions):
+            raise TypeError('Wrong locations for new points. Locations '
+                            f'should be in {self.conditions}.')
+
+        for location in new_points.keys():
+            # extract old and new points
+            old_pts = self.input_pts[location]
+            new_pts = new_points[location]
+
+            # raise an error if old and new points do not share the same variables
+            if sorted(old_pts.labels) != sorted(new_pts.labels):
+                raise TypeError('Not matching variables for old and new points '
+                                f'in condition {location}.')
+            # reorder the new points' columns to match the old labels
+            if old_pts.labels != new_pts.labels:
+                new_pts = torch.hstack([new_pts.extract([i]) for i in old_pts.labels])
+                new_pts.labels = old_pts.labels
+
+            # merging
+            merged_pts = torch.vstack([old_pts, new_pts])
+            merged_pts.labels = old_pts.labels
+            self.input_pts[location] = merged_pts
+
     @property
     def have_sampled_points(self):
         """
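A hedged usage sketch of `add_points` on an already discretised problem. The problem, its single condition 'D', and the input variables x, y are assumptions for the sake of illustration, and the LabelTensor import path is assumed; note that, per the check above, the dictionary passed to `add_points` must contain every condition of the problem.

    import torch
    from pina import LabelTensor  # assumed import path

    # `problem` is assumed to be an AbstractProblem subclass with a single
    # condition 'D' and input variables ['x', 'y'], already discretised, e.g.:
    # problem.discretise_domain(100, 'random', locations=['D'])

    # build 20 extra collocation points; labels must match the input variables
    extra = LabelTensor(torch.rand(20, 2), labels=['x', 'y'])

    # append them to the existing population without resampling from scratch
    problem.add_points({'D': extra})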
@@ -9,9 +9,6 @@ class Trainer(pl.Trainer):
     def __init__(self, solver, **kwargs):
         super().__init__(**kwargs)

-        # get accelerator
-        device = self._accelerator_connector._accelerator_flag
-
         # check inheritance consistency for solver
         check_consistency(solver, SolverInterface)
@@ -26,8 +23,15 @@ class Trainer(pl.Trainer):
             'in the provided locations.')

         # TODO: make a better dataloader for train
-        self._loader = DummyLoader(solver.problem.input_pts, device)
+        self._create_or_update_loader()

+    # This method is used here because, if resampling is needed during
+    # training, there is no need to touch the trainer dataloader directly:
+    # it is enough to call this method.
+    def _create_or_update_loader(self):
+        # get accelerator
+        device = self._accelerator_connector._accelerator_flag
+        self._loader = DummyLoader(self._model.problem.input_pts, device)
+
     def train(self, **kwargs):  # TODO add kwargs and lightning capabilities
         return super().fit(self._model, self._loader, **kwargs)
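Because `_create_or_update_loader` rebuilds the loader from the problem's current `input_pts`, any callback that changes the collocation points can refresh the training data the same way R3Refinement does. A minimal illustrative callback is sketched below; it assumes every condition of the problem carries a sampling location, and the class name and defaults are made up for the example.

    from lightning.pytorch.callbacks import Callback


    class ResampleEveryNEpochs(Callback):
        """Illustrative callback: periodically resample the whole domain and
        rebuild the trainer's dataloader via _create_or_update_loader."""

        def __init__(self, n_points=100, every=50):
            super().__init__()
            self._n_points = n_points
            self._every = every

        def on_train_epoch_end(self, trainer, _):
            if trainer.current_epoch % self._every != 0:
                return
            problem = trainer._model.problem
            # resample all locations from scratch (no add_points here)
            problem.discretise_domain(self._n_points, 'random',
                                      locations=list(problem.conditions.keys()))
            # refresh the dummy loader so the new points are used
            trainer._create_or_update_loader()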