From 7a2316da0437a5c8fea55bf164d0a0650f480228 Mon Sep 17 00:00:00 2001 From: Filippo Olivo Date: Tue, 9 Dec 2025 09:19:26 +0100 Subject: [PATCH] add experiments --- .../config_16_layer_16_hidden_refined.yaml | 27 +++++--- .../config_32_layer_16_hidden_refined.yaml} | 27 +++++--- .../config_8_layer_16_hidden_refined.yaml | 17 ++--- .../config_16_layer_8_hidden_refined.yaml | 62 ------------------ experiments/config_4_layer_16_hidden.yaml | 64 ------------------- experiments/config_4_layer_8_hidden.yaml | 62 ------------------ experiments/config_8_layer_8_hidden.yaml | 62 ------------------ 7 files changed, 45 insertions(+), 276 deletions(-) rename experiments/{ => 5_steps}/config_16_layer_16_hidden_refined.yaml (69%) rename experiments/{config_8_layer_16_hidden.yaml => 5_steps/config_32_layer_16_hidden_refined.yaml} (67%) rename experiments/{ => 5_steps}/config_8_layer_16_hidden_refined.yaml (81%) delete mode 100644 experiments/config_16_layer_8_hidden_refined.yaml delete mode 100644 experiments/config_4_layer_16_hidden.yaml delete mode 100644 experiments/config_4_layer_8_hidden.yaml delete mode 100644 experiments/config_8_layer_8_hidden.yaml diff --git a/experiments/config_16_layer_16_hidden_refined.yaml b/experiments/5_steps/config_16_layer_16_hidden_refined.yaml similarity index 69% rename from experiments/config_16_layer_16_hidden_refined.yaml rename to experiments/5_steps/config_16_layer_16_hidden_refined.yaml index 66d018e..91072c1 100644 --- a/experiments/config_16_layer_16_hidden_refined.yaml +++ b/experiments/5_steps/config_16_layer_16_hidden_refined.yaml @@ -10,8 +10,8 @@ trainer: - class_path: lightning.pytorch.loggers.WandbLogger init_args: save_dir: logs.autoregressive.wandb - project: "thermal-conduction-unsteady" - name: "16_refined" + project: "thermal-conduction-unsteady-5.steps" + name: "16_layer_16_hidden" callbacks: - class_path: lightning.pytorch.callbacks.ModelCheckpoint init_args: @@ -24,16 +24,24 @@ trainer: init_args: monitor: val/loss mode: min - patience: 10 + patience: 30 verbose: false + # - class_path: ThermalSolver.switch_dataloader_callback.SwitchDataLoaderCallback + # init_args: + # increase_unrolling_steps_by: 5 + # patience: 10 + # last_patience: 15 + # max_unrolling_steps: 20 + # ckpt_path: logs.autoregressive.wandb/16_16_refined/checkpoints max_epochs: 1000 min_epochs: null max_steps: -1 min_steps: null overfit_batches: 0.0 - log_every_n_steps: null - accumulate_grad_batches: 4 + log_every_n_steps: 0 + accumulate_grad_batches: 1 default_root_dir: null + gradient_clip_val: 1.0 model: class_path: ThermalSolver.autoregressive_module.GraphSolver @@ -45,18 +53,19 @@ model: output_dim: 1 n_layers: 16 unrolling_steps: 5 - + data: class_path: ThermalSolver.graph_datamodule_unsteady.GraphDataModule init_args: hf_repo: "SISSAmathLab/thermal-conduction-unsteady" - split_name: "100_samples_easy_refined" - batch_size: 8 + split_name: "easy.refined" + n_elements: 100 + batch_size: 32 train_size: 0.7 val_size: 0.2 test_size: 0.1 build_radial_graph: false remove_boundary_edges: true - start_unrolling_steps: 5 + unrolling_steps: 5 optimizer: null lr_scheduler: null diff --git a/experiments/config_8_layer_16_hidden.yaml b/experiments/5_steps/config_32_layer_16_hidden_refined.yaml similarity index 67% rename from experiments/config_8_layer_16_hidden.yaml rename to experiments/5_steps/config_32_layer_16_hidden_refined.yaml index 46e012f..1d53bc2 100644 --- a/experiments/config_8_layer_16_hidden.yaml +++ b/experiments/5_steps/config_32_layer_16_hidden_refined.yaml @@ -10,12 +10,12 @@ trainer: - class_path: lightning.pytorch.loggers.WandbLogger init_args: save_dir: logs.autoregressive.wandb - project: "thermal-conduction-unsteady" - name: "standard" + project: "thermal-conduction-unsteady-5.steps" + name: "32_layer_16_hidden" callbacks: - class_path: lightning.pytorch.callbacks.ModelCheckpoint init_args: - dirpath: logs.autoregressive.wandb/standard/checkpoints + dirpath: logs.autoregressive.wandb/32_refined/checkpoints monitor: val/loss mode: min save_top_k: 1 @@ -24,16 +24,24 @@ trainer: init_args: monitor: val/loss mode: min - patience: 10 + patience: 30 verbose: false + # - class_path: ThermalSolver.switch_dataloader_callback.SwitchDataLoaderCallback + # init_args: + # increase_unrolling_steps_by: 5 + # patience: 10 + # last_patience: 15 + # max_unrolling_steps: 20 + # ckpt_path: logs.autoregressive.wandb/16_16_refined/checkpoints max_epochs: 1000 min_epochs: null max_steps: -1 min_steps: null overfit_batches: 0.0 - log_every_n_steps: null + log_every_n_steps: 0 accumulate_grad_batches: 2 default_root_dir: null + gradient_clip_val: 1.0 model: class_path: ThermalSolver.autoregressive_module.GraphSolver @@ -43,20 +51,21 @@ model: input_dim: 1 hidden_dim: 16 output_dim: 1 - n_layers: 8 + n_layers: 32 unrolling_steps: 5 - + data: class_path: ThermalSolver.graph_datamodule_unsteady.GraphDataModule init_args: hf_repo: "SISSAmathLab/thermal-conduction-unsteady" - split_name: "100_samples_easy" + split_name: "easy.refined" + n_elements: 100 batch_size: 16 train_size: 0.7 val_size: 0.2 test_size: 0.1 build_radial_graph: false remove_boundary_edges: true - start_unrolling_steps: 5 + unrolling_steps: 5 optimizer: null lr_scheduler: null diff --git a/experiments/config_8_layer_16_hidden_refined.yaml b/experiments/5_steps/config_8_layer_16_hidden_refined.yaml similarity index 81% rename from experiments/config_8_layer_16_hidden_refined.yaml rename to experiments/5_steps/config_8_layer_16_hidden_refined.yaml index 323a2a6..29b97c3 100644 --- a/experiments/config_8_layer_16_hidden_refined.yaml +++ b/experiments/5_steps/config_8_layer_16_hidden_refined.yaml @@ -10,12 +10,12 @@ trainer: - class_path: lightning.pytorch.loggers.WandbLogger init_args: save_dir: logs.autoregressive.wandb - project: "thermal-conduction-unsteady" - name: "refined" + project: "thermal-conduction-unsteady-5.steps" + name: "8_layer_16_hidden" callbacks: - class_path: lightning.pytorch.callbacks.ModelCheckpoint init_args: - dirpath: logs.autoregressive.wandb/refined/checkpoints + dirpath: logs.autoregressive.wandb/8_refined/checkpoints monitor: val/loss mode: min save_top_k: 1 @@ -24,7 +24,7 @@ trainer: init_args: monitor: val/loss mode: min - patience: 10 + patience: 20 verbose: false max_epochs: 1000 min_epochs: null @@ -32,7 +32,7 @@ trainer: min_steps: null overfit_batches: 0.0 log_every_n_steps: null - accumulate_grad_batches: 2 + accumulate_grad_batches: 1 default_root_dir: null model: @@ -50,13 +50,14 @@ data: class_path: ThermalSolver.graph_datamodule_unsteady.GraphDataModule init_args: hf_repo: "SISSAmathLab/thermal-conduction-unsteady" - split_name: "100_samples_easy_refined" - batch_size: 16 + split_name: "easy.refined" + n_elements: 100 + batch_size: 32 train_size: 0.7 val_size: 0.2 test_size: 0.1 build_radial_graph: false remove_boundary_edges: true - start_unrolling_steps: 5 + unrolling_steps: 5 optimizer: null lr_scheduler: null diff --git a/experiments/config_16_layer_8_hidden_refined.yaml b/experiments/config_16_layer_8_hidden_refined.yaml deleted file mode 100644 index 02d8ab7..0000000 --- a/experiments/config_16_layer_8_hidden_refined.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# lightning.pytorch==2.5.5 -seed_everything: 1999 -trainer: - accelerator: gpu - strategy: auto - devices: 1 - num_nodes: 1 - precision: null - logger: - - class_path: lightning.pytorch.loggers.WandbLogger - init_args: - save_dir: logs.autoregressive.wandb - project: "thermal-conduction-unsteady" - name: "16_8_refined" - callbacks: - - class_path: lightning.pytorch.callbacks.ModelCheckpoint - init_args: - dirpath: logs.autoregressive.wandb/16_8_refined/checkpoints - monitor: val/loss - mode: min - save_top_k: 1 - filename: best-checkpoint - - class_path: lightning.pytorch.callbacks.EarlyStopping - init_args: - monitor: val/loss - mode: min - patience: 10 - verbose: false - max_epochs: 1000 - min_epochs: null - max_steps: -1 - min_steps: null - overfit_batches: 0.0 - log_every_n_steps: null - accumulate_grad_batches: 2 - default_root_dir: null - -model: - class_path: ThermalSolver.autoregressive_module.GraphSolver - init_args: - model_class_path: ThermalSolver.model.diffusion_net.DiffusionNet - model_init_args: - input_dim: 1 - hidden_dim: 8 - output_dim: 1 - n_layers: 16 - unrolling_steps: 5 - -data: - class_path: ThermalSolver.graph_datamodule_unsteady.GraphDataModule - init_args: - hf_repo: "SISSAmathLab/thermal-conduction-unsteady" - split_name: "100_samples_easy_refined" - batch_size: 16 - train_size: 0.7 - val_size: 0.2 - test_size: 0.1 - build_radial_graph: false - remove_boundary_edges: true - start_unrolling_steps: 5 -optimizer: null -lr_scheduler: null diff --git a/experiments/config_4_layer_16_hidden.yaml b/experiments/config_4_layer_16_hidden.yaml deleted file mode 100644 index 2b35fea..0000000 --- a/experiments/config_4_layer_16_hidden.yaml +++ /dev/null @@ -1,64 +0,0 @@ -# lightning.pytorch==2.5.5 -seed_everything: 1999 -trainer: - accelerator: gpu - strategy: auto - devices: 1 - num_nodes: 1 - precision: null - logger: - - class_path: lightning.pytorch.loggers.WandbLogger - init_args: - save_dir: logs.autoregressive.wandb/wandb - project: "thermal-conduction-unsteady" - name: "5_step_4_layers_16_hidden" - # retain: true - callbacks: - - class_path: lightning.pytorch.callbacks.ModelCheckpoint - init_args: - dirpath: logs.autoregressive.wandb/5_step_4_layers_16_hidden/checkpoints - monitor: val/loss - mode: min - save_top_k: 1 - filename: best-checkpoint - - class_path: lightning.pytorch.callbacks.EarlyStopping - init_args: - monitor: val/loss - mode: min - patience: 10 - verbose: false - max_epochs: 1000 - min_epochs: null - max_steps: -1 - min_steps: null - overfit_batches: 0.0 - log_every_n_steps: null - accumulate_grad_batches: 2 - default_root_dir: null - -model: - class_path: ThermalSolver.autoregressive_module.GraphSolver - init_args: - model_class_path: ThermalSolver.model.diffusion_net.DiffusionNet - model_init_args: - input_dim: 1 - hidden_dim: 16 - output_dim: 1 - n_layers: 4 - unrolling_steps: 5 - -data: - class_path: ThermalSolver.graph_datamodule_unsteady.GraphDataModule - init_args: - hf_repo: "SISSAmathLab/thermal-conduction-unsteady" - split_name: "100_samples_easy_refined" - batch_size: 32 - train_size: 0.7 - val_size: 0.2 - test_size: 0.1 - build_radial_graph: true - radius: 0.5 - remove_boundary_edges: true - start_unrolling_steps: 5 -optimizer: null -lr_scheduler: null diff --git a/experiments/config_4_layer_8_hidden.yaml b/experiments/config_4_layer_8_hidden.yaml deleted file mode 100644 index 289ac7a..0000000 --- a/experiments/config_4_layer_8_hidden.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# lightning.pytorch==2.5.5 -seed_everything: 1999 -trainer: - accelerator: gpu - strategy: auto - devices: 1 - num_nodes: 1 - precision: null - logger: - - class_path: lightning.pytorch.loggers.WandbLogger - init_args: - save_dir: logs.autoregressive.wandb - project: "thermal-conduction-unsteady" - name: "5_step_4_layers_8_hidden" - callbacks: - - class_path: lightning.pytorch.callbacks.ModelCheckpoint - init_args: - dirpath: logs.autoregressive.wandb/5_step_4_layers_8_hidden_0.7_radius/checkpoints - monitor: val/loss - mode: min - save_top_k: 1 - filename: best-checkpoint - - class_path: lightning.pytorch.callbacks.EarlyStopping - init_args: - monitor: val/loss - mode: min - patience: 10 - verbose: false - max_epochs: 1000 - min_epochs: null - max_steps: -1 - min_steps: null - overfit_batches: 0.0 - log_every_n_steps: null - accumulate_grad_batches: 1 - default_root_dir: null - -model: - class_path: ThermalSolver.autoregressive_module.GraphSolver - init_args: - model_class_path: ThermalSolver.model.diffusion_net.DiffusionNet - model_init_args: - input_dim: 1 - hidden_dim: 8 - output_dim: 1 - n_layers: 4 - unrolling_steps: 5 - -data: - class_path: ThermalSolver.graph_datamodule_unsteady.GraphDataModule - init_args: - hf_repo: "SISSAmathLab/thermal-conduction-unsteady" - split_name: "100_samples_easy_refined" - batch_size: 32 - train_size: 0.7 - val_size: 0.2 - test_size: 0.1 - build_radial_graph: false - remove_boundary_edges: true - start_unrolling_steps: 5 -optimizer: null -lr_scheduler: null diff --git a/experiments/config_8_layer_8_hidden.yaml b/experiments/config_8_layer_8_hidden.yaml deleted file mode 100644 index d923397..0000000 --- a/experiments/config_8_layer_8_hidden.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# lightning.pytorch==2.5.5 -seed_everything: 1999 -trainer: - accelerator: gpu - strategy: auto - devices: 1 - num_nodes: 1 - precision: null - logger: - - class_path: lightning.pytorch.loggers.WandbLogger - init_args: - save_dir: logs.autoregressive.wandb - project: "thermal-conduction-unsteady" - name: "standard" - callbacks: - - class_path: lightning.pytorch.callbacks.ModelCheckpoint - init_args: - dirpath: logs.autoregressive.wandb/standard/checkpoints - monitor: val/loss - mode: min - save_top_k: 1 - filename: best-checkpoint - - class_path: lightning.pytorch.callbacks.EarlyStopping - init_args: - monitor: val/loss - mode: min - patience: 10 - verbose: false - max_epochs: 1000 - min_epochs: null - max_steps: -1 - min_steps: null - overfit_batches: 0.0 - log_every_n_steps: null - accumulate_grad_batches: 1 - default_root_dir: null - -model: - class_path: ThermalSolver.autoregressive_module.GraphSolver - init_args: - model_class_path: ThermalSolver.model.diffusion_net.DiffusionNet - model_init_args: - input_dim: 1 - hidden_dim: 8 - output_dim: 1 - n_layers: 8 - unrolling_steps: 5 - -data: - class_path: ThermalSolver.graph_datamodule_unsteady.GraphDataModule - init_args: - hf_repo: "SISSAmathLab/thermal-conduction-unsteady" - split_name: "100_samples_easy_refined" - batch_size: 32 - train_size: 0.7 - val_size: 0.2 - test_size: 0.1 - build_radial_graph: false - remove_boundary_edges: true - start_unrolling_steps: 5 -optimizer: null -lr_scheduler: null