From 65d141d4b34f17a77a4dab67038f640a4aeec4cd Mon Sep 17 00:00:00 2001 From: FilippoOlivo Date: Tue, 14 Oct 2025 10:02:57 +0200 Subject: [PATCH] add experiments --- experiments/config.yaml | 22 ++++++++----- experiments/config_01.yaml | 61 ++++++++++++++++++++++++++++++++++++ experiments/config_02.yaml | 62 +++++++++++++++++++++++++++++++++++++ experiments/config_04.yaml | 62 +++++++++++++++++++++++++++++++++++++ experiments/config_08.yaml | 62 +++++++++++++++++++++++++++++++++++++ experiments/config_16.yaml | 62 +++++++++++++++++++++++++++++++++++++ experiments/config_32.yaml | 62 +++++++++++++++++++++++++++++++++++++ experiments/config_64.yaml | 63 ++++++++++++++++++++++++++++++++++++++ 8 files changed, 449 insertions(+), 7 deletions(-) create mode 100644 experiments/config_01.yaml create mode 100644 experiments/config_02.yaml create mode 100644 experiments/config_04.yaml create mode 100644 experiments/config_08.yaml create mode 100644 experiments/config_16.yaml create mode 100644 experiments/config_32.yaml create mode 100644 experiments/config_64.yaml diff --git a/experiments/config.yaml b/experiments/config.yaml index c0b5602..c88c524 100644 --- a/experiments/config.yaml +++ b/experiments/config.yaml @@ -1,5 +1,5 @@ # lightning.pytorch==2.5.5 -seed_everything: true +seed_everything: 1999 trainer: accelerator: gpu strategy: auto @@ -14,7 +14,13 @@ trainer: mode: min save_top_k: 1 filename: best-checkpoint - max_epochs: 50 + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/loss + mode: min + patience: 10 + verbose: false + max_epochs: 200 min_epochs: null max_steps: -1 min_steps: null @@ -22,6 +28,8 @@ trainer: log_every_n_steps: null inference_mode: true default_root_dir: null + accumulate_grad_batches: 2 + gradient_clip_val: 1.0 model: class_path: ThermalSolver.module.GraphSolver init_args: @@ -29,17 +37,17 @@ model: model_init_args: x_ch_node: 1 f_ch_node: 1 - hidden: 16 - layers: 2 + hidden: 16 + layers: 1 edge_ch: 3 out_ch: 1 - unrolling_steps: 10 + unrolling_steps: 64 data: class_path: ThermalSolver.data_module.GraphDataModule init_args: hf_repo: "SISSAmathLab/thermal-conduction" - split_name: "2000" - batch_size: 6 + split_name: "2000_ref_1" + batch_size: 10 train_size: 0.8 test_size: 0.1 test_size: 0.1 diff --git a/experiments/config_01.yaml b/experiments/config_01.yaml new file mode 100644 index 0000000..4604b8a --- /dev/null +++ b/experiments/config_01.yaml @@ -0,0 +1,61 @@ +# lightning.pytorch==2.5.5 +seed_everything: 1999 +trainer: + accelerator: gpu + strategy: auto + devices: 1 + num_nodes: 1 + precision: null + logger: + - class_path: lightning.pytorch.loggers.TensorBoardLogger + init_args: + save_dir: lightning_logs + name: "01" + version: null + callbacks: + - class_path: lightning.pytorch.callbacks.ModelCheckpoint + init_args: + monitor: val/loss + mode: min + save_top_k: 1 + filename: best-checkpoint + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/loss + mode: min + patience: 15 + verbose: false + max_epochs: 1000 + min_epochs: null + max_steps: -1 + min_steps: null + overfit_batches: 0.0 + log_every_n_steps: null + inference_mode: true + default_root_dir: null + accumulate_grad_batches: 6 + # gradient_clip_val: 1.0 +model: + class_path: ThermalSolver.module.GraphSolver + init_args: + model_class_path: ThermalSolver.model.local_gno.GatingGNO + model_init_args: + x_ch_node: 1 + f_ch_node: 1 + hidden: 16 + layers: 1 + edge_ch: 3 + out_ch: 1 + unrolling_steps: 64 +data: + class_path: ThermalSolver.data_module.GraphDataModule + init_args: + hf_repo: "SISSAmathLab/thermal-conduction" + split_name: "2000_ref_1" + batch_size: 4 + train_size: 0.8 + test_size: 0.1 + test_size: 0.1 +optimizer: null +lr_scheduler: null +# ckpt_path: lightning_logs/01/version_0/checkpoints/best-checkpoint.ckpt \ No newline at end of file diff --git a/experiments/config_02.yaml b/experiments/config_02.yaml new file mode 100644 index 0000000..995eff2 --- /dev/null +++ b/experiments/config_02.yaml @@ -0,0 +1,62 @@ +# lightning.pytorch==2.5.5 +seed_everything: 1999 +trainer: + accelerator: gpu + strategy: auto + devices: 1 + num_nodes: 1 + precision: null + logger: + - class_path: lightning.pytorch.loggers.TensorBoardLogger + init_args: + save_dir: lightning_logs + name: "02" + version: null + callbacks: + - class_path: lightning.pytorch.callbacks.ModelCheckpoint + init_args: + monitor: val/loss + mode: min + save_top_k: 1 + filename: best-checkpoint + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/loss + mode: min + patience: 10 + verbose: false + max_epochs: 200 + min_epochs: null + max_steps: -1 + min_steps: null + overfit_batches: 0.0 + log_every_n_steps: null + inference_mode: true + default_root_dir: null + accumulate_grad_batches: 6 + gradient_clip_val: 1.0 +model: + class_path: ThermalSolver.module.GraphSolver + init_args: + model_class_path: ThermalSolver.model.local_gno.GatingGNO + model_init_args: + x_ch_node: 1 + f_ch_node: 1 + hidden: 16 + layers: 2 + edge_ch: 3 + out_ch: 1 + unrolling_steps: 32 +data: + class_path: ThermalSolver.data_module.GraphDataModule + init_args: + hf_repo: "SISSAmathLab/thermal-conduction" + split_name: "2000_ref_1" + batch_size: 4 + train_size: 0.8 + test_size: 0.1 + test_size: 0.1 +optimizer: null +lr_scheduler: null +# ckpt_path: lightning_logs/version_15/checkpoints/best-checkpoint.ckpt +ckpt_path: null diff --git a/experiments/config_04.yaml b/experiments/config_04.yaml new file mode 100644 index 0000000..1a8b15e --- /dev/null +++ b/experiments/config_04.yaml @@ -0,0 +1,62 @@ +# lightning.pytorch==2.5.5 +seed_everything: 1999 +trainer: + accelerator: gpu + strategy: auto + devices: 1 + num_nodes: 1 + precision: null + logger: + - class_path: lightning.pytorch.loggers.TensorBoardLogger + init_args: + save_dir: lightning_logs + name: "04" + version: null + callbacks: + - class_path: lightning.pytorch.callbacks.ModelCheckpoint + init_args: + monitor: val/loss + mode: min + save_top_k: 1 + filename: best-checkpoint + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/loss + mode: min + patience: 10 + verbose: false + max_epochs: 200 + min_epochs: null + max_steps: -1 + min_steps: null + overfit_batches: 0.0 + log_every_n_steps: null + inference_mode: true + default_root_dir: null + accumulate_grad_batches: 6 + gradient_clip_val: 1.0 +model: + class_path: ThermalSolver.module.GraphSolver + init_args: + model_class_path: ThermalSolver.model.local_gno.GatingGNO + model_init_args: + x_ch_node: 1 + f_ch_node: 1 + hidden: 16 + layers: 4 + edge_ch: 3 + out_ch: 1 + unrolling_steps: 16 +data: + class_path: ThermalSolver.data_module.GraphDataModule + init_args: + hf_repo: "SISSAmathLab/thermal-conduction" + split_name: "2000_ref_1" + batch_size: 4 + train_size: 0.8 + test_size: 0.1 + test_size: 0.1 +optimizer: null +lr_scheduler: null +# ckpt_path: lightning_logs/version_15/checkpoints/best-checkpoint.ckpt +ckpt_path: null diff --git a/experiments/config_08.yaml b/experiments/config_08.yaml new file mode 100644 index 0000000..afb6c8e --- /dev/null +++ b/experiments/config_08.yaml @@ -0,0 +1,62 @@ +# lightning.pytorch==2.5.5 +seed_everything: 1999 +trainer: + accelerator: gpu + strategy: auto + devices: 1 + num_nodes: 1 + precision: null + logger: + - class_path: lightning.pytorch.loggers.TensorBoardLogger + init_args: + save_dir: lightning_logs + name: "08" + version: null + callbacks: + - class_path: lightning.pytorch.callbacks.ModelCheckpoint + init_args: + monitor: val/loss + mode: min + save_top_k: 1 + filename: best-checkpoint + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/loss + mode: min + patience: 10 + verbose: false + max_epochs: 200 + min_epochs: null + max_steps: -1 + min_steps: null + overfit_batches: 0.0 + log_every_n_steps: null + inference_mode: true + default_root_dir: null + accumulate_grad_batches: 6 + gradient_clip_val: 1.0 +model: + class_path: ThermalSolver.module.GraphSolver + init_args: + model_class_path: ThermalSolver.model.local_gno.GatingGNO + model_init_args: + x_ch_node: 1 + f_ch_node: 1 + hidden: 16 + layers: 8 + edge_ch: 3 + out_ch: 1 + unrolling_steps: 8 +data: + class_path: ThermalSolver.data_module.GraphDataModule + init_args: + hf_repo: "SISSAmathLab/thermal-conduction" + split_name: "2000_ref_1" + batch_size: 4 + train_size: 0.8 + test_size: 0.1 + test_size: 0.1 +optimizer: null +lr_scheduler: null +# ckpt_path: lightning_logs/version_15/checkpoints/best-checkpoint.ckpt +ckpt_path: null diff --git a/experiments/config_16.yaml b/experiments/config_16.yaml new file mode 100644 index 0000000..07031d3 --- /dev/null +++ b/experiments/config_16.yaml @@ -0,0 +1,62 @@ +# lightning.pytorch==2.5.5 +seed_everything: 1999 +trainer: + accelerator: gpu + strategy: auto + devices: 1 + num_nodes: 1 + precision: null + logger: + - class_path: lightning.pytorch.loggers.TensorBoardLogger + init_args: + save_dir: lightning_logs + name: "16" + version: null + callbacks: + - class_path: lightning.pytorch.callbacks.ModelCheckpoint + init_args: + monitor: val/loss + mode: min + save_top_k: 1 + filename: best-checkpoint + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/loss + mode: min + patience: 10 + verbose: false + max_epochs: 200 + min_epochs: null + max_steps: -1 + min_steps: null + overfit_batches: 0.0 + log_every_n_steps: null + inference_mode: true + default_root_dir: null + accumulate_grad_batches: 6 + gradient_clip_val: 1.0 +model: + class_path: ThermalSolver.module.GraphSolver + init_args: + model_class_path: ThermalSolver.model.local_gno.GatingGNO + model_init_args: + x_ch_node: 1 + f_ch_node: 1 + hidden: 16 + layers: 16 + edge_ch: 3 + out_ch: 1 + unrolling_steps: 4 +data: + class_path: ThermalSolver.data_module.GraphDataModule + init_args: + hf_repo: "SISSAmathLab/thermal-conduction" + split_name: "2000_ref_1" + batch_size: 4 + train_size: 0.8 + test_size: 0.1 + test_size: 0.1 +optimizer: null +lr_scheduler: null +# ckpt_path: lightning_logs/version_15/checkpoints/best-checkpoint.ckpt +ckpt_path: null diff --git a/experiments/config_32.yaml b/experiments/config_32.yaml new file mode 100644 index 0000000..3ca1392 --- /dev/null +++ b/experiments/config_32.yaml @@ -0,0 +1,62 @@ +# lightning.pytorch==2.5.5 +seed_everything: 1999 +trainer: + accelerator: gpu + strategy: auto + devices: 1 + num_nodes: 1 + precision: null + logger: + - class_path: lightning.pytorch.loggers.TensorBoardLogger + init_args: + save_dir: lightning_logs + name: "32" + version: null + callbacks: + - class_path: lightning.pytorch.callbacks.ModelCheckpoint + init_args: + monitor: val/loss + mode: min + save_top_k: 1 + filename: best-checkpoint + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/loss + mode: min + patience: 10 + verbose: false + max_epochs: 200 + min_epochs: null + max_steps: -1 + min_steps: null + overfit_batches: 0.0 + log_every_n_steps: null + inference_mode: true + default_root_dir: null + accumulate_grad_batches: 6 + gradient_clip_val: 1.0 +model: + class_path: ThermalSolver.module.GraphSolver + init_args: + model_class_path: ThermalSolver.model.local_gno.GatingGNO + model_init_args: + x_ch_node: 1 + f_ch_node: 1 + hidden: 16 + layers: 32 + edge_ch: 3 + out_ch: 1 + unrolling_steps: 2 +data: + class_path: ThermalSolver.data_module.GraphDataModule + init_args: + hf_repo: "SISSAmathLab/thermal-conduction" + split_name: "2000_ref_1" + batch_size: 4 + train_size: 0.8 + test_size: 0.1 + test_size: 0.1 +optimizer: null +lr_scheduler: null +# ckpt_path: lightning_logs/version_15/checkpoints/best-checkpoint.ckpt +ckpt_path: null diff --git a/experiments/config_64.yaml b/experiments/config_64.yaml new file mode 100644 index 0000000..617b8ac --- /dev/null +++ b/experiments/config_64.yaml @@ -0,0 +1,63 @@ +# lightning.pytorch==2.5.5 +seed_everything: 1999 +trainer: + accelerator: gpu + strategy: auto + devices: 1 + num_nodes: 1 + precision: null + logger: + - class_path: lightning.pytorch.loggers.TensorBoardLogger + init_args: + save_dir: lightning_logs + name: "64" + version: null + callbacks: + - class_path: lightning.pytorch.callbacks.ModelCheckpoint + init_args: + monitor: val/loss + mode: min + save_top_k: 1 + filename: best-checkpoint + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/loss + mode: min + patience: 15 + verbose: false + max_epochs: 1000 + min_epochs: null + max_steps: -1 + min_steps: null + overfit_batches: 0.0 + log_every_n_steps: null + inference_mode: true + default_root_dir: null + accumulate_grad_batches: 6 + gradient_clip_val: 1.0 + +model: + class_path: ThermalSolver.module.GraphSolver + init_args: + model_class_path: ThermalSolver.model.local_gno.GatingGNO + model_init_args: + x_ch_node: 1 + f_ch_node: 1 + hidden: 16 + layers: 64 + edge_ch: 3 + out_ch: 1 + unrolling_steps: 1 +data: + class_path: ThermalSolver.data_module.GraphDataModule + init_args: + hf_repo: "SISSAmathLab/thermal-conduction" + split_name: "2000" + batch_size: 4 + train_size: 0.8 + test_size: 0.1 + test_size: 0.1 +optimizer: null +lr_scheduler: null +# ckpt_path: lightning_logs/64/version_0/checkpoints/best-checkpoint.ckpt +ckpt_path: null