From 65d141d4b34f17a77a4dab67038f640a4aeec4cd Mon Sep 17 00:00:00 2001
From: FilippoOlivo <filippo@filippoolivo.com>
Date: Tue, 14 Oct 2025 10:02:57 +0200
Subject: [PATCH] experiments: add layers/unrolling_steps sweep configs (01-64)

---
 experiments/config.yaml    | 22 ++++++++-----
 experiments/config_01.yaml | 61 ++++++++++++++++++++++++++++++++++++
 experiments/config_02.yaml | 62 +++++++++++++++++++++++++++++++++++++
 experiments/config_04.yaml | 62 +++++++++++++++++++++++++++++++++++++
 experiments/config_08.yaml | 62 +++++++++++++++++++++++++++++++++++++
 experiments/config_16.yaml | 62 +++++++++++++++++++++++++++++++++++++
 experiments/config_32.yaml | 62 +++++++++++++++++++++++++++++++++++++
 experiments/config_64.yaml | 63 ++++++++++++++++++++++++++++++++++++++
 8 files changed, 449 insertions(+), 7 deletions(-)
 create mode 100644 experiments/config_01.yaml
 create mode 100644 experiments/config_02.yaml
 create mode 100644 experiments/config_04.yaml
 create mode 100644 experiments/config_08.yaml
 create mode 100644 experiments/config_16.yaml
 create mode 100644 experiments/config_32.yaml
 create mode 100644 experiments/config_64.yaml

diff --git a/experiments/config.yaml b/experiments/config.yaml
index c0b5602..c88c524 100644
--- a/experiments/config.yaml
+++ b/experiments/config.yaml
@@ -1,5 +1,5 @@
 # lightning.pytorch==2.5.5
-seed_everything: true
+seed_everything: 1999  # fixed integer seed; same value as config_01..config_64
 trainer:
   accelerator: gpu
   strategy: auto
@@ -14,7 +14,13 @@ trainer:
         mode: min
         save_top_k: 1
         filename: best-checkpoint
-  max_epochs: 50
+    - class_path: lightning.pytorch.callbacks.EarlyStopping
+      init_args:
+        monitor: val/loss
+        mode: min
+        patience: 10  # validation checks without val/loss improvement before stopping
+        verbose: false
+  max_epochs: 200  # upper bound; the EarlyStopping callback above can end training sooner
   min_epochs: null
   max_steps: -1
   min_steps: null
@@ -22,6 +28,8 @@ trainer:
   log_every_n_steps: null
   inference_mode: true
   default_root_dir: null
+  accumulate_grad_batches: 2  # optimizer steps once every 2 batches; config_01..config_64 use 6
+  gradient_clip_val: 1.0  # clip gradients at 1.0
 model: 
   class_path: ThermalSolver.module.GraphSolver
   init_args:
@@ -29,17 +37,17 @@ model:
     model_init_args:
       x_ch_node: 1 
       f_ch_node: 1 
-      hidden: 16 
-      layers: 2
+      hidden: 16
+      layers: 1  # same layers/unrolling_steps pair as config_01.yaml
       edge_ch: 3 
       out_ch: 1
-    unrolling_steps: 10
+    unrolling_steps: 64  # layers * unrolling_steps = 64, as in config_01..config_64
 data: 
   class_path: ThermalSolver.data_module.GraphDataModule
   init_args:
     hf_repo: "SISSAmathLab/thermal-conduction"
-    split_name: "2000"
-    batch_size: 6
+    split_name: "2000_ref_1"  # same split as config_01..config_32 (config_64 uses "2000")
+    batch_size: 10  # config_01..config_64 use 4
     train_size: 0.8
     test_size: 0.1
     test_size: 0.1
diff --git a/experiments/config_01.yaml b/experiments/config_01.yaml
new file mode 100644
index 0000000..4604b8a
--- /dev/null
+++ b/experiments/config_01.yaml
@@ -0,0 +1,61 @@
+# lightning.pytorch==2.5.5
+seed_everything: 1999  # fixed seed, identical across all configs in this patch
+trainer:  # Trainer settings for the 1-layer run
+  accelerator: gpu
+  strategy: auto
+  devices: 1
+  num_nodes: 1
+  precision: null
+  logger:
+    - class_path: lightning.pytorch.loggers.TensorBoardLogger
+      init_args:
+        save_dir: lightning_logs
+        name: "01"  # run label; same number as `layers` in the model section
+        version: null
+  callbacks:
+    - class_path: lightning.pytorch.callbacks.ModelCheckpoint
+      init_args:
+        monitor: val/loss
+        mode: min
+        save_top_k: 1  # keep only the single best checkpoint by val/loss
+        filename: best-checkpoint
+    - class_path: lightning.pytorch.callbacks.EarlyStopping
+      init_args:
+        monitor: val/loss
+        mode: min
+        patience: 15  # config_02..config_32 use 10; config_64 also uses 15
+        verbose: false
+  max_epochs: 1000  # config_02..config_32 use 200; config_64 also uses 1000
+  min_epochs: null
+  max_steps: -1
+  min_steps: null
+  overfit_batches: 0.0
+  log_every_n_steps: null
+  inference_mode: true
+  default_root_dir: null
+  accumulate_grad_batches: 6  # optimizer steps once every 6 batches
+  # gradient_clip_val: 1.0  # NOTE(review): clipping is disabled only in this config; confirm intentional
+model:  # GraphSolver configured with the GatingGNO model class
+  class_path: ThermalSolver.module.GraphSolver
+  init_args:
+    model_class_path: ThermalSolver.model.local_gno.GatingGNO
+    model_init_args:
+      x_ch_node: 1 
+      f_ch_node: 1 
+      hidden: 16
+      layers: 1  # sweep variable: 1, 2, 4, 8, 16, 32, 64 across the config_NN files
+      edge_ch: 3 
+      out_ch: 1
+    unrolling_steps: 64  # layers * unrolling_steps = 64 here, as in every config in this patch
+data:  # GraphDataModule settings
+  class_path: ThermalSolver.data_module.GraphDataModule
+  init_args:
+    hf_repo: "SISSAmathLab/thermal-conduction"
+    split_name: "2000_ref_1"
+    batch_size: 4  # config.yaml uses 10
+    train_size: 0.8
+    test_size: 0.1
+    test_size: 0.1  # NOTE(review): duplicate key (loaders usually keep the last one); was `val_size` intended? confirm
+optimizer: null  # no optimizer configured in this file
+lr_scheduler: null  # no LR scheduler configured in this file
+# ckpt_path: lightning_logs/01/version_0/checkpoints/best-checkpoint.ckpt
diff --git a/experiments/config_02.yaml b/experiments/config_02.yaml
new file mode 100644
index 0000000..995eff2
--- /dev/null
+++ b/experiments/config_02.yaml
@@ -0,0 +1,62 @@
+# lightning.pytorch==2.5.5
+seed_everything: 1999  # fixed seed, identical across all configs in this patch
+trainer:  # Trainer settings for the 2-layer run
+  accelerator: gpu
+  strategy: auto
+  devices: 1
+  num_nodes: 1
+  precision: null
+  logger:
+    - class_path: lightning.pytorch.loggers.TensorBoardLogger
+      init_args:
+        save_dir: lightning_logs
+        name: "02"  # run label; same number as `layers` in the model section
+        version: null
+  callbacks:
+    - class_path: lightning.pytorch.callbacks.ModelCheckpoint
+      init_args:
+        monitor: val/loss
+        mode: min
+        save_top_k: 1  # keep only the single best checkpoint by val/loss
+        filename: best-checkpoint
+    - class_path: lightning.pytorch.callbacks.EarlyStopping
+      init_args:
+        monitor: val/loss
+        mode: min
+        patience: 10  # config_01 and config_64 use 15
+        verbose: false
+  max_epochs: 200  # config_01 and config_64 use 1000
+  min_epochs: null
+  max_steps: -1
+  min_steps: null
+  overfit_batches: 0.0
+  log_every_n_steps: null
+  inference_mode: true
+  default_root_dir: null
+  accumulate_grad_batches: 6  # optimizer steps once every 6 batches
+  gradient_clip_val: 1.0  # clip gradients at 1.0 (commented out in config_01)
+model:  # GraphSolver configured with the GatingGNO model class
+  class_path: ThermalSolver.module.GraphSolver
+  init_args:
+    model_class_path: ThermalSolver.model.local_gno.GatingGNO
+    model_init_args:
+      x_ch_node: 1 
+      f_ch_node: 1 
+      hidden: 16
+      layers: 2  # sweep variable: 1, 2, 4, 8, 16, 32, 64 across the config_NN files
+      edge_ch: 3 
+      out_ch: 1
+    unrolling_steps: 32  # layers * unrolling_steps = 64 here, as in every config in this patch
+data:  # GraphDataModule settings
+  class_path: ThermalSolver.data_module.GraphDataModule
+  init_args:
+    hf_repo: "SISSAmathLab/thermal-conduction"
+    split_name: "2000_ref_1"
+    batch_size: 4  # config.yaml uses 10
+    train_size: 0.8
+    test_size: 0.1
+    test_size: 0.1  # NOTE(review): duplicate key (loaders usually keep the last one); was `val_size` intended? confirm
+optimizer: null  # no optimizer configured in this file
+lr_scheduler: null  # no LR scheduler configured in this file
+# ckpt_path: lightning_logs/version_15/checkpoints/best-checkpoint.ckpt
+ckpt_path: null  # NOTE(review): commented path above lacks the "02" logger dir used in config_01/config_64 comments; likely stale
diff --git a/experiments/config_04.yaml b/experiments/config_04.yaml
new file mode 100644
index 0000000..1a8b15e
--- /dev/null
+++ b/experiments/config_04.yaml
@@ -0,0 +1,62 @@
+# lightning.pytorch==2.5.5
+seed_everything: 1999  # fixed seed, identical across all configs in this patch
+trainer:  # Trainer settings for the 4-layer run
+  accelerator: gpu
+  strategy: auto
+  devices: 1
+  num_nodes: 1
+  precision: null
+  logger:
+    - class_path: lightning.pytorch.loggers.TensorBoardLogger
+      init_args:
+        save_dir: lightning_logs
+        name: "04"  # run label; same number as `layers` in the model section
+        version: null
+  callbacks:
+    - class_path: lightning.pytorch.callbacks.ModelCheckpoint
+      init_args:
+        monitor: val/loss
+        mode: min
+        save_top_k: 1  # keep only the single best checkpoint by val/loss
+        filename: best-checkpoint
+    - class_path: lightning.pytorch.callbacks.EarlyStopping
+      init_args:
+        monitor: val/loss
+        mode: min
+        patience: 10  # config_01 and config_64 use 15
+        verbose: false
+  max_epochs: 200  # config_01 and config_64 use 1000
+  min_epochs: null
+  max_steps: -1
+  min_steps: null
+  overfit_batches: 0.0
+  log_every_n_steps: null
+  inference_mode: true
+  default_root_dir: null
+  accumulate_grad_batches: 6  # optimizer steps once every 6 batches
+  gradient_clip_val: 1.0  # clip gradients at 1.0 (commented out in config_01)
+model:  # GraphSolver configured with the GatingGNO model class
+  class_path: ThermalSolver.module.GraphSolver
+  init_args:
+    model_class_path: ThermalSolver.model.local_gno.GatingGNO
+    model_init_args:
+      x_ch_node: 1 
+      f_ch_node: 1 
+      hidden: 16
+      layers: 4  # sweep variable: 1, 2, 4, 8, 16, 32, 64 across the config_NN files
+      edge_ch: 3 
+      out_ch: 1
+    unrolling_steps: 16  # layers * unrolling_steps = 64 here, as in every config in this patch
+data:  # GraphDataModule settings
+  class_path: ThermalSolver.data_module.GraphDataModule
+  init_args:
+    hf_repo: "SISSAmathLab/thermal-conduction"
+    split_name: "2000_ref_1"
+    batch_size: 4  # config.yaml uses 10
+    train_size: 0.8
+    test_size: 0.1
+    test_size: 0.1  # NOTE(review): duplicate key (loaders usually keep the last one); was `val_size` intended? confirm
+optimizer: null  # no optimizer configured in this file
+lr_scheduler: null  # no LR scheduler configured in this file
+# ckpt_path: lightning_logs/version_15/checkpoints/best-checkpoint.ckpt
+ckpt_path: null  # NOTE(review): commented path above lacks the "04" logger dir used in config_01/config_64 comments; likely stale
diff --git a/experiments/config_08.yaml b/experiments/config_08.yaml
new file mode 100644
index 0000000..afb6c8e
--- /dev/null
+++ b/experiments/config_08.yaml
@@ -0,0 +1,62 @@
+# lightning.pytorch==2.5.5
+seed_everything: 1999  # fixed seed, identical across all configs in this patch
+trainer:  # Trainer settings for the 8-layer run
+  accelerator: gpu
+  strategy: auto
+  devices: 1
+  num_nodes: 1
+  precision: null
+  logger:
+    - class_path: lightning.pytorch.loggers.TensorBoardLogger
+      init_args:
+        save_dir: lightning_logs
+        name: "08"  # run label; same number as `layers` in the model section
+        version: null
+  callbacks:
+    - class_path: lightning.pytorch.callbacks.ModelCheckpoint
+      init_args:
+        monitor: val/loss
+        mode: min
+        save_top_k: 1  # keep only the single best checkpoint by val/loss
+        filename: best-checkpoint
+    - class_path: lightning.pytorch.callbacks.EarlyStopping
+      init_args:
+        monitor: val/loss
+        mode: min
+        patience: 10  # config_01 and config_64 use 15
+        verbose: false
+  max_epochs: 200  # config_01 and config_64 use 1000
+  min_epochs: null
+  max_steps: -1
+  min_steps: null
+  overfit_batches: 0.0
+  log_every_n_steps: null
+  inference_mode: true
+  default_root_dir: null
+  accumulate_grad_batches: 6  # optimizer steps once every 6 batches
+  gradient_clip_val: 1.0  # clip gradients at 1.0 (commented out in config_01)
+model:  # GraphSolver configured with the GatingGNO model class
+  class_path: ThermalSolver.module.GraphSolver
+  init_args:
+    model_class_path: ThermalSolver.model.local_gno.GatingGNO
+    model_init_args:
+      x_ch_node: 1 
+      f_ch_node: 1 
+      hidden: 16
+      layers: 8  # sweep variable: 1, 2, 4, 8, 16, 32, 64 across the config_NN files
+      edge_ch: 3 
+      out_ch: 1
+    unrolling_steps: 8  # layers * unrolling_steps = 64 here, as in every config in this patch
+data:  # GraphDataModule settings
+  class_path: ThermalSolver.data_module.GraphDataModule
+  init_args:
+    hf_repo: "SISSAmathLab/thermal-conduction"
+    split_name: "2000_ref_1"
+    batch_size: 4  # config.yaml uses 10
+    train_size: 0.8
+    test_size: 0.1
+    test_size: 0.1  # NOTE(review): duplicate key (loaders usually keep the last one); was `val_size` intended? confirm
+optimizer: null  # no optimizer configured in this file
+lr_scheduler: null  # no LR scheduler configured in this file
+# ckpt_path: lightning_logs/version_15/checkpoints/best-checkpoint.ckpt
+ckpt_path: null  # NOTE(review): commented path above lacks the "08" logger dir used in config_01/config_64 comments; likely stale
diff --git a/experiments/config_16.yaml b/experiments/config_16.yaml
new file mode 100644
index 0000000..07031d3
--- /dev/null
+++ b/experiments/config_16.yaml
@@ -0,0 +1,62 @@
+# lightning.pytorch==2.5.5
+seed_everything: 1999  # fixed seed, identical across all configs in this patch
+trainer:  # Trainer settings for the 16-layer run
+  accelerator: gpu
+  strategy: auto
+  devices: 1
+  num_nodes: 1
+  precision: null
+  logger:
+    - class_path: lightning.pytorch.loggers.TensorBoardLogger
+      init_args:
+        save_dir: lightning_logs
+        name: "16"  # run label; same number as `layers` in the model section
+        version: null
+  callbacks:
+    - class_path: lightning.pytorch.callbacks.ModelCheckpoint
+      init_args:
+        monitor: val/loss
+        mode: min
+        save_top_k: 1  # keep only the single best checkpoint by val/loss
+        filename: best-checkpoint
+    - class_path: lightning.pytorch.callbacks.EarlyStopping
+      init_args:
+        monitor: val/loss
+        mode: min
+        patience: 10  # config_01 and config_64 use 15
+        verbose: false
+  max_epochs: 200  # config_01 and config_64 use 1000
+  min_epochs: null
+  max_steps: -1
+  min_steps: null
+  overfit_batches: 0.0
+  log_every_n_steps: null
+  inference_mode: true
+  default_root_dir: null
+  accumulate_grad_batches: 6  # optimizer steps once every 6 batches
+  gradient_clip_val: 1.0  # clip gradients at 1.0 (commented out in config_01)
+model:  # GraphSolver configured with the GatingGNO model class
+  class_path: ThermalSolver.module.GraphSolver
+  init_args:
+    model_class_path: ThermalSolver.model.local_gno.GatingGNO
+    model_init_args:
+      x_ch_node: 1 
+      f_ch_node: 1 
+      hidden: 16
+      layers: 16  # sweep variable: 1, 2, 4, 8, 16, 32, 64 across the config_NN files
+      edge_ch: 3 
+      out_ch: 1
+    unrolling_steps: 4  # layers * unrolling_steps = 64 here, as in every config in this patch
+data:  # GraphDataModule settings
+  class_path: ThermalSolver.data_module.GraphDataModule
+  init_args:
+    hf_repo: "SISSAmathLab/thermal-conduction"
+    split_name: "2000_ref_1"
+    batch_size: 4  # config.yaml uses 10
+    train_size: 0.8
+    test_size: 0.1
+    test_size: 0.1  # NOTE(review): duplicate key (loaders usually keep the last one); was `val_size` intended? confirm
+optimizer: null  # no optimizer configured in this file
+lr_scheduler: null  # no LR scheduler configured in this file
+# ckpt_path: lightning_logs/version_15/checkpoints/best-checkpoint.ckpt
+ckpt_path: null  # NOTE(review): commented path above lacks the "16" logger dir used in config_01/config_64 comments; likely stale
diff --git a/experiments/config_32.yaml b/experiments/config_32.yaml
new file mode 100644
index 0000000..3ca1392
--- /dev/null
+++ b/experiments/config_32.yaml
@@ -0,0 +1,62 @@
+# lightning.pytorch==2.5.5
+seed_everything: 1999  # fixed seed, identical across all configs in this patch
+trainer:  # Trainer settings for the 32-layer run
+  accelerator: gpu
+  strategy: auto
+  devices: 1
+  num_nodes: 1
+  precision: null
+  logger:
+    - class_path: lightning.pytorch.loggers.TensorBoardLogger
+      init_args:
+        save_dir: lightning_logs
+        name: "32"  # run label; same number as `layers` in the model section
+        version: null
+  callbacks:
+    - class_path: lightning.pytorch.callbacks.ModelCheckpoint
+      init_args:
+        monitor: val/loss
+        mode: min
+        save_top_k: 1  # keep only the single best checkpoint by val/loss
+        filename: best-checkpoint
+    - class_path: lightning.pytorch.callbacks.EarlyStopping
+      init_args:
+        monitor: val/loss
+        mode: min
+        patience: 10  # config_01 and config_64 use 15
+        verbose: false
+  max_epochs: 200  # config_01 and config_64 use 1000
+  min_epochs: null
+  max_steps: -1
+  min_steps: null
+  overfit_batches: 0.0
+  log_every_n_steps: null
+  inference_mode: true
+  default_root_dir: null
+  accumulate_grad_batches: 6  # optimizer steps once every 6 batches
+  gradient_clip_val: 1.0  # clip gradients at 1.0 (commented out in config_01)
+model:  # GraphSolver configured with the GatingGNO model class
+  class_path: ThermalSolver.module.GraphSolver
+  init_args:
+    model_class_path: ThermalSolver.model.local_gno.GatingGNO
+    model_init_args:
+      x_ch_node: 1 
+      f_ch_node: 1 
+      hidden: 16
+      layers: 32  # sweep variable: 1, 2, 4, 8, 16, 32, 64 across the config_NN files
+      edge_ch: 3 
+      out_ch: 1
+    unrolling_steps: 2  # layers * unrolling_steps = 64 here, as in every config in this patch
+data:  # GraphDataModule settings
+  class_path: ThermalSolver.data_module.GraphDataModule
+  init_args:
+    hf_repo: "SISSAmathLab/thermal-conduction"
+    split_name: "2000_ref_1"
+    batch_size: 4  # config.yaml uses 10
+    train_size: 0.8
+    test_size: 0.1
+    test_size: 0.1  # NOTE(review): duplicate key (loaders usually keep the last one); was `val_size` intended? confirm
+optimizer: null  # no optimizer configured in this file
+lr_scheduler: null  # no LR scheduler configured in this file
+# ckpt_path: lightning_logs/version_15/checkpoints/best-checkpoint.ckpt
+ckpt_path: null  # NOTE(review): commented path above lacks the "32" logger dir used in config_01/config_64 comments; likely stale
diff --git a/experiments/config_64.yaml b/experiments/config_64.yaml
new file mode 100644
index 0000000..617b8ac
--- /dev/null
+++ b/experiments/config_64.yaml
@@ -0,0 +1,63 @@
+# lightning.pytorch==2.5.5
+seed_everything: 1999  # fixed seed, identical across all configs in this patch
+trainer:  # Trainer settings for the 64-layer run
+  accelerator: gpu
+  strategy: auto
+  devices: 1
+  num_nodes: 1
+  precision: null
+  logger:
+    - class_path: lightning.pytorch.loggers.TensorBoardLogger
+      init_args:
+        save_dir: lightning_logs
+        name: "64"  # run label; same number as `layers` in the model section
+        version: null
+  callbacks:
+    - class_path: lightning.pytorch.callbacks.ModelCheckpoint
+      init_args:
+        monitor: val/loss
+        mode: min
+        save_top_k: 1  # keep only the single best checkpoint by val/loss
+        filename: best-checkpoint
+    - class_path: lightning.pytorch.callbacks.EarlyStopping
+      init_args:
+        monitor: val/loss
+        mode: min
+        patience: 15  # config_02..config_32 use 10; config_01 also uses 15
+        verbose: false
+  max_epochs: 1000  # config_02..config_32 use 200; config_01 also uses 1000
+  min_epochs: null
+  max_steps: -1
+  min_steps: null
+  overfit_batches: 0.0
+  log_every_n_steps: null
+  inference_mode: true
+  default_root_dir: null
+  accumulate_grad_batches: 6  # optimizer steps once every 6 batches
+  gradient_clip_val: 1.0  # clip gradients at 1.0 (commented out in config_01)
+
+model:  # GraphSolver configured with the GatingGNO model class
+  class_path: ThermalSolver.module.GraphSolver
+  init_args:
+    model_class_path: ThermalSolver.model.local_gno.GatingGNO
+    model_init_args:
+      x_ch_node: 1 
+      f_ch_node: 1 
+      hidden: 16
+      layers: 64  # sweep variable: 1, 2, 4, 8, 16, 32, 64 across the config_NN files
+      edge_ch: 3 
+      out_ch: 1
+    unrolling_steps: 1  # layers * unrolling_steps = 64 here, as in every config in this patch
+data:  # GraphDataModule settings
+  class_path: ThermalSolver.data_module.GraphDataModule
+  init_args:
+    hf_repo: "SISSAmathLab/thermal-conduction"
+    split_name: "2000"  # NOTE(review): every other config in this patch uses "2000_ref_1"; confirm this split is intended
+    batch_size: 4  # config.yaml uses 10
+    train_size: 0.8
+    test_size: 0.1
+    test_size: 0.1  # NOTE(review): duplicate key (loaders usually keep the last one); was `val_size` intended? confirm
+optimizer: null  # no optimizer configured in this file
+lr_scheduler: null  # no LR scheduler configured in this file
+# ckpt_path: lightning_logs/64/version_0/checkpoints/best-checkpoint.ckpt
+ckpt_path: null  # no checkpoint path set; the commented line above shows one under this run's "64" logger directory