diff --git a/experiments/config.yaml b/experiments/config.yaml deleted file mode 100644 index 8216fc9..0000000 --- a/experiments/config.yaml +++ /dev/null @@ -1,56 +0,0 @@ -# lightning.pytorch==2.5.5 -seed_everything: 1999 -trainer: - accelerator: gpu - strategy: auto - devices: 1 - num_nodes: 1 - precision: null - logger: null - callbacks: - - class_path: lightning.pytorch.callbacks.ModelCheckpoint - init_args: - monitor: val/loss - mode: min - save_top_k: 1 - filename: best-checkpoint - - class_path: lightning.pytorch.callbacks.EarlyStopping - init_args: - monitor: val/loss - mode: min - patience: 10 - verbose: false - max_epochs: 2000 - min_epochs: null - max_steps: -1 - min_steps: null - overfit_batches: 0.0 - log_every_n_steps: null - inference_mode: true - default_root_dir: null - accumulate_grad_batches: 2 - gradient_clip_val: 1.0 -model: - class_path: ThermalSolver.module.GraphSolver - init_args: - model_class_path: ThermalSolver.model.local_gno.GatingGNO - model_init_args: - x_ch_node: 1 - f_ch_node: 1 - hidden: 16 - layers: 1 - edge_ch: 3 - out_ch: 1 - unrolling_steps: 64 -data: - class_path: ThermalSolver.data_module.GraphDataModule - init_args: - hf_repo: "SISSAmathLab/thermal-conduction" - split_name: "2000_ref_1" - batch_size: 10 - train_size: 0.8 - test_size: 0.1 - test_size: 0.1 -optimizer: null -lr_scheduler: null -ckpt_path: null diff --git a/experiments/config_01.yaml b/experiments/config_01.yaml deleted file mode 100644 index 686dc87..0000000 --- a/experiments/config_01.yaml +++ /dev/null @@ -1,56 +0,0 @@ -# lightning.pytorch==2.5.5 -seed_everything: 1999 -trainer: - accelerator: gpu - strategy: auto - devices: 1 - num_nodes: 1 - precision: null - logger: - - class_path: lightning.pytorch.loggers.TensorBoardLogger - init_args: - save_dir: logs - name: "test" - version: null - callbacks: - - class_path: lightning.pytorch.callbacks.ModelCheckpoint - init_args: - monitor: val/loss - mode: min - save_top_k: 1 - filename: best-checkpoint - - 
class_path: lightning.pytorch.callbacks.EarlyStopping - init_args: - monitor: val/loss - mode: min - patience: 15 - verbose: false - max_epochs: 1000 - min_epochs: null - max_steps: -1 - min_steps: null - overfit_batches: 0.0 - log_every_n_steps: null - inference_mode: true - default_root_dir: null - accumulate_grad_batches: 4 - gradient_clip_val: 1.0 -model: - class_path: ThermalSolver.graph_module.GraphSolver - init_args: - model_class_path: ThermalSolver.model.LearnableGraphFiniteDifference - model_init_args: - max_iters: 250 - unrolling_steps: 64 -data: - class_path: ThermalSolver.graph_datamodule.GraphDataModule - init_args: - hf_repo: "SISSAmathLab/thermal-conduction" - split_name: "1000_40x30" - batch_size: 8 - train_size: 0.8 - test_size: 0.1 - test_size: 0.1 -optimizer: null -lr_scheduler: null -# ckpt_path: lightning_logs/01/version_0/checkpoints/best-checkpoint.ckpt \ No newline at end of file diff --git a/experiments/config_02.yaml b/experiments/config_02.yaml deleted file mode 100644 index 995eff2..0000000 --- a/experiments/config_02.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# lightning.pytorch==2.5.5 -seed_everything: 1999 -trainer: - accelerator: gpu - strategy: auto - devices: 1 - num_nodes: 1 - precision: null - logger: - - class_path: lightning.pytorch.loggers.TensorBoardLogger - init_args: - save_dir: lightning_logs - name: "02" - version: null - callbacks: - - class_path: lightning.pytorch.callbacks.ModelCheckpoint - init_args: - monitor: val/loss - mode: min - save_top_k: 1 - filename: best-checkpoint - - class_path: lightning.pytorch.callbacks.EarlyStopping - init_args: - monitor: val/loss - mode: min - patience: 10 - verbose: false - max_epochs: 200 - min_epochs: null - max_steps: -1 - min_steps: null - overfit_batches: 0.0 - log_every_n_steps: null - inference_mode: true - default_root_dir: null - accumulate_grad_batches: 6 - gradient_clip_val: 1.0 -model: - class_path: ThermalSolver.module.GraphSolver - init_args: - model_class_path: 
ThermalSolver.model.local_gno.GatingGNO - model_init_args: - x_ch_node: 1 - f_ch_node: 1 - hidden: 16 - layers: 2 - edge_ch: 3 - out_ch: 1 - unrolling_steps: 32 -data: - class_path: ThermalSolver.data_module.GraphDataModule - init_args: - hf_repo: "SISSAmathLab/thermal-conduction" - split_name: "2000_ref_1" - batch_size: 4 - train_size: 0.8 - test_size: 0.1 - test_size: 0.1 -optimizer: null -lr_scheduler: null -# ckpt_path: lightning_logs/version_15/checkpoints/best-checkpoint.ckpt -ckpt_path: null diff --git a/experiments/config_04.yaml b/experiments/config_04.yaml deleted file mode 100644 index 1a8b15e..0000000 --- a/experiments/config_04.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# lightning.pytorch==2.5.5 -seed_everything: 1999 -trainer: - accelerator: gpu - strategy: auto - devices: 1 - num_nodes: 1 - precision: null - logger: - - class_path: lightning.pytorch.loggers.TensorBoardLogger - init_args: - save_dir: lightning_logs - name: "04" - version: null - callbacks: - - class_path: lightning.pytorch.callbacks.ModelCheckpoint - init_args: - monitor: val/loss - mode: min - save_top_k: 1 - filename: best-checkpoint - - class_path: lightning.pytorch.callbacks.EarlyStopping - init_args: - monitor: val/loss - mode: min - patience: 10 - verbose: false - max_epochs: 200 - min_epochs: null - max_steps: -1 - min_steps: null - overfit_batches: 0.0 - log_every_n_steps: null - inference_mode: true - default_root_dir: null - accumulate_grad_batches: 6 - gradient_clip_val: 1.0 -model: - class_path: ThermalSolver.module.GraphSolver - init_args: - model_class_path: ThermalSolver.model.local_gno.GatingGNO - model_init_args: - x_ch_node: 1 - f_ch_node: 1 - hidden: 16 - layers: 4 - edge_ch: 3 - out_ch: 1 - unrolling_steps: 16 -data: - class_path: ThermalSolver.data_module.GraphDataModule - init_args: - hf_repo: "SISSAmathLab/thermal-conduction" - split_name: "2000_ref_1" - batch_size: 4 - train_size: 0.8 - test_size: 0.1 - test_size: 0.1 -optimizer: null -lr_scheduler: null -# 
ckpt_path: lightning_logs/version_15/checkpoints/best-checkpoint.ckpt -ckpt_path: null diff --git a/experiments/config_08.yaml b/experiments/config_08.yaml deleted file mode 100644 index afb6c8e..0000000 --- a/experiments/config_08.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# lightning.pytorch==2.5.5 -seed_everything: 1999 -trainer: - accelerator: gpu - strategy: auto - devices: 1 - num_nodes: 1 - precision: null - logger: - - class_path: lightning.pytorch.loggers.TensorBoardLogger - init_args: - save_dir: lightning_logs - name: "08" - version: null - callbacks: - - class_path: lightning.pytorch.callbacks.ModelCheckpoint - init_args: - monitor: val/loss - mode: min - save_top_k: 1 - filename: best-checkpoint - - class_path: lightning.pytorch.callbacks.EarlyStopping - init_args: - monitor: val/loss - mode: min - patience: 10 - verbose: false - max_epochs: 200 - min_epochs: null - max_steps: -1 - min_steps: null - overfit_batches: 0.0 - log_every_n_steps: null - inference_mode: true - default_root_dir: null - accumulate_grad_batches: 6 - gradient_clip_val: 1.0 -model: - class_path: ThermalSolver.module.GraphSolver - init_args: - model_class_path: ThermalSolver.model.local_gno.GatingGNO - model_init_args: - x_ch_node: 1 - f_ch_node: 1 - hidden: 16 - layers: 8 - edge_ch: 3 - out_ch: 1 - unrolling_steps: 8 -data: - class_path: ThermalSolver.data_module.GraphDataModule - init_args: - hf_repo: "SISSAmathLab/thermal-conduction" - split_name: "2000_ref_1" - batch_size: 4 - train_size: 0.8 - test_size: 0.1 - test_size: 0.1 -optimizer: null -lr_scheduler: null -# ckpt_path: lightning_logs/version_15/checkpoints/best-checkpoint.ckpt -ckpt_path: null diff --git a/experiments/config_16.yaml b/experiments/config_16.yaml deleted file mode 100644 index 07031d3..0000000 --- a/experiments/config_16.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# lightning.pytorch==2.5.5 -seed_everything: 1999 -trainer: - accelerator: gpu - strategy: auto - devices: 1 - num_nodes: 1 - precision: null - logger: - - 
class_path: lightning.pytorch.loggers.TensorBoardLogger - init_args: - save_dir: lightning_logs - name: "16" - version: null - callbacks: - - class_path: lightning.pytorch.callbacks.ModelCheckpoint - init_args: - monitor: val/loss - mode: min - save_top_k: 1 - filename: best-checkpoint - - class_path: lightning.pytorch.callbacks.EarlyStopping - init_args: - monitor: val/loss - mode: min - patience: 10 - verbose: false - max_epochs: 200 - min_epochs: null - max_steps: -1 - min_steps: null - overfit_batches: 0.0 - log_every_n_steps: null - inference_mode: true - default_root_dir: null - accumulate_grad_batches: 6 - gradient_clip_val: 1.0 -model: - class_path: ThermalSolver.module.GraphSolver - init_args: - model_class_path: ThermalSolver.model.local_gno.GatingGNO - model_init_args: - x_ch_node: 1 - f_ch_node: 1 - hidden: 16 - layers: 16 - edge_ch: 3 - out_ch: 1 - unrolling_steps: 4 -data: - class_path: ThermalSolver.data_module.GraphDataModule - init_args: - hf_repo: "SISSAmathLab/thermal-conduction" - split_name: "2000_ref_1" - batch_size: 4 - train_size: 0.8 - test_size: 0.1 - test_size: 0.1 -optimizer: null -lr_scheduler: null -# ckpt_path: lightning_logs/version_15/checkpoints/best-checkpoint.ckpt -ckpt_path: null diff --git a/experiments/config_16_layer_16_hidden_refined.yaml b/experiments/config_16_layer_16_hidden_refined.yaml new file mode 100644 index 0000000..66d018e --- /dev/null +++ b/experiments/config_16_layer_16_hidden_refined.yaml @@ -0,0 +1,62 @@ +# lightning.pytorch==2.5.5 +seed_everything: 1999 +trainer: + accelerator: gpu + strategy: auto + devices: 1 + num_nodes: 1 + precision: null + logger: + - class_path: lightning.pytorch.loggers.WandbLogger + init_args: + save_dir: logs.autoregressive.wandb + project: "thermal-conduction-unsteady" + name: "16_refined" + callbacks: + - class_path: lightning.pytorch.callbacks.ModelCheckpoint + init_args: + dirpath: logs.autoregressive.wandb/16_refined/checkpoints + monitor: val/loss + mode: min + save_top_k: 1 
+ filename: best-checkpoint + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/loss + mode: min + patience: 10 + verbose: false + max_epochs: 1000 + min_epochs: null + max_steps: -1 + min_steps: null + overfit_batches: 0.0 + log_every_n_steps: null + accumulate_grad_batches: 4 + default_root_dir: null + +model: + class_path: ThermalSolver.autoregressive_module.GraphSolver + init_args: + model_class_path: ThermalSolver.model.diffusion_net.DiffusionNet + model_init_args: + input_dim: 1 + hidden_dim: 16 + output_dim: 1 + n_layers: 16 + unrolling_steps: 5 + +data: + class_path: ThermalSolver.graph_datamodule_unsteady.GraphDataModule + init_args: + hf_repo: "SISSAmathLab/thermal-conduction-unsteady" + split_name: "100_samples_easy_refined" + batch_size: 8 + train_size: 0.7 + val_size: 0.2 + test_size: 0.1 + build_radial_graph: false + remove_boundary_edges: true + start_unrolling_steps: 5 +optimizer: null +lr_scheduler: null diff --git a/experiments/config_16_layer_8_hidden_refined.yaml b/experiments/config_16_layer_8_hidden_refined.yaml new file mode 100644 index 0000000..02d8ab7 --- /dev/null +++ b/experiments/config_16_layer_8_hidden_refined.yaml @@ -0,0 +1,62 @@ +# lightning.pytorch==2.5.5 +seed_everything: 1999 +trainer: + accelerator: gpu + strategy: auto + devices: 1 + num_nodes: 1 + precision: null + logger: + - class_path: lightning.pytorch.loggers.WandbLogger + init_args: + save_dir: logs.autoregressive.wandb + project: "thermal-conduction-unsteady" + name: "16_8_refined" + callbacks: + - class_path: lightning.pytorch.callbacks.ModelCheckpoint + init_args: + dirpath: logs.autoregressive.wandb/16_8_refined/checkpoints + monitor: val/loss + mode: min + save_top_k: 1 + filename: best-checkpoint + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/loss + mode: min + patience: 10 + verbose: false + max_epochs: 1000 + min_epochs: null + max_steps: -1 + min_steps: null + overfit_batches: 0.0 + 
log_every_n_steps: null + accumulate_grad_batches: 2 + default_root_dir: null + +model: + class_path: ThermalSolver.autoregressive_module.GraphSolver + init_args: + model_class_path: ThermalSolver.model.diffusion_net.DiffusionNet + model_init_args: + input_dim: 1 + hidden_dim: 8 + output_dim: 1 + n_layers: 16 + unrolling_steps: 5 + +data: + class_path: ThermalSolver.graph_datamodule_unsteady.GraphDataModule + init_args: + hf_repo: "SISSAmathLab/thermal-conduction-unsteady" + split_name: "100_samples_easy_refined" + batch_size: 16 + train_size: 0.7 + val_size: 0.2 + test_size: 0.1 + build_radial_graph: false + remove_boundary_edges: true + start_unrolling_steps: 5 +optimizer: null +lr_scheduler: null diff --git a/experiments/config_32.yaml b/experiments/config_32.yaml deleted file mode 100644 index 3ca1392..0000000 --- a/experiments/config_32.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# lightning.pytorch==2.5.5 -seed_everything: 1999 -trainer: - accelerator: gpu - strategy: auto - devices: 1 - num_nodes: 1 - precision: null - logger: - - class_path: lightning.pytorch.loggers.TensorBoardLogger - init_args: - save_dir: lightning_logs - name: "32" - version: null - callbacks: - - class_path: lightning.pytorch.callbacks.ModelCheckpoint - init_args: - monitor: val/loss - mode: min - save_top_k: 1 - filename: best-checkpoint - - class_path: lightning.pytorch.callbacks.EarlyStopping - init_args: - monitor: val/loss - mode: min - patience: 10 - verbose: false - max_epochs: 200 - min_epochs: null - max_steps: -1 - min_steps: null - overfit_batches: 0.0 - log_every_n_steps: null - inference_mode: true - default_root_dir: null - accumulate_grad_batches: 6 - gradient_clip_val: 1.0 -model: - class_path: ThermalSolver.module.GraphSolver - init_args: - model_class_path: ThermalSolver.model.local_gno.GatingGNO - model_init_args: - x_ch_node: 1 - f_ch_node: 1 - hidden: 16 - layers: 32 - edge_ch: 3 - out_ch: 1 - unrolling_steps: 2 -data: - class_path: 
ThermalSolver.data_module.GraphDataModule - init_args: - hf_repo: "SISSAmathLab/thermal-conduction" - split_name: "2000_ref_1" - batch_size: 4 - train_size: 0.8 - test_size: 0.1 - test_size: 0.1 -optimizer: null -lr_scheduler: null -# ckpt_path: lightning_logs/version_15/checkpoints/best-checkpoint.ckpt -ckpt_path: null diff --git a/experiments/config_autoregressive.yaml b/experiments/config_4_layer_16_hidden.yaml similarity index 58% rename from experiments/config_autoregressive.yaml rename to experiments/config_4_layer_16_hidden.yaml index ec9e94a..2b35fea 100644 --- a/experiments/config_autoregressive.yaml +++ b/experiments/config_4_layer_16_hidden.yaml @@ -7,14 +7,16 @@ trainer: num_nodes: 1 precision: null logger: - - class_path: lightning.pytorch.loggers.TensorBoardLogger + - class_path: lightning.pytorch.loggers.WandbLogger init_args: - save_dir: logs.autoregressive - name: "test" - version: null + save_dir: logs.autoregressive.wandb # same log root as the sibling configs and as the checkpoint dirpath below + project: "thermal-conduction-unsteady" + name: "5_step_4_layers_16_hidden" + # retain: true callbacks: - class_path: lightning.pytorch.callbacks.ModelCheckpoint init_args: + dirpath: logs.autoregressive.wandb/5_step_4_layers_16_hidden/checkpoints monitor: val/loss mode: min save_top_k: 1 @@ -23,7 +25,7 @@ trainer: init_args: monitor: val/loss mode: min - patience: 50 + patience: 10 verbose: false max_epochs: 1000 min_epochs: null @@ -31,40 +33,32 @@ trainer: min_steps: null overfit_batches: 0.0 log_every_n_steps: null - accumulate_grad_batches: 1 - # reload_dataloaders_every_n_epochs: 1 + accumulate_grad_batches: 2 default_root_dir: null model: class_path: ThermalSolver.autoregressive_module.GraphSolver init_args: - model_class_path: ThermalSolver.model.learnable_finite_difference.CorrectionNet + model_class_path: ThermalSolver.model.diffusion_net.DiffusionNet model_init_args: input_dim: 1 - hidden_dim: 24 - # output_dim: 1 - n_layers: 1 - start_unrolling_steps: 1 - increase_every: 100000 - increase_rate: 2 - 
max_inference_iters: 300 - max_unrolling_steps: 40 - inner_steps: 1 + hidden_dim: 16 + output_dim: 1 + n_layers: 4 + unrolling_steps: 5 data: class_path: ThermalSolver.graph_datamodule_unsteady.GraphDataModule init_args: hf_repo: "SISSAmathLab/thermal-conduction-unsteady" - split_name: "50_samples_easy" - batch_size: 64 - train_size: 0.02 - val_size: 0.02 - test_size: 0.96 + split_name: "100_samples_easy_refined" + batch_size: 32 + train_size: 0.7 + val_size: 0.2 + test_size: 0.1 build_radial_graph: true radius: 0.5 remove_boundary_edges: true - start_unrolling_steps: 1 - + start_unrolling_steps: 5 optimizer: null lr_scheduler: null -# ckpt_path: logs/test/version_0/checkpoints/best-checkpoint.ckpt diff --git a/experiments/config_4_layer_8_hidden.yaml b/experiments/config_4_layer_8_hidden.yaml new file mode 100644 index 0000000..289ac7a --- /dev/null +++ b/experiments/config_4_layer_8_hidden.yaml @@ -0,0 +1,62 @@ +# lightning.pytorch==2.5.5 +seed_everything: 1999 +trainer: + accelerator: gpu + strategy: auto + devices: 1 + num_nodes: 1 + precision: null + logger: + - class_path: lightning.pytorch.loggers.WandbLogger + init_args: + save_dir: logs.autoregressive.wandb + project: "thermal-conduction-unsteady" + name: "5_step_4_layers_8_hidden" + callbacks: + - class_path: lightning.pytorch.callbacks.ModelCheckpoint + init_args: + dirpath: logs.autoregressive.wandb/5_step_4_layers_8_hidden/checkpoints # directory follows the run name; this config sets build_radial_graph to false, so no radius suffix + monitor: val/loss + mode: min + save_top_k: 1 + filename: best-checkpoint + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/loss + mode: min + patience: 10 + verbose: false + max_epochs: 1000 + min_epochs: null + max_steps: -1 + min_steps: null + overfit_batches: 0.0 + log_every_n_steps: null + accumulate_grad_batches: 1 + default_root_dir: null + +model: + class_path: ThermalSolver.autoregressive_module.GraphSolver + init_args: + model_class_path: ThermalSolver.model.diffusion_net.DiffusionNet + model_init_args: + 
input_dim: 1 + hidden_dim: 8 + output_dim: 1 + n_layers: 4 + unrolling_steps: 5 + +data: + class_path: ThermalSolver.graph_datamodule_unsteady.GraphDataModule + init_args: + hf_repo: "SISSAmathLab/thermal-conduction-unsteady" + split_name: "100_samples_easy_refined" + batch_size: 32 + train_size: 0.7 + val_size: 0.2 + test_size: 0.1 + build_radial_graph: false + remove_boundary_edges: true + start_unrolling_steps: 5 +optimizer: null +lr_scheduler: null diff --git a/experiments/config_64.yaml b/experiments/config_64.yaml deleted file mode 100644 index 6765a5d..0000000 --- a/experiments/config_64.yaml +++ /dev/null @@ -1,63 +0,0 @@ -# lightning.pytorch==2.5.5 -seed_everything: 1999 -trainer: - accelerator: gpu - strategy: auto - devices: 1 - num_nodes: 1 - precision: null - logger: - - class_path: lightning.pytorch.loggers.TensorBoardLogger - init_args: - save_dir: lightning_logs - name: "64" - version: null - callbacks: - - class_path: lightning.pytorch.callbacks.ModelCheckpoint - init_args: - monitor: val/loss - mode: min - save_top_k: 1 - filename: best-checkpoint - - class_path: lightning.pytorch.callbacks.EarlyStopping - init_args: - monitor: val/loss - mode: min - patience: 15 - verbose: false - max_epochs: 1000 - min_epochs: null - max_steps: -1 - min_steps: null - overfit_batches: 0.0 - log_every_n_steps: null - inference_mode: true - default_root_dir: null - accumulate_grad_batches: 2 - gradient_clip_val: 1.0 - -model: - class_path: ThermalSolver.graph_module.GraphSolver - init_args: - model_class_path: ThermalSolver.model.local_gno.GatingGNO - model_init_args: - x_ch_node: 1 - f_ch_node: 1 - hidden: 16 - layers: 64 - edge_ch: 3 - out_ch: 1 - unrolling_steps: 1 -data: - class_path: ThermalSolver.graph_datamodule.GraphDataModule - init_args: - hf_repo: "SISSAmathLab/thermal-conduction" - split_name: "2000_ref_1" - batch_size: 10 - train_size: 0.8 - test_size: 0.1 - test_size: 0.1 -optimizer: null -lr_scheduler: null -# ckpt_path: 
lightning_logs/64/version_0/checkpoints/best-checkpoint.ckpt -ckpt_path: null diff --git a/experiments/config_8_layer_16_hidden.yaml b/experiments/config_8_layer_16_hidden.yaml new file mode 100644 index 0000000..46e012f --- /dev/null +++ b/experiments/config_8_layer_16_hidden.yaml @@ -0,0 +1,62 @@ +# lightning.pytorch==2.5.5 +seed_everything: 1999 +trainer: + accelerator: gpu + strategy: auto + devices: 1 + num_nodes: 1 + precision: null + logger: + - class_path: lightning.pytorch.loggers.WandbLogger + init_args: + save_dir: logs.autoregressive.wandb + project: "thermal-conduction-unsteady" + name: "standard" + callbacks: + - class_path: lightning.pytorch.callbacks.ModelCheckpoint + init_args: + dirpath: logs.autoregressive.wandb/standard/checkpoints + monitor: val/loss + mode: min + save_top_k: 1 + filename: best-checkpoint + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/loss + mode: min + patience: 10 + verbose: false + max_epochs: 1000 + min_epochs: null + max_steps: -1 + min_steps: null + overfit_batches: 0.0 + log_every_n_steps: null + accumulate_grad_batches: 2 + default_root_dir: null + +model: + class_path: ThermalSolver.autoregressive_module.GraphSolver + init_args: + model_class_path: ThermalSolver.model.diffusion_net.DiffusionNet + model_init_args: + input_dim: 1 + hidden_dim: 16 + output_dim: 1 + n_layers: 8 + unrolling_steps: 5 + +data: + class_path: ThermalSolver.graph_datamodule_unsteady.GraphDataModule + init_args: + hf_repo: "SISSAmathLab/thermal-conduction-unsteady" + split_name: "100_samples_easy" + batch_size: 16 + train_size: 0.7 + val_size: 0.2 + test_size: 0.1 + build_radial_graph: false + remove_boundary_edges: true + start_unrolling_steps: 5 +optimizer: null +lr_scheduler: null diff --git a/experiments/config_8_layer_16_hidden_refined.yaml b/experiments/config_8_layer_16_hidden_refined.yaml new file mode 100644 index 0000000..323a2a6 --- /dev/null +++ b/experiments/config_8_layer_16_hidden_refined.yaml 
@@ -0,0 +1,62 @@ +# lightning.pytorch==2.5.5 +seed_everything: 1999 +trainer: + accelerator: gpu + strategy: auto + devices: 1 + num_nodes: 1 + precision: null + logger: + - class_path: lightning.pytorch.loggers.WandbLogger + init_args: + save_dir: logs.autoregressive.wandb + project: "thermal-conduction-unsteady" + name: "refined" + callbacks: + - class_path: lightning.pytorch.callbacks.ModelCheckpoint + init_args: + dirpath: logs.autoregressive.wandb/refined/checkpoints + monitor: val/loss + mode: min + save_top_k: 1 + filename: best-checkpoint + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/loss + mode: min + patience: 10 + verbose: false + max_epochs: 1000 + min_epochs: null + max_steps: -1 + min_steps: null + overfit_batches: 0.0 + log_every_n_steps: null + accumulate_grad_batches: 2 + default_root_dir: null + +model: + class_path: ThermalSolver.autoregressive_module.GraphSolver + init_args: + model_class_path: ThermalSolver.model.diffusion_net.DiffusionNet + model_init_args: + input_dim: 1 + hidden_dim: 16 + output_dim: 1 + n_layers: 8 + unrolling_steps: 5 + +data: + class_path: ThermalSolver.graph_datamodule_unsteady.GraphDataModule + init_args: + hf_repo: "SISSAmathLab/thermal-conduction-unsteady" + split_name: "100_samples_easy_refined" + batch_size: 16 + train_size: 0.7 + val_size: 0.2 + test_size: 0.1 + build_radial_graph: false + remove_boundary_edges: true + start_unrolling_steps: 5 +optimizer: null +lr_scheduler: null diff --git a/experiments/config_8_layer_8_hidden.yaml b/experiments/config_8_layer_8_hidden.yaml new file mode 100644 index 0000000..d923397 --- /dev/null +++ b/experiments/config_8_layer_8_hidden.yaml @@ -0,0 +1,62 @@ +# lightning.pytorch==2.5.5 +seed_everything: 1999 +trainer: + accelerator: gpu + strategy: auto + devices: 1 + num_nodes: 1 + precision: null + logger: + - class_path: lightning.pytorch.loggers.WandbLogger + init_args: + save_dir: logs.autoregressive.wandb + project: 
"thermal-conduction-unsteady" + name: "8_8_refined" # unique run name; "standard" is already used by config_8_layer_16_hidden.yaml + callbacks: + - class_path: lightning.pytorch.callbacks.ModelCheckpoint + init_args: + dirpath: logs.autoregressive.wandb/8_8_refined/checkpoints # own directory so best-checkpoint is not written next to another run's file + monitor: val/loss + mode: min + save_top_k: 1 + filename: best-checkpoint + - class_path: lightning.pytorch.callbacks.EarlyStopping + init_args: + monitor: val/loss + mode: min + patience: 10 + verbose: false + max_epochs: 1000 + min_epochs: null + max_steps: -1 + min_steps: null + overfit_batches: 0.0 + log_every_n_steps: null + accumulate_grad_batches: 1 + default_root_dir: null + +model: + class_path: ThermalSolver.autoregressive_module.GraphSolver + init_args: + model_class_path: ThermalSolver.model.diffusion_net.DiffusionNet + model_init_args: + input_dim: 1 + hidden_dim: 8 + output_dim: 1 + n_layers: 8 + unrolling_steps: 5 + +data: + class_path: ThermalSolver.graph_datamodule_unsteady.GraphDataModule + init_args: + hf_repo: "SISSAmathLab/thermal-conduction-unsteady" + split_name: "100_samples_easy_refined" + batch_size: 32 + train_size: 0.7 + val_size: 0.2 + test_size: 0.1 + build_radial_graph: false + remove_boundary_edges: true + start_unrolling_steps: 5 +optimizer: null +lr_scheduler: null diff --git a/experiments/config_fd.yaml b/experiments/config_fd.yaml deleted file mode 100644 index bb7a2b3..0000000 --- a/experiments/config_fd.yaml +++ /dev/null @@ -1,58 +0,0 @@ -# lightning.pytorch==2.5.5 -seed_everything: 1999 -trainer: - accelerator: gpu - strategy: auto - devices: 1 - num_nodes: 1 - precision: null - logger: - - class_path: lightning.pytorch.loggers.TensorBoardLogger - init_args: - save_dir: logs - name: "fd" - version: null - callbacks: - - class_path: lightning.pytorch.callbacks.ModelCheckpoint - init_args: - monitor: val/loss - mode: min - save_top_k: 1 - filename: best-checkpoint - - class_path: lightning.pytorch.callbacks.EarlyStopping - init_args: - monitor: val/loss - mode: min - patience: 2 - verbose: false - max_epochs: 1000 - min_epochs: null - 
max_steps: -1 - min_steps: null - overfit_batches: 0.0 - log_every_n_steps: null - inference_mode: true - default_root_dir: null - accumulate_grad_batches: 4 - gradient_clip_val: 1.0 -model: - class_path: ThermalSolver.graph_module.GraphSolver - init_args: - model_class_path: ThermalSolver.model.GraphFiniteDifference - # model_init_args: - max_iters: 10000 - # unrolling_steps: 64 -data: - class_path: ThermalSolver.graph_datamodule.GraphDataModule - init_args: - hf_repo: "SISSAmathLab/thermal-conduction" - split_name: "1000_1_40x30" - batch_size: 8 - train_size: 0.8 - test_size: 0.1 - test_size: 0.1 - # build_radial_graph: true - # radius: 1.5 -optimizer: null -lr_scheduler: null -# ckpt_path: lightning_logs/01/version_0/checkpoints/best-checkpoint.ckpt \ No newline at end of file diff --git a/experiments/config_gino.yaml b/experiments/config_gino.yaml deleted file mode 100644 index 50f63b3..0000000 --- a/experiments/config_gino.yaml +++ /dev/null @@ -1,74 +0,0 @@ -# lightning.pytorch==2.5.5 -seed_everything: 1999 -trainer: - accelerator: gpu - strategy: auto - devices: 1 - num_nodes: 1 - precision: null - logger: - - class_path: lightning.pytorch.loggers.TensorBoardLogger - init_args: - save_dir: logs - name: "test" - version: null - callbacks: - - class_path: lightning.pytorch.callbacks.ModelCheckpoint - init_args: - monitor: val/loss - mode: min - save_top_k: 1 - filename: best-checkpoint - - class_path: lightning.pytorch.callbacks.EarlyStopping - init_args: - monitor: val/loss - mode: min - patience: 15 - verbose: false - max_epochs: 1000 - min_epochs: null - max_steps: -1 - min_steps: null - overfit_batches: 0.0 - log_every_n_steps: null - inference_mode: true - default_root_dir: null - # accumulate_grad_batches: 2 - # gradient_clip_val: 1.0 -model: - class_path: ThermalSolver.graph_module.GraphSolver - init_args: - model_class_path: neuralop.models import GINO - model_init_args: - in_channels: 3 # Es: coordinate (x, y, z) + valore della conducibilità k - 
out_channels: 1 # Es: temperatura T - - # Parametri per l'encoder e il decoder GNO - gno_coord_features=3, # Dimensionalità delle coordinate per GNO (es. 3D) - gno_n_layers=2, # Numero di layer GNO nell'encoder e nel decoder - gno_hidden_channels=64, # Canali nascosti per i layer GNO - - # Parametri per il processore FNO - fno_n_modes=(16, 16, 16), # Numero di modi di Fourier per ogni dimensione - fno_n_layers=4, # Numero di layer FNO - fno_hidden_channels=64, # Canali nascosti per i layer FNO - - # Canali per il lifting e la proiezione - lifting_channels=256, # Dimensione dello spazio latente dopo il lifting iniziale - projection_channels=256, # Dimensione prima della proiezione finale - - # Padding del dominio per il processore FNO - domain_padding=0.05 - # unrolling_steps: 64 -data: - class_path: ThermalSolver.graph_datamodule.GraphDataModule - init_args: - hf_repo: "SISSAmathLab/thermal-conduction" - split_name: "2000_ref_1" - batch_size: 64 - train_size: 0.8 - test_size: 0.1 - test_size: 0.1 -optimizer: null -lr_scheduler: null -# ckpt_path: lightning_logs/01/version_0/checkpoints/best-checkpoint.ckpt \ No newline at end of file diff --git a/experiments/config_gno.yaml b/experiments/config_gno.yaml deleted file mode 100644 index 46d81c4..0000000 --- a/experiments/config_gno.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# lightning.pytorch==2.5.5 -seed_everything: 1999 -trainer: - accelerator: gpu - strategy: auto - devices: 1 - num_nodes: 1 - precision: null - logger: - - class_path: lightning.pytorch.loggers.TensorBoardLogger - init_args: - save_dir: logs - name: "test" - version: null - callbacks: - - class_path: lightning.pytorch.callbacks.ModelCheckpoint - init_args: - monitor: val/loss - mode: min - save_top_k: 1 - filename: best-checkpoint - - class_path: lightning.pytorch.callbacks.EarlyStopping - init_args: - monitor: val/loss - mode: min - patience: 25 - verbose: false - max_epochs: 1000 - min_epochs: null - max_steps: -1 - min_steps: null - overfit_batches: 
0.0 - log_every_n_steps: null - # inference_mode: true - default_root_dir: null - # accumulate_grad_batches: 2 - # gradient_clip_val: 1.0 -model: - class_path: ThermalSolver.graph_module.GraphSolver - init_args: - model_class_path: ThermalSolver.model.learnable_finite_difference.CorrectionNet - curriculum_learning: true - start_iters: 5 - increase_every: 10 - increase_rate: 2 - max_iters: 2000 - accumulation_iters: 160 -data: - class_path: ThermalSolver.graph_datamodule.GraphDataModule - init_args: - hf_repo: "SISSAmathLab/thermal-conduction" - split_name: "1000_1_40x30" - batch_size: 32 - train_size: 0.8 - test_size: 0.1 - test_size: 0.1 - build_radial_graph: false - radius: 0.6 - remove_boundary_edges: false -optimizer: null -lr_scheduler: null -# ckpt_path: logs/test/version_0/checkpoints/best-checkpoint.ckpt diff --git a/experiments/config_gno_inference.yaml b/experiments/config_gno_inference.yaml deleted file mode 100644 index 5272eb7..0000000 --- a/experiments/config_gno_inference.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# lightning.pytorch==2.5.5 -seed_everything: 1999 -trainer: - accelerator: gpu - strategy: auto - devices: 1 - num_nodes: 1 - precision: null - logger: - - class_path: lightning.pytorch.loggers.TensorBoardLogger - init_args: - save_dir: logs_inference - name: "test" - version: null - callbacks: - - class_path: lightning.pytorch.callbacks.ModelCheckpoint - init_args: - monitor: val/loss - mode: min - save_top_k: 1 - filename: best-checkpoint - - class_path: lightning.pytorch.callbacks.EarlyStopping - init_args: - monitor: val/loss - mode: min - patience: 25 - verbose: false - max_epochs: 1000 - min_epochs: null - max_steps: -1 - min_steps: null - overfit_batches: 0.0 - log_every_n_steps: null - # inference_mode: true - default_root_dir: null - # accumulate_grad_batches: 2 - # gradient_clip_val: 1.0 -model: - class_path: ThermalSolver.graph_module.GraphSolver - init_args: - model_class_path: 
ThermalSolver.model.finite_difference.FiniteDifferenceStep - curriculum_learning: true - start_iters: 5 - increase_every: 10 - increase_rate: 2 - max_iters: 2000 - accumulation_iters: 320 -data: - class_path: ThermalSolver.graph_datamodule.GraphDataModule - init_args: - hf_repo: "SISSAmathLab/thermal-conduction" - split_name: "1000_3_40x30" - batch_size: 10 - train_size: 0.8 - test_size: 0.1 - test_size: 0.1 - build_radial_graph: True - radius: 1.2 - remove_boundary_edges: false -optimizer: null -lr_scheduler: null -# ckpt_path: logs/test/version_2/checkpoints/best-checkpoint.ckpt diff --git a/experiments/config_pointnet.yaml b/experiments/config_pointnet.yaml deleted file mode 100644 index e6461f6..0000000 --- a/experiments/config_pointnet.yaml +++ /dev/null @@ -1,56 +0,0 @@ -# lightning.pytorch==2.5.5 -seed_everything: 1999 -trainer: - accelerator: gpu - strategy: auto - devices: 1 - num_nodes: 1 - precision: null - logger: - - class_path: lightning.pytorch.loggers.TensorBoardLogger - init_args: - save_dir: lightning_logs - name: "pointnet" - version: null - callbacks: - - class_path: lightning.pytorch.callbacks.ModelCheckpoint - init_args: - monitor: val/loss - mode: min - save_top_k: 1 - filename: best-checkpoint - - class_path: lightning.pytorch.callbacks.EarlyStopping - init_args: - monitor: val/loss - mode: min - patience: 10 - verbose: false - max_epochs: 200 - min_epochs: null - max_steps: -1 - min_steps: null - overfit_batches: 0.0 - log_every_n_steps: null - inference_mode: true - default_root_dir: null - accumulate_grad_batches: 2 - gradient_clip_val: 1.0 -model: - class_path: ThermalSolver.point_module.PointSolver - init_args: - model_class_path: ThermalSolver.model.point_net.PointNet - model_init_args: - input_dim: 4 - output_dim: 1 -data: - class_path: ThermalSolver.point_datamodule.PointDataModule - init_args: - hf_repo: "SISSAmathLab/thermal-conduction" - split_name: "2000" - batch_size: 10 - train_size: 0.8 - test_size: 0.1 - test_size: 0.1 
-optimizer: null -lr_scheduler: null -# ckpt_path: lightning_logs/pointnet/version_0/checkpoints/best-checkpoint.ckpt diff --git a/submit.sh b/submit.sh new file mode 100755 index 0000000..249382f --- /dev/null +++ b/submit.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# python run.py fit --config experiments/config_4_layer_8_hidden.yaml +# python run.py fit --config experiments/config_8_layer_8_hidden.yaml +python run.py fit --config experiments/config_8_layer_16_hidden_refined.yaml +python run.py fit --config experiments/config_16_layer_8_hidden_refined.yaml +python run.py fit --config experiments/config_16_layer_16_hidden_refined.yaml +python run.py fit --config experiments/config_8_layer_16_hidden.yaml +# python run.py fit --config experiments/config_4_layer_16_hidden.yaml