diff --git a/experiments/10_steps/config_16_layer_16_hidden_adaptive_refined.yaml b/experiments/10_steps/config_16_layer_16_hidden_adaptive_refined.yaml index ba93708..d1105ee 100644 --- a/experiments/10_steps/config_16_layer_16_hidden_adaptive_refined.yaml +++ b/experiments/10_steps/config_16_layer_16_hidden_adaptive_refined.yaml @@ -29,8 +29,8 @@ trainer: - class_path: ThermalSolver.switch_dataloader_callback.SwitchDataLoaderCallback init_args: increase_unrolling_steps_by: 4 - patience: 15 - last_patience: 20 + patience: 5 + last_patience: 15 max_unrolling_steps: 10 ckpt_path: logs.autoregressive.wandb/10_steps/basic.adaptive_refined/16_layer_16_hidden/ max_epochs: 1000 @@ -58,14 +58,15 @@ data: class_path: ThermalSolver.graph_datamodule_unsteady.GraphDataModule init_args: hf_repo: "SISSAmathLab/thermal-conduction-unsteady" - split_name: "3_stripes.basic.adaptive_refined" + split_name: "3_stripes.basic.1_adaptive_refined" n_elements: 100 - batch_size: 32 + batch_size: 24 train_size: 0.7 val_size: 0.2 test_size: 0.1 build_radial_graph: false remove_boundary_edges: true unrolling_steps: 2 + min_normalized_diff: 1e-4 optimizer: null lr_scheduler: null diff --git a/experiments/10_steps/config_16_layer_16_hidden_adaptive_refined_combined.yaml b/experiments/10_steps/config_16_layer_16_hidden_adaptive_refined_combined.yaml index e25335f..c6112d0 100644 --- a/experiments/10_steps/config_16_layer_16_hidden_adaptive_refined_combined.yaml +++ b/experiments/10_steps/config_16_layer_16_hidden_adaptive_refined_combined.yaml @@ -13,24 +13,11 @@ trainer: project: "thermal-conduction-unsteady-10.steps" name: "16_layer_16_hidden.adaptive_refined.combined" callbacks: - # - class_path: lightning.pytorch.callbacks.ModelCheckpoint - # init_args: - # dirpath: logs.autoregressive.wandb/16_refined.10_steps/checkpoints - # monitor: val/loss - # mode: min - # save_top_k: 1 - # filename: best-checkpoint - # - class_path: lightning.pytorch.callbacks.EarlyStopping - # init_args: - # monitor: val/loss - # mode: min - # patience: 30 - # verbose: false - class_path: ThermalSolver.switch_dataloader_callback.SwitchDataLoaderCallback init_args: increase_unrolling_steps_by: 4 - patience: 15 - last_patience: 20 + patience: 5 + last_patience: 10 max_unrolling_steps: 10 ckpt_path: logs.autoregressive.wandb/10_steps/basic.adaptive_refined.combined/16_layer_16_hidden/ max_epochs: 1000 @@ -59,16 +46,18 @@ data: init_args: hf_repo: "SISSAmathLab/thermal-conduction-unsteady" split_name: - - "4_stripes.basic.adaptive_refined" - - "3_stripes.basic.adaptive_refined" - - "2_stripes.basic.adaptive_refined" + - "4_stripes.basic.1_adaptive_refined" + - "3_stripes.basic.1_adaptive_refined" + - "2_stripes.basic.1_adaptive_refined" n_elements: 100 - batch_size: 32 + batch_size: 24 train_size: 0.7 val_size: 0.2 test_size: 0.1 build_radial_graph: false remove_boundary_edges: true unrolling_steps: 2 + min_normalized_diff: 1e-4 + optimizer: null -lr_scheduler: null +lr_scheduler: null \ No newline at end of file diff --git a/experiments/10_steps/config_16_layer_16_hidden_refined.yaml b/experiments/10_steps/config_16_layer_16_hidden_refined.yaml index c379d40..fbd1890 100644 --- a/experiments/10_steps/config_16_layer_16_hidden_refined.yaml +++ b/experiments/10_steps/config_16_layer_16_hidden_refined.yaml @@ -29,10 +29,10 @@ trainer: - class_path: ThermalSolver.switch_dataloader_callback.SwitchDataLoaderCallback init_args: increase_unrolling_steps_by: 4 - patience: 15 - last_patience: 20 + patience: 5 + last_patience: 15 max_unrolling_steps: 10 - ckpt_path: logs.autoregressive.wandb/10_steps/basic.refined.combined/16_layer_16_hidden/ + ckpt_path: logs.autoregressive.wandb/10_steps/basic.refined/16_layer_16_hidden/ max_epochs: 1000 min_epochs: null max_steps: -1 @@ -59,13 +59,14 @@ data: init_args: hf_repo: "SISSAmathLab/thermal-conduction-unsteady" split_name: "3_stripes.basic.refined" - n_elements: 100 - batch_size: 32 + n_elements: 50 + batch_size: 24 train_size: 0.7 val_size: 0.2 test_size: 0.1 build_radial_graph: false remove_boundary_edges: true unrolling_steps: 2 + min_normalized_diff: 1e-4 optimizer: null lr_scheduler: null diff --git a/experiments/10_steps/config_16_layer_16_hidden_refined_combined.yaml b/experiments/10_steps/config_16_layer_16_hidden_refined_combined.yaml index 0ec494e..baa2e83 100644 --- a/experiments/10_steps/config_16_layer_16_hidden_refined_combined.yaml +++ b/experiments/10_steps/config_16_layer_16_hidden_refined_combined.yaml @@ -29,10 +29,10 @@ trainer: - class_path: ThermalSolver.switch_dataloader_callback.SwitchDataLoaderCallback init_args: increase_unrolling_steps_by: 4 - patience: 15 - last_patience: 20 + patience: 5 + last_patience: 10 max_unrolling_steps: 10 - ckpt_path: logs.autoregressive.wandb/10_steps/basic.refined/16_layer_16_hidden/ + ckpt_path: logs.autoregressive.wandb/10_steps/basic.refined.combined/16_layer_16_hidden/ max_epochs: 1000 min_epochs: null max_steps: -1 @@ -52,7 +52,7 @@ model: hidden_dim: 16 output_dim: 1 n_layers: 16 - unrolling_steps: 2 + unrolling_steps: 10 data: class_path: ThermalSolver.graph_datamodule_unsteady.GraphDataModule @@ -62,13 +62,16 @@ data: - "4_stripes.basic.refined" - "3_stripes.basic.refined" - "2_stripes.basic.refined" - n_elements: 100 - batch_size: 32 + n_elements: 75 + batch_size: 24 train_size: 0.7 val_size: 0.2 test_size: 0.1 build_radial_graph: false remove_boundary_edges: true - unrolling_steps: 2 + unrolling_steps: 10 + min_normalized_diff: 1e-4 + optimizer: null lr_scheduler: null +ckpt_path: logs.autoregressive.wandb/10_steps/basic.refined.combined/16_layer_16_hidden/6_unrolling_best_checkpoint.ckpt \ No newline at end of file diff --git a/experiments/10_steps/config_16_layer_16_hidden_refined_star.yaml b/experiments/10_steps/config_16_layer_16_hidden_refined_star.yaml new file mode 100644 index 0000000..0b7c026 --- /dev/null +++ b/experiments/10_steps/config_16_layer_16_hidden_refined_star.yaml @@ -0,0 +1,72 @@ +# lightning.pytorch==2.5.5 +seed_everything: 1999 +trainer: + accelerator: gpu + strategy: auto + devices: 1 + num_nodes: 1 + precision: null + logger: + - class_path: lightning.pytorch.loggers.WandbLogger + init_args: + save_dir: logs.autoregressive.wandb + project: "thermal-conduction-unsteady-10.steps" + name: "16_layer_16_hidden.refined.star" + callbacks: + # - class_path: lightning.pytorch.callbacks.ModelCheckpoint + # init_args: + # dirpath: logs.autoregressive.wandb/16_refined.10_steps/checkpoints + # monitor: val/loss + # mode: min + # save_top_k: 1 + # filename: best-checkpoint + # - class_path: lightning.pytorch.callbacks.EarlyStopping + # init_args: + # monitor: val/loss + # mode: min + # patience: 30 + # verbose: false + - class_path: ThermalSolver.switch_dataloader_callback.SwitchDataLoaderCallback + init_args: + increase_unrolling_steps_by: 4 + patience: 5 + last_patience: 15 + max_unrolling_steps: 10 + ckpt_path: logs.autoregressive.wandb/10_steps/basic.refined.star/16_layer_16_hidden/ + max_epochs: 1000 + min_epochs: null + max_steps: -1 + min_steps: null + overfit_batches: 0.0 + log_every_n_steps: 0 + accumulate_grad_batches: 1 + default_root_dir: null + gradient_clip_val: 1.0 + +model: + class_path: ThermalSolver.autoregressive_module.GraphSolver + init_args: + model_class_path: ThermalSolver.model.diffusion_net.DiffusionNet + model_init_args: + input_dim: 1 + hidden_dim: 16 + output_dim: 1 + n_layers: 16 + unrolling_steps: 2 + +data: + class_path: ThermalSolver.graph_datamodule_unsteady.GraphDataModule + init_args: + hf_repo: "SISSAmathLab/thermal-conduction-unsteady" + split_name: "3_stripes.star" + n_elements: 100 + batch_size: 24 + train_size: 0.7 + val_size: 0.2 + test_size: 0.1 + build_radial_graph: false + remove_boundary_edges: true + unrolling_steps: 2 + min_normalized_diff: 1e-4 +optimizer: null +lr_scheduler: null diff --git a/experiments/10_steps/config_16_layer_16_hidden_star.yaml b/experiments/10_steps/config_16_layer_16_hidden_star.yaml new file mode 100644 index 0000000..b3f0757 --- /dev/null +++ b/experiments/10_steps/config_16_layer_16_hidden_star.yaml @@ -0,0 +1,72 @@ +# lightning.pytorch==2.5.5 +seed_everything: 1999 +trainer: + accelerator: gpu + strategy: auto + devices: 1 + num_nodes: 1 + precision: null + logger: + - class_path: lightning.pytorch.loggers.WandbLogger + init_args: + save_dir: logs.autoregressive.wandb + project: "thermal-conduction-unsteady-10.steps" + name: "16_layer_16_hidden.star" + callbacks: + # - class_path: lightning.pytorch.callbacks.ModelCheckpoint + # init_args: + # dirpath: logs.autoregressive.wandb/16_refined.10_steps/checkpoints + # monitor: val/loss + # mode: min + # save_top_k: 1 + # filename: best-checkpoint + # - class_path: lightning.pytorch.callbacks.EarlyStopping + # init_args: + # monitor: val/loss + # mode: min + # patience: 30 + # verbose: false + - class_path: ThermalSolver.switch_dataloader_callback.SwitchDataLoaderCallback + init_args: + increase_unrolling_steps_by: 4 + patience: 5 + last_patience: 15 + max_unrolling_steps: 10 + ckpt_path: logs.autoregressive.wandb/10_steps/basic.star/16_layer_16_hidden/ + max_epochs: 1000 + min_epochs: null + max_steps: -1 + min_steps: null + overfit_batches: 0.0 + log_every_n_steps: 0 + accumulate_grad_batches: 1 + default_root_dir: null + gradient_clip_val: 1.0 + +model: + class_path: ThermalSolver.autoregressive_module.GraphSolver + init_args: + model_class_path: ThermalSolver.model.diffusion_net.DiffusionNet + model_init_args: + input_dim: 1 + hidden_dim: 16 + output_dim: 1 + n_layers: 16 + unrolling_steps: 2 + +data: + class_path: ThermalSolver.graph_datamodule_unsteady.GraphDataModule + init_args: + hf_repo: "SISSAmathLab/thermal-conduction-unsteady" + split_name: "3_stripes.star.refined" + n_elements: 100 + batch_size: 24 + train_size: 0.7 + val_size: 0.2 + test_size: 0.1 + build_radial_graph: false + remove_boundary_edges: true + unrolling_steps: 2 + min_normalized_diff: 1e-4 +optimizer: null +lr_scheduler: null diff --git a/experiments/10_steps/config_16_layer_16_hidden_star_combined.yaml b/experiments/10_steps/config_16_layer_16_hidden_star_combined.yaml new file mode 100644 index 0000000..324cdcd --- /dev/null +++ b/experiments/10_steps/config_16_layer_16_hidden_star_combined.yaml @@ -0,0 +1,75 @@ +# lightning.pytorch==2.5.5 +seed_everything: 1999 +trainer: + accelerator: gpu + strategy: auto + devices: 1 + num_nodes: 1 + precision: null + logger: + - class_path: lightning.pytorch.loggers.WandbLogger + init_args: + save_dir: logs.autoregressive.wandb + project: "thermal-conduction-unsteady-10.steps" + name: "16_layer_16_hidden.star.combined" + callbacks: + # - class_path: lightning.pytorch.callbacks.ModelCheckpoint + # init_args: + # dirpath: logs.autoregressive.wandb/16_refined.10_steps/checkpoints + # monitor: val/loss + # mode: min + # save_top_k: 1 + # filename: best-checkpoint + # - class_path: lightning.pytorch.callbacks.EarlyStopping + # init_args: + # monitor: val/loss + # mode: min + # patience: 30 + # verbose: false + - class_path: ThermalSolver.switch_dataloader_callback.SwitchDataLoaderCallback + init_args: + increase_unrolling_steps_by: 4 + patience: 5 + last_patience: 15 + max_unrolling_steps: 10 + ckpt_path: logs.autoregressive.wandb/10_steps/basic.star.combined/16_layer_16_hidden/ + max_epochs: 1000 + min_epochs: null + max_steps: -1 + min_steps: null + overfit_batches: 0.0 + log_every_n_steps: 0 + accumulate_grad_batches: 1 + default_root_dir: null + gradient_clip_val: 1.0 + +model: + class_path: ThermalSolver.autoregressive_module.GraphSolver + init_args: + model_class_path: ThermalSolver.model.diffusion_net.DiffusionNet + model_init_args: + input_dim: 1 + hidden_dim: 16 + output_dim: 1 + n_layers: 16 + unrolling_steps: 2 + +data: + class_path: ThermalSolver.graph_datamodule_unsteady.GraphDataModule + init_args: + hf_repo: "SISSAmathLab/thermal-conduction-unsteady" + split_name: + - "4_stripes.star" + - "3_stripes.star" + - "2_stripes.star" + n_elements: 100 + batch_size: 24 + train_size: 0.7 + val_size: 0.2 + test_size: 0.1 + build_radial_graph: false + remove_boundary_edges: true + unrolling_steps: 2 + min_normalized_diff: 1e-4 +optimizer: null +lr_scheduler: null diff --git a/experiments/10_steps/config_8_layer_16_hidden_star.yaml b/experiments/10_steps/config_8_layer_16_hidden_star.yaml new file mode 100644 index 0000000..ab5387f --- /dev/null +++ b/experiments/10_steps/config_8_layer_16_hidden_star.yaml @@ -0,0 +1,72 @@ +# lightning.pytorch==2.5.5 +seed_everything: 1999 +trainer: + accelerator: gpu + strategy: auto + devices: 1 + num_nodes: 1 + precision: null + logger: + - class_path: lightning.pytorch.loggers.WandbLogger + init_args: + save_dir: logs.autoregressive.wandb + project: "thermal-conduction-unsteady-10.steps" + name: "8_layer_16_hidden.star" + callbacks: + # - class_path: lightning.pytorch.callbacks.ModelCheckpoint + # init_args: + # dirpath: logs.autoregressive.wandb/16_refined.10_steps/checkpoints + # monitor: val/loss + # mode: min + # save_top_k: 1 + # filename: best-checkpoint + # - class_path: lightning.pytorch.callbacks.EarlyStopping + # init_args: + # monitor: val/loss + # mode: min + # patience: 30 + # verbose: false + - class_path: ThermalSolver.switch_dataloader_callback.SwitchDataLoaderCallback + init_args: + increase_unrolling_steps_by: 4 + patience: 5 + last_patience: 15 + max_unrolling_steps: 10 + ckpt_path: logs.autoregressive.wandb/10_steps/basic.star/8_layer_16_hidden/ + max_epochs: 1000 + min_epochs: null + max_steps: -1 + min_steps: null + overfit_batches: 0.0 + log_every_n_steps: 0 + accumulate_grad_batches: 1 + default_root_dir: null + gradient_clip_val: 1.0 + +model: + class_path: ThermalSolver.autoregressive_module.GraphSolver + init_args: + model_class_path: ThermalSolver.model.diffusion_net.DiffusionNet + model_init_args: + input_dim: 1 + hidden_dim: 16 + output_dim: 1 + n_layers: 8 + unrolling_steps: 2 + +data: + class_path: ThermalSolver.graph_datamodule_unsteady.GraphDataModule + init_args: + hf_repo: "SISSAmathLab/thermal-conduction-unsteady" + split_name: "3_stripes.star.refined" + n_elements: 100 + batch_size: 24 + train_size: 0.7 + val_size: 0.2 + test_size: 0.1 + build_radial_graph: false + remove_boundary_edges: true + unrolling_steps: 2 + min_normalized_diff: 1e-4 +optimizer: null +lr_scheduler: null diff --git a/experiments/10_steps/test_config_16_layer_16_hidden_star.yaml b/experiments/10_steps/test_config_16_layer_16_hidden_star.yaml new file mode 100644 index 0000000..5a0ef7f --- /dev/null +++ b/experiments/10_steps/test_config_16_layer_16_hidden_star.yaml @@ -0,0 +1,76 @@ +# lightning.pytorch==2.5.5 +seed_everything: 1999 +trainer: + accelerator: cpu + strategy: auto + devices: 1 + num_nodes: 1 + precision: null + # logger: + # - class_path: lightning.pytorch.loggers.WandbLogger + # init_args: + # save_dir: logs.autoregressive.wandb + # project: "thermal-conduction-unsteady-10.steps" + # name: "16_layer_16_hidden.adaptive_refined.combined" + # callbacks: + # - class_path: lightning.pytorch.callbacks.ModelCheckpoint + # init_args: + # dirpath: logs.autoregressive.wandb/16_refined.10_steps/checkpoints + # monitor: val/loss + # mode: min + # save_top_k: 1 + # filename: best-checkpoint + # - class_path: lightning.pytorch.callbacks.EarlyStopping + # init_args: + # monitor: val/loss + # mode: min + # patience: 30 + # verbose: false + # - class_path: ThermalSolver.switch_dataloader_callback.SwitchDataLoaderCallback + # init_args: + # increase_unrolling_steps_by: 4 + # patience: 15 + # last_patience: 20 + # max_unrolling_steps: 10 + # ckpt_path: logs.autoregressive.wandb/10_steps/basic.adaptive_refined.combined/16_layer_16_hidden/ + max_epochs: 1000 + min_epochs: null + max_steps: -1 + min_steps: null + overfit_batches: 0.0 + log_every_n_steps: 0 + accumulate_grad_batches: 1 + default_root_dir: null + gradient_clip_val: 1.0 + +model: + class_path: ThermalSolver.autoregressive_module.GraphSolver + init_args: + model_class_path: ThermalSolver.model.diffusion_net.DiffusionNet + model_init_args: + input_dim: 1 + hidden_dim: 16 + output_dim: 1 + n_layers: 16 + unrolling_steps: 2 + +data: + class_path: ThermalSolver.graph_datamodule_unsteady.GraphDataModule + init_args: + hf_repo: "SISSAmathLab/thermal-conduction-unsteady" + split_name: + # - "2_stripes.basic.refined" + # - "3_stripes.basic.refined" + # - "4_stripes.basic.1_adaptive_refined" + - "3_stripes.star" + n_elements: 50 + batch_size: 32 + train_size: 0.7 + val_size: 0.2 + test_size: 0.1 + build_radial_graph: false + remove_boundary_edges: true + unrolling_steps: 2 +optimizer: null +lr_scheduler: null +ckpt_path: /home/folivo/storage/thermal-conduction-ml/logs.autoregressive.wandb/10_steps/basic.star.combined/16_layer_16_hidden/10_unrolling_best_checkpoint.ckpt