From 469b1c6e132463fd81953f040478808c314ec3ba Mon Sep 17 00:00:00 2001
From: Filippo Olivo <filippo@filippoolivo.com>
Date: Sun, 5 Oct 2025 10:36:23 +0200
Subject: [PATCH] Switch ConditionalGNOBlock to scalar diffusive edge weights

---
 ThermalSolver/data_module.py     |  23 ++++-
 ThermalSolver/model/local_gno.py | 158 ++++++++++++++++++++++++-------
 ThermalSolver/module.py          |   1 -
 3 files changed, 142 insertions(+), 40 deletions(-)

diff --git a/ThermalSolver/data_module.py b/ThermalSolver/data_module.py
index f500d0f..1a48b1d 100644
--- a/ThermalSolver/data_module.py
+++ b/ThermalSolver/data_module.py
@@ -6,6 +6,7 @@ from torch_geometric.data import Data
 from torch_geometric.loader import DataLoader
 from torch_geometric.utils import to_undirected
 from .mesh_data import MeshData
+import os
 
 
 class GraphDataModule(LightningDataModule):
@@ -115,8 +116,8 @@ class GraphDataModule(LightningDataModule):
                 pos=pos,
                 edge_attr=edge_attr,
                 y=temperature.unsqueeze(-1),
-                boundary_mask=boundary_mask,
-                boundary_values=torch.tensor(0),
+                boundary_mask=torch.tensor(0),  # Fake value (to fix)
+                boundary_values=torch.tensor(0),  # Fake value (to fix)
             )
 
         return MeshData(
@@ -143,15 +144,27 @@ class GraphDataModule(LightningDataModule):
 
     def train_dataloader(self):
         return DataLoader(
-            self.train_data, batch_size=self.batch_size, shuffle=True
+            self.train_data,
+            batch_size=self.batch_size,
+            shuffle=True,
+            num_workers=8,
+            pin_memory=True,
         )
 
     def val_dataloader(self):
         return DataLoader(
-            self.val_data, batch_size=self.batch_size, shuffle=False
+            self.val_data,
+            batch_size=self.batch_size,
+            shuffle=False,
+            num_workers=8,
+            pin_memory=True,
         )
 
     def test_dataloader(self):
         return DataLoader(
-            self.test_data, batch_size=self.batch_size, shuffle=False
+            self.test_data,
+            batch_size=self.batch_size,
+            shuffle=False,
+            num_workers=8,
+            pin_memory=True,
         )
diff --git a/ThermalSolver/model/local_gno.py b/ThermalSolver/model/local_gno.py
index e6bf568..0d097e9 100644
--- a/ThermalSolver/model/local_gno.py
+++ b/ThermalSolver/model/local_gno.py
@@ -56,44 +56,100 @@ class DecX(nn.Module):
         return self.net(x)
 
 
+# class ConditionalGNOBlock(MessagePassing):
+#     def __init__(self, hidden_ch, edge_ch=0, aggr="mean"):
+#         super().__init__(aggr=aggr, node_dim=0)
+
+#         self.edge_attr_net = nn.Sequential(
+#             nn.Linear(edge_ch, hidden_ch // 2),
+#             nn.SiLU(),
+#             nn.Linear(hidden_ch // 2, 1),
+#             nn.Softplus()
+#         )
+
+#         self.diff_net = nn.Sequential(
+#             nn.Linear(hidden_ch, hidden_ch),
+#             nn.SiLU(),
+#             nn.Linear(hidden_ch, hidden_ch),
+#         )
+
+#         # self.x_net = nn.Sequential(
+#         #     nn.Linear(hidden_ch, hidden_ch),
+#         #     nn.SiLU(),
+#         #     nn.Linear(hidden_ch, hidden_ch),
+#         # )
+
+#         self.c_ij_net = nn.Sequential(
+#             nn.Linear(hidden_ch, hidden_ch // 2),
+#             nn.SiLU(),
+#             nn.Linear(hidden_ch // 2, 1),
+#             nn.Sigmoid(),
+#         )
+
+#         # self.gamma_net = nn.Sequential(
+#         #     nn.Linear(2 * hidden_ch, hidden_ch),
+#         #     nn.SiLU(),
+#         #     nn.Linear(hidden_ch, hidden_ch // 2),
+#         #     nn.SiLU(),
+#         #     nn.Linear(hidden_ch // 2, 1),
+#         #     nn.Sigmoid(),
+#         # )
+
+#         self.alpha_net = nn.Sequential(
+#             nn.Linear(2 * hidden_ch, hidden_ch),
+#             nn.SiLU(),
+#             nn.Linear(hidden_ch, hidden_ch // 2),
+#             nn.SiLU(),
+#             nn.Linear(hidden_ch // 2, 1),
+#             nn.Sigmoid(),
+#         )
+
+#     def forward(self, x, c, edge_index, edge_attr=None):
+#         return self.propagate(edge_index, x=x, c=c, edge_attr=edge_attr)
+
+#     def message(self, x_i, x_j, c_i, c_j, edge_attr):
+#         c_ij = 0.5 * (c_i + c_j)
+#         # gamma = self.gamma_net(torch.cat([x_i, x_j], dim=-1))
+#         # gate = torch.sigmoid(self.edge_attr_net(edge_attr))
+#         gate = self.edge_attr_net(edge_attr)
+#         # m = (
+#         #     gamma * self.diff_net(x_j - x_i) + (1 - gamma) * self.x_net(x_j)
+#         # ) * gate
+#         m = self.diff_net(x_j - x_i) * gate
+#         m = m * self.c_ij_net(c_ij)
+#         return m
+
+#     def update(self, aggr_out, x):
+#         alpha = self.alpha_net(torch.cat([x, aggr_out], dim=-1))
+#         return x + alpha * aggr_out
+
+
 class ConditionalGNOBlock(MessagePassing):
     def __init__(self, hidden_ch, edge_ch=0, aggr="mean"):
         super().__init__(aggr=aggr, node_dim=0)
+        self.edge_ch = edge_ch
 
+        # Network mapping edge_attr -> non-negative scalar coefficient (ends in Softplus)
+        # TODO: if edge_ch == 0, use a global learnable coefficient (no such fallback is visible here)
         self.edge_attr_net = nn.Sequential(
-            nn.Linear(edge_ch, hidden_ch // 2),
+            nn.Linear(edge_ch, hidden_ch),
             nn.SiLU(),
-            nn.Linear(hidden_ch // 2, hidden_ch),
-            nn.Tanh(),
-        )
-
-        self.msg_proj = nn.Sequential(
-            nn.Linear(hidden_ch, hidden_ch, bias=False),
+            nn.Linear(hidden_ch, hidden_ch // 2),
             nn.SiLU(),
-            nn.Linear(hidden_ch, hidden_ch, bias=False),
+            nn.Linear(hidden_ch // 2, 1),
+            nn.Softplus(),
         )
-
-        self.diff_net = nn.Sequential(
-            nn.Linear(hidden_ch, hidden_ch),
-            nn.SiLU(),
-            nn.Linear(hidden_ch, hidden_ch),
-        )
-
-        self.x_net = nn.Sequential(
-            nn.Linear(hidden_ch, hidden_ch),
-            nn.SiLU(),
-            nn.Linear(hidden_ch, hidden_ch),
-        )
-
+        # Gate computed from the condition c_ij (returns a scalar in (0, 1))
         self.c_ij_net = nn.Sequential(
             nn.Linear(hidden_ch, hidden_ch),
             nn.SiLU(),
-            nn.Linear(hidden_ch, hidden_ch),
-            nn.Tanh(),
+            nn.Linear(hidden_ch, hidden_ch // 2),
+            nn.SiLU(),
+            nn.Linear(hidden_ch // 2, 1),
+            nn.Sigmoid(),
         )
 
-        self.balancing = nn.Parameter(torch.tensor(0.0))
-
+        # Per-step alpha (bounded to (0, 1) by the final Sigmoid)
         self.alpha_net = nn.Sequential(
             nn.Linear(2 * hidden_ch, hidden_ch),
             nn.SiLU(),
@@ -103,22 +159,56 @@ class ConditionalGNOBlock(MessagePassing):
             nn.Sigmoid(),
         )
 
+        # self.norm = nn.LayerNorm(hidden_ch)
+
     def forward(self, x, c, edge_index, edge_attr=None):
+        # Call propagate. NOTE(review): message() always feeds edge_attr to edge_attr_net, so None is unsupported
         return self.propagate(edge_index, x=x, c=c, edge_attr=edge_attr)
 
     def message(self, x_i, x_j, c_i, c_j, edge_attr):
-        c_ij = 0.5 * (c_i + c_j)
-        alpha = torch.sigmoid(self.balancing)
-        gate = torch.sigmoid(self.edge_attr_net(edge_attr))
-        m = (
-            alpha * self.diff_net(x_j - x_i) + (1 - alpha) * self.x_net(x_j)
-        ) * gate
-        m = m * self.c_ij_net(c_ij)
+        """
+        Diffusive implementation:
+        m_ij = w_ij * (x_j - x_i) * c_gate_ij
+        where w_ij = edge_attr_net(edge_attr) + 1e-12 > 0 (the net ends in Softplus)
+        """
+        # 1) compute c_ij and the gate derived from c
+        c_ij = 0.5 * (c_i + c_j)  # [E, H]
+        c_gate = self.c_ij_net(c_ij)  # [E, 1] in (0,1)
+
+        # 2) compute a non-negative scalar weight per edge
+        w_raw = self.edge_attr_net(edge_attr)  # [E,1]
+
+        # edge_attr_net ends in Softplus -> weight >= 0; add epsilon for stability
+        w = w_raw + 1e-12  # [E,1]
+
+        # 3) base message: weighted difference
+        diff = x_j - x_i  # [E, H]
+        m = w * diff  # broadcast: [E,1] * [E,H] -> [E,H]
+
+        # 4) apply the gate derived from the condition
+        m = m * c_gate  # [E,H]
+
+        # w could be useful in update, but MessagePassing does not return extras from message(),
+        # so degree normalization would require computing the degrees before propagate.
+        # Only m is returned here; with aggr='mean' it is averaged over the incoming neighbours.
         return m
 
     def update(self, aggr_out, x):
-        alpha = self.alpha_net(torch.cat([x, aggr_out], dim=-1))
-        return x + alpha * self.msg_proj(aggr_out)
+        """
+        aggr_out:
+         - if aggr='sum': sum of the incoming gated w_ij * (x_j - x_i) messages
+         - if aggr='mean': already normalized by the number of neighbours (but not by the sum of weights)
+        No further normalization is applied here: the update is the residual step
+        x + alpha * aggr_out, with alpha in (0, 1) produced by alpha_net.
+        """
+        # aggr_out = self.norm(aggr_out)  # stabilizes the scale
+
+        # alpha (vector/scalar): [N,1]
+        alpha = self.alpha_net(torch.cat([x, aggr_out], dim=-1))  # in (0,1)
+
+        x_new = x + alpha * aggr_out
+
+        return x_new
 
 
 class GatingGNO(nn.Module):
diff --git a/ThermalSolver/module.py b/ThermalSolver/module.py
index 842cb69..5bfecce 100644
--- a/ThermalSolver/module.py
+++ b/ThermalSolver/module.py
@@ -109,7 +109,6 @@ class GraphSolver(LightningModule):
             edge_index=edge_index,
             edge_attr=edge_attr,
             unrolling_steps=self.unrolling_steps,
-            plot_results=True,
             batch=batch.batch,
             pos=batch.pos,
         )