diff --git a/ThermalSolver/data_module.py b/ThermalSolver/data_module.py index 1a48b1d..b3a0a73 100644 --- a/ThermalSolver/data_module.py +++ b/ThermalSolver/data_module.py @@ -23,8 +23,9 @@ class GraphDataModule(LightningDataModule): super().__init__() self.hf_repo = hf_repo self.split_name = split_name - self.dataset = None - self.geometry = None + self.dataset_dict = {} + # self.geometry = None + self.geometry_dict = {} self.train_size = train_size self.val_size = val_size self.test_size = test_size @@ -32,20 +33,30 @@ class GraphDataModule(LightningDataModule): self.remove_boundary_edges = remove_boundary_edges def prepare_data(self): - hf_dataset = load_dataset(self.hf_repo, name="snapshots")[ - self.split_name - ] - self.geometry = load_dataset(self.hf_repo, name="geometry")[ - self.split_name - ] - self.data = [ - self._build_dataset(snapshot, geometry) - for snapshot, geometry in tqdm( - zip(hf_dataset, self.geometry), - desc="Building graphs", - total=len(hf_dataset), - ) - ] + dataset = load_dataset(self.hf_repo, name="snapshots")[self.split_name] + geometry = load_dataset(self.hf_repo, name="geometry")[self.split_name] + # data = [ + # self._build_dataset(snapshot, geometry) + # for snapshot, geometry in tqdm( + # zip(hf_dataset, self.geometry), + # desc="Building graphs", + # total=len(hf_dataset), + # ) + # ] + + total_len = len(dataset) + train_len = int(self.train_size * total_len) + valid_len = int(self.val_size * total_len) + self.dataset_dict = { + "train": dataset.select(range(0, train_len)), + "val": dataset.select(range(train_len, train_len + valid_len)), + "test": dataset.select(range(train_len + valid_len, total_len)), + } + self.geometry_dict = { + "train": geometry.select(range(0, train_len)), + "val": geometry.select(range(train_len, train_len + valid_len)), + "test": geometry.select(range(train_len + valid_len, total_len)), + } def _compute_boundary_mask( self, bottom_ids, right_ids, top_ids, left_ids, temperature @@ -132,15 +143,36 @@ class GraphDataModule(LightningDataModule): ) def setup(self, stage: str = None): - n = len(self.data) - train_end = int(n * self.train_size) - val_end = train_end + int(n * self.val_size) + print(type(self.dataset_dict["train"])) if stage == "fit" or stage is None: - self.train_data = self.data[:train_end] - self.val_data = self.data[train_end:val_end] + self.train_data = [ + self._build_dataset(snap, geom) + for snap, geom in tqdm( + zip( + self.dataset_dict["train"], self.geometry_dict["train"] + ), + desc="Building train graphs", + total=len(self.dataset_dict["train"]), + ) + ] + self.val_data = [ + self._build_dataset(snap, geom) + for snap, geom in tqdm( + zip(self.dataset_dict["val"], self.geometry_dict["val"]), + desc="Building val graphs", + total=len(self.dataset_dict["val"]), + ) + ] if stage == "test" or stage is None: - self.test_data = self.data[val_end:] + self.test_data = [ + self._build_dataset(snap, geom) + for snap, geom in tqdm( + zip(self.dataset_dict["test"], self.geometry_dict["test"]), + desc="Building test graphs", + total=len(self.dataset_dict["test"]), + ) + ] def train_dataloader(self): return DataLoader( diff --git a/ThermalSolver/model/local_gno.py b/ThermalSolver/model/local_gno.py index 0d097e9..efdb868 100644 --- a/ThermalSolver/model/local_gno.py +++ b/ThermalSolver/model/local_gno.py @@ -22,8 +22,9 @@ class EncX(nn.Module): super().__init__() self.net = nn.Sequential( nn.Linear(x_ch, hidden // 2), - nn.SiLU(), + nn.GELU(), nn.Linear(hidden // 2, hidden), + nn.GELU(), ) def forward(self, x): @@ -35,8 +36,9 @@ class EncC(nn.Module): super().__init__() self.net = nn.Sequential( nn.Linear(c_ch, hidden // 2), - nn.SiLU(), + nn.GELU(), nn.Linear(hidden // 2, hidden), + nn.GELU(), ) def forward(self, c): @@ -48,8 +50,9 @@ class DecX(nn.Module): super().__init__() self.net = nn.Sequential( nn.Linear(hidden, hidden // 2), - nn.SiLU(), + nn.GELU(), nn.Linear(hidden // 2, out_ch), + nn.GELU(), ) def forward(self, x): @@ -133,18 +136,18 @@ class ConditionalGNOBlock(MessagePassing): # Se edge_ch==0 useremo un coefficiente apprendibile globale self.edge_attr_net = nn.Sequential( nn.Linear(edge_ch, hidden_ch), - nn.SiLU(), + nn.GELU(), nn.Linear(hidden_ch, hidden_ch // 2), - nn.SiLU(), + nn.GELU(), nn.Linear(hidden_ch // 2, 1), nn.Softplus(), ) # gating dalla condizione c_ij (restituisce scalar in (0,1)) self.c_ij_net = nn.Sequential( nn.Linear(hidden_ch, hidden_ch), - nn.SiLU(), + nn.GELU(), nn.Linear(hidden_ch, hidden_ch // 2), - nn.SiLU(), + nn.GELU(), nn.Linear(hidden_ch // 2, 1), nn.Sigmoid(), ) @@ -152,13 +155,22 @@ class ConditionalGNOBlock(MessagePassing): # alpha per passo (clampato tramite sigmoid) self.alpha_net = nn.Sequential( nn.Linear(2 * hidden_ch, hidden_ch), - nn.SiLU(), + nn.GELU(), nn.Linear(hidden_ch, hidden_ch // 2), - nn.SiLU(), + nn.GELU(), nn.Linear(hidden_ch // 2, 1), nn.Sigmoid(), ) + self.diff_net = nn.Sequential( + nn.Linear(hidden_ch, hidden_ch * 2), + nn.GELU(), + nn.Linear(hidden_ch * 2, hidden_ch**2), + nn.GELU(), + nn.Linear(hidden_ch**2, hidden_ch), + nn.GELU(), + ) + # self.norm = nn.LayerNorm(hidden_ch) def forward(self, x, c, edge_index, edge_attr=None): @@ -171,43 +183,21 @@ class ConditionalGNOBlock(MessagePassing): m_ij = w_ij * (x_j - x_i) * c_gate_ij dove w_ij = softplus(edge_attr_net(edge_attr)) >= 0 """ - # 1) calcola c_ij e gating da c c_ij = 0.5 * (c_i + c_j) # [E, H] c_gate = self.c_ij_net(c_ij) # [E, 1] in (0,1) - - # 2) calcola peso scalare non-negativo per edge w_raw = self.edge_attr_net(edge_attr) # [E,1] - - # softplus -> peso >= 0; aggiungo epsilon per stabilità - w = w_raw + 1e-12 # [E,1] - - # 3) messaggio base: differenza pesata + w = w_raw + 1e-8 diff = x_j - x_i # [E, H] - m = w * diff # broadcast: [E,1] * [E,H] -> [E,H] - - # 4) applica gating dalla condizione + m = w * self.diff_net(diff) + diff # [E,H] m = m * c_gate # [E,H] - - # Restituisco anche w (sfruttabile in update) — ma MessagePassing non ritorna extra, - # così se vuoi degree-normalization devi calcolare i gradi prima di propagate. - # Qui ritorno solo m: la normalizzazione per grado la faccio in update usando 'mean' aggr return m def update(self, aggr_out, x): """ - aggr_out: - - se aggr='sum': somma delle w_ij*(x_j-x_i) incoming - - se aggr='mean': già normalizzato sul numero di vicini (ma non per somma dei pesi) - Qui normalizziamo implicitamente dividendo per (1 + |aggr_out|_norm) per stabilità, - e applichiamo il passo alpha. + TODO: doc """ - # aggr_out = self.norm(aggr_out) # stabilizza la scala - - # alpha vettoriale/scalar: [N,1] - alpha = self.alpha_net(torch.cat([x, aggr_out], dim=-1)) # in (0,1) - + alpha = self.alpha_net(torch.cat([x, aggr_out], dim=-1)) x_new = x + alpha * aggr_out - return x_new @@ -250,10 +240,10 @@ class GatingGNO(nn.Module): x_ = self.dec(x) plot_results_fn(x_, pos, 0, batch=batch) for _ in range(1, unrolling_steps + 1): - for blk in self.blocks: + for i, blk in enumerate(self.blocks): x = blk(x, c, edge_index, edge_attr=edge_attr) - if plot_results: - x_ = self.dec(x) - plot_results_fn(x_, pos, _, batch=batch) + if plot_results: + x_ = self.dec(x) + plot_results_fn(x_, pos, i * _, batch=batch) return self.dec(x) diff --git a/ThermalSolver/module.py b/ThermalSolver/module.py index 5bfecce..0329db5 100644 --- a/ThermalSolver/module.py +++ b/ThermalSolver/module.py @@ -111,6 +111,7 @@ class GraphSolver(LightningModule): unrolling_steps=self.unrolling_steps, batch=batch.batch, pos=batch.pos, + plot_results=True, ) loss = self._compute_loss(y_pred, y) self._log_loss(loss, batch, "test")