jadechoghari
/

mar

@@ -5,7 +5,6 @@ import math
 from .diffusion import create_diffusion
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 class DiffLoss(nn.Module):
     """Diffusion Loss"""
@@ -36,12 +35,12 @@ class DiffLoss(nn.Module):
     def sample(self, z, temperature=1.0, cfg=1.0):
         # diffusion loss sampling
         if not cfg == 1.0:
-            noise = torch.randn(z.shape[0] // 2, self.in_channels).to(device)
             noise = torch.cat([noise, noise], dim=0)
             model_kwargs = dict(c=z, cfg_scale=cfg)
             sample_fn = self.net.forward_with_cfg
         else:
-            noise = torch.randn(z.shape[0], self.in_channels).to(device)
             model_kwargs = dict(c=z)
             sample_fn = self.net.forward
@@ -91,23 +90,9 @@ class TimestepEmbedder(nn.Module):
             embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1)
         return embedding
-    # def forward(self, t):
-    #     t_freq = self.timestep_embedding(t, self.frequency_embedding_size)
-    #     t_emb = self.mlp(t_freq)
-    #     return t_emb
     def forward(self, t):
-        device = next(self.mlp.parameters()).device
-        t = t.to(device)
         t_freq = self.timestep_embedding(t, self.frequency_embedding_size)
-        t_freq = t_freq.to(device)
         t_emb = self.mlp(t_freq)
         return t_emb
@@ -145,7 +130,7 @@ class ResBlock(nn.Module):
 class FinalLayer(nn.Module):
     """
-    The final layer of DiT.
     """
     def __init__(self, model_channels, out_channels):
         super().__init__()
@@ -232,10 +217,10 @@ class SimpleMLPAdaLN(nn.Module):
     def forward(self, x, t, c):
         """
         Apply the model to an input batch.
-        :param x: an [N x C x ...] Tensor of inputs.
         :param t: a 1-D batch of timesteps.
         :param c: conditioning from AR transformer.
-        :return: an [N x C x ...] Tensor of outputs.
         """
         x = self.input_proj(x)
         t = self.time_embed(t)

 from .diffusion import create_diffusion
 class DiffLoss(nn.Module):
     """Diffusion Loss"""
     def sample(self, z, temperature=1.0, cfg=1.0):
         # diffusion loss sampling
         if not cfg == 1.0:
+            noise = torch.randn(z.shape[0] // 2, self.in_channels).cuda()
             noise = torch.cat([noise, noise], dim=0)
             model_kwargs = dict(c=z, cfg_scale=cfg)
             sample_fn = self.net.forward_with_cfg
         else:
+            noise = torch.randn(z.shape[0], self.in_channels).cuda()
             model_kwargs = dict(c=z)
             sample_fn = self.net.forward
             embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1)
         return embedding
     def forward(self, t):
         """
         Embed a batch of timesteps.

         :param t: timesteps, passed unchanged to ``self.timestep_embedding``
                   together with ``self.frequency_embedding_size``.
         :return: the result of applying ``self.mlp`` to those frequency features.
         """
         return self.mlp(
             self.timestep_embedding(t, self.frequency_embedding_size)
         )
 class FinalLayer(nn.Module):
     """
+    The final layer adopted from DiT.
     """
     def __init__(self, model_channels, out_channels):
         super().__init__()
     def forward(self, x, t, c):
         """
         Apply the model to an input batch.
+        :param x: an [N x C] Tensor of inputs.
         :param t: a 1-D batch of timesteps.
         :param c: conditioning from AR transformer.
+        :return: an [N x C] Tensor of outputs.
         """
         x = self.input_proj(x)
         t = self.time_embed(t)