Commit 16b19d3 · Parent(s): 39c830f · Update app

Files changed:
- app.py (+3 -4)
- utils/inference_utils.py (+2 -2)

app.py CHANGED
@@ -877,7 +877,7 @@ def calc_av_scores(vid_emb, aud_emb, model):
     '''

     scores = calc_att_map(vid_emb, aud_emb, model)
-    att_map = logsoftmax_2d(scores)
+    att_map = logsoftmax_2d(torch.Tensor(scores))
     scores = scores.mean(-1)

     return scores, att_map
@@ -902,12 +902,11 @@ def calc_att_map(vid_emb, aud_emb, model):
         vid_emb,
         aud_emb,
         part_len=10,
-        dim=3,
-        device=device)
+        dim=3)

     scores = model.logits_scale(scores[..., None]).squeeze(-1)

-    return scores
+    return scores.cpu().numpy()

 def generate_video(frames, audio_file, video_fname):

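The net effect of this change: calc_att_map now returns a NumPy array (scores.cpu().numpy()) instead of a GPU tensor, so calc_av_scores re-wraps it with torch.Tensor(...) before calling logsoftmax_2d, while the following .mean(-1) runs on the NumPy array. A minimal sketch of that data flow, assuming logsoftmax_2d takes a log-softmax over the flattened trailing dimensions and using made-up shapes (neither detail is taken from app.py):

import numpy as np
import torch
import torch.nn.functional as F

def logsoftmax_2d(logits):
    # Assumed behaviour of the helper used in app.py: log-softmax over the
    # dims after the first two, flattened and then reshaped back.
    flat = logits.reshape(list(logits.shape[:2]) + [-1])
    return F.log_softmax(flat, dim=-1).reshape(logits.shape)

scores = np.random.randn(1, 5, 7, 7).astype(np.float32)  # stand-in for calc_att_map output
att_map = logsoftmax_2d(torch.Tensor(scores))  # torch.Tensor copies the NumPy array into a float32 CPU tensor
scores = scores.mean(-1)                       # NumPy also supports .mean(-1), so the rest of calc_av_scores is unchanged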
utils/inference_utils.py CHANGED
@@ -1,7 +1,7 @@
 import torch
 import numpy as np

-def run_func_in_parts(func, vid_emb, aud_emb, part_len, dim, device):
+def run_func_in_parts(func, vid_emb, aud_emb, part_len, dim):
     """
     Run given function in parts, spliting the inputs on dimension dim
     This is used to save memory when inputs too large to compute on gpu
@@ -10,7 +10,7 @@ def run_func_in_parts(func, vid_emb, aud_emb, part_len, dim, device):
     for v_spl, a_spl in list(
            zip(vid_emb.split(part_len, dim=dim),
                aud_emb.split(part_len, dim=dim))):
-        dist_chunk.append(func(v_spl
+        dist_chunk.append(func(v_spl, a_spl))
     dist = torch.cat(dist_chunk, dim - 1)
     return dist

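For reference, the updated helper splits both embeddings into part_len-sized chunks along dim, applies func chunk by chunk, and stitches the results back together along dim - 1, which keeps peak GPU memory bounded by one chunk. A self-contained sketch of how it might be exercised; the toy func, tensor shapes, and part_len below are illustrative assumptions rather than values from the app:

import torch

def run_func_in_parts(func, vid_emb, aud_emb, part_len, dim):
    # Apply func to matching chunks of the two embeddings and
    # concatenate the chunk outputs along dim - 1.
    dist_chunk = []
    for v_spl, a_spl in zip(vid_emb.split(part_len, dim=dim),
                            aud_emb.split(part_len, dim=dim)):
        dist_chunk.append(func(v_spl, a_spl))
    return torch.cat(dist_chunk, dim - 1)

# Toy func whose output carries the chunked axis at position dim - 1,
# so the concatenation reassembles the full length (here 25 = 10 + 10 + 5).
vid = torch.randn(2, 8, 5, 25)
aud = torch.randn(2, 8, 5, 25)
out = run_func_in_parts(lambda v, a: (v * a).sum(dim=1), vid, aud, part_len=10, dim=3)
print(out.shape)  # torch.Size([2, 5, 25])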