Commit 16b19d3 · Parent(s): 39c830f · Update app

Files changed:
- app.py (+3 -4)
- utils/inference_utils.py (+2 -2)

app.py CHANGED
@@ -877,7 +877,7 @@ def calc_av_scores(vid_emb, aud_emb, model):
     '''

     scores = calc_att_map(vid_emb, aud_emb, model)
-    att_map = logsoftmax_2d(scores)
+    att_map = logsoftmax_2d(torch.Tensor(scores))
     scores = scores.mean(-1)

     return scores, att_map
@@ -902,12 +902,11 @@ def calc_att_map(vid_emb, aud_emb, model):
         vid_emb,
         aud_emb,
         part_len=10,
-        dim=3,
-        device=device)
+        dim=3)

     scores = model.logits_scale(scores[..., None]).squeeze(-1)

-    return scores
+    return scores.cpu().numpy()

 def generate_video(frames, audio_file, video_fname):

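The net effect of this change: calc_att_map now returns a NumPy array (scores.cpu().numpy()) instead of a GPU tensor, so calc_av_scores re-wraps it with torch.Tensor(...) before calling logsoftmax_2d, while the following .mean(-1) runs on the NumPy array. A minimal sketch of that data flow, assuming logsoftmax_2d takes a log-softmax over the flattened trailing dimensions and using made-up shapes (neither detail is taken from app.py):

import numpy as np
import torch
import torch.nn.functional as F

def logsoftmax_2d(logits):
    # Assumed behaviour of the helper used in app.py: log-softmax over the
    # dims after the first two, flattened and then reshaped back.
    flat = logits.reshape(list(logits.shape[:2]) + [-1])
    return F.log_softmax(flat, dim=-1).reshape(logits.shape)

scores = np.random.randn(1, 5, 7, 7).astype(np.float32)  # stand-in for calc_att_map output
att_map = logsoftmax_2d(torch.Tensor(scores))  # torch.Tensor copies the NumPy array into a float32 CPU tensor
scores = scores.mean(-1)                       # NumPy also supports .mean(-1), so the rest of calc_av_scores is unchanged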
utils/inference_utils.py CHANGED
@@ -1,7 +1,7 @@
 import torch
 import numpy as np

-def run_func_in_parts(func, vid_emb, aud_emb, part_len, dim, device):
+def run_func_in_parts(func, vid_emb, aud_emb, part_len, dim):
     """
     Run given function in parts, spliting the inputs on dimension dim
     This is used to save memory when inputs too large to compute on gpu
@@ -10,7 +10,7 @@ def run_func_in_parts(func, vid_emb, aud_emb, part_len, dim, device):
     for v_spl, a_spl in list(
            zip(vid_emb.split(part_len, dim=dim),
                aud_emb.split(part_len, dim=dim))):
-        dist_chunk.append(func(v_spl
+        dist_chunk.append(func(v_spl, a_spl))
     dist = torch.cat(dist_chunk, dim - 1)
     return dist

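For reference, the updated helper splits both embeddings into part_len-sized chunks along dim, applies func chunk by chunk, and stitches the results back together along dim - 1, which keeps peak GPU memory bounded by one chunk. A self-contained sketch of how it might be exercised; the toy func, tensor shapes, and part_len below are illustrative assumptions rather than values from the app:

import torch

def run_func_in_parts(func, vid_emb, aud_emb, part_len, dim):
    # Apply func to matching chunks of the two embeddings and
    # concatenate the chunk outputs along dim - 1.
    dist_chunk = []
    for v_spl, a_spl in zip(vid_emb.split(part_len, dim=dim),
                            aud_emb.split(part_len, dim=dim)):
        dist_chunk.append(func(v_spl, a_spl))
    return torch.cat(dist_chunk, dim - 1)

# Toy func whose output carries the chunked axis at position dim - 1,
# so the concatenation reassembles the full length (here 25 = 10 + 10 + 5).
vid = torch.randn(2, 8, 5, 25)
aud = torch.randn(2, 8, 5, 25)
out = run_func_in_parts(lambda v, a: (v * a).sum(dim=1), vid, aud, part_len=10, dim=3)
print(out.shape)  # torch.Size([2, 5, 25])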