dsleo committed on
Commit
7362def
·
verified ·
1 Parent(s): 640a2eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -9
app.py CHANGED
@@ -38,20 +38,26 @@ def compute_embeddings(problems):
38
  def find_similar_problems(df, similarity_threshold=0.9):
39
  """Find similar problems using cosine similarity, optimized for speed."""
40
 
41
- st.status("🔄 Computing problem embeddings...")
 
 
 
 
42
  start_time = time.time()
43
  embeddings = compute_embeddings(df['problem'].tolist())
44
- st.success("✅ Embeddings computed!", icon="✅")
45
-
46
- st.status("🔄 Computing cosine similarity matrix...")
 
47
  similarity_matrix = util.cos_sim(embeddings, embeddings).numpy()
48
- st.success("✅ Similarity matrix computed!", icon="✅")
49
 
50
- # Use numpy.triu_indices to efficiently get upper-triangle indices (excluding diagonal)
 
 
 
51
  num_problems = len(df)
52
  upper_triangle_indices = np.triu_indices(num_problems, k=1)
53
 
54
- st.status("🔄 Filtering similar problems...")
55
  i_indices, j_indices = upper_triangle_indices
56
  similarity_scores = similarity_matrix[i_indices, j_indices]
57
 
@@ -61,14 +67,18 @@ def find_similar_problems(df, similarity_threshold=0.9):
61
  filtered_j = j_indices[mask]
62
  filtered_scores = similarity_scores[mask]
63
 
64
- # Convert results into a sorted list of tuples
65
  pairs = [
66
  (df.iloc[i]["uuid"], df.iloc[j]["uuid"], float(score))
67
  for i, j, score in zip(filtered_i, filtered_j, filtered_scores)
68
  ]
69
 
70
  sorted_pairs = sorted(pairs, key=lambda x: x[2], reverse=True)
71
-
 
 
 
 
 
72
  st.success(f"✅ Analysis complete! Found {len(sorted_pairs)} similar problems in {time.time() - start_time:.2f}s", icon="🎉")
73
 
74
  return sorted_pairs
 
38
  def find_similar_problems(df, similarity_threshold=0.9):
39
  """Find similar problems using cosine similarity, optimized for speed."""
40
 
41
+ status_msgs = [] # Store status messages to clear later
42
+
43
+ # Step 1: Compute embeddings
44
+ msg = st.status("🔄 Computing problem embeddings...")
45
+ status_msgs.append(msg)
46
  start_time = time.time()
47
  embeddings = compute_embeddings(df['problem'].tolist())
48
+
49
+ # Step 2: Compute similarity matrix
50
+ msg = st.status("🔄 Computing cosine similarity matrix...")
51
+ status_msgs.append(msg)
52
  similarity_matrix = util.cos_sim(embeddings, embeddings).numpy()
 
53
 
54
+ # Step 3: Filter top similarities
55
+ msg = st.status("🔄 Filtering similar problems...")
56
+ status_msgs.append(msg)
57
+
58
  num_problems = len(df)
59
  upper_triangle_indices = np.triu_indices(num_problems, k=1)
60
 
 
61
  i_indices, j_indices = upper_triangle_indices
62
  similarity_scores = similarity_matrix[i_indices, j_indices]
63
 
 
67
  filtered_j = j_indices[mask]
68
  filtered_scores = similarity_scores[mask]
69
 
 
70
  pairs = [
71
  (df.iloc[i]["uuid"], df.iloc[j]["uuid"], float(score))
72
  for i, j, score in zip(filtered_i, filtered_j, filtered_scores)
73
  ]
74
 
75
  sorted_pairs = sorted(pairs, key=lambda x: x[2], reverse=True)
76
+
77
+ # Step 4: Remove intermediate messages
78
+ for msg in status_msgs:
79
+ msg.empty() # Clear only the intermediate messages
80
+
81
+ # Step 5: Display final success message
82
  st.success(f"✅ Analysis complete! Found {len(sorted_pairs)} similar problems in {time.time() - start_time:.2f}s", icon="🎉")
83
 
84
  return sorted_pairs