Markus Clauss DIRU Vetsuisse Claude committed on
Commit
29f4357
·
1 Parent(s): b65eda7

Fix ZeroGPU compatibility for HuggingFace Spaces

Browse files

- Add @spaces.GPU decorators to all GPU-intensive functions
- Import spaces module for ZeroGPU support
- Add xIELU optimization detection and status reporting
- Update requirements.txt with xIELU installation notes
- Improve UI styling and Swiss innovation messaging

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>

Files changed (2) hide show
  1. app.py +29 -3
  2. requirements.txt +5 -1
app.py CHANGED
@@ -13,6 +13,7 @@ import torch
13
  from transformers import AutoTokenizer, AutoModelForCausalLM
14
  import warnings
15
  import os
 
16
 
17
  # Set environment variables to reduce verbosity and warnings
18
  os.environ['TRANSFORMERS_VERBOSITY'] = 'error'
@@ -20,10 +21,20 @@ os.environ['TOKENIZERS_PARALLELISM'] = 'false'
20
 
21
  warnings.filterwarnings('ignore')
22
 
 
 
 
 
 
 
 
 
 
23
  # Global variables for model and tokenizer
24
  model = None
25
  tokenizer = None
26
 
 
27
  def load_model(hf_token):
28
  """Load Apertus model with HuggingFace token"""
29
  global model, tokenizer
@@ -52,11 +63,18 @@ def load_model(hf_token):
52
  total_params = sum(p.numel() for p in model.parameters())
53
  memory_usage = torch.cuda.memory_allocated() / 1024**3 if torch.cuda.is_available() else 0
54
 
55
- return f"✅ Model loaded successfully!\n📊 Parameters: {total_params:,}\n💾 Memory: {memory_usage:.1f} GB" if memory_usage > 0 else f"✅ Model loaded successfully!\n📊 Parameters: {total_params:,}\n💾 CPU mode"
 
 
 
 
 
 
56
 
57
  except Exception as e:
58
  return f"❌ Failed to load model: {str(e)}\n💡 Check your token and model access permissions."
59
 
 
60
  def chat_with_apertus(message, max_tokens=300):
61
  """Simple chat function"""
62
  global model, tokenizer
@@ -99,6 +117,7 @@ You are Apertus, a helpful Swiss AI assistant. You are transparent, multilingual
99
  except Exception as e:
100
  return f"❌ Error: {str(e)}"
101
 
 
102
  def analyze_attention(text, layer=15):
103
  """Analyze attention patterns"""
104
  global model, tokenizer
@@ -153,6 +172,7 @@ def analyze_attention(text, layer=15):
153
  except Exception as e:
154
  return None, f"❌ Error analyzing attention: {str(e)}"
155
 
 
156
  def analyze_token_predictions(text):
157
  """Analyze next token predictions"""
158
  global model, tokenizer
@@ -206,6 +226,7 @@ def analyze_token_predictions(text):
206
  except Exception as e:
207
  return None, f"❌ Error analyzing predictions: {str(e)}"
208
 
 
209
  def analyze_layer_evolution(text):
210
  """Analyze how representations evolve through layers"""
211
  global model, tokenizer
@@ -274,6 +295,7 @@ def analyze_layer_evolution(text):
274
  except Exception as e:
275
  return None, f"❌ Error analyzing layer evolution: {str(e)}"
276
 
 
277
  def analyze_weights(layer_num, layer_type):
278
  """Analyze weight distribution with research-based metrics"""
279
  global model
@@ -615,7 +637,7 @@ def create_interface():
615
  <p style="font-size: 1.1em; margin-bottom: 15px; color: #f8f9fa; font-weight: 500;">
616
  Unlike ChatGPT or Claude, you can see <strong>EVERYTHING</strong> happening inside the AI model:
617
  </p>
618
- <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 15px; margin: 20px 0;">
619
  <div style="background: rgba(13, 20, 33, 0.8); padding: 20px; border-radius: 10px; border-left: 4px solid #4dabf7; box-shadow: 0 4px 12px rgba(77, 171, 247, 0.2); border: 1px solid rgba(77, 171, 247, 0.3);">
620
  <strong style="color: #74c0fc; font-size: 1.1em;">🧠 Attention Patterns</strong><br>
621
  <span style="color: #ced4da; line-height: 1.4;">Which words the AI focuses on (like eye-tracking during reading)</span>
@@ -632,9 +654,13 @@ def create_interface():
632
  <strong style="color: #66d9ef; font-size: 1.1em;">🔍 Thinking Process</strong><br>
633
  <span style="color: #ced4da; line-height: 1.4;">Step-by-step how responses are generated</span>
634
  </div>
 
 
 
 
635
  </div>
636
  <p style="text-align: center; font-size: 1.3em; margin-top: 25px; color: #ff6b6b; font-weight: 600;">
637
- <strong>This is complete AI transparency - no black boxes! 🇨🇭</strong>
638
  </p>
639
  </div>
640
  """)
 
13
  from transformers import AutoTokenizer, AutoModelForCausalLM
14
  import warnings
15
  import os
16
+ import spaces
17
 
18
  # Set environment variables to reduce verbosity and warnings
19
  os.environ['TRANSFORMERS_VERBOSITY'] = 'error'
 
21
 
22
  warnings.filterwarnings('ignore')
23
 
24
+ # Try to import CUDA xIELU optimization for Apertus
25
+ try:
26
+ from xielu.ops.wrappers import XIELU
27
+ XIELU_AVAILABLE = True
28
+ print("✅ CUDA xIELU optimization available - Apertus performance enhanced!")
29
+ except ImportError:
30
+ XIELU_AVAILABLE = False
31
+ print("ℹ️ CUDA xIELU not available - using fallback (install: pip install git+https://github.com/nickjbrowning/XIELU)")
32
+
33
  # Global variables for model and tokenizer
34
  model = None
35
  tokenizer = None
36
 
37
+ @spaces.GPU
38
  def load_model(hf_token):
39
  """Load Apertus model with HuggingFace token"""
40
  global model, tokenizer
 
63
  total_params = sum(p.numel() for p in model.parameters())
64
  memory_usage = torch.cuda.memory_allocated() / 1024**3 if torch.cuda.is_available() else 0
65
 
66
+ # Check for xIELU optimization status
67
+ xielu_status = "✅ CUDA xIELU Active" if XIELU_AVAILABLE and torch.cuda.is_available() else "⚠️ xIELU Fallback"
68
+
69
+ if memory_usage > 0:
70
+ return f"✅ Model loaded successfully!\n📊 Parameters: {total_params:,}\n💾 Memory: {memory_usage:.1f} GB\n🚀 Optimization: {xielu_status}"
71
+ else:
72
+ return f"✅ Model loaded successfully!\n📊 Parameters: {total_params:,}\n💾 CPU mode\n🚀 Optimization: {xielu_status}"
73
 
74
  except Exception as e:
75
  return f"❌ Failed to load model: {str(e)}\n💡 Check your token and model access permissions."
76
 
77
+ @spaces.GPU
78
  def chat_with_apertus(message, max_tokens=300):
79
  """Simple chat function"""
80
  global model, tokenizer
 
117
  except Exception as e:
118
  return f"❌ Error: {str(e)}"
119
 
120
+ @spaces.GPU
121
  def analyze_attention(text, layer=15):
122
  """Analyze attention patterns"""
123
  global model, tokenizer
 
172
  except Exception as e:
173
  return None, f"❌ Error analyzing attention: {str(e)}"
174
 
175
+ @spaces.GPU
176
  def analyze_token_predictions(text):
177
  """Analyze next token predictions"""
178
  global model, tokenizer
 
226
  except Exception as e:
227
  return None, f"❌ Error analyzing predictions: {str(e)}"
228
 
229
+ @spaces.GPU
230
  def analyze_layer_evolution(text):
231
  """Analyze how representations evolve through layers"""
232
  global model, tokenizer
 
295
  except Exception as e:
296
  return None, f"❌ Error analyzing layer evolution: {str(e)}"
297
 
298
+ @spaces.GPU
299
  def analyze_weights(layer_num, layer_type):
300
  """Analyze weight distribution with research-based metrics"""
301
  global model
 
637
  <p style="font-size: 1.1em; margin-bottom: 15px; color: #f8f9fa; font-weight: 500;">
638
  Unlike ChatGPT or Claude, you can see <strong>EVERYTHING</strong> happening inside the AI model:
639
  </p>
640
+ <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); gap: 15px; margin: 20px 0;">
641
  <div style="background: rgba(13, 20, 33, 0.8); padding: 20px; border-radius: 10px; border-left: 4px solid #4dabf7; box-shadow: 0 4px 12px rgba(77, 171, 247, 0.2); border: 1px solid rgba(77, 171, 247, 0.3);">
642
  <strong style="color: #74c0fc; font-size: 1.1em;">🧠 Attention Patterns</strong><br>
643
  <span style="color: #ced4da; line-height: 1.4;">Which words the AI focuses on (like eye-tracking during reading)</span>
 
654
  <strong style="color: #66d9ef; font-size: 1.1em;">🔍 Thinking Process</strong><br>
655
  <span style="color: #ced4da; line-height: 1.4;">Step-by-step how responses are generated</span>
656
  </div>
657
+ <div style="background: rgba(13, 20, 33, 0.8); padding: 20px; border-radius: 10px; border-left: 4px solid #ff6b6b; box-shadow: 0 4px 12px rgba(255, 107, 107, 0.2); border: 1px solid rgba(255, 107, 107, 0.3);">
658
+ <strong style="color: #ff8a8a; font-size: 1.1em;">🚀 CUDA xIELU</strong><br>
659
+ <span style="color: #ced4da; line-height: 1.4;">Swiss innovation: learnable activation function with GPU acceleration</span>
660
+ </div>
661
  </div>
662
  <p style="text-align: center; font-size: 1.3em; margin-top: 25px; color: #ff6b6b; font-weight: 600;">
663
+ <strong>This is complete AI transparency + Swiss innovations! 🇨🇭</strong>
664
  </p>
665
  </div>
666
  """)
requirements.txt CHANGED
@@ -5,4 +5,8 @@ gradio>=4.0.0
5
  plotly>=5.15.0
6
  numpy>=1.24.0,<2.0.0
7
  pandas>=2.0.0
8
- scipy>=1.10.0
 
 
 
 
 
5
  plotly>=5.15.0
6
  numpy>=1.24.0,<2.0.0
7
  pandas>=2.0.0
8
+ scipy>=1.10.0
9
+
10
+ # Optional CUDA xIELU optimization (requires CUDA + compilation)
11
+ # Install separately if you have NVIDIA GPU:
12
+ # pip install git+https://github.com/nickjbrowning/XIELU