Markus Clauss DIRU Vetsuisse Claude committed on
Commit d387b61 · 1 Parent(s): 424d0e4

Use HF_TOKEN from environment variable for automatic model loading


- Remove the manual token input field from the interface
- Read HF_TOKEN from environment variables (see the sketch after this list)
- Auto-load the model on app startup
- Simplify the user experience: no manual token entry needed
- Users now set HF_TOKEN in Space settings > Variables and secrets
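
For context, a minimal sketch of the pattern this commit adopts, assuming the transformers library; the from_pretrained calls are illustrative stand-ins, not this Space's actual loading code:

```python
# Sketch of the env-token pattern (illustrative, not this Space's code).
import os
from transformers import AutoModelForCausalLM, AutoTokenizer

# Set under Space settings > Variables and secrets, or exported locally.
HF_TOKEN = os.environ.get("HF_TOKEN")

def load_model():
    """Load the gated model only when a token is present."""
    if not HF_TOKEN:
        raise RuntimeError("No HuggingFace token found. Please set HF_TOKEN.")
    model_name = "swiss-ai/Apertus-8B-Instruct-2509"
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=HF_TOKEN)
    model = AutoModelForCausalLM.from_pretrained(model_name, token=HF_TOKEN)
    return model, tokenizer
```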

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>

Files changed (1)
  1. app.py +33 -34
app.py CHANGED
```diff
@@ -48,15 +48,23 @@ except ImportError:
 # Global variables for model and tokenizer
 model = None
 tokenizer = None
+model_loaded = False
+
+# Get HF token from environment
+HF_TOKEN = os.environ.get('HF_TOKEN', None)
 
 @spaces.GPU
-def load_model(hf_token):
-    """Load Apertus model with HuggingFace token"""
-    global model, tokenizer
-
-    if not hf_token or not hf_token.startswith("hf_"):
-        return "❌ Invalid HuggingFace token. Must start with 'hf_'"
-
+def load_model():
+    """Load Apertus model with HuggingFace token from environment"""
+    global model, tokenizer, model_loaded
+
+    if model_loaded:
+        return "✅ Model already loaded!"
+
+    hf_token = HF_TOKEN
+    if not hf_token:
+        return "❌ No HuggingFace token found. Please set HF_TOKEN environment variable."
+
     model_name = "swiss-ai/Apertus-8B-Instruct-2509"
 
     try:
@@ -96,6 +104,7 @@ def load_model(hf_token):
         # Check for xIELU optimization status
         xielu_status = "✅ CUDA xIELU Active" if XIELU_AVAILABLE and torch.cuda.is_available() else "🤗 HuggingFace Optimized"
 
+        model_loaded = True
         if memory_usage > 0:
             return f"✅ Model loaded successfully!\n📊 Parameters: {total_params:,}\n💾 Memory: {memory_usage:.1f} GB\n🚀 Optimization: {xielu_status}"
         else:
@@ -108,9 +117,12 @@ def load_model(hf_token):
 def chat_with_apertus(message, max_tokens=300):
     """Simple chat function"""
     global model, tokenizer
-
+
+    # Try to load model if not loaded
     if model is None or tokenizer is None:
-        return "❌ Please load the model first by entering your HuggingFace token."
+        load_result = load_model()
+        if "❌" in load_result:
+            return load_result
 
     try:
         formatted_prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
@@ -2159,31 +2171,14 @@ def create_interface():
         </div>
         """)
 
-        with gr.Row():
-            with gr.Column(scale=2):
-                hf_token = gr.Textbox(
-                    label="🗝️ HuggingFace Token",
-                    placeholder="hf_...",
-                    type="password",
-                    info="Required to access swiss-ai/Apertus-8B-Instruct-2509. Get your token from: https://huggingface.co/settings/tokens",
-                    container=True
-                )
-            with gr.Column(scale=1):
-                load_btn = gr.Button(
-                    "🇨🇭 Load Apertus Model",
-                    variant="primary",
-                    size="lg",
-                    elem_classes="auth-button"
-                )
-
-        with gr.Row():
-            model_status = gr.Textbox(
-                label="📊 Model Status",
-                interactive=False,
-                container=True
-            )
-
-        load_btn.click(load_model, inputs=[hf_token], outputs=[model_status])
+        # Model Status Display
+        model_status = gr.Textbox(
+            label="📊 Model Status",
+            value="⏳ Loading Apertus model...",
+            interactive=False,
+            container=True
+        )
+
 
         # Main Interface Tabs
         with gr.Tabs():
@@ -2202,6 +2197,7 @@ def create_interface():
                 chat_output = gr.Markdown(label="Apertus Response")
 
                 chat_btn.click(chat_with_apertus, inputs=[chat_input, max_tokens], outputs=[chat_output])
+                chat_input.submit(chat_with_apertus, inputs=[chat_input, max_tokens], outputs=[chat_output])
 
             # Attention Analysis Tab
             with gr.TabItem("👁️ Attention Patterns"):
@@ -2462,6 +2458,9 @@ def create_interface():
         </div>
         """)
 
+    # Auto-load model on startup
+    demo.load(load_model, outputs=[model_status])
+
     return demo
 
 # Launch the app
```
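
The auto-load wiring uses Gradio's Blocks.load event, which runs a callback once the page renders and writes its return value into the status box; until load_model() returns, users see the initial "⏳ Loading Apertus model..." text, with chat_with_apertus() lazy-loading as a fallback. A self-contained sketch of just that mechanism, where startup_check is a hypothetical stand-in for load_model():

```python
# Sketch of the demo.load() startup pattern; startup_check is a
# hypothetical stand-in for this Space's load_model().
import os
import gradio as gr

def startup_check():
    # Mirror the commit's token guard, reporting status instead of loading.
    if os.environ.get("HF_TOKEN"):
        return "✅ HF_TOKEN found; model loading can proceed."
    return "❌ No HuggingFace token found. Please set HF_TOKEN environment variable."

with gr.Blocks() as demo:
    model_status = gr.Textbox(label="📊 Model Status",
                              value="⏳ Loading...", interactive=False)
    # Blocks.load() fires when the page loads and updates model_status.
    demo.load(startup_check, outputs=[model_status])

if __name__ == "__main__":
    demo.launch()
```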