Spaces:

vpkprasanna
/

TokenizerViz

Sleeping

App Files Files Community

Prasanna Kumar committed on Sep 12, 2024

Commit

007d05b

1 Parent(s): 0d3569b

Added validation for the Token IDs input

Browse files

Files changed (1) hide show

app.py +20 -3

app.py CHANGED Viewed

@@ -12,7 +12,7 @@ import anthropic
 model_path = "models/"
 # Available models
-MODELS = ["Meta-Llama-3.1-8B", "gemma-2b", "gpt-3.5-turbo","gpt-4","gpt-4o" "Claude-3-Sonnet"]
 openai_models = ["gpt-3.5-turbo","gpt-4","gpt-4o"]
 # Color palette visible on both light and dark themes
 COLOR_PALETTE = [
@@ -38,6 +38,20 @@ def create_vertical_histogram(data, title):
     )
     return fig
 def process_text(text: str, model_name: str, api_key: str = None):
     if model_name in ["Meta-Llama-3.1-8B", "gemma-2b"]:
         tokenizer = AutoTokenizer.from_pretrained(model_path + model_name)
@@ -102,6 +116,9 @@ def create_html_tokens(tokens):
     return html_output
 def process_input(input_type, input_value, model_name, api_key):
     if input_type == "Text":
         text, tokens, token_ids = process_text(text=input_value, model_name=model_name, api_key=api_key)
     elif input_type == "Token IDs":
@@ -150,7 +167,7 @@ with gr.Blocks() as iface:
         input_type = gr.Radio(["Text", "Token IDs"], label="Input Type", value="Text")
         model_name = gr.Dropdown(choices=MODELS, label="Select Model", value=MODELS[0])
-    api_key = gr.Textbox(label="API Key Claude models)", type="password")
     input_text = gr.Textbox(lines=5, label="Input")
     with gr.Row():
@@ -181,7 +198,7 @@ with gr.Blocks() as iface:
     submit_button.click(
         process_input,
-        inputs=[input_type, input_text, model_name, api_key],
         outputs=[analysis_output, text_output, tokens_output, token_ids_output, words_plot, special_chars_plot, numbers_plot]
     )

 model_path = "models/"
 # Available models
+MODELS = ["Meta-Llama-3.1-8B", "gemma-2b", "gpt-3.5-turbo","gpt-4","gpt-4o"]
 openai_models = ["gpt-3.5-turbo","gpt-4","gpt-4o"]
 # Color palette visible on both light and dark themes
 COLOR_PALETTE = [
     )
     return fig
def validate_input(input_type: str, input_value) -> tuple[bool, str]:
    """Check that ``input_value`` is well-formed for the selected input type.

    Args:
        input_type: Either ``"Text"`` or ``"Token IDs"``. Any other value is
            not validated and is reported as valid.
        input_value: The raw value to check. For ``"Text"`` it must be a
            ``str``; for ``"Token IDs"`` it must be the source text of a
            Python list literal containing only integers, e.g. ``"[1, 2, 3]"``.

    Returns:
        A ``(is_valid, error_message)`` pair. ``error_message`` is the empty
        string when ``is_valid`` is ``True``.
    """
    if input_type == "Text":
        if not isinstance(input_value, str):
            return False, "Input must be a string for Text input type."
    elif input_type == "Token IDs":
        try:
            token_ids = ast.literal_eval(input_value)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # literal_eval may raise any of these on malformed input (for
            # example TypeError for "{[1]: 2}"); report them all as a format
            # error instead of letting them propagate to the caller.
            return False, "Invalid Token IDs format. Please provide a valid list of integers."
        # bool is a subclass of int, so it is rejected explicitly: True/False
        # are not meaningful token ids.
        if not isinstance(token_ids, list) or not all(
            isinstance(token_id, int) and not isinstance(token_id, bool)
            for token_id in token_ids
        ):
            return False, "Token IDs must be a list of integers."
    return True, ""
 def process_text(text: str, model_name: str, api_key: str = None):
     if model_name in ["Meta-Llama-3.1-8B", "gemma-2b"]:
         tokenizer = AutoTokenizer.from_pretrained(model_path + model_name)
     return html_output
 def process_input(input_type, input_value, model_name, api_key):
+    is_valid, error_message = validate_input(input_type, input_value)
+    if not is_valid:
+        raise gr.Error(error_message)
     if input_type == "Text":
         text, tokens, token_ids = process_text(text=input_value, model_name=model_name, api_key=api_key)
     elif input_type == "Token IDs":
         input_type = gr.Radio(["Text", "Token IDs"], label="Input Type", value="Text")
         model_name = gr.Dropdown(choices=MODELS, label="Select Model", value=MODELS[0])
+    # api_key = gr.Textbox(label="API Key Claude models)", type="password")
     input_text = gr.Textbox(lines=5, label="Input")
     with gr.Row():
     submit_button.click(
         process_input,
+        inputs=[input_type, input_text, model_name],
         outputs=[analysis_output, text_output, tokens_output, token_ids_output, words_plot, special_chars_plot, numbers_plot]
     )