import re
import threading

import gradio as gr
import spaces
import transformers
from transformers import pipeline

# Loading model and tokenizer
model_name = "meta-llama/Llama-3.1-8B-Instruct"

if gr.NO_RELOAD:
    pipe = pipeline(
        "text-generation",
        model=model_name,
        device_map="auto",
        torch_dtype="auto",
    )
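# NOTE: meta-llama checkpoints are gated on the Hugging Face Hub; loading
# assumes the runtime has an authorized access token (e.g. an HF_TOKEN
# secret on a Space), otherwise the download will fail.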
# Marker for detecting the final answer
ANSWER_MARKER = "**Answer**"

# Sentences that seed each step of the step-by-step reasoning
# (the first one reinserts the original question via str.format)
rethink_prepends = [
    "Now, I need to understand the following: {question} ",
    "In my opinion ",
    "Let me verify if the following is correct ",
    "Also, I should remember that ",
    "Another point to note is ",
    "And I also remember the following fact ",
    "Now I think I understand sufficiently ",
]
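# Each prepend seeds one generation pass: it is appended to the running
# prompt, the model continues from it, and that continuation is carried
# into the next step (see bot_thinking below).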
# Prompt addition for generating the final answer
final_answer_prompt = """
Based on my reasoning process so far, I will answer the original question in the language it was asked:
{question}

Here is the conclusion I've reasoned:
{reasoning_conclusion}

Based on the above reasoning, my final answer:
{ANSWER_MARKER}
"""

# Settings for displaying formulas
latex_delimiters = [
    {"left": "$$", "right": "$$", "display": True},
    {"left": "$", "right": "$", "display": False},
]
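# These delimiters are passed to gr.Chatbot below so block ($$...$$) and
# inline ($...$) math render via KaTeX.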
def reformat_math(text):
    """Convert MathJax delimiters to the syntax Gradio renders (KaTeX).

    This is a temporary fix for displaying math formulas in Gradio; so far,
    no other latex_delimiters configuration has worked as expected...
    """
    text = re.sub(r"\\\[\s*(.*?)\s*\\\]", r"$$\1$$", text, flags=re.DOTALL)
    text = re.sub(r"\\\(\s*(.*?)\s*\\\)", r"$\1$", text, flags=re.DOTALL)
    return text
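# Example: reformat_math(r"\(x^2 + 1\)") returns "$x^2 + 1$"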
def user_input(message, history_original, history_thinking):
    """Add the user input to both histories and clear the input text box"""
    return "", history_original + [
        gr.ChatMessage(role="user", content=message.replace(ANSWER_MARKER, ""))
    ], history_thinking + [
        gr.ChatMessage(role="user", content=message.replace(ANSWER_MARKER, ""))
    ]
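# ANSWER_MARKER is stripped from the user input above so the marker can only
# appear in the model's generated final answer.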
def rebuild_messages(history: list):
    """Reconstruct the messages for the model from history, dropping the intermediate thinking bubbles"""
    messages = []
    for h in history:
        # Gradio may hand history back as plain dicts; metadata can be None
        if isinstance(h, dict) and not (h.get("metadata") or {}).get("title", False):
            messages.append(h)
        elif (
            isinstance(h, gr.ChatMessage)
            and (h.metadata or {}).get("title", None) is None
            and isinstance(h.content, str)
        ):
            messages.append({"role": h.role, "content": h.content})
    return messages
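# The `spaces` import suggests this app targets ZeroGPU hardware; if so, each
# generation function needs the @spaces.GPU decorator to have a GPU attached
# while it runs. This is an assumption; the decorator is a no-op outside
# Hugging Face Spaces.
@spaces.GPU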
def bot_original(
    history: list,
    max_num_tokens: int,
    do_sample: bool,
    temperature: float,
):
    """Make the original model answer questions (without a reasoning process)"""

    # For streaming tokens from the generation thread later
    streamer = transformers.TextIteratorStreamer(
        pipe.tokenizer,  # pyright: ignore
        skip_special_tokens=True,
        skip_prompt=True,
    )
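    # pipe() blocks while it generates, so it is launched in a background
    # thread below; the streamer hands decoded tokens to this generator as
    # they arrive, enabling token-by-token UI updates.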
    # Prepare assistant message
    history.append(
        gr.ChatMessage(
            role="assistant",
            content="",
        )
    )

    # Messages to be displayed in the current chat
    messages = rebuild_messages(history[:-1])  # excluding the last empty message

    # The original model answers directly without reasoning
    t = threading.Thread(
        target=pipe,
        args=(messages,),
        kwargs=dict(
            max_new_tokens=max_num_tokens,
            streamer=streamer,
            do_sample=do_sample,
            temperature=temperature,
        ),
    )
    t.start()

    for token in streamer:
        history[-1].content += token
        history[-1].content = reformat_math(history[-1].content)
        yield history
    t.join()
    yield history
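# As above, assumed ZeroGPU allocation for the reasoning pass.
@spaces.GPU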
def bot_thinking(
    history: list,
    max_num_tokens: int,
    final_num_tokens: int,
    do_sample: bool,
    temperature: float,
):
    """Make the model answer questions with a reasoning process"""

    # For streaming tokens from the generation thread later
    streamer = transformers.TextIteratorStreamer(
        pipe.tokenizer,  # pyright: ignore
        skip_special_tokens=True,
        skip_prompt=True,
    )

    # For reinserting the question into the reasoning if needed
    question = history[-1]["content"]

    # Prepare assistant message
    history.append(
        gr.ChatMessage(
            role="assistant",
            content="",
            metadata={"title": "🧠 Thinking...", "status": "pending"},
        )
    )

    # Reasoning process to be displayed in the current chat
    messages = rebuild_messages(history)

    # Variable to store the entire reasoning process
    full_reasoning = ""
    # Run the reasoning steps
    for i, prepend in enumerate(rethink_prepends):
        if i > 0:
            messages[-1]["content"] += "\n\n"
        messages[-1]["content"] += prepend.format(question=question)

        t = threading.Thread(
            target=pipe,
            args=(messages,),
            kwargs=dict(
                max_new_tokens=max_num_tokens,
                streamer=streamer,
                do_sample=do_sample,
                temperature=temperature,
            ),
        )
        t.start()

        # Show the new prepend in the displayed history, then stream tokens
        history[-1].content += prepend.format(question=question)
        step_text = ""
        for token in streamer:
            step_text += token
            history[-1].content += token
            history[-1].content = reformat_math(history[-1].content)
            yield history
        t.join()

        # Feed this step's generation back into the prompt so the next
        # reasoning step continues from it
        messages[-1]["content"] += step_text

        # Save the accumulated reasoning so far to full_reasoning
        full_reasoning = history[-1].content
    # Reasoning complete, now generate the final answer
    history[-1].metadata = {"title": "💭 Thought Process", "status": "done"}

    # Extract the conclusion part from the reasoning process (approximately the last 1-2 paragraphs)
    reasoning_parts = full_reasoning.split("\n\n")
    reasoning_conclusion = "\n\n".join(reasoning_parts[-2:]) if len(reasoning_parts) > 2 else full_reasoning

    # Add the final answer message
    history.append(gr.ChatMessage(role="assistant", content=""))

    # Construct the message for the final answer
    final_messages = rebuild_messages(history[:-1])  # excluding the last empty message
    final_prompt = final_answer_prompt.format(
        question=question,
        reasoning_conclusion=reasoning_conclusion,
        ANSWER_MARKER=ANSWER_MARKER,
    )
    final_messages[-1]["content"] += final_prompt
    # Generate the final answer
    t = threading.Thread(
        target=pipe,
        args=(final_messages,),
        kwargs=dict(
            max_new_tokens=final_num_tokens,
            streamer=streamer,
            do_sample=do_sample,
            temperature=temperature,
        ),
    )
    t.start()

    # Stream the final answer
    for token in streamer:
        history[-1].content += token
        history[-1].content = reformat_math(history[-1].content)
        yield history
    t.join()
    yield history
with gr.Blocks(fill_height=True, title="ThinkFlow") as demo:
    # Title and description
    gr.Markdown("# ThinkFlow")
    gr.Markdown("### A platform that adds step-by-step reasoning to any LLM, with no changes to the model itself")

    # Features and benefits section
    with gr.Accordion("✨ Features & Benefits", open=True):
        gr.Markdown("""
        - **Enhanced Reasoning**: Transform any LLM into a step-by-step reasoning engine without model modifications
        - **Transparency**: Visualize the model's thought process alongside direct answers
        - **Improved Accuracy**: See how guided reasoning leads to more accurate solutions for complex problems
        - **Educational Tool**: Perfect for teaching critical thinking and problem-solving approaches
        - **Versatile Application**: Works with mathematical problems, logical puzzles, and complex questions
        - **Side-by-Side Comparison**: Compare standard model responses with reasoning-enhanced outputs
        """)
    with gr.Row(scale=1):
        with gr.Column(scale=2):
            gr.Markdown("## Before (Original)")
            chatbot_original = gr.Chatbot(
                scale=1,
                type="messages",
                latex_delimiters=latex_delimiters,
                label="Original Model (No Reasoning)",
            )

        with gr.Column(scale=2):
            gr.Markdown("## After (Thinking)")
            chatbot_thinking = gr.Chatbot(
                scale=1,
                type="messages",
                latex_delimiters=latex_delimiters,
                label="Model with Reasoning",
            )

    with gr.Row():
        # Define the msg textbox first
        msg = gr.Textbox(
            submit_btn=True,
            label="",
            show_label=False,
            placeholder="Enter your question here.",
            autofocus=True,
        )
    # Examples section, placed after the msg variable is defined
    with gr.Accordion("EXAMPLES", open=False):
        examples = gr.Examples(
            examples=[
                "[Source: MATH-500] How many numbers among the first 100 positive integers are divisible by 3, 4, and 5?",
                "[Source: MATH-500] In the land of Ink, the money system is unique. 1 trinket equals 4 blinkets, and 3 blinkets equal 7 drinkits. What is the value of 56 drinkits in trinkets?",
                "[Source: MATH-500] The average age of Amy, Ben, and Chris is 6 years. Four years ago, Chris was the same age as Amy is now. Four years from now, Ben's age will be $\\frac{3}{5}$ of Amy's age at that time. How old is Chris now?",
                "[Source: MATH-500] A bag contains yellow and blue marbles. Currently, the ratio of blue marbles to yellow marbles is 4:3. After adding 5 blue marbles and removing 3 yellow marbles, the ratio becomes 7:3. How many blue marbles were in the bag before any were added?",
            ],
            inputs=msg,
        )
    with gr.Row():
        with gr.Column():
            gr.Markdown("""## Parameter Adjustment""")
            num_tokens = gr.Slider(
                50,
                4000,
                2000,
                step=1,
                label="Maximum tokens per reasoning step",
                interactive=True,
            )
            final_num_tokens = gr.Slider(
                50,
                4000,
                2000,
                step=1,
                label="Maximum tokens for final answer",
                interactive=True,
            )
            do_sample = gr.Checkbox(True, label="Use sampling")
            temperature = gr.Slider(0.1, 1.0, 0.7, step=0.1, label="Temperature")
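            # Note: temperature only takes effect while "Use sampling" is
            # checked; with do_sample=False the pipeline decodes greedily
            # and ignores this slider.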
    # Community link at the bottom
    gr.Markdown("<p style='font-size: 12px;'>Community: <a href='https://discord.gg/openfreeai' target='_blank'>https://discord.gg/openfreeai</a></p>")
    # When the user submits a message, both bots respond: the .then() chain
    # runs them one after another (first the original bot, then the thinking
    # bot), not in parallel
    msg.submit(
        user_input,
        [msg, chatbot_original, chatbot_thinking],  # inputs
        [msg, chatbot_original, chatbot_thinking],  # outputs
    ).then(
        bot_original,
        [
            chatbot_original,
            num_tokens,
            do_sample,
            temperature,
        ],
        chatbot_original,  # save the new history in outputs
    ).then(
        bot_thinking,
        [
            chatbot_thinking,
            num_tokens,
            final_num_tokens,
            do_sample,
            temperature,
        ],
        chatbot_thinking,  # save the new history in outputs
    )
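# queue() enables Gradio's request queue, which the streaming (generator)
# handlers above rely on for incremental updates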
if __name__ == "__main__":
    demo.queue().launch()