"""Gradio app that answers a question with both a fine-tuned model and a RAG
pipeline, running the two backends concurrently and streaming each answer to
the UI as soon as it is ready."""

import time
from concurrent.futures import ThreadPoolExecutor, as_completed

import gradio as gr

import fine_tuning
import rag


def combined_generate(prompt, max_tokens):
    """Yield (fine-tuned answer, RAG answer) tuples as each backend finishes.

    Runs ``fine_tuning.generate_answer`` and ``rag.generate_answer`` in
    parallel threads. Yields a partial update the moment either one
    completes, so the faster answer appears in the UI without waiting for
    the slower one. Each answer is suffixed with its wall-clock duration.

    Args:
        prompt: The user's question.
        max_tokens: Token budget forwarded to the fine-tuned model only
            (the RAG pipeline takes no such parameter).

    Yields:
        Tuple of two display strings: (fine-tuned output, RAG output).
    """
    with ThreadPoolExecutor() as executor:
        start = time.time()  # both tasks are submitted back-to-back, so one start time suffices
        futures = {
            executor.submit(fine_tuning.generate_answer, prompt, max_tokens): "Fine-tuned",
            executor.submit(rag.generate_answer, prompt): "RAG",
        }

        answers = {"Fine-tuned": "", "RAG": ""}

        for future in as_completed(futures):
            model_name = futures[future]
            elapsed = round(time.time() - start, 2)
            try:
                result = future.result()
            except Exception as exc:  # one backend failing must not kill the other's answer
                result = f"āš ļø {model_name} failed: {exc}"
            answers[model_name] = f"{result}\n\nā± Took {elapsed}s"
            # Stream the partial state: whichever backend finished first is
            # shown immediately; the other textbox stays empty until it's done.
            yield answers["Fine-tuned"], answers["RAG"]


def build_interface():
    """Construct the Gradio Interface (queue enabled for generator streaming)."""
    return gr.Interface(
        fn=combined_generate,
        inputs=[
            gr.Textbox(label="Enter your question:", lines=5, placeholder="Type your question here..."),
            gr.Slider(minimum=50, maximum=500, step=10, value=200, label="Max tokens to generate"),
        ],
        outputs=[
            gr.Textbox(label="Fine-tuned Model Answer"),
            gr.Textbox(label="RAG Answer"),
        ],
        title="Compare Fine-tuned Model vs RAG šŸ¤–šŸ“š",
        description="Ask a question and get answers from both the fine-tuned model and the RAG pipeline.",
    ).queue()


if __name__ == "__main__":
    # Load/train the fine-tuned model before serving; this is slow and has
    # side effects, so it must not run on mere import of this module.
    fine_tuning.load_and_train()
    build_interface().launch()