Spaces:
Sleeping
Sleeping
| import os | |
| import tempfile | |
| import shutil | |
| import gradio as gr | |
| from langchain.embeddings import SentenceTransformerEmbeddings | |
| from langchain.vectorstores import Chroma | |
| from langchain.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.chains import RetrievalQA | |
| from langchain.llms import CTransformers | |
# Set up the local model
def load_llm():
    """Build and return the local LLM wrapper used to generate answers.

    Returns:
        A ``CTransformers`` instance configured for the
        ``TheBloke/Mistral-7B-Instruct-v0.1-GGUF`` repository, using the
        ``mistral-7b-instruct-v0.1.Q4_K_M.gguf`` model file.
    """
    # Generation settings are passed through unchanged as the ``config`` dict.
    generation_config = {"max_new_tokens": 1024, "temperature": 0.1}
    llm = CTransformers(
        model="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
        model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
        model_type="mistral",
        config=generation_config,
    )
    return llm
# Load documents from a file path
def load_documents(file_path):
    """Load a PDF, TXT or DOCX file with the loader matching its extension.

    The extension is compared case-insensitively, so a name such as
    ``REPORT.PDF`` is accepted just like ``report.pdf``.

    Args:
        file_path: Path of the file to load.

    Returns:
        Whatever the selected loader's ``load()`` method returns.

    Raises:
        ValueError: If the extension is not ``.pdf``, ``.txt`` or ``.docx``.
    """
    # Lower-case only for the comparison; the loader still gets the original path.
    lowered = str(file_path).lower()
    if lowered.endswith(".pdf"):
        loader = PyPDFLoader(file_path)
    elif lowered.endswith(".txt"):
        loader = TextLoader(file_path, encoding='utf-8')
    elif lowered.endswith(".docx"):
        loader = Docx2txtLoader(file_path)
    else:
        raise ValueError("نوع الملف غير مدعوم.")
    return loader.load()
# Process the file and build the question-answering chain
def process_file(file_path):
    """Index one document and return a retrieval QA chain over it.

    The file is loaded with ``load_documents``, split into 500-character
    chunks with a 100-character overlap, embedded, stored in a Chroma vector
    store, and wrapped in a "stuff" ``RetrievalQA`` chain that retrieves the
    top 3 chunks per query.

    Args:
        file_path: Path of the document to index.

    Returns:
        A ``RetrievalQA`` chain (source documents are not returned).

    Raises:
        ValueError: If the file type is unsupported (from ``load_documents``)
            or if the document yields no text chunks to index.
    """
    import uuid  # function-scope import: keeps this block self-contained

    documents = load_documents(file_path)
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    texts = text_splitter.split_documents(documents)
    if not texts:
        # Fail with a clear message instead of an opaque vector-store error.
        raise ValueError("لا يحتوي الملف على نص قابل للقراءة.")

    embeddings = SentenceTransformerEmbeddings(
        model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    )
    # Use a fresh collection per call so this file's chunks are not mixed with
    # chunks indexed by earlier calls under the default collection name.
    # NOTE(review): whether the default in-memory collection is shared between
    # calls depends on the installed chromadb version — confirm.
    db = Chroma.from_documents(
        texts,
        embeddings,
        collection_name=f"doc_{uuid.uuid4().hex}",
    )
    retriever = db.as_retriever(search_kwargs={"k": 3})
    llm = load_llm()
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=False
    )
    return qa
# Module-level handle to the most recently built QA chain
qa_chain = None


# Question-handling callback
def ask_question(file, question):
    """Answer ``question`` about the uploaded ``file`` and return HTML.

    Args:
        file: The upload from the file input: either a filesystem path string
            or an object whose ``name`` attribute is that path. ``None`` means
            nothing was uploaded.
        question: The user's question; ``None``, empty or whitespace-only
            values are treated as missing.

    Returns:
        A right-to-left ``<div>`` containing the HTML-escaped answer, or a red
        right-to-left ``<div>`` with a validation or error message. Failures
        while processing the file or running the chain are reported in the
        returned HTML rather than raised.
    """
    global qa_chain
    import html  # function-scope import: keeps this block self-contained

    if file is None or not question or not question.strip():
        return "<div dir='rtl' style='color:red;'>الرجاء تحميل ملف وكتابة سؤال.</div>"

    try:
        # Work from the upload's path instead of copying bytes out of the
        # upload object: the path is available in both shapes handled here,
        # and no extra temporary file is left behind afterwards.
        # NOTE(review): assumes the path keeps the original file extension —
        # confirm against the installed Gradio version.
        source_path = file if isinstance(file, str) else file.name
        # The chain is rebuilt from the file on every call.
        qa_chain = process_file(source_path)
        answer = qa_chain.run(question)
    except Exception as e:
        # Escape the message: it may contain text taken from the document.
        return f"<div dir='rtl' style='color:red;'>حدث خطأ أثناء المعالجة: {html.escape(str(e))}</div>"
    # Escape the answer so model output cannot inject markup into the page.
    return f"<div dir='rtl' style='text-align: right;'>{html.escape(str(answer))}</div>"
# Gradio interface
# NOTE(review): the nesting below was reconstructed from a flattened paste;
# confirm that the Row is meant to hold only the two input components.
with gr.Blocks(
    title="Smart PDF Assistant",
    theme=gr.themes.Soft(),
) as demo:
    # Right-aligned heading and right-to-left usage instructions.
    gr.Markdown("<h2 style='text-align: right;'>🧠📚 مساعد الوثائق الذكي</h2>")
    gr.Markdown("<div dir='rtl'>قم برفع ملف PDF أو DOCX أو TXT، ثم اطرح أي سؤال حول محتواه.</div>")

    # Inputs side by side: the document upload and the question box.
    with gr.Row():
        file_input = gr.File(
            label="📎 ارفع ملفك",
            file_types=[".pdf", ".docx", ".txt"],
        )
        question_input = gr.Textbox(
            label="❓ اكتب سؤالك هنا",
            placeholder="ما هو ملخص هذا الملف؟",
        )

    # Output area for the HTML returned by ask_question.
    answer_output = gr.HTML(label="💬 الإجابة")
    ask_button = gr.Button("🔍 استعلم")

    # Clicking the button sends (file, question) to ask_question.
    ask_button.click(
        fn=ask_question,
        inputs=[file_input, question_input],
        outputs=answer_output,
    )

# Start the app as soon as the module runs.
demo.launch()