In [1]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import DirectoryLoader
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import CTransformers
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
import os

In [2]:
def load_pdf(data):
    """Load the PDF files found in a directory.

    Args:
        data: Path of the directory to scan. Files matching the ``*.pdf``
            glob are read with ``PyPDFLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for that directory.
    """
    return DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader).load()

In [3]:
# Load the source PDFs from the local ./data directory.
extracted_data = load_pdf(data="./data")

In [4]:
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into smaller, overlapping chunks.

    Args:
        extracted_data: The documents to split (e.g. the result of
            ``load_pdf``).
        chunk_size: Passed to ``RecursiveCharacterTextSplitter`` as
            ``chunk_size``. Defaults to 500, the value that was previously
            hard-coded.
        chunk_overlap: Passed to ``RecursiveCharacterTextSplitter`` as
            ``chunk_overlap``. Defaults to 20, the value that was previously
            hard-coded.

    Returns:
        The chunks produced by ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [5]:
# Split the loaded documents into chunks and report how many were produced.
chunks = text_split(extracted_data)
length = len(chunks)
print("Total number of chunks:", length)

Total number of chunks: 8205


In [6]:
# Preview only the first few chunks; displaying the whole list floods the
# notebook output with thousands of Document reprs.
chunks[:3]

[Document(metadata={'source': 'data\\Medical_book_merged.pdf', 'page': 1, 'page_label': '2'}, page_content='The GALE\nENCYCLOPEDIA\nof MEDICINE\nSECOND EDITION'),
 Document(metadata={'source': 'data\\Medical_book_merged.pdf', 'page': 2, 'page_label': '3'}, page_content='The GALE\nENCYCLOPEDIA\nof MEDICINE\nSECOND EDITION\nJACQUELINE L. LONGE, EDITOR\nDEIRDRE S. BLANCHFIELD, ASSOCIATE EDITOR\nVOLUME\nA-B\n1'),
 Document(metadata={'source': 'data\\Medical_book_merged.pdf', 'page': 3, 'page_label': '4'}, page_content='STAFF\nJacqueline L. Longe, Project Editor\nDeirdre S. Blanchfield, Associate Editor\nChristine B. Jeryan, Managing Editor\nDonna Olendorf, Senior Editor\nStacey Blachford, Associate Editor\nKate Kretschmann, Melissa C. McDade, Ryan\nThomason, Assistant Editors\nMark Springer, Technical Specialist\nAndrea Lopeman, Programmer/Analyst\nBarbara J. Yarrow,Manager, Imaging and Multimedia\nContent\nRobyn V . Young,Project Manager, Imaging and\nMultimedia Content\nDean Dauphinais, 

In [7]:
# Embed every chunk with the all-MiniLM-L6-v2 sentence-transformer, build a
# FAISS index from the embeddings, and persist it to ./medical_faiss_index.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
faiss_index = FAISS.from_documents(documents=chunks, embedding=embedding_model)
faiss_index.save_local(folder_path="medical_faiss_index")

  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [15]:
# Sanity check: embed a short string and display the length of the vector.
query_result = embedding_model.embed_query(text="Hello, Parthib")
len(query_result)

384

In [17]:
# Reload the index saved by this notebook's save_local("medical_faiss_index") call.
# NOTE: allow_dangerous_deserialization=True lets LangChain unpickle the stored
# docstore; only use it on index folders you created yourself.
faiss_index = FAISS.load_local(
    folder_path="medical_faiss_index",
    embeddings=embedding_model,
    allow_dangerous_deserialization=True,
)
def search_similar_documents(query, k=4):
    """Return the ``k`` chunks in ``faiss_index`` most similar to ``query``.

    Args:
        query: Text to search for.
        k: Number of results requested from the index (default 4).

    Returns:
        A list of dicts, one per hit, with the hit's ``page_content`` under
        ``"content"`` and its ``metadata`` under ``"metadata"``.
    """
    hits = []
    for doc in faiss_index.similarity_search(query, k=k):
        hits.append({"content": doc.page_content, "metadata": doc.metadata})
    return hits


# Example query: fetch the closest chunks and print the text of each one.
query = "i am suffering from fever for 2 days..provide me some medicines and home remids?"
top_results = search_similar_documents(query)

for idx, result in enumerate(top_results, start=1):
    print(f"Result {idx}:\n{result['content']}\n")


Result 1:
228 Current Essentials of Medicine
8
Relapsing Fever (Borrelia recurrentis)
■ Essentials of Diagnosis
• History of exposure to ticks or lice in endemic area
• Abrupt fever and chills, nausea, headache, arthralgia lasting 3–10
days with relapse at intervals of 1–2 weeks
• Tachycardia, hepatosplenomegaly, rash
• Spirochetes seen on blood smear during fever; serologic diagno-
sis is difﬁcult and not widely available
■ Differential Diagnosis
• Malaria
• Leptospirosis
• Meningococcemia
• Yellow fever

Result 2:
■ Treatment
• Bed rest until vital signs and ECG become normal
• Salicylates and nonsteroidal anti-inﬂammatory drugs reduce fever
and joint complaints but do not affect the natural course of the dis-
ease; rarely, corticosteroids may be used
• If streptococcal infection is still present, penicillin is indicated
• Prevention of recurrent streptococcal pharyngitis until 18 years old
(a monthly injection of benzathine penicillin is most commonly used)
■ Pearl

Result 3:
Aspiri

In [18]:
# Prompt text sent to the LLM. It contains two placeholders, {context} and
# {question}, which are filled in when the template is formatted.
# NOTE(review): "If any other related questions arise" presumably means
# *unrelated* questions — confirm the intended wording before editing, since
# any change to this text changes what the model is told.
prompt_template = """
You are a healthcare professional built by Parthib, and you can assist users with health-related issues.
Use the following pieces of information along with the LLM's knowledge to answer the user's question about diseases or healthcare.
If the following pieces provide some information, combine it with your existing knowledge to craft the most accurate and helpful response.
Include relevant details such as home remedies, medications, and other necessary actions in a clear(with highlighted and BOLD manner), point-wise manner for quick readability.
If any other related questions arise, just say, "I am a healthcare professional."
If you don't know the answer, just say that you don't know. Don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [19]:
# Wrap the raw template text in a PromptTemplate and package it as the
# keyword arguments a chain constructor expects.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
chain_type_kwargs = dict(prompt=PROMPT)

In [20]:
from langchain_groq import ChatGroq
def initialize_groq_llm(model_name="llama-3.3-70b-versatile", max_tokens=512):
    """Create a ``ChatGroq`` chat-model client.

    The API key is read from the ``GROQ_API_KEY`` environment variable, so it
    never has to be written into the notebook.

    Args:
        model_name: Model identifier passed to ``ChatGroq``. Defaults to
            ``"llama-3.3-70b-versatile"``, the value that was previously
            hard-coded.
        max_tokens: Passed to ``ChatGroq`` as ``max_tokens``. Defaults to
            512, the value that was previously hard-coded.

    Returns:
        The constructed ``ChatGroq`` instance.
    """
    return ChatGroq(
        groq_api_key=os.getenv("GROQ_API_KEY"),
        model_name=model_name,
        max_tokens=max_tokens,
    )

In [21]:
def generate_response(question, k=1):
    """Answer ``question`` with the Groq LLM, using retrieved chunks as context.

    The ``k`` most similar chunks are fetched from the module-level
    ``faiss_index``, joined with newlines into a context string, substituted
    into ``prompt_template`` together with the question, and the resulting
    prompt is sent to the model returned by ``initialize_groq_llm()``.

    Args:
        question: The user's question as plain text.
        k: Number of chunks to retrieve as context. Defaults to 1, the value
            that was previously hard-coded.

    Returns:
        The ``content`` attribute of the model's response.
    """
    # Query the vector store directly (the same call search_similar_documents
    # makes) instead of the deprecated ``retriever.get_relevant_documents``.
    docs = faiss_index.similarity_search(question, k=k)
    context = "\n".join(doc.page_content for doc in docs)

    prompt = PromptTemplate(
        input_variables=["context", "question"],
        template=prompt_template,
    )
    formatted_prompt = prompt.format(context=context, question=question)

    llm = initialize_groq_llm()
    response = llm.invoke(formatted_prompt)
    return response.content