francismurray committed
Commit
8ac52c9
·
1 Parent(s): c928117

initial commit

Files changed (4)
  1. .gitignore +2 -0
  2. README.md +33 -0
  3. app.py +172 -0
  4. requirements.txt +4 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
+ myenv/
+ .DS_Store
README.md CHANGED
@@ -9,6 +9,39 @@ app_file: app.py
  pinned: false
  license: mit
  short_description: Retrieval-Augmented Generation (RAG)
+ models:
+ - bert-base-uncased
+ - google/flan-t5-base
  ---

+ # 📄🔍 Retrieval-Augmented Generation (RAG) Demo
+
+ A simple yet powerful RAG application that lets you upload documents and ask questions about them.
+
+ ## Features
+
+ - 📄 Upload multiple .txt files
+ - 🔍 Automatic document processing and indexing
+ - 💡 Query your documents using natural language
+ - 🤖 Get AI-generated answers based on your content
+
+ ## How It Works
+
+ 1. **Upload** - Add your text files to the system
+ 2. **Index** - Documents are embedded using `bert-base-uncased`
+ 3. **Query** - Ask a question about the documents
+ 4. **Retrieve** - The system finds the most relevant content
+ 5. **Generate** - `flan-t5-base` writes a natural-language answer
+
+ ## Technical Details
+
+ - Built with Hugging Face Transformers
+ - Uses cosine similarity for matching
+ - No GPU required (ZeroGPU compatible)
+ - Runs entirely in memory
+
+ ## Usage
+
+ Simply upload your text files, ask a question, and get an answer within seconds!
+
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
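
The retrieval step described in the README boils down to mean-pooled BERT token embeddings compared by cosine similarity. A minimal standalone sketch of just that step (the sample sentences and variable names below are illustrative only, not part of this commit):

```python
import numpy as np
import torch
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")

def embed(texts):
    # Tokenize, run the encoder, then mean-pool token vectors,
    # using the attention mask to ignore padding positions.
    enc = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        hidden = model(**enc).last_hidden_state        # (batch, seq, dim)
    mask = enc["attention_mask"].unsqueeze(-1).float() # (batch, seq, 1)
    return ((hidden * mask).sum(1) / mask.sum(1).clamp(min=1e-9)).numpy()

docs = ["The Eiffel Tower is in Paris.", "BERT produces contextual embeddings."]
doc_vecs = embed(docs)
query_vec = embed(["Where is the Eiffel Tower?"])[0]

# Cosine similarity of the query against every paragraph
scores = doc_vecs @ query_vec / (
    np.linalg.norm(doc_vecs, axis=1) * np.linalg.norm(query_vec)
)
print(docs[int(np.argmax(scores))])  # most relevant paragraph
```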
app.py ADDED
@@ -0,0 +1,172 @@
+ import numpy as np
+ import torch
+ import gradio as gr
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModel
+
+ # Define device
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ # Mean-pooled sentence embeddings from a transformers encoder
+ def get_embeddings(texts, model, tokenizer):
+     encoded_input = tokenizer(texts, padding=True, truncation=True, return_tensors="pt").to(device)
+     with torch.no_grad():
+         model_output = model(**encoded_input)
+
+     # Mean pooling over tokens, ignoring padding via the attention mask
+     token_embeddings = model_output.last_hidden_state
+     attention_mask = encoded_input["attention_mask"]
+     input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
+     embeddings = torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
+     return embeddings.cpu().numpy()
+
+ # Cosine similarity between two 1-D numpy vectors
+ def cosine_similarity(a, b):
+     return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
+
+ # Load models
+ def load_models():
+     # Embedding model
+     embed_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
+     embed_model = AutoModel.from_pretrained("bert-base-uncased").to(device)
+
+     # Generation model
+     gen_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
+     generator = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base").to(device)
+
+     return embed_model, embed_tokenizer, generator, gen_tokenizer
+
+ # Process uploaded text files
+ def process_documents(files):
+     documents = []
+     for file in files:
+         # Gradio may pass plain file paths or objects with a .name attribute
+         path = file if isinstance(file, str) else file.name
+         with open(path, "r", encoding="utf-8") as f:
+             content = f.read()
+         # Simple document chunking by paragraphs
+         paragraphs = [p.strip() for p in content.split("\n\n") if p.strip()]
+         documents.extend(paragraphs)
+     return documents
+
+ # Create index from documents
+ def create_index(model, tokenizer, documents):
+     if not documents:
+         return None, None
+
+     # Create embeddings
+     embeddings = get_embeddings(documents, model, tokenizer)
+     return embeddings, documents
+
+ # Retrieve relevant documents
+ def retrieve(query, embeddings, documents, model, tokenizer, top_k=3):
+     if embeddings is None or documents is None:
+         return []
+
+     # Encode query
+     query_embedding = get_embeddings([query], model, tokenizer)[0]
+
+     # Calculate similarities
+     similarities = [cosine_similarity(query_embedding, doc_embed) for doc_embed in embeddings]
+
+     # Get top-k indices, highest similarity first
+     top_indices = np.argsort(similarities)[-top_k:][::-1]
+
+     # Return relevant documents
+     return [documents[idx] for idx in top_indices]
+
+ # Generate answer
+ def generate_answer(query, context, tokenizer, generator):
+     if not context:
+         return "No documents have been uploaded yet. Please upload some text files first."
+
+     # Combine context
+     combined_context = " ".join(context)
+
+     # Create prompt
+     prompt = f"Context: {combined_context}\n\nQuestion: {query}\n\nAnswer:"
+
+     # Generate answer with deterministic beam search (sampling knobs such as
+     # temperature/top_p only take effect when do_sample=True)
+     inputs = tokenizer(prompt, return_tensors="pt", max_length=1024, truncation=True).to(device)
+
+     with torch.no_grad():
+         outputs = generator.generate(
+             **inputs,
+             max_length=256,
+             num_beams=4,
+         )
+
+     return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+ # RAG pipeline
+ def rag_pipeline(query, files):
+     try:
+         # Only the index variables are reassigned; the models are read-only globals
+         global doc_embeddings, indexed_documents
+
+         if not files:
+             return "Please upload some text files first."
+
+         # Process documents
+         documents = process_documents(files)
+
+         # Create embeddings
+         doc_embeddings, indexed_documents = create_index(embed_model, embed_tokenizer, documents)
+
+         # Retrieve relevant context
+         context = retrieve(query, doc_embeddings, indexed_documents, embed_model, embed_tokenizer)
+
+         # Generate answer
+         answer = generate_answer(query, context, gen_tokenizer, generator)
+
+         return answer
+     except Exception as e:
+         return f"An error occurred: {e}"
+
+ # Initialize global variables
+ embed_model, embed_tokenizer, generator, gen_tokenizer = load_models()
+ doc_embeddings, indexed_documents = None, None
+
+ # Gradio interface
+ with gr.Blocks(title="RAG Demo") as demo:
+     gr.Markdown("# 📄🔍 Retrieval-Augmented Generation (RAG) Demo")
+     gr.Markdown("Upload text files and ask questions about their content.")
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             file_output = gr.File(
+                 file_count="multiple",
+                 label="Upload Text Files (.txt)",
+                 file_types=[".txt"],
+             )
+
+         with gr.Column(scale=2):
+             query_input = gr.Textbox(
+                 label="Your Question",
+                 placeholder="Ask a question about the uploaded documents...",
+             )
+             submit_btn = gr.Button("Get Answer", variant="primary")
+             answer_output = gr.Textbox(label="Answer", lines=10)
+
+     submit_btn.click(
+         rag_pipeline,
+         inputs=[query_input, file_output],
+         outputs=answer_output,
+     )
+
+     gr.Markdown(
+         """
+         ## How it works
+         1. Upload one or more `.txt` files
+         2. Ask a question related to the content
+         3. The system will:
+            - Create embeddings using BERT
+            - Find similar passages using vector similarity
+            - Retrieve relevant context for your query
+            - Generate an answer using `flan-t5-base`
+
+         Built with 🤗 Hugging Face models and Gradio
+         """
+     )
+
+ # Launch the app
+ if __name__ == "__main__":
+     demo.launch()
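
Because every pipeline stage is a plain function, the app can also be exercised without the Gradio UI, which is handy for a quick smoke test. A hedged sketch, assuming `app.py` is importable from the working directory (importing it loads both models once at module level); the test file and sentences below are illustrative:

```python
# smoke_test.py - illustrative only, not part of this commit
import app  # module import runs load_models() and builds the Blocks UI

docs = [
    "Retrieval-Augmented Generation pairs a retriever with a generator.",
    "Cosine similarity measures the angle between two vectors.",
]

# Index, retrieve, and generate using the module's own functions and models
embeddings, indexed = app.create_index(app.embed_model, app.embed_tokenizer, docs)
context = app.retrieve(
    "What does RAG pair together?", embeddings, indexed,
    app.embed_model, app.embed_tokenizer, top_k=1,
)
print(app.generate_answer(
    "What does RAG pair together?", context, app.gen_tokenizer, app.generator,
))
```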
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ numpy<2.0
+ torch==2.0.1
+ transformers==4.26.0
+ gradio==5.30.0
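
To run the Space locally, the standard workflow should apply: install the pins with `pip install -r requirements.txt` (ideally inside a fresh virtual environment, which the `myenv/` entry in `.gitignore` suggests the author uses), then start the app with `python app.py`.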