Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,7 +15,7 @@ import sys
|
|
| 15 |
import time
|
| 16 |
import re
|
| 17 |
import textract
|
| 18 |
-
import zipfile
|
| 19 |
import random
|
| 20 |
|
| 21 |
from datetime import datetime
|
|
@@ -33,6 +33,8 @@ from langchain.chat_models import ChatOpenAI
|
|
| 33 |
from langchain.memory import ConversationBufferMemory
|
| 34 |
from langchain.chains import ConversationalRetrievalChain
|
| 35 |
from templates import css, bot_template, user_template
|
|
|
|
|
|
|
| 36 |
|
| 37 |
# page config and sidebar declares up front allow all other functions to see global class variables
|
| 38 |
st.set_page_config(page_title="GPT Streamlit Document Reasoner", layout="wide")
|
|
@@ -163,7 +165,6 @@ def add_paper_buttons_and_links():
|
|
| 163 |
add_paper_buttons_and_links()
|
| 164 |
|
| 165 |
|
| 166 |
-
|
| 167 |
# Process user input is a post processor algorithm which runs after document embedding vector DB play of GPT on context of documents..
|
| 168 |
def process_user_input(user_question):
|
| 169 |
# Check and initialize 'conversation' in session state if not present
|
|
@@ -205,32 +206,6 @@ def extract_feature_and_detail(paragraph):
|
|
| 205 |
return header, detail
|
| 206 |
return None, None
|
| 207 |
|
| 208 |
-
|
| 209 |
-
def process_user_input_old(user_question):
|
| 210 |
-
response = st.session_state.conversation({'question': user_question})
|
| 211 |
-
st.session_state.chat_history = response['chat_history']
|
| 212 |
-
for i, message in enumerate(st.session_state.chat_history):
|
| 213 |
-
template = user_template if i % 2 == 0 else bot_template
|
| 214 |
-
st.write(template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
|
| 215 |
-
# Save file output from PDF query results
|
| 216 |
-
filename = generate_filename(user_question, 'txt')
|
| 217 |
-
#create_file(filename, user_question, message.content)
|
| 218 |
-
response = message.content
|
| 219 |
-
user_prompt = user_question
|
| 220 |
-
create_file(filename, user_prompt, response, should_save)
|
| 221 |
-
#st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
def generate_filename_old(prompt, file_type):
|
| 229 |
-
central = pytz.timezone('US/Central')
|
| 230 |
-
safe_date_time = datetime.now(central).strftime("%m%d_%H%M") # Date and time DD-HHMM
|
| 231 |
-
safe_prompt = "".join(x for x in prompt if x.isalnum())[:90] # Limit file name size and trim whitespace
|
| 232 |
-
return f"{safe_date_time}_{safe_prompt}.{file_type}" # Return a safe file name
|
| 233 |
-
|
| 234 |
def generate_filename(prompt, file_type):
|
| 235 |
central = pytz.timezone('US/Central')
|
| 236 |
safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
|
|
@@ -333,71 +308,6 @@ def create_file(filename, prompt, response, should_save=True):
|
|
| 333 |
file.write(combined_content)
|
| 334 |
|
| 335 |
|
| 336 |
-
def create_file_old2(filename, prompt, response, should_save=True):
|
| 337 |
-
if not should_save:
|
| 338 |
-
return
|
| 339 |
-
|
| 340 |
-
# Step 2: Extract base filename without extension
|
| 341 |
-
base_filename, ext = os.path.splitext(filename)
|
| 342 |
-
|
| 343 |
-
# Step 3: Check if the response contains Python code
|
| 344 |
-
has_python_code = bool(re.search(r"```python([\s\S]*?)```", response))
|
| 345 |
-
|
| 346 |
-
# Step 4: Initialize the combined content
|
| 347 |
-
combined_content = ""
|
| 348 |
-
|
| 349 |
-
# Add Prompt with markdown title and emoji
|
| 350 |
-
combined_content += "# Prompt 📝\n" + prompt + "\n\n"
|
| 351 |
-
|
| 352 |
-
# Add Response with markdown title and emoji
|
| 353 |
-
combined_content += "# Response 💬\n" + response + "\n\n"
|
| 354 |
-
|
| 355 |
-
# Check for Python code or other resources and add them with markdown title and emoji
|
| 356 |
-
resources = re.findall(r"```([\s\S]*?)```", response)
|
| 357 |
-
for resource in resources:
|
| 358 |
-
# Check if the resource contains Python code
|
| 359 |
-
if "python" in resource.lower():
|
| 360 |
-
st.markdown('# Running python.. ')
|
| 361 |
-
# Remove the word 'python' from the beginning of the code block
|
| 362 |
-
cleaned_code = re.sub(r'^\s*python', '', resource, flags=re.IGNORECASE | re.MULTILINE)
|
| 363 |
-
|
| 364 |
-
# Add Code Results title with markdown and emoji
|
| 365 |
-
combined_content += "# Code Results 🚀\n"
|
| 366 |
-
|
| 367 |
-
# Capture standard output
|
| 368 |
-
original_stdout = sys.stdout
|
| 369 |
-
sys.stdout = io.StringIO()
|
| 370 |
-
|
| 371 |
-
# Execute cleaned Python code and capture the output
|
| 372 |
-
try:
|
| 373 |
-
st.markdown('# Running exec.. ')
|
| 374 |
-
|
| 375 |
-
exec(cleaned_code)
|
| 376 |
-
code_output = sys.stdout.getvalue()
|
| 377 |
-
combined_content += f"```\n{code_output}\n```\n\n"
|
| 378 |
-
realtimeEvalResponse = "# Code Results 🚀\n" + "```" + code_output + "```\n\n"
|
| 379 |
-
|
| 380 |
-
st.write(realtimeEvalResponse)
|
| 381 |
-
|
| 382 |
-
st.markdown('# Completed exec.. ')
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
except Exception as e:
|
| 386 |
-
combined_content += f"```python\nError executing Python code: {e}\n```\n\n"
|
| 387 |
-
st.markdown('# Error in exec.. ' + combined_content)
|
| 388 |
-
|
| 389 |
-
# Restore the original standard output
|
| 390 |
-
sys.stdout = original_stdout
|
| 391 |
-
else:
|
| 392 |
-
# Add Resource title with markdown and emoji for non-Python resources
|
| 393 |
-
combined_content += "# Resource 🛠️\n" + "```" + resource + "```\n\n"
|
| 394 |
-
|
| 395 |
-
# Write the combined content into one file
|
| 396 |
-
with open(f"{base_filename}-Combined.md", 'w') as file:
|
| 397 |
-
file.write(combined_content)
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
def truncate_document(document, length):
|
| 402 |
return document[:length]
|
| 403 |
|
|
@@ -474,8 +384,7 @@ def extract_mime_type(file):
|
|
| 474 |
else:
|
| 475 |
raise TypeError("Input should be a string or a streamlit.UploadedFile object")
|
| 476 |
|
| 477 |
-
|
| 478 |
-
import re
|
| 479 |
|
| 480 |
def extract_file_extension(file):
|
| 481 |
# get the file name directly from the UploadedFile object
|
|
@@ -505,20 +414,6 @@ def pdf2txt(docs):
|
|
| 505 |
text += pdf.pages[page].extract_text() # new PyPDF2 syntax
|
| 506 |
except Exception as e:
|
| 507 |
st.write(f"Error processing file {file.name}: {e}")
|
| 508 |
-
|
| 509 |
-
return text
|
| 510 |
-
|
| 511 |
-
def pdf2txt_old(pdf_docs):
|
| 512 |
-
st.write(pdf_docs)
|
| 513 |
-
for file in pdf_docs:
|
| 514 |
-
mime_type = extract_mime_type(file)
|
| 515 |
-
st.write(f"MIME type of file: {mime_type}")
|
| 516 |
-
|
| 517 |
-
text = ""
|
| 518 |
-
for pdf in pdf_docs:
|
| 519 |
-
pdf_reader = PdfReader(pdf)
|
| 520 |
-
for page in pdf_reader.pages:
|
| 521 |
-
text += page.extract_text()
|
| 522 |
return text
|
| 523 |
|
| 524 |
def txt2chunks(text):
|
|
@@ -636,8 +531,6 @@ def main():
|
|
| 636 |
if st.button('💬 Chat'):
|
| 637 |
st.write('Reasoning with your inputs...')
|
| 638 |
|
| 639 |
-
#response = chat_with_model(user_prompt, ''.join(list(document_sections,)), model_choice) # *************************************
|
| 640 |
-
|
| 641 |
# Divide the user_prompt into smaller sections
|
| 642 |
user_prompt_sections = divide_prompt(user_prompt, max_length)
|
| 643 |
full_response = ''
|
|
@@ -714,7 +607,6 @@ def main():
|
|
| 714 |
create_file(filename, user_prompt, response, should_save)
|
| 715 |
|
| 716 |
st.experimental_rerun()
|
| 717 |
-
#st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
|
| 718 |
|
| 719 |
if __name__ == "__main__":
|
| 720 |
main()
|
|
@@ -740,4 +632,3 @@ with st.sidebar:
|
|
| 740 |
st.markdown('# AI Search Index of Length:' + length + ' Created.') # add timing
|
| 741 |
filename = generate_filename(raw, 'txt')
|
| 742 |
create_file(filename, raw, '', should_save)
|
| 743 |
-
#create_file(filename, raw, '')
|
|
|
|
| 15 |
import time
|
| 16 |
import re
|
| 17 |
import textract
|
| 18 |
+
import zipfile
|
| 19 |
import random
|
| 20 |
|
| 21 |
from datetime import datetime
|
|
|
|
| 33 |
from langchain.memory import ConversationBufferMemory
|
| 34 |
from langchain.chains import ConversationalRetrievalChain
|
| 35 |
from templates import css, bot_template, user_template
|
| 36 |
+
from io import BytesIO
|
| 37 |
+
|
| 38 |
|
| 39 |
# page config and sidebar declares up front allow all other functions to see global class variables
|
| 40 |
st.set_page_config(page_title="GPT Streamlit Document Reasoner", layout="wide")
|
|
|
|
| 165 |
add_paper_buttons_and_links()
|
| 166 |
|
| 167 |
|
|
|
|
| 168 |
# Process user input is a post processor algorithm which runs after document embedding vector DB play of GPT on context of documents..
|
| 169 |
def process_user_input(user_question):
|
| 170 |
# Check and initialize 'conversation' in session state if not present
|
|
|
|
| 206 |
return header, detail
|
| 207 |
return None, None
|
| 208 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
def generate_filename(prompt, file_type):
|
| 210 |
central = pytz.timezone('US/Central')
|
| 211 |
safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
|
|
|
|
| 308 |
file.write(combined_content)
|
| 309 |
|
| 310 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 311 |
def truncate_document(document, length):
|
| 312 |
return document[:length]
|
| 313 |
|
|
|
|
| 384 |
else:
|
| 385 |
raise TypeError("Input should be a string or a streamlit.UploadedFile object")
|
| 386 |
|
| 387 |
+
|
|
|
|
| 388 |
|
| 389 |
def extract_file_extension(file):
|
| 390 |
# get the file name directly from the UploadedFile object
|
|
|
|
| 414 |
text += pdf.pages[page].extract_text() # new PyPDF2 syntax
|
| 415 |
except Exception as e:
|
| 416 |
st.write(f"Error processing file {file.name}: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 417 |
return text
|
| 418 |
|
| 419 |
def txt2chunks(text):
|
|
|
|
| 531 |
if st.button('💬 Chat'):
|
| 532 |
st.write('Reasoning with your inputs...')
|
| 533 |
|
|
|
|
|
|
|
| 534 |
# Divide the user_prompt into smaller sections
|
| 535 |
user_prompt_sections = divide_prompt(user_prompt, max_length)
|
| 536 |
full_response = ''
|
|
|
|
| 607 |
create_file(filename, user_prompt, response, should_save)
|
| 608 |
|
| 609 |
st.experimental_rerun()
|
|
|
|
| 610 |
|
| 611 |
if __name__ == "__main__":
|
| 612 |
main()
|
|
|
|
| 632 |
st.markdown('# AI Search Index of Length:' + length + ' Created.') # add timing
|
| 633 |
filename = generate_filename(raw, 'txt')
|
| 634 |
create_file(filename, raw, '', should_save)
|
|
|