import streamlit as st
import requests
from gtts import gTTS
from urllib.parse import urlparse, parse_qs
from youtube_transcript_api import YouTubeTranscriptApi
import unicodedata
from deepmultilingualpunctuation import PunctuationModel
from transformers import pipeline
def summarize_video(url):
    # Normalize short youtu.be links into the full watch URL form
    if "watch" not in url:
        url = url.replace("youtu.be/", "www.youtube.com/watch?v=")
    parsed_url = urlparse(url)
    video_id = parse_qs(parsed_url.query)["v"][0]
    # Get the transcript
    transcript = YouTubeTranscriptApi.get_transcript(video_id)

    # Combine all transcript snippets into one list of strings
    text = []
    for entry in transcript:
        text.append(entry["text"])

    # Join the list items into one paragraph
    video_transcript = " ".join(text)
    print("Text transcript created")
    print(video_transcript)

    # Text normalization
    normalized_transcript = unicodedata.normalize("NFKD", video_transcript)
    print("Text normalized")

    # Restore punctuation on the normalized transcript
    model = PunctuationModel()
    result = model.restore_punctuation(normalized_transcript)
    print("Punctuation restored")
    # SUMMARIZATION
    # Instantiate the summarization pipeline
    summarization_pipeline = pipeline(
        "summarization",
        model="t5-base",      # you can choose a different model, depending on your requirements
        tokenizer="t5-base"   # you can choose a different tokenizer, depending on your requirements
    )

    # Define the input text to summarize
    input_text = result

    # Split the input text into smaller chunks
    # (note: t5-base is trained on ~512-token inputs, so overly long chunks are truncated by the tokenizer)
    chunk_size = 5000
    chunks = [input_text[i:i + chunk_size] for i in range(0, len(input_text), chunk_size)]

    # Summarize each chunk separately
    summaries = []
    for chunk in chunks:
        summary = summarization_pipeline(chunk, max_length=200, min_length=30, do_sample=False, truncation=True)
        summaries.append(summary[0]["summary_text"])

    # Combine the summaries of all chunks into a single summary
    final_summary = " ".join(summaries)

    # Return the generated summary
    return final_summary
# Define the Streamlit app
st.title("YouTube Summarizer")

# Define the input form
form = st.form(key="input_form")

# Get the video URL from the user
video_url = form.text_input("Enter a YouTube video URL")

# Submit button
submit_button = form.form_submit_button("Summarize Video")

# Handle form submissions
if submit_button:
    # Call the summarize_video function to get the summary
    summary = summarize_video(video_url)

    # Display the summary to the user
    st.subheader("Summary")
    st.write(summary)

    # Convert the text summary into audio
    tts = gTTS(summary)
    print("Converting text to audio")
    tts.save("Summary.mp3")

    # Offer the audio summary as a download
    with open("Summary.mp3", "rb") as f:
        st.download_button("Download mp3", f, file_name="Summary.mp3")
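
# To try the app locally (a minimal sketch; assumes this file is saved as app.py
# and that the packages imported above are installed, along with torch for the
# punctuation and summarization models):
#   pip install streamlit gTTS youtube-transcript-api deepmultilingualpunctuation transformers torch
#   streamlit run app.py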