Spaces:
Runtime error
Runtime error
| # -*- coding: utf-8 -*- | |
| """ | |
| Created on Mon Jun 6 20:56:08 2022 | |
| @author: Aziz Baran Kurtuluş | |
| """ | |
# Runtime bootstrap: this script installs its own Python dependencies and
# fetches the NLTK data it needs when it starts.
import importlib.util
import os
import subprocess
import sys

# Install a package only when its module cannot be found, and always through
# the running interpreter's own pip: a bare `pip` on PATH may belong to a
# different Python installation. A failed install is not raised here; the
# import statements that follow will then fail with an ImportError.
for _package, _module in (('nltk', 'nltk'), ('scikit-learn', 'sklearn')):
    if importlib.util.find_spec(_module) is None:
        subprocess.run(
            [sys.executable, '-m', 'pip', 'install', _package],
            check=False,
        )
        # Make sure the import system notices the freshly installed files.
        importlib.invalidate_caches()

import nltk

# NLTK data packages fetched at start-up. The nltk version is not pinned
# above; NLTK 3.9+ loads the word tokenizer from 'punkt_tab' instead of
# 'punkt', so both are requested.
for _resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet', 'omw-1.4'):
    nltk.download(_resource)
| import streamlit as st | |
| import joblib | |
| import re | |
| import string | |
| import nltk | |
| from nltk.corpus import stopwords | |
| from nltk.stem import WordNetLemmatizer | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
# Page sections: one Streamlit container per section, created in this order.
(
    site_header,
    business_context,
    data_desc,
    performance,
    tweet_input,
    model_results,
    sentiment_analysis,
    contact,
) = (st.container() for _ in range(8))

# Title shown in the header section.
site_header.title('Toxic Comment Detection')

# Text-entry section; `user_text` receives what the visitor typed
# (limited to 280 characters by the widget).
tweet_input.header('Is Your Text Considered Toxic?')
tweet_input.write(
    """*Please note that this prediction is based on how the model was trained, so it may not be an accurate representation.*"""
)
user_text = tweet_input.text_input('Enter Text', max_chars=280)
def _normalize_text(raw_text):
    """Reduce raw user text to the space-joined tokens fed to the classifier.

    Steps: remove ASCII punctuation, tokenize with NLTK, lower-case, drop
    English stop words, lemmatize every remaining token.

    Args:
        raw_text: The text exactly as typed by the user.

    Returns:
        One string of lemmatized tokens separated by single spaces; an empty
        string when nothing is left after filtering.
    """
    without_punctuation = raw_text.translate(
        str.maketrans('', '', string.punctuation)
    )
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    lowered = (
        token.lower() for token in nltk.word_tokenize(without_punctuation)
    )
    return ' '.join(
        lemmatizer.lemmatize(token)
        for token in lowered
        if token not in stop_words
    )


def _predict_label(document):
    """Classify one preprocessed document with the stored model.

    The TF-IDF vectorizer is refitted on the stored training texts on every
    call, exactly as the script did before.
    NOTE(review): this refit happens for each submitted text; consider
    caching the fitted vectorizer and the model if start-up data is large.

    Args:
        document: Preprocessed text as a single string.

    Returns:
        The label the model predicts for `document`. The caller treats 0 as
        "not toxic" and anything else as "toxic".
    """
    # joblib.load accepts a path and manages the file itself; wrapping the
    # path in a bare open() left both file handles unclosed.
    # NOTE(review): joblib files are pickles - load only artifacts that ship
    # with the application.
    X_train = joblib.load('resources/X_train.pickel')
    final_model = joblib.load('resources/final_bayes.pickel')

    tfidf = TfidfVectorizer(stop_words='english', ngram_range=(1, 2))
    tfidf.fit(X_train)

    # The whole input is ONE document. Passing the token list instead turns
    # every token into its own row, so only the first word was classified
    # and the (1, 2) n-gram range could never produce a bigram.
    features = tfidf.transform([document])
    return final_model.predict(features)[0]


with model_results:
    st.subheader('Prediction:')
    if user_text:
        document = _normalize_text(user_text)
        if not document:
            # Only punctuation / stop words were entered: there is nothing
            # to classify (this case used to fail when indexing row 0).
            st.write('*The text contains no words the model can evaluate.*')
        elif _predict_label(document) == 0:
            st.subheader('**Not Toxic**')
        else:
            st.subheader('**Toxic**')
        # NOTE(review): the original indentation was lost; this empty text
        # element is assumed to belong to the `if user_text` branch - confirm.
        st.text('')