Aidan Phillips committed
Commit · 0885169
Parent(s): f5893dd

accuracy scoring pretty good
- categories/accuracy.py +146 -0
- categories/fluency.py +103 -66
- scorer.ipynb +44 -22
categories/accuracy.py
CHANGED
@@ -0,0 +1,146 @@
+import string
+
+import torch
+from scipy.spatial.distance import cosine
+from simalign import SentenceAligner
+from transformers import AutoModel, AutoTokenizer
+
+# setup global variables on import (bad practice, but whatever)
+# --------------------------------------------------------------
+
+aligner = SentenceAligner(model="distilbert-base-multilingual-cased", layer=6)
+tokenizer = AutoTokenizer.from_pretrained("distilbert-base-multilingual-cased")
+model = AutoModel.from_pretrained("distilbert-base-multilingual-cased")
+
+
+def accuracy(src_sentence: str, trg_sentence: str) -> dict:
+    """
+    Calculate the accuracy of a translation by comparing the source and target
+    sentences.
+
+    Parameters:
+        src_sentence (str): The source sentence.
+        trg_sentence (str): The target sentence.
+
+    Returns:
+        dict: A dictionary containing the accuracy score and errors.
+    """
+    # Preprocess both sentences
+    src_sentence = __preprocess_text(src_sentence)
+    trg_sentence = __preprocess_text(trg_sentence)
+
+    r = __get_alignment_score(src_sentence, trg_sentence)
+    score = __get_bertscore(src_sentence, trg_sentence)
+
+    res = {"score": __bertscore_to_percentage(score), "errors": r}
+    return res
+
+
+def __preprocess_text(text: str) -> str:
+    """
+    Remove punctuation and convert text to lowercase.
+
+    Parameters:
+        text (str): The text to preprocess.
+
+    Returns:
+        str: The preprocessed text.
+    """
+    # Remove punctuation
+    text = text.translate(str.maketrans("", "", string.punctuation))
+    # Convert to lowercase
+    text = text.lower()
+    return text
+
+
+def __get_bertscore(src_sentence: str, trg_sentence: str) -> float:
+    """
+    Get a BERTScore-style similarity between two sentences: the cosine
+    similarity of their mean-pooled DistilBERT embeddings.
+
+    Parameters:
+        src_sentence (str): The source sentence.
+        trg_sentence (str): The target sentence.
+
+    Returns:
+        float: The cosine similarity.
+    """
+    # Tokenize and generate embeddings
+    inputs_src = tokenizer(
+        src_sentence, return_tensors="pt", padding=True, truncation=True
+    )
+    inputs_trg = tokenizer(
+        trg_sentence, return_tensors="pt", padding=True, truncation=True
+    )
+
+    with torch.no_grad():
+        outputs_src = model(**inputs_src)
+        outputs_trg = model(**inputs_trg)
+
+    # Get sentence embeddings by averaging token embeddings (from last hidden state)
+    src_embedding = torch.mean(outputs_src.last_hidden_state, dim=1).squeeze().numpy()
+    trg_embedding = torch.mean(outputs_trg.last_hidden_state, dim=1).squeeze().numpy()
+
+    # Calculate cosine similarity (1 - cosine distance)
+    similarity = 1 - cosine(src_embedding, trg_embedding)
+
+    return similarity
+
+
+def __bertscore_to_percentage(similarity: float) -> float:
+    """
+    Convert the cosine similarity to a percentage score (0-100).
+
+    Parameters:
+        similarity (float): The cosine similarity from __get_bertscore.
+
+    Returns:
+        float: A score from 0 to 100.
+    """
+    # Scale the similarity to a percentage, clamping negative values to 0
+    # (cosine similarity is rarely negative)
+    scaled_score = max((similarity / 2) * 100, 0)
+    return round(scaled_score, 2)
+
+
+def __get_alignment_score(src_sentence: str, trg_sentence: str) -> list:
+    """
+    Align words between the source and target sentences and flag any word that
+    could not be aligned.
+
+    Parameters:
+        src_sentence (str): The source sentence.
+        trg_sentence (str): The target sentence.
+
+    Returns:
+        list: Possible mistranslations.
+    """
+    src_list = src_sentence.split()
+    trg_list = trg_sentence.split()
+
+    # The output is a dictionary with different matching methods.
+    # Each method has a list of pairs indicating the indexes of aligned words
+    # (the alignments are zero-indexed).
+    alignments = aligner.get_word_aligns(src_list, trg_list)
+
+    src_aligns = {x[0] for x in alignments["inter"]}
+    trg_aligns = {x[1] for x in alignments["inter"]}
+
+    mistranslations = []
+    for i in range(len(src_list)):
+        if i not in src_aligns:
+            mistranslations.append(
+                {
+                    "start": i,
+                    "end": i,
+                    "message": f"Word {src_list[i]} possibly mistranslated or omitted",
+                }
+            )
+
+    for i in range(len(trg_list)):
+        if i not in trg_aligns:
+            mistranslations.append(
+                {
+                    "start": i,
+                    "end": i,
+                    "message": f"Word {trg_list[i]} possibly mistranslated or added erroneously",
+                }
+            )
+
+    return mistranslations
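For context, a minimal sketch of how the new module is meant to be called. The English target sentence here is my own example, not from the commit, and the score and error values depend on the model weights, so treat the output as illustrative:

```python
from categories.accuracy import accuracy

# Hypothetical sentence pair; "Das ist ein Test." is German for "This is a test."
result = accuracy("Das ist ein Test.", "This is a test.")

print(result["score"])        # percentage derived from embedding cosine similarity
for err in result["errors"]:  # words simalign could not align between the sentences
    print(err["start"], err["end"], err["message"])
```

Each error uses the same {"start", "end", "message"} shape as the fluency checkers, with indices into the whitespace-split sentence, so results from all checkers can be concatenated downstream (the notebook below does exactly that).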
categories/fluency.py
CHANGED
@@ -1,28 +1,29 @@
 import language_tool_python
-from transformers import AutoTokenizer, AutoModelForMaskedLM
-import torch
 import numpy as np
 import spacy
+import torch
 import wordfreq
+from transformers import AutoModelForMaskedLM, AutoTokenizer
 
 # setup global variables on import (bad practice, but whatever)
+# --------------------------------------------------------------
 
 # grammar checker
+tool = language_tool_python.LanguageTool("en-US")
 
 # masked language model and tokenizer from huggingface
-model_name="distilbert-base-multilingual-cased"
+model_name = "distilbert-base-multilingual-cased"
 model = AutoModelForMaskedLM.from_pretrained(model_name)
 model.eval()
-tokenizer = AutoTokenizer.from_pretrained(model_name)
+tokenizer = AutoTokenizer.from_pretrained(model_name)  # tokenizer
 
 # spacy model for parsing
 nlp = spacy.load("en_core_web_sm")
 
+
+def __get_rarity(word: str, lang: str = "en") -> float:
     """
+    Returns the rarity of a word in the given language. wordfreq returns a value
     between 0 and 1, where 1 is the most common word. Therefore, taking the log results
     in a value between 0 (log 1 = 0) and -27.63 (log 1e-12). We then negate it so super
     rare words have a high score and common words have a low score.
@@ -30,20 +31,21 @@ def __get_rarity(word, lang="en") -> float:
     Parameters:
         word (str): The word to check.
         lang (str): The language to check. Default is "en".
+
     Returns:
         float: The rarity of the word.
     """
     return -np.log(wordfreq.word_frequency(word, lang) + 1e-12)
 
+
+def __produce_groupings(offset_mapping: list, input_ids: list) -> list:
     """
     Produce groupings of tokens that are part of the same word.
 
     Parameters:
         offset_mapping (list): The offset mapping of the tokens.
         input_ids (list): The input ids of the tokens.
+
     Returns:
         list: A list of groupings of tokens.
     """
@@ -64,10 +66,11 @@ def __produce_groupings(offset_mapping, input_ids):
     # Append final group
     if current_group:
         res.append(current_group)
+
     return res
 
+
+def pseudo_perplexity(text: str, threshold: float = 4, max_len: int = 128) -> dict:
     """
     Calculate the pseudo-perplexity of a text using a masked language model. Return all
     words that exceed a threshold of "adjusted awkwardness". The threshold is a measure
@@ -77,7 +80,7 @@ def pseudo_perplexity(text, threshold=4, max_len=128):
         text (str): The text to check.
         threshold (float): The threshold for awkwardness. Default is 4.
         max_len (int): The maximum length of the text. Default is 128.
+
     Returns:
         dict: A dictionary containing the score and errors.
     """
@@ -94,7 +97,7 @@ def pseudo_perplexity(text, threshold=4, max_len=128):
     for group in word_groups:
         # Skip special tokens (CLS and SEP)
         if group[0] == 0 or group[-1] == len(input_ids) - 1:
+            continue
 
         # Mask the word group
         masked = input_ids.clone()
@@ -119,7 +122,9 @@ def pseudo_perplexity(text, threshold=4, max_len=128):
         word_loss = -np.sum(log_probs) / len(log_probs)
         # Adjust the loss based on the rarity of the word
         word = tokenizer.decode(input_ids[group[0]])
+        word_loss -= 0.6 * __get_rarity(
+            word
+        )  # subtract scaled rarity so rare words are penalized less
         loss_values.append(word_loss)
 
     # Structure the results for output
@@ -129,22 +134,24 @@ def pseudo_perplexity(text, threshold=4, max_len=128):
     for i, l in enumerate(loss_values):
         if l < threshold:
             continue
+        errors.append(
+            {
+                "start": i,
+                "end": i,
+                "message": f"Adjusted likelihood {l} over threshold {threshold}",
+            }
+        )
 
-    res = {
-        "score": __fluency_score(average_loss),
-        "errors": errors
-    }
+    res = {"score": __fluency_score(average_loss), "errors": errors}
 
     return res
 
+
+def __fluency_score(
+    loss: float, midpoint: float = 5.0, steepness: float = 0.3
+) -> float:
     """
+    Transform the loss into a score from 0 to 100. Steepness controls how quickly the
     score drops as loss increases and midpoint controls the loss at which the score is
     50.
 
@@ -152,20 +159,21 @@ def __fluency_score(loss, midpoint=5, steepness=0.3):
        loss (float): The loss to transform.
        midpoint (float): The loss at which the score is 50. Default is 5.
        steepness (float): The steepness of the curve. Default is 0.3.
+
     Returns:
         float: The score from 0 to 100.
     """
     score = 100 / (1 + np.exp(steepness * (loss - midpoint)))
     return round(score, 2)
 
+
+def grammar_errors(text: str) -> dict:
     """
     Check the grammar of a text using a grammar checker and a structural grammar check.
 
     Parameters:
         text (str): The text to check.
+
     Returns:
         dict: A dictionary containing the score and errors.
     """
@@ -195,83 +203,112 @@ def grammar_errors(text) -> tuple[int, list[str]]:
 
     grammar_score = len(r) / len(text.split())
 
-    res = {
-        "score": __grammar_score_from_prob(grammar_score),
-        "errors": r
-    }
+    res = {"score": __grammar_score_from_prob(grammar_score), "errors": r}
 
     return res
 
+
+def __grammar_score_from_prob(error_ratio: float) -> float:
     """
     Transform the number of errors divided by words into a score from 0 to 100.
+
+    Parameters:
+        error_ratio (float): The ratio of errors to words.
+
+    Returns:
+        float: The score from 0 to 100.
     """
-    score = 100*(1-error_ratio)
+    score = 100 * (1 - error_ratio)
    return round(score, 2)
 
 
-def __check_structural_grammar(text):
+def __check_structural_grammar(text: str) -> list:
+    """
+    Check the structural grammar of a text using spaCy.
+
+    Parameters:
+        text (str): The text to check.
+
+    Returns:
+        list: A list of structural grammar errors.
+    """
     doc = nlp(text)
     issues = []
 
     # 1. Missing main verb (ROOT)
+    root_verbs = [
+        tok for tok in doc if tok.dep_ == "ROOT" and tok.pos_ in {"VERB", "AUX"}
+    ]
     if not root_verbs:
         root_root = [tok for tok in doc if tok.dep_ == "ROOT"]
         token = root_root[0] if root_root else doc[0]
+        issues.append(
+            {
+                "start": token.i,
+                "end": token.i + 1,
+                "message": "Sentence is missing a main verb (no ROOT verb).",
+            }
+        )
 
     # 2. Verb(s) present but no subject
     verbs = [tok for tok in doc if tok.pos_ in {"VERB", "AUX"}]
     subjects = [tok for tok in doc if tok.dep_ in {"nsubj", "nsubjpass"}]
     if verbs and not subjects:
         for verb in verbs:
+            issues.append(
+                {
+                    "start": verb.i,
+                    "end": verb.i + 1,
+                    "message": "Sentence has verb(s) but no subject (possible fragment).",
+                }
+            )
 
     # 3. Dangling prepositions
     for tok in doc:
         if tok.pos_ == "ADP" and len(list(tok.children)) == 0:
+            issues.append(
+                {
+                    "start": tok.i,
+                    "end": tok.i + 1,
+                    "message": f"Dangling preposition '{tok.text}' (no object or complement).",
+                }
+            )
 
     # 4. Noun pile-up (no verbs, all tokens are nominal)
+    if not any(tok.pos_ in {"VERB", "AUX"} for tok in doc) and all(
+        tok.pos_ in {"NOUN", "PROPN", "ADJ", "DET", "NUM"}
+        for tok in doc
+        if tok.is_alpha
+    ):
         token = doc[0]
+        issues.append(
+            {
+                "start": token.i,
+                "end": token.i + 1,
+                "message": "Sentence lacks a verb or any verbal structure (nominal phrase pile-up).",
+            }
+        )
 
     # 5. Multiple ROOTs (possible run-on)
     root_count = sum(1 for tok in doc if tok.dep_ == "ROOT")
     if root_count > 1:
         for tok in doc:
             if tok.dep_ == "ROOT":
+                issues.append(
+                    {
+                        "start": tok.i,
+                        "end": tok.i + 1,
+                        "message": "Sentence has multiple ROOTs — possible run-on sentence.",
+                    }
+                )
 
     return issues
 
 
+# Unit tests can go here eventually
 def main():
     pass
 
+
 if __name__ == "__main__":
     main()
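A small sanity check on the new scoring curve (not part of the commit): the logistic in __fluency_score maps an average adjusted loss onto a 0-100 score, pinned to exactly 50 at the midpoint. The values printed below follow directly from the formula:

```python
import numpy as np

def fluency_score(loss: float, midpoint: float = 5.0, steepness: float = 0.3) -> float:
    # Same logistic mapping as __fluency_score above
    return round(100 / (1 + np.exp(steepness * (loss - midpoint))), 2)

for loss in [0.0, 2.5, 5.0, 7.5, 10.0]:
    print(loss, fluency_score(loss))  # loss == midpoint -> 50.0; lower loss -> higher score
```

Raising steepness makes the score fall faster around the midpoint, and the rarity adjustment in pseudo_perplexity (word_loss -= 0.6 * __get_rarity(word)) lowers the loss of rare words before this mapping and the awkwardness threshold are applied.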
scorer.ipynb
CHANGED
@@ -4,78 +4,100 @@
    "cell_type": "code",
    "execution_count": 1,
    "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2025-04-08 22:18:10,848 - simalign.simalign - INFO - Initialized the EmbeddingLoader with model: distilbert-base-multilingual-cased\n",
+      "Initialized the EmbeddingLoader with model: distilbert-base-multilingual-cased\n"
+     ]
+    }
+   ],
    "source": [
+    "from categories.fluency import *\n",
+    "from categories.accuracy import *"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "Sentence: The cat sat the quickly up apples banana.\n"
      ]
     }
    ],
    "source": [
+    "src_sent = \"Das ist ein Test.\"  # Example source sentence (German for \"This is a test.\")\n",
+    "trg_sent = input(f\"{src_sent}: \")  # Prompt the user for a translation\n",
     "\n",
+    "if trg_sent == \"\":\n",
+    "    trg_sent = \"The cat sat the quickly up apples banana.\"\n",
     "\n",
+    "print(\"Sentence:\", trg_sent)  # Print the input sentence\n",
     "\n",
+    "err = grammar_errors(trg_sent)  # Run the grammar checks\n",
+    "flu = pseudo_perplexity(trg_sent, threshold=3.1)  # Run the fluency check\n",
+    "acc = accuracy(src_sent, trg_sent)  # Run the accuracy check against the source"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "An apostrophe may be missing.: apples banana.\n",
+      "Adjusted likelihood 4.8056646935577145 over threshold 3.1: sat\n",
+      "Adjusted likelihood 4.473408069089179 over threshold 3.1: the\n",
+      "Adjusted likelihood 4.732453441503642 over threshold 3.1: quickly\n",
+      "Adjusted likelihood 5.1115574262487735 over threshold 3.1: apples\n",
+      "Word ist possibly mistranslated or omitted: cat\n",
+      "Word ein possibly mistranslated or omitted: sat\n",
+      "Word sat possibly mistranslated or added erroneously: sat\n",
+      "Word the possibly mistranslated or added erroneously: the\n",
+      "Word quickly possibly mistranslated or added erroneously: quickly\n",
+      "Word up possibly mistranslated or added erroneously: up\n",
+      "Word apples possibly mistranslated or added erroneously: apples\n",
+      "Word banana possibly mistranslated or added erroneously: banana.\n"
      ]
     }
    ],
    "source": [
-    "combined_err = err[\"errors\"] + flu[\"errors\"] # Combine the error counts from both functions\n",
+    "combined_err = err[\"errors\"] + flu[\"errors\"] + acc[\"errors\"]  # Combine the errors from all three checkers\n",
     "\n",
     "for e in combined_err:\n",
+    "    substr = \" \".join(trg_sent.split(\" \")[e[\"start\"]:e[\"end\"]+1])\n",
     "    print(f\"{e['message']}: {substr}\")  # Print the error messages\n"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "Fluency Score: 76.61500000000001\n",
+      "Accuracy Score: 24.45\n"
      ]
     }
    ],
    "source": [
     "fluency_score = 0.5 * err[\"score\"] + 0.5 * flu[\"score\"]  # Calculate the fluency score\n",
+    "print(\"Fluency Score:\", round(fluency_score, 2))  # Print the fluency score\n",
+    "\n",
+    "print(\"Accuracy Score:\", acc[\"score\"])  # Print the accuracy score"
    ]
   }
  ],
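If the category scores eventually need to be collapsed into a single translation score, a weighted mean is the natural next step. A minimal sketch, with weights that are my assumption rather than anything in this commit:

```python
def overall_score(fluency: float, accuracy: float, w_fluency: float = 0.7) -> float:
    """Weighted average of the category scores; the 0.7/0.3 weights are illustrative."""
    return round(w_fluency * fluency + (1 - w_fluency) * accuracy, 2)

# Using the rounded scores from the notebook run above:
print(overall_score(76.62, 24.45))  # 60.97 with the assumed weights
```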