TiberiuCristianLeon committed
Commit ee8f0d6 · verified · 1 Parent(s): d9a4ca6

Update app.py

Files changed (1)
  1. app.py +14 -3
app.py CHANGED
@@ -231,12 +231,23 @@ class Translators:
  from quickmt import Translator
  # 'auto' auto-detects GPU, set to "cpu" to force CPU inference
  # device = 'gpu' if torch.cuda.is_available() else 'cpu'
- translator = Translator(str(model_path), device="auto", compute_type="auto", beam_size=5, max_input_length=512, max_decoding_length=512)
+ translator = Translator(str(model_path), device="auto", compute_type="auto")
  # translation = Translator(f"./quickmt-{self.sl}-{self.tl}/", device="auto/cpu", intra_threads=2, inter_threads=2, compute_type="int8")
- # set beam size to 1 for faster speed (but lower quality) device="auto/cpu/gpu"
+ # ctranslate2._ext.Translator(model_path: str, device: str = 'cpu', *, device_index: Union[int, List[int]] = 0, compute_type: Union[str, Dict[str, str]] = 'default',
+ #     inter_threads: int = 1, intra_threads: int = 0, max_queued_batches: int = 0, flash_attention: bool = False, tensor_parallel: bool = False, files: object = None)
  # Options for compute_type: default, auto, int8, int8_float32, int8_float16, int8_bfloat16, int16, float16, bfloat16, float32
  # "int8" will work well for inference on CPU and give "int8_float16" or "int8_bfloat16" a try for GPU inference.
- translation = translator(input_text)
+
+ # (self: ctranslate2._ext.Translator, source: List[List[str]], target_prefix: Optional[List[Optional[List[str]]]] = None, *, max_batch_size: int = 0,
+ #     batch_type: str = 'examples', asynchronous: bool = False, beam_size: int = 2, patience: float = 1, num_hypotheses: int = 1, length_penalty: float = 1,
+ #     coverage_penalty: float = 0, repetition_penalty: float = 1, no_repeat_ngram_size: int = 0, disable_unk: bool = False,
+ #     suppress_sequences: Optional[List[List[str]]] = None, end_token: Optional[Union[str, List[str], List[int]]] = None, return_end_token: bool = False,
+ #     prefix_bias_beta: float = 0, max_input_length: int = 1024, max_decoding_length: int = 256, min_decoding_length: int = 1, use_vmap: bool = False,
+ #     return_scores: bool = False, return_logits_vocab: bool = False, return_attention: bool = False, return_alternatives: bool = False,
+ #     min_alternative_expansion_prob: float = 0, sampling_topk: int = 1, sampling_topp: float = 1, sampling_temperature: float = 1, replace_unknowns: bool = False,
+ #     callback: Callable[[ctranslate2._ext.GenerationStepResult], bool] = None) -> Union[List[ctranslate2._ext.TranslationResult], List[ctranslate2._ext.AsyncTranslationResult]]
+ # set beam size to 1 for faster speed (but lower quality); device="auto/cpu/gpu"
+ translation = translator(input_text, beam_size=5, max_input_length=512, max_decoding_length=512)
  # print(model_path, input_text, translation)
  return translation
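
In short, the commit moves the decoding options (beam_size, max_input_length, max_decoding_length) off the quickmt Translator constructor and onto the translation call itself, matching the ctranslate2 translate signature quoted in the added comments. A minimal sketch of the resulting usage pattern, assuming a locally downloaded quickmt model directory; the "./quickmt-en-de" path and the sample sentence are illustrative placeholders, not part of this commit:

# Sketch: construct the translator once with device/compute_type only,
# then pass decoding options per call, as the updated app.py does.
from quickmt import Translator

# Placeholder model directory (app.py builds this path from self.sl/self.tl).
translator = Translator("./quickmt-en-de", device="auto", compute_type="auto")

# beam_size=1 would be faster but lower quality; the commit settles on beam_size=5.
translation = translator(
    "Hello, how are you today?",  # placeholder input text
    beam_size=5,
    max_input_length=512,
    max_decoding_length=512,
)
print(translation)

Whether the call takes a single string or a list of sentences depends on quickmt's API; the sketch mirrors the single input_text argument used in the diff.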