Update app.py

app.py CHANGED
@@ -231,12 +231,23 @@ class Translators:
         from quickmt import Translator
         # 'auto' auto-detects GPU, set to "cpu" to force CPU inference
         # device = 'gpu' if torch.cuda.is_available() else 'cpu'
-        translator = Translator(str(model_path), device="auto", compute_type="auto"
+        translator = Translator(str(model_path), device="auto", compute_type="auto")
         # translation = Translator(f"./quickmt-{self.sl}-{self.tl}/", device="auto/cpu", intra_threads=2, inter_threads=2, compute_type="int8")
-        #
+        # ctranslate2._ext.Translator(model_path: str, device: str = 'cpu', *, device_index: Union[int, List[int]] = 0, compute_type: Union[str, Dict[str, str]] = 'default',
+        # inter_threads: int = 1, intra_threads: int = 0, max_queued_batches: int = 0, flash_attention: bool = False, tensor_parallel: bool = False, files: object = None)
         # Options for compute_type: default, auto, int8, int8_float32, int8_float16, int8_bfloat16, int16, float16, bfloat16, float32
         # "int8" will work well for inference on CPU and give "int8_float16" or "int8_bfloat16" a try for GPU inference.
-
+
+        # (self: ctranslate2._ext.Translator, source: List[List[str]], target_prefix: Optional[List[Optional[List[str]]]] = None, *, max_batch_size: int = 0,
+        # batch_type: str = 'examples', asynchronous: bool = False, beam_size: int = 2, patience: float = 1, num_hypotheses: int = 1, length_penalty: float = 1,
+        # coverage_penalty: float = 0, repetition_penalty: float = 1, no_repeat_ngram_size: int = 0, disable_unk: bool = False,
+        # suppress_sequences: Optional[List[List[str]]] = None, end_token: Optional[Union[str, List[str], List[int]]] = None, return_end_token: bool = False,
+        # prefix_bias_beta: float = 0, max_input_length: int = 1024, max_decoding_length: int = 256, min_decoding_length: int = 1, use_vmap: bool = False,
+        # return_scores: bool = False, return_logits_vocab: bool = False, return_attention: bool = False, return_alternatives: bool = False,
+        # min_alternative_expansion_prob: float = 0, sampling_topk: int = 1, sampling_topp: float = 1, sampling_temperature: float = 1, replace_unknowns: bool = False,
+        # callback: Callable[[ctranslate2._ext.GenerationStepResult], bool] = None) -> Union[List[ctranslate2._ext.TranslationResult], List[ctranslate2._ext.AsyncTranslationResult]]
+        # set beam size to 1 for faster speed (but lower quality) device="auto/cpu/gpu"
+        translation = translator(input_text, beam_size=5, max_input_length=512, max_decoding_length=512)
         # print(model_path, input_text, translation)
         return translation
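
The commit fixes a missing closing parenthesis on the Translator constructor and adds the actual translation call. For reference, a minimal sketch of the patched code path; the model directory and sample sentence below are placeholders, and the calls simply mirror the lines in the diff, assuming input_text is a list of source-language sentences:

# Minimal usage sketch of the fixed lines above; the model directory and
# example sentence are assumptions, the calls mirror the diff.
from quickmt import Translator

model_path = "./quickmt-zh-en/"  # hypothetical local model directory
translator = Translator(str(model_path), device="auto", compute_type="auto")

input_text = ["你好，世界。"]  # placeholder source sentence
translation = translator(input_text, beam_size=5, max_input_length=512, max_decoding_length=512)
print(translation)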
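The long comments added in the diff paste the signatures of the underlying CTranslate2 layer that quickmt wraps. As a rough illustration of using that layer directly: translate_batch() expects pre-tokenized sentences, so a tokenizer is loaded first. The SentencePiece filename inside the model directory is an assumption here:

# Hedged sketch of the raw CTranslate2 API described by the pasted signatures;
# translate_batch() takes token lists, so a SentencePiece model is loaded first.
import ctranslate2
import sentencepiece as spm

sp = spm.SentencePieceProcessor(model_file="./quickmt-zh-en/src.spm.model")  # assumed tokenizer path
translator = ctranslate2.Translator("./quickmt-zh-en/", device="cpu", compute_type="int8")

tokens = sp.encode("你好，世界。", out_type=str)
results = translator.translate_batch([tokens], beam_size=2, max_decoding_length=256)
print(results[0].hypotheses[0])  # best hypothesis, as target-side tokens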
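Finally, a sketch of the speed-oriented settings the comments suggest: int8 weights plus the thread counts from the commented-out constructor for CPU inference, and beam_size=1 for greedy decoding (faster, somewhat lower quality). The model directory is again a placeholder:

# Speed-oriented variant, assembled from options mentioned in the diff's comments.
from quickmt import Translator

fast_translator = Translator("./quickmt-zh-en/", device="cpu", compute_type="int8",
                             intra_threads=2, inter_threads=2)
fast_translation = fast_translator(["你好，世界。"], beam_size=1,
                                   max_input_length=512, max_decoding_length=512)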