TiberiuCristianLeon committed
Commit ee8f0d6 · verified · 1 Parent(s): d9a4ca6

Update app.py

Files changed (1)
  1. app.py +14 -3
app.py CHANGED
@@ -231,12 +231,23 @@ class Translators:
  from quickmt import Translator
  # 'auto' auto-detects GPU, set to "cpu" to force CPU inference
  # device = 'gpu' if torch.cuda.is_available() else 'cpu'
- translator = Translator(str(model_path), device="auto", compute_type="auto", beam_size=5, max_input_length=512, max_decoding_length=512)
+ translator = Translator(str(model_path), device="auto", compute_type="auto")
  # translation = Translator(f"./quickmt-{self.sl}-{self.tl}/", device="auto/cpu", intra_threads=2, inter_threads=2, compute_type="int8")
- # set beam size to 1 for faster speed (but lower quality) device="auto/cpu/gpu"
+ # ctranslate2._ext.Translator(model_path: str, device: str = 'cpu', *, device_index: Union[int, List[int]] = 0, compute_type: Union[str, Dict[str, str]] = 'default',
+ #     inter_threads: int = 1, intra_threads: int = 0, max_queued_batches: int = 0, flash_attention: bool = False, tensor_parallel: bool = False, files: object = None)
  # Options for compute_type: default, auto, int8, int8_float32, int8_float16, int8_bfloat16, int16, float16, bfloat16, float32
  # "int8" will work well for inference on CPU and give "int8_float16" or "int8_bfloat16" a try for GPU inference.
- translation = translator(input_text)
+
+ # (self: ctranslate2._ext.Translator, source: List[List[str]], target_prefix: Optional[List[Optional[List[str]]]] = None, *, max_batch_size: int = 0,
+ #     batch_type: str = 'examples', asynchronous: bool = False, beam_size: int = 2, patience: float = 1, num_hypotheses: int = 1, length_penalty: float = 1,
+ #     coverage_penalty: float = 0, repetition_penalty: float = 1, no_repeat_ngram_size: int = 0, disable_unk: bool = False,
+ #     suppress_sequences: Optional[List[List[str]]] = None, end_token: Optional[Union[str, List[str], List[int]]] = None, return_end_token: bool = False,
+ #     prefix_bias_beta: float = 0, max_input_length: int = 1024, max_decoding_length: int = 256, min_decoding_length: int = 1, use_vmap: bool = False,
+ #     return_scores: bool = False, return_logits_vocab: bool = False, return_attention: bool = False, return_alternatives: bool = False,
+ #     min_alternative_expansion_prob: float = 0, sampling_topk: int = 1, sampling_topp: float = 1, sampling_temperature: float = 1, replace_unknowns: bool = False,
+ #     callback: Callable[[ctranslate2._ext.GenerationStepResult], bool] = None) -> Union[List[ctranslate2._ext.TranslationResult], List[ctranslate2._ext.AsyncTranslationResult]]
+ # set beam size to 1 for faster speed (but lower quality); device="auto/cpu/gpu"
+ translation = translator(input_text, beam_size=5, max_input_length=512, max_decoding_length=512)
  # print(model_path, input_text, translation)
  return translation
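
In short, the commit moves the decoding options (beam_size, max_input_length, max_decoding_length) off the quickmt Translator constructor and onto the translation call itself, matching the ctranslate2 translate signature quoted in the added comments. A minimal sketch of the resulting usage pattern, assuming a locally downloaded quickmt model directory; the "./quickmt-en-de" path and the sample sentence are illustrative placeholders, not part of this commit:

# Sketch: construct the translator once with device/compute_type only,
# then pass decoding options per call, as the updated app.py does.
from quickmt import Translator

# Placeholder model directory (app.py builds this path from self.sl/self.tl).
translator = Translator("./quickmt-en-de", device="auto", compute_type="auto")

# beam_size=1 would be faster but lower quality; the commit settles on beam_size=5.
translation = translator(
    "Hello, how are you today?",  # placeholder input text
    beam_size=5,
    max_input_length=512,
    max_decoding_length=512,
)
print(translation)

Whether the call takes a single string or a list of sentences depends on quickmt's API; the sketch mirrors the single input_text argument used in the diff.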