# translingo/inference/beam_search.py
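"""Beam search and greedy decoding utilities for transformer translation models."""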
import torch
import torch.nn.functional as F
from typing import List
from dataclasses import dataclass
@dataclass
class BeamHypothesis:
"""Single hypothesis in beam search"""
tokens: List[int]
log_prob: float
finished: bool = False
class BeamSearch:
"""Beam search decoder for transformer models"""
def __init__(self, beam_size: int = 4, length_penalty: float = 0.6,
coverage_penalty: float = 0.0, no_repeat_ngram_size: int = 3):
self.beam_size = beam_size
self.length_penalty = length_penalty
        self.coverage_penalty = coverage_penalty  # stored but not currently applied during search
        self.no_repeat_ngram_size = no_repeat_ngram_size  # 0 disables n-gram blocking
def search(self, model, src: torch.Tensor, max_length: int = 100,
bos_id: int = 2, eos_id: int = 3, pad_id: int = 0) -> List[List[int]]:
"""
Perform beam search decoding
Args:
model: Transformer model
src: Source sequence [batch_size, src_len]
max_length: Maximum decoding length
bos_id: Beginning of sequence token
eos_id: End of sequence token
pad_id: Padding token
Returns:
List of decoded sequences
"""
batch_size = src.size(0)
device = src.device
# Encode source
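        # Padding mask over source positions, shape [batch_size, 1, 1, src_len]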
src_mask = (src != pad_id).unsqueeze(1).unsqueeze(2)
memory = model.encode(src, src_mask)
# Initialize beams
beams = [[BeamHypothesis([bos_id], 0.0)] for _ in range(batch_size)]
for step in range(max_length - 1):
all_candidates = []
for batch_idx in range(batch_size):
                # Beams are kept sorted by score, so if the best hypothesis has
                # already emitted EOS this sentence is done
                if beams[batch_idx] and beams[batch_idx][0].finished:
                    continue
# Prepare input for all beams
beam_tokens = []
beam_indices = []
for beam_idx, hypothesis in enumerate(beams[batch_idx]):
if not hypothesis.finished:
beam_tokens.append(hypothesis.tokens)
beam_indices.append(beam_idx)
if not beam_tokens:
continue
# Create batch of sequences
tgt = torch.tensor(beam_tokens, device=device)
                # Build a causal (lower-triangular) mask over the target positions
                tgt_mask = torch.ones(len(beam_tokens), 1, tgt.size(1), tgt.size(1), device=device)
                tgt_mask = torch.tril(tgt_mask)
# Expand memory for beam size
expanded_memory = memory[batch_idx:batch_idx+1].expand(len(beam_tokens), -1, -1)
expanded_src_mask = src_mask[batch_idx:batch_idx+1].expand(len(beam_tokens), -1, -1, -1)
# Get predictions
decoder_output = model.decode(tgt, expanded_memory, tgt_mask, expanded_src_mask)
logits = model.output_projection(decoder_output[:, -1, :])
log_probs = F.log_softmax(logits, dim=-1)
# Get top k tokens for each beam
vocab_size = log_probs.size(-1)
top_log_probs, top_indices = torch.topk(log_probs, min(self.beam_size, vocab_size))
# Create new candidates
candidates = []
                for beam_idx, beam_log_probs, beam_token_ids in zip(
                        beam_indices, top_log_probs, top_indices):
                    hypothesis = beams[batch_idx][beam_idx]
                    for token_log_prob, token_id in zip(beam_log_probs, beam_token_ids):
new_tokens = hypothesis.tokens + [token_id.item()]
                        # Skip candidates that would repeat an n-gram of the blocked size
                        if self._has_repeated_ngram(new_tokens):
                            continue
new_log_prob = hypothesis.log_prob + token_log_prob.item()
# Apply length penalty
score = self._apply_length_penalty(new_log_prob, len(new_tokens))
candidates.append((
score,
BeamHypothesis(
tokens=new_tokens,
log_prob=new_log_prob,
finished=(token_id.item() == eos_id)
)
))
                # Carry over hypotheses that already ended with EOS so they keep
                # competing with the newly extended candidates
                for hypothesis in beams[batch_idx]:
                    if hypothesis.finished:
                        score = self._apply_length_penalty(hypothesis.log_prob, len(hypothesis.tokens))
                        candidates.append((score, hypothesis))
                # Select the top beam_size candidates by length-normalized score
                candidates.sort(key=lambda x: x[0], reverse=True)
                new_beams = [hypothesis for _, hypothesis in candidates[:self.beam_size]]
                # If every candidate was filtered out, keep the old beams
                if not new_beams:
                    new_beams = beams[batch_idx]
                beams[batch_idx] = new_beams
# Extract best sequences
results = []
for batch_idx in range(batch_size):
# Sort by score
sorted_hyps = sorted(
beams[batch_idx],
key=lambda h: self._apply_length_penalty(h.log_prob, len(h.tokens)),
reverse=True
)
# Get best hypothesis
best_hyp = sorted_hyps[0]
results.append(best_hyp.tokens)
return results
def _apply_length_penalty(self, log_prob: float, length: int) -> float:
"""Apply length penalty to score"""
return log_prob / (length ** self.length_penalty)
def _has_repeated_ngram(self, tokens: List[int]) -> bool:
"""Check if sequence has repeated n-grams"""
if self.no_repeat_ngram_size <= 0:
return False
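        # Slide a window of size n over the sequence and flag any n-gram seen twice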
ngrams = set()
for i in range(len(tokens) - self.no_repeat_ngram_size + 1):
ngram = tuple(tokens[i:i + self.no_repeat_ngram_size])
if ngram in ngrams:
return True
ngrams.add(ngram)
return False
class GreedyDecoder:
"""Simple greedy decoder for fast inference"""
@staticmethod
def decode(model, src: torch.Tensor, max_length: int = 100,
bos_id: int = 2, eos_id: int = 3, pad_id: int = 0) -> List[List[int]]:
"""
Perform greedy decoding
Args:
model: Transformer model
src: Source sequence [batch_size, src_len]
max_length: Maximum decoding length
bos_id: Beginning of sequence token
eos_id: End of sequence token
pad_id: Padding token
Returns:
List of decoded sequences
"""
batch_size = src.size(0)
device = src.device
# Use model's built-in generate method
with torch.no_grad():
translations = model.generate(
src,
max_length=max_length,
bos_id=bos_id,
eos_id=eos_id
)
# Convert to list
results = []
for i in range(batch_size):
tokens = translations[i].cpu().tolist()
            # Truncate everything after the first EOS token (EOS itself is kept)
if eos_id in tokens:
eos_idx = tokens.index(eos_id)
tokens = tokens[:eos_idx + 1]
results.append(tokens)
return results
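

# ---------------------------------------------------------------------------
# Illustrative usage sketch. The tiny _DummyModel below only mimics the
# interface these decoders expect (`encode`, `decode`, `output_projection`,
# `generate`); it is a stand-in for the real translingo transformer, and its
# outputs are meaningless beyond exercising the decoding loops.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import torch.nn as nn

    class _DummyModel(nn.Module):
        """Minimal stand-in exposing the methods used by the decoders above."""

        def __init__(self, vocab_size: int = 32, d_model: int = 16):
            super().__init__()
            self.src_embed = nn.Embedding(vocab_size, d_model)
            self.tgt_embed = nn.Embedding(vocab_size, d_model)
            self.output_projection = nn.Linear(d_model, vocab_size)

        def encode(self, src, src_mask):
            # [batch_size, src_len, d_model]
            return self.src_embed(src)

        def decode(self, tgt, memory, tgt_mask, src_mask):
            # No real attention: mix target embeddings with the mean source
            # representation so the output depends on both inputs
            return self.tgt_embed(tgt) + memory.mean(dim=1, keepdim=True)

        @torch.no_grad()
        def generate(self, src, max_length=100, bos_id=2, eos_id=3):
            # Greedy loop over the same encode/decode/project interface
            src_mask = (src != 0).unsqueeze(1).unsqueeze(2)
            memory = self.encode(src, src_mask)
            tokens = torch.full((src.size(0), 1), bos_id, dtype=torch.long, device=src.device)
            for _ in range(max_length - 1):
                out = self.decode(tokens, memory, None, src_mask)
                next_token = self.output_projection(out[:, -1, :]).argmax(dim=-1, keepdim=True)
                tokens = torch.cat([tokens, next_token], dim=1)
            return tokens

    torch.manual_seed(0)
    model = _DummyModel()
    src = torch.tensor([[2, 10, 11, 12, 3]])  # [batch_size=1, src_len=5]
    print("beam:  ", BeamSearch(beam_size=4).search(model, src, max_length=20))
    print("greedy:", GreedyDecoder.decode(model, src, max_length=20))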