Spaces:

shivanshneu
/

translingo

Sleeping

App Files Files Community

translingo / test_setup.py

Ratan1

Initial commit: Complete TransLingo translation system

1620846 about 1 month ago

raw

history blame contribute delete

4.68 kB

	#!/usr/bin/env python3
	"""
	Test script to verify TransLingo setup
	"""

	import os
	import sys
	import torch
	import logging

	# Configure the root logger once at import time so INFO-level (and higher)
	# records produced by the checks below are actually emitted.
	logging.basicConfig(level=logging.INFO)
	# Module-level logger used by every check function in this script.
	logger = logging.getLogger(__name__)

	def test_imports():
	    """Check that every listed project module can be imported.

	    Each dotted module name is imported in turn. A failing import is logged
	    and recorded, and the remaining modules are still attempted so that one
	    run reports every broken module.

	    Returns:
	        bool: True when every module imported without raising an exception,
	        False when at least one import failed.
	    """
	    # Imported locally so this function does not depend on a top-level
	    # import that the rest of the file does not otherwise need.
	    import importlib

	    logger.info("Testing imports...")

	    modules = [
	        "data.download",
	        "data.preprocessing",
	        "model.transformer",
	        "model.attention",
	        "model.embeddings",
	        "model.layers",
	        "training.train",
	        "training.loss",
	        "training.optimizer",
	        "inference.beam_search",
	        "inference.translate",
	        "utils.metrics",
	        "frontend.gradio_app",
	    ]

	    failed = []
	    for module in modules:
	        try:
	            # import_module is the documented programmatic import API;
	            # calling the __import__ builtin directly is discouraged.
	            importlib.import_module(module)
	        except Exception as e:
	            # Broad on purpose: any error raised while a module executes at
	            # import time counts as a setup failure for that module.
	            logger.error(f"❌ {module}: {e}")
	            failed.append(module)
	        else:
	            logger.info(f"✅ {module}")

	    return not failed

	def test_cuda():
	    """Log the PyTorch version and whether a CUDA device is usable.

	    This check is informational only: a missing GPU produces a warning, not
	    a failure.

	    Returns:
	        bool: Always True.
	    """
	    logger.info("\nTesting CUDA...")
	    logger.info(f"PyTorch version: {torch.__version__}")
	    logger.info(f"CUDA available: {torch.cuda.is_available()}")

	    # Guard clause: without CUDA there are no device details to report.
	    if not torch.cuda.is_available():
	        logger.warning("CUDA not available - training will be slow!")
	        return True

	    # Report the name and total memory of device index 0.
	    logger.info(f"GPU: {torch.cuda.get_device_name(0)}")
	    logger.info(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
	    return True

	def test_model_creation():
	    """Build a small Transformer and run one forward pass as a smoke test.

	    The model class is imported inside the ``try`` block so that an import
	    error is reported as a failed check instead of propagating.

	    Returns:
	        bool: True when the model was constructed and the forward pass
	        completed, False when any step raised an exception.
	    """
	    logger.info("\nTesting model creation...")

	    # One source of truth for the sizes used by both the model config and
	    # the random token ids, so the two cannot drift apart.
	    vocab_size = 1000
	    batch_size, seq_len = 2, 10

	    try:
	        from model.transformer import Transformer

	        # Create small test model
	        model = Transformer(
	            vocab_size=vocab_size,
	            d_model=128,
	            n_heads=4,
	            n_layers=2,
	            d_ff=512,
	            max_seq_length=50,
	        )

	        # Test forward pass on the GPU when one is available, else on CPU.
	        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
	        model = model.to(device)

	        # Allocate the token ids directly on the target device instead of
	        # creating them on the CPU and copying them over.
	        src = torch.randint(0, vocab_size, (batch_size, seq_len), device=device)
	        tgt = torch.randint(0, vocab_size, (batch_size, seq_len), device=device)

	        # Only the output shape is inspected, so gradient tracking is
	        # unnecessary work and memory for this check.
	        with torch.no_grad():
	            output = model(src, tgt)

	        logger.info(f"✅ Model output shape: {output.shape}")
	        logger.info(f"✅ Model parameters: {sum(p.numel() for p in model.parameters()):,}")

	        return True

	    except Exception as e:
	        # Broad on purpose: import, construction and forward-pass errors are
	        # all reported the same way, as a failed check.
	        logger.error(f"❌ Model creation failed: {e}")
	        return False

	def test_data_directory():
	    """Check that the expected project directories are present.

	    The paths are relative, so they are resolved against the current
	    working directory of the process. Every path is checked and logged even
	    after a missing one is found.

	    Returns:
	        bool: True when every expected path is an existing directory, False
	        when at least one is missing or is not a directory.
	    """
	    logger.info("\nChecking directory structure...")

	    dirs_to_check = [
	        "data",
	        "data/raw",
	        "data/processed",
	        "model",
	        "training",
	        "inference",
	        "utils",
	        "api",
	        "frontend",
	        "notebooks",
	        "configs",
	    ]

	    all_exist = True
	    for dir_path in dirs_to_check:
	        # isdir rather than exists: a regular file that happens to carry the
	        # expected name must not satisfy a directory check.
	        if os.path.isdir(dir_path):
	            logger.info(f"✅ {dir_path}")
	        else:
	            logger.error(f"❌ {dir_path} - missing")
	            all_exist = False

	    return all_exist

	def main():
	    """Run each setup check in order and log a pass/fail summary.

	    A check that raises is logged as crashed and counted as failed; the
	    remaining checks still run.

	    Returns:
	        int: 0 when every check passed, 1 otherwise.
	    """
	    banner = "=" * 50
	    logger.info(banner)
	    logger.info("TransLingo Setup Test")
	    logger.info(banner)

	    checks = (
	        ("Directory Structure", test_data_directory),
	        ("Module Imports", test_imports),
	        ("CUDA/GPU", test_cuda),
	        ("Model Creation", test_model_creation),
	    )

	    # Insertion-ordered mapping of check name -> outcome.
	    outcomes = {}
	    for test_name, check in checks:
	        try:
	            outcomes[test_name] = check()
	        except Exception as e:
	            logger.error(f"Test {test_name} crashed: {e}")
	            outcomes[test_name] = False

	    # Summary
	    logger.info("\n" + banner)
	    logger.info("Test Summary")
	    logger.info(banner)

	    for test_name, passed in outcomes.items():
	        if passed:
	            status = "✅ PASSED"
	        else:
	            status = "❌ FAILED"
	        logger.info(f"{test_name}: {status}")

	    # Failure path first, so the success path reads straight through.
	    if not all(outcomes.values()):
	        logger.error("\n❌ Some tests failed. Please fix the issues before proceeding.")
	        return 1

	    logger.info("\n🎉 All tests passed! You're ready to start training.")
	    logger.info("\nNext steps:")
	    logger.info("1. Upload notebooks/colab_training.py to Google Colab")
	    logger.info("2. Run training on Colab with GPU")
	    logger.info("3. Download checkpoints when training completes")
	    logger.info("4. Run: python frontend/gradio_app.py")
	    return 0

	# Script entry point: run the checks and use main()'s return value as the
	# process exit status.
	if __name__ == "__main__":
	    raise SystemExit(main())