translingo / test_setup.py
Ratan1's picture
Initial commit: Complete TransLingo translation system
1620846
#!/usr/bin/env python3
"""
Test script to verify TransLingo setup
"""
import importlib
import logging
import os
import sys

import torch
# Configure the root logger so INFO-level (and higher) records are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger used by every check in this script.
logger = logging.getLogger(__name__)
def test_imports():
    """Check that every listed TransLingo module can be imported.

    Each dotted module name is imported in turn. A failure is logged and
    recorded instead of aborting, so a single run reports every module
    that is broken.

    Returns:
        bool: True if all modules imported without raising, False otherwise.
    """
    logger.info("Testing imports...")

    modules = [
        "data.download",
        "data.preprocessing",
        "model.transformer",
        "model.attention",
        "model.embeddings",
        "model.layers",
        "training.train",
        "training.loss",
        "training.optimizer",
        "inference.beam_search",
        "inference.translate",
        "utils.metrics",
        "frontend.gradio_app",
    ]

    failed = []
    for module in modules:
        try:
            # import_module is the documented way to import by dotted name;
            # direct use of __import__ is discouraged by the Python docs.
            importlib.import_module(module)
        except Exception as e:
            # Deliberately broad: importing a module runs its top-level code,
            # which can fail with any exception type.
            logger.error(f"❌ {module}: {e}")
            failed.append(module)
        else:
            logger.info(f"✅ {module}")

    return not failed
def test_cuda():
    """Log the PyTorch version and whether CUDA is available.

    This check is informational only: when CUDA is unavailable a warning
    is logged, but the function still returns True.

    Returns:
        bool: Always True.
    """
    logger.info("\nTesting CUDA...")
    logger.info(f"PyTorch version: {torch.__version__}")

    cuda_ok = torch.cuda.is_available()
    logger.info(f"CUDA available: {cuda_ok}")

    # Guard clause: without CUDA there are no device details to report.
    if not cuda_ok:
        logger.warning("CUDA not available - training will be slow!")
        return True

    logger.info(f"GPU: {torch.cuda.get_device_name(0)}")
    total_bytes = torch.cuda.get_device_properties(0).total_memory
    logger.info(f"Memory: {total_bytes / 1e9:.2f} GB")
    return True
def test_model_creation():
    """Build a small Transformer and run one forward pass as a smoke test.

    The model class is imported inside the ``try`` so that an import
    failure is logged and reported as a failed check rather than raising.

    Returns:
        bool: True if construction and the forward pass complete, False if
        any step raises (the error is logged).
    """
    logger.info("\nTesting model creation...")

    # One constant for both the model and the random inputs, so the generated
    # ids always fall in [0, vocab_size).
    vocab_size = 1000

    try:
        from model.transformer import Transformer

        # Create small test model
        model = Transformer(
            vocab_size=vocab_size,
            d_model=128,
            n_heads=4,
            n_layers=2,
            d_ff=512,
            max_seq_length=50,
        )

        # Test forward pass on the GPU when one is available, else on CPU.
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model = model.to(device)

        # Integer tensors of shape (2, 10) -- presumably (batch, sequence);
        # confirm against Transformer.forward.
        src = torch.randint(0, vocab_size, (2, 10)).to(device)
        tgt = torch.randint(0, vocab_size, (2, 10)).to(device)

        # Only the output shape is inspected, so skip autograd bookkeeping.
        with torch.no_grad():
            output = model(src, tgt)

        logger.info(f"✅ Model output shape: {output.shape}")
        logger.info(f"✅ Model parameters: {sum(p.numel() for p in model.parameters()):,}")
        return True
    except Exception as e:
        logger.error(f"❌ Model creation failed: {e}")
        return False
def test_data_directory():
    """Verify that the expected project directories are present.

    Paths are relative, so they are resolved against the current working
    directory. Every entry is checked and logged, even after a miss, so
    one run lists all missing directories.

    Returns:
        bool: True if every listed path is an existing directory, False
        otherwise.
    """
    logger.info("\nChecking directory structure...")

    dirs_to_check = [
        "data",
        "data/raw",
        "data/processed",
        "model",
        "training",
        "inference",
        "utils",
        "api",
        "frontend",
        "notebooks",
        "configs",
    ]

    all_exist = True
    for dir_path in dirs_to_check:
        # isdir rather than exists: a regular file that happens to share the
        # name must not satisfy a directory check.
        if os.path.isdir(dir_path):
            logger.info(f"✅ {dir_path}")
        else:
            logger.error(f"❌ {dir_path} - missing")
            all_exist = False

    return all_exist
def main():
    """Run every setup check and log a pass/fail summary.

    Checks run in a fixed order. A check that raises is logged and counted
    as failed, and the remaining checks still run.

    Returns:
        int: 0 if all checks passed, 1 otherwise.
    """
    banner = "=" * 50

    logger.info(banner)
    logger.info("TransLingo Setup Test")
    logger.info(banner)

    tests = [
        ("Directory Structure", test_data_directory),
        ("Module Imports", test_imports),
        ("CUDA/GPU", test_cuda),
        ("Model Creation", test_model_creation),
    ]

    results = []
    for test_name, test_func in tests:
        try:
            result = test_func()
        except Exception as e:
            # A crashing check is a failed check; keep going so the summary
            # covers every check.
            logger.error(f"Test {test_name} crashed: {e}")
            result = False
        results.append((test_name, result))

    # Summary
    logger.info("\n" + banner)
    logger.info("Test Summary")
    logger.info(banner)

    for test_name, passed in results:
        status = "✅ PASSED" if passed else "❌ FAILED"
        logger.info(f"{test_name}: {status}")

    all_passed = all(passed for _, passed in results)

    if all_passed:
        logger.info("\n🎉 All tests passed! You're ready to start training.")
        logger.info("\nNext steps:")
        logger.info("1. Upload notebooks/colab_training.py to Google Colab")
        logger.info("2. Run training on Colab with GPU")
        logger.info("3. Download checkpoints when training completes")
        logger.info("4. Run: python frontend/gradio_app.py")
    else:
        logger.error("\n❌ Some tests failed. Please fix the issues before proceeding.")

    return 0 if all_passed else 1
if __name__ == "__main__":
    # Hand main()'s integer result to the interpreter as the exit status.
    exit_code = main()
    sys.exit(exit_code)