#!/usr/bin/env python3
"""
Test script to verify TransLingo setup
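
Checks the project directory layout, module imports, CUDA availability, and a
small Transformer forward pass. Run it from the project root so the relative
directory checks resolve correctly.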
"""

import importlib
import logging
import os
import sys

import torch

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def test_imports():
    """Test if all modules can be imported"""
    logger.info("Testing imports...")
    
    modules = [
        "data.download",
        "data.preprocessing",
        "model.transformer",
        "model.attention",
        "model.embeddings",
        "model.layers",
        "training.train",
        "training.loss",
        "training.optimizer",
        "inference.beam_search",
        "inference.translate",
        "utils.metrics",
        "frontend.gradio_app"
    ]
    
    failed = []
    for module in modules:
        try:
            importlib.import_module(module)
            logger.info(f"✅ {module}")
        except Exception as e:
            logger.error(f"❌ {module}: {e}")
            failed.append(module)
    
    return len(failed) == 0

def test_cuda():
    """Test CUDA availability"""
    logger.info("\nTesting CUDA...")
    logger.info(f"PyTorch version: {torch.__version__}")
    logger.info(f"CUDA available: {torch.cuda.is_available()}")
    
    if torch.cuda.is_available():
        logger.info(f"GPU: {torch.cuda.get_device_name(0)}")
        logger.info(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    else:
        logger.warning("CUDA not available - training will be slow!")
    
    # A missing GPU only triggers the warning above; this check always passes.
    return True

def test_model_creation():
    """Test if model can be created"""
    logger.info("\nTesting model creation...")
    
    try:
        from model.transformer import Transformer
        
        # Create small test model
        model = Transformer(
            vocab_size=1000,
            d_model=128,
            n_heads=4,
            n_layers=2,
            d_ff=512,
            max_seq_length=50
        )
        
        # Test forward pass
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model = model.to(device)
        
        # Dummy batch: two sequences of length 10 with random token IDs in [0, 1000)
        src = torch.randint(0, 1000, (2, 10)).to(device)
        tgt = torch.randint(0, 1000, (2, 10)).to(device)
        
        output = model(src, tgt)
        logger.info(f"✅ Model output shape: {output.shape}")
        logger.info(f"✅ Model parameters: {sum(p.numel() for p in model.parameters()):,}")
        
        return True
    
    except Exception as e:
        logger.error(f"❌ Model creation failed: {e}")
        return False

def test_data_directory():
    """Test data directory structure"""
    logger.info("\nChecking directory structure...")
    
    dirs_to_check = [
        "data",
        "data/raw",
        "data/processed",
        "model",
        "training",
        "inference",
        "utils",
        "api",
        "frontend",
        "notebooks",
        "configs"
    ]
    
    all_exist = True
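    # Paths are checked relative to the current working directory.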
    for dir_path in dirs_to_check:
        if os.path.exists(dir_path):
            logger.info(f"✅ {dir_path}")
        else:
            logger.error(f"❌ {dir_path} - missing")
            all_exist = False
    
    return all_exist

def main():
    """Run all tests"""
    logger.info("=" * 50)
    logger.info("TransLingo Setup Test")
    logger.info("=" * 50)
    
    tests = [
        ("Directory Structure", test_data_directory),
        ("Module Imports", test_imports),
        ("CUDA/GPU", test_cuda),
        ("Model Creation", test_model_creation)
    ]
    
    results = []
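    # Run each test; an uncaught exception is recorded as a failure instead of aborting the run.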
    for test_name, test_func in tests:
        try:
            result = test_func()
            results.append((test_name, result))
        except Exception as e:
            logger.error(f"Test {test_name} crashed: {e}")
            results.append((test_name, False))
    
    # Summary
    logger.info("\n" + "=" * 50)
    logger.info("Test Summary")
    logger.info("=" * 50)
    
    all_passed = True
    for test_name, passed in results:
        status = "✅ PASSED" if passed else "❌ FAILED"
        logger.info(f"{test_name}: {status}")
        if not passed:
            all_passed = False
    
    if all_passed:
        logger.info("\n🎉 All tests passed! You're ready to start training.")
        logger.info("\nNext steps:")
        logger.info("1. Upload notebooks/colab_training.py to Google Colab")
        logger.info("2. Run training on Colab with GPU")
        logger.info("3. Download checkpoints when training completes")
        logger.info("4. Run: python frontend/gradio_app.py")
    else:
        logger.error("\n❌ Some tests failed. Please fix the issues before proceeding.")
    
    return 0 if all_passed else 1

if __name__ == "__main__":
    sys.exit(main())