#!/usr/bin/env python3
"""
Test script to verify TransLingo setup
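
Checks the project directory layout, module imports, CUDA availability, and a
small Transformer forward pass. Run it from the project root so the relative
directory checks resolve correctly.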
"""

import importlib
import logging
import os
import sys

import torch

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def test_imports():
    """Test if all modules can be imported"""
    logger.info("Testing imports...")
    
    modules = [
        "data.download",
        "data.preprocessing",
        "model.transformer",
        "model.attention",
        "model.embeddings",
        "model.layers",
        "training.train",
        "training.loss",
        "training.optimizer",
        "inference.beam_search",
        "inference.translate",
        "utils.metrics",
        "frontend.gradio_app"
    ]
    
    failed = []
    for module in modules:
        try:
            importlib.import_module(module)
            logger.info(f"✅ {module}")
        except Exception as e:
            logger.error(f"❌ {module}: {e}")
            failed.append(module)
    
    return len(failed) == 0

def test_cuda():
    """Test CUDA availability"""
    logger.info("\nTesting CUDA...")
    logger.info(f"PyTorch version: {torch.__version__}")
    logger.info(f"CUDA available: {torch.cuda.is_available()}")
    
    if torch.cuda.is_available():
        logger.info(f"GPU: {torch.cuda.get_device_name(0)}")
        logger.info(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    else:
        logger.warning("CUDA not available - training will be slow!")
    
    # A missing GPU only triggers the warning above; this check always passes.
    return True

def test_model_creation():
    """Test if model can be created"""
    logger.info("\nTesting model creation...")
    
    try:
        from model.transformer import Transformer
        
        # Create small test model
        model = Transformer(
            vocab_size=1000,
            d_model=128,
            n_heads=4,
            n_layers=2,
            d_ff=512,
            max_seq_length=50
        )
        
        # Test forward pass
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model = model.to(device)
        
        # Dummy batch: two sequences of length 10 with random token IDs in [0, 1000)
        src = torch.randint(0, 1000, (2, 10)).to(device)
        tgt = torch.randint(0, 1000, (2, 10)).to(device)
        
        output = model(src, tgt)
        logger.info(f"✅ Model output shape: {output.shape}")
        logger.info(f"✅ Model parameters: {sum(p.numel() for p in model.parameters()):,}")
        
        return True
    
    except Exception as e:
        logger.error(f"❌ Model creation failed: {e}")
        return False

def test_data_directory():
    """Test data directory structure"""
    logger.info("\nChecking directory structure...")
    
    dirs_to_check = [
        "data",
        "data/raw",
        "data/processed",
        "model",
        "training",
        "inference",
        "utils",
        "api",
        "frontend",
        "notebooks",
        "configs"
    ]
    
    all_exist = True
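    # Paths are checked relative to the current working directory.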
    for dir_path in dirs_to_check:
        if os.path.exists(dir_path):
            logger.info(f"✅ {dir_path}")
        else:
            logger.error(f"❌ {dir_path} - missing")
            all_exist = False
    
    return all_exist

def main():
    """Run all tests"""
    logger.info("=" * 50)
    logger.info("TransLingo Setup Test")
    logger.info("=" * 50)
    
    tests = [
        ("Directory Structure", test_data_directory),
        ("Module Imports", test_imports),
        ("CUDA/GPU", test_cuda),
        ("Model Creation", test_model_creation)
    ]
    
    results = []
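    # Run each test; an uncaught exception is recorded as a failure instead of aborting the run.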
    for test_name, test_func in tests:
        try:
            result = test_func()
            results.append((test_name, result))
        except Exception as e:
            logger.error(f"Test {test_name} crashed: {e}")
            results.append((test_name, False))
    
    # Summary
    logger.info("\n" + "=" * 50)
    logger.info("Test Summary")
    logger.info("=" * 50)
    
    all_passed = True
    for test_name, passed in results:
        status = "✅ PASSED" if passed else "❌ FAILED"
        logger.info(f"{test_name}: {status}")
        if not passed:
            all_passed = False
    
    if all_passed:
        logger.info("\n🎉 All tests passed! You're ready to start training.")
        logger.info("\nNext steps:")
        logger.info("1. Upload notebooks/colab_training.py to Google Colab")
        logger.info("2. Run training on Colab with GPU")
        logger.info("3. Download checkpoints when training completes")
        logger.info("4. Run: python frontend/gradio_app.py")
    else:
        logger.error("\n❌ Some tests failed. Please fix the issues before proceeding.")
    
    return 0 if all_passed else 1

if __name__ == "__main__":
    sys.exit(main())