| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242 |
- #!/usr/bin/env python3
- """
- Test script to verify that the GradScaler error fix is working correctly.
- This script creates a minimal training setup to test if mixed precision training
- works without the "No inf checks were recorded for this optimizer" error.
- """
- import torch
- import torch.nn as nn
- import torch.optim as optim
- import numpy as np
- import tempfile
- import shutil
- from pathlib import Path
- # Import our training components
- from trainer.base import TrainerConfig
- from trainer.voice_recognition.trainer import VoiceRecognitionTrainer
- from trainer.voice_recognition.models import ECAPA_TDNN, AngularMarginLoss
def create_test_data(data_dir: str, num_speakers: int = 3, samples_per_speaker: int = 10):
    """Generate a tiny synthetic voice-recognition dataset on disk.

    Each speaker gets a subdirectory under ``<data_dir>/raw`` filled with
    ``.npy`` clips of white noise mixed with a speaker-specific sine tone,
    so the data carries a weak per-speaker signature.

    Args:
        data_dir: Root directory; ``raw/`` is created beneath it.
        num_speakers: Number of speaker subdirectories to create.
        samples_per_speaker: Number of audio files per speaker.

    Returns:
        The path of the created ``raw`` directory, as a string.
    """
    print(f"Creating test data in {data_dir}")

    raw_dir = Path(data_dir) / "raw"
    raw_dir.mkdir(parents=True, exist_ok=True)

    # Audio parameters shared by every generated clip.
    sample_rate = 16000
    duration = 2.0  # seconds
    n_samples = int(sample_rate * duration)
    t = np.linspace(0, duration, n_samples)

    for spk in range(num_speakers):
        speaker_dir = raw_dir / f"speaker_{spk:02d}"
        speaker_dir.mkdir(exist_ok=True)

        # The tone depends only on the speaker, so compute it once per speaker:
        # a different base frequency gives each speaker its "voice".
        base_freq = 440 + spk * 100
        tone = 0.1 * np.sin(2 * np.pi * base_freq * t).astype(np.float32)

        for idx in range(samples_per_speaker):
            # White noise carrier plus the speaker-specific tone.
            clip = np.random.randn(n_samples).astype(np.float32)
            clip = clip * 0.8 + tone

            # Peak-normalize into [-1, 1].
            clip = clip / np.max(np.abs(clip))

            # Saved as .npy because the preprocessor expects that format.
            np.save(speaker_dir / f"sample_{idx:03d}.npy", clip)

    print(f"Created {num_speakers} speakers with {samples_per_speaker} samples each")
    return str(raw_dir)
def test_training_stability():
    """Run a short end-to-end training job to check GradScaler stability.

    Builds a throwaway synthetic dataset, configures a small ECAPA-TDNN
    trainer with mixed precision enabled, and trains for a few epochs.

    Returns:
        False only when the specific "No inf checks were recorded for this
        optimizer" GradScaler error occurs, or when test setup itself fails;
        True otherwise (including unrelated training errors, which may be
        expected with purely synthetic data).
    """
    print("Testing voice recognition training stability...")

    # All artifacts (dataset + trainer output) live in one throwaway dir.
    workdir = tempfile.mkdtemp()

    try:
        dataset_root = Path(workdir) / "voice_recognition"
        create_test_data(str(dataset_root), num_speakers=3, samples_per_speaker=5)

        cfg = TrainerConfig(
            trainer_name="test_voice_recognition",
            model_name="test_model",
            data_dir=str(dataset_root),
            output_dir=str(Path(workdir) / "output"),
            # Small training parameters so the test finishes quickly.
            batch_size=4,
            learning_rate=0.001,
            num_epochs=3,  # a few epochs is enough to surface the error
            min_epochs=1,
            early_stopping_patience=5,
            # Audio parameters.
            sample_rate=16000,
            audio_length=1.5,
            n_mels=40,
            n_fft=512,
            hop_length=160,
            win_length=400,
            # Augmentation enabled to stress the pipeline a little harder.
            use_augmentation=True,
            noise_factor=0.1,
            speed_factor=0.05,
            # Mixed precision is the feature under test.
            use_mixed_precision=True,
            gradient_clip_norm=1.0,
            # Model configuration — deliberately small for speed.
            custom_params={
                'model_type': 'ecapa_tdnn',
                'loss_type': 'angular_margin',
                'embedding_dim': 128,
                'angular_margin': 0.3,
                'angular_scale': 32.0,
                'ecapa_channels': 256,
                'speaker_mapping': {}
            }
        )
        print("Configuration created successfully")

        trainer = VoiceRecognitionTrainer(cfg)
        print("Trainer created successfully")

        print("Starting training test...")
        try:
            # This call must not raise the GradScaler error.
            outcome = trainer.train()
            print("✅ Training completed successfully!")

            # Sanity-report the results we got back.
            tracked = outcome['training_metrics']
            print(f"Final training loss: {tracked.metrics['train_loss'][-1]:.6f}")
            print(f"Training completed {tracked.current_epoch + 1} epochs")
            return True

        except Exception as err:
            if "No inf checks were recorded for this optimizer" in str(err):
                print("❌ GradScaler error still occurs!")
                print(f"Error: {err}")
                return False
            print(f"⚠️ Different error occurred: {err}")
            # Unrelated failures can be expected with synthetic data
            # (e.g. convergence issues), so they do not fail this test.
            return True

    except Exception as err:
        print(f"❌ Test setup failed: {err}")
        return False

    finally:
        # Always remove the scratch directory, pass or fail.
        shutil.rmtree(workdir, ignore_errors=True)
        print(f"Cleaned up temporary directory: {workdir}")
def test_mixed_precision_components():
    """Exercise GradScaler + autocast on a toy model for a few steps.

    Skips (returning True) when CUDA is unavailable, since the
    ``torch.cuda.amp`` machinery requires a GPU.

    Returns:
        True when the scale/step/update cycle runs cleanly (or the test is
        skipped), False if any mixed-precision step raises.
    """
    print("Testing mixed precision components...")

    device = "cuda" if torch.cuda.is_available() else "cpu"
    if device == "cpu":
        print("⚠️ CUDA not available, skipping mixed precision tests")
        return True

    print(f"Using device: {device}")

    try:
        scaler = torch.cuda.amp.GradScaler()
        print("✅ GradScaler initialized successfully")

        # A tiny MLP is enough to drive the full scale/step/update cycle.
        net = nn.Sequential(
            nn.Linear(10, 20),
            nn.ReLU(),
            nn.Linear(20, 5),
        ).to(device)

        opt = optim.AdamW(net.parameters(), lr=0.001)
        loss_fn = nn.CrossEntropyLoss()

        net.train()
        for step in range(5):
            # Fresh synthetic batch each step.
            inputs = torch.randn(4, 10, device=device)
            targets = torch.randint(0, 5, (4,), device=device)

            opt.zero_grad()

            with torch.cuda.amp.autocast():
                loss = loss_fn(net(inputs), targets)

            # The "no inf checks" failure mode would surface in these calls.
            scaler.scale(loss).backward()
            scaler.step(opt)
            scaler.update()

            print(f"Step {step + 1}: loss = {loss.item():.6f}")

        print("✅ Mixed precision training step completed successfully")
        return True

    except Exception as exc:
        print(f"❌ Mixed precision test failed: {exc}")
        return False
def main():
    """Run both GradScaler tests and report a process exit status.

    Returns:
        0 when every test passed, 1 otherwise.
    """
    banner = "=" * 60
    print(banner)
    print("Testing GradScaler Fix for Voice Recognition Training")
    print(banner)

    # Test 1: the low-level mixed precision primitives.
    print("\n1. Testing mixed precision components...")
    components_ok = test_mixed_precision_components()

    # Test 2: the end-to-end training pipeline.
    print("\n2. Testing full training pipeline...")
    pipeline_ok = test_training_stability()

    # Summary block.
    print("\n" + banner)
    print("TEST SUMMARY")
    print(banner)
    print(f"Mixed precision components: {'✅ PASS' if components_ok else '❌ FAIL'}")
    print(f"Training stability: {'✅ PASS' if pipeline_ok else '❌ FAIL'}")

    if components_ok and pipeline_ok:
        print("\n🎉 All tests passed! The GradScaler fix is working correctly.")
        return 0
    print("\n⚠️ Some tests failed. Review the output above for details.")
    return 1
if __name__ == "__main__":
    # Raise SystemExit directly instead of calling exit(): the exit() helper
    # is injected by the `site` module and is not guaranteed to exist in
    # every interpreter configuration (e.g. `python -S`, embedded runtimes).
    raise SystemExit(main())
|