| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293 |
- #!/usr/bin/env python3
- """
- Audio Processing Parameter Fix Script
- This script identifies and fixes common audio processing parameter issues
- in the Trixy voice assistant codebase, specifically MelSpectrogram and MFCC
- parameter compatibility with current torchaudio versions.
- """
- import os
- import re
- import json
- import logging
- from pathlib import Path
- from typing import List, Dict, Any
# Emit INFO-and-above records through the root logger, and give this module
# its own named logger so its messages can be told apart from other modules'.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class AudioProcessingFixer:
    """Report torchaudio parameter problems found in a source tree.

    The checker walks ``source_dir`` recursively, flags Python and JSON files
    that use outdated ``MelSpectrogram``/``MFCC`` parameter names, smoke-tests
    the project's audio pipeline, and probes the installed torch/torchaudio.
    It only reports: no file on disk is modified.

    Attributes:
        source_dir: Root directory that is scanned recursively.
        issues_found: Issues collected by the most recent ``scan_for_issues``.
        fixes_applied: Initialised empty; nothing in this class writes to it.
    """

    # (compiled regex, human-readable description) pairs applied to every
    # Python file. Compiled once here rather than once per scanned file.
    _PYTHON_PATTERNS = tuple(
        (re.compile(pattern, re.MULTILINE | re.DOTALL), description)
        for pattern, description in (
            # Wrong parameter names in MelSpectrogram
            (r'MelSpectrogram\([^)]*fmin\s*=', 'MelSpectrogram using fmin instead of f_min'),
            (r'MelSpectrogram\([^)]*fmax\s*=', 'MelSpectrogram using fmax instead of f_max'),
            # Wrong parameter names in MFCC melkwargs
            (r'melkwargs\s*=\s*{[^}]*["\']fmin["\']', 'MFCC melkwargs using fmin instead of f_min'),
            (r'melkwargs\s*=\s*{[^}]*["\']fmax["\']', 'MFCC melkwargs using fmax instead of f_max'),
            # Deprecated normalized parameter usage
            (r'MelSpectrogram\([^)]*normalized\s*=\s*True', 'MelSpectrogram using deprecated normalized=True'),
        )
    )

    # A JSON file is treated as audio configuration if it mentions any of these.
    _AUDIO_TERMS = ('mel', 'mfcc', 'audio', 'spectrogram')

    # Exact call text searched for when suggesting the GradScaler migration.
    _GRAD_SCALER_CALL = 'torch.cuda.amp.GradScaler()'

    # Sentinel returned by create_compatibility_fixes when nothing was found;
    # run_comprehensive_check compares against it.
    _NO_FIXES_MESSAGE = "No compatibility fixes needed."

    def __init__(self, source_dir: str = "."):
        """Initialize the fixer.

        Args:
            source_dir: Directory to scan; defaults to the current directory.
        """
        self.source_dir = Path(source_dir)
        self.issues_found = []
        self.fixes_applied = []

    def _read_text(self, file_path: Path) -> str:
        """Return the UTF-8 text of ``file_path``, or ``""`` if it is unreadable.

        Unreadable files (I/O errors, directories matching the glob, invalid
        UTF-8) are logged and reported as empty text. Empty text matches none
        of the checks, so callers effectively skip such files.
        """
        try:
            return file_path.read_text(encoding='utf-8')
        except (OSError, UnicodeError) as e:
            logger.warning(f"Error scanning {file_path}: {e}")
            return ""

    def _scan_python_source(self, file_path: Path, content: str) -> List[Dict[str, Any]]:
        """Return one issue record per regex match in a Python file's text."""
        issues = []
        for pattern, description in self._PYTHON_PATTERNS:
            for match in pattern.finditer(content):
                issues.append({
                    'file': str(file_path),
                    # 1-based line of the match start: newlines before it, plus one.
                    'line': content.count('\n', 0, match.start()) + 1,
                    'pattern': pattern.pattern,
                    'description': description,
                    'match': match.group(0),
                })
        return issues

    def _scan_json_config(self, file_path: Path, content: str) -> List[Dict[str, Any]]:
        """Return a one-element issue list if a JSON audio config uses ``fmin``.

        The file is flagged only when it mentions an audio-related term,
        contains ``fmin`` and does not contain ``f_min`` anywhere. The check is
        textual (the JSON is not parsed), so the reported line is always 1.
        """
        lowered = content.lower()
        if not any(term in lowered for term in self._AUDIO_TERMS):
            return []
        if 'fmin' not in content or 'f_min' in content:
            return []
        return [{
            'file': str(file_path),
            'line': 1,
            'pattern': 'fmin in JSON config',
            'description': 'JSON config using fmin instead of f_min',
            'match': 'fmin in configuration',
        }]

    def scan_for_issues(self) -> List[Dict[str, Any]]:
        """Scan for potential audio processing parameter issues.

        Every ``*.py`` and ``*.json`` file below ``source_dir`` is examined.
        Python issues are listed first, then JSON issues; within each group
        files are visited in sorted path order so reports are reproducible.

        Returns:
            The list of issue dicts (keys ``file``, ``line``, ``pattern``,
            ``description``, ``match``), also stored in ``self.issues_found``.
        """
        logger.info("Scanning for audio processing parameter issues...")

        # Start from a clean slate: without this, a second scan would append
        # to the previous results and report every issue twice.
        self.issues_found = []

        for file_path in sorted(self.source_dir.rglob("*.py")):
            content = self._read_text(file_path)
            self.issues_found.extend(self._scan_python_source(file_path, content))

        for file_path in sorted(self.source_dir.rglob("*.json")):
            content = self._read_text(file_path)
            self.issues_found.extend(self._scan_json_config(file_path, content))

        logger.info(f"Found {len(self.issues_found)} potential issues")
        return self.issues_found

    def test_current_audio_pipeline(self) -> bool:
        """Test the current audio processing pipeline.

        Builds the project's audio processor and feature extractors, then runs
        each of them once on a random ``(1, 16000)`` tensor.

        Returns:
            True if every step completed; False if anything raised (including
            a failed import), in which case the error is logged with its
            traceback.
        """
        logger.info("Testing current audio processing pipeline...")

        try:
            # Test data pipeline
            from trainer.data_pipeline import AudioProcessingConfig, AudioProcessor

            config = AudioProcessingConfig()
            processor = AudioProcessor(config)
            logger.info("✓ AudioProcessor creation successful")

            # Test voice recognition audio features
            from trixy_core.ml.voice_recognition.audio_features import create_feature_extractor

            log_mel_extractor = create_feature_extractor("log_mel", sample_rate=16000)
            mfcc_extractor = create_feature_extractor("mfcc", sample_rate=16000)
            logger.info("✓ Voice recognition feature extractors creation successful")

            # Test wakeword audio features
            from trixy_core.ml.wakeword.audio_features import create_feature_extractor as ww_create_extractor

            ww_extractor = ww_create_extractor()
            logger.info("✓ Wakeword feature extractor creation successful")

            # Test actual feature extraction with dummy data
            import torch
            dummy_audio = torch.randn(1, 16000)  # 1 second of audio

            # Only successful completion matters; the results are discarded.
            processor.extract_features(dummy_audio)
            log_mel_extractor(dummy_audio)
            mfcc_extractor(dummy_audio)
            ww_extractor.extract_features(dummy_audio)

            logger.info("✓ All audio feature extraction tests passed")
            return True

        except Exception as e:
            # logger.exception logs at ERROR level and appends the traceback.
            logger.exception(f"Audio pipeline test failed: {e}")
            return False

    def validate_torchaudio_compatibility(self) -> Dict[str, Any]:
        """Validate torchaudio compatibility.

        Returns:
            A dict with keys ``torch_version``, ``torchaudio_version``,
            ``compatible`` and ``recommendations``. If torch or torchaudio
            cannot be imported, both versions are None, ``compatible`` is
            False and the import error is recorded as a recommendation.
        """
        logger.info("Validating torchaudio compatibility...")

        version_info = {
            'torch_version': None,
            'torchaudio_version': None,
            'compatible': True,
            'recommendations': []
        }

        # A missing or unloadable install is itself a compatibility finding,
        # so report it instead of letting the import error abort the check.
        try:
            import torch
            import torchaudio
        except (ImportError, OSError) as e:
            version_info['compatible'] = False
            version_info['recommendations'].append(f"torch/torchaudio import failed: {e}")
            return version_info

        version_info['torch_version'] = torch.__version__
        version_info['torchaudio_version'] = torchaudio.__version__

        # Test MelSpectrogram parameters
        try:
            import torchaudio.transforms as T

            # Constructing the transforms is the test: a rejected keyword raises.
            T.MelSpectrogram(
                sample_rate=16000,
                n_fft=512,
                hop_length=160,
                n_mels=40,
                f_min=80.0,
                f_max=8000.0
            )
            logger.info("✓ MelSpectrogram with f_min/f_max works")

            # Test MFCC with melkwargs
            T.MFCC(
                sample_rate=16000,
                n_mfcc=40,
                melkwargs={
                    'n_fft': 512,
                    'hop_length': 160,
                    'n_mels': 40,
                    'f_min': 80.0,
                    'f_max': 8000.0
                }
            )
            logger.info("✓ MFCC with melkwargs f_min/f_max works")

        except Exception as e:
            version_info['compatible'] = False
            version_info['recommendations'].append(f"MelSpectrogram/MFCC parameter error: {e}")

        # Check for deprecated features. The recommendation is added whenever
        # the old import path exists, whether or not the codebase uses it.
        try:
            from torch.cuda.amp import GradScaler  # noqa: F401
            version_info['recommendations'].append(
                "Consider updating GradScaler usage to torch.amp.GradScaler('cuda') "
                "to avoid deprecation warnings"
            )
        except ImportError:
            pass

        return version_info

    def create_compatibility_fixes(self) -> str:
        """Create a compatibility fix patch.

        Returns:
            A newline-joined list of suggested edits, one per Python file that
            contains the text ``torch.cuda.amp.GradScaler()``, or
            ``"No compatibility fixes needed."`` when no file does.
        """
        # Unreadable files come back as "" from _read_text and are skipped
        # with a logged warning rather than being silently ignored.
        grad_scaler_files = [
            file_path
            for file_path in sorted(self.source_dir.rglob("*.py"))
            if self._GRAD_SCALER_CALL in self._read_text(file_path)
        ]

        if not grad_scaler_files:
            return self._NO_FIXES_MESSAGE

        fixes = ["Fix GradScaler deprecation warning:"]
        for file_path in grad_scaler_files:
            fixes.append(f" - {file_path}: Replace torch.cuda.amp.GradScaler() with torch.amp.GradScaler('cuda')")
        return "\n".join(fixes)

    def run_comprehensive_check(self) -> Dict[str, Any]:
        """Run comprehensive audio processing check.

        Runs the source scan, the pipeline smoke test and the torchaudio
        probe, then summarises them.

        Returns:
            A dict with keys ``issues_found`` (list of issue dicts),
            ``pipeline_test`` (bool), ``compatibility`` (dict from
            ``validate_torchaudio_compatibility``) and ``recommendations``
            (list of human-readable summary lines).
        """
        logger.info("Running comprehensive audio processing check...")

        results = {
            'issues_found': self.scan_for_issues(),
            'pipeline_test': self.test_current_audio_pipeline(),
            'compatibility': self.validate_torchaudio_compatibility(),
            'recommendations': []
        }
        recommendations = results['recommendations']

        # Generate recommendations
        if not results['issues_found']:
            recommendations.append("✓ No audio processing parameter issues found")
        else:
            recommendations.append(f"⚠️ Found {len(results['issues_found'])} potential issues")
            for issue in results['issues_found']:
                recommendations.append(f" - {issue['file']}:{issue['line']} - {issue['description']}")

        if results['pipeline_test']:
            recommendations.append("✓ Audio processing pipeline tests passed")
        else:
            recommendations.append("❌ Audio processing pipeline tests failed")

        if results['compatibility']['compatible']:
            recommendations.append("✓ Torchaudio compatibility validated")
        else:
            recommendations.append("❌ Torchaudio compatibility issues found")

        # Add fix suggestions
        fix_suggestions = self.create_compatibility_fixes()
        if fix_suggestions != self._NO_FIXES_MESSAGE:
            recommendations.append("Suggested fixes:")
            recommendations.append(fix_suggestions)

        return results
def main():
    """Run the full check on the current directory and print a report.

    Returns:
        The results dict produced by
        ``AudioProcessingFixer.run_comprehensive_check``.
    """
    report = AudioProcessingFixer().run_comprehensive_check()

    compat = report['compatibility']
    issues = report['issues_found']
    rule = "=" * 60

    # Header
    print("\n" + rule)
    print("AUDIO PROCESSING COMPATIBILITY CHECK RESULTS")
    print(rule)

    # Installed versions
    torch_version = compat['torch_version']
    torchaudio_version = compat['torchaudio_version']
    print(f"\nTorch version: {torch_version}")
    print(f"Torchaudio version: {torchaudio_version}")

    # One-line summary of each of the three checks
    issue_count = len(issues)
    pipeline_ok = report['pipeline_test']
    compatible = compat['compatible']
    print(f"\nIssues found: {issue_count}")
    print(f"Pipeline test passed: {pipeline_ok}")
    print(f"Compatibility validated: {compatible}")

    print("\nRecommendations:")
    for line in report['recommendations']:
        print(f" {line}")

    # Per-issue detail, only when the scan found something
    if issues:
        print("\nDetailed issues:")
        for entry in issues:
            location = f"{entry['file']}:{entry['line']}"
            description = entry['description']
            matched = entry['match']
            print(f" {location} - {description}")
            print(f" Match: {matched}")

    print("\n" + rule)

    return report
# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|