# Trixy Voice Assistant - Machine Learning Requirements
# ====================================================
# Dependencies for ML training, model development, and advanced audio processing
# Includes the main requirements.txt as foundation

# Include base requirements
-r requirements.txt
# Advanced PyTorch ecosystem
torch>=1.11.0,<2.0.0            # Already in base, ensuring compatibility
torchaudio>=0.11.0,<1.0.0       # Already in base, ensuring compatibility
torchvision>=0.12.0,<1.0.0      # Computer vision utilities (may be used for spectrograms)

# ONNX support for model export/import
onnx>=1.12.0                    # ONNX model format support
onnxruntime>=1.12.0             # ONNX runtime for inference
onnx-simplifier>=0.4.0          # ONNX model optimization
# Scientific computing and data analysis
numpy>=1.21.0,<2.0.0            # Already in base, ensuring compatibility
scipy>=1.7.0                    # Scientific computing (signal processing, optimization)
pandas>=1.3.0                   # Data manipulation and analysis

# Machine learning utilities
scikit-learn>=1.0.0             # Already in base, expanded ML utilities
joblib>=1.1.0                   # Parallel processing and model persistence

# Audio processing and signal analysis
librosa>=0.9.0                  # Advanced audio analysis library
soundfile>=0.10.3               # Already in base, audio file I/O
audioread>=2.1.0                # Audio file reading backend for librosa
resampy>=0.2.2                  # High-quality audio resampling

# Voice Activity Detection (VAD)
webrtcvad>=2.0.0                # WebRTC VAD implementation
silero-vad>=4.0.0               # Silero VAD models
# Data visualization for training analysis
matplotlib>=3.5.0               # Plotting and visualization
seaborn>=0.11.0                 # Statistical data visualization
plotly>=5.0.0                   # Interactive plotting
tensorboard>=2.8.0              # Training visualization and monitoring

# Model training utilities
tqdm>=4.60.0                    # Progress bars for training loops
wandb>=0.12.0                   # Weights & Biases experiment tracking (optional)
mlflow>=1.24.0                  # ML experiment tracking and model registry

# Data augmentation
audiomentations>=0.30.0         # Audio data augmentation
albumentations>=1.1.0           # Image augmentations (for spectrograms)

# Hyperparameter optimization
optuna>=3.0.0                   # Hyperparameter optimization framework
ray[tune]>=2.0.0                # Distributed hyperparameter tuning
# Model compression and optimization
torch-pruning>=1.0.0            # Neural network pruning
# NOTE(review): "torch-distillation" does not appear to be a published PyPI
# package (the original comment itself said "if available"); knowledge
# distillation is typically implemented directly with torch. Commented out so
# `pip install -r` does not fail — confirm the intended package before re-enabling.
# torch-distillation>=0.1.0

# CUDA utilities (for GPU acceleration)
# These are typically installed with PyTorch, but listed for clarity
# cupy-cuda11x>=10.0.0          # CUDA-accelerated NumPy (optional, uncomment if needed)
# numba>=0.56.0                 # JIT compilation for numerical functions

# Memory optimization
pympler>=0.9                    # Memory profiling and analysis
memory-profiler>=0.60.0         # Already in base, memory monitoring

# Distributed training support
# NOTE(review): distributed training ships with torch itself (torch.distributed);
# "torch-distributed" is not a standalone PyPI package. Commented out so the
# install does not fail.
# torch-distributed>=0.1.0
# Model serving and deployment
fastapi>=0.75.0                 # Web API framework for model serving
uvicorn>=0.17.0                 # ASGI server for FastAPI
pydantic>=1.8.0,<2.0.0          # Already in base, data validation

# Configuration management for ML experiments
hydra-core>=1.1.0               # Configuration management for ML experiments
omegaconf>=2.1.0                # Configuration system for Hydra

# Time series analysis (for audio sequences)
statsmodels>=0.13.0             # Statistical models and time series analysis

# Parallel processing
multiprocess>=0.70.0            # Better multiprocessing
# NOTE(review): concurrent.futures is part of the Python 3 standard library;
# the PyPI backport ("futures") targets Python 2 only and installing it under
# Python 3 shadows the stdlib module. Dependency line removed accordingly.

# Audio codec support
pyaudio>=0.2.11                 # Real-time audio I/O (may need system dependencies)
# NOTE(review): PortAudio is a C system library, not a pip package — install it
# via the OS package manager (e.g. `apt install portaudio19-dev`) before pyaudio.
# Voice recognition specific libraries
speechbrain>=0.5.0              # SpeechBrain toolkit for speech processing
transformers>=4.15.0            # Hugging Face transformers (for advanced models)
datasets>=2.0.0                 # Hugging Face datasets

# Advanced signal processing
pyroomacoustics>=0.6.0          # Room acoustics simulation
pystoi>=0.3.0                   # Short-time objective intelligibility measure
pesq>=0.0.3                     # Perceptual evaluation of speech quality

# GPU memory management
gpustat>=1.0.0                  # GPU monitoring
nvidia-ml-py3>=7.352.0          # NVIDIA GPU management

# Advanced optimization
torch-optimizer>=0.3.0          # Additional optimizers for PyTorch
ranger-fm>=1.9.0                # Ranger optimizer (if available)

# Feature engineering
featuretools>=1.0.0             # Automated feature engineering
category_encoders>=2.3.0        # Categorical data encoding

# Model interpretation and analysis
shap>=0.40.0                    # Model explanation and interpretation
lime>=0.2.0                     # Local interpretable model explanations
captum>=0.5.0                   # Model interpretability for PyTorch

# Evaluation metrics
torchmetrics>=0.7.0             # PyTorch metrics collection
evaluate>=0.3.0                 # Hugging Face evaluation library

# Data loading and preprocessing
webdataset>=0.2.0               # Efficient data loading for large datasets
ffcv>=1.0.0                     # Fast computer vision data loading (if applicable)
# Audio format conversion
pydub>=0.25.0                   # Audio manipulation and format conversion
ffmpeg-python>=0.2.0            # FFmpeg Python bindings

# Annotation and labeling tools
label-studio-sdk>=0.0.20        # Label Studio SDK for data annotation

# Advanced audio features
pyworld>=0.3.0                  # WORLD vocoder for voice analysis
praat-parselmouth>=0.4.0        # Praat phonetics software interface

# Speaker diarization
pyannote.audio>=2.0.0           # Speaker diarization and voice activity detection
pyannote.core>=4.5.0            # Core utilities for pyannote
pyannote.database>=4.1.0        # Database utilities for pyannote
pyannote.metrics>=3.2.0         # Evaluation metrics for pyannote

# Optional: Advanced deep learning frameworks
# lightning>=1.5.0              # PyTorch Lightning for structured training
# timm>=0.6.0                   # PyTorch image models (for vision-based features)

# Voice synthesis (TTS) - if needed for training data generation
espnet>=202207                  # ESPnet speech processing toolkit (optional)

# Real-time processing
# NOTE(review): "rtaudio" does not appear to be a published PyPI package, and
# pyaudio above already provides real-time audio I/O — confirm the intended
# package before re-enabling.
# rtaudio>=0.1.0
# System dependencies note:
# Some packages may require system-level dependencies:
#   - PortAudio (for pyaudio)
#   - FFmpeg (for audio processing)
#   - CUDA toolkit (for GPU acceleration)
#   - Intel MKL or OpenBLAS (for optimized linear algebra)

# Installation command examples:
#   pip install -r requirements-ml.txt
#   pip install -r requirements-ml.txt --find-links https://download.pytorch.org/whl/torch_stable.html