# requirements-ml.txt
# Trixy Voice Assistant - Machine Learning Requirements
# ====================================================
# Dependencies for ML training, model development, and advanced audio processing
# Includes the main requirements.txt as foundation

# Include base requirements
-r requirements.txt
# Advanced PyTorch ecosystem
torch>=1.11.0,<2.0.0  # Already in base, ensuring compatibility
torchaudio>=0.11.0,<1.0.0  # Already in base, ensuring compatibility
torchvision>=0.12.0,<1.0.0  # Computer vision utilities (may be used for spectrograms)

# ONNX support for model export/import
onnx>=1.12.0  # ONNX model format support
onnxruntime>=1.12.0  # ONNX runtime for inference
onnx-simplifier>=0.4.0  # ONNX model optimization
# Scientific computing and data analysis
numpy>=1.21.0,<2.0.0  # Already in base, ensuring compatibility
scipy>=1.7.0  # Scientific computing (signal processing, optimization)
pandas>=1.3.0  # Data manipulation and analysis

# Machine learning utilities
scikit-learn>=1.0.0  # Already in base, expanded ML utilities
joblib>=1.1.0  # Parallel processing and model persistence

# Audio processing and signal analysis
librosa>=0.9.0  # Advanced audio analysis library
soundfile>=0.10.3  # Already in base, audio file I/O
audioread>=2.1.0  # Audio file reading backend for librosa
resampy>=0.2.2  # High-quality audio resampling
# Voice Activity Detection (VAD)
webrtcvad>=2.0.0  # WebRTC VAD implementation
silero-vad>=4.0.0  # Silero VAD models

# Data visualization for training analysis
matplotlib>=3.5.0  # Plotting and visualization
seaborn>=0.11.0  # Statistical data visualization
plotly>=5.0.0  # Interactive plotting
tensorboard>=2.8.0  # Training visualization and monitoring

# Model training utilities
tqdm>=4.60.0  # Progress bars for training loops
wandb>=0.12.0  # Weights & Biases experiment tracking (optional)
mlflow>=1.24.0  # ML experiment tracking and model registry
# Data augmentation
audiomentations>=0.30.0  # Audio data augmentation
albumentations>=1.1.0  # Image augmentations (for spectrograms)

# Hyperparameter optimization
optuna>=3.0.0  # Hyperparameter optimization framework
ray[tune]>=2.0.0  # Distributed hyperparameter tuning

# Model compression and optimization
torch-pruning>=1.0.0  # Neural network pruning
torch-distillation>=0.1.0  # Knowledge distillation (if available)

# CUDA utilities (for GPU acceleration)
# These are typically installed with PyTorch, but listed for clarity
# cupy-cuda11x>=10.0.0  # CUDA-accelerated NumPy (optional, uncomment if needed)
# numba>=0.56.0  # JIT compilation for numerical functions
# Memory optimization
pympler>=0.9  # Memory profiling and analysis
memory-profiler>=0.60.0  # Already in base, memory monitoring

# Distributed training support
torch-distributed>=0.1.0  # Distributed training utilities (if available)

# Model serving and deployment
fastapi>=0.75.0  # Web API framework for model serving
uvicorn>=0.17.0  # ASGI server for FastAPI
pydantic>=1.8.0,<2.0.0  # Already in base, data validation

# Configuration management for ML experiments
hydra-core>=1.1.0  # Configuration management for ML experiments
omegaconf>=2.1.0  # Configuration system for Hydra
# Time series analysis (for audio sequences)
statsmodels>=0.13.0  # Statistical models and time series analysis

# Parallel processing
multiprocess>=0.70.0  # Better multiprocessing
# NOTE: concurrent.futures is part of the Python 3 standard library (since 3.2);
# it is not a valid pip requirement and would break `pip install -r`:
# concurrent.futures>=3.1.1; python_version<"3.9"

# Audio codec support
pyaudio>=0.2.11  # Real-time audio I/O (may need system dependencies)
# NOTE: PortAudio is a system library, not a PyPI package — install it via the OS
# package manager (e.g. `apt install portaudio19-dev`) before installing pyaudio:
# portaudio19>=19.6.0

# Voice recognition specific libraries
speechbrain>=0.5.0  # SpeechBrain toolkit for speech processing
transformers>=4.15.0  # Hugging Face transformers (for advanced models)
datasets>=2.0.0  # Hugging Face datasets
# Advanced signal processing
pyroomacoustics>=0.6.0  # Room acoustics simulation
pystoi>=0.3.0  # Short-time objective intelligibility measure
pesq>=0.0.3  # Perceptual evaluation of speech quality

# GPU memory management
gpustat>=1.0.0  # GPU monitoring
nvidia-ml-py3>=7.352.0  # NVIDIA GPU management

# Advanced optimization
torch-optimizer>=0.3.0  # Additional optimizers for PyTorch
ranger-fm>=1.9.0  # Ranger optimizer (if available)

# Feature engineering
featuretools>=1.0.0  # Automated feature engineering
category_encoders>=2.3.0  # Categorical data encoding
  89. # Model interpretation and analysis
  90. shap>=0.40.0 # Model explanation and interpretation
  91. lime>=0.2.0 # Local interpretable model explanations
  92. captum>=0.5.0 # Model interpretability for PyTorch
  93. # Evaluation metrics
  94. torchmetrics>=0.7.0 # PyTorch metrics collection
  95. evaluate>=0.3.0 # Hugging Face evaluation library
  96. # Data loading and preprocessing
  97. webdataset>=0.2.0 # Efficient data loading for large datasets
  98. ffcv>=1.0.0 # Fast computer vision data loading (if applicable)
  99. # Audio format conversion
  100. pydub>=0.25.0 # Audio manipulation and format conversion
  101. ffmpeg-python>=0.2.0 # FFmpeg Python bindings
  102. # Annotation and labeling tools
  103. label-studio-sdk>=0.0.20 # Label Studio SDK for data annotation
  104. # Advanced audio features
  105. pyworld>=0.3.0 # WORLD vocoder for voice analysis
  106. praat-parselmouth>=0.4.0 # Praat phonetics software interface
  107. # Speaker diarization
  108. pyannote.audio>=2.0.0 # Speaker diarization and voice activity detection
  109. pyannote.core>=4.5.0 # Core utilities for pyannote
  110. pyannote.database>=4.1.0 # Database utilities for pyannote
  111. pyannote.metrics>=3.2.0 # Evaluation metrics for pyannote
# Optional: Advanced deep learning frameworks
# lightning>=1.5.0  # PyTorch Lightning for structured training
# timm>=0.6.0  # PyTorch image models (for vision-based features)

# Voice synthesis (TTS) - if needed for training data generation
espnet>=202207  # ESPnet speech processing toolkit (optional)

# Real-time processing
rtaudio>=0.1.0  # Real-time audio processing

# System dependencies note:
# Some packages may require system-level dependencies:
# - PortAudio (for pyaudio)
# - FFmpeg (for audio processing)
# - CUDA toolkit (for GPU acceleration)
# - Intel MKL or OpenBLAS (for optimized linear algebra)

# Installation command examples:
# pip install -r requirements-ml.txt
# pip install -r requirements-ml.txt --find-links https://download.pytorch.org/whl/torch_stable.html