Bladeren bron

Subdirectories durchsuchen (Trainingsdaten)

Patrick Baumgartner 1 jaar geleden
bovenliggende
commit
45b1ebfaa4
2 gewijzigde bestanden met toevoegingen van 30 en 9 verwijderingen
  1. 10 2
      .gitignore
  2. 20 7
      trainer/utils/file_utils.py

+ 10 - 2
.gitignore

@@ -6,5 +6,13 @@
 /trainer/env/
 /trainer/testdaten
 **/_*/
-/trainer/training/background2/
-/trainer/training/*/*/processed/
+/trainer/training/**/processed/
+/trainer/training/**/*.wav
+/trainer/training/**/*.mp3
+/trainer/training/**/*.m4a
+**/*.bak
+/trainer/training/**/*.json
+/trainer/model/**/*.json
+/trainer/model/**/*.h5
+/trainer/model/**/*.pth
+/trainer/model/**/*.keras

+ 20 - 7
trainer/utils/file_utils.py

@@ -1,4 +1,5 @@
 from cmath import e
+from genericpath import isdir
 import os
 from re import A
 import wave
@@ -18,14 +19,26 @@ def ensure_directory_exists(directory):
     if not os.path.exists(directory):
         os.makedirs(directory)
 
def list_files_in_directory(directory, extension=None, deep=0):
    """Return the paths of the entries in ``directory`` that match ``extension``.

    Args:
        directory: Path of the directory to scan.
        extension: Suffix filter. ``None`` (or any empty value) disables
            filtering; a string keeps names ending with that suffix; a list
            or tuple of strings keeps names ending with any of them.
        deep: Number of subdirectory levels to descend into. ``0`` (the
            default) lists only ``directory`` itself. Subdirectories named
            ``"processed"`` are never descended into.

    Returns:
        A list of path strings in ``os.listdir`` order. Matching files found
        in a descended subdirectory appear in place of that subdirectory.

    Raises:
        FileNotFoundError: If ``directory`` does not exist.
    """
    if not os.path.exists(directory):
        raise FileNotFoundError(f"Directory {directory} does not exist.")

    # str.endswith accepts a tuple of suffixes, so normalise a list/tuple
    # once instead of looping over the candidates for every file name.
    if isinstance(extension, (list, tuple)):
        suffixes = tuple(extension)
    else:
        suffixes = extension

    files = []
    for name in os.listdir(directory):
        # os.path.join yields the same string as plain concatenation when
        # ``directory`` already ends with a separator, and a correct path
        # when it does not.
        path = os.path.join(directory, name)

        if deep > 0 and name != "processed" and os.path.isdir(path):
            # Forward the filter by keyword: passing ``deep - 1`` positionally
            # would bind it to ``extension`` and drop the real filter.
            files.extend(
                list_files_in_directory(
                    path + "/", extension=extension, deep=deep - 1
                )
            )
            # The subdirectory was expanded; do not list the directory itself.
            continue

        # Entries that are not descended into (including directories when
        # ``deep`` is exhausted) go through the suffix filter like any name.
        if suffixes and not name.endswith(suffixes):
            continue
        files.append(path)
    return files
 
@@ -136,9 +149,9 @@ def preprocessing_training_data(model_dir,data_dir, sc_data_dir, fail_data_dir):
     import librosa
     
     data_path_dict = {
-        0: list_files_in_directory(fail_data_dir, extension='.wav'),
-        1: list_files_in_directory(data_dir, extension='.wav'),
-        2: list_files_in_directory(sc_data_dir, extension='.wav')
+        0: list_files_in_directory(fail_data_dir, extension=['.wav','.mp3','.m4a'], deep=3),
+        1: list_files_in_directory(data_dir, extension=['.wav','.mp3','.m4a'], deep=3),
+        2: list_files_in_directory(sc_data_dir, extension=['.wav','.mp3','.m4a'], deep=3)
     }
 
     #walley_sample = "training/background/rec04.wav"