From eda55da82d64a795c27221c72923572cb15f7730 Mon Sep 17 00:00:00 2001
From: Rob Hallam <0504004h@student.gla.ac.uk>
Date: Sun, 15 Sep 2024 23:16:27 +0100
Subject: [PATCH] refactor: improve docstrings, move laughter FE loc to config

---
 pipeline/feature_extractors.py | 69 ++++++++++++++++------------------
 1 file changed, 33 insertions(+), 36 deletions(-)

diff --git a/pipeline/feature_extractors.py b/pipeline/feature_extractors.py
index c061a8d..70de792 100644
--- a/pipeline/feature_extractors.py
+++ b/pipeline/feature_extractors.py
@@ -18,7 +18,6 @@ logger = logging.getLogger(__name__)
 
 class FeatureExtractor(ABC):
     """Feature extractor interface."""
-    # TODO: #API -- decide if .features will be a member variable
     def _run_get_output(self, cmd: list, cwd:str=".") -> str:
         """Run a command and return the output as a string
 
@@ -38,20 +37,21 @@ class FeatureExtractor(ABC):
 class LaughterFeatureExtractor(FeatureExtractor):
     """Feature extractor for laughter detection.
 
-    This class is responsible for extracting features corresponding to laughter in media files.
+    This class is responsible for extracting features corresponding to laughter in media files. Uses jrgillick's laughter-detection library.
 
     Here:
 
-     setup() is used to validate input files & config, which may involve processing video files to extract audio
+     setup() is not needed, as laughter-detection can work with AV files directly
 
      run() is used to extract features from the audio using jrgillick's laughter-detection
 
-     teardown() is used to clean up any temporary files created during setup according to the config
+     teardown() is not needed
 
-    See: https://github.com/jrgillick/laughter-detection for the laughter-detection library
+    @see: https://github.com/jrgillick/laughter-detection for the laughter-detection library
     """
-    _PREPEND_TIME = 7.0 # seconds before the laugh
-    _APPEND_TIME = 3.0 # seconds after the laugh
+    _PREPEND_TIME = 7.0 # seconds before the laugh to capture whatever was funny
+    _APPEND_TIME = 3.0 # seconds after the laugh to capture the reaction
+    _CONFIG_LAUGH_DETECTOR_DIR = "/home/robert/mounts/980data/code/laughter-detection/"
 
 
     def __init__(self, input_files=None, config=None):
@@ -60,12 +60,12 @@ class LaughterFeatureExtractor(FeatureExtractor):
         self.config = config
         self.features = []
 
-    def _laughdetect(self, audio_file) -> list:
+    def _laughdetect(self, audio_file, laugh_detector_dir=_CONFIG_LAUGH_DETECTOR_DIR) -> list:
         """Run laughter detection on the audio file
 
         Returns a list of 2-tuples, each representing a laugh instance in the audio file
+        in the format: (start, end) in seconds
         """
-        laugh_detector_dir = "/home/robert/mounts/980data/code/laughter-detection/"
         laugh_detector_script = "segment_laughter.py"
         # fake output for testing
         # laugh_detector_path = "tests/fake_segment_laughter.py"
@@ -85,7 +85,7 @@ class LaughterFeatureExtractor(FeatureExtractor):
                 for instance in laugh_output.splitlines()
                 if instance.startswith("instance: ")]
 
-    def _adjust_features(self):
+    def _adjust_features(self) -> None:
         """Adjust features according to config
 
         Generically, this ensures features conform to config - min/max feature length, etc.
@@ -96,9 +96,6 @@ class LaughterFeatureExtractor(FeatureExtractor):
         and append 5 seconds (for example), or 12s and 3s. We may wish to do this pre/post adjustment
         for all laughter features found, regardless of length.
 
-        TODO: figure out how we're going to handle length adjustments
-        TODO: config for length adjustments per design doc
-        TODO: play with numbers more to see what works best
         """
         for feature in self.features:
             # do the pre & post adjustment
@@ -106,38 +103,36 @@ class LaughterFeatureExtractor(FeatureExtractor):
             feature.interval.move_end(self._APPEND_TIME, relative=True)
 
     def setup(self):
-        """Setup the laughter feature extractor -- validate input files & config
-
-        jrgillick's laughter-detection library can work with AV files directly
+        """Setup the laughter feature extractor -- not needed.
 
-        TODO: validate input files
-        TODO: handle config
+        jrgillick's laughter-detection library can work with AV files directly!
         """
-        logger.debug("LaughterFeatureExtractor setup")
-
-        # Validate input files
-        if not self.input_files:
-            raise ValueError("No input files provided")
 
-        # TODO: convert video to audio if needed
 
     def run(self):
-        """Extract laughter features for each input file"""
+        """Extract laughter features for each input file.
+
+        Heavy lifting is performed in _laughdetect()
+
+        Tuples from _laughdetect are used to create Feature objects, which are appended to self.features by convention
+
+        @see: utils.py:Feature, Interval
+        """
         if self.input_files:
             for file in self.input_files:
-                # adjust this call for better test mocking
                 laughs = self._laughdetect(file.path)
                 for laugh in laughs:
                     start, end = laugh
                     self.features.append(Feature(interval=Interval(start=start, end=end),
                                                  source=file, feature_extractor="laughter"))
-                    # TODO: implement options eg minimum feature length
 
-        # adjust features
-        self._adjust_features()
+            # adjust features
+            self._adjust_features()
+
 
     def teardown(self):
-        pass
+        """No cleanup needed!"""
+
 
 class RandomFeatureExtractor(FeatureExtractor):
     """Feature extractor for random feature generation.
@@ -146,14 +141,15 @@ class RandomFeatureExtractor(FeatureExtractor):
 
     Here:
 
-     setup() is used to validate input files & config
+     setup() is not needed
 
      run() is used to generate random features
 
-     teardown() is used to clean up any temporary files created during setup according to the config
+     teardown() is not needed
     """
-    NUM_FEATURES = 5
-    MAX_DURATION = 20.0
+    NUM_FEATURES = 30
+    MAX_DURATION = 15.0
+    MIN_DURATION = 5.0
 
     def __init__(self, input_files=None, config=None):
         """It is expected that input_files is a SourceMedia object"""
@@ -177,8 +173,9 @@ class RandomFeatureExtractor(FeatureExtractor):
 
         for file in self.input_files:
             for _ in range(self.NUM_FEATURES):
-                # round to 3 decimal places
-                duration = random.random() * self.MAX_DURATION
+                # pick a duration between MIN_DURATION and MAX_DURATION, rounded to 3 decimal places
+                duration = round(random.uniform(self.MIN_DURATION, self.MAX_DURATION), 3)
+
                 start = random.random() * file.duration() - duration
                 self.features.append(Feature(interval=Interval(start=start, duration=duration),
                                              source=file, feature_extractor="random"))