diff --git a/pipeline/feature_extractors.py b/pipeline/feature_extractors.py index 7502b23..8f2c875 100644 --- a/pipeline/feature_extractors.py +++ b/pipeline/feature_extractors.py @@ -201,13 +201,17 @@ class LoudAudioFeatureExtractor(FeatureExtractor): teardown() is used to clean up temporary files created during setup (if specified by config) """ _CONFIG_DEFAULT_NUM_FEATURES = 5 # keep the top 5 loudnesses - def __init__(self, input_files=None, config=None, num_features=_CONFIG_DEFAULT_NUM_FEATURES): + _CONFIG_DEFAULT_MIN_DURATION = 5.00 # seconds + def __init__(self, input_files=None, config=None, + num_features=_CONFIG_DEFAULT_NUM_FEATURES, + min_duration=_CONFIG_DEFAULT_MIN_DURATION): if not input_files: raise ValueError("No input files provided!") self.input_files = input_files self.config = config self.features = [] self._num_features = num_features + self._min_duration = min_duration def _audio_file_from_path(self, path: str) -> str: """Return the audio file path given a video file path @@ -251,9 +255,28 @@ class LoudAudioFeatureExtractor(FeatureExtractor): return loudness_features - def _keep_num(self, loudnesses, num=_CONFIG_DEFAULT_NUM_FEATURES) -> list: - """Keep the top n loudnesses (default: 5)""" - return sorted(loudnesses, key=lambda x: x[1], reverse=True)[:num] + def _keep_num(self, features, num=_CONFIG_DEFAULT_NUM_FEATURES) -> list: + """Keep the top n features (default: 5) + + Approach: + - for range in 0-n + + expand the nth top feature to min duration + (move start back by 0.5*min_duration, end forward by 0.5*min_duration) + + drop any features that are now in that feature's range + - return the top n features + + Each feature is a Feature object, with an Interval object + """ + for i in range(num): + # expand the feature to min_duration + features[i].interval.move_start(-0.5*self._min_duration, relative=True) + features[i].interval.move_end(0.5*self._min_duration, relative=True) + # drop any features that are now in that feature's range + features = [f for f in features if + f.interval.start < features[i].interval.start or + f.interval.end > features[i].interval.end] + + return features[:num] def setup(self): """extract audio from video files to be processed by pyloudnorm @@ -272,11 +295,14 @@ class LoudAudioFeatureExtractor(FeatureExtractor): for file in self.input_files: audio_file = self._audio_file_from_path(file.path) loudnesses = self._loudnorm(audio_file) - top_loudnesses = self._keep_num(loudnesses, self._num_features) - for time, loudness in top_loudnesses: - self.features.append(Feature(interval=Interval(start=time, duration=0.500), + + features = [] + for time, loudness in loudnesses: + features.append(Feature(interval=Interval(start=time, duration=0.500), source=file, feature_extractor="loudness", score=loudness)) + # prune features list to keep self.num_features + self.features = self._keep_num(features, self._num_features) class VideoActivityFeatureExtractor(FeatureExtractor): @@ -354,7 +380,7 @@ class VideoActivityFeatureExtractor(FeatureExtractor): return scores[:int(len(scores) * (percent / 100))] def _keep_num(self, features, num=_CONFIG_DEFAULT_NUM_FEATURES) -> list: - """Keep the top n activity features (default: 5) + """Keep the top n features (default: 5) Approach: - for range in 0-n