Browse Source

feat: Update LoudFE & VAFE to not keep adjacent features

Otherwise features tend to cluster
main
Rob Hallam 2 months ago
parent
commit
460819d678
1 changed files with 29 additions and 18 deletions
  1. +29
    -18
      pipeline/feature_extractors.py

+ 29
- 18
pipeline/feature_extractors.py View File

@@ -200,7 +200,7 @@ class LoudAudioFeatureExtractor(FeatureExtractor):


teardown() is used to clean up temporary files created during setup (if specified by config) teardown() is used to clean up temporary files created during setup (if specified by config)
""" """
_CONFIG_DEFAULT_NUM_FEATURES = 5 # keep the top 5 loudnesses
_CONFIG_DEFAULT_NUM_FEATURES = 15 # keep the top 15 loudnesses
_CONFIG_DEFAULT_MIN_DURATION = 5.00 # seconds _CONFIG_DEFAULT_MIN_DURATION = 5.00 # seconds
def __init__(self, input_files=None, config=None, def __init__(self, input_files=None, config=None,
num_features=_CONFIG_DEFAULT_NUM_FEATURES, num_features=_CONFIG_DEFAULT_NUM_FEATURES,
@@ -255,28 +255,33 @@ class LoudAudioFeatureExtractor(FeatureExtractor):


return loudness_features return loudness_features


def _keep_num(self, features, num=_CONFIG_DEFAULT_NUM_FEATURES) -> list:
def _keep_num(self, features, num=_CONFIG_DEFAULT_NUM_FEATURES, margin=10.0) -> list:
"""Keep the top n features (default: 5) """Keep the top n features (default: 5)


Approach: Approach:
- for range in 0-n - for range in 0-n
+ expand the nth top feature to min duration + expand the nth top feature to min duration
(move start back by 0.5*min_duration, end forward by 0.5*min_duration) (move start back by 0.5*min_duration, end forward by 0.5*min_duration)
+ drop any features that are now in that feature's range
+ drop any features that are now in that feature's range (plus margin)
- return the top n features - return the top n features


Each feature is a Feature object, with an Interval object Each feature is a Feature object, with an Interval object
""" """
keep_features = []
# ensure features are sorted by score
features = sorted(features, key=lambda x: x.score, reverse=True)
for i in range(num): for i in range(num):
current_feature = features.pop(0)
# expand the feature to min_duration # expand the feature to min_duration
features[i].interval.move_start(-0.5*self._min_duration, relative=True)
features[i].interval.move_end(0.5*self._min_duration, relative=True)
# drop any features that are now in that feature's range
current_feature.interval.move_start(-0.5*self._min_duration, relative=True)
current_feature.interval.move_end(0.5*self._min_duration, relative=True)
keep_features.append(current_feature)
# drop any features that are now in that feature's range (plus margin)
features = [f for f in features if features = [f for f in features if
f.interval.start < features[i].interval.start or
f.interval.end > features[i].interval.end]
(f.interval.end < current_feature.interval.start-margin or
f.interval.start > current_feature.interval.end+margin)]


return features[:num]
return keep_features


def setup(self): def setup(self):
"""extract audio from video files to be processed by pyloudnorm """extract audio from video files to be processed by pyloudnorm
@@ -322,7 +327,7 @@ class VideoActivityFeatureExtractor(FeatureExtractor):


#TODO: minimum duration -- consider whether to do here, or expand duration post-consolidation #TODO: minimum duration -- consider whether to do here, or expand duration post-consolidation
""" """
_CONFIG_DEFAULT_NUM_FEATURES = 5 # keep the top 5 activity moments
_CONFIG_DEFAULT_NUM_FEATURES = 15 # keep the top 15 activity moments
_CONFIG_DEFAULT_MIN_DURATION = 5.00 # seconds _CONFIG_DEFAULT_MIN_DURATION = 5.00 # seconds
def __init__(self, input_files=None, config=None, def __init__(self, input_files=None, config=None,
num_features=_CONFIG_DEFAULT_NUM_FEATURES, num_features=_CONFIG_DEFAULT_NUM_FEATURES,
@@ -379,28 +384,34 @@ class VideoActivityFeatureExtractor(FeatureExtractor):
scores = sorted(scores, key=lambda x: x[1], reverse=True) scores = sorted(scores, key=lambda x: x[1], reverse=True)
return scores[:int(len(scores) * (percent / 100))] return scores[:int(len(scores) * (percent / 100))]


def _keep_num(self, features, num=_CONFIG_DEFAULT_NUM_FEATURES) -> list:
def _keep_num(self, features, num=_CONFIG_DEFAULT_NUM_FEATURES, margin=10.0) -> list:
"""Keep the top n features (default: 5) """Keep the top n features (default: 5)


Approach: Approach:
- for range in 0-n - for range in 0-n
+ expand the nth top feature to min duration + expand the nth top feature to min duration
(move start back by 0.5*min_duration, end forward by 0.5*min_duration) (move start back by 0.5*min_duration, end forward by 0.5*min_duration)
+ drop any features that are now in that feature's range
+ drop any features that are now in that feature's range (plus margin)
- return the top n features - return the top n features


Each feature is a Feature object, with an Interval object Each feature is a Feature object, with an Interval object
""" """
keep_features = []
# ensure features are sorted by score
features = sorted(features, key=lambda x: x.score, reverse=True)
for i in range(num): for i in range(num):
current_feature = features.pop(0)
# expand the feature to min_duration # expand the feature to min_duration
features[i].interval.move_start(-0.5*self._min_duration, relative=True)
features[i].interval.move_end(0.5*self._min_duration, relative=True)
# drop any features that are now in that feature's range
current_feature.interval.move_start(-0.5*self._min_duration, relative=True)
current_feature.interval.move_end(0.5*self._min_duration, relative=True)
keep_features.append(current_feature)
# drop any features that are now in that feature's range (plus margin)
features = [f for f in features if features = [f for f in features if
f.interval.start < features[i].interval.start or
f.interval.end > features[i].interval.end]
(f.interval.end < current_feature.interval.start-margin or
f.interval.start > current_feature.interval.end+margin)]

return keep_features


return features[:num]


def setup(self): def setup(self):
pass pass


Loading…
Cancel
Save