Browse Source

Merge branch 'fix-vafe-and-loudfe' into feat-exemplar-scripts

main
Rob Hallam 2 months ago
parent
commit
2bea733906
1 changed files with 69 additions and 9 deletions
  1. +69
    -9
      pipeline/feature_extractors.py

+ 69
- 9
pipeline/feature_extractors.py View File

@@ -201,13 +201,17 @@ class LoudAudioFeatureExtractor(FeatureExtractor):
teardown() is used to clean up temporary files created during setup (if specified by config) teardown() is used to clean up temporary files created during setup (if specified by config)
""" """
_CONFIG_DEFAULT_NUM_FEATURES = 5 # keep the top 5 loudnesses _CONFIG_DEFAULT_NUM_FEATURES = 5 # keep the top 5 loudnesses
def __init__(self, input_files=None, config=None, num_features=_CONFIG_DEFAULT_NUM_FEATURES):
_CONFIG_DEFAULT_MIN_DURATION = 5.00 # seconds
def __init__(self, input_files=None, config=None,
num_features=_CONFIG_DEFAULT_NUM_FEATURES,
min_duration=_CONFIG_DEFAULT_MIN_DURATION):
if not input_files: if not input_files:
raise ValueError("No input files provided!") raise ValueError("No input files provided!")
self.input_files = input_files self.input_files = input_files
self.config = config self.config = config
self.features = [] self.features = []
self._num_features = num_features self._num_features = num_features
self._min_duration = min_duration


def _audio_file_from_path(self, path: str) -> str: def _audio_file_from_path(self, path: str) -> str:
"""Return the audio file path given a video file path """Return the audio file path given a video file path
@@ -251,9 +255,28 @@ class LoudAudioFeatureExtractor(FeatureExtractor):


return loudness_features return loudness_features


def _keep_num(self, loudnesses, num=_CONFIG_DEFAULT_NUM_FEATURES) -> list:
"""Keep the top n loudnesses (default: 5)"""
return sorted(loudnesses, key=lambda x: x[1], reverse=True)[:num]
def _keep_num(self, features, num=_CONFIG_DEFAULT_NUM_FEATURES) -> list:
    """Keep the top n features (default: 5), each widened by min_duration.

    Approach:
    - rank the features by score, highest first
    - repeatedly take the best remaining feature and
      + widen it (move start back by 0.5*min_duration, end forward by
        0.5*min_duration)
      + drop any remaining feature lying entirely inside the widened range
    - stop after n features, or earlier if the candidates run out

    Each feature is a Feature object, with an Interval object.

    Args:
        features: Feature objects to prune. The intervals of the kept
            features are widened in place.
        num: maximum number of features to return.

    Returns:
        A list of at most ``num`` features, highest score first.
    """
    half_window = 0.5 * self._min_duration

    # Rank explicitly so that "top n" does not depend on the caller's order.
    # NOTE(review): assumes every Feature exposes the `score` it was
    # constructed with as an attribute -- confirm against the Feature class.
    remaining = sorted(features, key=lambda f: f.score, reverse=True)

    kept = []
    while remaining and len(kept) < num:
        # Take the winner out of the candidate pool *before* filtering;
        # otherwise the containment test below would discard it as well.
        best = remaining.pop(0)
        best.interval.move_start(-half_window, relative=True)
        best.interval.move_end(half_window, relative=True)
        kept.append(best)

        # Keep only candidates that stick out of the widened range on at
        # least one side; partially overlapping features stay candidates.
        remaining = [f for f in remaining
                     if f.interval.start < best.interval.start
                     or f.interval.end > best.interval.end]

    return kept


def setup(self): def setup(self):
"""extract audio from video files to be processed by pyloudnorm """extract audio from video files to be processed by pyloudnorm
@@ -272,11 +295,14 @@ class LoudAudioFeatureExtractor(FeatureExtractor):
for file in self.input_files: for file in self.input_files:
audio_file = self._audio_file_from_path(file.path) audio_file = self._audio_file_from_path(file.path)
loudnesses = self._loudnorm(audio_file) loudnesses = self._loudnorm(audio_file)
top_loudnesses = self._keep_num(loudnesses, self._num_features)
for time, loudness in top_loudnesses:
self.features.append(Feature(interval=Interval(start=time, duration=0.500),

features = []
for time, loudness in loudnesses:
features.append(Feature(interval=Interval(start=time, duration=0.500),
source=file, feature_extractor="loudness", source=file, feature_extractor="loudness",
score=loudness)) score=loudness))
# prune features list to keep self.num_features
self.features = self._keep_num(features, self._num_features)




class VideoActivityFeatureExtractor(FeatureExtractor): class VideoActivityFeatureExtractor(FeatureExtractor):
@@ -296,12 +322,18 @@ class VideoActivityFeatureExtractor(FeatureExtractor):


#TODO: minimum duration -- consider whether to do here, or expand duration post-consolidation #TODO: minimum duration -- consider whether to do here, or expand duration post-consolidation
""" """
def __init__(self, input_files=None, config=None):
_CONFIG_DEFAULT_NUM_FEATURES = 5 # keep the top 5 activity moments
_CONFIG_DEFAULT_MIN_DURATION = 5.00 # seconds
def __init__(self, input_files=None, config=None,
num_features=_CONFIG_DEFAULT_NUM_FEATURES,
min_duration=_CONFIG_DEFAULT_MIN_DURATION):
if not input_files: if not input_files:
raise ValueError("No input files provided!") raise ValueError("No input files provided!")
self.input_files = input_files self.input_files = input_files
self.config = config self.config = config
self.features = [] self.features = []
self._num_features = num_features
self._min_duration = min_duration


def _scdet(self, video_file): def _scdet(self, video_file):
"""Run scdet filter on the video file""" """Run scdet filter on the video file"""
@@ -347,6 +379,29 @@ class VideoActivityFeatureExtractor(FeatureExtractor):
scores = sorted(scores, key=lambda x: x[1], reverse=True) scores = sorted(scores, key=lambda x: x[1], reverse=True)
return scores[:int(len(scores) * (percent / 100))] return scores[:int(len(scores) * (percent / 100))]


def _keep_num(self, features, num=_CONFIG_DEFAULT_NUM_FEATURES) -> list:
    """Keep the top n features (default: 5), each widened by min_duration.

    Approach:
    - order the features by score, best first
    - for each of the n picks:
      + take the best feature still available
      + widen it (start moved back by 0.5*min_duration, end moved forward
        by 0.5*min_duration)
      + discard every other candidate that now lies entirely inside it
    - return the picks (fewer than n if there were not enough candidates)

    Each feature is a Feature object, with an Interval object.

    Args:
        features: Feature objects to prune. Intervals of the returned
            features are modified in place.
        num: maximum number of features to return.

    Returns:
        A list of at most ``num`` features, best score first.
    """
    pad = 0.5 * self._min_duration

    # Sort here rather than relying on the caller having done so.
    # NOTE(review): assumes Feature stores its constructor `score` argument
    # as an attribute of the same name -- confirm against the Feature class.
    candidates = sorted(features, key=lambda f: f.score, reverse=True)

    selected = []
    while candidates and len(selected) < num:
        # Separate the pick from the pool first: it is trivially "inside"
        # its own range and the filter below would otherwise remove it.
        pick, candidates = candidates[0], candidates[1:]
        pick.interval.move_start(-pad, relative=True)
        pick.interval.move_end(pad, relative=True)
        selected.append(pick)

        # A candidate survives if it extends past the widened range on
        # either side; only fully covered candidates are dropped.
        candidates = [f for f in candidates
                      if f.interval.start < pick.interval.start
                      or f.interval.end > pick.interval.end]

    return selected

def setup(self): def setup(self):
pass pass


@@ -354,11 +409,16 @@ class VideoActivityFeatureExtractor(FeatureExtractor):
for file in self.input_files: for file in self.input_files:
scores = self._scdet(file.path) scores = self._scdet(file.path)
means = sorted(self._nonoverlap_mean(scores), key=lambda x: x[1], reverse=True) means = sorted(self._nonoverlap_mean(scores), key=lambda x: x[1], reverse=True)

features = []
for time, score in self._drop_lowest(means, 66): for time, score in self._drop_lowest(means, 66):
self.features.append(Feature(interval=Interval(start=time, duration=0.500),
features.append(Feature(interval=Interval(start=time, duration=0.500),
source=file, feature_extractor="videoactivity", source=file, feature_extractor="videoactivity",
score=score)) score=score))


# prune features list to keep self.num_features
self.features = self._keep_num(features, self._num_features)

def teardown(self): def teardown(self):
pass pass




Loading…
Cancel
Save