Merge branch 'fix-vafe-and-loudfe' into feat-exemplar-scripts

1年前 · 2bea733906
--- a/pipeline/feature_extractors.py
+++ b/pipeline/feature_extractors.py
@@ -201,13 +201,17 @@ class LoudAudioFeatureExtractor(FeatureExtractor):
     teardown() is used to clean up temporary files created during setup (if specified by config)
    """
    _CONFIG_DEFAULT_NUM_FEATURES = 5 # keep the top 5 loudnesses
    def __init__(self, input_files=None, config=None, num_features=_CONFIG_DEFAULT_NUM_FEATURES):
    _CONFIG_DEFAULT_MIN_DURATION = 5.00 # seconds
    def __init__(self, input_files=None, config=None,
                 num_features=_CONFIG_DEFAULT_NUM_FEATURES,
                 min_duration=_CONFIG_DEFAULT_MIN_DURATION):
        """Remember the inputs and pruning settings for this extractor.

        input_files must be non-empty, otherwise ValueError is raised.
        num_features is the count later handed to _keep_num;
        min_duration is the amount (in seconds) _keep_num widens features by.
        """
        if not input_files:
            raise ValueError("No input files provided!")

        # pruning settings, read back by _keep_num
        self._min_duration = min_duration
        self._num_features = num_features

        # inputs, plus the (initially empty) list of extracted features
        self.config = config
        self.input_files = input_files
        self.features = []

    def _audio_file_from_path(self, path: str) -> str:
        """Return the audio file path given a video file path
@@ -251,9 +255,28 @@ class LoudAudioFeatureExtractor(FeatureExtractor):

        return loudness_features

    def _keep_num(self, loudnesses, num=_CONFIG_DEFAULT_NUM_FEATURES) -> list:
        """Keep the top n loudnesses (default: 5)"""
        return sorted(loudnesses, key=lambda x: x[1], reverse=True)[:num]
    def _keep_num(self, features, num=_CONFIG_DEFAULT_NUM_FEATURES) -> list:
        """Keep the top n features (default: 5), widening each kept feature

        Approach:
         - rank the candidates by score, highest first
         - repeatedly take the best remaining candidate and
           + expand it by min duration
            (move start back by 0.5*min_duration, end forward by 0.5*min_duration)
           + drop any remaining candidates that now lie entirely inside
             that feature's range
         - stop once n features are kept or no candidates remain

        Each feature is a Feature object, with an Interval object.
        The intervals of the kept features are modified in place.

        Args:
            features: candidate Feature objects, in any order.
            num: maximum number of features to return.

        Returns:
            A new list of at most ``num`` features, highest score first.
            Fewer are returned when the candidates run out; an empty list
            is returned for empty input or a non-positive ``num``.
        """
        half_window = 0.5 * self._min_duration

        # Rank here rather than trusting the caller's order.
        # NOTE(review): assumes Feature exposes the value passed as `score=`
        # to its constructor as `.score` -- confirm against the Feature class.
        candidates = sorted(features, key=lambda f: f.score, reverse=True)

        kept = []
        while candidates and len(kept) < num:
            # Take the best remaining candidate out of the pool *before*
            # filtering, so the containment test below cannot discard it.
            top = candidates.pop(0)
            top.interval.move_start(-half_window, relative=True)
            top.interval.move_end(half_window, relative=True)
            kept.append(top)

            # Keep only candidates that stick out of the expanded range on
            # at least one side; fully contained ones are redundant.
            candidates = [f for f in candidates
                          if f.interval.start < top.interval.start
                          or f.interval.end > top.interval.end]

        return kept

    def setup(self):
        """extract audio from video files to be processed by pyloudnorm
@@ -272,11 +295,14 @@ class LoudAudioFeatureExtractor(FeatureExtractor):
        for file in self.input_files:
            audio_file = self._audio_file_from_path(file.path)
            loudnesses = self._loudnorm(audio_file)
            top_loudnesses = self._keep_num(loudnesses, self._num_features)
            for time, loudness in top_loudnesses:
                self.features.append(Feature(interval=Interval(start=time, duration=0.500),

            features = []
            for time, loudness in loudnesses:
                features.append(Feature(interval=Interval(start=time, duration=0.500),
                                             source=file, feature_extractor="loudness",
                                             score=loudness))
            # prune features list to keep self._num_features
            self.features = self._keep_num(features, self._num_features)


 class VideoActivityFeatureExtractor(FeatureExtractor):
@@ -296,12 +322,18 @@ class VideoActivityFeatureExtractor(FeatureExtractor):

    #TODO: minimum duration -- consider whether to do here, or expand duration post-consolidation
    """
    def __init__(self, input_files=None, config=None):
    _CONFIG_DEFAULT_NUM_FEATURES = 5 # keep the top 5 activity moments
    _CONFIG_DEFAULT_MIN_DURATION = 5.00 # seconds
    def __init__(self, input_files=None, config=None,
                 num_features=_CONFIG_DEFAULT_NUM_FEATURES,
                 min_duration=_CONFIG_DEFAULT_MIN_DURATION):
        """Remember the inputs and pruning settings for this extractor.

        input_files must be non-empty, otherwise ValueError is raised.
        num_features is the count later handed to _keep_num;
        min_duration is the amount (in seconds) _keep_num widens features by.
        """
        if not input_files:
            raise ValueError("No input files provided!")

        # pruning settings, read back by _keep_num
        self._min_duration = min_duration
        self._num_features = num_features

        # inputs, plus the (initially empty) list of extracted features
        self.config = config
        self.input_files = input_files
        self.features = []

    def _scdet(self, video_file):
        """Run scdet filter on the video file"""
@@ -347,6 +379,29 @@ class VideoActivityFeatureExtractor(FeatureExtractor):
        scores = sorted(scores, key=lambda x: x[1], reverse=True)
        return scores[:int(len(scores) * (percent / 100))]

    def _keep_num(self, features, num=_CONFIG_DEFAULT_NUM_FEATURES) -> list:
        """Keep the top n features (default: 5), widening each kept feature

        Approach:
         - rank the candidates by score, highest first
         - repeatedly take the best remaining candidate and
           + expand it by min duration
            (move start back by 0.5*min_duration, end forward by 0.5*min_duration)
           + drop any remaining candidates that now lie entirely inside
             that feature's range
         - stop once n features are kept or no candidates remain

        Each feature is a Feature object, with an Interval object.
        The intervals of the kept features are modified in place.

        Args:
            features: candidate Feature objects, in any order.
            num: maximum number of features to return.

        Returns:
            A new list of at most ``num`` features, highest score first.
            Fewer are returned when the candidates run out; an empty list
            is returned for empty input or a non-positive ``num``.
        """
        half_window = 0.5 * self._min_duration

        # Rank here rather than trusting the caller's order (the sort is
        # stable, so an already-ranked input keeps its order).
        # NOTE(review): assumes Feature exposes the value passed as `score=`
        # to its constructor as `.score` -- confirm against the Feature class.
        candidates = sorted(features, key=lambda f: f.score, reverse=True)

        kept = []
        while candidates and len(kept) < num:
            # Take the best remaining candidate out of the pool *before*
            # filtering, so the containment test below cannot discard it.
            top = candidates.pop(0)
            top.interval.move_start(-half_window, relative=True)
            top.interval.move_end(half_window, relative=True)
            kept.append(top)

            # Keep only candidates that stick out of the expanded range on
            # at least one side; fully contained ones are redundant.
            candidates = [f for f in candidates
                          if f.interval.start < top.interval.start
                          or f.interval.end > top.interval.end]

        return kept

    def setup(self):
        pass

@@ -354,11 +409,16 @@ class VideoActivityFeatureExtractor(FeatureExtractor):
        for file in self.input_files:
            scores = self._scdet(file.path)
            means = sorted(self._nonoverlap_mean(scores), key=lambda x: x[1], reverse=True)

            features = []
            for time, score in self._drop_lowest(means, 66):
                self.features.append(Feature(interval=Interval(start=time, duration=0.500),
                features.append(Feature(interval=Interval(start=time, duration=0.500),
                                             source=file, feature_extractor="videoactivity",
                                             score=score))

            # prune features list to keep self._num_features
            self.features = self._keep_num(features, self._num_features)

    def teardown(self):
        pass