feat: initial sketch of WhisperFE (WFE)

WhisperFE differs slightly from the other FEs in that it searches for specific target words. Not specifying any target words could be treated as an error (which is what this commit does), but a better approach may be to downgrade that to a logging notice and simply match nothing / exit early.
7 kuukautta sitten · 66b6a4abdc
--- a/pipeline/feature_extractors.py
+++ b/pipeline/feature_extractors.py
@@ -400,3 +400,40 @@ class JSONFeatureExtractor(FeatureExtractor):

    def teardown(self):
        pass


 class WordFeatureExtractor(FeatureExtractor):
    """Feature extractor for specific word detection (uses Whisper).

    The target words are supplied to ``setup`` and kept on the instance as
    ``self.words`` so that ``run`` can match against them.
    """

    def __init__(self, input_files=None, config=None):
        """Store the inputs and initialise empty feature / target-word state.

        Args:
            input_files: Input files to process; must be non-empty.
            config: Optional configuration object, stored as given.

        Raises:
            ValueError: If ``input_files`` is empty or ``None``.
        """
        if not input_files:
            raise ValueError("No input files provided!")
        self.input_files = input_files
        self.config = config
        self.features = []
        # Filled in by setup(); defined here so the attribute always exists,
        # even if it is read before setup() has been called.
        self.words = []

    def setup(self, words=None):
        """Setup the word feature extractor -- validate input files & config

        Whisper expects a list of words to search for in the audio.

        Args:
            words: Target words to search for; must be non-empty. Defaults to
                ``None`` (rather than a shared mutable ``[]``), which is
                rejected exactly like an empty list.

        Raises:
            ValueError: If there are no input files or no target words.
        """
        logger.debug("WordFeatureExtractor setup")

        # Validate input files
        if not self.input_files:
            raise ValueError("No input files provided")

        # Validate words
        if not words:
            raise ValueError("No words provided")
        # Keep the target words on the instance; a bare local assignment
        # would discard them as soon as setup() returns.
        self.words = words
        # TODO: consider stripping punctuation since Whisper produces words+punctuation
        # and we might want to strip the punctuation there too

    def run(self):
        """Extract features corresponding to supplied target words (defined in setup) for each input file

        Use Whisper to detect words in the audio, then match these to target words and create features

        Not implemented yet: this method currently has no body beyond this docstring.
        """

    def teardown(self):
        """Clean up after Whisper

        Not implemented yet: this method currently has no body beyond this docstring.
        """