# Addition to pipeline/feature_extractors.py, placed after JSONFeatureExtractor.


class WordFeatureExtractor(FeatureExtractor):
    """Feature extractor for specific word detection (uses Whisper).

    Lifecycle: construct with the input files, call ``setup(words)`` to
    register the target words, then ``run()`` and ``teardown()``.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, input_files=None, config=None):
        """Store the inputs and initialise empty result/target containers.

        Args:
            input_files: Non-empty collection of input files to process.
            config: Optional configuration object; stored as-is.

        Raises:
            ValueError: If ``input_files`` is missing or empty.
        """
        if not input_files:
            raise ValueError("No input files provided!")
        self.input_files = input_files
        self.config = config
        # Extracted features; starts empty.
        self.features = []
        # Target words; populated by setup(). Initialised here so that reading
        # the attribute before setup() does not raise AttributeError.
        self.words = []

    def setup(self, words=None):
        """Validate the input files and target words, then store the words.

        Whisper expects a list of words to search for in the audio.

        Args:
            words: Non-empty iterable of target words. A single bare string is
                treated as one word rather than being split into characters.

        Raises:
            ValueError: If no input files are set or no words are supplied.
        """
        logger.debug("WordFeatureExtractor setup")

        # Validate input files
        if not self.input_files:
            raise ValueError("No input files provided")

        # Validate words (None, [] and "" are all rejected)
        if not words:
            raise ValueError("No words provided")

        if isinstance(words, str):
            words = [words]
        # Keep a private copy on the instance so run() can use the targets and
        # later mutation of the caller's list does not leak in.
        self.words = list(words)
        # TODO: consider stripping punctuation since Whisper produces
        # words+punctuation and we might want to strip the punctuation there too

    def run(self):
        """Extract features for the target words (defined in setup) per input file.

        Intended behaviour: use Whisper to detect words in the audio, then
        match these to the target words and create features.

        Not implemented yet: currently a no-op that returns ``None``.
        """

    def teardown(self):
        """Clean up after Whisper.

        Not implemented yet: currently a no-op that returns ``None``.
        """