|
|
@@ -400,3 +400,40 @@ class JSONFeatureExtractor(FeatureExtractor): |
|
|
|
|
|
|
|
def teardown(self): |
|
|
|
pass |
|
|
|
|
|
|
|
|
|
|
|
class WordFeatureExtractor(FeatureExtractor):
    """Feature extractor for specific word detection (uses Whisper)

    Instance attributes:
        input_files: the input files passed to the constructor (stored as given)
        config: optional configuration passed to the constructor (stored as given)
        features: list of extracted features; starts out empty
        words: target words recorded by ``setup()``; starts out empty
    """

    def __init__(self, input_files=None, config=None):
        """Store the input files and config and start with empty results.

        Args:
            input_files: files to process; must be non-empty.
            config: optional configuration, kept unchanged on the instance.

        Raises:
            ValueError: if ``input_files`` is None or empty.
        """
        if not input_files:
            raise ValueError("No input files provided!")
        self.input_files = input_files
        self.config = config
        self.features = []
        # Populated by setup(); defined here so the attribute always exists.
        self.words = []

    def setup(self, words=None):
        """Setup the word feature extractor -- validate input files & config

        Whisper expects a list of words to search for in the audio

        Args:
            words: the target words to search for; must be non-empty.
                Defaults to None (rather than a shared mutable list), which
                is rejected exactly like an empty list.

        Raises:
            ValueError: if the instance has no input files, or if ``words``
                is None or empty.
        """
        logger.debug("WordFeatureExtractor setup")

        # Validate input files
        if not self.input_files:
            raise ValueError("No input files provided")

        # Validate words
        if not words:
            raise ValueError("No words provided")
        # Keep the validated target words on the instance so that run() can
        # use them (previously they were assigned to a local and discarded).
        self.words = words
        # TODO: consider stripping punctuation since Whisper produces words+punctuation
        # and we might want to strip the punctuation there too

    def run(self):
        """Extract features corresponding to supplied target words (defined in setup) for each input file

        Use Whisper to detect words in the audio, then match these to target words and create features

        NOTE: not implemented yet -- the body is empty, so this currently
        does nothing and returns None.
        """

    def teardown(self):
        """Clean up after Whisper

        NOTE: not implemented yet -- currently a no-op that returns None.
        """