From 251679b89bcb9cff7db343a754bf95574ef9c8ea Mon Sep 17 00:00:00 2001
From: Rob Hallam <0504004h@student.gla.ac.uk>
Date: Sat, 31 Aug 2024 14:36:09 +0100
Subject: [PATCH] test: Add functional tests to WordFeatureExtractor (see note)

Functional tests for WordFeatureExtractor consist of making sure it can
find words known in advance. The Harvard Sentences [1] are a useful
means of doing that. These are 'standard sentences' that are used for
speech quality measurements, and so would be decent candidates for
assessing word recognition.

The Open Speech Repository [2] has samples of sentences to download.

In testing, the Whisper medium model had trouble with a few words:

- glue
- well
- punch
- truck

I'm not sure why. Even when I recorded myself speaking the Harvard
sentences in higher quality (OSR files are in the 8 kHz range) it would
still not recognise these words.

A separate functional test of only those words was added as a result.
This would perhaps be worth exploring in more detail if there was time.

[1]: See eg https://www.cs.columbia.edu/~hgs/audio/harvard.html
[2]: https://www.voiptroubleshooter.com/open_speech/index.html
---
 test/test_feature_extractors_functional.py | 77 ++++++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/test/test_feature_extractors_functional.py b/test/test_feature_extractors_functional.py
index e3736f0..45b2ab6 100644
--- a/test/test_feature_extractors_functional.py
+++ b/test/test_feature_extractors_functional.py
@@ -150,6 +150,83 @@ class TestLoudAudioFEFunctional(FEFunctionalTest):
         # check if the feature was extracted:
         self.assertFalse(testfe.features)
 
+class TestWordFEFunctional(FEFunctionalTest):
+    """TestWordFEFunctional -- functional tests for word detection feature extractor (uses Whisper)
+
+    Every test runs WordFeatureExtractor on a sample video of spoken words and
+    passes if at least as many features come back as words were requested.
+    """
+
+    # Words to detect in Harvard sentences list 1, up to item 1.8.
+    # "glue", "well", "punch" and "truck" are deliberately left out: the Whisper
+    # medium model failed to recognise them in testing (even on a higher quality
+    # recording), so they are covered by a separate test below.
+    HARVARD1_WORDS = ("birch", "smooth", "chicken", "depth",
+                      "juice", "lemons", "box", "thrown", "beside",
+                      "hogs", "fed")
+
+    def _assert_words_detected(self, sample_name, words):
+        """Run the word feature extractor on a sample and check the feature count
+
+        sample_name -- file name of the sample video inside self.SAMPLE_DIR
+        words -- words the extractor is set up to detect
+
+        Only the number of features is checked, not which word each one matched.
+        """
+        # create mock source with the video
+        source = mocks.MockSource(path=f"{self.SAMPLE_DIR}/{sample_name}")
+
+        # create the feature extractor; it gets its own list of words so the
+        # shared class-level tuple can never be modified by it
+        testfe = extractors.WordFeatureExtractor(input_files=[source])
+        testfe.setup(words=list(words))
+        testfe.run()
+        testfe.teardown()
+
+        # pass if at least as many features were extracted as words requested
+        self.assertGreaterEqual(len(testfe.features), len(words))
+
+    @pytest.mark.slow
+    @pytest.mark.veryslow
+    def test_audio_word_detection_harvard1_functional(self):
+        """Test audio word detection feature extractor
+        Uses:
+        - sample-manual-audio-harvardsentences-video-colours.mp4
+        :: Harvard sentences (list 1) up to item 1.8 ("The birch canoe... The hogs were fed")
+        -- pass if words detected from this set
+        """
+        self._assert_words_detected(
+            "sample-manual-audio-harvardsentences-video-colours.mp4",
+            self.HARVARD1_WORDS)
+
+    @pytest.mark.slow
+    @pytest.mark.veryslow
+    def test_audio_word_detection_harvard1_rdh_functional(self):
+        """Test audio word detection feature extractor
+        Uses:
+        - sample-manual-audio-harvardsentences-rdh-video-colours.mp4
+        :: Harvard sentences (list 1) up to item 1.8 ("The birch canoe... The hogs were fed") read by RDH
+        -- pass if words detected from this set
+        """
+        self._assert_words_detected(
+            "sample-manual-audio-harvardsentences-rdh-video-colours.mp4",
+            self.HARVARD1_WORDS)
+
+    # marked slow like the other tests: it runs the same Whisper-backed extractor
+    @pytest.mark.slow
+    @pytest.mark.veryslow
+    def test_audio_word_detection_harvard_gluewellpunchtruck_rdh_functional(self):
+        """Test audio word detection feature extractor
+        Uses:
+        - sample-manual-audio-harvardsentences-rdh2-video-colours.mp4
+        :: only the words "glue", "well", "punch", "truck" are read by RDH
+        -- pass if words detected from this set
+        """
+        # these are the words the Whisper medium model had trouble with in the
+        # Harvard sentence samples, read out on their own
+        self._assert_words_detected(
+            "sample-manual-audio-harvardsentences-rdh2-video-colours.mp4",
+            ["glue", "well", "punch", "truck"])
 
 if __name__ == "__main__":
     unittest.main()