From 251679b89bcb9cff7db343a754bf95574ef9c8ea Mon Sep 17 00:00:00 2001
From: Rob Hallam <0504004h@student.gla.ac.uk>
Date: Sat, 31 Aug 2024 14:36:09 +0100
Subject: [PATCH] test: Add functional tests to WordFeatureExtractor (see note)

Functional tests for WordFeatureExtractor consist of making sure it can find
words known in advance. The Harvard Sentences [1] are a useful means of doing
that. These are 'standard sentences' that are used for speech quality
measurements, and so would be decent candidates for assessing word recognition.

The Open Speech Repository [2] has samples of sentences to download.

In testing, the Whisper medium model had trouble with a few words:
 - glue
 - well
 - punch
 - truck

I'm not sure why. Even when I recorded myself speaking the Harvard sentences in
higher quality (the OSR files are in the 8 kHz range), it still would not
recognise these words. A separate functional test covering only those words was
added as a result. This would be worth exploring in more detail if there were
time.

[1]: See e.g. https://www.cs.columbia.edu/~hgs/audio/harvard.html
[2]: https://www.voiptroubleshooter.com/open_speech/index.html
---
 test/test_feature_extractors_functional.py | 77 ++++++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/test/test_feature_extractors_functional.py b/test/test_feature_extractors_functional.py
index e3736f0..45b2ab6 100644
--- a/test/test_feature_extractors_functional.py
+++ b/test/test_feature_extractors_functional.py
@@ -150,6 +150,83 @@ class TestLoudAudioFEFunctional(FEFunctionalTest):
         # check if the feature was extracted:
         self.assertFalse(testfe.features)
 
+class TestWordFEFunctional(FEFunctionalTest):
+    """TestWordFEFunctional -- functional tests for word detection feature extractor (uses Whisper)"""
+    @pytest.mark.slow
+    @pytest.mark.veryslow
+    def test_audio_word_detection_harvard1_functional(self):
+        """Test that the word detection feature extractor finds known words
+        Uses:
+          - sample-manual-audio-harvardsentences-video-colours.mp4
+            :: Harvard sentences (list 1) up to item 1.8 ("The birch canoe... The hogs were fed")
+               --  pass if at least len(DETECT_WORDS) features are extracted
+        """
+        SAMPLE_VIDEO = f"{self.SAMPLE_DIR}/sample-manual-audio-harvardsentences-video-colours.mp4"
+        DETECT_WORDS = ["birch", "smooth", "chicken", "depth",
+                        "juice", "lemons", "box", "thrown", "beside",
+                        "hogs", "fed"]
+
+        # create mock source with the video
+        source = mocks.MockSource(path=SAMPLE_VIDEO)
+
+        # create the feature extractor
+        testfe = extractors.WordFeatureExtractor(input_files=[source])
+        testfe.setup(words=DETECT_WORDS)
+        testfe.run()
+        testfe.teardown()
+
+        self.assertGreaterEqual(len(testfe.features), len(DETECT_WORDS))
+
+
+    @pytest.mark.slow
+    @pytest.mark.veryslow
+    def test_audio_word_detection_harvard1_rdh_functional(self):
+        """Test that the word detection feature extractor finds known words
+        Uses:
+          - sample-manual-audio-harvardsentences-rdh-video-colours.mp4
+            :: Harvard sentences (list 1) up to item 1.8 ("The birch canoe... The hogs were fed") read by RDH
+               --  pass if at least len(DETECT_WORDS) features are extracted
+        """
+        SAMPLE_VIDEO = f"{self.SAMPLE_DIR}/sample-manual-audio-harvardsentences-rdh-video-colours.mp4"
+        # "glue", "well", "punch" and "truck" are deliberately left out: the Whisper
+        # medium model did not recognise them in testing. They are exercised on
+        # their own in test_audio_word_detection_harvard_gluewellpunchtruck_rdh_functional.
+        DETECT_WORDS = ["birch", "smooth", "chicken", "depth",
+                        "juice", "lemons", "box", "thrown", "beside",
+                        "hogs", "fed"]
+
+        # create mock source with the video
+        source = mocks.MockSource(path=SAMPLE_VIDEO)
+
+        # create the feature extractor
+        testfe = extractors.WordFeatureExtractor(input_files=[source])
+        testfe.setup(words=DETECT_WORDS)
+        testfe.run()
+        testfe.teardown()
+
+        self.assertGreaterEqual(len(testfe.features), len(DETECT_WORDS))
+
+    def test_audio_word_detection_harvard_gluewellpunchtruck_rdh_functional(self):
+        """Test word detection on the words Whisper had trouble recognising
+        Uses:
+          - sample-manual-audio-harvardsentences-rdh2-video-colours.mp4
+            :: only the words "glue", "well", "punch", "truck" are read by RDH
+        """
+        # NOTE(review): not marked slow/veryslow like the sibling tests -- confirm this is intended
+        SAMPLE_VIDEO = f"{self.SAMPLE_DIR}/sample-manual-audio-harvardsentences-rdh2-video-colours.mp4"
+        DETECT_WORDS = ["glue", "well", "punch", "truck"]
+
+        # create mock source with the video
+        source = mocks.MockSource(path=SAMPLE_VIDEO)
+
+        # create the feature extractor
+        testfe = extractors.WordFeatureExtractor(input_files=[source])
+        testfe.setup(words=DETECT_WORDS)
+        testfe.run()
+        testfe.teardown()
+
+        # expect at least one extracted feature per target word:
+        self.assertGreaterEqual(len(testfe.features), len(DETECT_WORDS))
 
 if __name__ == "__main__":
     unittest.main()