"""test_feature_extractors_functional.py -- functional tests for feature extractors This module contains functional tests for FEs using crafted and/or generated media files to verify that the FEs are working as expected: - laughter detection -- uses videos with laughs at known times - video activity -- uses videos with visual activity at known times - audio loudness -- uses videos with audio at known times etc. These tests are marked slow to avoid running them during normal test runs. """ import pytest import unittest import pipeline.feature_extractors as extractors import test.mocks as mocks class FEFunctionalTest(unittest.TestCase): """FEFunctionalTest -- base class for functional tests for feature extractors """ SAMPLE_DIR = "/home/robert/code/softdev2023-24/summerproject/highlights/test/sample_videos" @pytest.mark.slow @pytest.mark.veryslow class TestLaughterFEFunctional(FEFunctionalTest): """TestLaughterFEFunctional -- functional tests for laughter detection feature extractor""" def test_laughter_detection_positive(self): """Test laughter detection feature extractor Uses: - sample_videos/sample-manual-audio-laughs-video-colours.mp4 :: laughters at 15-20s -- pass iff laughter features extracted in this range, *but* NOTE: LaughFE subtracts from start time to capture what preceded the laughter so we need to subtract this time (and adds a little after too) FE 'exposes' these as _PREPEND_TIME and _APPEND_TIME Note: takes 8-10s to run for this 30s video using GTX 970. As such this test can be skipped with either: "-m 'not veryslow'" or "-m 'not slow'" """ SAMPLE_VIDEO = f"{self.SAMPLE_DIR}/sample-manual-audio-laughs-video-colours.mp4" START_TIME = 15 END_TIME = 20 # create mock source with the video source = mocks.MockSource(path=SAMPLE_VIDEO) # create the feature extractor testfe = extractors.LaughterFeatureExtractor(input_files=[source]) testfe.setup() testfe.run() testfe.teardown() # check if the feature was extracted: self.assertTrue(testfe.features) # check if the feature interval is within the expected range self.assertTrue(testfe.features[0].interval.start >= (START_TIME - testfe._PREPEND_TIME)) self.assertTrue(testfe.features[0].interval.end <= (END_TIME + testfe._APPEND_TIME)) def test_laughter_detection_negative(self): """Negative test for laughter detection feature extractor -- should not detect laughter in a silent video Uses: - sample_videos/sample-manual-audio-blank-video-colours.mp4 :: silent video (30s) -- pass iff no laughter features """ SAMPLE_VIDEO = f"{self.SAMPLE_DIR}/sample-manual-audio-blank-video-colours.mp4" # create mock source with the video source = mocks.MockSource(path=SAMPLE_VIDEO) # create the feature extractor testfe = extractors.LaughterFeatureExtractor(input_files=[source]) testfe.setup() testfe.run() testfe.teardown() # check if the feature was extracted: self.assertFalse(testfe.features) class TestVideoActivityFEFunctional(FEFunctionalTest): """TestVisualActivityFEFunctional -- functional tests for visual activity feature extractor """ def test_visual_activity_functional(self): """Test visual activity feature extractor use: - sample_videos/sample-manual-visualactivity.mp4 :: activity at 15-20s -- pass if activity detected anywhere in this range """ SAMPLE_VIDEO = f"{self.SAMPLE_DIR}/sample-manual-visualactivity.mp4" START_TIME = 15 END_TIME = 20 # create mock source with the video source = mocks.MockSource(path=SAMPLE_VIDEO) # create the feature extractor testfe = extractors.VideoActivityFeatureExtractor(input_files=[source]) testfe.setup() testfe.run() 
        testfe.teardown()

        # check if the feature was extracted:
        self.assertTrue(testfe.features)

        # check if the feature interval is within the expected range
        self.assertTrue(testfe.features[0].interval.start >= START_TIME)


class TestLoudAudioFEFunctional(FEFunctionalTest):
    """TestLoudAudioFEFunctional -- functional tests for audio loudness feature extractor"""

    def test_audio_loudness_functional_one_feature(self):
        """Test audio loudness feature extractor

        Uses:
        - sample_videos/sample-manual-audio.mp4
            :: audio at 15-20s
            -- pass if audio detected anywhere in this range
            -- peak at 16s-18s, verify this is the highest scoring
        """
        SAMPLE_VIDEO = f"{self.SAMPLE_DIR}/sample-manual-audio.mp4"
        START_TIME = 15
        END_TIME = 20
        PEAK_START = 16
        PEAK_END = 18

        # create mock source with the video
        source = mocks.MockSource(path=SAMPLE_VIDEO)

        # create the feature extractor
        testfe = extractors.LoudAudioFeatureExtractor(input_files=[source])
        testfe.setup()
        testfe.run()
        testfe.teardown()

        # check if the feature was extracted:
        self.assertTrue(testfe.features)

        # check if the feature interval is within the expected range
        self.assertTrue(testfe.features[0].interval.start >= START_TIME)

        # get sorted list of features based on feature.score
        sorted_features = sorted(testfe.features, key=lambda x: x.score, reverse=True)

        # check if the highest scoring feature is within the peak range
        self.assertTrue(sorted_features[0].interval.start >= PEAK_START)

    def test_audio_loudness_functional_no_features(self):
        """Test audio loudness feature extractor using a silent video.

        This should produce no features since "-inf" results from pyloudnorm
        are filtered out by the FE.

        Uses:
        - sample_videos/sample-manual-audio-blank-video-colours.mp4
            :: silent video (30s)
            -- pass if no features extracted
        """
        SAMPLE_VIDEO = f"{self.SAMPLE_DIR}/sample-manual-audio-blank-video-colours.mp4"

        # create mock source with the video
        source = mocks.MockSource(path=SAMPLE_VIDEO)

        # create the feature extractor
        testfe = extractors.LoudAudioFeatureExtractor(input_files=[source])
        testfe.setup()
        testfe.run()
        testfe.teardown()

        # check that no feature was extracted:
        self.assertFalse(testfe.features)


class TestWordFEFunctional(FEFunctionalTest):
    """TestWordFEFunctional -- functional tests for word detection feature extractor (uses Whisper)"""

    @pytest.mark.slow
    @pytest.mark.veryslow
    def test_audio_word_detection_harvard1_functional(self):
        """Test audio word detection feature extractor

        Uses:
        - sample-manual-audio-harvardsentences-video-colours.mp4
            :: Harvard sentences (list 1) up to item 1.8
               ("The birch canoe... The hogs were fed")
            -- pass if words detected from this set
        """
        SAMPLE_VIDEO = f"{self.SAMPLE_DIR}/sample-manual-audio-harvardsentences-video-colours.mp4"
        DETECT_WORDS = ["birch", "smooth", "chicken", "depth", "juice", "lemons",
                        "box", "thrown", "beside", "hogs", "fed"]

        # create mock source with the video
        source = mocks.MockSource(path=SAMPLE_VIDEO)

        # create the feature extractor
        testfe = extractors.WordFeatureExtractor(input_files=[source])
        testfe.setup(words=DETECT_WORDS)
        testfe.run()
        testfe.teardown()

        self.assertGreaterEqual(len(testfe.features), len(DETECT_WORDS))

    @pytest.mark.slow
    @pytest.mark.veryslow
    def test_audio_word_detection_harvard1_rdh_functional(self):
        """Test audio word detection feature extractor

        Uses:
        - sample-manual-audio-harvardsentences-rdh-video-colours.mp4
            :: Harvard sentences (list 1) up to item 1.8 ("The birch canoe...
The hogs were fed") read by RDH -- pass if words detected from this set """ SAMPLE_VIDEO = f"{self.SAMPLE_DIR}/sample-manual-audio-harvardsentences-rdh-video-colours.mp4" DETECT_WORDS = ["birch", "smooth", "chicken", "depth", "juice", "lemons", "box", "thrown", "beside", "hogs", "fed"] # missing "truck", "glue", "well", "punch" due to problems # create mock source with the video source = mocks.MockSource(path=SAMPLE_VIDEO) # create the feature extractor testfe = extractors.WordFeatureExtractor(input_files=[source]) testfe.setup(words=DETECT_WORDS) testfe.run() testfe.teardown() self.assertGreaterEqual(len(testfe.features), len(DETECT_WORDS)) def test_audio_word_detection_harvard_gluewellpunchtruck_rdh_functional(self): """Test audio word detection feature extractor Uses: - sample-manual-audio-harvardsentences-rdh2-video-colours.mp4 :: only the words "glue", "well", "punch", "truck" are read by RDH """ SAMPLE_VIDEO = f"{self.SAMPLE_DIR}/sample-manual-audio-harvardsentences-rdh2-video-colours.mp4" DETECT_WORDS = ["glue", "well", "punch", "truck"] # create mock source with the video source = mocks.MockSource(path=SAMPLE_VIDEO) # create the feature extractor testfe = extractors.WordFeatureExtractor(input_files=[source]) testfe.setup(words=DETECT_WORDS) testfe.run() testfe.teardown() # check if the word was feature extracted: self.assertGreaterEqual(len(testfe.features), 4) def test_audio_word_detection_noaudio_nofeatures(self): """Test audio word detection feature extractor Uses: - sample-manual-audio-blank-video-colours.mp4 :: silent video (30s) -- pass if no features extracted """ SAMPLE_VIDEO = f"{self.SAMPLE_DIR}/sample-manual-audio-blank-video-colours.mp4" DETECT_WORDS = ["birch", "smooth", "chicken", "depth", "juice", "lemons", "box", "thrown", "beside", "hogs", "fed"] # create mock source with the video source = mocks.MockSource(path=SAMPLE_VIDEO) # create the feature extractor testfe = extractors.WordFeatureExtractor(input_files=[source]) testfe.setup(words=DETECT_WORDS) # ensure no features extracted from blank audio: # self.assertEqual(len(testfe.features), 0) # Actually, Whisper throws a hissy fit if there's no audio: # RuntimeError: stack expects a non-empty TensorList # stdout: "No active speech found in audio" # TODO: consider catching this error in the FE with self.assertRaises(RuntimeError): testfe.run() def test_audio_word_detection_harvard_no_matching_words(self): """Test audio word detection feature extractor Uses: - sample-manual-audio-harvardsentences-video-colours.mp4 :: Harvard sentences (list 1) up to item 1.8 ("The birch canoe... The hogs were fed") plus the sentence: "Portez ce vieux whisky au juge blond qui fume" -- pass if no features extracted """ SAMPLE_VIDEO = f"{self.SAMPLE_DIR}/sample-manual-audio-harvardsentences-video-colours.mp4" DETECT_WORDS = "Portez ce vieux whisky au juge blond qui fume".split() # create mock source with the video source = mocks.MockSource(path=SAMPLE_VIDEO) # create the feature extractor testfe = extractors.WordFeatureExtractor(input_files=[source]) testfe.setup(words=DETECT_WORDS) testfe.run() testfe.teardown() # ensure no Features: self.assertEqual(len(testfe.features), 0) if __name__ == "__main__": unittest.main()