|
- """test_feature_extractors_functional.py -- functional tests for feature extractors
-
- This module contains functional tests for FEs using crafted and/or generated media files
- to verify that the FEs are working as expected:
-
- - laughter detection -- uses videos with laughs at known times
- - video activity -- uses videos with visual activity at known times
- - audio loudness -- uses videos with audio at known times
-
- etc.
-
- These tests are marked slow to avoid running them during normal test runs.
- """
-
import os
import unittest

import pytest

import pipeline.feature_extractors as extractors
import test.mocks as mocks
-
class FEFunctionalTest(unittest.TestCase):
    """FEFunctionalTest -- base class for functional tests for feature extractors

    Provides the directory containing the crafted sample media files used by
    the functional tests. The location can be overridden with the
    SAMPLE_VIDEO_DIR environment variable so the suite is not tied to one
    developer's machine; the historical absolute path is kept as the default
    for backward compatibility.
    """
    # Directory holding the sample videos; override via SAMPLE_VIDEO_DIR.
    SAMPLE_DIR = os.environ.get(
        "SAMPLE_VIDEO_DIR",
        "/home/robert/code/softdev2023-24/summerproject/highlights/test/sample_videos",
    )
-
-
@pytest.mark.slow
@pytest.mark.veryslow
class TestLaughterFEFunctional(FEFunctionalTest):
    """TestLaughterFEFunctional -- functional tests for laughter detection feature extractor"""

    def test_laughter_detection(self):
        """Test laughter detection feature extractor

        Uses:
            - sample_videos/sample-manual-audio-laughs-video-colours.mp4
              :: laughters at 15-20s
              -- pass iff laughter features extracted in this range, *but*
            NOTE: LaughFE subtracts from start time to capture what preceded the laughter
                  so we need to subtract this time (and adds a little after too)
                  FE 'exposes' these as _PREPEND_TIME and _APPEND_TIME

        Note: takes 8-10s to run for this 30s video using GTX 970. As such this test can be skipped with either:
            "-m 'not veryslow'" or "-m 'not slow'"
        """
        SAMPLE_VIDEO = f"{self.SAMPLE_DIR}/sample-manual-audio-laughs-video-colours.mp4"

        START_TIME = 15
        END_TIME = 20
        # create mock source with the video
        source = mocks.MockSource(path=SAMPLE_VIDEO)

        # create the feature extractor and run the full FE lifecycle
        testfe = extractors.LaughterFeatureExtractor(input_files=[source])
        testfe.setup()
        testfe.run()
        testfe.teardown()

        # check if the feature was extracted:
        self.assertTrue(testfe.features)
        # check if the feature interval is within the expected range
        # (assertGreaterEqual/assertLessEqual report the actual values on
        # failure, unlike assertTrue(a >= b) which just prints "False")
        self.assertGreaterEqual(
            testfe.features[0].interval.start,
            START_TIME - testfe._PREPEND_TIME,
        )
        self.assertLessEqual(
            testfe.features[0].interval.end,
            END_TIME + testfe._APPEND_TIME,
        )
-
-
# module docstring says these functional tests are marked slow; mark was
# missing here while the other FE test classes carry it
@pytest.mark.slow
class TestVideoActivityFEFunctional(FEFunctionalTest):
    """TestVideoActivityFEFunctional -- functional tests for video activity feature extractor
    """

    def test_visual_activity_functional(self):
        """Test visual activity feature extractor

        use:
            - sample_videos/sample-manual-visualactivity.mp4 :: activity at 15-20s -- pass if activity detected anywhere in this range
        """
        SAMPLE_VIDEO = f"{self.SAMPLE_DIR}/sample-manual-visualactivity.mp4"

        START_TIME = 15
        END_TIME = 20
        # create mock source with the video
        source = mocks.MockSource(path=SAMPLE_VIDEO)

        # create the feature extractor and run the full FE lifecycle
        testfe = extractors.VideoActivityFeatureExtractor(input_files=[source])
        testfe.setup()
        testfe.run()
        testfe.teardown()

        # check if the feature was extracted:
        self.assertTrue(testfe.features)
        # check the feature starts within the expected range -- both bounds,
        # so activity detected outside 15-20s fails (END_TIME was previously
        # defined but never checked)
        self.assertGreaterEqual(testfe.features[0].interval.start, START_TIME)
        self.assertLessEqual(testfe.features[0].interval.start, END_TIME)
-
-
# module docstring says these functional tests are marked slow; mark was
# missing here while the other FE test classes carry it
@pytest.mark.slow
class TestLoudAudioFEFunctional(FEFunctionalTest):
    """TestLoudAudioFEFunctional -- functional tests for loud audio feature extractor
    """

    def test_audio_loudness_functional_one_feature(self):
        """Test audio loudness feature extractor

        use:
            - sample_videos/sample-manual-audio.mp4 :: audio at 15-20s -- pass if audio detected anywhere in this range
              -- peak at 16s - 18s, verify this is highest scoring
        """
        SAMPLE_VIDEO = f"{self.SAMPLE_DIR}/sample-manual-audio.mp4"

        START_TIME = 15
        END_TIME = 20
        PEAK_START = 16
        PEAK_END = 18
        # create mock source with the video
        source = mocks.MockSource(path=SAMPLE_VIDEO)

        # create the feature extractor and run the full FE lifecycle
        testfe = extractors.LoudAudioFeatureExtractor(input_files=[source])
        testfe.setup()
        testfe.run()
        testfe.teardown()

        # check if the feature was extracted:
        self.assertTrue(testfe.features)
        # check the feature starts within the expected range -- both bounds,
        # so audio detected outside 15-20s fails (END_TIME was previously
        # defined but never checked)
        self.assertGreaterEqual(testfe.features[0].interval.start, START_TIME)
        self.assertLessEqual(testfe.features[0].interval.start, END_TIME)

        # find the highest-scoring feature (max with a key beats sorting the
        # whole list just to take the first element)
        loudest = max(testfe.features, key=lambda f: f.score)
        # check the highest scoring feature lies within the peak range
        # (PEAK_END was previously defined but never checked)
        self.assertGreaterEqual(loudest.interval.start, PEAK_START)
        self.assertLessEqual(loudest.interval.start, PEAK_END)

    def test_audio_loudness_functional_no_features(self):
        """Test audio loudness feature extractor using a silent video. This should produce no features
        since "-inf" results from pyloudnorm are filtered out by the FE.

        Use:
            - sample_videos/sample-manual-audio-blank-video-colours.mp4
                :: silent video (30s)
                -- pass if no features extracted
        """
        SAMPLE_VIDEO = f"{self.SAMPLE_DIR}/sample-manual-audio-blank-video-colours.mp4"

        # create mock source with the video
        source = mocks.MockSource(path=SAMPLE_VIDEO)

        # create the feature extractor and run the full FE lifecycle
        testfe = extractors.LoudAudioFeatureExtractor(input_files=[source])
        testfe.setup()
        testfe.run()
        testfe.teardown()

        # silent input must yield no features at all
        self.assertFalse(testfe.features)
-
class TestWordFEFunctional(FEFunctionalTest):
    """TestWordFEFunctional -- functional tests for word detection feature extractor (uses Whisper)"""

    # Words from Harvard sentences list 1 (items 1.1-1.8) spoken in the
    # sample videos; shared by several tests below instead of being
    # duplicated per method. "truck", "glue", "well", "punch" are omitted
    # due to detection problems (covered by a dedicated test).
    HARVARD_WORDS = ["birch", "smooth", "chicken", "depth",
                     "juice", "lemons", "box", "thrown", "beside",
                     "hogs", "fed"]

    @pytest.mark.slow
    @pytest.mark.veryslow
    def test_audio_word_detection_harvard1_functional(self):
        """Test audio word detection feature extractor
        Uses:
            - sample-manual-audio-harvardsentences-video-colours.mp4
                :: Harvard sentences (list 1) up to item 1.8 ("The birch canoe... The hogs were fed")
                -- pass if words detected from this set
        """
        SAMPLE_VIDEO = f"{self.SAMPLE_DIR}/sample-manual-audio-harvardsentences-video-colours.mp4"
        DETECT_WORDS = self.HARVARD_WORDS

        # create mock source with the video
        source = mocks.MockSource(path=SAMPLE_VIDEO)

        # create the feature extractor and run the full FE lifecycle
        testfe = extractors.WordFeatureExtractor(input_files=[source])
        testfe.setup(words=DETECT_WORDS)
        testfe.run()
        testfe.teardown()

        # every target word should be detected at least once
        self.assertGreaterEqual(len(testfe.features), len(DETECT_WORDS))

    @pytest.mark.slow
    @pytest.mark.veryslow
    def test_audio_word_detection_harvard1_rdh_functional(self):
        """Test audio word detection feature extractor
        Uses:
            - sample-manual-audio-harvardsentences-rdh-video-colours.mp4
                :: Harvard sentences (list 1) up to item 1.8 ("The birch canoe... The hogs were fed") read by RDH
                -- pass if words detected from this set
        """
        SAMPLE_VIDEO = f"{self.SAMPLE_DIR}/sample-manual-audio-harvardsentences-rdh-video-colours.mp4"
        DETECT_WORDS = self.HARVARD_WORDS

        # create mock source with the video
        source = mocks.MockSource(path=SAMPLE_VIDEO)

        # create the feature extractor and run the full FE lifecycle
        testfe = extractors.WordFeatureExtractor(input_files=[source])
        testfe.setup(words=DETECT_WORDS)
        testfe.run()
        testfe.teardown()

        # every target word should be detected at least once
        self.assertGreaterEqual(len(testfe.features), len(DETECT_WORDS))

    # this test also runs Whisper, so it carries the same slow marks as the
    # other word-detection tests (was previously unmarked, contradicting the
    # module docstring)
    @pytest.mark.slow
    @pytest.mark.veryslow
    def test_audio_word_detection_harvard_gluewellpunchtruck_rdh_functional(self):
        """Test audio word detection feature extractor
        Uses:
            - sample-manual-audio-harvardsentences-rdh2-video-colours.mp4
                :: only the words "glue", "well", "punch", "truck" are read by RDH
        """
        SAMPLE_VIDEO = f"{self.SAMPLE_DIR}/sample-manual-audio-harvardsentences-rdh2-video-colours.mp4"
        DETECT_WORDS = ["glue", "well", "punch", "truck"]

        # create mock source with the video
        source = mocks.MockSource(path=SAMPLE_VIDEO)

        # create the feature extractor and run the full FE lifecycle
        testfe = extractors.WordFeatureExtractor(input_files=[source])
        testfe.setup(words=DETECT_WORDS)
        testfe.run()
        testfe.teardown()

        # check if the word was feature extracted:
        self.assertGreaterEqual(len(testfe.features), len(DETECT_WORDS))

    # Whisper is still loaded/run here, so keep the slow marks
    @pytest.mark.slow
    @pytest.mark.veryslow
    def test_audio_word_detection_noaudio_nofeatures(self):
        """Test audio word detection feature extractor
        Uses:
            - sample-manual-audio-blank-video-colours.mp4
                :: silent video (30s)
                -- pass if no features extracted
        """
        SAMPLE_VIDEO = f"{self.SAMPLE_DIR}/sample-manual-audio-blank-video-colours.mp4"
        DETECT_WORDS = self.HARVARD_WORDS

        # create mock source with the video
        source = mocks.MockSource(path=SAMPLE_VIDEO)

        # create the feature extractor
        testfe = extractors.WordFeatureExtractor(input_files=[source])
        testfe.setup(words=DETECT_WORDS)

        # Whisper throws a hissy fit if there's no audio:
        #     RuntimeError: stack expects a non-empty TensorList
        #     stdout: "No active speech found in audio"
        # TODO: consider catching this error in the FE and returning no
        # features instead; then this test should assert an empty feature
        # list rather than the raised error
        with self.assertRaises(RuntimeError):
            testfe.run()
-
# Allow running this module directly with the stock unittest runner
# (the pytest marks above are simply ignored in that case, so the slow
# tests will all run).
if __name__ == "__main__":
    unittest.main()
|