Non puoi selezionare più di 25 argomenti Gli argomenti devono iniziare con una lettera o un numero, possono includere trattini ('-') e possono essere lunghi fino a 35 caratteri.

259 righe
10 KiB

  1. """test_feature_extractors_functional.py -- functional tests for feature extractors
  2. This module contains functional tests for FEs using crafted and/or generated media files
  3. to verify that the FEs are working as expected:
  4. - laughter detection -- uses videos with laughs at known times
  5. - video activity -- uses videos with visual activity at known times
  6. - audio loudness -- uses videos with audio at known times
  7. etc.
  8. These tests are marked slow to avoid running them during normal test runs.
  9. """
import unittest
from pathlib import Path

import pytest

import pipeline.feature_extractors as extractors
import test.mocks as mocks
  14. class FEFunctionalTest(unittest.TestCase):
  15. """FEFunctionalTest -- base class for functional tests for feature extractors
  16. """
  17. SAMPLE_DIR = "/home/robert/code/softdev2023-24/summerproject/highlights/test/sample_videos"
  18. @pytest.mark.slow
  19. @pytest.mark.veryslow
  20. class TestLaughterFEFunctional(FEFunctionalTest):
  21. """TestLaughterFEFunctional -- functional tests for laughter detection feature extractor"""
  22. def test_laughter_detection(self):
  23. """Test laughter detection feature extractor
  24. Uses:
  25. - sample_videos/sample-manual-audio-laughs-video-colours.mp4
  26. :: laughters at 15-20s
  27. -- pass iff laughter features extracted in this range, *but*
  28. NOTE: LaughFE subtracts from start time to capture what preceded the laughter
  29. so we need to subtract this time (and adds a little after too)
  30. FE 'exposes' these as _PREPEND_TIME and _APPEND_TIME
  31. Note: takes 8-10s to run for this 30s video using GTX 970. As such this test can be skipped with either:
  32. "-m 'not veryslow'" or "-m 'not slow'"
  33. """
  34. SAMPLE_VIDEO = f"{self.SAMPLE_DIR}/sample-manual-audio-laughs-video-colours.mp4"
  35. START_TIME = 15
  36. END_TIME = 20
  37. # create mock source with the video
  38. source = mocks.MockSource(path=SAMPLE_VIDEO)
  39. # create the feature extractor
  40. testfe = extractors.LaughterFeatureExtractor(input_files=[source])
  41. testfe.setup()
  42. testfe.run()
  43. testfe.teardown()
  44. # check if the feature was extracted:
  45. self.assertTrue(testfe.features)
  46. # check if the feature interval is within the expected range
  47. self.assertTrue(testfe.features[0].interval.start >= (START_TIME - testfe._PREPEND_TIME))
  48. self.assertTrue(testfe.features[0].interval.end <= (END_TIME + testfe._APPEND_TIME))
  49. class TestVideoActivityFEFunctional(FEFunctionalTest):
  50. """TestVisualActivityFEFunctional -- functional tests for visual activity feature extractor
  51. """
  52. def test_visual_activity_functional(self):
  53. """Test visual activity feature extractor
  54. use:
  55. - sample_videos/sample-manual-visualactivity.mp4 :: activity at 15-20s -- pass if activity detected anywhere in this range
  56. """
  57. SAMPLE_VIDEO = f"{self.SAMPLE_DIR}/sample-manual-visualactivity.mp4"
  58. START_TIME = 15
  59. END_TIME = 20
  60. # create mock source with the video
  61. source = mocks.MockSource(path=SAMPLE_VIDEO)
  62. # create the feature extractor
  63. testfe = extractors.VideoActivityFeatureExtractor(input_files=[source])
  64. testfe.setup()
  65. testfe.run()
  66. testfe.teardown()
  67. # check if the feature was extracted:
  68. self.assertTrue(testfe.features)
  69. # check if the feature interval is within the expected range
  70. self.assertTrue(testfe.features[0].interval.start >= START_TIME)
  71. class TestLoudAudioFEFunctional(FEFunctionalTest):
  72. """TestAudioLoudnessFEFunctional -- functional tests for audio loudness feature extractor
  73. """
  74. def test_audio_loudness_functional_one_feature(self):
  75. """Test audio loudness feature extractor
  76. use:
  77. - sample_videos/sample-manual-audio.mp4 :: audio at 15-20s -- pass if audio detected anywhere in this range
  78. -- peak at 16s - 18s, verify this is highest scoring
  79. """
  80. SAMPLE_VIDEO = f"{self.SAMPLE_DIR}/sample-manual-audio.mp4"
  81. START_TIME = 15
  82. END_TIME = 20
  83. PEAK_START = 16
  84. PEAK_END = 18
  85. # create mock source with the video
  86. source = mocks.MockSource(path=SAMPLE_VIDEO)
  87. # create the feature extractor
  88. testfe = extractors.LoudAudioFeatureExtractor(input_files=[source])
  89. testfe.setup()
  90. testfe.run()
  91. testfe.teardown()
  92. # check if the feature was extracted:
  93. self.assertTrue(testfe.features)
  94. # check if the feature interval is within the expected range
  95. self.assertTrue(testfe.features[0].interval.start >= START_TIME)
  96. # get sorted list of features based on feature.score
  97. sorted_features = sorted(testfe.features, key=lambda x: x.score, reverse=True)
  98. # check if the highest scoring feature is within the peak range
  99. self.assertTrue(sorted_features[0].interval.start >= PEAK_START)
  100. def test_audio_loudness_functional_no_features(self):
  101. """Test audio loudness feature extractor using a silent video. This should produce no features
  102. since "-inf" results from pyloudnorm are filtered out by the FE.
  103. Use:
  104. - sample_videos/sample-manual-audio-blank-video-colours.mp4
  105. :: silent video (30s)
  106. -- pass if no features extracted
  107. """
  108. SAMPLE_VIDEO = f"{self.SAMPLE_DIR}/sample-manual-audio-blank-video-colours.mp4"
  109. # create mock source with the video
  110. source = mocks.MockSource(path=SAMPLE_VIDEO)
  111. # create the feature extractor
  112. testfe = extractors.LoudAudioFeatureExtractor(input_files=[source])
  113. testfe.setup()
  114. testfe.run()
  115. testfe.teardown()
  116. # check if the feature was extracted:
  117. self.assertFalse(testfe.features)
  118. class TestWordFEFunctional(FEFunctionalTest):
  119. """TestWordFEFunctional -- functional tests for word detection feature extractor (uses Whisper)"""
  120. @pytest.mark.slow
  121. @pytest.mark.veryslow
  122. def test_audio_word_detection_harvard1_functional(self):
  123. """Test audio word detection feature extractor
  124. Uses:
  125. - sample-manual-audio-harvardsentences-video-colours.mp4
  126. :: Harvard sentences (list 1) up to item 1.8 ("The birch canoe... The hogs were fed")
  127. -- pass if words detected from this set
  128. """
  129. SAMPLE_VIDEO = f"{self.SAMPLE_DIR}/sample-manual-audio-harvardsentences-video-colours.mp4"
  130. DETECT_WORDS = ["birch", "smooth", "chicken", "depth",
  131. "juice", "lemons", "box", "thrown", "beside",
  132. "hogs", "fed"]
  133. # create mock source with the video
  134. source = mocks.MockSource(path=SAMPLE_VIDEO)
  135. # create the feature extractor
  136. testfe = extractors.WordFeatureExtractor(input_files=[source])
  137. testfe.setup(words=DETECT_WORDS)
  138. testfe.run()
  139. testfe.teardown()
  140. self.assertGreaterEqual(len(testfe.features), len(DETECT_WORDS))
  141. @pytest.mark.slow
  142. @pytest.mark.veryslow
  143. def test_audio_word_detection_harvard1_rdh_functional(self):
  144. """Test audio word detection feature extractor
  145. Uses:
  146. - sample-manual-audio-harvardsentences-rdh-video-colours.mp4
  147. :: Harvard sentences (list 1) up to item 1.8 ("The birch canoe... The hogs were fed") read by RDH
  148. -- pass if words detected from this set
  149. """
  150. SAMPLE_VIDEO = f"{self.SAMPLE_DIR}/sample-manual-audio-harvardsentences-rdh-video-colours.mp4"
  151. DETECT_WORDS = ["birch", "smooth", "chicken", "depth",
  152. "juice", "lemons", "box", "thrown", "beside",
  153. "hogs", "fed"] # missing "truck", "glue", "well", "punch" due to problems
  154. # create mock source with the video
  155. source = mocks.MockSource(path=SAMPLE_VIDEO)
  156. # create the feature extractor
  157. testfe = extractors.WordFeatureExtractor(input_files=[source])
  158. testfe.setup(words=DETECT_WORDS)
  159. testfe.run()
  160. testfe.teardown()
  161. self.assertGreaterEqual(len(testfe.features), len(DETECT_WORDS))
  162. def test_audio_word_detection_harvard_gluewellpunchtruck_rdh_functional(self):
  163. """Test audio word detection feature extractor
  164. Uses:
  165. - sample-manual-audio-harvardsentences-rdh2-video-colours.mp4
  166. :: only the words "glue", "well", "punch", "truck" are read by RDH
  167. """
  168. SAMPLE_VIDEO = f"{self.SAMPLE_DIR}/sample-manual-audio-harvardsentences-rdh2-video-colours.mp4"
  169. DETECT_WORDS = ["glue", "well", "punch", "truck"]
  170. # create mock source with the video
  171. source = mocks.MockSource(path=SAMPLE_VIDEO)
  172. # create the feature extractor
  173. testfe = extractors.WordFeatureExtractor(input_files=[source])
  174. testfe.setup(words=DETECT_WORDS)
  175. testfe.run()
  176. testfe.teardown()
  177. # check if the word was feature extracted:
  178. self.assertGreaterEqual(len(testfe.features), 4)
  179. def test_audio_word_detection_noaudio_nofeatures(self):
  180. """Test audio word detection feature extractor
  181. Uses:
  182. - sample-manual-audio-blank-video-colours.mp4
  183. :: silent video (30s)
  184. -- pass if no features extracted
  185. """
  186. SAMPLE_VIDEO = f"{self.SAMPLE_DIR}/sample-manual-audio-blank-video-colours.mp4"
  187. DETECT_WORDS = ["birch", "smooth", "chicken", "depth",
  188. "juice", "lemons", "box", "thrown", "beside",
  189. "hogs", "fed"]
  190. # create mock source with the video
  191. source = mocks.MockSource(path=SAMPLE_VIDEO)
  192. # create the feature extractor
  193. testfe = extractors.WordFeatureExtractor(input_files=[source])
  194. testfe.setup(words=DETECT_WORDS)
  195. # ensure no features extracted from blank audio:
  196. # self.assertEqual(len(testfe.features), 0)
  197. # Actually, Whisper throws a hissy fit if there's no audio:
  198. # RuntimeError: stack expects a non-empty TensorList
  199. # stdout: "No active speech found in audio"
  200. # TODO: consider catching this error in the FE
  201. with self.assertRaises(RuntimeError):
  202. testfe.run()
  203. if __name__ == "__main__":
  204. unittest.main()