Browse Source

feat: add LAFE functionality

Uses pyloudnorm under the hood to determine the loudness of the supplied
media file (handles videos transparently)

TBC: some sort of limiter on the number of loudness features produced
main
Rob Hallam 2 months ago
parent
commit
b50b81a899
1 changed files with 66 additions and 0 deletions
  1. +66
    -0
      pipeline/feature_extractors.py

+ 66
- 0
pipeline/feature_extractors.py View File

@@ -1,9 +1,13 @@
from abc import ABC from abc import ABC
import logging import logging
import os
import random import random
import subprocess import subprocess
from ast import literal_eval from ast import literal_eval
from pipeline.utils import SourceMedia, Feature, Interval from pipeline.utils import SourceMedia, Feature, Interval
# for loudness detection
import soundfile
import pyloudnorm


logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)


@@ -175,6 +179,68 @@ class LoudAudioFeatureExtractor(FeatureExtractor):


teardown() is used to clean up temporary files created during setup (if specified by config) teardown() is used to clean up temporary files created during setup (if specified by config)
""" """
def __init__(self, input_files=None, config=None):
    """Store the inputs and start with an empty feature list.

    input_files -- non-empty collection of source media to analyse
    config      -- optional configuration, stored as given

    Raises ValueError when input_files is None or empty.
    """
    if not input_files:
        raise ValueError("No input files provided!")
    self.features = []  # filled in by run()
    self.config = config
    self.input_files = input_files

def _audio_file_from_path(self, path: str) -> str:
    """Map a media file path to the WAV path used for its extracted audio.

    Only the final component of `path` is kept; ".wav" is appended to it
    (the original extension is retained) and the result is placed in /tmp.

    Example:
    - in  = "/path/to/video.mp4"
    - out = "/tmp/video.mp4.wav"
    """
    output_dir = "/tmp"
    wav_name = os.path.basename(path) + ".wav"
    return output_dir + "/" + wav_name

def _loudnorm(self, audio_file):
    """Measure loudness over consecutive 500 ms windows of an audio file.

    The file is read with soundfile and every window is measured with a
    pyloudnorm BS.1770 meter (300 ms block size). At most the first two
    channels of the audio are used.

    audio_file -- path to an audio file readable by soundfile

    Returns a list of (timecode, loudness) tuples ordered loudest first.
    timecode is the window start in seconds; loudness is the meter reading
    rounded to 3 DP. Windows for which the meter reports -inf are omitted.
    """
    window_seconds = 0.5

    # always_2d=True gives mono files a (samples, 1) shape, so the channel
    # slice below works for them too instead of raising IndexError
    data, rate = soundfile.read(audio_file, always_2d=True)
    meter = pyloudnorm.Meter(rate=rate, block_size=0.3)  # create BS.1770 meter

    window_size = int(rate * window_seconds)
    stride_size = window_size  # windows do not overlap

    minus_inf = float("-inf")
    measured = []
    # "+ 1" so a window that ends exactly on the last sample is still measured
    for start in range(0, len(data) - window_size + 1, stride_size):
        window = data[start:start + window_size, 0:2]
        loudval = meter.integrated_loudness(window)
        if float(loudval) != minus_inf:
            measured.append((start / rate, loudval))

    # sort on the unrounded readings, then round for output
    measured.sort(key=lambda item: item[1], reverse=True)
    return [(timecode, round(loudval, 3)) for timecode, loudval in measured]

def setup(self):
    """Extract the audio of every input file to a WAV file using ffmpeg.

    The output path for each input comes from _audio_file_from_path(); an
    existing file at that path is overwritten (ffmpeg -y). The audio is
    written as 16-bit PCM with any video stream dropped (-vn).

    A non-zero ffmpeg exit status is logged as an error and processing
    continues with the next file, so one bad input does not abort setup.

    TODO: config -- hardcoded for now
    """
    for media in self.input_files:
        audio_file = self._audio_file_from_path(media.path)
        # ffmpeg -y -i input.mp4 -vn -acodec pcm_s16le output.wav
        result = subprocess.run(
            ["ffmpeg", "-y", "-i", media.path, "-vn", "-acodec", "pcm_s16le", audio_file],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if result.returncode != 0:
            # surface the failure here; otherwise it only shows up later as
            # a missing or unreadable WAV file
            stderr_tail = result.stderr.decode(errors="replace").strip()[-1000:]
            logger.error("ffmpeg exited with status %d while extracting audio from %s: %s",
                         result.returncode, media.path, stderr_tail)

def run(self):
    """Build loudness Features for every input file.

    For each input, the WAV path from _audio_file_from_path() is measured
    with _loudnorm(), and every (timecode, loudness) pair it returns is
    appended to self.features as a 0.5 s Feature scored by its loudness.
    """
    window_duration = 0.500
    for media in self.input_files:
        wav_path = self._audio_file_from_path(media.path)
        self.features.extend(
            Feature(interval=Interval(start=timecode, duration=window_duration),
                    source=media, feature_extractor="loudness",
                    score=level)
            for timecode, level in self._loudnorm(wav_path)
        )


class VideoActivityFeatureExtractor(FeatureExtractor): class VideoActivityFeatureExtractor(FeatureExtractor):
"""Feature extractor for video activity detection. """Feature extractor for video activity detection.




Loading…
Cancel
Save