diff --git a/pipeline/feature_extractors.py b/pipeline/feature_extractors.py index 45224fb..cf9dbdb 100644 --- a/pipeline/feature_extractors.py +++ b/pipeline/feature_extractors.py @@ -214,21 +214,31 @@ class LoudAudioFeatureExtractor(FeatureExtractor): OUTPUT_DIR = "/tmp" return f"{OUTPUT_DIR}/{os.path.basename(path)}.wav" + def _get_loudnesses(self, data, meter, rate, window_size, stride_size): + """Extract loudnesses from the audio data using pyloudnorm + + return a list of 2-tuples, each representing a timecode and loudness value + """ + loudnesses = [] + + for w in range(0, len(data)-window_size, stride_size): + window = data[w:w+window_size, 0:2] # extract window + loudnesses.append( (w/rate, meter.integrated_loudness(window)) ) + + return loudnesses + def _loudnorm(self, audio_file): """Run pyloudnorm on the audio file""" data, rate = soundfile.read(audio_file) # load audio (with shape (samples, channels)) meter = pyloudnorm.Meter(rate=rate,block_size=0.3) # create BS.1770 meter - loudnesses = [] loudness_features = [] window_size = int(rate * 0.5) # 500ms stride_size = int(rate * 0.5) # 500ms -- no overlap # for w in range(data.shape[0]//100): # loudnesses.append(meter.integrated_loudness(data[w:w+int(0.3*rate),0:2])) - for w in range(0, len(data)-window_size, stride_size): - window = data[w:w+window_size, 0:2] # extract window - loudnesses.append( (w/rate, meter.integrated_loudness(window)) ) + loudnesses = self._get_loudnesses(data, meter, rate, window_size, stride_size) for timecode, loudval in sorted([l for l in loudnesses if float(l[1]) != float("-inf")], key=lambda x: x[1], reverse=True): # print(f"Timecode: {timecode}, Loudness: {loudval}")