|
@@ -214,21 +214,31 @@ class LoudAudioFeatureExtractor(FeatureExtractor): |
|
|
OUTPUT_DIR = "/tmp" |
|
|
OUTPUT_DIR = "/tmp" |
|
|
return f"{OUTPUT_DIR}/{os.path.basename(path)}.wav" |
|
|
return f"{OUTPUT_DIR}/{os.path.basename(path)}.wav" |
|
|
|
|
|
|
|
|
|
|
|
def _get_loudnesses(self, data, meter, rate, window_size, stride_size): |
|
|
|
|
|
"""Extract loudnesses from the audio data using pyloudnorm |
|
|
|
|
|
|
|
|
|
|
|
return a list of 2-tuples, each representing a timecode and loudness value |
|
|
|
|
|
""" |
|
|
|
|
|
loudnesses = [] |
|
|
|
|
|
|
|
|
|
|
|
for w in range(0, len(data)-window_size, stride_size): |
|
|
|
|
|
window = data[w:w+window_size, 0:2] # extract window |
|
|
|
|
|
loudnesses.append( (w/rate, meter.integrated_loudness(window)) ) |
|
|
|
|
|
|
|
|
|
|
|
return loudnesses |
|
|
|
|
|
|
|
|
def _loudnorm(self, audio_file): |
|
|
def _loudnorm(self, audio_file): |
|
|
"""Run pyloudnorm on the audio file""" |
|
|
"""Run pyloudnorm on the audio file""" |
|
|
data, rate = soundfile.read(audio_file) # load audio (with shape (samples, channels)) |
|
|
data, rate = soundfile.read(audio_file) # load audio (with shape (samples, channels)) |
|
|
meter = pyloudnorm.Meter(rate=rate,block_size=0.3) # create BS.1770 meter |
|
|
meter = pyloudnorm.Meter(rate=rate,block_size=0.3) # create BS.1770 meter |
|
|
|
|
|
|
|
|
loudnesses = [] |
|
|
|
|
|
loudness_features = [] |
|
|
loudness_features = [] |
|
|
window_size = int(rate * 0.5) # 500ms |
|
|
window_size = int(rate * 0.5) # 500ms |
|
|
stride_size = int(rate * 0.5) # 500ms -- no overlap |
|
|
stride_size = int(rate * 0.5) # 500ms -- no overlap |
|
|
|
|
|
|
|
|
# for w in range(data.shape[0]//100): |
|
|
# for w in range(data.shape[0]//100): |
|
|
# loudnesses.append(meter.integrated_loudness(data[w:w+int(0.3*rate),0:2])) |
|
|
# loudnesses.append(meter.integrated_loudness(data[w:w+int(0.3*rate),0:2])) |
|
|
for w in range(0, len(data)-window_size, stride_size): |
|
|
|
|
|
window = data[w:w+window_size, 0:2] # extract window |
|
|
|
|
|
loudnesses.append( (w/rate, meter.integrated_loudness(window)) ) |
|
|
|
|
|
|
|
|
loudnesses = self._get_loudnesses(data, meter, rate, window_size, stride_size) |
|
|
|
|
|
|
|
|
for timecode, loudval in sorted([l for l in loudnesses if float(l[1]) != float("-inf")], key=lambda x: x[1], reverse=True): |
|
|
for timecode, loudval in sorted([l for l in loudnesses if float(l[1]) != float("-inf")], key=lambda x: x[1], reverse=True): |
|
|
# print(f"Timecode: {timecode}, Loudness: {loudval}") |
|
|
# print(f"Timecode: {timecode}, Loudness: {loudval}") |
|
|