# Provenance (from the original patch header):
#   commit:  f4ae8b8916aab768f28472c3c5e9b686ecba0e20
#   author:  Rob Hallam <0504004h@student.gla.ac.uk>
#   date:    Thu, 18 Jul 2024 21:47:31 +0100
#   subject: feat: add Consolidator, OverlapConsolidator
#       Consolidator: interface
#       OverlapConsolidator: condenses features if their times overlap
#   file:    pipeline/consolidators.py (new file)
"""Consolidators for features extracted from media files"""

from abc import ABC, abstractmethod


class Consolidator(ABC):
    """Consolidator interface.

    A Consolidator's purpose is to reduce a list of features based on
    e.g. adjacency/overlap in time.

    Illustration
    ============

    feature1:
        <---------->
    feature2:
               <------------>
    consolidated:
        <------------------->

    Subclasses must implement ``consolidate``; this base class cannot be
    instantiated directly.

    TODO: consider how to ensure we only try to merge features from the
    same path!! #important
    """

    def __init__(self, features=None):
        """Store the features to be consolidated.

        Args:
            features: non-empty sized collection of feature objects.

        Raises:
            ValueError: if ``features`` is ``None`` or empty.
        """
        if features is None or len(features) == 0:
            # TODO: #API -- decide if this is necessary
            # maybe we want to permit [] which would 'consolidate' to []
            raise ValueError("Features must be provided")
        self.features = features

    @abstractmethod
    def consolidate(self):
        """Consolidate features by some criterion/criteria (eg overlap, adjacency)"""


class OverlapConsolidator(Consolidator):
    """Consolidator that merges overlapping features.

    Two features overlap when neither one ends before the other starts, i.e.:
        interval1.start <= interval2.end and
        interval2.start <= interval1.end

    Intervals that merely touch (one's ``end`` equals the other's ``start``)
    are treated as overlapping and are merged as well.

    Each feature is expected to expose ``interval.start`` and
    ``interval.end`` attributes that can be compared with ``<``.
    """

    def consolidate(self):
        """Consolidate overlapping features.

        Replaces ``self.features`` with a new list, ordered by start time,
        in which every run of overlapping features has been collapsed into
        the earliest-starting feature of that run. Returns ``None``.

        The list originally passed in is not reordered. NOTE: the surviving
        feature of each run is modified in place -- its ``interval.end`` is
        extended to cover the whole run -- so the caller's feature objects
        can change.
        """
        if not self.features:
            # Nothing to merge (features may have been emptied after
            # construction); avoid IndexError on the [0] lookup below.
            self.features = []
            return

        # Sort a copy by start time so the caller's list keeps its order.
        ordered = sorted(self.features, key=lambda feature: feature.interval.start)

        # Sweep left to right, growing `current` while the next feature
        # starts at or before its end.
        # TODO: write tests covering this merge logic
        consolidated = []
        current = ordered[0]
        for feature in ordered[1:]:
            if current.interval.end < feature.interval.start:
                # no overlap: `current` is complete, start a new run
                consolidated.append(current)
                current = feature
            else:
                # overlap: max() also handles a feature fully contained
                # within `current`
                current.interval.end = max(current.interval.end, feature.interval.end)
        # the final run is still pending once the loop ends
        consolidated.append(current)

        self.features = consolidated