|
- """Consolidators for features extracted from media files"""
-
- from abc import ABC
-
class Consolidator(ABC):
    """Base interface for feature consolidators.

    A consolidator holds a collection of features and reduces it according to
    some criterion, e.g. adjacency or overlap in time.

    Illustration
    ============

    feature1:
        <---------->
    feature2:
             <------------>
    consolidated:
        <----------------->

    TODO: consider how to ensure we only try to merge features from the same path!! #important
    """

    def __init__(self, features=None):
        """Store the features to consolidate.

        Args:
            features: Collection of features; must support ``len()``.

        Raises:
            ValueError: If ``features`` is ``None`` or has length zero.
        """
        # TODO: #API -- decide if rejecting an empty collection is necessary;
        # maybe we want to permit [] which would 'consolidate' to [].
        has_features = features is not None and len(features) > 0
        if not has_features:
            raise ValueError("Features must be provided")
        self.features = features

    def consolidate(self):
        """Consolidate features by some criterion/criteria (eg overlap, adjacency).

        The base implementation does nothing and returns ``None``.
        """
        return None
-
-
class OverlapConsolidator(Consolidator):
    """Consolidator that merges overlapping features.

    Features are processed in ascending order of ``interval.start``. A feature
    overlaps the one currently being built up when it starts at or before the
    point where that one ends, i.e.::

        feature.interval.start <= current.interval.end

    An optional ``delta`` provides a 'fudge factor' so that small gaps between
    features are still treated as overlaps. The tolerance is applied to both
    sides of the comparison (``current.end + delta`` against
    ``feature.start - delta``), so gaps of up to ``2 * delta`` are merged.

    Each feature must expose ``interval.start``, ``interval.end`` and
    ``interval.move_end(new_end)``.
    """

    def __init__(self, features: list = None, delta: float = 0.0):
        """Store the features and the overlap tolerance.

        Args:
            features: Features to consolidate. ``None`` (the default) or an
                empty collection is rejected by the base class.
            delta: Tolerance added to the end of the current feature and
                subtracted from the start of the next one when testing for
                overlap.

        Raises:
            ValueError: If ``features`` is ``None`` or empty.
        """
        super().__init__(features)
        self.delta = delta

    def consolidate(self):
        """Merge overlapping features and store the result in ``self.features``.

        The input collection itself is not reordered, but merging updates the
        surviving feature's interval in place via ``interval.move_end``
        (presumably this sets the interval's end -- confirm against the
        interval class). Returns ``None``.
        """
        # Sort into a new list so the caller's collection keeps its order.
        ordered = sorted(self.features, key=lambda feature: feature.interval.start)
        if not ordered:
            # Nothing to merge (only possible if features was emptied after
            # construction, since __init__ rejects empty input).
            self.features = []
            return

        consolidated = []
        current = ordered[0]
        for feature in ordered[1:]:
            if current.interval.end + self.delta < feature.interval.start - self.delta:
                # Gap exceeds the tolerance: close off the current run.
                consolidated.append(current)
                current = feature
            else:
                # Overlap: extend the current run; max() keeps the end when
                # the next feature is fully contained in the current one.
                current.interval.move_end(max(current.interval.end, feature.interval.end))
        # The final run is never closed inside the loop.
        consolidated.append(current)

        self.features = consolidated
|