You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

67 lines
2.2 KiB

  1. """Consolidators for features extracted from media files"""
  2. from abc import ABC
  3. class Consolidator(ABC):
  4. """Consolidator interface.
  5. A Consolidator's purpose is to reduce a list of features based on eg adjacency/overlap in time.
  6. Illustration
  7. ============
  8. feature1:
  9. <---------->
  10. feature2:
  11. <------------>
  12. consolidated:
  13. <----------------->
  14. TODO: consider how to ensure we only try to merge features from the same path!! #important
  15. """
  16. def __init__(self, features=None):
  17. if features is None or len(features) == 0:
  18. raise ValueError("Features must be provided") # TODO: #API -- decide if this is necessary
  19. # maybe we want to permit [] which would 'consolidate' to []
  20. self.features = features
  21. def consolidate(self):
  22. """Consolidate features by some criterion/criteria (eg overlap, adjacency)"""
  23. class OverlapConsolidator(Consolidator):
  24. """Consolidator that merges overlapping features.
  25. An overlap is defined as two features that share a common interval of time,ie:
  26. interval1.end < interval2.start or
  27. interval2.start < interval1.end
  28. An optional delta parameter can be provided to allow for a 'fudge factor' in the comparison,
  29. this is used to allow for small gaps between features to be considered as overlapping.
  30. """
  31. def __init__(self, features: list=[], delta: float=0.0):
  32. super().__init__(features)
  33. self.delta = delta
  34. def consolidate(self):
  35. """Consolidate overlapping features"""
  36. # sort features by start time ascending
  37. self.features.sort(key=lambda x: x.interval.start)
  38. # merge overlapping features
  39. # TODO: check my working here (or write tests...)
  40. consolidated = []
  41. current = self.features[0]
  42. for feature in self.features[1:]:
  43. if current.interval.end + self.delta < feature.interval.start - self.delta:
  44. # no overlap
  45. consolidated.append(current)
  46. current = feature
  47. else:
  48. # overlap
  49. current.interval.move_end(max(current.interval.end, feature.interval.end))
  50. consolidated.append(current)
  51. self.features = consolidated