You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

157 lines
6.1 KiB

  1. """adjusters.py -- adjust the gathered Features
  2. This is usually done to either modify or reduce the Features in some way.
  3. For example:
  4. - TargetTimeAdjuster: drop Features until the target time is reached
  5. - FeatureCountAdjuster: drop Features until the target number of Features is reached
  6. TODO: Consider, e.g., a generic PredicateAdjuster -- supply a predicate/lambda that will be used to determine whether to keep a Feature or not.
  7. """
  8. from enum import Enum
  9. class Adjuster():
  10. """Generic Adjuster class. Expects a list of Features and returns a list of Features."""
  11. def __init__(self, features: list=[]):
  12. """Initialize the Adjuster with Features.
  13. NOTE: an empty feature list is permitted, since a FeatureExtractor may not produce features. Adjusters subclassing should be aware of this.
  14. """
  15. self.features = features
  16. def adjust(self) -> list:
  17. """Adjust the Features. Override this method in subclasses."""
  18. return self.features
  19. class TargetTimeAdjuster(Adjuster):
  20. """Adjuster that drops Features until the target time is reached."""
  21. _STRATEGY = Enum("MarginStrategy", ["ABSOLUTE", "PERCENT"])
  22. _DEFAULT_TARGET_TIME = 60.0 # 1 minute
  23. _DEFAULT_MARGIN = 10 # can be percent or absolute value
  24. def _determine_margin(self, time: float, margin: float, strategy: _STRATEGY) -> tuple:
  25. """Determine the target time margins.
  26. If the strategy is ABSOLUTE, the margin is a fixed value in seconds.
  27. If the strategy is PERCENT, the margin is a percentage of the target time.
  28. Returns a tuple of (min, max) times.
  29. Pulled out for unit testing
  30. """
  31. target_time_min = target_time_max = None
  32. if strategy == self._STRATEGY.ABSOLUTE:
  33. # both specified in seconds
  34. target_time_min = time - margin
  35. target_time_max = time + margin
  36. elif strategy == self._STRATEGY.PERCENT:
  37. target_time_max = time + (time * margin / 100)
  38. target_time_min = time - (time * margin / 100)
  39. # ensure we don't have negative times
  40. if type(target_time_min) is float and target_time_min < 0:
  41. target_time_min = 0.0
  42. return (target_time_min, target_time_max)
  43. def _features_total_time(self, features: list) -> float:
  44. """Calculate the total duration of all Features.
  45. Returns the total time in seconds.
  46. Pulled out for unit testing.
  47. """
  48. return float(sum([x.interval.duration for x in features]))
  49. def _sort_by_score_time(self, features: list) -> list:
  50. """Sort Features by score (primary) and by time (secondary).
  51. Returns a sorted list of Features.
  52. Pulled out for unit testing as RDH was having issues with adjust()
  53. and wanted to verify sorting was working correctly.
  54. """
  55. return sorted(features, key=lambda x: (x.score, x.interval.duration))
  56. def __init__(self, features: list=[],
  57. target_time: int|float=_DEFAULT_TARGET_TIME,
  58. margin: int|float=_DEFAULT_MARGIN,
  59. strategy=_STRATEGY.ABSOLUTE):
  60. """Initialize the Adjuster with Features and a target time.
  61. Default target time is 60 seconds (1 minute). Even if the desired target time is 60s exactly, it is recommended to specify it explicitly.
  62. """
  63. super().__init__(features)
  64. self.target_time = float(target_time)
  65. self.margin = float(margin)
  66. self.strategy = strategy
  67. def adjust(self) -> list:
  68. """Drop Features until the target time within the margin is reached. Prioritise dropping lower scoring Features.
  69. Approach:
  70. Sort list of Features by score (primary) and by time (secondary).
  71. Drop lowest scoring Features until the target time is reached;
  72. if dropping a Feature would result in missing the margin, skip dropping that Feature
  73. if no Features can be dropped without missing the margin,
  74. drop the lowest scoring Feature until we are under the target time (with margin)
  75. Returns a list of Features, and also modifies the internal list of Features.
  76. """
  77. # check for early exit
  78. if not self.features:
  79. return []
  80. # figure out our margins
  81. target_time_min, target_time_max = self._determine_margin(self.target_time, self.margin, self.strategy)
  82. # calculate total time of all Features
  83. total_time = self._features_total_time(features=self.features)
  84. # if we are already within the target time, return the Features as-is
  85. if total_time <= target_time_max:
  86. return self.features
  87. # sort list of Features by score (primary) and by duration (secondary)
  88. sorted_features = self._sort_by_score_time(self.features)
  89. drop_indices = [] # indices of Features to drop
  90. # first pass- drop lowest scoring Features until we are within the target time
  91. for i in range(len(sorted_features)):
  92. # check if dropping this Feature would put us in the target range:
  93. # if so, drop it and return
  94. if (total_time - sorted_features[i].interval.duration >= target_time_min and
  95. total_time - sorted_features[i].interval.duration <= target_time_max):
  96. drop_indices.append(i)
  97. break
  98. elif (total_time - sorted_features[i].interval.duration > target_time_max):
  99. drop_indices.append(i)
  100. total_time -= sorted_features[i].interval.duration
  101. for i in drop_indices:
  102. self.features.remove(sorted_features[i])
  103. # if we are now within the target time, return the Features
  104. total_time = self._features_total_time(features=self.features)
  105. if total_time <= target_time_max:
  106. return self.features
  107. # else: we are still over the target time
  108. # so drop the lowest scoring Features until we are UNDER the target time
  109. for i in range(len(sorted_features)):
  110. self.features.remove(sorted_features[i])
  111. total_time -= sorted_features[i].interval.duration
  112. if total_time <= target_time_max:
  113. break
  114. return self.features