From b025f0ea10ca135bcf75a9ab24467e6b047a99bc Mon Sep 17 00:00:00 2001 From: Rob Hallam <0504004h@student.gla.ac.uk> Date: Tue, 3 Sep 2024 13:35:28 +0100 Subject: [PATCH] feat: [wip] TargetTimeAdjuster set up structure & helpers TargetTimeAdjuster will adjust a list of Features until it is within an optional margin of a target total duration. Helper functions: - _determine_margin() :: figure out the max and min cutoff times, considering margin and margin strategy (percent / absolute) - _features_total_time() :: basic sum of list of Features' durations TODO: rename to TargetDurationAdjuster ? rename 'strategy' ?? --- pipeline/adjusters.py | 87 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 86 insertions(+), 1 deletion(-) diff --git a/pipeline/adjusters.py b/pipeline/adjusters.py index cb97a7a..97afeb3 100644 --- a/pipeline/adjusters.py +++ b/pipeline/adjusters.py @@ -10,6 +10,8 @@ For example: TODO: Consider eg a generic PredicateAdjuster -- supply a predicate/lambda that will be used to determine whether to keep a Feature or not. """ +from enum import Enum + class Adjuster(): """Generic Adjuster class. Expects a list of Features and returns a list of Features.""" @@ -21,5 +23,88 @@ class Adjuster(): self.features = features - def adjust(self): + def adjust(self) -> list: """Adjust the Features. Override this method in subclasses.""" + return self.features + + +class TargetTimeAdjuster(Adjuster): + """Adjuster that drops Features until the target time is reached.""" + + _STRATEGY = Enum("MarginStrategy", ["ABSOLUTE", "PERCENT"]) + _DEFAULT_TARGET_TIME = 60.0 # 1 minute + _DEFAULT_MARGIN = 10 # can be percent or absolute value + + def _determine_margin(self, time: float, margin: float, strategy: _STRATEGY) -> tuple: + """Determine the target time margins. + + If the strategy is ABSOLUTE, the margin is a fixed value in seconds. + If the strategy is PERCENT, the margin is a percentage of the target time. + + Returns a tuple of (min, max) times. + + Pulled out for unit testing + """ + target_time_min = target_time_max = None + + if strategy == self._STRATEGY.ABSOLUTE: + # both specified in seconds + target_time_min = time - margin + target_time_max = time + margin + elif strategy == self._STRATEGY.PERCENT: + target_time_max = time + (time * margin / 100) + target_time_min = time - (time * margin / 100) + + return (target_time_min, target_time_max) + + def _features_total_time(self, features: list) -> float: + """Calculate the total duration of all Features. + + Returns the total time in seconds. + + Pulled out for unit testing. + """ + return float(sum([x.duration for x in features])) + + def __init__(self, features: list=[], + target_time: int|float=_DEFAULT_TARGET_TIME, + margin: int|float=_DEFAULT_MARGIN, + strategy=_STRATEGY.ABSOLUTE): + """Initialize the Adjuster with Features and a target time. + + Default target time is 60 seconds (1 minute). Even if the desired target time is 60s exactly, it is recommended to specify it explicitly. + """ + super().__init__(features) + self.target_time = float(target_time) + self.margin = float(margin) + self.strategy = strategy + + def adjust(self) -> list: + """Drop Features until the target time within the margin is reached. + + Approach: + + Sort list of Features by score (primary) and by time (secondary). + Drop lowest scoring Features until the target time is reached; + if dropping a Feature would result in missing the margin, skip dropping that Feature + if no Features can be dropped without missing the margin, + drop the lowest scoring Feature until we are under the target time (with margin) + + Returns a list of Features, and also modifies the internal list of Features. + """ + # check for early exit + if not self.features: + return [] + + # figure out our margins + target_time_min, target_time_max = self._determine_margin(self.target_time, self.margin, self.strategy) + + # calculate total time of all Features + total_time = self._features_total_time(features=self.features) + + # if we are already within the target time, return the Features as-is + if total_time <= target_time_max: + return self.features + + # sort list of Features by score (primary) and by duration (secondary) + sorted_features = sorted(self.features, key=lambda x: (x.score, x.time))