diff --git a/pipeline/adjusters.py b/pipeline/adjusters.py
new file mode 100644
index 0000000..f42feb2
--- /dev/null
+++ b/pipeline/adjusters.py
@@ -0,0 +1,165 @@
+"""adjusters.py -- adjust the gathered Features
+
+This is usually done to either modify or reduce the Features in some way.
+
+For example:
+
+  - TargetTimeAdjuster: drop Features until the target time is reached
+  - FeatureCountAdjuster: drop Features until the target number of Features is reached
+
+TODO: Consider eg a generic PredicateAdjuster -- supply a predicate/lambda that will be used to determine whether to keep a Feature or not.
+"""
+
+from enum import Enum
+
+class Adjuster():
+    """Generic Adjuster class. Expects a list of Features and returns a list of Features."""
+
+    def __init__(self, features: list|None=None):
+        """Initialize the Adjuster with Features.
+
+        NOTE: an empty feature list is permitted, since a FeatureExtractor may not produce features. Adjusters subclassing should be aware of this.
+
+        The default is None (treated as a new empty list) so that Adjusters never share one mutable default list.
+        """
+        self.features = [] if features is None else features
+
+
+    def adjust(self) -> list:
+        """Adjust the Features. Override this method in subclasses."""
+        return self.features
+
+
+class TargetTimeAdjuster(Adjuster):
+    """Adjuster that drops Features until the target time is reached."""
+
+    _STRATEGY = Enum("MarginStrategy", ["ABSOLUTE", "PERCENT"])
+    _DEFAULT_TARGET_TIME = 60.0  # 1 minute
+    _DEFAULT_MARGIN = 10  # can be percent or absolute value
+
+    def _determine_margin(self, time: float, margin: float, strategy: _STRATEGY) -> tuple:
+        """Determine the target time margins.
+
+        If the strategy is ABSOLUTE, the margin is a fixed value in seconds.
+        If the strategy is PERCENT, the margin is a percentage of the target time.
+
+        Returns a tuple of (min, max) times; a negative min is clamped to 0.0.
+        Raises ValueError if the strategy is not a member of _STRATEGY.
+
+        Pulled out for unit testing
+        """
+        if strategy == self._STRATEGY.ABSOLUTE:
+            # both specified in seconds
+            delta = margin
+        elif strategy == self._STRATEGY.PERCENT:
+            delta = time * margin / 100
+        else:
+            raise ValueError(f"unknown margin strategy: {strategy!r}")
+
+        # ensure we don't have negative times; checked by value rather than
+        # by type so that int arguments are clamped too
+        target_time_min = max(time - delta, 0.0)
+        target_time_max = time + delta
+
+        return (target_time_min, target_time_max)
+
+    def _features_total_time(self, features: list) -> float:
+        """Calculate the total duration of all Features.
+
+        Returns the total time in seconds.
+
+        Pulled out for unit testing.
+        """
+        return float(sum(x.interval.duration for x in features))
+
+    def _sort_by_score_time(self, features: list) -> list:
+        """Sort Features by score (primary) and by time (secondary).
+
+        Returns a sorted list of Features.
+
+        Pulled out for unit testing as RDH was having issues with adjust()
+        and wanted to verify sorting was working correctly.
+        """
+        return sorted(features, key=lambda x: (x.score, x.interval.duration))
+
+    def __init__(self, features: list|None=None,
+                 target_time: int|float=_DEFAULT_TARGET_TIME,
+                 margin: int|float=_DEFAULT_MARGIN,
+                 strategy=_STRATEGY.ABSOLUTE):
+        """Initialize the Adjuster with Features and a target time.
+
+        Default target time is 60 seconds (1 minute). Even if the desired target time is 60s exactly, it is recommended to specify it explicitly.
+        """
+        super().__init__(features)
+        self.target_time = float(target_time)
+        self.margin = float(margin)
+        self.strategy = strategy
+
+    def adjust(self) -> list:
+        """Drop Features until the target time within the margin is reached. Prioritise dropping lower scoring Features.
+
+        Approach:
+
+        Sort list of Features by score (primary) and by time (secondary).
+        Drop lowest scoring Features until the target time is reached;
+            if dropping a Feature would result in missing the margin, skip dropping that Feature
+        if no Features can be dropped without missing the margin,
+            drop the lowest scoring Feature until we are under the target time (with margin)
+
+        Features are dropped by identity rather than by ==, so a Feature that only
+        compares equal to a dropped one is kept.
+
+        Returns a list of Features, and also modifies the internal list of Features.
+        """
+        # check for early exit
+        if not self.features:
+            return []
+
+        # figure out our margins
+        target_time_min, target_time_max = self._determine_margin(self.target_time, self.margin, self.strategy)
+
+        # calculate total time of all Features
+        total_time = self._features_total_time(features=self.features)
+
+        # if we are already within the target time, return the Features as-is
+        if total_time <= target_time_max:
+            return self.features
+
+        # sort list of Features by score (primary) and by duration (secondary)
+        sorted_features = self._sort_by_score_time(self.features)
+        dropped = set()  # positions in sorted_features of the Features to drop
+
+        # first pass- drop lowest scoring Features until we are within the target time
+        for i, feature in enumerate(sorted_features):
+            remaining = total_time - feature.interval.duration
+            if remaining > target_time_max:
+                # still over the target range: drop it and carry on
+                dropped.add(i)
+                total_time = remaining
+            elif remaining >= target_time_min:
+                # dropping this Feature puts us in the target range: drop it and stop
+                dropped.add(i)
+                total_time = remaining
+                break
+            # else: dropping it would undershoot the margin, so keep it
+
+        # second pass- we are still over the target time, so drop the lowest scoring
+        # Features that survived the first pass until we are UNDER the target time
+        if total_time > target_time_max:
+            for i, feature in enumerate(sorted_features):
+                if i in dropped:
+                    continue  # already dropped by the first pass
+                dropped.add(i)
+                total_time -= feature.interval.duration
+                if total_time <= target_time_max:
+                    break
+
+        # remove the dropped Features in place, keeping the remaining order
+        for i in sorted(dropped):
+            victim = sorted_features[i]
+            for j, candidate in enumerate(self.features):
+                if candidate is victim:
+                    del self.features[j]
+                    break
+
+        return self.features
diff --git a/test/mocks.py b/test/mocks.py
index 4cb016c..9ca095e 100644
--- a/test/mocks.py
+++ b/test/mocks.py
@@ -7,9 +7,18 @@ class MockInterval():
         self.end = end
         self.duration = end - start
 
+    @classmethod
+    def from_duration(cls, duration):
+        return cls(start=0, end=duration)
+
     def to_json(self):
         return {"start": self.start, "end": self.end}
 
+    def __eq__(self, other):
+        if not isinstance(other, MockInterval):
+            return NotImplemented
+        return self.start == other.start and self.end == other.end
+
 class MockFeature():
     """Mock feature object for testing"""
     def __init__(self, interval, source=None, feature_extractor="mock", score=0.0):
@@ -21,6 +30,12 @@ class MockFeature():
     def to_json(self):
         return {"interval": self.interval}
 
+    def __eq__(self, other):
+        if not isinstance(other, MockFeature):
+            return NotImplemented
+        return (self.interval == other.interval and self.source == other.source
+                and self.feature_extractor == other.feature_extractor)
+
 class MockSource():
     """Mock Source object for testing Feature"""
     def __init__(self, source=None, path=None):
diff --git a/test/test_adjusters.py b/test/test_adjusters.py
new file mode 100644
index 0000000..e718137
--- /dev/null
+++ b/test/test_adjusters.py
@@ -0,0 +1,316 @@
+"""test_adjusters.py -- test pipeline Adjusters (eg TargetTimeAdjuster)"""
+import unittest
+import unittest.mock as mock
+import pipeline.adjusters as adjusters
+
+from test.mocks import MockFeature, MockInterval
+
+class TestAdjuster(unittest.TestCase):
+    """Test the generic Adjuster class"""
+
+    def test_init(self):
+        """Test the Adjuster can be initialised"""
+        adjuster = adjusters.Adjuster()
+        self.assertEqual(adjuster.features, [])
+
+    def test_init_default_not_shared(self):
+        """Test Adjusters built with the default do not share one list of Features"""
+        first = adjusters.Adjuster()
+        first.features.append(make_feature(duration=1.0))
+        self.assertEqual(adjusters.Adjuster().features, [])
+
+    def test_adjust(self):
+        """Test the generic adjust"""
+        adjuster = adjusters.Adjuster()
+        self.assertEqual(adjuster.adjust(), [])
+        self.assertEqual(adjuster.features, [])
+
+class TestTargetTimeAdjuster(unittest.TestCase):
+    """Test the TargetTimeAdjuster
+
+    TTA drops Features until the target time is reached (or within a margin)"""
+
+    def test_init(self):
+        """Test the TTA can be initialised"""
+        tta = adjusters.TargetTimeAdjuster()
+        self.assertEqual(tta.features, [])
+
+    def test_features_total_time(self):
+        """Test the TTA can calculate the total time of Features
+
+        Test:
+        - input duration floats: 1.0, 2.0, 3.0, 4.0 == 10.0
+        """
+        tta = adjusters.TargetTimeAdjuster()
+        features = []
+        for i in range(1, 5):
+            features.append(make_feature(duration=i*1.0))
+
+        self.assertEqual(tta._features_total_time(features), 10.0)
+        self.assertEqual(tta._features_total_time([]), 0.0)
+        self.assertIs(type(tta._features_total_time([])), float)
+
+    def test_determine_margin(self):
+        """Test the TTA can determine the target time margins
+
+        Args: time, margin, strategy (strategy in: ABSOLUTE, PERCENT)
+
+        Test:
+        - margin of zero
+        - margin of 5.0
+        - margin of 10.0
+        - margin of 100.0
+        - int arguments (negative minimum still clamped)
+        - both ABSOLUTE and PERCENT strategies
+
+        TODO: figure out what should be done with negative margins & margins > 100.0
+        """
+
+        tta = adjusters.TargetTimeAdjuster()
+        with self.subTest("ABSOLUTE"):
+            strategy = adjusters.TargetTimeAdjuster._STRATEGY.ABSOLUTE
+            test_cases = []
+            # populate test cases with tuples of (time, margin, expected)
+            # zero margin
+            test_cases.append((60.0, 0.0, (60.0, 60.0)))
+            # margin of 5.0
+            test_cases.append((60.0, 5.0, (55.0, 65.0)))
+            # margin of 10.0
+            test_cases.append((60.0, 10.0, (50.0, 70.0)))
+            # margin of 100.0
+            test_cases.append((60.0, 100.0, (0.0, 160.0)))
+            # int arguments: negative minimum is still clamped
+            test_cases.append((60, 100, (0.0, 160.0)))
+
+            # test
+            for time, margin, expected in test_cases:
+                self.assertEqual(tta._determine_margin(time, margin, strategy), expected)
+
+        with self.subTest("PERCENT"):
+            strategy = adjusters.TargetTimeAdjuster._STRATEGY.PERCENT
+            test_cases = []
+            # populate test cases with tuples of (time, margin, expected) as above
+            # zero margin
+            test_cases.append((60.0, 0.0, (60.0, 60.0)))
+            # margin of 5.0
+            test_cases.append((60.0, 5.0, (57.0, 63.0)))
+            # margin of 10.0
+            test_cases.append((60.0, 10.0, (54.0, 66.0)))
+            # margin of 100.0
+            test_cases.append((60.0, 100.0, (0.0, 120.0)))
+
+            # test
+            for time, margin, expected in test_cases:
+                self.assertEqual(tta._determine_margin(time, margin, strategy), expected)
+
+    def test_determine_margin_unknown_strategy(self):
+        """Test the TTA rejects a strategy that is not a _STRATEGY member"""
+        tta = adjusters.TargetTimeAdjuster()
+        with self.assertRaises(ValueError):
+            tta._determine_margin(60.0, 10.0, "ABSOLUTE")
+
+    def test_adjust_no_change(self):
+        """Test adjusting of list of Features using TTA -- no change to list of Features
+
+        Cases:
+        - no Features --> []
+        - [Features] with total time < target time --> unchanged list
+        - [Features] with total time = target time --> unchanged list
+
+        TODO: test with Features > target
+        """
+        with self.subTest("no Features"):
+            tta = adjusters.TargetTimeAdjuster()
+            self.assertEqual(tta.adjust(), [])
+
+        with self.subTest("Features < target time"):
+            features = []
+            for i in range(1, 5):
+                features.append(make_feature(duration=i*1.0))
+            tta = adjusters.TargetTimeAdjuster(features=features, target_time=20.0)
+            self.assertEqual(tta.adjust(), features)
+
+        with self.subTest("Features = target time"):
+            features = []
+            for i in range(1, 5):
+                features.append(make_feature(duration=i*1.0))
+            tta = adjusters.TargetTimeAdjuster(features=features, target_time=10.0)
+            self.assertEqual(tta.adjust(), features)
+
+
+    def test_sort_by_score_time(self):
+        """Test sorting of list of Features by score (primary) and time (secondary)
+
+        Cases:
+        - [(15.0, 1.0), (10.0, 1.0), (12.0, 1.0)] --> [(10.0, 1.0), (12.0, 1.0), (15.0, 1.0)] # score equal, sort by time
+        - [(15.0, 1.0), (10.0, 4.0), (12.0, 3.0)] --> [(15.0, 1.0), (12.0, 3.0), (10.0, 4.0)] # sort by score
+        - [(15.0, 1.0), (10.0, 1.0), (12.0, 2.0)] --> [(10.0, 1.0), (15.0, 1.0), (12.0, 2.0)] # mixed: scores below duration
+        - [] --> []
+        - [(15.0, 1.0)] --> [(15.0, 1.0)]
+
+        Cases giving RDH trouble:
+        - [(16.0, 1.0), (16.0, 1.0), (1.0, 1.0), (1.0, 1.0)] --> [(1.0, 1.0), (1.0, 1.0), (16.0, 1.0), (16.0, 1.0)] # multiple lowest scoring, multiple shortest duration
+        """
+
+        tta = adjusters.TargetTimeAdjuster()
+        with self.subTest("score equal, sort by duration"):
+            features = [
+                make_feature(duration=15.0, score=1.0),
+                make_feature(duration=10.0, score=1.0),
+                make_feature(duration=12.0, score=1.0)
+            ]
+            self.assertEqual(tta._sort_by_score_time(features), [features[1], features[2], features[0]])
+
+        with self.subTest("sort by score, duration irrelevant"):
+            features = [
+                make_feature(duration=15.0, score=1.0),
+                make_feature(duration=10.0, score=4.0),
+                make_feature(duration=12.0, score=3.0)
+            ]
+            self.assertEqual(tta._sort_by_score_time(features), [features[0], features[2], features[1]])
+
+        with self.subTest("mixed: scores below duration"):
+            features = [
+                make_feature(duration=15.0, score=1.0),
+                make_feature(duration=10.0, score=1.0),
+                make_feature(duration=12.0, score=2.0)
+            ]
+            self.assertEqual(tta._sort_by_score_time(features), [features[1], features[0], features[2]])
+
+        with self.subTest("empty"):
+            self.assertEqual(tta._sort_by_score_time([]), [])
+
+        with self.subTest("single"):
+            features = [mock.Mock(duration=15.0, score=1.0)]
+            self.assertEqual(tta._sort_by_score_time(features), features)
+
+        with self.subTest("multiple lowest scoring, multiple shortest duration"):
+            features = [
+                make_feature(duration=16.0, score=1.0),
+                make_feature(duration=16.0, score=1.0),
+                make_feature(duration=1.0, score=1.0),
+                make_feature(duration=1.0, score=1.0)
+            ]
+            self.assertEqual(tta._sort_by_score_time(features), [features[2], features[3], features[0], features[1]])
+
+
+    def test_adjust_changes(self):
+        """Test adjusting of list of Features using TTA -- changes to list of Features
+
+        All cases have total time > target time.
+
+        In the cases, specification is Feature(duration, score)
+        Cases:
+        - target = 30.0, margin = 0.0
+            + [(15.0, 1.0), (10.0, 1.0), (12.0, 1.0)] --> [(15.0, 1.0), (12.0, 1.0)] # scores equal, drop smallest
+            + [(15.0, 2.0), (10.0, 2.0), (12.0, 1.0)] --> [(15.0, 2.0), (10.0, 2.0)] # drop lowest scoring (1)
+            + [(15.0, 1.0), (10.0, 1.0), (12.0, 2.0)] --> [(15.0, 1.0), (12.0, 2.0)] # drop lowest scoring (2)
+
+        - target = 30.0, margin = 4.0
+            + [(15.0, 1.0), (10.0, 2.0), (12.0, 1.0)] --> [(15.0, 1.0), (12.0, 1.0)] # not lowest scoring, but within margin
+            + [(16.0, 1.0), (16.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0)] --> [(16.0, 1.0), (16.0, 1.0), (1.0, 1.0), (1.0, 1.0)] # drop multiple lowest scoring, shortest duration
+
+        """
+        # target 30.0, margin 0.0 cases
+        target, margin = 30.0, 0.0
+
+        with self.subTest(f"target {target} margin {margin}"):
+            with self.subTest("scores equal"):
+                features = [
+                    make_feature(duration=15.0, score=1.0),
+                    make_feature(duration=10.0, score=1.0),
+                    make_feature(duration=12.0, score=1.0)
+                ]
+                tta = adjusters.TargetTimeAdjuster(features=features, target_time=target, margin=margin)
+                expected = [features[0], features[2]]
+                output = tta.adjust()
+                self.assertEqual(len(output), 2)
+                self.assertEqual(output, expected)
+                self.assertEqual(tta.features, expected)
+
+            with self.subTest("drop lowest scoring (1)"):
+                features = [
+                    make_feature(duration=15.0, score=2.0),
+                    make_feature(duration=10.0, score=2.0),
+                    make_feature(duration=12.0, score=1.0)
+                ]
+                tta = adjusters.TargetTimeAdjuster(features=features, target_time=target, margin=margin)
+                expected = [features[0], features[1]]
+                output = tta.adjust()
+                self.assertEqual(len(output), 2)
+                self.assertEqual(output, expected)
+                self.assertEqual(tta.features, expected)
+
+            with self.subTest("drop lowest scoring (2)"):
+                features = [
+                    make_feature(duration=15.0, score=1.0),
+                    make_feature(duration=10.0, score=1.0),
+                    make_feature(duration=12.0, score=2.0)
+                ]
+                tta = adjusters.TargetTimeAdjuster(features=features, target_time=target, margin=margin)
+                expected = [features[0], features[2]]
+                output = tta.adjust()
+                self.assertEqual(len(output), 2)
+                self.assertEqual(output, expected)
+                self.assertEqual(tta.features, expected)
+
+        # target 30.0, margin 4.0 cases
+        target, margin, strategy = 30.0, 4.0, adjusters.TargetTimeAdjuster._STRATEGY.ABSOLUTE
+
+        with self.subTest(f"target {target} margin {margin}"):
+            with self.subTest("not lowest scoring, but within margin"):
+                # explanation: dropping the 10.0 feature would put us at 27.0, which is within the margin (26.0, 34.0)
+                features = [
+                    make_feature(duration=15.0, score=1.0),
+                    make_feature(duration=10.0, score=2.0),
+                    make_feature(duration=12.0, score=1.0)
+                ]
+                tta = adjusters.TargetTimeAdjuster(features=features, target_time=target,
+                                                   margin=margin, strategy=strategy)
+                expected = [features[0], features[2]]
+                output = tta.adjust()
+                self.assertEqual(len(output), 2)
+                self.assertEqual(output, expected)
+                self.assertEqual(tta.features, expected)
+
+            with self.subTest("drop multiple lowest scoring, shortest duration"):
+                # explanation: dropping two of the 1.0 features would put us at 34.0, which is within the margin (26.0, 34.0)
+                features = [
+                    make_feature(duration=16.0, score=1.0),
+                    make_feature(duration=16.0, score=1.0),
+                    make_feature(duration=1.0, score=1.0),
+                    make_feature(duration=1.0, score=1.0),
+                    make_feature(duration=1.0, score=1.0),
+                    make_feature(duration=1.0, score=1.0)
+                ]
+                tta = adjusters.TargetTimeAdjuster(features=features, target_time=target,
+                                                   margin=margin, strategy=strategy)
+                expected = [features[0], features[1], features[2], features[3]]
+                output = tta.adjust()
+                self.assertEqual(len(output), 4)
+                self.assertEqual(output, expected)
+                self.assertEqual(tta.features, expected)
+
+    def test_adjust_fallback_after_first_pass_drops(self):
+        """Test the fallback pass of TTA when the first pass has already dropped Features
+
+        Regression: the fallback pass used to remove every Feature again from the start of
+        the sorted list, raising ValueError for the ones the first pass had already dropped.
+
+        Case (target = 30.0, margin = 0.0):
+            + [(1.0, 1.0), (40.0, 2.0), (40.0, 3.0)] --> []
+        """
+        # explanation: first pass drops (1.0, 1.0) and (40.0, 2.0), leaving 40.0 > 30.0; the fallback pass drops the rest
+        features = [
+            make_feature(duration=1.0, score=1.0),
+            make_feature(duration=40.0, score=2.0),
+            make_feature(duration=40.0, score=3.0)
+        ]
+        tta = adjusters.TargetTimeAdjuster(features=features, target_time=30.0, margin=0.0)
+        self.assertEqual(tta.adjust(), [])
+        self.assertEqual(tta.features, [])
+
+
+def make_feature(duration, score=1.0):
+    """Helper function to create a MockFeature from duration and score"""
+    return MockFeature(interval=MockInterval.from_duration(duration), score=score)