Review notes (text before the first "diff --git" line is ignored by git-apply/patch):
  * This patch was recovered from a copy whose newlines and indentation had been
    flattened. Hunks 2 and 3 reproduce the original hunk-header line counts;
    hunk 1 only does so with the long "Notes" bullet wrapped onto two lines, and
    that wrap point plus all context-line indentation are assumptions -- verify
    against pipeline/utils.py before applying.
  * Amended relative to the recovered copy (third hunk only):
      - __repr__ now includes feature_extractor; the extractor name used to be
        printed via `source`, which is now a Source object.
      - dropped the redundant backslash continuation inside the parentheses in
        from_end (matches from_start).
      - dropped the extra blank line added before to_json (hunk header is now
        +258,25 instead of +258,26).
    The "index" line still carries the original post-image hash.
  * Not fixed here: the __init__ docstring still says "source and score are
    optional", but source is now required (ValueError). Update it in a follow-up.

diff --git a/pipeline/utils.py b/pipeline/utils.py
index 944a351..e7a7065 100644
--- a/pipeline/utils.py
+++ b/pipeline/utils.py
@@ -222,19 +222,18 @@ class Feature():
 
     Instance variables:
         interval -- Interval: time of feature in the media file
-        source -- the source of the feature (ie feature extractor) (default: "unknown")
-        path -- the path to the media file
+        source -- the original Source of the media (a Source object)
+        feature_extractor -- the feature extractor that created this Feature (default: "unknown")
         score -- the score of the feature (eg laughter confidence score, [0, 1] = { x ∈ ℝ | 0 ≤ x ≤ 1 }) (default: 0.0)
 
     Notes:
         - score is notionally in the closed interval [0, 1], but this is not enforced -- it is up to the feature extractor to ensure this
           (or use scores outside this range if desired -- eg a feature manually selected by user input might have a score of 2.0 so it is sorted 'above' other features)
-        - sorts based on interval, then source, then score
-        - path should never be unknown, since we need it to make clips from
+        - sorts based on interval, then feature_extractor, then score
+        - source should never be unknown, since we need it to make clips from
     """
-    # TODO: consider renaming score to something more generic
 
-    def __init__(self, interval=None, source=None, score=None, path=None):
+    def __init__(self, interval=None, source: Source|None=None, feature_extractor=None, score=None):
         """Create a feature with an interval, source, and score
 
         Expects a ready-made interval; source and score are optional
@@ -243,14 +242,14 @@ class Feature():
             raise ValueError("Interval must be provided")
         self.interval = interval
 
-        if path is None:
-            raise ValueError("Path must be provided")
-        self.path = path
-
         if source is None:
-            source = "unknown"
+            raise ValueError("A Source must be provided")
         self.source = source
 
+        if feature_extractor is None:
+            feature_extractor = "unknown"
+        self.feature_extractor = feature_extractor
+
         if score is None:
             score = 0.0
         self.score = score
@@ -259,23 +258,25 @@ class Feature():
     # which delegate to the Interval class :)
 
     @classmethod
-    def from_start(cls, start=None, source=None, score=None, path=None):
-        return cls(interval=Interval.from_start(start), source=source, score=score, path=path)
+    def from_start(cls, start=None, source=None, feature_extractor=None, score=None):
+        return cls(interval=Interval.from_start(start), source=source,
+                   feature_extractor=feature_extractor, score=score)
 
     @classmethod
-    def from_end(cls, end=None, source=None, score=None, path=None):
-        return cls(interval=Interval.from_end(end), source=source, score=score, path=path)
+    def from_end(cls, end=None, source=None, feature_extractor=None, score=None):
+        return cls(interval=Interval.from_end(end), source=source,
+                   feature_extractor=feature_extractor, score=score)
 
     def __repr__(self):
-        return f"Feature({self.interval}, {self.source}, {self.score})"
+        return f"Feature({self.interval}, {self.source}, {self.feature_extractor}, {self.score})"
 
     def __lt__(self, other):
-        """Sort based on interval, then source, then score"""
+        """Sort based on interval, then feature_extractor, then score"""
         if self.interval == other.interval:
-            if self.source == other.source:
+            if self.feature_extractor == other.feature_extractor:
                 return self.score < other.score
-            return self.source < other.source
+            return self.feature_extractor < other.feature_extractor
         return self.interval < other.interval
 
     def to_json(self):
         """Return a dict representation of the feature for JSON encoding