class Feature():
    """A feature extracted from a media file ("has a" Interval).

    This extends intervals by adding other fields, such as the feature
    source, the media path and a 'score'.

    Instance variables:

        interval -- Interval: time of feature in the media file
        source -- the source of the feature (i.e. feature extractor) (default: "unknown")
        path -- the path to the media file (required)
        score -- the score of the feature (e.g. laughter confidence score,
                 notionally 0 <= score <= 1) (default: 0.0)

    Notes:

        - score is notionally in the closed interval [0, 1], but this is not
          enforced -- it is up to the feature extractor to ensure this (or to
          use scores outside this range if desired -- e.g. a feature manually
          selected by user input might have a score of 2.0 so it is sorted
          'above' other features)
        - sorts based on interval, then source, then score
        - path should never be unknown, since it is needed to make clips
        - only `<` is defined; `==` is not overridden, so equality between
          features remains identity-based
    """
    # TODO: consider renaming score to something more generic

    def __init__(self, interval=None, source=None, score=None, path=None):
        """Create a feature from a ready-made interval and a media path.

        Arguments:

            interval -- the interval of the feature (required)
            source -- name of the feature source (default: "unknown")
            score -- score of the feature (default: 0.0)
            path -- path to the media file (required)

        Raises ValueError if interval or path is None.
        """
        # Validate the two mandatory fields first, in the same order as before
        # so the same error wins when both are missing.
        if interval is None:
            raise ValueError("Interval must be provided")
        if path is None:
            raise ValueError("Path must be provided")

        self.interval = interval
        self.path = path
        # Only None triggers the defaults: falsy values such as "" or 0 are
        # kept exactly as given.
        self.source = "unknown" if source is None else source
        self.score = 0.0 if score is None else score

    # classmethods for creating a feature with an interval directly
    # which delegate to the Interval class :)

    @classmethod
    def from_start(cls, start=None, source=None, score=None, path=None):
        """Create a feature whose interval is Interval.from_start(start).

        start is passed through untouched; how the rest of the interval is
        derived is up to Interval.from_start. The remaining arguments are
        handled as in __init__ (path is still required).
        """
        return cls(
            interval=Interval.from_start(start),
            source=source,
            score=score,
            path=path,
        )

    @classmethod
    def from_end(cls, end=None, source=None, score=None, path=None):
        """Create a feature whose interval is Interval.from_end(end).

        end is passed through untouched; how the rest of the interval is
        derived is up to Interval.from_end. The remaining arguments are
        handled as in __init__ (path is still required).
        """
        return cls(
            interval=Interval.from_end(end),
            source=source,
            score=score,
            path=path,
        )

    def __repr__(self):
        """Return a short debug representation (the path is not included)."""
        return f"Feature({self.interval}, {self.source}, {self.score})"

    def __lt__(self, other):
        """Sort based on interval, then source, then score.

        Returns NotImplemented when other is not a Feature, so Python can try
        the reflected operation or raise TypeError, rather than failing with
        an AttributeError on the missing attributes.
        """
        if not isinstance(other, Feature):
            return NotImplemented
        # Tuple comparison finds the first field that differs and applies `<`
        # to it, which is exactly the interval -> source -> score cascade.
        mine = (self.interval, self.source, self.score)
        theirs = (other.interval, other.source, other.score)
        return mine < theirs