class SourceMedia(): """Source media used by eg feature extractors. This is a list of Source objects. JSON type schema: [{ "source": "/path/to/video.mp4", "path": "/path/to/video.mp4", "provider": "FileInputJSON" }, { "source": "http://example.com/video.mp4", "path": "/path/to/downloaded_video.mp4", "provider": "InputYAML" }] It should be possible to combine/merge/aggregate multiple SourceMedia into one TODO: consider if we actually want that or if we just loop over a list of >0 SourceMedia Iterating over a SourceMedia object should return a list of Source objects. """ def __init__(self, sources=[]): self.sources = sources def __iter__(self): return iter(self.sources) class Source(): """A Source is a single media file (eg), used to populate SourceMedia objects. JSON type schema: { "source": "/path/to/video.mp4", "path": "/path/to/video.mp4", "provider": "FileInputJSON" } def __init__(self, source, path, provider): if not source: raise ValueError("Source must be provided") # TODO: #API -- decide if this is necessary self.source = source if not path: # we need a file to work on for the rest of the pipeline raise ValueError("Path must be provided") self.path = path if not provider: raise ValueError("Provider must be provided") # TODO: #API -- decide if this is necessary self.provider = provider def __str__(self): """See: 'accessing the object should return the path to the media file'""" return self.path def __repr__(self): return f"Source({self.source}, {self.path}, {self.provider})" def duration(self): """Return the duration of the media file at self.path (result is cached)""" return self._duration or self._get_duration(self.path) def _get_duration(self, file): """Use ffprobe to get the duration of the media file at self.path and cache result (_duration) usage: ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 """ # test if file exists try: with open(file) as _: pass except FileNotFoundError: raise FileNotFoundError(f"File not found: {file}") # cache the result self._duration = 0.0 or float(subprocess.check_output(["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", file])) return self._duration class Interval(): """An interval of time in a media file This can be defined by a start and end time, a start time and a duration, or an end time and a duration. Instance variables: start -- the start time of the interval end -- the end time of the interval duration -- the duration of the interval (end - start) Notes: Sorts by start time, then end time """ # TODO: decide if ABC or will be used directly # TODO: have default duration for intervals set by config # TODO: consider if we want to permit adjusting intervals (eg, start time, end time, duration) [probably yes] # NOTE: if we have more ways of defining, we could consider multipledispatch? DEFAULT_DURATION = 5 # seconds DEFAUT_PRECISION = 3 # decimal places def __init__(self, start=None, end=None, duration=None): if start is None and end is None and duration is None: raise ValueError("Two of start, end, or duration must be provided") if start is not None and end is not None and duration is not None: raise ValueError("Only two of start, end, or duration may be provided") # start and end if start is not None and end is not None: # some trivial validation if start > end: raise ValueError("Start time must be before end time") self.start = start self.end = end self.duration = end - start # start and duration elif start is not None and duration is not None: if duration < 0: raise ValueError("Duration must be positive") self.start = start self.duration = duration self.end = start + duration # end and duration elif end is not None and duration is not None: if duration < 0: raise ValueError("Duration must be positive") self.end = end self.duration = duration self.start = end - duration # set precision self.start = round(self.start, self.DEFAUT_PRECISION) self.end = round(self.end, self.DEFAUT_PRECISION) self.duration = round(self.duration, self.DEFAUT_PRECISION) @classmethod def from_start(cls, start=None): """Create an interval from a start time using the default duration""" return cls(start=start, duration=cls.DEFAULT_DURATION) @classmethod def from_end(cls, end=None): """Create an interval from an end time using the default duration""" return cls(end=end, duration=cls.DEFAULT_DURATION) def __repr__(self): return f"Interval({self.start}, {self.end}, {self.duration})" def __lt__(self, other): if self.start == other.start: return self.end < other.end return self.start < other.start # -------------------------------------------------------------- # TODO: handle bad cases, eg negative duration, start > end, etc # -------------------------------------------------------------- def move_start(self, new_start: float | int, relative: bool = False): """Update start time of Interval, keeping end time constant (& so modify duration)""" if relative: self.start += new_start else: self.start = new_start self.duration = self.end - self.start def move_end(self, new_end: float | int, relative: bool = False): """Update end time of Interval, keeping start time constant (& so modify duration)""" if relative: self.end += new_end else: self.end = new_end self.duration = self.end - self.start def update_duration(self, new_duration: float | int, relative: bool = False): """Update duration of Interval, keeping start time constant (& so modify end time)""" if relative: self.duration += new_duration else: self.duration = new_duration self.end = self.start + self.duration