You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

309 lines
12 KiB

  1. import subprocess
  2. class SourceMedia():
  3. """Source media used by eg feature extractors. This is a list of Source objects.
  4. JSON type schema:
  5. [{
  6. "source": "/path/to/video.mp4",
  7. "path": "/path/to/video.mp4",
  8. "provider": "FileInputJSON"
  9. },
  10. {
  11. "source": "http://example.com/video.mp4",
  12. "path": "/path/to/downloaded_video.mp4",
  13. "provider": "InputYAML"
  14. }]
  15. It should be possible to combine/merge/aggregate multiple SourceMedia into one
  16. TODO: consider if we actually want that or if we just loop over a list of >0 SourceMedia
  17. Iterating over a SourceMedia object should return a list of Source objects.
  18. """
  19. def __init__(self, sources=[]):
  20. self.sources = sources
  21. def __iter__(self):
  22. return iter(self.sources)
  23. class Source():
  24. """A Source is a single media file (eg), used to populate SourceMedia objects.
  25. JSON type schema:
  26. {
  27. "source": "/path/to/video.mp4",
  28. "path": "/path/to/video.mp4",
  29. "provider": "FileInputJSON"
  30. }
  31. Instance variables:
  32. source -- the source of the media file (eg, a URL or a local path)
  33. path -- the path to the media file
  34. provider -- the provider of the media file (eg, "FileInputJSON")
  35. Accessing the object should return the path to the media file.
  36. Methods:
  37. duration() -- return the duration of the media file (uses ffprobe, result is cached)
  38. Notes:
  39. - source and path may be the same, for example in the case of a local file
  40. """
  41. _duration = None
  42. def __init__(self, source, path, provider):
  43. if not source:
  44. raise ValueError("Source must be provided") # TODO: #API -- decide if this is necessary
  45. self.source = source
  46. if not path:
  47. # we need a file to work on for the rest of the pipeline
  48. raise ValueError("Path must be provided")
  49. self.path = path
  50. if not provider:
  51. raise ValueError("Provider must be provided") # TODO: #API -- decide if this is necessary
  52. self.provider = provider
  53. def __str__(self):
  54. """See: 'accessing the object should return the path to the media file'"""
  55. return self.path
  56. def __repr__(self):
  57. return f"Source({self.source}, {self.path}, {self.provider})"
  58. def duration(self):
  59. """Return the duration of the media file at self.path (result is cached)"""
  60. return self._duration or self._get_duration(self.path)
  61. def _get_duration(self, file):
  62. """Use ffprobe to get the duration of the media file at self.path and cache result (_duration)
  63. usage: ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 <file>
  64. """
  65. # test if file exists
  66. try:
  67. with open(file) as _:
  68. pass
  69. except FileNotFoundError:
  70. raise FileNotFoundError(f"File not found: {file}")
  71. # cache the result
  72. self._duration = 0.0 or float(subprocess.check_output(["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", file]))
  73. return self._duration
  74. def to_json(self):
  75. """Return a dict representation of the source for JSON encoding
  76. @see video_producers.py:PipelineJSONEncoder
  77. """
  78. return dict(source=self.source, path=self.path, provider=self.provider)
  79. class Interval():
  80. """An interval of time in a media file
  81. This can be defined by a start and end time, a start time and a duration, or an end time and a duration.
  82. Instance variables:
  83. start -- the start time of the interval
  84. end -- the end time of the interval
  85. duration -- the duration of the interval (end - start)
  86. Notes:
  87. Sorts by start time, then end time
  88. """
  89. # TODO: decide if ABC or will be used directly
  90. # TODO: have default duration for intervals set by config
  91. # TODO: consider if we want to permit adjusting intervals (eg, start time, end time, duration) [probably yes]
  92. # NOTE: if we have more ways of defining, we could consider multipledispatch?
  93. # TODO: consider if we want to keep a reference to the media file (source) in the interval
  94. DEFAULT_DURATION = 5 # seconds
  95. DEFAUT_PRECISION = 3 # decimal places
  96. def __init__(self, start=None, end=None, duration=None):
  97. if start is None and end is None and duration is None:
  98. raise ValueError("Two of start, end, or duration must be provided")
  99. if start is not None and end is not None and duration is not None:
  100. raise ValueError("Only two of start, end, or duration may be provided")
  101. # start and end
  102. if start is not None and end is not None:
  103. # some trivial validation
  104. if start > end:
  105. raise ValueError("Start time must be before end time")
  106. self.start = start
  107. self.end = end
  108. self.duration = end - start
  109. # start and duration
  110. elif start is not None and duration is not None:
  111. if duration < 0:
  112. raise ValueError("Duration must be positive")
  113. self.start = start
  114. self.duration = duration
  115. self.end = start + duration
  116. # end and duration
  117. elif end is not None and duration is not None:
  118. if duration < 0:
  119. raise ValueError("Duration must be positive")
  120. self.end = end
  121. self.duration = duration
  122. self.start = end - duration
  123. # set precision
  124. self.start = round(self.start, self.DEFAUT_PRECISION)
  125. self.end = round(self.end, self.DEFAUT_PRECISION)
  126. self.duration = round(self.duration, self.DEFAUT_PRECISION)
  127. @classmethod
  128. def from_start(cls, start=None):
  129. """Create an interval from a start time using the default duration"""
  130. return cls(start=start, duration=cls.DEFAULT_DURATION)
  131. @classmethod
  132. def from_end(cls, end=None):
  133. """Create an interval from an end time using the default duration"""
  134. return cls(end=end, duration=cls.DEFAULT_DURATION)
  135. def __repr__(self):
  136. return f"Interval({self.start}, {self.end}, {self.duration})"
  137. def __lt__(self, other):
  138. if self.start == other.start:
  139. return self.end < other.end
  140. return self.start < other.start
  141. def __eq__(self, other):
  142. return self.start == other.start and self.end == other.end
  143. def to_json(self):
  144. """Return a dict representation of the interval for JSON encoding
  145. @see video_producers.py:PipelineJSONEncoder
  146. """
  147. return dict(start=self.start, end=self.end, duration=self.duration)
  148. # --------------------------------------------------------------
  149. # TODO: handle bad cases, eg negative duration, start > end, etc
  150. # --------------------------------------------------------------
  151. def move_start(self, new_start: float | int, relative: bool = False):
  152. """Update start time of Interval, keeping end time constant (& so modify duration)"""
  153. if relative:
  154. self.start += new_start
  155. else:
  156. self.start = new_start
  157. self.duration = round((self.end - self.start), self.DEFAUT_PRECISION)
  158. def move_end(self, new_end: float | int, relative: bool = False):
  159. """Update end time of Interval, keeping start time constant (& so modify duration)"""
  160. if relative:
  161. self.end += new_end
  162. else:
  163. self.end = new_end
  164. self.duration = round((self.end - self.start), self.DEFAUT_PRECISION)
  165. def update_duration(self, new_duration: float | int, relative: bool = False):
  166. """Update duration of Interval, keeping start time constant (& so modify end time)"""
  167. if relative:
  168. self.duration += new_duration
  169. else:
  170. self.duration = new_duration
  171. self.end = self.start + self.duration
  172. def overlaps(self, other):
  173. """Check if this interval overlaps (or touches) with another interval
  174. This is the case if:
  175. - this.start <= other.end <= this.end
  176. - this.start <= other.start <= this.end
  177. - other.start <= this.end <= other.end
  178. - other.start <= this.start <= other.end
  179. """
  180. return (self.start <= other.end <= self.end) or (self.start <= other.start <= self.end) or \
  181. (other.start <= self.end <= other.end) or (other.start <= self.start <= other.end)
  182. class Feature():
  183. """A feature extracted from a media file ("has a" Interval)
  184. This extends intervals by adding other fields, such as the feature source and 'score'
  185. Instance variables:
  186. interval -- Interval: time of feature in the media file
  187. source -- the original Source of the media (a Source object)
  188. feature_extractor -- the feature extractor that created this Feature (default: "unknown")
  189. score -- the score of the feature (eg laughter confidence score, [0, 1] = { x ∈ ℝ | 0 ≤ x ≤ 1 }) (default: 0.0)
  190. Notes:
  191. - score is notionally in the closed interval [0, 1], but this is not enforced -- it is up to the feature extractor to ensure this (or use scores outside this range if desired -- eg a feature manually selected by user input might have a score of 2.0 so it is sorted 'above' other features)
  192. - sorts based on interval, then feature_extractor, then score
  193. - source should never be unknown, since we need it to make clips from
  194. """
  195. def __init__(self, interval=None, source: Source|None=None, feature_extractor=None, score=None):
  196. """Create a feature with an interval, source, and score
  197. Expects a ready-made interval; source and score are optional
  198. """
  199. if interval is None:
  200. raise ValueError("Interval must be provided")
  201. self.interval = interval
  202. if source is None:
  203. raise ValueError("A Source must be provided")
  204. self.source = source
  205. if feature_extractor is None:
  206. feature_extractor = "unknown"
  207. self.feature_extractor = feature_extractor
  208. if score is None:
  209. score = 0.0
  210. self.score = score
  211. # classmethods for creating a feature with an interval directly
  212. # which delegate to the Interval class :)
  213. @classmethod
  214. def from_start(cls, start=None, source=None, feature_extractor=None, score=None):
  215. return cls(interval=Interval.from_start(start), source=source,
  216. feature_extractor=feature_extractor, score=score)
  217. @classmethod
  218. def from_end(cls, end=None, source=None, feature_extractor=None, score=None):
  219. return cls(interval=Interval.from_end(end), source=source,\
  220. feature_extractor=feature_extractor, score=score)
  221. def __repr__(self):
  222. return f"Feature({self.interval}, {self.source}, {self.feature_extractor}, {self.score})"
  223. def __lt__(self, other):
  224. """Sort based on interval, then feature_extractor, then score"""
  225. if self.interval == other.interval:
  226. if self.feature_extractor == other.feature_extractor:
  227. return self.score < other.score
  228. return self.feature_extractor < other.feature_extractor
  229. return self.interval < other.interval
  230. def __eq__(self, other):
  231. return self.interval == other.interval \
  232. and self.source == other.source \
  233. and self.feature_extractor == other.feature_extractor \
  234. and self.score == other.score
  235. def to_json(self):
  236. """Return a dict representation of the feature for JSON encoding
  237. @see video_producers.py:PipelineJSONEncoder
  238. """
  239. return dict(interval=self.interval.to_json(), source=self.source.to_json(),
  240. feature_extractor=self.feature_extractor, score=self.score)