Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

utils.py 10 KiB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
  1. import subprocess
  2. class SourceMedia():
  3. """Source media used by eg feature extractors. This is a list of Source objects.
  4. JSON type schema:
  5. [{
  6. "source": "/path/to/video.mp4",
  7. "path": "/path/to/video.mp4",
  8. "provider": "FileInputJSON"
  9. },
  10. {
  11. "source": "http://example.com/video.mp4",
  12. "path": "/path/to/downloaded_video.mp4",
  13. "provider": "InputYAML"
  14. }]
  15. It should be possible to combine/merge/aggregate multiple SourceMedia into one
  16. TODO: consider if we actually want that or if we just loop over a list of >0 SourceMedia
  17. Iterating over a SourceMedia object should return a list of Source objects.
  18. """
  19. def __init__(self, sources=[]):
  20. self.sources = sources
  21. def __iter__(self):
  22. return iter(self.sources)
  23. class Source():
  24. """A Source is a single media file (eg), used to populate SourceMedia objects.
  25. JSON type schema:
  26. {
  27. "source": "/path/to/video.mp4",
  28. "path": "/path/to/video.mp4",
  29. "provider": "FileInputJSON"
  30. }
  31. Instance variables:
  32. source -- the source of the media file (eg, a URL or a local path)
  33. path -- the path to the media file
  34. provider -- the provider of the media file (eg, "FileInputJSON")
  35. Accessing the object should return the path to the media file.
  36. Methods:
  37. duration() -- return the duration of the media file (uses ffprobe, result is cached)
  38. Notes:
  39. - source and path may be the same, for example in the case of a local file
  40. """
  41. _duration = None
  42. def __init__(self, source, path, provider):
  43. if not source:
  44. raise ValueError("Source must be provided") # TODO: #API -- decide if this is necessary
  45. self.source = source
  46. if not path:
  47. # we need a file to work on for the rest of the pipeline
  48. raise ValueError("Path must be provided")
  49. self.path = path
  50. if not provider:
  51. raise ValueError("Provider must be provided") # TODO: #API -- decide if this is necessary
  52. self.provider = provider
  53. def __str__(self):
  54. """See: 'accessing the object should return the path to the media file'"""
  55. return self.path
  56. def __repr__(self):
  57. return f"Source({self.source}, {self.path}, {self.provider})"
  58. def duration(self):
  59. """Return the duration of the media file at self.path (result is cached)"""
  60. return self._duration or self._get_duration(self.path)
  61. def _get_duration(self, file):
  62. """Use ffprobe to get the duration of the media file at self.path and cache result (_duration)
  63. usage: ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 <file>
  64. """
  65. # test if file exists
  66. try:
  67. with open(file) as _:
  68. pass
  69. except FileNotFoundError:
  70. raise FileNotFoundError(f"File not found: {file}")
  71. # cache the result
  72. self._duration = 0.0 or float(subprocess.check_output(["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", file]))
  73. return self._duration
  74. def to_json(self):
  75. """Return a dict representation of the source for JSON encoding
  76. @see video_producers.py:PipelineJSONEncoder
  77. """
  78. return dict(source=self.source, path=self.path, provider=self.provider)
  79. class Interval():
  80. """An interval of time in a media file
  81. This can be defined by a start and end time, a start time and a duration, or an end time and a duration.
  82. Instance variables:
  83. start -- the start time of the interval
  84. end -- the end time of the interval
  85. duration -- the duration of the interval (end - start)
  86. Notes:
  87. Sorts by start time, then end time
  88. """
  89. # TODO: decide if ABC or will be used directly
  90. # TODO: have default duration for intervals set by config
  91. # TODO: consider if we want to permit adjusting intervals (eg, start time, end time, duration) [probably yes]
  92. # NOTE: if we have more ways of defining, we could consider multipledispatch?
  93. # TODO: consider if we want to keep a reference to the media file (source) in the interval
  94. DEFAULT_DURATION = 5 # seconds
  95. DEFAUT_PRECISION = 3 # decimal places
  96. def __init__(self, start=None, end=None, duration=None):
  97. if start is None and end is None and duration is None:
  98. raise ValueError("Two of start, end, or duration must be provided")
  99. if start is not None and end is not None and duration is not None:
  100. raise ValueError("Only two of start, end, or duration may be provided")
  101. # start and end
  102. if start is not None and end is not None:
  103. # some trivial validation
  104. if start > end:
  105. raise ValueError("Start time must be before end time")
  106. self.start = start
  107. self.end = end
  108. self.duration = end - start
  109. # start and duration
  110. elif start is not None and duration is not None:
  111. if duration < 0:
  112. raise ValueError("Duration must be positive")
  113. self.start = start
  114. self.duration = duration
  115. self.end = start + duration
  116. # end and duration
  117. elif end is not None and duration is not None:
  118. if duration < 0:
  119. raise ValueError("Duration must be positive")
  120. self.end = end
  121. self.duration = duration
  122. self.start = end - duration
  123. # set precision
  124. self.start = round(self.start, self.DEFAUT_PRECISION)
  125. self.end = round(self.end, self.DEFAUT_PRECISION)
  126. self.duration = round(self.duration, self.DEFAUT_PRECISION)
  127. @classmethod
  128. def from_start(cls, start=None):
  129. """Create an interval from a start time using the default duration"""
  130. return cls(start=start, duration=cls.DEFAULT_DURATION)
  131. @classmethod
  132. def from_end(cls, end=None):
  133. """Create an interval from an end time using the default duration"""
  134. return cls(end=end, duration=cls.DEFAULT_DURATION)
  135. def __repr__(self):
  136. return f"Interval({self.start}, {self.end}, {self.duration})"
  137. def __lt__(self, other):
  138. if self.start == other.start:
  139. return self.end < other.end
  140. return self.start < other.start
  141. def to_json(self):
  142. """Return a dict representation of the interval for JSON encoding
  143. @see video_producers.py:PipelineJSONEncoder
  144. """
  145. return dict(start=self.start, end=self.end, duration=self.duration)
  146. # --------------------------------------------------------------
  147. # TODO: handle bad cases, eg negative duration, start > end, etc
  148. # --------------------------------------------------------------
  149. def move_start(self, new_start: float | int, relative: bool = False):
  150. """Update start time of Interval, keeping end time constant (& so modify duration)"""
  151. if relative:
  152. self.start += new_start
  153. else:
  154. self.start = new_start
  155. self.duration = round((self.end - self.start), self.DEFAUT_PRECISION)
  156. def move_end(self, new_end: float | int, relative: bool = False):
  157. """Update end time of Interval, keeping start time constant (& so modify duration)"""
  158. if relative:
  159. self.end += new_end
  160. else:
  161. self.end = new_end
  162. self.duration = round((self.end - self.start), self.DEFAUT_PRECISION)
  163. def update_duration(self, new_duration: float | int, relative: bool = False):
  164. """Update duration of Interval, keeping start time constant (& so modify end time)"""
  165. if relative:
  166. self.duration += new_duration
  167. else:
  168. self.duration = new_duration
  169. self.end = self.start + self.duration
  170. class Feature():
  171. """A feature extracted from a media file ("has a" Interval)
  172. This extends intervals by adding other fields, such as the feature source and 'score'
  173. Instance variables:
  174. interval -- Interval: time of feature in the media file
  175. source -- the source of the feature (ie feature extractor) (default: "unknown")
  176. path -- the path to the media file
  177. score -- the score of the feature (eg laughter confidence score, [0, 1] = { x ∈ ℝ | 0 ≤ x ≤ 1 }) (default: 0.0)
  178. Notes:
  179. - score is notionally in the closed interval [0, 1], but this is not enforced -- it is up to the feature extractor to ensure this (or use scores outside this range if desired -- eg a feature manually selected by user input might have a score of 2.0 so it is sorted 'above' other features)
  180. - sorts based on interval, then source, then score
  181. - path should never be unknown, since we need it to make clips from
  182. """
  183. # TODO: consider renaming score to something more generic
  184. def __init__(self, interval=None, source=None, score=None, path=None):
  185. """Create a feature with an interval, source, and score
  186. Expects a ready-made interval; source and score are optional
  187. """
  188. if interval is None:
  189. raise ValueError("Interval must be provided")
  190. self.interval = interval
  191. if path is None:
  192. raise ValueError("Path must be provided")
  193. self.path = path
  194. if source is None:
  195. source = "unknown"
  196. self.source = source
  197. if score is None:
  198. score = 0.0
  199. self.score = score
  200. # classmethods for creating a feature with an interval directly
  201. # which delegate to the Interval class :)
  202. @classmethod
  203. def from_start(cls, start=None, source=None, score=None, path=None):
  204. return cls(interval=Interval.from_start(start), source=source, score=score, path=path)
  205. @classmethod
  206. def from_end(cls, end=None, source=None, score=None, path=None):
  207. return cls(interval=Interval.from_end(end), source=source, score=score, path=path)
  208. def __repr__(self):
  209. return f"Feature({self.interval}, {self.source}, {self.score})"
  210. def __lt__(self, other):
  211. """Sort based on interval, then source, then score"""
  212. if self.interval == other.interval:
  213. if self.source == other.source:
  214. return self.score < other.score
  215. return self.source < other.source
  216. return self.interval < other.interval
  217. def to_json(self):
  218. """Return a dict representation of the feature for JSON encoding
  219. @see video_producers.py:PipelineJSONEncoder
  220. """
  221. return dict(interval=self.interval.to_json(), source=self.source.to_json(),
  222. feature_extractor=self.feature_extractor, score=self.score)