No puede seleccionar más de 25 temas Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.

utils.py 9.6 KiB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263
  1. import subprocess
  2. class SourceMedia():
  3. """Source media used by eg feature extractors. This is a list of Source objects.
  4. JSON type schema:
  5. [{
  6. "source": "/path/to/video.mp4",
  7. "path": "/path/to/video.mp4",
  8. "provider": "FileInputJSON"
  9. },
  10. {
  11. "source": "http://example.com/video.mp4",
  12. "path": "/path/to/downloaded_video.mp4",
  13. "provider": "InputYAML"
  14. }]
  15. It should be possible to combine/merge/aggregate multiple SourceMedia into one
  16. TODO: consider if we actually want that or if we just loop over a list of >0 SourceMedia
  17. Iterating over a SourceMedia object should return a list of Source objects.
  18. """
  19. def __init__(self, sources=[]):
  20. self.sources = sources
  21. def __iter__(self):
  22. return iter(self.sources)
  23. class Source():
  24. """A Source is a single media file (eg), used to populate SourceMedia objects.
  25. JSON type schema:
  26. {
  27. "source": "/path/to/video.mp4",
  28. "path": "/path/to/video.mp4",
  29. "provider": "FileInputJSON"
  30. }
  31. Instance variables:
  32. source -- the source of the media file (eg, a URL or a local path)
  33. path -- the path to the media file
  34. provider -- the provider of the media file (eg, "FileInputJSON")
  35. Accessing the object should return the path to the media file.
  36. Methods:
  37. duration() -- return the duration of the media file (uses ffprobe, result is cached)
  38. Notes:
  39. - source and path may be the same, for example in the case of a local file
  40. """
  41. _duration = None
  42. def __init__(self, source, path, provider):
  43. if not source:
  44. raise ValueError("Source must be provided") # TODO: #API -- decide if this is necessary
  45. self.source = source
  46. if not path:
  47. # we need a file to work on for the rest of the pipeline
  48. raise ValueError("Path must be provided")
  49. self.path = path
  50. if not provider:
  51. raise ValueError("Provider must be provided") # TODO: #API -- decide if this is necessary
  52. self.provider = provider
  53. def __str__(self):
  54. """See: 'accessing the object should return the path to the media file'"""
  55. return self.path
  56. def __repr__(self):
  57. return f"Source({self.source}, {self.path}, {self.provider})"
  58. def duration(self):
  59. """Return the duration of the media file at self.path (result is cached)"""
  60. return self._duration or self._get_duration(self.path)
  61. def _get_duration(self, file):
  62. """Use ffprobe to get the duration of the media file at self.path and cache result (_duration)
  63. usage: ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 <file>
  64. """
  65. # test if file exists
  66. try:
  67. with open(file) as _:
  68. pass
  69. except FileNotFoundError:
  70. raise FileNotFoundError(f"File not found: {file}")
  71. # cache the result
  72. self._duration = 0.0 or float(subprocess.check_output(["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", file]))
  73. return self._duration
  74. class Interval():
  75. """An interval of time in a media file
  76. This can be defined by a start and end time, a start time and a duration, or an end time and a duration.
  77. Instance variables:
  78. start -- the start time of the interval
  79. end -- the end time of the interval
  80. duration -- the duration of the interval (end - start)
  81. Notes:
  82. Sorts by start time, then end time
  83. """
  84. # TODO: decide if ABC or will be used directly
  85. # TODO: have default duration for intervals set by config
  86. # TODO: consider if we want to permit adjusting intervals (eg, start time, end time, duration) [probably yes]
  87. # NOTE: if we have more ways of defining, we could consider multipledispatch?
  88. DEFAULT_DURATION = 5 # seconds
  89. DEFAUT_PRECISION = 3 # decimal places
  90. def __init__(self, start=None, end=None, duration=None):
  91. if start is None and end is None and duration is None:
  92. raise ValueError("Two of start, end, or duration must be provided")
  93. if start is not None and end is not None and duration is not None:
  94. raise ValueError("Only two of start, end, or duration may be provided")
  95. # start and end
  96. if start is not None and end is not None:
  97. # some trivial validation
  98. if start > end:
  99. raise ValueError("Start time must be before end time")
  100. self.start = start
  101. self.end = end
  102. self.duration = end - start
  103. # start and duration
  104. elif start is not None and duration is not None:
  105. if duration < 0:
  106. raise ValueError("Duration must be positive")
  107. self.start = start
  108. self.duration = duration
  109. self.end = start + duration
  110. # end and duration
  111. elif end is not None and duration is not None:
  112. if duration < 0:
  113. raise ValueError("Duration must be positive")
  114. self.end = end
  115. self.duration = duration
  116. self.start = end - duration
  117. # set precision
  118. self.start = round(self.start, self.DEFAUT_PRECISION)
  119. self.end = round(self.end, self.DEFAUT_PRECISION)
  120. self.duration = round(self.duration, self.DEFAUT_PRECISION)
  121. @classmethod
  122. def from_start(cls, start=None):
  123. """Create an interval from a start time using the default duration"""
  124. return cls(start=start, duration=cls.DEFAULT_DURATION)
  125. @classmethod
  126. def from_end(cls, end=None):
  127. """Create an interval from an end time using the default duration"""
  128. return cls(end=end, duration=cls.DEFAULT_DURATION)
  129. def __repr__(self):
  130. return f"Interval({self.start}, {self.end}, {self.duration})"
  131. def __lt__(self, other):
  132. if self.start == other.start:
  133. return self.end < other.end
  134. return self.start < other.start
  135. # --------------------------------------------------------------
  136. # TODO: handle bad cases, eg negative duration, start > end, etc
  137. # --------------------------------------------------------------
  138. def move_start(self, new_start: float | int, relative: bool = False):
  139. """Update start time of Interval, keeping end time constant (& so modify duration)"""
  140. if relative:
  141. self.start += new_start
  142. else:
  143. self.start = new_start
  144. self.duration = round((self.end - self.start), self.DEFAUT_PRECISION)
  145. def move_end(self, new_end: float | int, relative: bool = False):
  146. """Update end time of Interval, keeping start time constant (& so modify duration)"""
  147. if relative:
  148. self.end += new_end
  149. else:
  150. self.end = new_end
  151. self.duration = round((self.end - self.start), self.DEFAUT_PRECISION)
  152. def update_duration(self, new_duration: float | int, relative: bool = False):
  153. """Update duration of Interval, keeping start time constant (& so modify end time)"""
  154. if relative:
  155. self.duration += new_duration
  156. else:
  157. self.duration = new_duration
  158. self.end = self.start + self.duration
  159. class Feature():
  160. """A feature extracted from a media file ("has a" Interval)
  161. This extends intervals by adding other fields, such as the feature source and 'score'
  162. Instance variables:
  163. interval -- Interval: time of feature in the media file
  164. source -- the source of the feature (ie feature extractor) (default: "unknown")
  165. path -- the path to the media file
  166. score -- the score of the feature (eg laughter confidence score, [0, 1] = { x ∈ ℝ | 0 ≤ x ≤ 1 }) (default: 0.0)
  167. Notes:
  168. - score is notionally in the closed interval [0, 1], but this is not enforced -- it is up to the feature extractor to ensure this (or use scores outside this range if desired -- eg a feature manually selected by user input might have a score of 2.0 so it is sorted 'above' other features)
  169. - sorts based on interval, then source, then score
  170. - path should never be unknown, since we need it to make clips from
  171. """
  172. # TODO: consider renaming score to something more generic
  173. def __init__(self, interval=None, source=None, score=None, path=None):
  174. """Create a feature with an interval, source, and score
  175. Expects a ready-made interval; source and score are optional
  176. """
  177. if interval is None:
  178. raise ValueError("Interval must be provided")
  179. self.interval = interval
  180. if path is None:
  181. raise ValueError("Path must be provided")
  182. self.path = path
  183. if source is None:
  184. source = "unknown"
  185. self.source = source
  186. if score is None:
  187. score = 0.0
  188. self.score = score
  189. # classmethods for creating a feature with an interval directly
  190. # which delegate to the Interval class :)
  191. @classmethod
  192. def from_start(cls, start=None, source=None, score=None, path=None):
  193. return cls(interval=Interval.from_start(start), source=source, score=score, path=path)
  194. @classmethod
  195. def from_end(cls, end=None, source=None, score=None, path=None):
  196. return cls(interval=Interval.from_end(end), source=source, score=score, path=path)
  197. def __repr__(self):
  198. return f"Feature({self.interval}, {self.source}, {self.score})"
  199. def __lt__(self, other):
  200. """Sort based on interval, then source, then score"""
  201. if self.interval == other.interval:
  202. if self.source == other.source:
  203. return self.score < other.score
  204. return self.source < other.source
  205. return self.interval < other.interval