Browse Source

feat: add JSONFeatureExtractor

Parses JSON, e.g. that output by the pipeline or defined manually, and
produces Features that can be (re-)used with the pipeline.
main
Rob Hallam 3 months ago
parent
commit
c76d9f6264
1 changed files with 37 additions and 1 deletions
  1. +37
    -1
      pipeline/feature_extractors.py

+ 37
- 1
pipeline/feature_extractors.py View File

@@ -1,10 +1,10 @@
from abc import ABC
import json
import logging
import os
import random
import subprocess
from ast import literal_eval
from pipeline.utils import SourceMedia, Feature, Interval
from pipeline.utils import SourceMedia, Source, Feature, Interval

# for loudness detection
@@ -332,6 +332,42 @@ class VideoActivityFeatureExtractor(FeatureExtractor):
pass


class JSONFeatureExtractor(FeatureExtractor):
    """(Re-)create features from a JSON file

    The JSON file can have one of two formats:
    - the format produced by the pipeline (@see: video_producers.py:JSONProducer)
    - a simplified format which is easier for manual creation

    Only the pipeline format is handled by run() at present.
    """

    def __init__(self, input_files=None, config=None):
        """Store the input files and config, and start with no features.

        Args:
            input_files: non-empty collection of input file objects; run()
                reads the `.path` attribute of each one.
            config: optional configuration; stored but not read by this class.

        Raises:
            ValueError: if `input_files` is None or empty.
        """
        if not input_files:
            raise ValueError("No input files provided!")
        self.input_files = input_files
        self.config = config
        self.features = []

    def setup(self):
        """No setup is needed for this extractor."""
        pass

    def _read_json_from_file(self, path):
        """Return the parsed contents of the JSON file at `path`.

        run() depends on this helper; it is defined here so the class does
        not rely on a definition that is not visible alongside it.

        Raises:
            OSError: if the file cannot be opened.
            json.JSONDecodeError: if the file does not contain valid JSON.
        """
        with open(path, encoding="utf-8") as json_file:
            return json.load(json_file)

    def _interval_from_dict(self, d):
        """Build an Interval from a dict with "start" and "duration" keys."""
        return Interval(start=d["start"], duration=d["duration"])

    def _source_from_dict(self, d):
        """Build a Source from a dict with "source", "path" and "provider" keys."""
        return Source(d["source"], d["path"], d["provider"])

    def run(self):
        """Read every input file and append a Feature per JSON entry.

        Each entry must provide "interval", "source", "feature_extractor"
        and "score"; a missing key raises KeyError. Features are appended to
        `self.features`, so results accumulate across input files.
        """
        # only pipeline JSON format for now
        # TODO: add support for simplified format
        for input_file in self.input_files:
            features_from_json = self._read_json_from_file(input_file.path)

            for feature in features_from_json:
                self.features.append(
                    Feature(
                        interval=self._interval_from_dict(feature["interval"]),
                        source=self._source_from_dict(feature["source"]),
                        feature_extractor=feature["feature_extractor"],
                        score=feature["score"],
                    )
                )

    def teardown(self):
        """No teardown is needed for this extractor."""
        pass

Loading…
Cancel
Save