feat: add JSONFeatureExtractor

Parses JSON, e.g. that output by the pipeline or defined manually, and produces Features that can be (re-)used with the pipeline.
há 7 meses · c76d9f6264
--- a/pipeline/feature_extractors.py
+++ b/pipeline/feature_extractors.py
@@ -1,10 +1,10 @@
 from abc import ABC
 import json
 import logging
 import os
 import random
 import subprocess
 from ast import literal_eval
 from pipeline.utils import SourceMedia, Feature, Interval
 from pipeline.utils import SourceMedia, Source, Feature, Interval

 # for loudness detection
@@ -332,6 +332,42 @@ class VideoActivityFeatureExtractor(FeatureExtractor):
        pass


 class JSONFeatureExtractor(FeatureExtractor):
    """(Re-)create Feature objects from JSON files.

    Two JSON layouts are envisaged:
      - the format produced by the pipeline (@see: video_producers.py:JSONProducer)
      - a simplified format which is easier to write by hand

    Only the pipeline format is handled by run() at present. Every entry read
    from a file must provide the keys "interval" (with "start" and "duration"),
    "source" (with "source", "path" and "provider"), "feature_extractor" and
    "score".
    """

    def __init__(self, input_files=None, config=None):
        """Store the inputs and start with an empty feature list.

        input_files -- the files to read; run() uses the `path` attribute of each
        config      -- kept on the instance; not otherwise used by this class
        Raises ValueError when input_files is missing or empty.
        """
        if not input_files:
            raise ValueError("No input files provided!")
        self.features = []
        self.config = config
        self.input_files = input_files

    def setup(self):
        """Nothing to prepare for this extractor."""

    def _interval_from_dict(self, d):
        """Build an Interval from the "start" and "duration" entries of d."""
        start = d["start"]
        duration = d["duration"]
        return Interval(start=start, duration=duration)

    def _source_from_dict(self, d):
        """Build a Source from the "source", "path" and "provider" entries of d."""
        # Source takes these three values positionally, in exactly this order
        fields = [d[key] for key in ("source", "path", "provider")]
        return Source(*fields)

    def _feature_from_dict(self, d):
        """Build a Feature from one entry of the pipeline JSON format."""
        interval = self._interval_from_dict(d["interval"])
        source = self._source_from_dict(d["source"])
        extractor_name = d["feature_extractor"]
        score = d["score"]
        return Feature(interval=interval,
                       source=source,
                       feature_extractor=extractor_name,
                       score=score)

    def run(self):
        """Read every input file and append the Features it describes.

        Features are added to self.features; anything already in that list
        (e.g. from an earlier run() call) is kept.
        """
        # only pipeline JSON format for now
        # TODO: add support for simplified format
        for input_file in self.input_files:
            # NOTE(review): _read_json_from_file is not defined in this class;
            # presumably it is provided by FeatureExtractor -- confirm
            entries = self._read_json_from_file(input_file.path)

            for entry in entries:
                self.features.append(self._feature_from_dict(entry))

    def teardown(self):
        """Nothing to clean up for this extractor."""