diff --git a/pipeline/feature_extractors.py b/pipeline/feature_extractors.py
index be9c6f6..b71b28b 100644
--- a/pipeline/feature_extractors.py
+++ b/pipeline/feature_extractors.py
@@ -1,10 +1,10 @@
 from abc import ABC
+import json
 import logging
 import os
 import random
 import subprocess
 from ast import literal_eval
-from pipeline.utils import SourceMedia, Feature, Interval
 from pipeline.utils import SourceMedia, Source, Feature, Interval
 
 # for loudness detection
@@ -332,6 +332,58 @@ class VideoActivityFeatureExtractor(FeatureExtractor):
         pass
 
 
+class JSONFeatureExtractor(FeatureExtractor):
+    """(Re-)create features from a JSON file
+
+    The JSON file can have one of two formats:
+    - the format produced by the pipeline (@see: video_producers.py:JSONProducer)
+    - a simplified format which is easier for manual creation
+
+    Only the pipeline format is currently handled by run().
+    """
+
+    def __init__(self, input_files=None, config=None):
+        """Store the JSON files to read.
+
+        Raises ValueError if input_files is empty or None.
+        """
+        if not input_files:
+            raise ValueError("No input files provided!")
+        self.input_files = input_files
+        self.config = config
+        self.features = []
+
+    def setup(self):
+        pass
+
+    def _interval_from_dict(self, d):
+        """Build an Interval from a dict with "start" and "duration" keys"""
+        return Interval(start=d["start"], duration=d["duration"])
+
+    def _source_from_dict(self, d):
+        """Build a Source from a dict with "source", "path" and "provider" keys"""
+        return Source(d["source"], d["path"], d["provider"])
+
+    def _read_json_from_file(self, path):
+        """Return the decoded contents of the JSON file at path
+
+        Kept as a separate method so run() has a single place that touches the filesystem.
+        """
+        with open(path, "r", encoding="utf-8") as f:
+            return json.load(f)
+
+    def run(self):
+        """Append one Feature to self.features for every entry of every input file"""
+        # only pipeline JSON format for now
+        # TODO: add support for simplified format
+        for file in self.input_files:
+            features_from_json = self._read_json_from_file(file.path)
+
+            for feature in features_from_json:
+                self.features.append(Feature(interval=self._interval_from_dict(feature["interval"]),
+                                             source=self._source_from_dict(feature["source"]),
+                                             feature_extractor=feature["feature_extractor"],
+                                             score=feature["score"]))
 
     def teardown(self):
         pass