From 9c87cdcc0f97d108caafd7ccb5ad5b22e9dfc374 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Mon, 30 Mar 2020 19:32:43 +0300 Subject: [PATCH 01/80] Move annotations to dm --- cvat/apps/annotation/__init__.py | 4 - cvat/apps/annotation/admin.py | 3 - cvat/apps/annotation/annotation.py | 458 --------------- cvat/apps/annotation/apps.py | 18 - cvat/apps/annotation/format.py | 41 -- .../annotation/migrations/0001_initial.py | 48 -- .../migrations/0002_auto_20190805_0927.py | 74 --- cvat/apps/annotation/migrations/__init__.py | 3 - cvat/apps/annotation/models.py | 46 -- cvat/apps/annotation/serializers.py | 81 --- cvat/apps/annotation/settings.py | 17 - cvat/apps/annotation/tests.py | 3 - cvat/apps/annotation/views.py | 3 - cvat/apps/dataset_manager/bindings.py | 525 ++++++++++++++++-- .../formats}/README.md | 0 cvat/apps/engine/annotation.py | 19 +- cvat/apps/engine/data_manager.py | 2 +- cvat/apps/engine/views.py | 77 ++- cvat/apps/git/git.py | 1 - cvat/settings/base.py | 2 - 20 files changed, 533 insertions(+), 892 deletions(-) delete mode 100644 cvat/apps/annotation/__init__.py delete mode 100644 cvat/apps/annotation/admin.py delete mode 100644 cvat/apps/annotation/annotation.py delete mode 100644 cvat/apps/annotation/apps.py delete mode 100644 cvat/apps/annotation/format.py delete mode 100644 cvat/apps/annotation/migrations/0001_initial.py delete mode 100644 cvat/apps/annotation/migrations/0002_auto_20190805_0927.py delete mode 100644 cvat/apps/annotation/migrations/__init__.py delete mode 100644 cvat/apps/annotation/models.py delete mode 100644 cvat/apps/annotation/serializers.py delete mode 100644 cvat/apps/annotation/settings.py delete mode 100644 cvat/apps/annotation/tests.py delete mode 100644 cvat/apps/annotation/views.py rename cvat/apps/{annotation => dataset_manager/formats}/README.md (100%) diff --git a/cvat/apps/annotation/__init__.py b/cvat/apps/annotation/__init__.py deleted file mode 100644 index a6b8e925df10..000000000000 --- a/cvat/apps/annotation/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (C) 2018 Intel Corporation -# -# SPDX-License-Identifier: MIT -default_app_config = 'cvat.apps.annotation.apps.AnnotationConfig' diff --git a/cvat/apps/annotation/admin.py b/cvat/apps/annotation/admin.py deleted file mode 100644 index b66dde17a5cf..000000000000 --- a/cvat/apps/annotation/admin.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (C) 2018 Intel Corporation -# -# SPDX-License-Identifier: MIT diff --git a/cvat/apps/annotation/annotation.py b/cvat/apps/annotation/annotation.py deleted file mode 100644 index 49ababe9ffc1..000000000000 --- a/cvat/apps/annotation/annotation.py +++ /dev/null @@ -1,458 +0,0 @@ -# Copyright (C) 2018 Intel Corporation -# -# SPDX-License-Identifier: MIT - -import os -import copy -from collections import OrderedDict, namedtuple - -from django.utils import timezone - -from cvat.apps.engine.data_manager import DataManager, TrackManager -from cvat.apps.engine.serializers import LabeledDataSerializer - -class AnnotationIR: - def __init__(self, data=None): - self.reset() - if data: - self._tags = getattr(data, 'tags', []) or data['tags'] - self._shapes = getattr(data, 'shapes', []) or data['shapes'] - self._tracks = getattr(data, 'tracks', []) or data['tracks'] - - def add_tag(self, tag): - self._tags.append(tag) - - def add_shape(self, shape): - self._shapes.append(shape) - - def add_track(self, track): - self._tracks.append(track) - - @property - def tags(self): - return self._tags - - @property - def shapes(self): - return self._shapes - - 
@property - def tracks(self): - return self._tracks - - @property - def version(self): - return self._version - - @tags.setter - def tags(self, tags): - self._tags = tags - - @shapes.setter - def shapes(self, shapes): - self._shapes = shapes - - @tracks.setter - def tracks(self, tracks): - self._tracks = tracks - - @version.setter - def version(self, version): - self._version = version - - def __getitem__(self, key): - return getattr(self, key) - - @property - def data(self): - return { - 'version': self.version, - 'tags': self.tags, - 'shapes': self.shapes, - 'tracks': self.tracks, - } - - def serialize(self): - serializer = LabeledDataSerializer(data=self.data) - if serializer.is_valid(raise_exception=True): - return serializer.data - - #makes a data copy from specified frame interval - def slice(self, start, stop): - is_frame_inside = lambda x: (start <= int(x['frame']) <= stop) - splitted_data = AnnotationIR() - splitted_data.tags = copy.deepcopy(list(filter(is_frame_inside, self.tags))) - splitted_data.shapes = copy.deepcopy(list(filter(is_frame_inside, self.shapes))) - splitted_data.tracks = copy.deepcopy(list(filter(lambda y: len(list(filter(is_frame_inside, y['shapes']))), self.tracks))) - - return splitted_data - - @data.setter - def data(self, data): - self.version = data['version'] - self.tags = data['tags'] - self.shapes = data['shapes'] - self.tracks = data['tracks'] - - def reset(self): - self._version = 0 - self._tags = [] - self._shapes = [] - self._tracks = [] - -class Annotation: - Attribute = namedtuple('Attribute', 'name, value') - LabeledShape = namedtuple('LabeledShape', 'type, frame, label, points, occluded, attributes, group, z_order') - LabeledShape.__new__.__defaults__ = (0, 0) - TrackedShape = namedtuple('TrackedShape', 'type, frame, points, occluded, outside, keyframe, attributes, group, z_order, label, track_id') - TrackedShape.__new__.__defaults__ = (0, 0, None, 0) - Track = namedtuple('Track', 'label, group, shapes') - Tag = namedtuple('Tag', 'frame, label, attributes, group') - Tag.__new__.__defaults__ = (0, ) - Frame = namedtuple('Frame', 'frame, name, width, height, labeled_shapes, tags') - - def __init__(self, annotation_ir, db_task, scheme='', host='', create_callback=None): - self._annotation_ir = annotation_ir - self._db_task = db_task - self._scheme = scheme - self._host = host - self._create_callback=create_callback - self._MAX_ANNO_SIZE=30000 - self._frame_info = {} - self._frame_mapping = {} - self._frame_step = db_task.data.get_frame_step() - - db_labels = self._db_task.label_set.all().prefetch_related('attributespec_set').order_by('pk') - - self._label_mapping = OrderedDict((db_label.id, db_label) for db_label in db_labels) - - self._attribute_mapping = {db_label.id: {'mutable': {}, 'immutable': {}} for db_label in db_labels} - - for db_label in db_labels: - for db_attribute in db_label.attributespec_set.all(): - if db_attribute.mutable: - self._attribute_mapping[db_label.id]['mutable'][db_attribute.id] = db_attribute.name - else: - self._attribute_mapping[db_label.id]['immutable'][db_attribute.id] = db_attribute.name - - self._attribute_mapping_merged = {} - for label_id, attr_mapping in self._attribute_mapping.items(): - self._attribute_mapping_merged[label_id] = { - **attr_mapping['mutable'], - **attr_mapping['immutable'], - } - - self._init_frame_info() - self._init_meta() - - def _get_label_id(self, label_name): - for db_label in self._label_mapping.values(): - if label_name == db_label.name: - return db_label.id - return None - - def 
_get_label_name(self, label_id): - return self._label_mapping[label_id].name - - def _get_attribute_name(self, attribute_id): - for attribute_mapping in self._attribute_mapping_merged.values(): - if attribute_id in attribute_mapping: - return attribute_mapping[attribute_id] - - def _get_attribute_id(self, label_id, attribute_name, attribute_type=None): - if attribute_type: - container = self._attribute_mapping[label_id][attribute_type] - else: - container = self._attribute_mapping_merged[label_id] - - for attr_id, attr_name in container.items(): - if attribute_name == attr_name: - return attr_id - return None - - def _get_mutable_attribute_id(self, label_id, attribute_name): - return self._get_attribute_id(label_id, attribute_name, 'mutable') - - def _get_immutable_attribute_id(self, label_id, attribute_name): - return self._get_attribute_id(label_id, attribute_name, 'immutable') - - def _init_frame_info(self): - if hasattr(self._db_task.data, 'video'): - self._frame_info = { - frame: { - "path": "frame_{:06d}".format(frame), - "width": self._db_task.data.video.width, - "height": self._db_task.data.video.height, - } for frame in range(self._db_task.data.size) - } - else: - self._frame_info = {db_image.frame: { - "path": db_image.path, - "width": db_image.width, - "height": db_image.height, - } for db_image in self._db_task.data.images.all()} - - self._frame_mapping = { - self._get_filename(info["path"]): frame for frame, info in self._frame_info.items() - } - - def _init_meta(self): - db_segments = self._db_task.segment_set.all().prefetch_related('job_set') - self._meta = OrderedDict([ - ("task", OrderedDict([ - ("id", str(self._db_task.id)), - ("name", self._db_task.name), - ("size", str(self._db_task.data.size)), - ("mode", self._db_task.mode), - ("overlap", str(self._db_task.overlap)), - ("bugtracker", self._db_task.bug_tracker), - ("created", str(timezone.localtime(self._db_task.created_date))), - ("updated", str(timezone.localtime(self._db_task.updated_date))), - ("start_frame", str(self._db_task.data.start_frame)), - ("stop_frame", str(self._db_task.data.stop_frame)), - ("frame_filter", self._db_task.data.frame_filter), - ("z_order", str(self._db_task.z_order)), - - ("labels", [ - ("label", OrderedDict([ - ("name", db_label.name), - ("attributes", [ - ("attribute", OrderedDict([ - ("name", db_attr.name), - ("mutable", str(db_attr.mutable)), - ("input_type", db_attr.input_type), - ("default_value", db_attr.default_value), - ("values", db_attr.values)])) - for db_attr in db_label.attributespec_set.all()]) - ])) for db_label in self._label_mapping.values() - ]), - - ("segments", [ - ("segment", OrderedDict([ - ("id", str(db_segment.id)), - ("start", str(db_segment.start_frame)), - ("stop", str(db_segment.stop_frame)), - ("url", "{0}://{1}/?id={2}".format( - self._scheme, self._host, db_segment.job_set.all()[0].id))] - )) for db_segment in db_segments - ]), - - ("owner", OrderedDict([ - ("username", self._db_task.owner.username), - ("email", self._db_task.owner.email) - ]) if self._db_task.owner else ""), - - ("assignee", OrderedDict([ - ("username", self._db_task.assignee.username), - ("email", self._db_task.assignee.email) - ]) if self._db_task.assignee else ""), - ])), - ("dumped", str(timezone.localtime(timezone.now()))) - ]) - - if hasattr(self._db_task.data, "video"): - self._meta["task"]["original_size"] = OrderedDict([ - ("width", str(self._db_task.data.video.width)), - ("height", str(self._db_task.data.video.height)) - ]) - # Add source to dumped file - self._meta["source"] = 
str(os.path.basename(self._db_task.data.video.path)) - - def _export_attributes(self, attributes): - exported_attributes = [] - for attr in attributes: - attribute_name = self._get_attribute_name(attr["spec_id"]) - exported_attributes.append(Annotation.Attribute( - name=attribute_name, - value=attr["value"], - )) - return exported_attributes - - def _export_tracked_shape(self, shape): - return Annotation.TrackedShape( - type=shape["type"], - frame=self._db_task.data.start_frame + shape["frame"] * self._frame_step, - label=self._get_label_name(shape["label_id"]), - points=shape["points"], - occluded=shape["occluded"], - z_order=shape.get("z_order", 0), - group=shape.get("group", 0), - outside=shape.get("outside", False), - keyframe=shape.get("keyframe", True), - track_id=shape["track_id"], - attributes=self._export_attributes(shape["attributes"]), - ) - - def _export_labeled_shape(self, shape): - return Annotation.LabeledShape( - type=shape["type"], - label=self._get_label_name(shape["label_id"]), - frame=self._db_task.data.start_frame + shape["frame"] * self._frame_step, - points=shape["points"], - occluded=shape["occluded"], - z_order=shape.get("z_order", 0), - group=shape.get("group", 0), - attributes=self._export_attributes(shape["attributes"]), - ) - - def _export_tag(self, tag): - return Annotation.Tag( - frame=self._db_task.data.start_frame + tag["frame"] * self._frame_step, - label=self._get_label_name(tag["label_id"]), - group=tag.get("group", 0), - attributes=self._export_attributes(tag["attributes"]), - ) - - def group_by_frame(self): - def _get_frame(annotations, shape): - db_image = self._frame_info[shape["frame"]] - frame = self._db_task.data.start_frame + shape["frame"] * self._frame_step - if frame not in annotations: - annotations[frame] = Annotation.Frame( - frame=frame, - name=db_image['path'], - height=db_image["height"], - width=db_image["width"], - labeled_shapes=[], - tags=[], - ) - return annotations[frame] - - annotations = {} - data_manager = DataManager(self._annotation_ir) - for shape in sorted(data_manager.to_shapes(self._db_task.data.size), key=lambda shape: shape.get("z_order", 0)): - if 'track_id' in shape: - exported_shape = self._export_tracked_shape(shape) - else: - exported_shape = self._export_labeled_shape(shape) - _get_frame(annotations, shape).labeled_shapes.append(exported_shape) - - for tag in self._annotation_ir.tags: - _get_frame(annotations, tag).tags.append(self._export_tag(tag)) - - return iter(annotations.values()) - - @property - def shapes(self): - for shape in self._annotation_ir.shapes: - yield self._export_labeled_shape(shape) - - @property - def tracks(self): - for idx, track in enumerate(self._annotation_ir.tracks): - tracked_shapes = TrackManager.get_interpolated_shapes(track, 0, self._db_task.data.size) - for tracked_shape in tracked_shapes: - tracked_shape["attributes"] += track["attributes"] - tracked_shape["track_id"] = idx - tracked_shape["group"] = track["group"] - tracked_shape["label_id"] = track["label_id"] - - yield Annotation.Track( - label=self._get_label_name(track["label_id"]), - group=track["group"], - shapes=[self._export_tracked_shape(shape) for shape in tracked_shapes], - ) - - @property - def tags(self): - for tag in self._annotation_ir.tags: - yield self._export_tag(tag) - - @property - def meta(self): - return self._meta - - def _import_tag(self, tag): - _tag = tag._asdict() - label_id = self._get_label_id(_tag.pop('label')) - _tag['frame'] = (int(_tag['frame']) - self._db_task.data.start_frame) // self._frame_step 
- _tag['label_id'] = label_id - _tag['attributes'] = [self._import_attribute(label_id, attrib) for attrib in _tag['attributes'] - if self._get_attribute_id(label_id, attrib.name)] - return _tag - - def _import_attribute(self, label_id, attribute): - return { - 'spec_id': self._get_attribute_id(label_id, attribute.name), - 'value': attribute.value, - } - - def _import_shape(self, shape): - _shape = shape._asdict() - label_id = self._get_label_id(_shape.pop('label')) - _shape['frame'] = (int(_shape['frame']) - self._db_task.data.start_frame) // self._frame_step - _shape['label_id'] = label_id - _shape['attributes'] = [self._import_attribute(label_id, attrib) for attrib in _shape['attributes'] - if self._get_attribute_id(label_id, attrib.name)] - return _shape - - def _import_track(self, track): - _track = track._asdict() - label_id = self._get_label_id(_track.pop('label')) - _track['frame'] = (min(int(shape.frame) for shape in _track['shapes']) - \ - self._db_task.data.start_frame) // self._frame_step - _track['label_id'] = label_id - _track['attributes'] = [] - _track['shapes'] = [shape._asdict() for shape in _track['shapes']] - for shape in _track['shapes']: - shape['frame'] = (int(shape['frame']) - self._db_task.data.start_frame) // self._frame_step - _track['attributes'] = [self._import_attribute(label_id, attrib) for attrib in shape['attributes'] - if self._get_immutable_attribute_id(label_id, attrib.name)] - shape['attributes'] = [self._import_attribute(label_id, attrib) for attrib in shape['attributes'] - if self._get_mutable_attribute_id(label_id, attrib.name)] - - return _track - - def _call_callback(self): - if self._len() > self._MAX_ANNO_SIZE: - self._create_callback(self._annotation_ir.serialize()) - self._annotation_ir.reset() - - def add_tag(self, tag): - imported_tag = self._import_tag(tag) - if imported_tag['label_id']: - self._annotation_ir.add_tag(imported_tag) - self._call_callback() - - def add_shape(self, shape): - imported_shape = self._import_shape(shape) - if imported_shape['label_id']: - self._annotation_ir.add_shape(imported_shape) - self._call_callback() - - def add_track(self, track): - imported_track = self._import_track(track) - if imported_track['label_id']: - self._annotation_ir.add_track(imported_track) - self._call_callback() - - @property - def data(self): - return self._annotation_ir - - def _len(self): - track_len = 0 - for track in self._annotation_ir.tracks: - track_len += len(track['shapes']) - - return len(self._annotation_ir.tags) + len(self._annotation_ir.shapes) + track_len - - @property - def frame_info(self): - return self._frame_info - - @property - def frame_step(self): - return self._frame_step - - @staticmethod - def _get_filename(path): - return os.path.splitext(os.path.basename(path))[0] - - def match_frame(self, filename): - # try to match by filename - _filename = self._get_filename(filename) - if _filename in self._frame_mapping: - return self._frame_mapping[_filename] - - raise Exception("Cannot match filename or determinate framenumber for {} filename".format(filename)) diff --git a/cvat/apps/annotation/apps.py b/cvat/apps/annotation/apps.py deleted file mode 100644 index 6a14bfef6b8c..000000000000 --- a/cvat/apps/annotation/apps.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (C) 2018 Intel Corporation -# -# SPDX-License-Identifier: MIT - -from django.apps import AppConfig -from django.db.models.signals import post_migrate -from cvat.apps.annotation.settings import BUILTIN_FORMATS - -def register_builtins_callback(sender, **kwargs): - 
from .format import register_format - for builtin_format in BUILTIN_FORMATS: - register_format(builtin_format) - -class AnnotationConfig(AppConfig): - name = 'cvat.apps.annotation' - - def ready(self): - post_migrate.connect(register_builtins_callback, sender=self) diff --git a/cvat/apps/annotation/format.py b/cvat/apps/annotation/format.py deleted file mode 100644 index 9ac2a00ca1b8..000000000000 --- a/cvat/apps/annotation/format.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (C) 2018 Intel Corporation -# -# SPDX-License-Identifier: MIT - -from cvat.apps.annotation import models -from django.core.exceptions import ObjectDoesNotExist -from cvat.apps.annotation.serializers import AnnotationFormatSerializer -from django.core.files import File - -from copy import deepcopy - -def register_format(format_file): - source_code = open(format_file, 'r').read() - global_vars = {} - exec(source_code, global_vars) - if "format_spec" not in global_vars or not isinstance(global_vars["format_spec"], dict): - raise Exception("Could not find 'format_spec' definition in format file specification") - - format_spec = deepcopy(global_vars["format_spec"]) - format_spec["handler_file"] = File(open(format_file)) - for spec in format_spec["loaders"] + format_spec["dumpers"]: - spec["display_name"] = spec["display_name"].format( - name=format_spec["name"], - format=spec["format"], - version=spec["version"], - ) - - try: - annotation_format = models.AnnotationFormat.objects.get(name=format_spec["name"]) - serializer = AnnotationFormatSerializer(annotation_format, data=format_spec) - if serializer.is_valid(raise_exception=True): - serializer.save() - except ObjectDoesNotExist: - serializer = AnnotationFormatSerializer(data=format_spec) - if serializer.is_valid(raise_exception=True): - serializer.save() - -def get_annotation_formats(): - return AnnotationFormatSerializer( - models.AnnotationFormat.objects.all(), - many=True).data diff --git a/cvat/apps/annotation/migrations/0001_initial.py b/cvat/apps/annotation/migrations/0001_initial.py deleted file mode 100644 index 9c331fee941e..000000000000 --- a/cvat/apps/annotation/migrations/0001_initial.py +++ /dev/null @@ -1,48 +0,0 @@ -# Generated by Django 2.1.9 on 2019-07-31 15:20 - -import cvat.apps.annotation.models -import cvat.apps.engine.models -from django.conf import settings -import django.core.files.storage -from django.db import migrations, models -import django.db.models.deletion - - -class Migration(migrations.Migration): - - initial = True - - dependencies = [ - migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ] - - operations = [ - migrations.CreateModel( - name='AnnotationFormat', - fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('name', cvat.apps.engine.models.SafeCharField(max_length=256)), - ('created_date', models.DateTimeField(auto_now_add=True)), - ('updated_date', models.DateTimeField(auto_now_add=True)), - ('handler_file', models.FileField(storage=django.core.files.storage.FileSystemStorage(location=settings.BASE_DIR), upload_to=cvat.apps.annotation.models.upload_file_handler)), - ('owner', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to=settings.AUTH_USER_MODEL)), - ], - options={ - 'default_permissions': (), - }, - ), - migrations.CreateModel( - name='AnnotationHandler', - fields=[ - ('type', models.CharField(choices=[('dumper', 'DUMPER'), ('loader', 'LOADER')], max_length=16)), - ('display_name', 
cvat.apps.engine.models.SafeCharField(max_length=256, primary_key=True, serialize=False)), - ('format', models.CharField(max_length=16)), - ('version', models.CharField(max_length=16)), - ('handler', models.CharField(max_length=256)), - ('annotation_format', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='annotation.AnnotationFormat')), - ], - options={ - 'default_permissions': (), - }, - ), - ] diff --git a/cvat/apps/annotation/migrations/0002_auto_20190805_0927.py b/cvat/apps/annotation/migrations/0002_auto_20190805_0927.py deleted file mode 100644 index 6b2898640209..000000000000 --- a/cvat/apps/annotation/migrations/0002_auto_20190805_0927.py +++ /dev/null @@ -1,74 +0,0 @@ -# Generated by Django 2.1.9 on 2019-08-05 06:27 - -import cvat.apps.engine.models -from django.db import migrations, models -import django.db.models.deletion - -def split_handlers(apps, schema_editor): - db_alias = schema_editor.connection.alias - handler_model = apps.get_model('annotation', 'AnnotationHandler') - dumper_model = apps.get_model('annotation', "AnnotationDumper") - loader_model = apps.get_model('annotation', 'AnnotationLoader') - - - for db_handler in handler_model.objects.all(): - if db_handler.type == "dumper": - new_handler = dumper_model() - else: - new_handler = loader_model() - - new_handler.display_name = db_handler.display_name - new_handler.format = db_handler.format - new_handler.version = db_handler.version - new_handler.handler = db_handler.handler - new_handler.annotation_format = db_handler.annotation_format - - new_handler.save() - db_handler.delete() - -class Migration(migrations.Migration): - - dependencies = [ - ('annotation', '0001_initial'), - ] - - operations = [ - migrations.CreateModel( - name='AnnotationDumper', - fields=[ - ('display_name', cvat.apps.engine.models.SafeCharField(max_length=256, primary_key=True, serialize=False)), - ('format', models.CharField(max_length=16)), - ('version', models.CharField(max_length=16)), - ('handler', models.CharField(max_length=256)), - ('annotation_format', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='annotation.AnnotationFormat')), - ], - options={ - 'abstract': False, - 'default_permissions': (), - }, - ), - migrations.CreateModel( - name='AnnotationLoader', - fields=[ - ('display_name', cvat.apps.engine.models.SafeCharField(max_length=256, primary_key=True, serialize=False)), - ('format', models.CharField(max_length=16)), - ('version', models.CharField(max_length=16)), - ('handler', models.CharField(max_length=256)), - ('annotation_format', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='annotation.AnnotationFormat')), - ], - options={ - 'abstract': False, - 'default_permissions': (), - }, - ), - migrations.RunPython( - code=split_handlers, - ), - migrations.RemoveField( - model_name='annotationhandler', - name='annotation_format', - ), - migrations.DeleteModel( - name='AnnotationHandler', - ), - ] diff --git a/cvat/apps/annotation/migrations/__init__.py b/cvat/apps/annotation/migrations/__init__.py deleted file mode 100644 index b66dde17a5cf..000000000000 --- a/cvat/apps/annotation/migrations/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (C) 2018 Intel Corporation -# -# SPDX-License-Identifier: MIT diff --git a/cvat/apps/annotation/models.py b/cvat/apps/annotation/models.py deleted file mode 100644 index 3595327dca56..000000000000 --- a/cvat/apps/annotation/models.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (C) 2018 Intel Corporation -# -# SPDX-License-Identifier: 
MIT - -import os - -from django.db import models -from django.conf import settings -from django.core.files.storage import FileSystemStorage -from django.contrib.auth.models import User - -from cvat.apps.engine.models import SafeCharField - -def upload_file_handler(instance, filename): - return os.path.join('formats', str(instance.id), filename) - -class AnnotationFormat(models.Model): - name = SafeCharField(max_length=256) - owner = models.ForeignKey(User, null=True, blank=True, - on_delete=models.SET_NULL) - created_date = models.DateTimeField(auto_now_add=True) - updated_date = models.DateTimeField(auto_now_add=True) - handler_file = models.FileField( - upload_to=upload_file_handler, - storage=FileSystemStorage(location=os.path.join(settings.BASE_DIR)), - ) - - class Meta: - default_permissions = () - -class AnnotationHandler(models.Model): - display_name = SafeCharField(max_length=256, primary_key=True) - format = models.CharField(max_length=16) - version = models.CharField(max_length=16) - handler = models.CharField(max_length=256) - annotation_format = models.ForeignKey(AnnotationFormat, on_delete=models.CASCADE) - - class Meta: - default_permissions = () - abstract = True - -class AnnotationDumper(AnnotationHandler): - pass - -class AnnotationLoader(AnnotationHandler): - pass diff --git a/cvat/apps/annotation/serializers.py b/cvat/apps/annotation/serializers.py deleted file mode 100644 index 7284c0414a00..000000000000 --- a/cvat/apps/annotation/serializers.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright (C) 2018-2020 Intel Corporation -# -# SPDX-License-Identifier: MIT - -from django.utils import timezone -from rest_framework import serializers - -from cvat.apps.annotation import models - -class AnnotationDumperSerializer(serializers.ModelSerializer): - class Meta: - model = models.AnnotationDumper - exclude = ('annotation_format',) - # https://www.django-rest-framework.org/api-guide/validators/#updating-nested-serializers - extra_kwargs = { - 'display_name': { - 'validators': [], - }, - } - -class AnnotationLoaderSerializer(serializers.ModelSerializer): - class Meta: - model = models.AnnotationLoader - exclude = ('annotation_format',) - # https://www.django-rest-framework.org/api-guide/validators/#updating-nested-serializers - extra_kwargs = { - 'display_name': { - 'validators': [], - }, - } - -class AnnotationFormatSerializer(serializers.ModelSerializer): - dumpers = AnnotationDumperSerializer(many=True, source="annotationdumper_set") - loaders = AnnotationLoaderSerializer(many=True, source="annotationloader_set") - - class Meta: - model = models.AnnotationFormat - fields = "__all__" - - # pylint: disable=no-self-use - def create(self, validated_data): - dumpers = validated_data.pop("annotationdumper_set") - loaders = validated_data.pop("annotationloader_set") - - annotation_format = models.AnnotationFormat() - annotation_format.name = validated_data["name"] - annotation_format.handler_file = validated_data["handler_file"].name - annotation_format.save() - - for dumper in dumpers: - models.AnnotationDumper(annotation_format=annotation_format, **dumper).save() - - for loader in loaders: - models.AnnotationLoader(annotation_format=annotation_format, **loader).save() - - return annotation_format - - # pylint: disable=no-self-use - def update(self, instance, validated_data): - dumper_names = [handler["display_name"] for handler in validated_data["annotationdumper_set"]] - loader_names = [handler["display_name"] for handler in validated_data["annotationloader_set"]] - 
instance.handler_file = validated_data.get('handler_file', instance.handler_file) - instance.owner = validated_data.get('owner', instance.owner) - instance.updated_date = timezone.localtime(timezone.now()) - - handlers_to_delete = [d for d in instance.annotationdumper_set.all() if d.display_name not in dumper_names] + \ - [l for l in instance.annotationloader_set.all() if l.display_name not in loader_names] - - for db_handler in handlers_to_delete: - db_handler.delete() - - for dumper in validated_data["annotationdumper_set"]: - models.AnnotationDumper(annotation_format=instance, **dumper).save() - for loader in validated_data["annotationloader_set"]: - models.AnnotationLoader(annotation_format=instance, **loader).save() - - instance.save() - return instance - -class AnnotationFileSerializer(serializers.Serializer): - annotation_file = serializers.FileField() diff --git a/cvat/apps/annotation/settings.py b/cvat/apps/annotation/settings.py deleted file mode 100644 index 9099a387c05c..000000000000 --- a/cvat/apps/annotation/settings.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (C) 2018 Intel Corporation -# -# SPDX-License-Identifier: MIT - -import os - -path_prefix = os.path.join('cvat', 'apps', 'dataset_manager', 'formats') -BUILTIN_FORMATS = ( - os.path.join(path_prefix, 'cvat.py'), - os.path.join(path_prefix, 'pascal_voc.py'), - os.path.join(path_prefix, 'yolo.py'), - os.path.join(path_prefix, 'coco.py'), - os.path.join(path_prefix, 'mask.py'), - os.path.join(path_prefix, 'tfrecord.py'), - os.path.join(path_prefix, 'mot.py'), - os.path.join(path_prefix, 'labelme.py'), -) diff --git a/cvat/apps/annotation/tests.py b/cvat/apps/annotation/tests.py deleted file mode 100644 index b66dde17a5cf..000000000000 --- a/cvat/apps/annotation/tests.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (C) 2018 Intel Corporation -# -# SPDX-License-Identifier: MIT diff --git a/cvat/apps/annotation/views.py b/cvat/apps/annotation/views.py deleted file mode 100644 index b66dde17a5cf..000000000000 --- a/cvat/apps/annotation/views.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (C) 2018 Intel Corporation -# -# SPDX-License-Identifier: MIT diff --git a/cvat/apps/dataset_manager/bindings.py b/cvat/apps/dataset_manager/bindings.py index 53a103f02a68..f5264a2b5f9e 100644 --- a/cvat/apps/dataset_manager/bindings.py +++ b/cvat/apps/dataset_manager/bindings.py @@ -3,24 +3,494 @@ # # SPDX-License-Identifier: MIT -from collections import OrderedDict +import osp as osp +from collections import OrderedDict, namedtuple +from copy import deepcopy from django.db import transaction - -from cvat.apps.annotation.annotation import Annotation -from cvat.apps.engine.annotation import TaskAnnotation -from cvat.apps.engine.models import ShapeType, AttributeType +from django.utils import timezone import datumaro.components.extractor as datumaro +from cvat.apps.engine.annotation import TaskAnnotation +from cvat.apps.engine.data_manager import AnnotationManager, TrackManager +from cvat.apps.engine.models import AttributeType, ShapeType +from cvat.apps.engine.serializers import LabeledDataSerializer from datumaro.util.image import Image +class AnnotationIR: + def __init__(self, data=None): + self.reset() + if data: + self._tags = getattr(data, 'tags', []) or data['tags'] + self._shapes = getattr(data, 'shapes', []) or data['shapes'] + self._tracks = getattr(data, 'tracks', []) or data['tracks'] + + def add_tag(self, tag): + self._tags.append(tag) + + def add_shape(self, shape): + self._shapes.append(shape) + + def add_track(self, track): + 
self._tracks.append(track) + + @property + def tags(self): + return self._tags + + @property + def shapes(self): + return self._shapes + + @property + def tracks(self): + return self._tracks + + @property + def version(self): + return self._version + + @tags.setter + def tags(self, tags): + self._tags = tags + + @shapes.setter + def shapes(self, shapes): + self._shapes = shapes + + @tracks.setter + def tracks(self, tracks): + self._tracks = tracks + + @version.setter + def version(self, version): + self._version = version + + def __getitem__(self, key): + return getattr(self, key) + + @property + def data(self): + return { + 'version': self.version, + 'tags': self.tags, + 'shapes': self.shapes, + 'tracks': self.tracks, + } + + def serialize(self): + serializer = LabeledDataSerializer(data=self.data) + if serializer.is_valid(raise_exception=True): + return serializer.data + + # makes a data copy from specified frame interval + def slice(self, start, stop): + def is_frame_inside(x): return (start <= int(x['frame']) <= stop) + splitted_data = AnnotationIR() + splitted_data.tags = deepcopy(list(filter(is_frame_inside, self.tags))) + splitted_data.shapes = deepcopy( + list(filter(is_frame_inside, self.shapes))) + splitted_data.tracks = deepcopy(list(filter(lambda y: len( + list(filter(is_frame_inside, y['shapes']))), self.tracks))) + + return splitted_data + + @data.setter + def data(self, data): + self.version = data['version'] + self.tags = data['tags'] + self.shapes = data['shapes'] + self.tracks = data['tracks'] + + def reset(self): + self._version = 0 + self._tags = [] + self._shapes = [] + self._tracks = [] + + +class Annotation: + Attribute = namedtuple('Attribute', 'name, value') + LabeledShape = namedtuple( + 'LabeledShape', 'type, frame, label, points, occluded, attributes, group, z_order') + LabeledShape.__new__.__defaults__ = (0, 0) + TrackedShape = namedtuple( + 'TrackedShape', 'type, frame, points, occluded, outside, keyframe, attributes, group, z_order, label, track_id') + TrackedShape.__new__.__defaults__ = (0, 0, None, 0) + Track = namedtuple('Track', 'label, group, shapes') + Tag = namedtuple('Tag', 'frame, label, attributes, group') + Tag.__new__.__defaults__ = (0, ) + Frame = namedtuple( + 'Frame', 'frame, name, width, height, labeled_shapes, tags') + + def __init__(self, annotation_ir, db_task, scheme='', host='', create_callback=None): + self._annotation_ir = annotation_ir + self._db_task = db_task + self._scheme = scheme + self._host = host + self._create_callback = create_callback + self._MAX_ANNO_SIZE = 30000 + self._frame_info = {} + self._frame_mapping = {} + self._frame_step = db_task.data.get_frame_step() + + db_labels = self._db_task.label_set.all().prefetch_related( + 'attributespec_set').order_by('pk') + + self._label_mapping = OrderedDict( + (db_label.id, db_label) for db_label in db_labels) + + self._attribute_mapping = {db_label.id: { + 'mutable': {}, 'immutable': {}} for db_label in db_labels} + + for db_label in db_labels: + for db_attribute in db_label.attributespec_set.all(): + if db_attribute.mutable: + self._attribute_mapping[db_label.id]['mutable'][db_attribute.id] = db_attribute.name + else: + self._attribute_mapping[db_label.id]['immutable'][db_attribute.id] = db_attribute.name + + self._attribute_mapping_merged = {} + for label_id, attr_mapping in self._attribute_mapping.items(): + self._attribute_mapping_merged[label_id] = { + **attr_mapping['mutable'], + **attr_mapping['immutable'], + } + + self._init_frame_info() + self._init_meta() + + def 
_get_label_id(self, label_name): + for db_label in self._label_mapping.values(): + if label_name == db_label.name: + return db_label.id + return None + + def _get_label_name(self, label_id): + return self._label_mapping[label_id].name + + def _get_attribute_name(self, attribute_id): + for attribute_mapping in self._attribute_mapping_merged.values(): + if attribute_id in attribute_mapping: + return attribute_mapping[attribute_id] + + def _get_attribute_id(self, label_id, attribute_name, attribute_type=None): + if attribute_type: + container = self._attribute_mapping[label_id][attribute_type] + else: + container = self._attribute_mapping_merged[label_id] + + for attr_id, attr_name in container.items(): + if attribute_name == attr_name: + return attr_id + return None + + def _get_mutable_attribute_id(self, label_id, attribute_name): + return self._get_attribute_id(label_id, attribute_name, 'mutable') + + def _get_immutable_attribute_id(self, label_id, attribute_name): + return self._get_attribute_id(label_id, attribute_name, 'immutable') + + def _init_frame_info(self): + if hasattr(self._db_task.data, 'video'): + self._frame_info = { + frame: { + "path": "frame_{:06d}".format(frame), + "width": self._db_task.data.video.width, + "height": self._db_task.data.video.height, + } for frame in range(self._db_task.data.size) + } + else: + self._frame_info = {db_image.frame: { + "path": db_image.path, + "width": db_image.width, + "height": db_image.height, + } for db_image in self._db_task.data.images.all()} + + self._frame_mapping = { + self._get_filename(info["path"]): frame for frame, info in self._frame_info.items() + } + + def _init_meta(self): + db_segments = self._db_task.segment_set.all().prefetch_related('job_set') + self._meta = OrderedDict([ + ("task", OrderedDict([ + ("id", str(self._db_task.id)), + ("name", self._db_task.name), + ("size", str(self._db_task.data.size)), + ("mode", self._db_task.mode), + ("overlap", str(self._db_task.overlap)), + ("bugtracker", self._db_task.bug_tracker), + ("created", str(timezone.localtime(self._db_task.created_date))), + ("updated", str(timezone.localtime(self._db_task.updated_date))), + ("start_frame", str(self._db_task.data.start_frame)), + ("stop_frame", str(self._db_task.data.stop_frame)), + ("frame_filter", self._db_task.data.frame_filter), + ("z_order", str(self._db_task.z_order)), + + ("labels", [ + ("label", OrderedDict([ + ("name", db_label.name), + ("attributes", [ + ("attribute", OrderedDict([ + ("name", db_attr.name), + ("mutable", str(db_attr.mutable)), + ("input_type", db_attr.input_type), + ("default_value", db_attr.default_value), + ("values", db_attr.values)])) + for db_attr in db_label.attributespec_set.all()]) + ])) for db_label in self._label_mapping.values() + ]), + + ("segments", [ + ("segment", OrderedDict([ + ("id", str(db_segment.id)), + ("start", str(db_segment.start_frame)), + ("stop", str(db_segment.stop_frame)), + ("url", "{0}://{1}/?id={2}".format( + self._scheme, self._host, db_segment.job_set.all()[0].id))] + )) for db_segment in db_segments + ]), + + ("owner", OrderedDict([ + ("username", self._db_task.owner.username), + ("email", self._db_task.owner.email) + ]) if self._db_task.owner else ""), + + ("assignee", OrderedDict([ + ("username", self._db_task.assignee.username), + ("email", self._db_task.assignee.email) + ]) if self._db_task.assignee else ""), + ])), + ("dumped", str(timezone.localtime(timezone.now()))) + ]) + + if hasattr(self._db_task.data, "video"): + self._meta["task"]["original_size"] = OrderedDict([ + 
("width", str(self._db_task.data.video.width)), + ("height", str(self._db_task.data.video.height)) + ]) + # Add source to dumped file + self._meta["source"] = str( + osp.basename(self._db_task.data.video.path)) + + def _export_attributes(self, attributes): + exported_attributes = [] + for attr in attributes: + attribute_name = self._get_attribute_name(attr["spec_id"]) + exported_attributes.append(Annotation.Attribute( + name=attribute_name, + value=attr["value"], + )) + return exported_attributes + + def _export_tracked_shape(self, shape): + return Annotation.TrackedShape( + type=shape["type"], + frame=self._db_task.data.start_frame + + shape["frame"] * self._frame_step, + label=self._get_label_name(shape["label_id"]), + points=shape["points"], + occluded=shape["occluded"], + z_order=shape.get("z_order", 0), + group=shape.get("group", 0), + outside=shape.get("outside", False), + keyframe=shape.get("keyframe", True), + track_id=shape["track_id"], + attributes=self._export_attributes(shape["attributes"]), + ) + + def _export_labeled_shape(self, shape): + return Annotation.LabeledShape( + type=shape["type"], + label=self._get_label_name(shape["label_id"]), + frame=self._db_task.data.start_frame + + shape["frame"] * self._frame_step, + points=shape["points"], + occluded=shape["occluded"], + z_order=shape.get("z_order", 0), + group=shape.get("group", 0), + attributes=self._export_attributes(shape["attributes"]), + ) + + def _export_tag(self, tag): + return Annotation.Tag( + frame=self._db_task.data.start_frame + + tag["frame"] * self._frame_step, + label=self._get_label_name(tag["label_id"]), + group=tag.get("group", 0), + attributes=self._export_attributes(tag["attributes"]), + ) + + def group_by_frame(self): + def _get_frame(annotations, shape): + db_image = self._frame_info[shape["frame"]] + frame = self._db_task.data.start_frame + \ + shape["frame"] * self._frame_step + if frame not in annotations: + annotations[frame] = Annotation.Frame( + frame=frame, + name=db_image['path'], + height=db_image["height"], + width=db_image["width"], + labeled_shapes=[], + tags=[], + ) + return annotations[frame] + + annotations = {} + data_manager = AnnotationManager(self._annotation_ir) + for shape in sorted(data_manager.to_shapes(self._db_task.data.size), key=lambda shape: shape.get("z_order", 0)): + if 'track_id' in shape: + exported_shape = self._export_tracked_shape(shape) + else: + exported_shape = self._export_labeled_shape(shape) + _get_frame(annotations, shape).labeled_shapes.append( + exported_shape) + + for tag in self._annotation_ir.tags: + _get_frame(annotations, tag).tags.append(self._export_tag(tag)) + + return iter(annotations.values()) + + @property + def shapes(self): + for shape in self._annotation_ir.shapes: + yield self._export_labeled_shape(shape) + + @property + def tracks(self): + for idx, track in enumerate(self._annotation_ir.tracks): + tracked_shapes = TrackManager.get_interpolated_shapes( + track, 0, self._db_task.data.size) + for tracked_shape in tracked_shapes: + tracked_shape["attributes"] += track["attributes"] + tracked_shape["track_id"] = idx + tracked_shape["group"] = track["group"] + tracked_shape["label_id"] = track["label_id"] + + yield Annotation.Track( + label=self._get_label_name(track["label_id"]), + group=track["group"], + shapes=[self._export_tracked_shape( + shape) for shape in tracked_shapes], + ) + + @property + def tags(self): + for tag in self._annotation_ir.tags: + yield self._export_tag(tag) + + @property + def meta(self): + return self._meta + + def 
_import_tag(self, tag): + _tag = tag._asdict() + label_id = self._get_label_id(_tag.pop('label')) + _tag['frame'] = (int(_tag['frame']) - + self._db_task.data.start_frame) // self._frame_step + _tag['label_id'] = label_id + _tag['attributes'] = [self._import_attribute(label_id, attrib) for attrib in _tag['attributes'] + if self._get_attribute_id(label_id, attrib.name)] + return _tag + + def _import_attribute(self, label_id, attribute): + return { + 'spec_id': self._get_attribute_id(label_id, attribute.name), + 'value': attribute.value, + } + + def _import_shape(self, shape): + _shape = shape._asdict() + label_id = self._get_label_id(_shape.pop('label')) + _shape['frame'] = (int(_shape['frame']) - + self._db_task.data.start_frame) // self._frame_step + _shape['label_id'] = label_id + _shape['attributes'] = [self._import_attribute(label_id, attrib) for attrib in _shape['attributes'] + if self._get_attribute_id(label_id, attrib.name)] + return _shape + + def _import_track(self, track): + _track = track._asdict() + label_id = self._get_label_id(_track.pop('label')) + _track['frame'] = (min(int(shape.frame) for shape in _track['shapes']) - + self._db_task.data.start_frame) // self._frame_step + _track['label_id'] = label_id + _track['attributes'] = [] + _track['shapes'] = [shape._asdict() for shape in _track['shapes']] + for shape in _track['shapes']: + shape['frame'] = (int(shape['frame']) - + self._db_task.data.start_frame) // self._frame_step + _track['attributes'] = [self._import_attribute(label_id, attrib) for attrib in shape['attributes'] + if self._get_immutable_attribute_id(label_id, attrib.name)] + shape['attributes'] = [self._import_attribute(label_id, attrib) for attrib in shape['attributes'] + if self._get_mutable_attribute_id(label_id, attrib.name)] + + return _track + + def _call_callback(self): + if self._len() > self._MAX_ANNO_SIZE: + self._create_callback(self._annotation_ir.serialize()) + self._annotation_ir.reset() + + def add_tag(self, tag): + imported_tag = self._import_tag(tag) + if imported_tag['label_id']: + self._annotation_ir.add_tag(imported_tag) + self._call_callback() + + def add_shape(self, shape): + imported_shape = self._import_shape(shape) + if imported_shape['label_id']: + self._annotation_ir.add_shape(imported_shape) + self._call_callback() + + def add_track(self, track): + imported_track = self._import_track(track) + if imported_track['label_id']: + self._annotation_ir.add_track(imported_track) + self._call_callback() + + @property + def data(self): + return self._annotation_ir + + def _len(self): + track_len = 0 + for track in self._annotation_ir.tracks: + track_len += len(track['shapes']) + + return len(self._annotation_ir.tags) + len(self._annotation_ir.shapes) + track_len + + @property + def frame_info(self): + return self._frame_info + + @property + def frame_step(self): + return self._frame_step + + @staticmethod + def _get_filename(path): + return osp.splitext(osp.basename(path))[0] + + def match_frame(self, filename): + # try to match by filename + _filename = self._get_filename(filename) + if _filename in self._frame_mapping: + return self._frame_mapping[_filename] + + raise Exception( + "Cannot match filename or determinate framenumber for {} filename".format(filename)) + + class CvatImagesExtractor(datumaro.Extractor): def __init__(self, url, frame_provider): super().__init__() self._frame_provider = frame_provider - self._subsets = None def __iter__(self): frames = self._frame_provider.get_frames( @@ -35,16 +505,6 @@ def __iter__(self): def 
__len__(self): return len(self._frame_provider) - def subsets(self): - return self._subsets - - def get(self, item_id, subset=None, path=None): - if path or subset: - raise KeyError() - return datumaro.DatasetItem( - id=item_id, - image=self._frame_provider[item_id].getvalue() - ) class CvatAnnotationsExtractor(datumaro.Extractor): def __init__(self, url, cvat_annotations): @@ -58,7 +518,7 @@ def __init__(self, url, cvat_annotations): cvat_frame_anno.height, cvat_frame_anno.width) ) dm_item = datumaro.DatasetItem(id=cvat_frame_anno.frame, - annotations=dm_anno, image=dm_image) + annotations=dm_anno, image=dm_image) dm_annotations.append((dm_item.id, dm_item)) dm_annotations = sorted(dm_annotations, key=lambda e: int(e[0])) @@ -71,11 +531,6 @@ def __iter__(self): def __len__(self): return len(self._items) - # pylint: disable=no-self-use - def subsets(self): - return [] - # pylint: enable=no-self-use - def categories(self): return self._categories @@ -100,7 +555,7 @@ def _read_cvat_anno(self, cvat_frame_anno, cvat_task_anno): categories = self.categories() label_cat = categories[datumaro.AnnotationType.label] - map_label = lambda name: label_cat.find(name)[0] + def map_label(name): return label_cat.find(name)[0] label_attrs = { label['name']: label['attributes'] for _, label in cvat_task_anno.meta['task']['labels'] @@ -130,7 +585,7 @@ def convert_attrs(label, cvat_attrs): anno_attr = convert_attrs(tag_obj.label, tag_obj.attributes) anno = datumaro.Label(label=anno_label, - attributes=anno_attr, group=anno_group) + attributes=anno_attr, group=anno_group) item_anno.append(anno) for shape_obj in cvat_frame_anno.labeled_shapes: @@ -147,17 +602,17 @@ def convert_attrs(label, cvat_attrs): anno_points = shape_obj.points if shape_obj.type == ShapeType.POINTS: anno = datumaro.Points(anno_points, - label=anno_label, attributes=anno_attr, group=anno_group) + label=anno_label, attributes=anno_attr, group=anno_group) elif shape_obj.type == ShapeType.POLYLINE: anno = datumaro.PolyLine(anno_points, - label=anno_label, attributes=anno_attr, group=anno_group) + label=anno_label, attributes=anno_attr, group=anno_group) elif shape_obj.type == ShapeType.POLYGON: anno = datumaro.Polygon(anno_points, - label=anno_label, attributes=anno_attr, group=anno_group) + label=anno_label, attributes=anno_attr, group=anno_group) elif shape_obj.type == ShapeType.RECTANGLE: x0, y0, x1, y1 = anno_points anno = datumaro.Bbox(x0, y0, x1 - x0, y1 - y0, - label=anno_label, attributes=anno_attr, group=anno_group) + label=anno_label, attributes=anno_attr, group=anno_group) else: raise Exception("Unknown shape type '%s'" % shape_obj.type) @@ -165,6 +620,7 @@ def convert_attrs(label, cvat_attrs): return item_anno + class CvatTaskExtractor(CvatAnnotationsExtractor): def __init__(self, url, db_task, user): cvat_annotations = TaskAnnotation(db_task.id, user) @@ -197,9 +653,10 @@ def match_frame(item, cvat_task_anno): frame_number = int(item.id[len('frame_'):]) if not frame_number in cvat_task_anno.frame_info: raise Exception("Could not match item id: '%s' with any task frame" % - item.id) + item.id) return frame_number + def import_dm_annotations(dm_dataset, cvat_task_anno): shapes = { datumaro.AnnotationType.bbox: ShapeType.RECTANGLE, @@ -214,8 +671,8 @@ def import_dm_annotations(dm_dataset, cvat_task_anno): frame_number = match_frame(item, cvat_task_anno) # do not store one-item groups - group_map = { 0: 0 } - group_size = { 0: 0 } + group_map = {0: 0} + group_size = {0: 0} for ann in item.annotations: if ann.type in shapes: group = 
group_map.get(ann.group) @@ -226,7 +683,7 @@ def import_dm_annotations(dm_dataset, cvat_task_anno): else: group_size[ann.group] += 1 group_map = {g: s for g, s in group_size.items() - if 1 < s and group_map[g]} + if 1 < s and group_map[g]} group_map = {g: i for i, g in enumerate([0] + sorted(group_map))} for ann in item.annotations: @@ -239,7 +696,7 @@ def import_dm_annotations(dm_dataset, cvat_task_anno): occluded=ann.attributes.get('occluded') == True, group=group_map.get(ann.group, 0), attributes=[cvat_task_anno.Attribute(name=n, value=str(v)) - for n, v in ann.attributes.items()], + for n, v in ann.attributes.items()], )) elif ann.type == datumaro.AnnotationType.label: cvat_task_anno.add_tag(cvat_task_anno.Tag( @@ -247,5 +704,5 @@ def import_dm_annotations(dm_dataset, cvat_task_anno): label=label_cat.items[ann.label].name, group=group_map.get(ann.group, 0), attributes=[cvat_task_anno.Attribute(name=n, value=str(v)) - for n, v in ann.attributes.items()], + for n, v in ann.attributes.items()], )) diff --git a/cvat/apps/annotation/README.md b/cvat/apps/dataset_manager/formats/README.md similarity index 100% rename from cvat/apps/annotation/README.md rename to cvat/apps/dataset_manager/formats/README.md diff --git a/cvat/apps/engine/annotation.py b/cvat/apps/engine/annotation.py index bacfd0d7e5ad..093c7ee97edd 100644 --- a/cvat/apps/engine/annotation.py +++ b/cvat/apps/engine/annotation.py @@ -3,26 +3,25 @@ # SPDX-License-Identifier: MIT import os -from enum import Enum from collections import OrderedDict -from django.utils import timezone -from PIL import Image +from enum import Enum from django.conf import settings from django.db import transaction +from django.utils import timezone -from cvat.apps.profiler import silk_profile +from cvat.apps.dataset_manager.bindings import Annotation, AnnotationIR from cvat.apps.engine.plugins import plugin_decorator -from cvat.apps.annotation.annotation import AnnotationIR, Annotation from cvat.apps.engine.utils import execute_python_code, import_modules +from cvat.apps.profiler import silk_profile -from . import models -from .data_manager import DataManager +from . import models, serializers +from .data_manager import AnnotationManager from .log import slogger -from . import serializers -"""dot.notation access to dictionary attributes""" + class dotdict(OrderedDict): + """dot.notation access to dictionary attributes""" __getattr__ = OrderedDict.get __setattr__ = OrderedDict.__setitem__ __delattr__ = OrderedDict.__delitem__ @@ -687,7 +686,7 @@ def _patch_data(self, data, action): self._merge_data(_data, jobs[jid]["start"], self.db_task.overlap) def _merge_data(self, data, start_frame, overlap): - data_manager = DataManager(self.ir_data) + data_manager = AnnotationManager(self.ir_data) data_manager.merge(data, start_frame, overlap) def put(self, data): diff --git a/cvat/apps/engine/data_manager.py b/cvat/apps/engine/data_manager.py index b39c6783f616..fc401626571e 100644 --- a/cvat/apps/engine/data_manager.py +++ b/cvat/apps/engine/data_manager.py @@ -11,7 +11,7 @@ from . 
import models -class DataManager: +class AnnotationManager: def __init__(self, data): self.data = data diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index f1ac8e404c4a..d5bac4fd0e7a 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -5,56 +5,49 @@ import os import os.path as osp import re -import traceback import shutil +import traceback from datetime import datetime from tempfile import mkstemp -from django.views.generic import RedirectView +import django_rq +from django.conf import settings +from django.contrib.auth.models import User +from django.core.exceptions import ObjectDoesNotExist +from django.db import IntegrityError from django.http import HttpResponse, HttpResponseNotFound from django.shortcuts import render -from django.conf import settings -from sendfile import sendfile -from rest_framework.permissions import IsAuthenticated -from rest_framework.response import Response -from rest_framework.renderers import JSONRenderer -from rest_framework import status -from rest_framework import viewsets -from rest_framework import serializers +from django.utils import timezone +from django.utils.decorators import method_decorator +from django.views.generic import RedirectView +from django_filters import rest_framework as filters +from django_filters.rest_framework import DjangoFilterBackend +from drf_yasg import openapi +from drf_yasg.inspectors import CoreAPICompatInspector, NotHandled +from drf_yasg.utils import swagger_auto_schema +from rest_framework import mixins, serializers, status, viewsets from rest_framework.decorators import action -from rest_framework import mixins from rest_framework.exceptions import APIException -from django_filters import rest_framework as filters -import django_rq -from django.db import IntegrityError -from django.utils import timezone - +from rest_framework.permissions import SAFE_METHODS, IsAuthenticated +from rest_framework.renderers import JSONRenderer +from rest_framework.response import Response +from sendfile import sendfile -from . 
import annotation, task, models -from cvat.settings.base import JS_3RDPARTY, CSS_3RDPARTY -from cvat.apps.authentication.decorators import login_required -from .log import slogger, clogger -from cvat.apps.engine.models import StatusChoice, Task, Job, Plugin -from cvat.apps.engine.serializers import (TaskSerializer, UserSerializer, - ExceptionSerializer, AboutSerializer, JobSerializer, DataMetaSerializer, - RqStatusSerializer, DataSerializer, LabeledDataSerializer, - PluginSerializer, FileInfoSerializer, LogEventSerializer, - ProjectSerializer, BasicUserSerializer) -from cvat.apps.annotation.serializers import AnnotationFileSerializer, AnnotationFormatSerializer -from django.contrib.auth.models import User -from django.core.exceptions import ObjectDoesNotExist +import cvat.apps.dataset_manager as dataset_manager from cvat.apps.authentication import auth -from rest_framework.permissions import SAFE_METHODS -from cvat.apps.annotation.models import AnnotationDumper, AnnotationLoader -from cvat.apps.annotation.format import get_annotation_formats +from cvat.apps.authentication.decorators import login_required from cvat.apps.engine.frame_provider import FrameProvider -import cvat.apps.dataset_manager.task as DatumaroTask +from cvat.apps.engine.models import Job, Plugin, StatusChoice, Task +from cvat.apps.engine.serializers import ( + AboutSerializer, BasicUserSerializer, DataMetaSerializer, DataSerializer, + ExceptionSerializer, FileInfoSerializer, JobSerializer, + LabeledDataSerializer, LogEventSerializer, PluginSerializer, + ProjectSerializer, RqStatusSerializer, TaskSerializer, UserSerializer) +from cvat.settings.base import CSS_3RDPARTY, JS_3RDPARTY + +from . import annotation, models, task +from .log import clogger, slogger -from drf_yasg.utils import swagger_auto_schema -from drf_yasg import openapi -from django.utils.decorators import method_decorator -from drf_yasg.inspectors import NotHandled, CoreAPICompatInspector -from django_filters.rest_framework import DjangoFilterBackend # drf-yasg component doesn't handle correctly URL_FORMAT_OVERRIDE and # send requests with ?format=openapi suffix instead of ?scheme=openapi. 
@@ -211,13 +204,7 @@ def share(request): responses={'200': AnnotationFormatSerializer(many=True)}) @action(detail=False, methods=['GET'], url_path='annotation/formats') def annotation_formats(request): - data = get_annotation_formats() - return Response(data) - - @staticmethod - @action(detail=False, methods=['GET'], url_path='dataset/formats') - def dataset_formats(request): - data = DatumaroTask.get_export_formats() + data = dataset_manager.get_export_formats() data = JSONRenderer().render(data) return Response(data) diff --git a/cvat/apps/git/git.py b/cvat/apps/git/git.py index 4f35812ba2de..5189acbdfeeb 100644 --- a/cvat/apps/git/git.py +++ b/cvat/apps/git/git.py @@ -10,7 +10,6 @@ from cvat.apps.engine.annotation import dump_task_data from cvat.apps.engine.plugins import add_plugin from cvat.apps.git.models import GitStatusChoice -from cvat.apps.annotation.models import AnnotationDumper from cvat.apps.git.models import GitData from collections import OrderedDict diff --git a/cvat/settings/base.py b/cvat/settings/base.py index 6bf38049e8c7..24a2861af17e 100644 --- a/cvat/settings/base.py +++ b/cvat/settings/base.py @@ -96,9 +96,7 @@ def generate_ssh_keys(): 'cvat.apps.engine', 'cvat.apps.authentication', 'cvat.apps.documentation', - 'cvat.apps.git', 'cvat.apps.dataset_manager', - 'cvat.apps.annotation', 'django_rq', 'compressor', 'cacheops', From 06d621713c2f96434ba5942fab0ba285255009b4 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Mon, 30 Mar 2020 20:43:45 +0300 Subject: [PATCH 02/80] Refactor dm --- cvat/apps/dataset_manager/__init__.py | 7 ++ cvat/apps/dataset_manager/_tests.py | 2 +- cvat/apps/dataset_manager/bindings.py | 57 +++++++------- cvat/apps/dataset_manager/formats/__init__.py | 64 +++++++++++++++ cvat/apps/dataset_manager/task.py | 77 +++---------------- cvat/apps/engine/annotation.py | 6 +- cvat/apps/engine/views.py | 11 ++- 7 files changed, 122 insertions(+), 102 deletions(-) diff --git a/cvat/apps/dataset_manager/__init__.py b/cvat/apps/dataset_manager/__init__.py index e69de29bb2d1..aee421163331 100644 --- a/cvat/apps/dataset_manager/__init__.py +++ b/cvat/apps/dataset_manager/__init__.py @@ -0,0 +1,7 @@ + +# Copyright (C) 2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from .formats import get_formats +from .task import export_task_as_dataset \ No newline at end of file diff --git a/cvat/apps/dataset_manager/_tests.py b/cvat/apps/dataset_manager/_tests.py index 1a5300756e1f..d87480d69b1e 100644 --- a/cvat/apps/dataset_manager/_tests.py +++ b/cvat/apps/dataset_manager/_tests.py @@ -302,7 +302,7 @@ def test_labelme(self): self._test_export('cvat_label_me', save_images=True) def test_formats_query(self): - formats = dm.get_export_formats() + formats = dm.get_formats() expected = set(f['tag'] for f in dm.EXPORT_FORMATS) actual = set(f['tag'] for f in formats) diff --git a/cvat/apps/dataset_manager/bindings.py b/cvat/apps/dataset_manager/bindings.py index f5264a2b5f9e..21110555621b 100644 --- a/cvat/apps/dataset_manager/bindings.py +++ b/cvat/apps/dataset_manager/bindings.py @@ -12,7 +12,7 @@ import datumaro.components.extractor as datumaro from cvat.apps.engine.annotation import TaskAnnotation -from cvat.apps.engine.data_manager import AnnotationManager, TrackManager +from cvat.apps.engine.annotation_manager import AnnotationManager, TrackManager from cvat.apps.engine.models import AttributeType, ShapeType from cvat.apps.engine.serializers import LabeledDataSerializer from datumaro.util.image import Image @@ -209,7 +209,8 @@ def _init_frame_info(self): 
} for db_image in self._db_task.data.images.all()} self._frame_mapping = { - self._get_filename(info["path"]): frame for frame, info in self._frame_info.items() + self._get_filename(info["path"]): frame + for frame, info in self._frame_info.items() } def _init_meta(self): @@ -340,8 +341,8 @@ def _get_frame(annotations, shape): return annotations[frame] annotations = {} - data_manager = AnnotationManager(self._annotation_ir) - for shape in sorted(data_manager.to_shapes(self._db_task.data.size), key=lambda shape: shape.get("z_order", 0)): + annotation_manager = AnnotationManager(self._annotation_ir) + for shape in sorted(annotation_manager.to_shapes(self._db_task.data.size), key=lambda shape: shape.get("z_order", 0)): if 'track_id' in shape: exported_shape = self._export_tracked_shape(shape) else: @@ -390,10 +391,11 @@ def _import_tag(self, tag): _tag = tag._asdict() label_id = self._get_label_id(_tag.pop('label')) _tag['frame'] = (int(_tag['frame']) - - self._db_task.data.start_frame) // self._frame_step + self._db_task.data.start_frame) // self._frame_step _tag['label_id'] = label_id - _tag['attributes'] = [self._import_attribute(label_id, attrib) for attrib in _tag['attributes'] - if self._get_attribute_id(label_id, attrib.name)] + _tag['attributes'] = [self._import_attribute(label_id, attrib) + for attrib in _tag['attributes'] + if self._get_attribute_id(label_id, attrib.name)] return _tag def _import_attribute(self, label_id, attribute): @@ -406,27 +408,30 @@ def _import_shape(self, shape): _shape = shape._asdict() label_id = self._get_label_id(_shape.pop('label')) _shape['frame'] = (int(_shape['frame']) - - self._db_task.data.start_frame) // self._frame_step + self._db_task.data.start_frame) // self._frame_step _shape['label_id'] = label_id - _shape['attributes'] = [self._import_attribute(label_id, attrib) for attrib in _shape['attributes'] - if self._get_attribute_id(label_id, attrib.name)] + _shape['attributes'] = [self._import_attribute(label_id, attrib) + for attrib in _shape['attributes'] + if self._get_attribute_id(label_id, attrib.name)] return _shape def _import_track(self, track): _track = track._asdict() label_id = self._get_label_id(_track.pop('label')) _track['frame'] = (min(int(shape.frame) for shape in _track['shapes']) - - self._db_task.data.start_frame) // self._frame_step + self._db_task.data.start_frame) // self._frame_step _track['label_id'] = label_id _track['attributes'] = [] _track['shapes'] = [shape._asdict() for shape in _track['shapes']] for shape in _track['shapes']: - shape['frame'] = (int(shape['frame']) - - self._db_task.data.start_frame) // self._frame_step - _track['attributes'] = [self._import_attribute(label_id, attrib) for attrib in shape['attributes'] - if self._get_immutable_attribute_id(label_id, attrib.name)] - shape['attributes'] = [self._import_attribute(label_id, attrib) for attrib in shape['attributes'] - if self._get_mutable_attribute_id(label_id, attrib.name)] + shape['frame'] = (int(shape['frame']) - \ + self._db_task.data.start_frame) // self._frame_step + _track['attributes'] = [self._import_attribute(label_id, attrib) + for attrib in shape['attributes'] + if self._get_immutable_attribute_id(label_id, attrib.name)] + shape['attributes'] = [self._import_attribute(label_id, attrib) + for attrib in shape['attributes'] + if self._get_mutable_attribute_id(label_id, attrib.name)] return _track @@ -518,7 +523,7 @@ def __init__(self, url, cvat_annotations): cvat_frame_anno.height, cvat_frame_anno.width) ) dm_item = 
datumaro.DatasetItem(id=cvat_frame_anno.frame, - annotations=dm_anno, image=dm_image) + annotations=dm_anno, image=dm_image) dm_annotations.append((dm_item.id, dm_item)) dm_annotations = sorted(dm_annotations, key=lambda e: int(e[0])) @@ -585,7 +590,7 @@ def convert_attrs(label, cvat_attrs): anno_attr = convert_attrs(tag_obj.label, tag_obj.attributes) anno = datumaro.Label(label=anno_label, - attributes=anno_attr, group=anno_group) + attributes=anno_attr, group=anno_group) item_anno.append(anno) for shape_obj in cvat_frame_anno.labeled_shapes: @@ -602,17 +607,17 @@ def convert_attrs(label, cvat_attrs): anno_points = shape_obj.points if shape_obj.type == ShapeType.POINTS: anno = datumaro.Points(anno_points, - label=anno_label, attributes=anno_attr, group=anno_group) + label=anno_label, attributes=anno_attr, group=anno_group) elif shape_obj.type == ShapeType.POLYLINE: anno = datumaro.PolyLine(anno_points, - label=anno_label, attributes=anno_attr, group=anno_group) + label=anno_label, attributes=anno_attr, group=anno_group) elif shape_obj.type == ShapeType.POLYGON: anno = datumaro.Polygon(anno_points, - label=anno_label, attributes=anno_attr, group=anno_group) + label=anno_label, attributes=anno_attr, group=anno_group) elif shape_obj.type == ShapeType.RECTANGLE: x0, y0, x1, y1 = anno_points anno = datumaro.Bbox(x0, y0, x1 - x0, y1 - y0, - label=anno_label, attributes=anno_attr, group=anno_group) + label=anno_label, attributes=anno_attr, group=anno_group) else: raise Exception("Unknown shape type '%s'" % shape_obj.type) @@ -683,7 +688,7 @@ def import_dm_annotations(dm_dataset, cvat_task_anno): else: group_size[ann.group] += 1 group_map = {g: s for g, s in group_size.items() - if 1 < s and group_map[g]} + if 1 < s and group_map[g]} group_map = {g: i for i, g in enumerate([0] + sorted(group_map))} for ann in item.annotations: @@ -696,7 +701,7 @@ def import_dm_annotations(dm_dataset, cvat_task_anno): occluded=ann.attributes.get('occluded') == True, group=group_map.get(ann.group, 0), attributes=[cvat_task_anno.Attribute(name=n, value=str(v)) - for n, v in ann.attributes.items()], + for n, v in ann.attributes.items()], )) elif ann.type == datumaro.AnnotationType.label: cvat_task_anno.add_tag(cvat_task_anno.Tag( @@ -704,5 +709,5 @@ def import_dm_annotations(dm_dataset, cvat_task_anno): label=label_cat.items[ann.label].name, group=group_map.get(ann.group, 0), attributes=[cvat_task_anno.Attribute(name=n, value=str(v)) - for n, v in ann.attributes.items()], + for n, v in ann.attributes.items()], )) diff --git a/cvat/apps/dataset_manager/formats/__init__.py b/cvat/apps/dataset_manager/formats/__init__.py index e69de29bb2d1..8cdbedfe7858 100644 --- a/cvat/apps/dataset_manager/formats/__init__.py +++ b/cvat/apps/dataset_manager/formats/__init__.py @@ -0,0 +1,64 @@ + +# Copyright (C) 2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from datumaro.components.project import Environment + + +FORMAT_DATUMARO = "datumaro_project" + +FORMATS = [ + { + 'name': 'Datumaro', + 'tag': FORMAT_DATUMARO, + 'is_default': True, + }, + { + 'name': 'PASCAL VOC 2012', + 'tag': 'cvat_voc', + 'is_default': False, + }, + { + 'name': 'MS COCO', + 'tag': 'cvat_coco', + 'is_default': False, + }, + { + 'name': 'YOLO', + 'tag': 'cvat_yolo', + 'is_default': False, + }, + { + 'name': 'TF Detection API', + 'tag': 'cvat_tfrecord', + 'is_default': False, + }, + { + 'name': 'MOT', + 'tag': 'cvat_mot', + 'is_default': False, + }, + { + 'name': 'LabelMe', + 'tag': 'cvat_label_me', + 'is_default': False, + }, +] + +DEFAULT_FORMAT 
= FORMAT_DATUMARO + +def get_formats(): + converters = Environment(config={ + 'plugins_dir': _FORMATS_DIR + }).converters + + available_formats = set(converters.items) + available_formats.add(FORMAT_DATUMARO) + + public_formats = [] + for fmt in FORMATS: + if fmt['tag'] in available_formats: + public_formats.append(fmt) + + return public_formats diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py index 688f7792c101..88093e33e57d 100644 --- a/cvat/apps/dataset_manager/task.py +++ b/cvat/apps/dataset_manager/task.py @@ -3,26 +3,28 @@ # # SPDX-License-Identifier: MIT -from datetime import timedelta import json import os import os.path as osp import shutil import tempfile +from datetime import timedelta -from django.utils import timezone import django_rq +from django.utils import timezone -from cvat.settings.base import DATUMARO_PATH as _DATUMARO_REPO_PATH, \ - BASE_DIR as _CVAT_ROOT_DIR +from cvat.apps.engine.frame_provider import FrameProvider from cvat.apps.engine.log import slogger from cvat.apps.engine.models import Task -from cvat.apps.engine.frame_provider import FrameProvider -from .util import current_function_name, make_zip_archive -from datumaro.components.project import Project, Environment +from cvat.settings.base import BASE_DIR as _CVAT_ROOT_DIR, \ + DATUMARO_PATH as _DATUMARO_REPO_PATH import datumaro.components.extractor as datumaro +from datumaro.components.project import Project + from .bindings import CvatImagesExtractor, CvatTaskExtractor +from .formats import DEFAULT_FORMAT, FORMAT_DATUMARO +from .util import current_function_name, make_zip_archive _FORMATS_DIR = osp.join(osp.dirname(__file__), 'formats') @@ -41,8 +43,6 @@ def log_exception(logger=None, exc_info=True): def get_export_cache_dir(db_task): return osp.join(db_task.get_task_dirname(), 'export_cache') -EXPORT_FORMAT_DATUMARO_PROJECT = "datumaro_project" - class TaskProject: @staticmethod @@ -147,7 +147,7 @@ def save(self, save_dir=None, save_images=False): def export(self, dst_format, save_dir, save_images=False, server_url=None): if self._dataset is None: self._init_dataset() - if dst_format == EXPORT_FORMAT_DATUMARO_PROJECT: + if dst_format == FORMAT_DATUMARO: self._remote_export(save_dir=save_dir, server_url=server_url) else: converter = self._dataset.env.make_converter(dst_format, @@ -250,11 +250,10 @@ def _remote_export(self, save_dir, server_url=None): osp.join(cvat_utils_dst_dir, 'cli')) -DEFAULT_FORMAT = EXPORT_FORMAT_DATUMARO_PROJECT DEFAULT_CACHE_TTL = timedelta(hours=10) CACHE_TTL = DEFAULT_CACHE_TTL -def export_project(task_id, user, dst_format=None, server_url=None): +def export_task_as_dataset(task_id, user, dst_format=None, server_url=None): try: db_task = Task.objects.get(pk=task_id) @@ -307,57 +306,3 @@ def clear_export_cache(task_id, file_path, file_ctime): except Exception: log_exception(slogger.task[task_id]) raise - - -EXPORT_FORMATS = [ - { - 'name': 'Datumaro', - 'tag': EXPORT_FORMAT_DATUMARO_PROJECT, - 'is_default': True, - }, - { - 'name': 'PASCAL VOC 2012', - 'tag': 'cvat_voc', - 'is_default': False, - }, - { - 'name': 'MS COCO', - 'tag': 'cvat_coco', - 'is_default': False, - }, - { - 'name': 'YOLO', - 'tag': 'cvat_yolo', - 'is_default': False, - }, - { - 'name': 'TF Detection API', - 'tag': 'cvat_tfrecord', - 'is_default': False, - }, - { - 'name': 'MOT', - 'tag': 'cvat_mot', - 'is_default': False, - }, - { - 'name': 'LabelMe', - 'tag': 'cvat_label_me', - 'is_default': False, - }, -] - -def get_export_formats(): - converters = Environment(config={ - 
'plugins_dir': _FORMATS_DIR - }).converters - - available_formats = set(converters.items) - available_formats.add(EXPORT_FORMAT_DATUMARO_PROJECT) - - public_formats = [] - for fmt in EXPORT_FORMATS: - if fmt['tag'] in available_formats: - public_formats.append(fmt) - - return public_formats diff --git a/cvat/apps/engine/annotation.py b/cvat/apps/engine/annotation.py index 093c7ee97edd..12fa5b6ecdae 100644 --- a/cvat/apps/engine/annotation.py +++ b/cvat/apps/engine/annotation.py @@ -16,7 +16,7 @@ from cvat.apps.profiler import silk_profile from . import models, serializers -from .data_manager import AnnotationManager +from .annotation_manager import AnnotationManager from .log import slogger @@ -686,8 +686,8 @@ def _patch_data(self, data, action): self._merge_data(_data, jobs[jid]["start"], self.db_task.overlap) def _merge_data(self, data, start_frame, overlap): - data_manager = AnnotationManager(self.ir_data) - data_manager.merge(data, start_frame, overlap) + annotation_manager = AnnotationManager(self.ir_data) + annotation_manager.merge(data, start_frame, overlap) def put(self, data): self._patch_data(data, None) diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index d5bac4fd0e7a..b4d27f6a181b 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -204,7 +204,7 @@ def share(request): responses={'200': AnnotationFormatSerializer(many=True)}) @action(detail=False, methods=['GET'], url_path='annotation/formats') def annotation_formats(request): - data = dataset_manager.get_export_formats() + data = dataset_manager.get_formats() data = JSONRenderer().render(data) return Response(data) @@ -650,10 +650,9 @@ def dataset_export(self, request, pk): dst_format = request.query_params.get("format", "") if not dst_format: - dst_format = DatumaroTask.DEFAULT_FORMAT + dst_format = dataset_manager.DEFAULT_FORMAT dst_format = dst_format.lower() - if dst_format not in [f['tag'] - for f in DatumaroTask.get_export_formats()]: + if dst_format not in [f['tag'] for f in dataset_manager.get_formats()]: raise serializers.ValidationError( "Unexpected parameter 'format' specified for the request") @@ -694,8 +693,8 @@ def dataset_export(self, request, pk): except Exception: server_address = None - ttl = DatumaroTask.CACHE_TTL.total_seconds() - queue.enqueue_call(func=DatumaroTask.export_project, + ttl = dataset_manager.CACHE_TTL.total_seconds() + queue.enqueue_call(func=dataset_manager.export_task_as_dataset, args=(pk, request.user, dst_format, server_address), job_id=rq_id, meta={ 'request_time': timezone.localtime() }, result_ttl=ttl, failure_ttl=ttl) From 40375f0e66051dbb552a41a2caa777b1debf7f7f Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Tue, 31 Mar 2020 18:58:50 +0300 Subject: [PATCH 03/80] Rename data manager --- cvat/apps/engine/{data_manager.py => annotation_manager.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename cvat/apps/engine/{data_manager.py => annotation_manager.py} (100%) diff --git a/cvat/apps/engine/data_manager.py b/cvat/apps/engine/annotation_manager.py similarity index 100% rename from cvat/apps/engine/data_manager.py rename to cvat/apps/engine/annotation_manager.py From 8c64a9a46b10cd695cfd4c02ee7ec26acd8f294c Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Tue, 31 Mar 2020 18:59:37 +0300 Subject: [PATCH 04/80] Move anno dump and upload functions --- cvat/apps/dataset_manager/bindings.py | 43 ++++++++------------------- cvat/apps/dataset_manager/task.py | 40 +++++++++++++++---------- cvat/apps/engine/annotation.py | 41 
------------------------- 3 files changed, 37 insertions(+), 87 deletions(-) diff --git a/cvat/apps/dataset_manager/bindings.py b/cvat/apps/dataset_manager/bindings.py index 21110555621b..0ddc034d3782 100644 --- a/cvat/apps/dataset_manager/bindings.py +++ b/cvat/apps/dataset_manager/bindings.py @@ -14,6 +14,7 @@ from cvat.apps.engine.annotation import TaskAnnotation from cvat.apps.engine.annotation_manager import AnnotationManager, TrackManager from cvat.apps.engine.models import AttributeType, ShapeType +from cvat.apps.engine.frame_provider import FrameProvider from cvat.apps.engine.serializers import LabeledDataSerializer from datumaro.util.image import Image @@ -491,32 +492,23 @@ def match_frame(self, filename): "Cannot match filename or determinate framenumber for {} filename".format(filename)) -class CvatImagesExtractor(datumaro.Extractor): - def __init__(self, url, frame_provider): - super().__init__() - - self._frame_provider = frame_provider - - def __iter__(self): - frames = self._frame_provider.get_frames( - self._frame_provider.Quality.ORIGINAL, - self._frame_provider.Type.NUMPY_ARRAY) - for item_id, (image, _) in enumerate(frames): - yield datumaro.DatasetItem( - id=item_id, - image=Image(image), - ) - - def __len__(self): - return len(self._frame_provider) - +class CvatTaskExtractor(datumaro.Extractor): + def __init__(self, url, db_task, user, scheme=None, host=None): + cvat_annotations = TaskAnnotation(db_task.id, user) + with transaction.atomic(): + cvat_annotations.init_from_db() + cvat_annotations = Annotation(cvat_annotations.ir_data, db_task, + scheme=scheme, host=host) + frame_provider = FrameProvider(db_task.data) -class CvatAnnotationsExtractor(datumaro.Extractor): - def __init__(self, url, cvat_annotations): self._categories = self._load_categories(cvat_annotations) dm_annotations = [] + frame_provider.get_frames( + self._frame_provider.Quality.ORIGINAL, + self._frame_provider.Type.NUMPY_ARRAY) + for cvat_frame_anno in cvat_annotations.group_by_frame(): dm_anno = self._read_cvat_anno(cvat_frame_anno, cvat_annotations) dm_image = Image(path=cvat_frame_anno.name, size=( @@ -626,15 +618,6 @@ def convert_attrs(label, cvat_attrs): return item_anno -class CvatTaskExtractor(CvatAnnotationsExtractor): - def __init__(self, url, db_task, user): - cvat_annotations = TaskAnnotation(db_task.id, user) - with transaction.atomic(): - cvat_annotations.init_from_db() - cvat_annotations = Annotation(cvat_annotations.ir_data, db_task) - super().__init__(url, cvat_annotations) - - def match_frame(item, cvat_task_anno): is_video = cvat_task_anno.meta['task']['mode'] == 'interpolation' diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py index 88093e33e57d..a0e6bb6221b5 100644 --- a/cvat/apps/dataset_manager/task.py +++ b/cvat/apps/dataset_manager/task.py @@ -13,7 +13,6 @@ import django_rq from django.utils import timezone -from cvat.apps.engine.frame_provider import FrameProvider from cvat.apps.engine.log import slogger from cvat.apps.engine.models import Task @@ -36,8 +35,6 @@ def log_exception(logger=None, exc_info=True): (_MODULE_NAME, current_function_name(2)), exc_info=exc_info) -_TASK_IMAGES_EXTRACTOR = '_cvat_task_images' -_TASK_ANNO_EXTRACTOR = '_cvat_task_anno' _TASK_IMAGES_REMOTE_EXTRACTOR = 'cvat_rest_api_task_images' def get_export_cache_dir(db_task): @@ -90,9 +87,6 @@ def _create(self): def _load(self): self._project = Project.load(self._project_dir) - self._project.env.extractors.register(_TASK_IMAGES_EXTRACTOR, - lambda url: 
CvatImagesExtractor(url, - FrameProvider(self._db_task.data))) def _import_from_task(self, user): self._project = Project.generate(self._project_dir, config={ @@ -100,17 +94,10 @@ def _import_from_task(self, user): 'plugins_dir': _FORMATS_DIR, }) - self._project.add_source('task_%s_images' % self._db_task.id, { - 'format': _TASK_IMAGES_EXTRACTOR, - }) - self._project.env.extractors.register(_TASK_IMAGES_EXTRACTOR, - lambda url: CvatImagesExtractor(url, - FrameProvider(self._db_task.data))) - - self._project.add_source('task_%s_anno' % self._db_task.id, { - 'format': _TASK_ANNO_EXTRACTOR, + self._project.add_source('task_%s' % self._db_task.id, { + 'format': _TASK_EXTRACTOR, }) - self._project.env.extractors.register(_TASK_ANNO_EXTRACTOR, + self._project.env.extractors.register(_TASK_EXTRACTOR, lambda url: CvatTaskExtractor(url, db_task=self._db_task, user=user)) @@ -249,6 +236,27 @@ def _remote_export(self, save_dir, server_url=None): shutil.copytree(osp.join(_CVAT_ROOT_DIR, 'utils', 'cli'), osp.join(cvat_utils_dst_dir, 'cli')) + def upload(self, annotation_file, loader): + annotation_importer = Annotation( + annotation_ir=AnnotationIR(), + db_task=self.db_task, + create_callback=self.create, + ) + self.delete() + db_format = loader.annotation_format + with open(annotation_file, 'rb') as file_object: + source_code = open(os.path.join(settings.BASE_DIR, db_format.handler_file.name)).read() + global_vars = globals() + imports = import_modules(source_code) + global_vars.update(imports) + execute_python_code(source_code, global_vars) + + global_vars["file_object"] = file_object + global_vars["annotations"] = annotation_importer + + execute_python_code("{}(file_object, annotations)".format(loader.handler), global_vars) + self.create(annotation_importer.data.serialize()) + DEFAULT_CACHE_TTL = timedelta(hours=10) CACHE_TTL = DEFAULT_CACHE_TTL diff --git a/cvat/apps/engine/annotation.py b/cvat/apps/engine/annotation.py index 12fa5b6ecdae..83ad3fc31d4d 100644 --- a/cvat/apps/engine/annotation.py +++ b/cvat/apps/engine/annotation.py @@ -718,47 +718,6 @@ def init_from_db(self): overlap = self.db_task.overlap self._merge_data(annotation.ir_data, start_frame, overlap) - def dump(self, filename, dumper, scheme, host): - anno_exporter = Annotation( - annotation_ir=self.ir_data, - db_task=self.db_task, - scheme=scheme, - host=host, - ) - db_format = dumper.annotation_format - - with open(filename, 'wb') as dump_file: - source_code = open(os.path.join(settings.BASE_DIR, db_format.handler_file.name)).read() - global_vars = globals() - imports = import_modules(source_code) - global_vars.update(imports) - execute_python_code(source_code, global_vars) - global_vars["file_object"] = dump_file - global_vars["annotations"] = anno_exporter - - execute_python_code("{}(file_object, annotations)".format(dumper.handler), global_vars) - - def upload(self, annotation_file, loader): - annotation_importer = Annotation( - annotation_ir=AnnotationIR(), - db_task=self.db_task, - create_callback=self.create, - ) - self.delete() - db_format = loader.annotation_format - with open(annotation_file, 'rb') as file_object: - source_code = open(os.path.join(settings.BASE_DIR, db_format.handler_file.name)).read() - global_vars = globals() - imports = import_modules(source_code) - global_vars.update(imports) - execute_python_code(source_code, global_vars) - - global_vars["file_object"] = file_object - global_vars["annotations"] = annotation_importer - - execute_python_code("{}(file_object, annotations)".format(loader.handler), 
global_vars) - self.create(annotation_importer.data.serialize()) - @property def data(self): return self.ir_data.data From ce9c0a8c1e025f49213861541b8d76d0f970b895 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Thu, 2 Apr 2020 15:37:17 +0300 Subject: [PATCH 05/80] Join server host and port in cvat cli --- .../plugins/cvat_rest_api_task_images.py | 17 +++++------------ utils/cli/cli.py | 2 +- utils/cli/core/core.py | 4 ++-- utils/cli/tests/test_cli.py | 3 +-- 4 files changed, 9 insertions(+), 17 deletions(-) diff --git a/cvat/apps/dataset_manager/export_templates/plugins/cvat_rest_api_task_images.py b/cvat/apps/dataset_manager/export_templates/plugins/cvat_rest_api_task_images.py index a4e92f8cd59d..4ece65049a00 100644 --- a/cvat/apps/dataset_manager/export_templates/plugins/cvat_rest_api_task_images.py +++ b/cvat/apps/dataset_manager/export_templates/plugins/cvat_rest_api_task_images.py @@ -20,14 +20,9 @@ CONFIG_SCHEMA = _SchemaBuilder() \ .add('task_id', int) \ - .add('server_host', str) \ - .add('server_port', int) \ + .add('server_url', str) \ .build() -DEFAULT_CONFIG = Config({ - 'server_port': 80 -}, schema=CONFIG_SCHEMA, mutable=False) - class cvat_rest_api_task_images(datumaro.SourceExtractor): def _image_local_path(self, item_id): task_id = self._config.task_id @@ -53,16 +48,15 @@ def _connect(self): session = None try: - print("Enter credentials for '%s:%s' to read task data:" % \ - (self._config.server_host, self._config.server_port)) + print("Enter credentials for '%s' to read task data:" % \ + (self._config.server_url)) username = input('User: ') password = getpass.getpass() session = requests.Session() session.auth = (username, password) - api = CVAT_API_V1(self._config.server_host, - self._config.server_port) + api = CVAT_API_V1(self._config.server_url) cli = CVAT_CLI(session, api) self._session = session @@ -92,8 +86,7 @@ def __init__(self, url): with open(osp.join(url, 'config.json'), 'r') as config_file: config = json.load(config_file) - config = Config(config, - fallback=DEFAULT_CONFIG, schema=CONFIG_SCHEMA) + config = Config(config, schema=CONFIG_SCHEMA) self._config = config with open(osp.join(url, 'images_meta.json'), 'r') as images_file: diff --git a/utils/cli/cli.py b/utils/cli/cli.py index f22bf81520c2..ed749d7e3312 100755 --- a/utils/cli/cli.py +++ b/utils/cli/cli.py @@ -29,7 +29,7 @@ def main(): config_log(args.loglevel) with requests.Session() as session: session.auth = args.auth - api = CVAT_API_V1(args.server_host, args.server_port) + api = CVAT_API_V1('%s:%s' % (args.server_host, args.server_port)) cli = CLI(session, api) try: actions[args.action](cli, **args.__dict__) diff --git a/utils/cli/core/core.py b/utils/cli/core/core.py index 4f8f847e7ba0..59f45af49ecf 100644 --- a/utils/cli/core/core.py +++ b/utils/cli/core/core.py @@ -145,8 +145,8 @@ def tasks_upload(self, task_id, fileformat, filename, **kwargs): class CVAT_API_V1(): """ Build parameterized API URLs """ - def __init__(self, host, port): - self.base = 'http://{}:{}/api/v1/'.format(host, port) + def __init__(self, host): + self.base = 'http://{}/api/v1/'.format(host) @property def tasks(self): diff --git a/utils/cli/tests/test_cli.py b/utils/cli/tests/test_cli.py index 957093ccf487..97db8bfb8c14 100644 --- a/utils/cli/tests/test_cli.py +++ b/utils/cli/tests/test_cli.py @@ -14,12 +14,11 @@ class TestCLI(APITestCase): - @unittest.mock.patch('sys.stdout', new_callable=io.StringIO) def setUp(self, mock_stdout): self.client = RequestsClient() self.client.auth = HTTPBasicAuth('admin', 'admin') - 
self.api = CVAT_API_V1('testserver', '') + self.api = CVAT_API_V1('testserver') self.cli = CLI(self.client, self.api) self.taskname = 'test_task' self.cli.tasks_create(self.taskname, From cf181ae0590a4c18b5e7b3223ac34ea89acc347c Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Fri, 3 Apr 2020 11:54:14 +0300 Subject: [PATCH 06/80] Move export templates dir --- .../datumaro}/export_templates/README.md | 0 .../plugins/cvat_rest_api_task_images.py | 36 +++++++------------ 2 files changed, 13 insertions(+), 23 deletions(-) rename cvat/apps/dataset_manager/{ => formats/datumaro}/export_templates/README.md (100%) rename cvat/apps/dataset_manager/{ => formats/datumaro}/export_templates/plugins/cvat_rest_api_task_images.py (82%) diff --git a/cvat/apps/dataset_manager/export_templates/README.md b/cvat/apps/dataset_manager/formats/datumaro/export_templates/README.md similarity index 100% rename from cvat/apps/dataset_manager/export_templates/README.md rename to cvat/apps/dataset_manager/formats/datumaro/export_templates/README.md diff --git a/cvat/apps/dataset_manager/export_templates/plugins/cvat_rest_api_task_images.py b/cvat/apps/dataset_manager/formats/datumaro/export_templates/plugins/cvat_rest_api_task_images.py similarity index 82% rename from cvat/apps/dataset_manager/export_templates/plugins/cvat_rest_api_task_images.py rename to cvat/apps/dataset_manager/formats/datumaro/export_templates/plugins/cvat_rest_api_task_images.py index 4ece65049a00..b54cce3fbd37 100644 --- a/cvat/apps/dataset_manager/export_templates/plugins/cvat_rest_api_task_images.py +++ b/cvat/apps/dataset_manager/formats/datumaro/export_templates/plugins/cvat_rest_api_task_images.py @@ -1,29 +1,28 @@ -# Copyright (C) 2019-2020 Intel Corporation +# Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT -from collections import OrderedDict import getpass import json -import os, os.path as osp -import requests - -from datumaro.components.config import (Config, - SchemaBuilder as _SchemaBuilder, -) -import datumaro.components.extractor as datumaro -from datumaro.util.image import lazy_image, load_image, Image +import os +import os.path as osp +from collections import OrderedDict -from cvat.utils.cli.core import CLI as CVAT_CLI, CVAT_API_V1 +import requests +from cvat.utils.cli.core import CLI as CVAT_CLI +from cvat.utils.cli.core import CVAT_API_V1 +from datumaro.components.config import Config, SchemaBuilder +from datumaro.components.extractor import SourceExtractor, DatasetItem +from datumaro.util.image import Image, lazy_image, load_image -CONFIG_SCHEMA = _SchemaBuilder() \ +CONFIG_SCHEMA = SchemaBuilder() \ .add('task_id', int) \ .add('server_url', str) \ .build() -class cvat_rest_api_task_images(datumaro.SourceExtractor): +class cvat_rest_api_task_images(SourceExtractor): def _image_local_path(self, item_id): task_id = self._config.task_id return osp.join(self._cache_dir, @@ -102,7 +101,7 @@ def __init__(self, url): size = (entry['height'], entry['width']) image = Image(data=self._make_image_loader(item_id), path=item_filename, size=size) - item = datumaro.DatasetItem(id=item_id, image=image) + item = DatasetItem(id=item_id, image=image) items.append((item.id, item)) items = sorted(items, key=lambda e: int(e[0])) @@ -118,12 +117,3 @@ def __iter__(self): def __len__(self): return len(self._items) - - # pylint: disable=no-self-use - def subsets(self): - return None - - def get(self, item_id, subset=None, path=None): - if path or subset: - raise KeyError() - return self._items[item_id] From 
76c48c79eecceb8ae669bde110243ec552c38889 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Fri, 3 Apr 2020 11:54:37 +0300 Subject: [PATCH 07/80] add dm project exporter --- .../formats/datumaro/__init__.py | 111 ++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 cvat/apps/dataset_manager/formats/datumaro/__init__.py diff --git a/cvat/apps/dataset_manager/formats/datumaro/__init__.py b/cvat/apps/dataset_manager/formats/datumaro/__init__.py new file mode 100644 index 000000000000..b094a3c22f57 --- /dev/null +++ b/cvat/apps/dataset_manager/formats/datumaro/__init__.py @@ -0,0 +1,111 @@ +# Copyright (C) 2019 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from tempfile import TemporaryDirectory + +from cvat.apps.dataset_manager.bindings import import_dm_annotations, CvatAnnotationsExtractor +from cvat.apps.dataset_manager.formats import dm_env, Exporter +from cvat.apps.dataset_manager.util import make_zip_archive +from cvat.settings.base import DATUMARO_PATH + + +class DatumaroProjectExporter(Exporter): + NAME = "DatumaroProject" + EXT = "ZIP" + VERSION = "1.0" + DISPLAY_NAME = "{name} {ext} {version}" + + _REMOTE_IMAGES_EXTRACTOR = 'cvat_rest_api_task_images' + _TEMPLATES_DIR = osp.join(osp.dirname(__file__), 'export_templates') + + def _save_remote_images(self, save_dir, server_url=None): + os.makedirs(save_dir, exist_ok=True) + + db_task = self._db_task + items = [] + config = { + 'server_url': server_url or 'localhost', + 'task_id': db_task.id, + } + + images_meta = { + 'images': items, + } + db_video = getattr(self._db_task.data, 'video', None) + if db_video is not None: + for i in range(self._db_task.data.size): + frame_info = { + 'id': i, + 'name': 'frame_%06d' % i, + 'width': db_video.width, + 'height': db_video.height, + } + items.append(frame_info) + else: + for db_image in self._db_task.data.images.all(): + frame_info = { + 'id': db_image.frame, + 'name': osp.basename(db_image.path), + 'width': db_image.width, + 'height': db_image.height, + } + items.append(frame_info) + + with open(osp.join(save_dir, 'config.json'), 'w') as config_file: + json.dump(config, config_file) + with open(osp.join(save_dir, 'images_meta.json'), 'w') as images_file: + json.dump(images_meta, images_file) + + def _export(self, dataset, save_dir, save_images=False): + converter = env.make_converter('datumaro_project', + save_images=save_images, + config={ 'project_name': self._db_task.name, } + ) + converter(dataset, save_dir=save_dir) + + target_dir = project.config.project_dir + os.makedirs(target_dir, exist_ok=True) + shutil.copyfile( + osp.join(self._TEMPLATES_DIR, 'README.md'), + osp.join(target_dir, 'README.md')) + + if not save_images: + # add remote link to images + source_name = 'task_%s_images' % self._db_task.id + project.add_source(source_name, { + 'format': self._REMOTE_IMAGES_EXTRACTOR, + }) + self._save_remote_images( + osp.join(save_dir, project.local_source_dir(source_name)), + server_url=server_url) + project.save() + + templates_dir = osp.join(self._TEMPLATES_DIR, 'plugins') + target_dir = osp.join(project.config.project_dir, + project.config.env_dir, project.config.plugins_dir) + os.makedirs(target_dir, exist_ok=True) + shutil.copyfile( + osp.join(templates_dir, self._REMOTE_IMAGES_EXTRACTOR + '.py'), + osp.join(target_dir, self._REMOTE_IMAGES_EXTRACTOR + '.py')) + + # Make Datumaro and CVAT CLI modules available to the user + shutil.copytree(DATUMARO_PATH, osp.join(save_dir, 'datumaro'), + ignore=lambda src, names: ['__pycache__'] + [ + n for n in names + if 
sum([int(n.endswith(ext)) for ext in + ['.pyx', '.pyo', '.pyd', '.pyc']]) + ]) + + cvat_utils_dst_dir = osp.join(save_dir, 'cvat', 'utils') + os.makedirs(cvat_utils_dst_dir) + shutil.copytree(osp.join(_CVAT_ROOT_DIR, 'utils', 'cli'), + osp.join(cvat_utils_dst_dir, 'cli')) + + def __call__(self, dst_file, annotations, save_images=False): + self._db_task = annotations._db_task + + with TemporaryDirectory() as temp_dir: + dataset = CvatAnnotationsExtractor(annotations) + self._export(dataset, save_dir=temp_dir, save_images=save_images) + make_zip_archive(temp_dir, file_object) From a2f00f51d58952894e6bc85126053121d11290c2 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Fri, 3 Apr 2020 11:55:08 +0300 Subject: [PATCH 08/80] update mask format support --- cvat/apps/dataset_manager/formats/mask.py | 77 +++++++---------------- 1 file changed, 22 insertions(+), 55 deletions(-) diff --git a/cvat/apps/dataset_manager/formats/mask.py b/cvat/apps/dataset_manager/formats/mask.py index c18553b32650..d06468b5c39e 100644 --- a/cvat/apps/dataset_manager/formats/mask.py +++ b/cvat/apps/dataset_manager/formats/mask.py @@ -2,75 +2,42 @@ # # SPDX-License-Identifier: MIT -format_spec = { - "name": "MASK", - "dumpers": [ - { - "display_name": "{name} {format} {version}", - "format": "ZIP", - "version": "1.1", - "handler": "dump", - }, - ], - "loaders": [ - { - "display_name": "{name} {format} {version}", - "format": "ZIP", - "version": "1.1", - "handler": "load", - }, - ], -} +from tempfile import TemporaryDirectory -from datumaro.components.converter import Converter -class CvatMaskConverter(Converter): - def __init__(self, save_images=False): - self._save_images = save_images +from pyunpack import Archive - def __call__(self, extractor, save_dir): - from datumaro.components.project import Environment, Dataset +from cvat.apps.dataset_manager.bindings import CvatAnnotationsExtractor, \ + import_dm_annotations +from cvat.apps.dataset_manager.formats import dm_env, exporter, importer +from cvat.apps.dataset_manager.util import make_zip_archive +from datumaro.components.project import Dataset - env = Environment() - polygons_to_masks = env.transforms.get('polygons_to_masks') - boxes_to_masks = env.transforms.get('boxes_to_masks') - merge_instance_segments = env.transforms.get('merge_instance_segments') - id_from_image = env.transforms.get('id_from_image_name') - extractor = extractor.transform(polygons_to_masks) - extractor = extractor.transform(boxes_to_masks) - extractor = extractor.transform(merge_instance_segments) - extractor = extractor.transform(id_from_image) +@exporter(name="MASK", ext="ZIP", version="1.1") +def export_mask(dst_file, annotations, **options): + extractor = CvatAnnotationsExtractor(annotations) + with TemporaryDirectory() as temp_dir: + envt = dm_env.transforms + extractor = extractor.transform(envt.get('polygons_to_masks')) + extractor = extractor.transform(envt.get('boxes_to_masks')) + extractor = extractor.transform(envt.get('merge_instance_segments')) + extractor = extractor.transform(envt.get('id_from_image_name')) extractor = Dataset.from_extractors(extractor) # apply lazy transforms - converter = env.make_converter('voc_segmentation', + converter = dm_env.make_converter('voc_segmentation', apply_colormap=True, label_map='source', - save_images=self._save_images) - converter(extractor, save_dir=save_dir) - -def dump(file_object, annotations): - from cvat.apps.dataset_manager.bindings import CvatAnnotationsExtractor - from cvat.apps.dataset_manager.util import make_zip_archive - from 
tempfile import TemporaryDirectory - - extractor = CvatAnnotationsExtractor('', annotations) - converter = CvatMaskConverter() - with TemporaryDirectory() as temp_dir: + save_images=save_images) converter(extractor, save_dir=temp_dir) - make_zip_archive(temp_dir, file_object) -def load(file_object, annotations): - from pyunpack import Archive - from tempfile import TemporaryDirectory - from datumaro.plugins.voc_format.importer import VocImporter - from datumaro.components.project import Environment - from cvat.apps.dataset_manager.bindings import import_dm_annotations + make_zip_archive(temp_dir, file_object) +@importer(name="MASK", ext="ZIP", version="1.0") +def import_mask(src_file, annotations, **options): archive_file = file_object if isinstance(file_object, str) else getattr(file_object, "name") with TemporaryDirectory() as tmp_dir: Archive(archive_file).extractall(tmp_dir) - dm_project = VocImporter()(tmp_dir) - dm_dataset = dm_project.make_dataset() + dm_dataset = dm_env.make_importer('voc')(tmp_dir).make_dataset() masks_to_polygons = Environment().transforms.get('masks_to_polygons') dm_dataset = dm_dataset.transform(masks_to_polygons) import_dm_annotations(dm_dataset, annotations) From 37a4a03b6e457c9e9429c636a6255b1da89c0079 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Fri, 3 Apr 2020 15:06:25 +0300 Subject: [PATCH 09/80] Use decorators for formats definition --- cvat/apps/dataset_manager/formats/__init__.py | 131 ++++++++++-------- .../formats/datumaro/__init__.py | 10 +- 2 files changed, 78 insertions(+), 63 deletions(-) diff --git a/cvat/apps/dataset_manager/formats/__init__.py b/cvat/apps/dataset_manager/formats/__init__.py index 8cdbedfe7858..0e964480ecb8 100644 --- a/cvat/apps/dataset_manager/formats/__init__.py +++ b/cvat/apps/dataset_manager/formats/__init__.py @@ -5,60 +5,79 @@ from datumaro.components.project import Environment +dm_env = Environment() -FORMAT_DATUMARO = "datumaro_project" - -FORMATS = [ - { - 'name': 'Datumaro', - 'tag': FORMAT_DATUMARO, - 'is_default': True, - }, - { - 'name': 'PASCAL VOC 2012', - 'tag': 'cvat_voc', - 'is_default': False, - }, - { - 'name': 'MS COCO', - 'tag': 'cvat_coco', - 'is_default': False, - }, - { - 'name': 'YOLO', - 'tag': 'cvat_yolo', - 'is_default': False, - }, - { - 'name': 'TF Detection API', - 'tag': 'cvat_tfrecord', - 'is_default': False, - }, - { - 'name': 'MOT', - 'tag': 'cvat_mot', - 'is_default': False, - }, - { - 'name': 'LabelMe', - 'tag': 'cvat_label_me', - 'is_default': False, - }, -] - -DEFAULT_FORMAT = FORMAT_DATUMARO - -def get_formats(): - converters = Environment(config={ - 'plugins_dir': _FORMATS_DIR - }).converters - - available_formats = set(converters.items) - available_formats.add(FORMAT_DATUMARO) - - public_formats = [] - for fmt in FORMATS: - if fmt['tag'] in available_formats: - public_formats.append(fmt) - - return public_formats + +class _Format: + NAME = '' + EXT = '' + VERSION = '' + DISPLAY_NAME = '{name} {ext} {version}' + +class Exporter(_Format): + def __call__(self, dst_file, annotations, **options): + raise NotImplementedError() + +class Importer(_Format): + def __call__(self, src_file, annotations, **options): + raise NotImplementedError() + +def _wrap_format(f_or_cls, klass, name, version, ext, display_name): + import inspect + if inspect.isclass(f): + assert hasattr(f_or_cls, '__call__') + target = f_or_cls + elif inspect.isfunction(f_or_cls): + class wrapper(klass): + # pylint: disable=arguments-differ + def __call__(self, *args, **kwargs): + f_or_cls(*args, **kwargs) + + 
wrapper.__name__ = f_or_cls.__name__ + wrapper.__module__ = f_or_cls.__module__ + target = wrapper + else: + assert inspect.isclass(f_or_cls) or inspect.isfunction(f_or_cls) + + target.NAME = name or klass.NAME or f_or_cls.__name__ + target.VERSION = version or klass.VERSION + target.EXT = ext or klass.EXT + target.DISPLAY_NAME = (display_name or klass.DISPLAY_NAME).format( + name=name, version=version, ext=ext) + assert all([target.NAME, target.VERSION, target.EXT, target.DISPLAY_NAME]) + return target + +EXPORT_FORMATS = {} +def exporter(name, ext, version, display_name=None): + assert name not in EXPORT_FORMATS, "Export format '%s' already registered" % name + def wrap_with_params(f_or_cls): + t = _wrap_format(f_or_cls, Exporter, + name=name, ext=ext, version=version, display_name=display_name) + EXPORT_FORMATS[name] = t + return t + return wrap_with_params + +IMPORT_FORMATS = {} +def importer(name, ext, version, display_name=None): + assert name not in IMPORT_FORMATS, "Import format '%s' already registered" % name + def wrap_with_params(f_or_cls): + t = _wrap_format(f_or_cls, Importer, + name=name, ext=ext, version=version, display_name=display_name) + IMPORT_FORMATS[name] = t + return t + return wrap_with_params + + +def _serialize_format(f): + return { + 'name': f.DISPLAY_NAME, + 'tag': f.NAME, + 'ext': f.EXT, + 'version': f.VERSION, + } + +def get_export_formats(): + return [_serialize_format(f) for f in EXPORT_FORMATS] + +def get_import_formats(): + return [_serialize_format(f) for f in IMPORT_FORMATS] diff --git a/cvat/apps/dataset_manager/formats/datumaro/__init__.py b/cvat/apps/dataset_manager/formats/datumaro/__init__.py index b094a3c22f57..024e70127ca2 100644 --- a/cvat/apps/dataset_manager/formats/datumaro/__init__.py +++ b/cvat/apps/dataset_manager/formats/datumaro/__init__.py @@ -5,17 +5,13 @@ from tempfile import TemporaryDirectory from cvat.apps.dataset_manager.bindings import import_dm_annotations, CvatAnnotationsExtractor -from cvat.apps.dataset_manager.formats import dm_env, Exporter +from cvat.apps.dataset_manager.formats import dm_env, exporter from cvat.apps.dataset_manager.util import make_zip_archive from cvat.settings.base import DATUMARO_PATH -class DatumaroProjectExporter(Exporter): - NAME = "DatumaroProject" - EXT = "ZIP" - VERSION = "1.0" - DISPLAY_NAME = "{name} {ext} {version}" - +@exporter(name="DatumaroProject", ext="ZIP", version="1.0") +class DatumaroProjectExporter: _REMOTE_IMAGES_EXTRACTOR = 'cvat_rest_api_task_images' _TEMPLATES_DIR = osp.join(osp.dirname(__file__), 'export_templates') From 7736039c8b0f893691405a210aff9c0ac074384a Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Fri, 3 Apr 2020 18:50:07 +0300 Subject: [PATCH 10/80] Update formats --- cvat/apps/dataset_manager/bindings.py | 44 +++++++------- cvat/apps/dataset_manager/formats/__init__.py | 11 ++++ cvat/apps/dataset_manager/formats/coco.py | 57 ++++++------------- cvat/apps/dataset_manager/formats/labelme.py | 4 +- cvat/apps/dataset_manager/formats/mask.py | 4 +- cvat/apps/dataset_manager/formats/mot.py | 4 +- .../dataset_manager/formats/pascal_voc.py | 4 +- cvat/apps/dataset_manager/formats/tfrecord.py | 4 +- cvat/apps/dataset_manager/formats/yolo.py | 4 +- 9 files changed, 60 insertions(+), 76 deletions(-) diff --git a/cvat/apps/dataset_manager/bindings.py b/cvat/apps/dataset_manager/bindings.py index 0ddc034d3782..19ef6d2fc6bb 100644 --- a/cvat/apps/dataset_manager/bindings.py +++ b/cvat/apps/dataset_manager/bindings.py @@ -111,7 +111,7 @@ def reset(self): self._tracks = [] -class 
Annotation: +class TaskData: Attribute = namedtuple('Attribute', 'name, value') LabeledShape = namedtuple( 'LabeledShape', 'type, frame, label, points, occluded, attributes, group, z_order') @@ -195,13 +195,11 @@ def _get_immutable_attribute_id(self, label_id, attribute_name): def _init_frame_info(self): if hasattr(self._db_task.data, 'video'): - self._frame_info = { - frame: { - "path": "frame_{:06d}".format(frame), - "width": self._db_task.data.video.width, - "height": self._db_task.data.video.height, - } for frame in range(self._db_task.data.size) - } + self._frame_info = {frame: { + "path": "frame_{:06d}".format(frame), + "width": self._db_task.data.video.width, + "height": self._db_task.data.video.height, + } for frame in range(self._db_task.data.size)} else: self._frame_info = {db_image.frame: { "path": db_image.path, @@ -478,6 +476,10 @@ def frame_info(self): def frame_step(self): return self._frame_step + @property + def db_task(self): + return self._db_task + @staticmethod def _get_filename(path): return osp.splitext(osp.basename(path))[0] @@ -491,26 +493,20 @@ def match_frame(self, filename): raise Exception( "Cannot match filename or determinate framenumber for {} filename".format(filename)) - -class CvatTaskExtractor(datumaro.Extractor): - def __init__(self, url, db_task, user, scheme=None, host=None): - cvat_annotations = TaskAnnotation(db_task.id, user) - with transaction.atomic(): - cvat_annotations.init_from_db() - cvat_annotations = Annotation(cvat_annotations.ir_data, db_task, - scheme=scheme, host=host) - frame_provider = FrameProvider(db_task.data) - - self._categories = self._load_categories(cvat_annotations) +class CvatTaskDataExtractor(datumaro.Extractor): + def __init__(self, task_data, include_images=False): + self._categories = self._load_categories(task_data) dm_annotations = [] - frame_provider.get_frames( - self._frame_provider.Quality.ORIGINAL, - self._frame_provider.Type.NUMPY_ARRAY) + if include_images: + frame_provider = FrameProvider(db_task.data) + frame_provider.get_frames( + self._frame_provider.Quality.ORIGINAL, + self._frame_provider.Type.NUMPY_ARRAY) - for cvat_frame_anno in cvat_annotations.group_by_frame(): - dm_anno = self._read_cvat_anno(cvat_frame_anno, cvat_annotations) + for cvat_frame_anno in task_data.group_by_frame(): + dm_anno = self._read_cvat_anno(cvat_frame_anno, task_data) dm_image = Image(path=cvat_frame_anno.name, size=( cvat_frame_anno.height, cvat_frame_anno.width) ) diff --git a/cvat/apps/dataset_manager/formats/__init__.py b/cvat/apps/dataset_manager/formats/__init__.py index 0e964480ecb8..32fbaa8b49dd 100644 --- a/cvat/apps/dataset_manager/formats/__init__.py +++ b/cvat/apps/dataset_manager/formats/__init__.py @@ -81,3 +81,14 @@ def get_export_formats(): def get_import_formats(): return [_serialize_format(f) for f in IMPORT_FORMATS] + + +import .coco +import .cvat +import .datumaro +import .labelme +import .mask +import .mot +import .pascal_voc +import .tfrecord +import .yolo \ No newline at end of file diff --git a/cvat/apps/dataset_manager/formats/coco.py b/cvat/apps/dataset_manager/formats/coco.py index fe323b645db0..3ad49776cdd5 100644 --- a/cvat/apps/dataset_manager/formats/coco.py +++ b/cvat/apps/dataset_manager/formats/coco.py @@ -2,52 +2,29 @@ # # SPDX-License-Identifier: MIT -format_spec = { - "name": "COCO", - "dumpers": [ - { - "display_name": "{name} {format} {version}", - "format": "JSON", - "version": "1.0", - "handler": "dump" - }, - ], - "loaders": [ - { - "display_name": "{name} {format} {version}", - "format": 
"JSON", - "version": "1.0", - "handler": "load" - }, - ], -} +import os.path as osp +import shutil +from tempfile import TemporaryDirectory -def load(file_object, annotations): - from datumaro.plugins.coco_format.extractor import CocoInstancesExtractor - from cvat.apps.dataset_manager.bindings import import_dm_annotations +from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor, \ + import_dm_annotations +from cvat.apps.dataset_manager.formats import dm_env, exporter, importer - dm_dataset = CocoInstancesExtractor(file_object.name) - import_dm_annotations(dm_dataset, annotations) -from datumaro.plugins.coco_format.converter import \ - CocoInstancesConverter as _CocoInstancesConverter -class CvatCocoConverter(_CocoInstancesConverter): - NAME = 'cvat_coco' - -def dump(file_object, annotations): - import os.path as osp - import shutil - from cvat.apps.dataset_manager.bindings import CvatAnnotationsExtractor - from tempfile import TemporaryDirectory - - extractor = CvatAnnotationsExtractor('', annotations) - converter = CvatCocoConverter() +@exporter(name="COCO", ext="JSON", version="1.0") +def export_coco(dst_file, task_data): + extractor = CvatTaskDataExtractor(task_data) with TemporaryDirectory() as temp_dir: converter(extractor, save_dir=temp_dir) - # HACK: file_object should not be used this way, however, + # HACK: dst_file should not be used this way, however, # it is the most efficient way. The correct approach would be to copy # file contents. - file_object.close() + dst_file.close() shutil.move(osp.join(temp_dir, 'annotations', 'instances_default.json'), - file_object.name) \ No newline at end of file + dst_file.name) + +@importer(name="COCO", ext="JSON", version="1.0") +def import_coco(src_file, task_data): + dataset = dm_env.make_extractor('coco_instances')(src_file.name) + import_dm_annotations(dataset, task_data) diff --git a/cvat/apps/dataset_manager/formats/labelme.py b/cvat/apps/dataset_manager/formats/labelme.py index 8cc0d880cc18..19a99f39eb3e 100644 --- a/cvat/apps/dataset_manager/formats/labelme.py +++ b/cvat/apps/dataset_manager/formats/labelme.py @@ -41,11 +41,11 @@ def __call__(self, extractor, save_dir): converter(extractor, save_dir=save_dir) def dump(file_object, annotations): - from cvat.apps.dataset_manager.bindings import CvatAnnotationsExtractor + from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor from cvat.apps.dataset_manager.util import make_zip_archive from tempfile import TemporaryDirectory - extractor = CvatAnnotationsExtractor('', annotations) + extractor = CvatTaskDataExtractor('', annotations) converter = CvatLabelMeConverter() with TemporaryDirectory() as temp_dir: converter(extractor, save_dir=temp_dir) diff --git a/cvat/apps/dataset_manager/formats/mask.py b/cvat/apps/dataset_manager/formats/mask.py index d06468b5c39e..492ce7b3af62 100644 --- a/cvat/apps/dataset_manager/formats/mask.py +++ b/cvat/apps/dataset_manager/formats/mask.py @@ -6,7 +6,7 @@ from pyunpack import Archive -from cvat.apps.dataset_manager.bindings import CvatAnnotationsExtractor, \ +from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor, \ import_dm_annotations from cvat.apps.dataset_manager.formats import dm_env, exporter, importer from cvat.apps.dataset_manager.util import make_zip_archive @@ -15,7 +15,7 @@ @exporter(name="MASK", ext="ZIP", version="1.1") def export_mask(dst_file, annotations, **options): - extractor = CvatAnnotationsExtractor(annotations) + extractor = CvatTaskDataExtractor(annotations) with TemporaryDirectory() as 
temp_dir: envt = dm_env.transforms extractor = extractor.transform(envt.get('polygons_to_masks')) diff --git a/cvat/apps/dataset_manager/formats/mot.py b/cvat/apps/dataset_manager/formats/mot.py index ced2fccabfe8..df85cb064ada 100644 --- a/cvat/apps/dataset_manager/formats/mot.py +++ b/cvat/apps/dataset_manager/formats/mot.py @@ -26,11 +26,11 @@ class CvatMotConverter(_MotConverter): NAME = 'cvat_mot' def dump(file_object, annotations): - from cvat.apps.dataset_manager.bindings import CvatAnnotationsExtractor + from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor from cvat.apps.dataset_manager.util import make_zip_archive from tempfile import TemporaryDirectory - extractor = CvatAnnotationsExtractor('', annotations) + extractor = CvatTaskDataExtractor('', annotations) converter = CvatMotConverter() with TemporaryDirectory() as temp_dir: converter(extractor, save_dir=temp_dir) diff --git a/cvat/apps/dataset_manager/formats/pascal_voc.py b/cvat/apps/dataset_manager/formats/pascal_voc.py index a74d14ba3f2c..eefa05386fc3 100644 --- a/cvat/apps/dataset_manager/formats/pascal_voc.py +++ b/cvat/apps/dataset_manager/formats/pascal_voc.py @@ -80,11 +80,11 @@ def __call__(self, extractor, save_dir): converter(extractor, save_dir=save_dir) def dump(file_object, annotations): - from cvat.apps.dataset_manager.bindings import CvatAnnotationsExtractor + from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor from cvat.apps.dataset_manager.util import make_zip_archive from tempfile import TemporaryDirectory - extractor = CvatAnnotationsExtractor('', annotations) + extractor = CvatTaskDataExtractor('', annotations) converter = CvatVocConverter() with TemporaryDirectory() as temp_dir: converter(extractor, save_dir=temp_dir) diff --git a/cvat/apps/dataset_manager/formats/tfrecord.py b/cvat/apps/dataset_manager/formats/tfrecord.py index db6dee6944fc..369b7fe32bf3 100644 --- a/cvat/apps/dataset_manager/formats/tfrecord.py +++ b/cvat/apps/dataset_manager/formats/tfrecord.py @@ -28,11 +28,11 @@ class CvatTfrecordConverter(_TfDetectionApiConverter): NAME = 'cvat_tfrecord' def dump(file_object, annotations): - from cvat.apps.dataset_manager.bindings import CvatAnnotationsExtractor + from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor from cvat.apps.dataset_manager.util import make_zip_archive from tempfile import TemporaryDirectory - extractor = CvatAnnotationsExtractor('', annotations) + extractor = CvatTaskDataExtractor('', annotations) converter = CvatTfrecordConverter() with TemporaryDirectory() as temp_dir: converter(extractor, save_dir=temp_dir) diff --git a/cvat/apps/dataset_manager/formats/yolo.py b/cvat/apps/dataset_manager/formats/yolo.py index f21ebe43634a..7632b3d4efaa 100644 --- a/cvat/apps/dataset_manager/formats/yolo.py +++ b/cvat/apps/dataset_manager/formats/yolo.py @@ -58,10 +58,10 @@ class CvatYoloConverter(_YoloConverter): NAME = 'cvat_yolo' def dump(file_object, annotations): - from cvat.apps.dataset_manager.bindings import CvatAnnotationsExtractor + from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor from cvat.apps.dataset_manager.util import make_zip_archive from tempfile import TemporaryDirectory - extractor = CvatAnnotationsExtractor('', annotations) + extractor = CvatTaskDataExtractor('', annotations) converter = CvatYoloConverter() with TemporaryDirectory() as temp_dir: converter(extractor, save_dir=temp_dir) From ba75d7c833e3c5d1c3df7ebf63fd3bfe2a57acfa Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Mon, 6 Apr 2020 
11:34:40 +0300 Subject: [PATCH 11/80] Update format implementations --- cvat/apps/dataset_manager/formats/__init__.py | 31 +++--- cvat/apps/dataset_manager/formats/coco.py | 43 +++++--- cvat/apps/dataset_manager/formats/cvat.py | 91 +++++++++++------ .../formats/datumaro/__init__.py | 61 +++++------- cvat/apps/dataset_manager/formats/labelme.py | 77 +++++---------- cvat/apps/dataset_manager/formats/mask.py | 35 +++---- cvat/apps/dataset_manager/formats/mot.py | 81 ++++++--------- .../dataset_manager/formats/pascal_voc.py | 98 +++++++------------ cvat/apps/dataset_manager/formats/tfrecord.py | 66 +++++-------- cvat/apps/dataset_manager/formats/yolo.py | 77 ++++++--------- .../plugins/datumaro_format/converter.py | 23 +++++ datumaro/datumaro/util/image.py | 35 +++++-- datumaro/tests/test_image.py | 5 +- 13 files changed, 341 insertions(+), 382 deletions(-) diff --git a/cvat/apps/dataset_manager/formats/__init__.py b/cvat/apps/dataset_manager/formats/__init__.py index 32fbaa8b49dd..58d10469c9d2 100644 --- a/cvat/apps/dataset_manager/formats/__init__.py +++ b/cvat/apps/dataset_manager/formats/__init__.py @@ -12,14 +12,17 @@ class _Format: NAME = '' EXT = '' VERSION = '' - DISPLAY_NAME = '{name} {ext} {version}' class Exporter(_Format): - def __call__(self, dst_file, annotations, **options): + DISPLAY_NAME = '{name} {version}' + + def __call__(self, dst_file, task_data, **options): raise NotImplementedError() class Importer(_Format): - def __call__(self, src_file, annotations, **options): + DISPLAY_NAME = '{name} {ext} {version}' + + def __call__(self, src_file, task_data, **options): raise NotImplementedError() def _wrap_format(f_or_cls, klass, name, version, ext, display_name): @@ -48,7 +51,7 @@ def __call__(self, *args, **kwargs): return target EXPORT_FORMATS = {} -def exporter(name, ext, version, display_name=None): +def exporter(name, version, ext=None, display_name=None): assert name not in EXPORT_FORMATS, "Export format '%s' already registered" % name def wrap_with_params(f_or_cls): t = _wrap_format(f_or_cls, Exporter, @@ -58,7 +61,7 @@ def wrap_with_params(f_or_cls): return wrap_with_params IMPORT_FORMATS = {} -def importer(name, ext, version, display_name=None): +def importer(name, version, ext, display_name=None): assert name not in IMPORT_FORMATS, "Import format '%s' already registered" % name def wrap_with_params(f_or_cls): t = _wrap_format(f_or_cls, Importer, @@ -83,12 +86,12 @@ def get_import_formats(): return [_serialize_format(f) for f in IMPORT_FORMATS] -import .coco -import .cvat -import .datumaro -import .labelme -import .mask -import .mot -import .pascal_voc -import .tfrecord -import .yolo \ No newline at end of file +import cvat.apps.dataset_manager.formats.coco +import cvat.apps.dataset_manager.formats.cvat +import cvat.apps.dataset_manager.formats.datumaro +import cvat.apps.dataset_manager.formats.labelme +import cvat.apps.dataset_manager.formats.mask +import cvat.apps.dataset_manager.formats.mot +import cvat.apps.dataset_manager.formats.pascal_voc +import cvat.apps.dataset_manager.formats.tfrecord +import cvat.apps.dataset_manager.formats.yolo \ No newline at end of file diff --git a/cvat/apps/dataset_manager/formats/coco.py b/cvat/apps/dataset_manager/formats/coco.py index 3ad49776cdd5..386e39acd21b 100644 --- a/cvat/apps/dataset_manager/formats/coco.py +++ b/cvat/apps/dataset_manager/formats/coco.py @@ -2,29 +2,42 @@ # # SPDX-License-Identifier: MIT -import os.path as osp import shutil from tempfile import TemporaryDirectory from cvat.apps.dataset_manager.bindings 
import CvatTaskDataExtractor, \ import_dm_annotations from cvat.apps.dataset_manager.formats import dm_env, exporter, importer +from cvat.apps.dataset_manager.util import make_zip_archive +import os.path as osp +from pyunpack import Archive +from datumaro.components.project import Dataset -@exporter(name="COCO", ext="JSON", version="1.0") -def export_coco(dst_file, task_data): - extractor = CvatTaskDataExtractor(task_data) +@exporter(name='COCO', version='1.0') +def _export(dst_file, task_data, save_images=False): + extractor = CvatTaskDataExtractor(task_data, include_images=save_images) + extractor = Dataset.from_extractors(extractor) # apply lazy transforms with TemporaryDirectory() as temp_dir: + converter = dm_env.make_converter('coco_instances', + save_images=save_images) converter(extractor, save_dir=temp_dir) - # HACK: dst_file should not be used this way, however, - # it is the most efficient way. The correct approach would be to copy - # file contents. - dst_file.close() - shutil.move(osp.join(temp_dir, 'annotations', 'instances_default.json'), - dst_file.name) - -@importer(name="COCO", ext="JSON", version="1.0") -def import_coco(src_file, task_data): - dataset = dm_env.make_extractor('coco_instances')(src_file.name) - import_dm_annotations(dataset, task_data) + if save_images: + make_zip_archive(temp_dir, dst_file) + else: + # Return only the json file + dst_file.close() + shutil.move(osp.join(temp_dir, 'annotations', 'instances_default.json'), + dst_file.name) + +@importer(name='COCO', ext='JSON, ZIP', version='1.0') +def _import(src_file, task_data): + src_path = src_file.name + + if src_path.lower().endswith('.json'): + dataset = dm_env.make_extractor('coco_instances', src_path) + import_dm_annotations(dataset, task_data) + else: + with TemporaryDirectory() as tmp_dir: + Archive(src_path).extractall(tmp_dir) + + dataset = dm_env.make_importer('coco')(tmp_dir).make_dataset() + import_dm_annotations(dataset, task_data) \ No newline at end of file
diff --git a/cvat/apps/dataset_manager/formats/cvat.py b/cvat/apps/dataset_manager/formats/cvat.py index 4e89f2a437c1..8573d803a6bf 100644 --- a/cvat/apps/dataset_manager/formats/cvat.py +++ b/cvat/apps/dataset_manager/formats/cvat.py @@ -2,31 +2,17 @@ # # SPDX-License-Identifier: MIT -format_spec = { - "name": "CVAT", - "dumpers": [ - { - "display_name": "{name} {format} {version} for videos", - "format": "XML", - "version": "1.1", - "handler": "dump_as_cvat_interpolation" - }, - { - "display_name": "{name} {format} {version} for images", - "format": "XML", - "version": "1.1", - "handler": "dump_as_cvat_annotation" - } - ], - "loaders": [ - { - "display_name": "{name} {format} {version}", - "format": "XML", - "version": "1.1", - "handler": "load", - } - ], -} +import os.path as osp +import shutil +from collections import OrderedDict +from glob import glob +from tempfile import TemporaryDirectory + +from pyunpack import Archive + +from cvat.apps.dataset_manager.formats import exporter, importer +from cvat.apps.dataset_manager.util import make_zip_archive +from cvat.apps.engine.frame_provider import FrameProvider +from datumaro.util.image import save_image + def pairwise(iterable): a = iter(iterable) @@ -34,7 +20,6 @@ def pairwise(iterable): def create_xml_dumper(file_object): from xml.sax.saxutils import XMLGenerator - from collections import OrderedDict class XmlAnnotationWriter: def __init__(self, file): self.version = "1.1" @@ -184,7 +169,6 @@ def close_root(self): return XmlAnnotationWriter(file_object) def dump_as_cvat_annotation(file_object, annotations): - from collections import OrderedDict dumper = create_xml_dumper(file_object) dumper.open_root() dumper.add_meta(annotations.meta) @@ -298,7 +282,6 @@ def
dump_as_cvat_annotation(file_object, annotations): dumper.close_root() def dump_as_cvat_interpolation(file_object, annotations): - from collections import OrderedDict dumper = create_xml_dumper(file_object) dumper.open_root() dumper.add_meta(annotations.meta) @@ -525,3 +508,55 @@ def load(file_object, annotations): annotations.add_tag(annotations.Tag(**tag)) tag = None el.clear() + +def _export(dst_file, task_data, anno_callback, save_images=False, **options): + dst_path = dst_file.name + anno_callback(dst_file, task_data) + + if not save_images: + return + + dst_file.close() + with TemporaryDirectory() as temp_dir: + shutil.move(dst_path, temp_dir) + + frame_provider = FrameProvider(task_data.db_task.data) + frames = frame_provider.get_frames( + frame_provider.Quality.ORIGINAL, + frame_provider.Type.NUMPY_ARRAY) + for frame_id, frame_data in enumerate(frames): + frame_filename = osp.basename(task_data.frame_info[frame_id]['path']) + if '.' in frame_filename: + save_image(osp.join(temp_dir, 'images', frame_filename), + frame_data, jpeg_quality=100) + else: + save_image(osp.join(temp_dir, 'images', frame_filename + '.png'), + frame_data) + + make_zip_archive(temp_dir, dst_file) + +@exporter(name='CVAT for videos', version='1.1') +def _export_video(dst_file, task_data, save_images=False): + _export(dst_file, task_data, + anno_callback=dump_as_cvat_interpolation, save_images=save_images) + +@exporter(name='CVAT for images', version='1.1') +def _export_images(dst_file, task_data, save_images=False): + _export(dst_file, task_data, + anno_callback=dump_as_cvat_annotation, save_images=save_images) + +@importer(name='CVAT', ext='XML, ZIP', version='1.1', + display_name='{name} {version}') +def _import(src_file, task_data, **options): + src_path = src_file.name + + if src_path.lower().endswith('.xml'): + load(src_path, task_data) + elif src_path.lower().endswith('.zip'): + with TemporaryDirectory() as tmp_dir: + Archive(src_path).extractall(tmp_dir) + + anno_paths = glob(osp.join(tmp_dir, '**', '*.xml'), recursive=True) + for p in anno_paths: + load(p, task_data)
diff --git a/cvat/apps/dataset_manager/formats/datumaro/__init__.py b/cvat/apps/dataset_manager/formats/datumaro/__init__.py index 024e70127ca2..90a1632cc8b3 100644 --- a/cvat/apps/dataset_manager/formats/datumaro/__init__.py +++ b/cvat/apps/dataset_manager/formats/datumaro/__init__.py @@ -4,59 +4,45 @@ from tempfile import TemporaryDirectory -from cvat.apps.dataset_manager.bindings import import_dm_annotations, CvatAnnotationsExtractor +from cvat.apps.dataset_manager.bindings import import_dm_annotations, CvatTaskDataExtractor from cvat.apps.dataset_manager.formats import dm_env, exporter from cvat.apps.dataset_manager.util import make_zip_archive from cvat.settings.base import DATUMARO_PATH -@exporter(name="DatumaroProject", ext="ZIP", version="1.0") +@exporter(name="Datumaro", ext="ZIP", version="1.0") class DatumaroProjectExporter: _REMOTE_IMAGES_EXTRACTOR = 'cvat_rest_api_task_images' _TEMPLATES_DIR = osp.join(osp.dirname(__file__), 'export_templates') - def _save_remote_images(self, save_dir, server_url=None): + def _save_image_info(self, save_dir, task_data, server_url=None): os.makedirs(save_dir, exist_ok=True) - db_task = self._db_task - items = [] config = { 'server_url': server_url or 'localhost', - 'task_id': db_task.id, + 'task_id': task_data.db_task.id, } - images_meta = { - 'images': items, - } - db_video = getattr(self._db_task.data, 'video', None) - if db_video is not None: - for i in range(self._db_task.data.size): - frame_info = {
'id': i, - 'name': 'frame_%06d' % i, - 'width': db_video.width, - 'height': db_video.height, - } - items.append(frame_info) - else: - for db_image in self._db_task.data.images.all(): - frame_info = { - 'id': db_image.frame, - 'name': osp.basename(db_image.path), - 'width': db_image.width, - 'height': db_image.height, - } - items.append(frame_info) + images = [] + images_meta = { 'images': images, } + for frame_id, frame in task_data.frame_info.items(): + images.append({ + 'id': frame_id, + 'name': osp.basename(frame['path']), + 'width': frame['width'], + 'height': frame['height'], + }) with open(osp.join(save_dir, 'config.json'), 'w') as config_file: json.dump(config, config_file) with open(osp.join(save_dir, 'images_meta.json'), 'w') as images_file: json.dump(images_meta, images_file) - def _export(self, dataset, save_dir, save_images=False): + def _export(self, task_data, save_dir, save_images=False, server_url=None): + dataset = CvatTaskDataExtractor(task_data, include_images=save_images) converter = dm_env.make_converter('datumaro_project', save_images=save_images, - config={ 'project_name': self._db_task.name, } + config={ 'project_name': task_data.db_task.name, } ) converter(dataset, save_dir=save_dir) @@ -67,14 +53,14 @@ def _export(self, dataset, save_dir, save_images=False): osp.join(target_dir, 'README.md')) if not save_images: - # add remote link to images - source_name = 'task_%s_images' % self._db_task.id + # add remote links to images + source_name = 'task_%s_images' % task_data.db_task.id project.add_source(source_name, { 'format': self._REMOTE_IMAGES_EXTRACTOR, }) - self._save_remote_images( + self._save_image_info( osp.join(save_dir, project.local_source_dir(source_name)), - server_url=server_url) + task_data, server_url=server_url) project.save() templates_dir = osp.join(self._TEMPLATES_DIR, 'plugins') @@ -98,10 +84,7 @@ def _export(self, dataset, save_dir, save_images=False): shutil.copytree(osp.join(_CVAT_ROOT_DIR, 'utils', 'cli'), osp.join(cvat_utils_dst_dir, 'cli')) - def __call__(self, dst_file, annotations, save_images=False): - self._db_task = annotations._db_task - + def __call__(self, dst_file, task_data, save_images=False): with TemporaryDirectory() as temp_dir: - dataset = CvatAnnotationsExtractor(annotations) - self._export(dataset, save_dir=temp_dir, save_images=save_images) + self._export(task_data, save_dir=temp_dir, save_images=save_images) make_zip_archive(temp_dir, dst_file)
diff --git a/cvat/apps/dataset_manager/formats/labelme.py b/cvat/apps/dataset_manager/formats/labelme.py index 19a99f39eb3e..573c514d6927 100644 --- a/cvat/apps/dataset_manager/formats/labelme.py +++ b/cvat/apps/dataset_manager/formats/labelme.py @@ -2,67 +2,36 @@ # # SPDX-License-Identifier: MIT -format_spec = { - "name": "LabelMe", - "dumpers": [ - { - "display_name": "{name} {format} {version}", - "format": "ZIP", - "version": "3.0", - "handler": "dump" - } - ], - "loaders": [ - { - "display_name": "{name} {format} {version}", - "format": "ZIP", - "version": "3.0", - "handler": "load", - } - ], -} +from tempfile import TemporaryDirectory +from pyunpack import Archive -from datumaro.components.converter import Converter -class CvatLabelMeConverter(Converter): - def __init__(self, save_images=False): - self._save_images = save_images +from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor, \ + import_dm_annotations +from cvat.apps.dataset_manager.formats import dm_env, exporter, importer +from cvat.apps.dataset_manager.util import make_zip_archive +from datumaro.components.project
import Dataset - def __call__(self, extractor, save_dir): - from datumaro.components.project import Environment, Dataset - env = Environment() - id_from_image = env.transforms.get('id_from_image_name') - - extractor = extractor.transform(id_from_image) - extractor = Dataset.from_extractors(extractor) # apply lazy transforms - - converter = env.make_converter('label_me', save_images=self._save_images) - converter(extractor, save_dir=save_dir) - -def dump(file_object, annotations): - from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor - from cvat.apps.dataset_manager.util import make_zip_archive - from tempfile import TemporaryDirectory - - extractor = CvatTaskDataExtractor('', annotations) - converter = CvatLabelMeConverter() +@exporter(name='LabelMe', version='3.0') +def _export(dst_file, task_data, save_images=False): + extractor = CvatTaskDataExtractor(task_data, include_images=save_images) + envt = dm_env.transforms + extractor = extractor.transform(envt.get('id_from_image_name')) + extractor = Dataset.from_extractors(extractor) # apply lazy transforms with TemporaryDirectory() as temp_dir: + converter = dm_env.make_converter('label_me', save_images=save_images) converter(extractor, save_dir=temp_dir) - make_zip_archive(temp_dir, file_object) -def load(file_object, annotations): - from pyunpack import Archive - from tempfile import TemporaryDirectory - from datumaro.plugins.labelme_format import LabelMeImporter - from datumaro.components.project import Environment - from cvat.apps.dataset_manager.bindings import import_dm_annotations + make_zip_archive(temp_dir, dst_file) - archive_file = file_object if isinstance(file_object, str) else getattr(file_object, "name") +@importer(name='LabelMe', ext='ZIP', version='3.0') +def _import(src_file, task_data): + src_path = src_file.name with TemporaryDirectory() as tmp_dir: - Archive(archive_file).extractall(tmp_dir) + Archive(src_path).extractall(tmp_dir) - dm_dataset = LabelMeImporter()(tmp_dir).make_dataset() - masks_to_polygons = Environment().transforms.get('masks_to_polygons') - dm_dataset = dm_dataset.transform(masks_to_polygons) - import_dm_annotations(dm_dataset, annotations) + dataset = dm_env.make_importer('label_me')(tmp_dir).make_dataset() + masks_to_polygons = dm_env.transforms.get('masks_to_polygons') + dataset = dataset.transform(masks_to_polygons) + import_dm_annotations(dataset, task_data) diff --git a/cvat/apps/dataset_manager/formats/mask.py b/cvat/apps/dataset_manager/formats/mask.py index 492ce7b3af62..f8ee0c2b0d64 100644 --- a/cvat/apps/dataset_manager/formats/mask.py +++ b/cvat/apps/dataset_manager/formats/mask.py @@ -13,31 +13,28 @@ from datumaro.components.project import Dataset -@exporter(name="MASK", ext="ZIP", version="1.1") -def export_mask(dst_file, annotations, **options): - extractor = CvatTaskDataExtractor(annotations) +@exporter(name='MASK', version='1.1') +def _export(dst_file, task_data, save_images=False): + extractor = CvatTaskDataExtractor(task_data, include_images=save_images) + envt = dm_env.transforms + extractor = extractor.transform(envt.get('polygons_to_masks')) + extractor = extractor.transform(envt.get('boxes_to_masks')) + extractor = extractor.transform(envt.get('merge_instance_segments')) + extractor = extractor.transform(envt.get('id_from_image_name')) + extractor = Dataset.from_extractors(extractor) # apply lazy transforms with TemporaryDirectory() as temp_dir: - envt = dm_env.transforms - extractor = extractor.transform(envt.get('polygons_to_masks')) - extractor = 
extractor.transform(envt.get('boxes_to_masks')) - extractor = extractor.transform(envt.get('merge_instance_segments')) - extractor = extractor.transform(envt.get('id_from_image_name')) - extractor = Dataset.from_extractors(extractor) # apply lazy transforms - converter = dm_env.make_converter('voc_segmentation', - apply_colormap=True, label_map='source', - save_images=save_images) + apply_colormap=True, label_map='source', save_images=save_images) converter(extractor, save_dir=temp_dir) - make_zip_archive(temp_dir, file_object) + make_zip_archive(temp_dir, dst_file) -@importer(name="MASK", ext="ZIP", version="1.0") -def import_mask(src_file, annotations, **options): - archive_file = file_object if isinstance(file_object, str) else getattr(file_object, "name") +@importer(name='MASK', ext='ZIP', version='1.1') +def _import(src_file, task_data): with TemporaryDirectory() as tmp_dir: - Archive(archive_file).extractall(tmp_dir) + Archive(src_file.name).extractall(tmp_dir) dm_dataset = dm_env.make_importer('voc')(tmp_dir).make_dataset() - masks_to_polygons = Environment().transforms.get('masks_to_polygons') + masks_to_polygons = dm_env.transforms.get('masks_to_polygons') dm_dataset = dm_dataset.transform(masks_to_polygons) - import_dm_annotations(dm_dataset, annotations) + import_dm_annotations(dm_dataset, task_data) diff --git a/cvat/apps/dataset_manager/formats/mot.py b/cvat/apps/dataset_manager/formats/mot.py index df85cb064ada..f68bc1167046 100644 --- a/cvat/apps/dataset_manager/formats/mot.py +++ b/cvat/apps/dataset_manager/formats/mot.py @@ -1,62 +1,45 @@ +# Copyright (C) 2019 Intel Corporation +# # SPDX-License-Identifier: MIT -format_spec = { - "name": "MOT", - "dumpers": [ - { - "display_name": "{name} {format} {version}", - "format": "ZIP", - "version": "1.1", - "handler": "dump" - }, - ], - "loaders": [ - { - "display_name": "{name} {format} {version}", - "format": "ZIP", - "version": "1.1", - "handler": "load", - } - ], -} +from tempfile import TemporaryDirectory -from datumaro.plugins.mot_format import \ - MotSeqGtConverter as _MotConverter -class CvatMotConverter(_MotConverter): - NAME = 'cvat_mot' +from pyunpack import Archive -def dump(file_object, annotations): - from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor - from cvat.apps.dataset_manager.util import make_zip_archive - from tempfile import TemporaryDirectory +import datumaro.components.extractor as datumaro +from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor, + match_frame) +from cvat.apps.dataset_manager.formats import dm_env, exporter, importer +from cvat.apps.dataset_manager.util import make_zip_archive +from datumaro.components.project import Dataset - extractor = CvatTaskDataExtractor('', annotations) - converter = CvatMotConverter() + +@exporter(name='MOT', version='1.1') +def _export(dst_file, task_data, save_images=False): + extractor = CvatTaskDataExtractor(task_data, include_images=save_images) + envt = dm_env.transforms + extractor = extractor.transform(envt.get('id_from_image_name')) + extractor = Dataset.from_extractors(extractor) # apply lazy transforms with TemporaryDirectory() as temp_dir: + converter = dm_env.make_converter('mot_seq', save_images=save_images) converter(extractor, save_dir=temp_dir) - make_zip_archive(temp_dir, file_object) - -def load(file_object, annotations): - from pyunpack import Archive - from tempfile import TemporaryDirectory - from datumaro.plugins.mot_format import MotSeqImporter - import datumaro.components.extractor as datumaro - from 
cvat.apps.dataset_manager.bindings import match_frame + make_zip_archive(temp_dir, dst_file) - archive_file = file_object if isinstance(file_object, str) else getattr(file_object, "name") +@importer(name='MOT', ext='ZIP', version='1.1') +def _import(src_file, task_data): with TemporaryDirectory() as tmp_dir: - Archive(archive_file).extractall(tmp_dir) + Archive(src_file.name).extractall(tmp_dir) - tracks = {} + dataset = dm_env.make_importer('mot_seq')(tmp_dir).make_dataset() - dm_dataset = MotSeqImporter()(tmp_dir).make_dataset() - label_cat = dm_dataset.categories()[datumaro.AnnotationType.label] + tracks = {} + label_cat = dataset.categories()[datumaro.AnnotationType.label] - for item in dm_dataset: - frame_id = match_frame(item, annotations) + for item in dataset: + frame_id = match_frame(item, task_data) for ann in item.annotations: if ann.type != datumaro.AnnotationType.bbox: continue @@ -64,7 +47,7 @@ def load(file_object, annotations): if track_id is None: continue - shape = annotations.TrackedShape( + shape = task_data.TrackedShape( type='rectangle', points=ann.points, occluded=ann.attributes.get('occluded') == True, @@ -77,13 +60,13 @@ def load(file_object, annotations): # build trajectories as lists of shapes in track dict if track_id not in tracks: - tracks[track_id] = annotations.Track( + tracks[track_id] = task_data.Track( label_cat.items[ann.label].name, 0, []) tracks[track_id].shapes.append(shape) for track in tracks.values(): # MOT annotations do not require frames to be ordered track.shapes.sort(key=lambda t: t.frame) # Set outside=True for the last shape in a track to finish the track track.shapes[-1] = track.shapes[-1]._replace(outside=True) - annotations.add_track(track) + task_data.add_track(track)
diff --git a/cvat/apps/dataset_manager/formats/pascal_voc.py b/cvat/apps/dataset_manager/formats/pascal_voc.py index eefa05386fc3..f876872c2e16 100644 --- a/cvat/apps/dataset_manager/formats/pascal_voc.py +++ b/cvat/apps/dataset_manager/formats/pascal_voc.py @@ -1,46 +1,45 @@ -# Copyright (C) 2018 Intel Corporation +# Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT -format_spec = { - "name": "PASCAL VOC", - "dumpers": [ - { - "display_name": "{name} {format} {version}", - "format": "ZIP", - "version": "1.1", - "handler": "dump" - }, - ], - "loaders": [ - { - "display_name": "{name} {format} {version}", - "format": "ZIP", - "version": "1.1", - "handler": "load" - }, - ], -} +import os.path as osp +import shutil +from glob import glob -def load(file_object, annotations): - from glob import glob - import os - import os.path as osp - import shutil - from pyunpack import Archive - from tempfile import TemporaryDirectory - from datumaro.plugins.voc_format.importer import VocImporter - from cvat.apps.dataset_manager.bindings import import_dm_annotations +from tempfile import TemporaryDirectory - archive_file = file_object if isinstance(file_object, str) else getattr(file_object, "name") +from pyunpack import Archive + +from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor, + import_dm_annotations) +from cvat.apps.dataset_manager.formats import dm_env, exporter, importer +from cvat.apps.dataset_manager.util import make_zip_archive +from datumaro.components.project import Dataset + + +@exporter(name='PASCAL VOC', version='1.1') +def _export(dst_file, task_data, save_images=False): + extractor = CvatTaskDataExtractor(task_data, include_images=save_images) + 
envt = dm_env.transforms + extractor = extractor.transform(envt.get('id_from_image_name')) + extractor = Dataset.from_extractors(extractor) # apply lazy transforms + with TemporaryDirectory() as temp_dir: + converter = dm_env.make_converter('voc', label_map='source', + save_images=save_images) + converter(extractor, save_dir=temp_dir) + + make_zip_archive(temp_dir, dst_file) + +@importer(name='PASCAL VOC', ext='ZIP', version='1.1') +def _import(src_file, task_data): with TemporaryDirectory() as tmp_dir: - Archive(archive_file).extractall(tmp_dir) + Archive(src_file.name).extractall(tmp_dir) # put label map from the task if not present labelmap_file = osp.join(tmp_dir, 'labelmap.txt') if not osp.isfile(labelmap_file): labels = (label['name'] + ':::' - for _, label in annotations.meta['task']['labels']) + for _, label in task_data.meta['task']['labels']) with open(labelmap_file, 'w') as f: f.write('\n'.join(labels)) @@ -58,34 +57,7 @@ def load(file_object, annotations): for f in anno_files: shutil.move(f, anno_dir) - dm_project = VocImporter()(tmp_dir) - dm_dataset = dm_project.make_dataset() - import_dm_annotations(dm_dataset, annotations) - -from datumaro.components.converter import Converter -class CvatVocConverter(Converter): - def __init__(self, save_images=False): - self._save_images = save_images - - def __call__(self, extractor, save_dir): - from datumaro.components.project import Environment, Dataset - env = Environment() - id_from_image = env.transforms.get('id_from_image_name') - - extractor = extractor.transform(id_from_image) - extractor = Dataset.from_extractors(extractor) # apply lazy transforms - - converter = env.make_converter('voc', label_map='source', - save_images=self._save_images) - converter(extractor, save_dir=save_dir) - -def dump(file_object, annotations): - from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor - from cvat.apps.dataset_manager.util import make_zip_archive - from tempfile import TemporaryDirectory - - extractor = CvatTaskDataExtractor('', annotations) - converter = CvatVocConverter() - with TemporaryDirectory() as temp_dir: - converter(extractor, save_dir=temp_dir) - make_zip_archive(temp_dir, file_object) \ No newline at end of file + dataset = dm_env.make_importer('voc')(tmp_dir).make_dataset() + masks_to_polygons = dm_env.transforms.get('masks_to_polygons') + dataset = dataset.transform(masks_to_polygons) + import_dm_annotations(dataset, task_data)
diff --git a/cvat/apps/dataset_manager/formats/tfrecord.py b/cvat/apps/dataset_manager/formats/tfrecord.py index 369b7fe32bf3..62577ec7fc44 100644 --- a/cvat/apps/dataset_manager/formats/tfrecord.py +++ b/cvat/apps/dataset_manager/formats/tfrecord.py @@ -2,52 +2,32 @@ # # SPDX-License-Identifier: MIT -format_spec = { - "name": "TFRecord", - "dumpers": [ - { - "display_name": "{name} {format} {version}", - "format": "ZIP", - "version": "1.0", - "handler": "dump" - }, - ], - "loaders": [ - { - "display_name": "{name} {format} {version}", - "format": "ZIP", - "version": "1.0", - "handler": "load" - }, - ], -} - -from datumaro.plugins.tf_detection_api_format.converter import \ - TfDetectionApiConverter as _TfDetectionApiConverter -class CvatTfrecordConverter(_TfDetectionApiConverter): - NAME = 'cvat_tfrecord' - -def dump(file_object, annotations): - from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor - from cvat.apps.dataset_manager.util import make_zip_archive - from tempfile import TemporaryDirectory - - extractor = CvatTaskDataExtractor('', annotations) - converter
= CvatTfrecordConverter() +from tempfile import TemporaryDirectory + +from pyunpack import Archive + +from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor, \ + import_dm_annotations +from cvat.apps.dataset_manager.formats import dm_env, exporter, importer +from cvat.apps.dataset_manager.util import make_zip_archive +from datumaro.components.project import Dataset + + +@exporter(name='TFRecord', version='1.0') +def _export(dst_file, task_data, save_images=False): + extractor = CvatTaskDataExtractor(task_data, include_images=save_images) + extractor = Dataset.from_extractors(extractor) # apply lazy transforms with TemporaryDirectory() as temp_dir: + converter = dm_env.make_converter('tf_detection_api', + save_images=save_images) converter(extractor, save_dir=temp_dir) - make_zip_archive(temp_dir, file_object) -def load(file_object, annotations): - from pyunpack import Archive - from tempfile import TemporaryDirectory - from datumaro.plugins.tf_detection_api_format.importer import TfDetectionApiImporter - from cvat.apps.dataset_manager.bindings import import_dm_annotations + make_zip_archive(temp_dir, dst_file) - archive_file = file_object if isinstance(file_object, str) else getattr(file_object, "name") +@importer(name='TFRecord', ext='ZIP', version='1.0') +def _import(src_file, task_data): with TemporaryDirectory() as tmp_dir: - Archive(archive_file).extractall(tmp_dir) + Archive(src_file.name).extractall(tmp_dir) - dm_project = TfDetectionApiImporter()(tmp_dir) - dm_dataset = dm_project.make_dataset() - import_dm_annotations(dm_dataset, annotations) + dataset = dm_env.make_importer('tf_detection_api')(tmp_dir).make_dataset() + import_dm_annotations(dataset, task_data) diff --git a/cvat/apps/dataset_manager/formats/yolo.py b/cvat/apps/dataset_manager/formats/yolo.py index 7632b3d4efaa..37b0c443d600 100644 --- a/cvat/apps/dataset_manager/formats/yolo.py +++ b/cvat/apps/dataset_manager/formats/yolo.py @@ -1,39 +1,35 @@ -# Copyright (C) 2018 Intel Corporation +# Copyright (C) 2019 Intel Corporation # # SPDX-License-Identifier: MIT -format_spec = { - "name": "YOLO", - "dumpers": [ - { - "display_name": "{name} {format} {version}", - "format": "ZIP", - "version": "1.1", - "handler": "dump" - }, - ], - "loaders": [ - { - "display_name": "{name} {format} {version}", - "format": "ZIP", - "version": "1.1", - "handler": "load" - }, - ], -} +import os.path as osp +from glob import glob +from tempfile import TemporaryDirectory -def load(file_object, annotations): - from pyunpack import Archive - import os.path as osp - from tempfile import TemporaryDirectory - from glob import glob - from datumaro.components.extractor import DatasetItem - from datumaro.plugins.yolo_format.importer import YoloImporter - from cvat.apps.dataset_manager.bindings import import_dm_annotations, match_frame +from pyunpack import Archive - archive_file = file_object if isinstance(file_object, str) else getattr(file_object, "name") +from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor, + import_dm_annotations, match_frame) +from cvat.apps.dataset_manager.formats import dm_env, exporter, importer +from cvat.apps.dataset_manager.util import make_zip_archive +from datumaro.components.extractor import DatasetItem +from datumaro.components.project import Dataset + + +@exporter(name='YOLO', version='1.1') +def _export(dst_file, task_data, save_images=False): + extractor = CvatTaskDataExtractor(task_data, include_images=save_images) + extractor = Dataset.from_extractors(extractor) # apply lazy 
transforms + with TemporaryDirectory() as temp_dir: + converter = dm_env.make_converter('yolo', save_images=save_images) + converter(extractor, save_dir=temp_dir) + + make_zip_archive(temp_dir, dst_file) + +@importer(name='YOLO', ext='ZIP', version='1.1') +def _import(src_file, task_data): with TemporaryDirectory() as tmp_dir: - Archive(archive_file).extractall(tmp_dir) + Archive(src_file.name).extractall(tmp_dir) image_info = {} anno_files = glob(osp.join(tmp_dir, '**', '*.txt'), recursive=True) @@ -48,21 +44,6 @@ def load(file_object, annotations): if frame_info is not None: image_info[filename] = (frame_info['height'], frame_info['width']) - dm_project = YoloImporter()(tmp_dir, image_info=image_info) - dm_dataset = dm_project.make_dataset() - import_dm_annotations(dm_dataset, annotations) - -from datumaro.plugins.yolo_format.converter import \ - YoloConverter as _YoloConverter -class CvatYoloConverter(_YoloConverter): - NAME = 'cvat_yolo' - -def dump(file_object, annotations): - from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor - from cvat.apps.dataset_manager.util import make_zip_archive - from tempfile import TemporaryDirectory - extractor = CvatTaskDataExtractor('', annotations) - converter = CvatYoloConverter() - with TemporaryDirectory() as temp_dir: - converter(extractor, save_dir=temp_dir) - make_zip_archive(temp_dir, file_object) + dataset = dm_env.make_importer('yolo')(tmp_dir, image_info=image_info) \ + .make_dataset() + import_dm_annotations(dataset, task_data) diff --git a/datumaro/datumaro/plugins/datumaro_format/converter.py b/datumaro/datumaro/plugins/datumaro_format/converter.py index 08dc0062dd29..cf317a0a277e 100644 --- a/datumaro/datumaro/plugins/datumaro_format/converter.py +++ b/datumaro/datumaro/plugins/datumaro_format/converter.py @@ -286,3 +286,26 @@ def __init__(self, save_images=False): def __call__(self, extractor, save_dir): converter = _Converter(extractor, save_dir, **self._options) converter.convert() + + +class DatumaroProjectConverter(Converter): + @classmethod + def build_cmdline_parser(cls, **kwargs): + parser = super().build_cmdline_parser(**kwargs) + parser.add_argument('--save-images', action='store_true', + help="Save images (default: %(default)s)") + return parser + + def __init__(self, config=None, save_images=False): + self._config = config + self._save_images = save_images + + def __call__(self, extractor, save_dir): + os.makedirs(save_dir, exist_ok=True) + + project = Project.generate(save_dir, config=self._config) + + converter = project.env.make_converter('datumaro', + save_images=self._save_images) + converter(extractor, save_dir=osp.join( + project.config.project_dir, project.config.dataset_dir)) \ No newline at end of file diff --git a/datumaro/datumaro/util/image.py b/datumaro/datumaro/util/image.py index 2d465f71a4c4..73da1e443528 100644 --- a/datumaro/datumaro/util/image.py +++ b/datumaro/datumaro/util/image.py @@ -45,20 +45,30 @@ def load_image(path): assert image.shape[2] in {3, 4} return image -def save_image(path, image, params=None): +def save_image(path, image, **kwargs): + if not kwargs: + kwargs = {} + if _IMAGE_BACKEND == _IMAGE_BACKENDS.cv2: import cv2 + + params = [] + ext = path[-4:] if ext.upper() == '.JPG': - params = [ int(cv2.IMWRITE_JPEG_QUALITY), 75 ] + params = [ + int(cv2.IMWRITE_JPEG_QUALITY), kwargs.get('jpeg_quality', 75) + ] image = image.astype(np.uint8) cv2.imwrite(path, image, params=params) elif _IMAGE_BACKEND == _IMAGE_BACKENDS.PIL: from PIL import Image - if not params: - params = {} + 
params = {} + params['quality'] = kwargs.get('jpeg_quality', 75) + if kwargs.get('jpeg_quality') == 100: + params['subsampling'] = 0 image = image.astype(np.uint8) if len(image.shape) == 3 and image.shape[2] in {3, 4}: @@ -68,15 +78,22 @@ def save_image(path, image, params=None): else: raise NotImplementedError() -def encode_image(image, ext, params=None): +def encode_image(image, ext, **kwargs): + if not kwargs: + kwargs = {} + if _IMAGE_BACKEND == _IMAGE_BACKENDS.cv2: import cv2 + params = [] + if not ext.startswith('.'): ext = '.' + ext if ext.upper() == '.JPG': - params = [ int(cv2.IMWRITE_JPEG_QUALITY), 75 ] + params = [ + int(cv2.IMWRITE_JPEG_QUALITY), kwargs.get('jpeg_quality', 75) + ] image = image.astype(np.uint8) success, result = cv2.imencode(ext, image, params=params) @@ -89,8 +106,10 @@ def encode_image(image, ext, params=None): if ext.startswith('.'): ext = ext[1:] - if not params: - params = {} + params = {} + params['quality'] = kwargs.get('jpeg_quality', 75) + if kwargs.get('jpeg_quality') == 100: + params['subsampling'] = 0 image = image.astype(np.uint8) if len(image.shape) == 3 and image.shape[2] in {3, 4}:
diff --git a/datumaro/tests/test_image.py b/datumaro/tests/test_image.py index efb7aea29692..bdb29438a0ae 100644 --- a/datumaro/tests/test_image.py +++ b/datumaro/tests/test_image.py @@ -26,7 +26,7 @@ def test_save_and_load_backends(self): path = osp.join(test_dir, 'img.png') # lossless image_module._IMAGE_BACKEND = save_backend - image_module.save_image(path, src_image) + image_module.save_image(path, src_image, jpeg_quality=100) image_module._IMAGE_BACKEND = load_backend dst_image = image_module.load_image(path) @@ -43,7 +43,8 @@ def test_encode_and_decode_backends(self): src_image = np.random.randint(0, 255 + 1, (2, 4, c)) image_module._IMAGE_BACKEND = save_backend - buffer = image_module.encode_image(src_image, '.png') # lossless + buffer = image_module.encode_image(src_image, '.png', + jpeg_quality=100) # lossless image_module._IMAGE_BACKEND = load_backend dst_image = image_module.decode_image(buffer)
From 8b0173a35b9a94395819c2796d52b3e27e26c348 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Mon, 6 Apr 2020 16:20:09 +0300 Subject: [PATCH 12/80] remove parameter --- cvat/apps/dataset_manager/formats/cvat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/cvat/apps/dataset_manager/formats/cvat.py b/cvat/apps/dataset_manager/formats/cvat.py index 8573d803a6bf..f510f1eb82f0 100644 --- a/cvat/apps/dataset_manager/formats/cvat.py +++ b/cvat/apps/dataset_manager/formats/cvat.py @@ -548,7 +548,7 @@ def _export_images(dst_file, task_data, save_images=False): @importer(name='CVAT', ext='XML, ZIP', version='1.1', display_name='{name} {version}') -def _import(src_file, task_data, **options): +def _import(src_file, task_data): src_path = src_file.name
From 5c770b4ddd74cba5eff5f5d6562aad352e14e9ac Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Mon, 6 Apr 2020 16:20:55 +0300 Subject: [PATCH 13/80] Add dm views --- cvat/apps/dataset_manager/formats/__init__.py | 6 ++ cvat/apps/dataset_manager/views.py | 85 +++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 cvat/apps/dataset_manager/views.py
diff --git a/cvat/apps/dataset_manager/formats/__init__.py b/cvat/apps/dataset_manager/formats/__init__.py index 58d10469c9d2..4a637b1e3426 100644 --- a/cvat/apps/dataset_manager/formats/__init__.py +++ b/cvat/apps/dataset_manager/formats/__init__.py @@ -85,6 +85,12 @@ def
get_import_formats(): return [_serialize_format(f) for f in IMPORT_FORMATS] +def make_importer(name): + return IMPORT_FORMATS[name]() + +def make_exporter(name): + return EXPORT_FORMATS[name]() + import cvat.apps.dataset_manager.formats.coco import cvat.apps.dataset_manager.formats.cvat
diff --git a/cvat/apps/dataset_manager/views.py b/cvat/apps/dataset_manager/views.py new file mode 100644 index 000000000000..592a6f3c73aa --- /dev/null +++ b/cvat/apps/dataset_manager/views.py @@ -0,0 +1,85 @@ +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import os +import os.path as osp +from datetime import timedelta + +import django_rq +from django.utils import timezone + +import cvat.apps.dataset_manager.task as task +from cvat.apps.engine.log import slogger +from cvat.apps.engine.models import Task + +from .formats import make_exporter +from .util import current_function_name + + +_MODULE_NAME = __package__ + '.' + osp.splitext(osp.basename(__file__))[0] +def log_exception(logger=None, exc_info=True): + if logger is None: + logger = slogger + logger.exception("[%s @ %s]: exception occurred" % \ + (_MODULE_NAME, current_function_name(2)), + exc_info=exc_info) + + +def get_export_cache_dir(db_task): + return osp.join(db_task.get_task_dirname(), 'export_cache') + +DEFAULT_CACHE_TTL = timedelta(hours=10) +CACHE_TTL = DEFAULT_CACHE_TTL + + +def export_task(task_id, dst_format, server_url=None, save_images=False): + try: + db_task = Task.objects.get(pk=task_id) + + cache_dir = get_export_cache_dir(db_task) + + exporter = make_exporter(dst_format) + output_path = osp.join(cache_dir, '%s.%s' % (dst_format, exporter.EXT)) + + task_time = timezone.localtime(db_task.updated_date).timestamp() + if not (osp.exists(output_path) and \ + task_time <= osp.getmtime(output_path)): + os.makedirs(cache_dir, exist_ok=True) + task.export_task(task_id, output_path, dst_format, + server_url=server_url, save_images=save_images) + + archive_ctime = osp.getctime(output_path) + scheduler = django_rq.get_scheduler() + cleaning_job = scheduler.enqueue_in(time_delta=CACHE_TTL, + func=clear_export_cache, + task_id=task_id, + file_path=output_path, file_ctime=archive_ctime) + slogger.task[task_id].info( + "The task '{}' is exported as '{}' " + "and available for downloading for the next '{}'. " + "Export cache cleaning job is enqueued, " + "id '{}', start in '{}'".format( + db_task.name, dst_format, CACHE_TTL, + cleaning_job.id, CACHE_TTL)) + + return output_path + except Exception: + log_exception(slogger.task[task_id]) + raise + +def export_task_as_dataset(task_id, dst_format=None, server_url=None): + return export_task(task_id, dst_format, server_url=server_url, save_images=True) + +def export_task_annotations(task_id, dst_format=None, server_url=None): + return export_task(task_id, dst_format, server_url=server_url, save_images=False) + +def clear_export_cache(task_id, file_path, file_ctime): + try: + if osp.exists(file_path) and osp.getctime(file_path) == file_ctime: + os.remove(file_path) + slogger.task[task_id].info( + "Export cache file '{}' successfully removed" \ + .format(file_path)) + except Exception: + log_exception(slogger.task[task_id]) + raise
From 527123b1d8bde72f093d9679592091137719c1d1 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Mon, 6 Apr 2020 16:21:57 +0300 Subject: [PATCH 14/80] Move annotation components to dm --- .../annotation.py} | 89 +- cvat/apps/dataset_manager/bindings.py | 227 ++-- cvat/apps/dataset_manager/task.py | 1005 ++++++++++++----- cvat/apps/engine/annotation.py | 723 ------------ 4 files changed, 855 insertions(+), 1189 deletions(-) rename cvat/apps/{engine/annotation_manager.py => dataset_manager/annotation.py} (85%) delete mode 100644 cvat/apps/engine/annotation.py
diff --git a/cvat/apps/engine/annotation_manager.py b/cvat/apps/dataset_manager/annotation.py similarity index 85% rename from cvat/apps/engine/annotation_manager.py rename to cvat/apps/dataset_manager/annotation.py index fc401626571e..8699de621ecb 100644 --- a/cvat/apps/engine/annotation_manager.py +++ b/cvat/apps/dataset_manager/annotation.py @@ -2,14 +2,71 @@ # # SPDX-License-Identifier: MIT -import copy +from copy import copy, deepcopy import numpy as np from scipy.optimize import linear_sum_assignment from shapely import geometry -from .
import models - +from cvat.apps.engine.models import ShapeType +from cvat.apps.engine.serializers import LabeledDataSerializer + + +class AnnotationIR: + def __init__(self, data=None): + self.reset() + if data: + self.tags = getattr(data, 'tags', []) or data['tags'] + self.shapes = getattr(data, 'shapes', []) or data['shapes'] + self.tracks = getattr(data, 'tracks', []) or data['tracks'] + + def add_tag(self, tag): + self.tags.append(tag) + + def add_shape(self, shape): + self.shapes.append(shape) + + def add_track(self, track): + self.tracks.append(track) + + @property + def data(self): + return { + 'version': self.version, + 'tags': self.tags, + 'shapes': self.shapes, + 'tracks': self.tracks, + } + + @data.setter + def data(self, data): + self.version = data['version'] + self.tags = data['tags'] + self.shapes = data['shapes'] + self.tracks = data['tracks'] + + def serialize(self): + serializer = LabeledDataSerializer(data=self.data) + if serializer.is_valid(raise_exception=True): + return serializer.data + + # makes a data copy from the specified frame interval + def slice(self, start, stop): + def is_frame_inside(x): return (start <= int(x['frame']) <= stop) + splitted_data = AnnotationIR() + splitted_data.tags = deepcopy(list(filter(is_frame_inside, self.tags))) + splitted_data.shapes = deepcopy( + list(filter(is_frame_inside, self.shapes))) + splitted_data.tracks = deepcopy(list(filter(lambda y: len( + list(filter(is_frame_inside, y['shapes']))), self.tracks))) + + return splitted_data + + def reset(self): + self.version = 0 + self.tags = [] + self.shapes = [] + self.tracks = [] class AnnotationManager: def __init__(self, data): @@ -164,13 +221,13 @@ class ShapeManager(ObjectManager): def to_tracks(self): tracks = [] for shape in self.objects: - shape0 = copy.copy(shape) + shape0 = copy(shape) shape0["keyframe"] = True shape0["outside"] = False # TODO: Separate attributes on mutable and unmutable shape0["attributes"] = [] shape0.pop("group", None) - shape1 = copy.copy(shape0) + shape1 = copy(shape0) shape1["outside"] = True shape1["frame"] += 1 @@ -198,12 +255,12 @@ def _calc_polygons_similarity(p0, p1): has_same_type = obj0["type"] == obj1["type"] has_same_label = obj0.get("label_id") == obj1.get("label_id") if has_same_type and has_same_label: - if obj0["type"] == models.ShapeType.RECTANGLE: + if obj0["type"] == ShapeType.RECTANGLE: p0 = geometry.box(*obj0["points"]) p1 = geometry.box(*obj1["points"]) return _calc_polygons_similarity(p0, p1) - elif obj0["type"] == models.ShapeType.POLYGON: + elif obj0["type"] == ShapeType.POLYGON: p0 = geometry.Polygon(pairwise(obj0["points"])) p1 = geometry.Polygon(pairwise(obj1["points"])) @@ -286,7 +343,7 @@ def _calc_objects_similarity(obj0, obj1, start_frame, overlap): def _modify_unmached_object(obj, end_frame): shape = obj["shapes"][-1] if not shape["outside"]: - shape = copy.deepcopy(shape) + shape = deepcopy(shape) shape["frame"] = end_frame shape["outside"] = True obj["shapes"].append(shape) @@ -304,7 +361,7 @@ def normalize_shape(shape): points.append(p.x) points.append(p.y) - shape = copy.copy(shape) + shape = copy(shape) shape["points"] = points return shape @@ -314,8 +371,8 @@ def get_interpolated_shapes(track, start_frame, end_frame): def interpolate(shape0, shape1): shapes = [] is_same_type = shape0["type"] == shape1["type"] - is_polygon = shape0["type"] == models.ShapeType.POLYGON - is_polyline = shape0["type"] == models.ShapeType.POLYLINE + is_polygon = shape0["type"] == ShapeType.POLYGON + is_polyline = shape0["type"] ==
ShapeType.POLYLINE is_same_size = len(shape0["points"]) == len(shape1["points"]) if not is_same_type or is_polygon or is_polyline or not is_same_size: shape0 = TrackManager.normalize_shape(shape0) @@ -329,7 +386,7 @@ def interpolate(shape0, shape1): points = np.asarray(shape0["points"]).reshape(-1, 2) else: points = (shape0["points"] + step * off).reshape(-1, 2) - shape = copy.deepcopy(shape0) + shape = deepcopy(shape0) if len(points) == 1: shape["points"] = points.flatten() else: @@ -353,7 +410,7 @@ def interpolate(shape0, shape1): assert shape["frame"] > curr_frame for attr in prev_shape["attributes"]: if attr["spec_id"] not in map(lambda el: el["spec_id"], shape["attributes"]): - shape["attributes"].append(copy.deepcopy(attr)) + shape["attributes"].append(deepcopy(attr)) if not prev_shape["outside"]: shapes.extend(interpolate(prev_shape, shape)) @@ -363,9 +420,9 @@ def interpolate(shape0, shape1): prev_shape = shape # TODO: Need to modify a client and a database (append "outside" shapes for polytracks) - if not prev_shape["outside"] and (prev_shape["type"] == models.ShapeType.RECTANGLE - or prev_shape["type"] == models.ShapeType.POINTS): - shape = copy.copy(prev_shape) + if not prev_shape["outside"] and (prev_shape["type"] == ShapeType.RECTANGLE + or prev_shape["type"] == ShapeType.POINTS): + shape = copy(prev_shape) shape["frame"] = end_frame shapes.extend(interpolate(prev_shape, shape)) diff --git a/cvat/apps/dataset_manager/bindings.py b/cvat/apps/dataset_manager/bindings.py index 19ef6d2fc6bb..840d35ac05ed 100644 --- a/cvat/apps/dataset_manager/bindings.py +++ b/cvat/apps/dataset_manager/bindings.py @@ -3,112 +3,17 @@ # # SPDX-License-Identifier: MIT -import osp as osp +import os.path as osp from collections import OrderedDict, namedtuple -from copy import deepcopy -from django.db import transaction from django.utils import timezone import datumaro.components.extractor as datumaro -from cvat.apps.engine.annotation import TaskAnnotation -from cvat.apps.engine.annotation_manager import AnnotationManager, TrackManager -from cvat.apps.engine.models import AttributeType, ShapeType from cvat.apps.engine.frame_provider import FrameProvider -from cvat.apps.engine.serializers import LabeledDataSerializer +from cvat.apps.engine.models import AttributeType, ShapeType from datumaro.util.image import Image - -class AnnotationIR: - def __init__(self, data=None): - self.reset() - if data: - self._tags = getattr(data, 'tags', []) or data['tags'] - self._shapes = getattr(data, 'shapes', []) or data['shapes'] - self._tracks = getattr(data, 'tracks', []) or data['tracks'] - - def add_tag(self, tag): - self._tags.append(tag) - - def add_shape(self, shape): - self._shapes.append(shape) - - def add_track(self, track): - self._tracks.append(track) - - @property - def tags(self): - return self._tags - - @property - def shapes(self): - return self._shapes - - @property - def tracks(self): - return self._tracks - - @property - def version(self): - return self._version - - @tags.setter - def tags(self, tags): - self._tags = tags - - @shapes.setter - def shapes(self, shapes): - self._shapes = shapes - - @tracks.setter - def tracks(self, tracks): - self._tracks = tracks - - @version.setter - def version(self, version): - self._version = version - - def __getitem__(self, key): - return getattr(self, key) - - @property - def data(self): - return { - 'version': self.version, - 'tags': self.tags, - 'shapes': self.shapes, - 'tracks': self.tracks, - } - - def serialize(self): - serializer = 
LabeledDataSerializer(data=self.data) - if serializer.is_valid(raise_exception=True): - return serializer.data - - # makes a data copy from specified frame interval - def slice(self, start, stop): - def is_frame_inside(x): return (start <= int(x['frame']) <= stop) - splitted_data = AnnotationIR() - splitted_data.tags = deepcopy(list(filter(is_frame_inside, self.tags))) - splitted_data.shapes = deepcopy( - list(filter(is_frame_inside, self.shapes))) - splitted_data.tracks = deepcopy(list(filter(lambda y: len( - list(filter(is_frame_inside, y['shapes']))), self.tracks))) - - return splitted_data - - @data.setter - def data(self, data): - self.version = data['version'] - self.tags = data['tags'] - self.shapes = data['shapes'] - self.tracks = data['tracks'] - - def reset(self): - self._version = 0 - self._tags = [] - self._shapes = [] - self._tracks = [] +from .annotation import AnnotationManager, TrackManager class TaskData: @@ -125,10 +30,9 @@ class TaskData: Frame = namedtuple( 'Frame', 'frame, name, width, height, labeled_shapes, tags') - def __init__(self, annotation_ir, db_task, scheme='', host='', create_callback=None): + def __init__(self, annotation_ir, db_task, host='', create_callback=None): self._annotation_ir = annotation_ir self._db_task = db_task - self._scheme = scheme self._host = host self._create_callback = create_callback self._MAX_ANNO_SIZE = 30000 @@ -248,8 +152,8 @@ def _init_meta(self): ("id", str(db_segment.id)), ("start", str(db_segment.start_frame)), ("stop", str(db_segment.stop_frame)), - ("url", "{0}://{1}/?id={2}".format( - self._scheme, self._host, db_segment.job_set.all()[0].id))] + ("url", "{}/?id={}".format( + self._host, db_segment.job_set.all()[0].id))] )) for db_segment in db_segments ]), @@ -279,17 +183,17 @@ def _export_attributes(self, attributes): exported_attributes = [] for attr in attributes: attribute_name = self._get_attribute_name(attr["spec_id"]) - exported_attributes.append(Annotation.Attribute( + exported_attributes.append(TaskData.Attribute( name=attribute_name, value=attr["value"], )) return exported_attributes def _export_tracked_shape(self, shape): - return Annotation.TrackedShape( + return TaskData.TrackedShape( type=shape["type"], frame=self._db_task.data.start_frame + - shape["frame"] * self._frame_step, + shape["frame"] * self._frame_step, label=self._get_label_name(shape["label_id"]), points=shape["points"], occluded=shape["occluded"], @@ -302,11 +206,11 @@ def _export_tracked_shape(self, shape): ) def _export_labeled_shape(self, shape): - return Annotation.LabeledShape( + return TaskData.LabeledShape( type=shape["type"], label=self._get_label_name(shape["label_id"]), frame=self._db_task.data.start_frame + - shape["frame"] * self._frame_step, + shape["frame"] * self._frame_step, points=shape["points"], occluded=shape["occluded"], z_order=shape.get("z_order", 0), @@ -315,44 +219,48 @@ def _export_labeled_shape(self, shape): ) def _export_tag(self, tag): - return Annotation.Tag( + return TaskData.Tag( frame=self._db_task.data.start_frame + - tag["frame"] * self._frame_step, + tag["frame"] * self._frame_step, label=self._get_label_name(tag["label_id"]), group=tag.get("group", 0), attributes=self._export_attributes(tag["attributes"]), ) - def group_by_frame(self): - def _get_frame(annotations, shape): - db_image = self._frame_info[shape["frame"]] - frame = self._db_task.data.start_frame + \ - shape["frame"] * self._frame_step - if frame not in annotations: - annotations[frame] = Annotation.Frame( + def group_by_frame(self, 
include_empty=False): + frames = {} + def get_frame(idx): + frame_info = self._frame_info[idx] + frame = self._db_task.data.start_frame + idx * self._frame_step + if frame not in frames: + frames[frame] = TaskData.Frame( frame=frame, - name=db_image['path'], - height=db_image["height"], - width=db_image["width"], + name=frame_info['path'], + height=frame_info["height"], + width=frame_info["width"], labeled_shapes=[], tags=[], ) - return annotations[frame] + return frames[frame] - annotations = {} - annotation_manager = AnnotationManager(self._annotation_ir) - for shape in sorted(annotation_manager.to_shapes(self._db_task.data.size), key=lambda shape: shape.get("z_order", 0)): + if include_empty: + for idx in self._frame_info: + get_frame(idx) + + anno_manager = AnnotationManager(self._annotation_ir) + for shape in sorted(anno_manager.to_shapes(self._db_task.data.size), + key=lambda shape: shape.get("z_order", 0)): if 'track_id' in shape: exported_shape = self._export_tracked_shape(shape) else: exported_shape = self._export_labeled_shape(shape) - _get_frame(annotations, shape).labeled_shapes.append( + get_frame(shape['frame']).labeled_shapes.append( exported_shape) for tag in self._annotation_ir.tags: - _get_frame(annotations, tag).tags.append(self._export_tag(tag)) + get_frame(tag['frame']).tags.append(self._export_tag(tag)) - return iter(annotations.values()) + return iter(frames.values()) @property def shapes(self): @@ -370,11 +278,11 @@ def tracks(self): tracked_shape["group"] = track["group"] tracked_shape["label_id"] = track["label_id"] - yield Annotation.Track( + yield TaskData.Track( label=self._get_label_name(track["label_id"]), group=track["group"], - shapes=[self._export_tracked_shape( - shape) for shape in tracked_shapes], + shapes=[self._export_tracked_shape(shape) + for shape in tracked_shapes], ) @property @@ -491,34 +399,35 @@ def match_frame(self, filename): return self._frame_mapping[_filename] raise Exception( - "Cannot match filename or determinate framenumber for {} filename".format(filename)) + "Cannot match filename or determine frame number for {} filename".format(filename)) class CvatTaskDataExtractor(datumaro.Extractor): def __init__(self, task_data, include_images=False): self._categories = self._load_categories(task_data) - dm_annotations = [] + dm_items = [] if include_images: frame_provider = FrameProvider(task_data.db_task.data) - frame_provider.get_frames( - self._frame_provider.Quality.ORIGINAL, - self._frame_provider.Type.NUMPY_ARRAY) - - for cvat_frame_anno in task_data.group_by_frame(): - dm_anno = self._read_cvat_anno(cvat_frame_anno, task_data) - dm_image = Image(path=cvat_frame_anno.name, size=( - cvat_frame_anno.height, cvat_frame_anno.width) + + for frame_data in task_data.group_by_frame(include_empty=include_images): + loader = None + if include_images: + # bind the frame id early: the loader runs lazily, after the loop + loader = lambda p, i=frame_data.frame: frame_provider.get_frame(i, + quality=frame_provider.Quality.ORIGINAL, + out_type=frame_provider.Type.NUMPY_ARRAY) + dm_image = Image(path=frame_data.name, loader=loader, + size=(frame_data.height, frame_data.width) + ) + dm_anno = self._read_cvat_anno(frame_data, task_data) + dm_item = datumaro.DatasetItem(id=frame_data.frame, + annotations=dm_anno, image=dm_image) + dm_items.append((frame_data.frame, dm_item)) - dm_annotations = sorted(dm_annotations, key=lambda e: int(e[0])) - self._items = OrderedDict(dm_annotations) + self._items = sorted(dm_items, key=lambda e: e[0])
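+ # A consumption sketch (hypothetical, not part of this patch): Datumaro
+ # reads this extractor lazily, so with include_images=True a frame is
+ # only decoded when an item's image data is actually accessed:
+ #
+ # extractor = CvatTaskDataExtractor(task_data, include_images=True)
+ # dataset = Dataset.from_extractors(extractor) # transforms stay lazy
+ # for item in dataset:
+ # pixels = item.image.data # the frame is fetched only here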
def __iter__(self): - for item in self._items.values(): + for _, item in self._items: yield item def __len__(self): @@ -543,7 +452,7 @@ def _load_categories(cvat_anno): return categories - def _read_cvat_anno(self, cvat_frame_anno, cvat_task_anno): + def _read_cvat_anno(self, cvat_frame_anno, task_data): item_anno = [] categories = self.categories() @@ -551,7 +460,7 @@ def _read_cvat_anno(self, cvat_frame_anno, cvat_task_anno): def map_label(name): return label_cat.find(name)[0] label_attrs = { label['name']: label['attributes'] - for _, label in cvat_task_anno.meta['task']['labels'] + for _, label in task_data.meta['task']['labels'] } def convert_attrs(label, cvat_attrs): @@ -613,19 +522,18 @@ def convert_attrs(label, cvat_attrs): return item_anno - -def match_frame(item, cvat_task_anno): - is_video = cvat_task_anno.meta['task']['mode'] == 'interpolation' +def match_frame(item, task_data): + is_video = task_data.meta['task']['mode'] == 'interpolation' frame_number = None if frame_number is None: try: - frame_number = cvat_task_anno.match_frame(item.id) + frame_number = task_data.match_frame(item.id) except Exception: pass if frame_number is None and item.has_image: try: - frame_number = cvat_task_anno.match_frame(item.image.filename) + frame_number = task_data.match_frame(item.image.filename) except Exception: pass if frame_number is None: @@ -635,13 +543,12 @@ def match_frame(item, cvat_task_anno): pass if frame_number is None and is_video and item.id.startswith('frame_'): frame_number = int(item.id[len('frame_'):]) - if not frame_number in cvat_task_anno.frame_info: + if not frame_number in task_data.frame_info: raise Exception("Could not match item id: '%s' with any task frame" % - item.id) + item.id) return frame_number - -def import_dm_annotations(dm_dataset, cvat_task_anno): +def import_dm_annotations(dm_dataset, task_data): shapes = { datumaro.AnnotationType.bbox: ShapeType.RECTANGLE, datumaro.AnnotationType.polygon: ShapeType.POLYGON, @@ -652,7 +559,7 @@ def import_dm_annotations(dm_dataset, cvat_task_anno): label_cat = dm_dataset.categories()[datumaro.AnnotationType.label] for item in dm_dataset: - frame_number = match_frame(item, cvat_task_anno) + frame_number = match_frame(item, task_data) # do not store one-item groups group_map = {0: 0} @@ -672,21 +579,21 @@ def import_dm_annotations(dm_dataset, cvat_task_anno): for ann in item.annotations: if ann.type in shapes: - cvat_task_anno.add_shape(cvat_task_anno.LabeledShape( + task_data.add_shape(task_data.LabeledShape( type=shapes[ann.type], frame=frame_number, label=label_cat.items[ann.label].name, points=ann.points, occluded=ann.attributes.get('occluded') == True, group=group_map.get(ann.group, 0), - attributes=[cvat_task_anno.Attribute(name=n, value=str(v)) + attributes=[task_data.Attribute(name=n, value=str(v)) for n, v in ann.attributes.items()], )) elif ann.type == datumaro.AnnotationType.label: - cvat_task_anno.add_tag(cvat_task_anno.Tag( + task_data.add_tag(task_data.Tag( frame=frame_number, label=label_cat.items[ann.label].name, group=group_map.get(ann.group, 0), - attributes=[cvat_task_anno.Attribute(name=n, value=str(v)) + attributes=[task_data.Attribute(name=n, value=str(v)) for n, v in ann.attributes.items()], )) diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py index a0e6bb6221b5..b753a8a59332 100644 --- a/cvat/apps/dataset_manager/task.py +++ b/cvat/apps/dataset_manager/task.py @@ -3,314 +3,739 @@ # # SPDX-License-Identifier: MIT -import json -import os -import os.path as osp -import 
shutil -import tempfile -from datetime import timedelta - -import django_rq +from collections import OrderedDict +from enum import Enum + +from django.conf import settings +from django.db import transaction from django.utils import timezone -from cvat.apps.engine.log import slogger -from cvat.apps.engine.models import Task +from cvat.apps.engine import models, serializers +from cvat.apps.engine.plugins import plugin_decorator +from cvat.apps.profiler import silk_profile -from cvat.settings.base import BASE_DIR as _CVAT_ROOT_DIR, \ - DATUMARO_PATH as _DATUMARO_REPO_PATH -import datumaro.components.extractor as datumaro -from datumaro.components.project import Project +from .annotation import AnnotationIR, AnnotationManager +from .bindings import TaskData +from .formats import make_exporter, make_importer -from .bindings import CvatImagesExtractor, CvatTaskExtractor -from .formats import DEFAULT_FORMAT, FORMAT_DATUMARO -from .util import current_function_name, make_zip_archive -_FORMATS_DIR = osp.join(osp.dirname(__file__), 'formats') +class dotdict(OrderedDict): + """dot.notation access to dictionary attributes""" + __getattr__ = OrderedDict.get + __setattr__ = OrderedDict.__setitem__ + __delattr__ = OrderedDict.__delitem__ + __eq__ = lambda self, other: self.id == other.id + __hash__ = lambda self: self.id -_MODULE_NAME = __package__ + '.' + osp.splitext(osp.basename(__file__))[0] -def log_exception(logger=None, exc_info=True): - if logger is None: - logger = slogger - logger.exception("[%s @ %s]: exception occurred" % \ - (_MODULE_NAME, current_function_name(2)), - exc_info=exc_info) +class PatchAction(str, Enum): + CREATE = "create" + UPDATE = "update" + DELETE = "delete" -_TASK_IMAGES_REMOTE_EXTRACTOR = 'cvat_rest_api_task_images' + @classmethod + def values(cls): + return [item.value for item in cls] -def get_export_cache_dir(db_task): - return osp.join(db_task.get_task_dirname(), 'export_cache') + def __str__(self): + return self.value +def bulk_create(db_model, objects, flt_param): + if objects: + if flt_param: + if 'postgresql' in settings.DATABASES["default"]["ENGINE"]: + return db_model.objects.bulk_create(objects) + else: + ids = list(db_model.objects.filter(**flt_param).values_list('id', flat=True)) + db_model.objects.bulk_create(objects) -class TaskProject: - @staticmethod - def _get_datumaro_project_dir(db_task): - return osp.join(db_task.get_task_dirname(), 'datumaro') + return list(db_model.objects.exclude(id__in=ids).filter(**flt_param)) + else: + return db_model.objects.bulk_create(objects) + + return [] + +def _merge_table_rows(rows, keys_for_merge, field_id): + # It is necessary to keep a stable order of original rows + # (e.g. for tracked boxes). Otherwise prev_box.frame can be bigger + # than next_box.frame. + merged_rows = OrderedDict() + + # Group all rows by field_id. In grouped rows replace fields in + # accordance with keys_for_merge structure. 
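+    #
+    # For example (hypothetical values, abbreviated), two rows produced by
+    # a values() query for the same object:
+    #   rows = [
+    #       {'id': 1, 'frame': 0, 'labeledimageattributeval__id': 10,
+    #        'labeledimageattributeval__spec_id': 7},
+    #       {'id': 1, 'frame': 0, 'labeledimageattributeval__id': 11,
+    #        'labeledimageattributeval__spec_id': 8},
+    #   ]
+    # are merged into a single record:
+    #   _merge_table_rows(rows, keys_for_merge={'attrs': [
+    #       'labeledimageattributeval__id', 'labeledimageattributeval__spec_id',
+    #   ]}, field_id='id')
+    #   # -> [{'id': 1, 'frame': 0,
+    #   #      'attrs': [{'id': 10, 'spec_id': 7}, {'id': 11, 'spec_id': 8}]}]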
+    for row in rows:
+        row_id = row[field_id]
+        if not row_id in merged_rows:
+            merged_rows[row_id] = dotdict(row)
+            for key in keys_for_merge:
+                merged_rows[row_id][key] = []
+
+        for key in keys_for_merge:
+            item = dotdict({v.split('__', 1)[-1]:row[v] for v in keys_for_merge[key]})
+            if item.id is not None:
+                merged_rows[row_id][key].append(item)
+
+    # Remove redundant keys from final objects
+    redundant_keys = [item for values in keys_for_merge.values() for item in values]
+    for i in merged_rows:
+        for j in redundant_keys:
+            del merged_rows[i][j]
+
+    return list(merged_rows.values())
+
+class JobAnnotation:
+    def __init__(self, pk):
+        self.db_job = models.Job.objects.select_related('segment__task') \
+            .select_for_update().get(id=pk)
+
+        db_segment = self.db_job.segment
+        self.start_frame = db_segment.start_frame
+        self.stop_frame = db_segment.stop_frame
+        self.ir_data = AnnotationIR()
+
+        self.db_labels = {db_label.id:db_label
+            for db_label in db_segment.task.label_set.all()}
+
+        self.db_attributes = {}
+        for db_label in self.db_labels.values():
+            self.db_attributes[db_label.id] = {
+                "mutable": OrderedDict(),
+                "immutable": OrderedDict(),
+                "all": OrderedDict(),
+            }
+            for db_attr in db_label.attributespec_set.all():
+                default_value = dotdict([
+                    ('spec_id', db_attr.id),
+                    ('value', db_attr.default_value),
+                ])
+                if db_attr.mutable:
+                    self.db_attributes[db_label.id]["mutable"][db_attr.id] = default_value
+                else:
+                    self.db_attributes[db_label.id]["immutable"][db_attr.id] = default_value
+
+                self.db_attributes[db_label.id]["all"][db_attr.id] = default_value
+
+    def reset(self):
+        self.ir_data.reset()
+
+    def _save_tracks_to_db(self, tracks):
+        db_tracks = []
+        db_track_attrvals = []
+        db_shapes = []
+        db_shape_attrvals = []
+
+        for track in tracks:
+            track_attributes = track.pop("attributes", [])
+            shapes = track.pop("shapes")
+            db_track = models.LabeledTrack(job=self.db_job, **track)
+            if db_track.label_id not in self.db_labels:
+                raise AttributeError("label_id `{}` is invalid".format(db_track.label_id))
+
+            for attr in track_attributes:
+                db_attrval = models.LabeledTrackAttributeVal(**attr)
+                if db_attrval.spec_id not in self.db_attributes[db_track.label_id]["immutable"]:
+                    raise AttributeError("spec_id `{}` is invalid".format(db_attrval.spec_id))
+                db_attrval.track_id = len(db_tracks)
+                db_track_attrvals.append(db_attrval)
+
+            for shape in shapes:
+                shape_attributes = shape.pop("attributes", [])
+                # FIXME: need to clamp points (be sure that all of them inside the image)
+                # Should we check here or implement a validator?
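+                # A minimal clamping sketch for the FIXME above (assumes the
+                # frame size is available as `width` and `height`, which this
+                # code does not look up here):
+                #   points = shape["points"]
+                #   points[0::2] = [min(max(x, 0), width) for x in points[0::2]]
+                #   points[1::2] = [min(max(y, 0), height) for y in points[1::2]]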
+ db_shape = models.TrackedShape(**shape) + db_shape.track_id = len(db_tracks) + + for attr in shape_attributes: + db_attrval = models.TrackedShapeAttributeVal(**attr) + if db_attrval.spec_id not in self.db_attributes[db_track.label_id]["mutable"]: + raise AttributeError("spec_id `{}` is invalid".format(db_attrval.spec_id)) + db_attrval.shape_id = len(db_shapes) + db_shape_attrvals.append(db_attrval) + + db_shapes.append(db_shape) + shape["attributes"] = shape_attributes + + db_tracks.append(db_track) + track["attributes"] = track_attributes + track["shapes"] = shapes + + db_tracks = bulk_create( + db_model=models.LabeledTrack, + objects=db_tracks, + flt_param={"job_id": self.db_job.id} + ) + + for db_attrval in db_track_attrvals: + db_attrval.track_id = db_tracks[db_attrval.track_id].id + bulk_create( + db_model=models.LabeledTrackAttributeVal, + objects=db_track_attrvals, + flt_param={} + ) + + for db_shape in db_shapes: + db_shape.track_id = db_tracks[db_shape.track_id].id + + db_shapes = bulk_create( + db_model=models.TrackedShape, + objects=db_shapes, + flt_param={"track__job_id": self.db_job.id} + ) + + for db_attrval in db_shape_attrvals: + db_attrval.shape_id = db_shapes[db_attrval.shape_id].id + + bulk_create( + db_model=models.TrackedShapeAttributeVal, + objects=db_shape_attrvals, + flt_param={} + ) + + shape_idx = 0 + for track, db_track in zip(tracks, db_tracks): + track["id"] = db_track.id + for shape in track["shapes"]: + shape["id"] = db_shapes[shape_idx].id + shape_idx += 1 + + self.ir_data.tracks = tracks + + def _save_shapes_to_db(self, shapes): + db_shapes = [] + db_attrvals = [] + + for shape in shapes: + attributes = shape.pop("attributes", []) + # FIXME: need to clamp points (be sure that all of them inside the image) + # Should we check here or implement a validator? 
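+            # Note on the pattern below: new rows are linked to their parents
+            # by list index first (db_attrval.shape_id = len(db_shapes)), and
+            # after bulk_create() returns the saved rows the index is replaced
+            # with the real primary key
+            # (db_attrval.shape_id = db_shapes[db_attrval.shape_id].id).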
+ db_shape = models.LabeledShape(job=self.db_job, **shape) + if db_shape.label_id not in self.db_labels: + raise AttributeError("label_id `{}` is invalid".format(db_shape.label_id)) + + for attr in attributes: + db_attrval = models.LabeledShapeAttributeVal(**attr) + if db_attrval.spec_id not in self.db_attributes[db_shape.label_id]["all"]: + raise AttributeError("spec_id `{}` is invalid".format(db_attrval.spec_id)) + + db_attrval.shape_id = len(db_shapes) + db_attrvals.append(db_attrval) + + db_shapes.append(db_shape) + shape["attributes"] = attributes + + db_shapes = bulk_create( + db_model=models.LabeledShape, + objects=db_shapes, + flt_param={"job_id": self.db_job.id} + ) + + for db_attrval in db_attrvals: + db_attrval.shape_id = db_shapes[db_attrval.shape_id].id + + bulk_create( + db_model=models.LabeledShapeAttributeVal, + objects=db_attrvals, + flt_param={} + ) + + for shape, db_shape in zip(shapes, db_shapes): + shape["id"] = db_shape.id + + self.ir_data.shapes = shapes + + def _save_tags_to_db(self, tags): + db_tags = [] + db_attrvals = [] + + for tag in tags: + attributes = tag.pop("attributes", []) + db_tag = models.LabeledImage(job=self.db_job, **tag) + if db_tag.label_id not in self.db_labels: + raise AttributeError("label_id `{}` is invalid".format(db_tag.label_id)) + + for attr in attributes: + db_attrval = models.LabeledImageAttributeVal(**attr) + if db_attrval.spec_id not in self.db_attributes[db_tag.label_id]["all"]: + raise AttributeError("spec_id `{}` is invalid".format(db_attrval.spec_id)) + db_attrval.tag_id = len(db_tags) + db_attrvals.append(db_attrval) + + db_tags.append(db_tag) + tag["attributes"] = attributes + + db_tags = bulk_create( + db_model=models.LabeledImage, + objects=db_tags, + flt_param={"job_id": self.db_job.id} + ) + + for db_attrval in db_attrvals: + db_attrval.image_id = db_tags[db_attrval.tag_id].id + + bulk_create( + db_model=models.LabeledImageAttributeVal, + objects=db_attrvals, + flt_param={} + ) + + for tag, db_tag in zip(tags, db_tags): + tag["id"] = db_tag.id + + self.ir_data.tags = tags + + def _commit(self): + db_prev_commit = self.db_job.commits.last() + db_curr_commit = models.JobCommit() + if db_prev_commit: + db_curr_commit.version = db_prev_commit.version + 1 + else: + db_curr_commit.version = 1 + db_curr_commit.job = self.db_job + db_curr_commit.message = "Changes: tags - {}; shapes - {}; tracks - {}".format( + len(self.ir_data.tags), len(self.ir_data.shapes), len(self.ir_data.tracks)) + db_curr_commit.save() + self.ir_data.version = db_curr_commit.version + + def _set_updated_date(self): + db_task = self.db_job.segment.task + db_task.updated_date = timezone.now() + db_task.save() + + def _save_to_db(self, data): + self.reset() + self._save_tags_to_db(data["tags"]) + self._save_shapes_to_db(data["shapes"]) + self._save_tracks_to_db(data["tracks"]) + + return self.ir_data.tags or self.ir_data.shapes or self.ir_data.tracks + + def _create(self, data): + if self._save_to_db(data): + self._set_updated_date() + self.db_job.save() + + def create(self, data): + self._create(data) + self._commit() + + def put(self, data): + self._delete() + self._create(data) + self._commit() + + def update(self, data): + self._delete(data) + self._create(data) + self._commit() + + def _delete(self, data=None): + deleted_shapes = 0 + if data is None: + deleted_shapes += self.db_job.labeledimage_set.all().delete()[0] + deleted_shapes += self.db_job.labeledshape_set.all().delete()[0] + deleted_shapes += self.db_job.labeledtrack_set.all().delete()[0] + else: + 
labeledimage_ids = [image["id"] for image in data["tags"]] + labeledshape_ids = [shape["id"] for shape in data["shapes"]] + labeledtrack_ids = [track["id"] for track in data["tracks"]] + labeledimage_set = self.db_job.labeledimage_set + labeledimage_set = labeledimage_set.filter(pk__in=labeledimage_ids) + labeledshape_set = self.db_job.labeledshape_set + labeledshape_set = labeledshape_set.filter(pk__in=labeledshape_ids) + labeledtrack_set = self.db_job.labeledtrack_set + labeledtrack_set = labeledtrack_set.filter(pk__in=labeledtrack_ids) + + # It is not important for us that data had some "invalid" objects + # which were skipped (not acutally deleted). The main idea is to + # say that all requested objects are absent in DB after the method. + self.ir_data.tags = data['tags'] + self.ir_data.shapes = data['shapes'] + self.ir_data.tracks = data['tracks'] + + deleted_shapes += labeledimage_set.delete()[0] + deleted_shapes += labeledshape_set.delete()[0] + deleted_shapes += labeledtrack_set.delete()[0] + + if deleted_shapes: + self._set_updated_date() + + def delete(self, data=None): + self._delete(data) + self._commit() @staticmethod - def create(db_task): - task_project = TaskProject(db_task) - task_project._create() - return task_project + def _extend_attributes(attributeval_set, default_attribute_values): + shape_attribute_specs_set = set(attr.spec_id for attr in attributeval_set) + for db_attr in default_attribute_values: + if db_attr.spec_id not in shape_attribute_specs_set: + attributeval_set.append(dotdict([ + ('spec_id', db_attr.spec_id), + ('value', db_attr.value), + ])) + + def _init_tags_from_db(self): + db_tags = self.db_job.labeledimage_set.prefetch_related( + "label", + "labeledimageattributeval_set" + ).values( + 'id', + 'frame', + 'label_id', + 'group', + 'labeledimageattributeval__spec_id', + 'labeledimageattributeval__value', + 'labeledimageattributeval__id', + ).order_by('frame') + + db_tags = _merge_table_rows( + rows=db_tags, + keys_for_merge={ + "labeledimageattributeval_set": [ + 'labeledimageattributeval__spec_id', + 'labeledimageattributeval__value', + 'labeledimageattributeval__id', + ], + }, + field_id='id', + ) + + for db_tag in db_tags: + self._extend_attributes(db_tag.labeledimageattributeval_set, + self.db_attributes[db_tag.label_id]["all"].values()) + + serializer = serializers.LabeledImageSerializer(db_tags, many=True) + self.ir_data.tags = serializer.data + + def _init_shapes_from_db(self): + db_shapes = self.db_job.labeledshape_set.prefetch_related( + "label", + "labeledshapeattributeval_set" + ).values( + 'id', + 'label_id', + 'type', + 'frame', + 'group', + 'occluded', + 'z_order', + 'points', + 'labeledshapeattributeval__spec_id', + 'labeledshapeattributeval__value', + 'labeledshapeattributeval__id', + ).order_by('frame') + + db_shapes = _merge_table_rows( + rows=db_shapes, + keys_for_merge={ + 'labeledshapeattributeval_set': [ + 'labeledshapeattributeval__spec_id', + 'labeledshapeattributeval__value', + 'labeledshapeattributeval__id', + ], + }, + field_id='id', + ) + for db_shape in db_shapes: + self._extend_attributes(db_shape.labeledshapeattributeval_set, + self.db_attributes[db_shape.label_id]["all"].values()) + + serializer = serializers.LabeledShapeSerializer(db_shapes, many=True) + self.ir_data.shapes = serializer.data + + def _init_tracks_from_db(self): + db_tracks = self.db_job.labeledtrack_set.prefetch_related( + "label", + "labeledtrackattributeval_set", + "trackedshape_set__trackedshapeattributeval_set" + ).values( + "id", + "frame", + 
"label_id", + "group", + "labeledtrackattributeval__spec_id", + "labeledtrackattributeval__value", + "labeledtrackattributeval__id", + "trackedshape__type", + "trackedshape__occluded", + "trackedshape__z_order", + "trackedshape__points", + "trackedshape__id", + "trackedshape__frame", + "trackedshape__outside", + "trackedshape__trackedshapeattributeval__spec_id", + "trackedshape__trackedshapeattributeval__value", + "trackedshape__trackedshapeattributeval__id", + ).order_by('id', 'trackedshape__frame') + + db_tracks = _merge_table_rows( + rows=db_tracks, + keys_for_merge={ + "labeledtrackattributeval_set": [ + "labeledtrackattributeval__spec_id", + "labeledtrackattributeval__value", + "labeledtrackattributeval__id", + ], + "trackedshape_set":[ + "trackedshape__type", + "trackedshape__occluded", + "trackedshape__z_order", + "trackedshape__points", + "trackedshape__id", + "trackedshape__frame", + "trackedshape__outside", + "trackedshape__trackedshapeattributeval__spec_id", + "trackedshape__trackedshapeattributeval__value", + "trackedshape__trackedshapeattributeval__id", + ], + }, + field_id="id", + ) + + for db_track in db_tracks: + db_track["trackedshape_set"] = _merge_table_rows(db_track["trackedshape_set"], { + 'trackedshapeattributeval_set': [ + 'trackedshapeattributeval__value', + 'trackedshapeattributeval__spec_id', + 'trackedshapeattributeval__id', + ] + }, 'id') + + # A result table can consist many equal rows for track/shape attributes + # We need filter unique attributes manually + db_track["labeledtrackattributeval_set"] = list(set(db_track["labeledtrackattributeval_set"])) + self._extend_attributes(db_track.labeledtrackattributeval_set, + self.db_attributes[db_track.label_id]["immutable"].values()) + + default_attribute_values = self.db_attributes[db_track.label_id]["mutable"].values() + for db_shape in db_track["trackedshape_set"]: + db_shape["trackedshapeattributeval_set"] = list( + set(db_shape["trackedshapeattributeval_set"]) + ) + # in case of trackedshapes need to interpolate attriute values and extend it + # by previous shape attribute values (not default values) + self._extend_attributes(db_shape["trackedshapeattributeval_set"], default_attribute_values) + default_attribute_values = db_shape["trackedshapeattributeval_set"] + + + serializer = serializers.LabeledTrackSerializer(db_tracks, many=True) + self.ir_data.tracks = serializer.data + + def _init_version_from_db(self): + db_commit = self.db_job.commits.last() + self.ir_data.version = db_commit.version if db_commit else 0 + + def init_from_db(self): + self._init_tags_from_db() + self._init_shapes_from_db() + self._init_tracks_from_db() + self._init_version_from_db() + + @property + def data(self): + return self.ir_data.data + + def import_annotations(self, src_file, importer): + task_data = TaskData( + annotation_ir=AnnotationIR(), + db_task=self.db_job.segment.task, + create_callback=self.create, + ) + self.delete() - @staticmethod - def load(db_task): - task_project = TaskProject(db_task) - task_project._load() - task_project._init_dataset() - return task_project + importer(src_file, task_data) - @staticmethod - def from_task(db_task, user): - task_project = TaskProject(db_task) - task_project._import_from_task(user) - return task_project - - def __init__(self, db_task): - self._db_task = db_task - self._project_dir = self._get_datumaro_project_dir(db_task) - self._project = None - self._dataset = None - - def _create(self): - self._project = Project.generate(self._project_dir) - self._project.add_source('task_%s' % 
self._db_task.id, { - 'format': _TASK_IMAGES_EXTRACTOR, - }) - self._project.env.extractors.register(_TASK_IMAGES_EXTRACTOR, - lambda url: CvatImagesExtractor(url, - FrameProvider(self._db_task.data))) - - self._init_dataset() - self._dataset.define_categories(self._generate_categories()) - - self.save() - - def _load(self): - self._project = Project.load(self._project_dir) - - def _import_from_task(self, user): - self._project = Project.generate(self._project_dir, config={ - 'project_name': self._db_task.name, - 'plugins_dir': _FORMATS_DIR, - }) - - self._project.add_source('task_%s' % self._db_task.id, { - 'format': _TASK_EXTRACTOR, - }) - self._project.env.extractors.register(_TASK_EXTRACTOR, - lambda url: CvatTaskExtractor(url, - db_task=self._db_task, user=user)) - - self._init_dataset() - - def _init_dataset(self): - self._dataset = self._project.make_dataset() - - def _generate_categories(self): - categories = {} - label_categories = datumaro.LabelCategories() - - db_labels = self._db_task.label_set.all() - for db_label in db_labels: - db_attributes = db_label.attributespec_set.all() - label_categories.add(db_label.name) - - for db_attr in db_attributes: - label_categories.attributes.add(db_attr.name) - - categories[datumaro.AnnotationType.label] = label_categories - - return categories - - def put_annotations(self, annotations): - raise NotImplementedError() - - def save(self, save_dir=None, save_images=False): - if self._dataset is not None: - self._dataset.save(save_dir=save_dir, save_images=save_images) - else: - self._project.save(save_dir=save_dir) + self.create(task_data.data.slice(self.start_frame, self.stop_frame).serialize()) - def export(self, dst_format, save_dir, save_images=False, server_url=None): - if self._dataset is None: - self._init_dataset() - if dst_format == FORMAT_DATUMARO: - self._remote_export(save_dir=save_dir, server_url=server_url) - else: - converter = self._dataset.env.make_converter(dst_format, - save_images=save_images) - self._dataset.export_project(converter=converter, save_dir=save_dir) - - def _remote_image_converter(self, save_dir, server_url=None): - os.makedirs(save_dir, exist_ok=True) - - db_task = self._db_task - items = [] - config = { - 'server_host': 'localhost', - 'task_id': db_task.id, - } - if server_url: - if ':' in server_url: - host, port = server_url.rsplit(':', maxsplit=1) +class TaskAnnotation: + def __init__(self, pk): + self.db_task = models.Task.objects.prefetch_related("data__images").get(id=pk) + + # Postgres doesn't guarantee an order by default without explicit order_by + self.db_jobs = models.Job.objects.select_related("segment").filter(segment__task_id=pk).order_by('id') + self.ir_data = AnnotationIR() + + def reset(self): + self.ir_data.reset() + + def _patch_data(self, data, action): + _data = data if isinstance(data, AnnotationIR) else AnnotationIR(data) + splitted_data = {} + jobs = {} + for db_job in self.db_jobs: + jid = db_job.id + start = db_job.segment.start_frame + stop = db_job.segment.stop_frame + jobs[jid] = { "start": start, "stop": stop } + splitted_data[jid] = _data.slice(start, stop) + + for jid, job_data in splitted_data.items(): + _data = AnnotationIR() + if action is None: + _data.data = put_job_data(jid, job_data) else: - host = server_url - port = None - config['server_host'] = host - if port is not None: - config['server_port'] = int(port) - - images_meta = { - 'images': items, - } - db_video = getattr(self._db_task.data, 'video', None) - if db_video is not None: - for i in 
range(self._db_task.data.size): - frame_info = { - 'id': i, - 'width': db_video.width, - 'height': db_video.height, - } - items.append(frame_info) + _data.data = patch_job_data(jid, job_data, action) + if _data.version > self.ir_data.version: + self.ir_data.version = _data.version + self._merge_data(_data, jobs[jid]["start"], self.db_task.overlap) + + def _merge_data(self, data, start_frame, overlap): + annotation_manager = AnnotationManager(self.ir_data) + annotation_manager.merge(data, start_frame, overlap) + + def put(self, data): + self._patch_data(data, None) + + def create(self, data): + self._patch_data(data, PatchAction.CREATE) + + def update(self, data): + self._patch_data(data, PatchAction.UPDATE) + + def delete(self, data=None): + if data: + self._patch_data(data, PatchAction.DELETE) else: - for db_image in self._db_task.data.images.all(): - frame_info = { - 'id': db_image.frame, - 'name': osp.basename(db_image.path), - 'width': db_image.width, - 'height': db_image.height, - } - items.append(frame_info) - - with open(osp.join(save_dir, 'config.json'), 'w') as config_file: - json.dump(config, config_file) - with open(osp.join(save_dir, 'images_meta.json'), 'w') as images_file: - json.dump(images_meta, images_file) - - def _remote_export(self, save_dir, server_url=None): - if self._dataset is None: - self._init_dataset() - - os.makedirs(save_dir, exist_ok=True) - self._dataset.save(save_dir=save_dir, save_images=False, merge=True) - - exported_project = Project.load(save_dir) - source_name = 'task_%s_images' % self._db_task.id - exported_project.add_source(source_name, { - 'format': _TASK_IMAGES_REMOTE_EXTRACTOR, - }) - self._remote_image_converter( - osp.join(save_dir, exported_project.local_source_dir(source_name)), - server_url=server_url) - exported_project.save() - - - templates_dir = osp.join(osp.dirname(__file__), 'export_templates') - target_dir = exported_project.config.project_dir - os.makedirs(target_dir, exist_ok=True) - shutil.copyfile( - osp.join(templates_dir, 'README.md'), - osp.join(target_dir, 'README.md')) - - templates_dir = osp.join(templates_dir, 'plugins') - target_dir = osp.join(target_dir, - exported_project.config.env_dir, - exported_project.config.plugins_dir) - os.makedirs(target_dir, exist_ok=True) - shutil.copyfile( - osp.join(templates_dir, _TASK_IMAGES_REMOTE_EXTRACTOR + '.py'), - osp.join(target_dir, _TASK_IMAGES_REMOTE_EXTRACTOR + '.py')) - - # NOTE: put datumaro component to the archive so that - # it was available to the user - shutil.copytree(_DATUMARO_REPO_PATH, osp.join(save_dir, 'datumaro'), - ignore=lambda src, names: ['__pycache__'] + [ - n for n in names - if sum([int(n.endswith(ext)) for ext in - ['.pyx', '.pyo', '.pyd', '.pyc']]) - ]) - - # include CVAT CLI module also - cvat_utils_dst_dir = osp.join(save_dir, 'cvat', 'utils') - os.makedirs(cvat_utils_dst_dir) - shutil.copytree(osp.join(_CVAT_ROOT_DIR, 'utils', 'cli'), - osp.join(cvat_utils_dst_dir, 'cli')) - - def upload(self, annotation_file, loader): - annotation_importer = Annotation( + for db_job in self.db_jobs: + delete_job_data(db_job.id) + + def init_from_db(self): + self.reset() + + for db_job in self.db_jobs: + annotation = JobAnnotation(db_job.id) + annotation.init_from_db() + if annotation.ir_data.version > self.ir_data.version: + self.ir_data.version = annotation.ir_data.version + db_segment = db_job.segment + start_frame = db_segment.start_frame + overlap = self.db_task.overlap + self._merge_data(annotation.ir_data, start_frame, overlap) + + def export(self, dst_file, 
exporter, host='', **options): + task_data = TaskData( + annotation_ir=self.ir_data, + db_task=self.db_task, + host=host, + ) + exporter(dst_file, task_data, **options) + + def import_annotations(self, src_file, importer, **options): + task_data = TaskData( annotation_ir=AnnotationIR(), db_task=self.db_task, create_callback=self.create, - ) + ) self.delete() - db_format = loader.annotation_format - with open(annotation_file, 'rb') as file_object: - source_code = open(os.path.join(settings.BASE_DIR, db_format.handler_file.name)).read() - global_vars = globals() - imports = import_modules(source_code) - global_vars.update(imports) - execute_python_code(source_code, global_vars) - - global_vars["file_object"] = file_object - global_vars["annotations"] = annotation_importer - - execute_python_code("{}(file_object, annotations)".format(loader.handler), global_vars) - self.create(annotation_importer.data.serialize()) - - -DEFAULT_CACHE_TTL = timedelta(hours=10) -CACHE_TTL = DEFAULT_CACHE_TTL - -def export_task_as_dataset(task_id, user, dst_format=None, server_url=None): - try: - db_task = Task.objects.get(pk=task_id) - - if not dst_format: - dst_format = DEFAULT_FORMAT - - cache_dir = get_export_cache_dir(db_task) - save_dir = osp.join(cache_dir, dst_format) - archive_path = osp.normpath(save_dir) + '.zip' - - task_time = timezone.localtime(db_task.updated_date).timestamp() - if not (osp.exists(archive_path) and \ - task_time <= osp.getmtime(archive_path)): - os.makedirs(cache_dir, exist_ok=True) - with tempfile.TemporaryDirectory( - dir=cache_dir, prefix=dst_format + '_') as temp_dir: - project = TaskProject.from_task(db_task, user) - project.export(dst_format, save_dir=temp_dir, save_images=True, - server_url=server_url) - - os.makedirs(cache_dir, exist_ok=True) - make_zip_archive(temp_dir, archive_path) - - archive_ctime = osp.getctime(archive_path) - scheduler = django_rq.get_scheduler() - cleaning_job = scheduler.enqueue_in(time_delta=CACHE_TTL, - func=clear_export_cache, - task_id=task_id, - file_path=archive_path, file_ctime=archive_ctime) - slogger.task[task_id].info( - "The task '{}' is exported as '{}' " - "and available for downloading for next '{}'. 
" - "Export cache cleaning job is enqueued, " - "id '{}', start in '{}'".format( - db_task.name, dst_format, CACHE_TTL, - cleaning_job.id, CACHE_TTL)) - - return archive_path - except Exception: - log_exception(slogger.task[task_id]) - raise - -def clear_export_cache(task_id, file_path, file_ctime): - try: - if osp.exists(file_path) and osp.getctime(file_path) == file_ctime: - os.remove(file_path) - slogger.task[task_id].info( - "Export cache file '{}' successfully removed" \ - .format(file_path)) - except Exception: - log_exception(slogger.task[task_id]) - raise + + importer(src_file, task_data, **options) + + self.create(task_data.data.serialize()) + + @property + def data(self): + return self.ir_data.data + + +@silk_profile(name="GET job data") +@transaction.atomic +def get_job_data(pk): + annotation = JobAnnotation(pk) + annotation.init_from_db() + + return annotation.data + +@silk_profile(name="POST job data") +@transaction.atomic +def put_job_data(pk, data): + annotation = JobAnnotation(pk) + annotation.put(data) + + return annotation.data + +@silk_profile(name="UPDATE job data") +@plugin_decorator +@transaction.atomic +def patch_job_data(pk, data, action): + annotation = JobAnnotation(pk) + if action == PatchAction.CREATE: + annotation.create(data) + elif action == PatchAction.UPDATE: + annotation.update(data) + elif action == PatchAction.DELETE: + annotation.delete(data) + + return annotation.data + +@silk_profile(name="DELETE job data") +@transaction.atomic +def delete_job_data(pk): + annotation = JobAnnotation(pk) + annotation.delete() + +@silk_profile(name="GET task data") +@transaction.atomic +def get_task_data(pk): + annotation = TaskAnnotation(pk) + annotation.init_from_db() + + return annotation.data + +@silk_profile(name="POST task data") +@transaction.atomic +def put_task_data(pk, data): + annotation = TaskAnnotation(pk) + annotation.put(data) + + return annotation.data + +@silk_profile(name="UPDATE task data") +@transaction.atomic +def patch_task_data(pk, data, action): + annotation = TaskAnnotation(pk) + if action == PatchAction.CREATE: + annotation.create(data) + elif action == PatchAction.UPDATE: + annotation.update(data) + elif action == PatchAction.DELETE: + annotation.delete(data) + + return annotation.data + +@silk_profile(name="DELETE task data") +@transaction.atomic +def delete_task_data(pk): + annotation = TaskAnnotation(pk) + annotation.delete() + +def export_task(task_id, dst_file, format_name, + server_url=None, save_images=False): + # For big tasks dump function may run for a long time and + # we dont need to acquire lock after _AnnotationForTask instance + # has been initialized from DB. + # But there is the bug with corrupted dump file in case 2 or more dump request received at the same time. 
+    # https://github.com/opencv/cvat/issues/217
+    with transaction.atomic():
+        task = TaskAnnotation(task_id)
+        task.init_from_db()
+
+    exporter = make_exporter(format_name)
+    with open(dst_file, 'wb') as f:
+        task.export(f, exporter, host=server_url, save_images=save_images)
+
+@transaction.atomic
+def import_task_annotations(task_id, src_file, format_name):
+    task = TaskAnnotation(task_id)
+    task.init_from_db()
+
+    importer = make_importer(format_name)
+    with open(src_file, 'rb') as f:
+        task.import_annotations(f, importer)
+
+@transaction.atomic
+def import_job_annotations(job_id, src_file, format_name):
+    job = JobAnnotation(job_id)
+    job.init_from_db()
+
+    importer = make_importer(format_name)
+    with open(src_file, 'rb') as f:
+        job.import_annotations(f, importer)
diff --git a/cvat/apps/engine/annotation.py b/cvat/apps/engine/annotation.py
deleted file mode 100644
index 83ad3fc31d4d..000000000000
--- a/cvat/apps/engine/annotation.py
+++ /dev/null
@@ -1,723 +0,0 @@
-# Copyright (C) 2018 Intel Corporation
-#
-# SPDX-License-Identifier: MIT
-
-import os
-from collections import OrderedDict
-from enum import Enum
-
-from django.conf import settings
-from django.db import transaction
-from django.utils import timezone
-
-from cvat.apps.dataset_manager.bindings import Annotation, AnnotationIR
-from cvat.apps.engine.plugins import plugin_decorator
-from cvat.apps.engine.utils import execute_python_code, import_modules
-from cvat.apps.profiler import silk_profile
-
-from . import models, serializers
-from .annotation_manager import AnnotationManager
-from .log import slogger
-
-
-class dotdict(OrderedDict):
-    """dot.notation access to dictionary attributes"""
-    __getattr__ = OrderedDict.get
-    __setattr__ = OrderedDict.__setitem__
-    __delattr__ = OrderedDict.__delitem__
-    __eq__ = lambda self, other: self.id == other.id
-    __hash__ = lambda self: self.id
-
-class PatchAction(str, Enum):
-    CREATE = "create"
-    UPDATE = "update"
-    DELETE = "delete"
-
-    @classmethod
-    def values(cls):
-        return [item.value for item in cls]
-
-    def __str__(self):
-        return self.value
-
-@silk_profile(name="GET job data")
-@transaction.atomic
-def get_job_data(pk, user):
-    annotation = JobAnnotation(pk, user)
-    annotation.init_from_db()
-
-    return annotation.data
-
-@silk_profile(name="POST job data")
-@transaction.atomic
-def put_job_data(pk, user, data):
-    annotation = JobAnnotation(pk, user)
-    annotation.put(data)
-
-    return annotation.data
-
-@silk_profile(name="UPDATE job data")
-@plugin_decorator
-@transaction.atomic
-def patch_job_data(pk, user, data, action):
-    annotation = JobAnnotation(pk, user)
-    if action == PatchAction.CREATE:
-        annotation.create(data)
-    elif action == PatchAction.UPDATE:
-        annotation.update(data)
-    elif action == PatchAction.DELETE:
-        annotation.delete(data)
-
-    return annotation.data
-
-@silk_profile(name="DELETE job data")
-@transaction.atomic
-def delete_job_data(pk, user):
-    annotation = JobAnnotation(pk, user)
-    annotation.delete()
-
-@silk_profile(name="GET task data")
-@transaction.atomic
-def get_task_data(pk, user):
-    annotation = TaskAnnotation(pk, user)
-    annotation.init_from_db()
-
-    return annotation.data
-
-@silk_profile(name="POST task data")
-@transaction.atomic
-def put_task_data(pk, user, data):
-    annotation = TaskAnnotation(pk, user)
-    annotation.put(data)
-
-    return annotation.data
-
-@silk_profile(name="UPDATE task data")
-@transaction.atomic
-def patch_task_data(pk, user, data, action):
-    annotation = TaskAnnotation(pk, user)
-    if action == PatchAction.CREATE:
annotation.create(data) - elif action == PatchAction.UPDATE: - annotation.update(data) - elif action == PatchAction.DELETE: - annotation.delete(data) - - return annotation.data - -@transaction.atomic -def load_task_data(pk, user, filename, loader): - annotation = TaskAnnotation(pk, user) - annotation.upload(filename, loader) - -@transaction.atomic -def load_job_data(pk, user, filename, loader): - annotation = JobAnnotation(pk, user) - annotation.upload(filename, loader) - -@silk_profile(name="DELETE task data") -@transaction.atomic -def delete_task_data(pk, user): - annotation = TaskAnnotation(pk, user) - annotation.delete() - -def dump_task_data(pk, user, filename, dumper, scheme, host): - # For big tasks dump function may run for a long time and - # we dont need to acquire lock after _AnnotationForTask instance - # has been initialized from DB. - # But there is the bug with corrupted dump file in case 2 or more dump request received at the same time. - # https://github.com/opencv/cvat/issues/217 - with transaction.atomic(): - annotation = TaskAnnotation(pk, user) - annotation.init_from_db() - - annotation.dump(filename, dumper, scheme, host) - -def bulk_create(db_model, objects, flt_param): - if objects: - if flt_param: - if 'postgresql' in settings.DATABASES["default"]["ENGINE"]: - return db_model.objects.bulk_create(objects) - else: - ids = list(db_model.objects.filter(**flt_param).values_list('id', flat=True)) - db_model.objects.bulk_create(objects) - - return list(db_model.objects.exclude(id__in=ids).filter(**flt_param)) - else: - return db_model.objects.bulk_create(objects) - - return [] - -def _merge_table_rows(rows, keys_for_merge, field_id): - # It is necessary to keep a stable order of original rows - # (e.g. for tracked boxes). Otherwise prev_box.frame can be bigger - # than next_box.frame. - merged_rows = OrderedDict() - - # Group all rows by field_id. In grouped rows replace fields in - # accordance with keys_for_merge structure. 
- for row in rows: - row_id = row[field_id] - if not row_id in merged_rows: - merged_rows[row_id] = dotdict(row) - for key in keys_for_merge: - merged_rows[row_id][key] = [] - - for key in keys_for_merge: - item = dotdict({v.split('__', 1)[-1]:row[v] for v in keys_for_merge[key]}) - if item.id is not None: - merged_rows[row_id][key].append(item) - - # Remove redundant keys from final objects - redundant_keys = [item for values in keys_for_merge.values() for item in values] - for i in merged_rows: - for j in redundant_keys: - del merged_rows[i][j] - - return list(merged_rows.values()) - -class JobAnnotation: - def __init__(self, pk, user): - self.user = user - self.db_job = models.Job.objects.select_related('segment__task') \ - .select_for_update().get(id=pk) - - db_segment = self.db_job.segment - self.start_frame = db_segment.start_frame - self.stop_frame = db_segment.stop_frame - self.ir_data = AnnotationIR() - - # pylint: disable=bad-continuation - self.logger = slogger.job[self.db_job.id] - self.db_labels = {db_label.id:db_label - for db_label in db_segment.task.label_set.all()} - - self.db_attributes = {} - for db_label in self.db_labels.values(): - self.db_attributes[db_label.id] = { - "mutable": OrderedDict(), - "immutable": OrderedDict(), - "all": OrderedDict(), - } - for db_attr in db_label.attributespec_set.all(): - default_value = dotdict([ - ('spec_id', db_attr.id), - ('value', db_attr.default_value), - ]) - if db_attr.mutable: - self.db_attributes[db_label.id]["mutable"][db_attr.id] = default_value - else: - self.db_attributes[db_label.id]["immutable"][db_attr.id] = default_value - - self.db_attributes[db_label.id]["all"][db_attr.id] = default_value - - def reset(self): - self.ir_data.reset() - - def _save_tracks_to_db(self, tracks): - db_tracks = [] - db_track_attrvals = [] - db_shapes = [] - db_shape_attrvals = [] - - for track in tracks: - track_attributes = track.pop("attributes", []) - shapes = track.pop("shapes") - db_track = models.LabeledTrack(job=self.db_job, **track) - if db_track.label_id not in self.db_labels: - raise AttributeError("label_id `{}` is invalid".format(db_track.label_id)) - - for attr in track_attributes: - db_attrval = models.LabeledTrackAttributeVal(**attr) - if db_attrval.spec_id not in self.db_attributes[db_track.label_id]["immutable"]: - raise AttributeError("spec_id `{}` is invalid".format(db_attrval.spec_id)) - db_attrval.track_id = len(db_tracks) - db_track_attrvals.append(db_attrval) - - for shape in shapes: - shape_attributes = shape.pop("attributes", []) - # FIXME: need to clamp points (be sure that all of them inside the image) - # Should we check here or implement a validator? 
- db_shape = models.TrackedShape(**shape) - db_shape.track_id = len(db_tracks) - - for attr in shape_attributes: - db_attrval = models.TrackedShapeAttributeVal(**attr) - if db_attrval.spec_id not in self.db_attributes[db_track.label_id]["mutable"]: - raise AttributeError("spec_id `{}` is invalid".format(db_attrval.spec_id)) - db_attrval.shape_id = len(db_shapes) - db_shape_attrvals.append(db_attrval) - - db_shapes.append(db_shape) - shape["attributes"] = shape_attributes - - db_tracks.append(db_track) - track["attributes"] = track_attributes - track["shapes"] = shapes - - db_tracks = bulk_create( - db_model=models.LabeledTrack, - objects=db_tracks, - flt_param={"job_id": self.db_job.id} - ) - - for db_attrval in db_track_attrvals: - db_attrval.track_id = db_tracks[db_attrval.track_id].id - bulk_create( - db_model=models.LabeledTrackAttributeVal, - objects=db_track_attrvals, - flt_param={} - ) - - for db_shape in db_shapes: - db_shape.track_id = db_tracks[db_shape.track_id].id - - db_shapes = bulk_create( - db_model=models.TrackedShape, - objects=db_shapes, - flt_param={"track__job_id": self.db_job.id} - ) - - for db_attrval in db_shape_attrvals: - db_attrval.shape_id = db_shapes[db_attrval.shape_id].id - - bulk_create( - db_model=models.TrackedShapeAttributeVal, - objects=db_shape_attrvals, - flt_param={} - ) - - shape_idx = 0 - for track, db_track in zip(tracks, db_tracks): - track["id"] = db_track.id - for shape in track["shapes"]: - shape["id"] = db_shapes[shape_idx].id - shape_idx += 1 - - self.ir_data.tracks = tracks - - def _save_shapes_to_db(self, shapes): - db_shapes = [] - db_attrvals = [] - - for shape in shapes: - attributes = shape.pop("attributes", []) - # FIXME: need to clamp points (be sure that all of them inside the image) - # Should we check here or implement a validator? 
- db_shape = models.LabeledShape(job=self.db_job, **shape) - if db_shape.label_id not in self.db_labels: - raise AttributeError("label_id `{}` is invalid".format(db_shape.label_id)) - - for attr in attributes: - db_attrval = models.LabeledShapeAttributeVal(**attr) - if db_attrval.spec_id not in self.db_attributes[db_shape.label_id]["all"]: - raise AttributeError("spec_id `{}` is invalid".format(db_attrval.spec_id)) - - db_attrval.shape_id = len(db_shapes) - db_attrvals.append(db_attrval) - - db_shapes.append(db_shape) - shape["attributes"] = attributes - - db_shapes = bulk_create( - db_model=models.LabeledShape, - objects=db_shapes, - flt_param={"job_id": self.db_job.id} - ) - - for db_attrval in db_attrvals: - db_attrval.shape_id = db_shapes[db_attrval.shape_id].id - - bulk_create( - db_model=models.LabeledShapeAttributeVal, - objects=db_attrvals, - flt_param={} - ) - - for shape, db_shape in zip(shapes, db_shapes): - shape["id"] = db_shape.id - - self.ir_data.shapes = shapes - - def _save_tags_to_db(self, tags): - db_tags = [] - db_attrvals = [] - - for tag in tags: - attributes = tag.pop("attributes", []) - db_tag = models.LabeledImage(job=self.db_job, **tag) - if db_tag.label_id not in self.db_labels: - raise AttributeError("label_id `{}` is invalid".format(db_tag.label_id)) - - for attr in attributes: - db_attrval = models.LabeledImageAttributeVal(**attr) - if db_attrval.spec_id not in self.db_attributes[db_tag.label_id]["all"]: - raise AttributeError("spec_id `{}` is invalid".format(db_attrval.spec_id)) - db_attrval.tag_id = len(db_tags) - db_attrvals.append(db_attrval) - - db_tags.append(db_tag) - tag["attributes"] = attributes - - db_tags = bulk_create( - db_model=models.LabeledImage, - objects=db_tags, - flt_param={"job_id": self.db_job.id} - ) - - for db_attrval in db_attrvals: - db_attrval.image_id = db_tags[db_attrval.tag_id].id - - bulk_create( - db_model=models.LabeledImageAttributeVal, - objects=db_attrvals, - flt_param={} - ) - - for tag, db_tag in zip(tags, db_tags): - tag["id"] = db_tag.id - - self.ir_data.tags = tags - - def _commit(self): - db_prev_commit = self.db_job.commits.last() - db_curr_commit = models.JobCommit() - if db_prev_commit: - db_curr_commit.version = db_prev_commit.version + 1 - else: - db_curr_commit.version = 1 - db_curr_commit.job = self.db_job - db_curr_commit.message = "Changes: tags - {}; shapes - {}; tracks - {}".format( - len(self.ir_data.tags), len(self.ir_data.shapes), len(self.ir_data.tracks)) - db_curr_commit.save() - self.ir_data.version = db_curr_commit.version - - def _set_updated_date(self): - db_task = self.db_job.segment.task - db_task.updated_date = timezone.now() - db_task.save() - - def _save_to_db(self, data): - self.reset() - self._save_tags_to_db(data["tags"]) - self._save_shapes_to_db(data["shapes"]) - self._save_tracks_to_db(data["tracks"]) - - return self.ir_data.tags or self.ir_data.shapes or self.ir_data.tracks - - def _create(self, data): - if self._save_to_db(data): - self._set_updated_date() - self.db_job.save() - - def create(self, data): - self._create(data) - self._commit() - - def put(self, data): - self._delete() - self._create(data) - self._commit() - - def update(self, data): - self._delete(data) - self._create(data) - self._commit() - - def _delete(self, data=None): - deleted_shapes = 0 - if data is None: - deleted_shapes += self.db_job.labeledimage_set.all().delete()[0] - deleted_shapes += self.db_job.labeledshape_set.all().delete()[0] - deleted_shapes += self.db_job.labeledtrack_set.all().delete()[0] - else: - 
labeledimage_ids = [image["id"] for image in data["tags"]] - labeledshape_ids = [shape["id"] for shape in data["shapes"]] - labeledtrack_ids = [track["id"] for track in data["tracks"]] - labeledimage_set = self.db_job.labeledimage_set - labeledimage_set = labeledimage_set.filter(pk__in=labeledimage_ids) - labeledshape_set = self.db_job.labeledshape_set - labeledshape_set = labeledshape_set.filter(pk__in=labeledshape_ids) - labeledtrack_set = self.db_job.labeledtrack_set - labeledtrack_set = labeledtrack_set.filter(pk__in=labeledtrack_ids) - - # It is not important for us that data had some "invalid" objects - # which were skipped (not acutally deleted). The main idea is to - # say that all requested objects are absent in DB after the method. - self.ir_data.tags = data['tags'] - self.ir_data.shapes = data['shapes'] - self.ir_data.tracks = data['tracks'] - - deleted_shapes += labeledimage_set.delete()[0] - deleted_shapes += labeledshape_set.delete()[0] - deleted_shapes += labeledtrack_set.delete()[0] - - if deleted_shapes: - self._set_updated_date() - - def delete(self, data=None): - self._delete(data) - self._commit() - - @staticmethod - def _extend_attributes(attributeval_set, default_attribute_values): - shape_attribute_specs_set = set(attr.spec_id for attr in attributeval_set) - for db_attr in default_attribute_values: - if db_attr.spec_id not in shape_attribute_specs_set: - attributeval_set.append(dotdict([ - ('spec_id', db_attr.spec_id), - ('value', db_attr.value), - ])) - - def _init_tags_from_db(self): - db_tags = self.db_job.labeledimage_set.prefetch_related( - "label", - "labeledimageattributeval_set" - ).values( - 'id', - 'frame', - 'label_id', - 'group', - 'labeledimageattributeval__spec_id', - 'labeledimageattributeval__value', - 'labeledimageattributeval__id', - ).order_by('frame') - - db_tags = _merge_table_rows( - rows=db_tags, - keys_for_merge={ - "labeledimageattributeval_set": [ - 'labeledimageattributeval__spec_id', - 'labeledimageattributeval__value', - 'labeledimageattributeval__id', - ], - }, - field_id='id', - ) - - for db_tag in db_tags: - self._extend_attributes(db_tag.labeledimageattributeval_set, - self.db_attributes[db_tag.label_id]["all"].values()) - - serializer = serializers.LabeledImageSerializer(db_tags, many=True) - self.ir_data.tags = serializer.data - - def _init_shapes_from_db(self): - db_shapes = self.db_job.labeledshape_set.prefetch_related( - "label", - "labeledshapeattributeval_set" - ).values( - 'id', - 'label_id', - 'type', - 'frame', - 'group', - 'occluded', - 'z_order', - 'points', - 'labeledshapeattributeval__spec_id', - 'labeledshapeattributeval__value', - 'labeledshapeattributeval__id', - ).order_by('frame') - - db_shapes = _merge_table_rows( - rows=db_shapes, - keys_for_merge={ - 'labeledshapeattributeval_set': [ - 'labeledshapeattributeval__spec_id', - 'labeledshapeattributeval__value', - 'labeledshapeattributeval__id', - ], - }, - field_id='id', - ) - for db_shape in db_shapes: - self._extend_attributes(db_shape.labeledshapeattributeval_set, - self.db_attributes[db_shape.label_id]["all"].values()) - - serializer = serializers.LabeledShapeSerializer(db_shapes, many=True) - self.ir_data.shapes = serializer.data - - def _init_tracks_from_db(self): - db_tracks = self.db_job.labeledtrack_set.prefetch_related( - "label", - "labeledtrackattributeval_set", - "trackedshape_set__trackedshapeattributeval_set" - ).values( - "id", - "frame", - "label_id", - "group", - "labeledtrackattributeval__spec_id", - "labeledtrackattributeval__value", - 
"labeledtrackattributeval__id", - "trackedshape__type", - "trackedshape__occluded", - "trackedshape__z_order", - "trackedshape__points", - "trackedshape__id", - "trackedshape__frame", - "trackedshape__outside", - "trackedshape__trackedshapeattributeval__spec_id", - "trackedshape__trackedshapeattributeval__value", - "trackedshape__trackedshapeattributeval__id", - ).order_by('id', 'trackedshape__frame') - - db_tracks = _merge_table_rows( - rows=db_tracks, - keys_for_merge={ - "labeledtrackattributeval_set": [ - "labeledtrackattributeval__spec_id", - "labeledtrackattributeval__value", - "labeledtrackattributeval__id", - ], - "trackedshape_set":[ - "trackedshape__type", - "trackedshape__occluded", - "trackedshape__z_order", - "trackedshape__points", - "trackedshape__id", - "trackedshape__frame", - "trackedshape__outside", - "trackedshape__trackedshapeattributeval__spec_id", - "trackedshape__trackedshapeattributeval__value", - "trackedshape__trackedshapeattributeval__id", - ], - }, - field_id="id", - ) - - for db_track in db_tracks: - db_track["trackedshape_set"] = _merge_table_rows(db_track["trackedshape_set"], { - 'trackedshapeattributeval_set': [ - 'trackedshapeattributeval__value', - 'trackedshapeattributeval__spec_id', - 'trackedshapeattributeval__id', - ] - }, 'id') - - # A result table can consist many equal rows for track/shape attributes - # We need filter unique attributes manually - db_track["labeledtrackattributeval_set"] = list(set(db_track["labeledtrackattributeval_set"])) - self._extend_attributes(db_track.labeledtrackattributeval_set, - self.db_attributes[db_track.label_id]["immutable"].values()) - - default_attribute_values = self.db_attributes[db_track.label_id]["mutable"].values() - for db_shape in db_track["trackedshape_set"]: - db_shape["trackedshapeattributeval_set"] = list( - set(db_shape["trackedshapeattributeval_set"]) - ) - # in case of trackedshapes need to interpolate attriute values and extend it - # by previous shape attribute values (not default values) - self._extend_attributes(db_shape["trackedshapeattributeval_set"], default_attribute_values) - default_attribute_values = db_shape["trackedshapeattributeval_set"] - - - serializer = serializers.LabeledTrackSerializer(db_tracks, many=True) - self.ir_data.tracks = serializer.data - - def _init_version_from_db(self): - db_commit = self.db_job.commits.last() - self.ir_data.version = db_commit.version if db_commit else 0 - - def init_from_db(self): - self._init_tags_from_db() - self._init_shapes_from_db() - self._init_tracks_from_db() - self._init_version_from_db() - - @property - def data(self): - return self.ir_data.data - - def upload(self, annotation_file, loader): - annotation_importer = Annotation( - annotation_ir=self.ir_data, - db_task=self.db_job.segment.task, - create_callback=self.create, - ) - self.delete() - db_format = loader.annotation_format - with open(annotation_file, 'rb') as file_object: - source_code = open(os.path.join(settings.BASE_DIR, db_format.handler_file.name)).read() - global_vars = globals() - imports = import_modules(source_code) - global_vars.update(imports) - - execute_python_code(source_code, global_vars) - - global_vars["file_object"] = file_object - global_vars["annotations"] = annotation_importer - - execute_python_code("{}(file_object, annotations)".format(loader.handler), global_vars) - self.create(annotation_importer.data.slice(self.start_frame, self.stop_frame).serialize()) - -class TaskAnnotation: - def __init__(self, pk, user): - self.user = user - self.db_task = 
models.Task.objects.prefetch_related("data__images").get(id=pk) - - # Postgres doesn't guarantee an order by default without explicit order_by - self.db_jobs = models.Job.objects.select_related("segment").filter(segment__task_id=pk).order_by('id') - self.ir_data = AnnotationIR() - - def reset(self): - self.ir_data.reset() - - def _patch_data(self, data, action): - _data = data if isinstance(data, AnnotationIR) else AnnotationIR(data) - splitted_data = {} - jobs = {} - for db_job in self.db_jobs: - jid = db_job.id - start = db_job.segment.start_frame - stop = db_job.segment.stop_frame - jobs[jid] = { "start": start, "stop": stop } - splitted_data[jid] = _data.slice(start, stop) - - for jid, job_data in splitted_data.items(): - _data = AnnotationIR() - if action is None: - _data.data = put_job_data(jid, self.user, job_data) - else: - _data.data = patch_job_data(jid, self.user, job_data, action) - if _data.version > self.ir_data.version: - self.ir_data.version = _data.version - self._merge_data(_data, jobs[jid]["start"], self.db_task.overlap) - - def _merge_data(self, data, start_frame, overlap): - annotation_manager = AnnotationManager(self.ir_data) - annotation_manager.merge(data, start_frame, overlap) - - def put(self, data): - self._patch_data(data, None) - - def create(self, data): - self._patch_data(data, PatchAction.CREATE) - - def update(self, data): - self._patch_data(data, PatchAction.UPDATE) - - def delete(self, data=None): - if data: - self._patch_data(data, PatchAction.DELETE) - else: - for db_job in self.db_jobs: - delete_job_data(db_job.id, self.user) - - def init_from_db(self): - self.reset() - - for db_job in self.db_jobs: - annotation = JobAnnotation(db_job.id, self.user) - annotation.init_from_db() - if annotation.ir_data.version > self.ir_data.version: - self.ir_data.version = annotation.ir_data.version - db_segment = db_job.segment - start_frame = db_segment.start_frame - overlap = self.db_task.overlap - self._merge_data(annotation.ir_data, start_frame, overlap) - - @property - def data(self): - return self.ir_data.data From a75855053b053ad3b5a3a2014e74a22c1829bb1e Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Tue, 7 Apr 2020 12:14:41 +0300 Subject: [PATCH 15/80] restore extension for export formats --- cvat/apps/dataset_manager/formats/__init__.py | 30 ++++--------------- cvat/apps/dataset_manager/formats/coco.py | 2 +- cvat/apps/dataset_manager/formats/cvat.py | 6 ++-- cvat/apps/dataset_manager/formats/labelme.py | 5 ++-- cvat/apps/dataset_manager/formats/mask.py | 2 +- cvat/apps/dataset_manager/formats/mot.py | 2 +- .../dataset_manager/formats/pascal_voc.py | 2 +- cvat/apps/dataset_manager/formats/tfrecord.py | 2 +- cvat/apps/dataset_manager/formats/yolo.py | 2 +- cvat/apps/dataset_manager/task.py | 6 ++-- cvat/apps/dataset_manager/views.py | 24 +++++++++++++-- datumaro/datumaro/components/cli_plugin.py | 15 +--------- datumaro/datumaro/util/__init__.py | 15 +++++++++- 13 files changed, 57 insertions(+), 56 deletions(-) diff --git a/cvat/apps/dataset_manager/formats/__init__.py b/cvat/apps/dataset_manager/formats/__init__.py index 4a637b1e3426..560d5046ca1b 100644 --- a/cvat/apps/dataset_manager/formats/__init__.py +++ b/cvat/apps/dataset_manager/formats/__init__.py @@ -12,16 +12,13 @@ class _Format: NAME = '' EXT = '' VERSION = '' + DISPLAY_NAME = '{name} {ext} {version}' class Exporter(_Format): - DISPLAY_NAME = '{name} {version}' - def __call__(self, dst_file, task_data, **options): raise NotImplementedError() class Importer(_Format): - DISPLAY_NAME = '{name} 
{ext} {version}' - def __call__(self, src_file, task_data, **options): raise NotImplementedError() @@ -51,12 +48,12 @@ def __call__(self, *args, **kwargs): return target EXPORT_FORMATS = {} -def exporter(name, version, ext=None, display_name=None): +def exporter(name, version, ext, display_name=None): assert name not in EXPORT_FORMATS, "Export format '%s' already registered" % name def wrap_with_params(f_or_cls): t = _wrap_format(f_or_cls, Exporter, name=name, ext=ext, version=version, display_name=display_name) - EXPORT_FORMATS[name] = t + EXPORT_FORMATS[name.lower()] = t return t return wrap_with_params @@ -66,30 +63,15 @@ def importer(name, version, ext, display_name=None): def wrap_with_params(f_or_cls): t = _wrap_format(f_or_cls, Importer, name=name, ext=ext, version=version, display_name=display_name) - IMPORT_FORMATS[name] = t + IMPORT_FORMATS[name.lower()] = t return t return wrap_with_params - -def _serialize_format(f): - return { - 'name': f.DISPLAY_NAME, - 'tag': f.NAME, - 'ext': f.EXT, - 'version': f.VERSION, - } - -def get_export_formats(): - return [_serialize_format(f) for f in EXPORT_FORMATS] - -def get_import_formats(): - return [_serialize_format(f) for f in IMPORT_FORMATS] - def make_importer(name): - return IMPORT_FORMATS[name]() + return IMPORT_FORMATS[name.lower()]() def make_exporter(name): - return EXPORT_FORMATS[name]() + return EXPORT_FORMATS[name.lower()]() import cvat.apps.dataset_manager.formats.coco diff --git a/cvat/apps/dataset_manager/formats/coco.py b/cvat/apps/dataset_manager/formats/coco.py index 386e39acd21b..c67254715f28 100644 --- a/cvat/apps/dataset_manager/formats/coco.py +++ b/cvat/apps/dataset_manager/formats/coco.py @@ -11,7 +11,7 @@ from cvat.apps.dataset_manager.util import make_zip_archive -@exporter(name='COCO', version='1.0') +@exporter(name='COCO', ext='ZIP', version='1.0') def _export(dst_file, task_data, save_images=False): extractor = CvatTaskDataExtractor(task_data, include_images=save_images) extractor = Dataset.from_extractors(extractor) # apply lazy transforms diff --git a/cvat/apps/dataset_manager/formats/cvat.py b/cvat/apps/dataset_manager/formats/cvat.py index f510f1eb82f0..77717fb0dce2 100644 --- a/cvat/apps/dataset_manager/formats/cvat.py +++ b/cvat/apps/dataset_manager/formats/cvat.py @@ -536,17 +536,17 @@ def _export(dst_file, task_data, anno_callback, **options): make_zip_archive(temp_dir, dst_file) -@exporter(name='CVAT for videos', version='1.1') +@exporter(name='CVAT XML for videos', ext='ZIP', version='1.1') def _export_video(dst_file, task_data, save_images=False): _export(dst_file, task_data, anno_callback=dump_as_cvat_interpolation, save_images=save_images) -@exporter(name='CVAT for images', version='1.1') +@exporter(name='CVAT XML for images', ext='ZIP', version='1.1') def _export_images(dst_file, task_data, save_images=False): _export(dst_file, task_data, anno_callback=dump_as_cvat_annotation, save_images=save_images) -@importer(name='CVAT', ext='XML, ZIP', version='1.1', +@importer(name='CVAT XML', ext='XML, ZIP', version='1.1', display_name='{name} {version}') def _import(src_file, task_data): src_path = src_file.name diff --git a/cvat/apps/dataset_manager/formats/labelme.py b/cvat/apps/dataset_manager/formats/labelme.py index 573c514d6927..8f23a0483960 100644 --- a/cvat/apps/dataset_manager/formats/labelme.py +++ b/cvat/apps/dataset_manager/formats/labelme.py @@ -13,7 +13,7 @@ from datumaro.components.project import Dataset -@exporter(name='LabelMe', version='3.0') +@exporter(name='LabelMe', ext='ZIP', 
version='3.0') def _export(dst_file, task_data, save_images=False): extractor = CvatTaskDataExtractor(task_data, include_images=save_images) envt = dm_env.transforms @@ -27,9 +27,8 @@ def _export(dst_file, task_data, save_images=False): @importer(name='LabelMe', ext='ZIP', version='3.0') def _import(src_file, task_data): - src_path = src_file.name with TemporaryDirectory() as tmp_dir: - Archive(src_path).extractall(tmp_dir) + Archive(src_file.name).extractall(tmp_dir) dataset = dm_env.make_importer('label_me')(tmp_dir).make_dataset() masks_to_polygons = dm_env.transforms.get('masks_to_polygons') diff --git a/cvat/apps/dataset_manager/formats/mask.py b/cvat/apps/dataset_manager/formats/mask.py index f8ee0c2b0d64..9c7e266b725a 100644 --- a/cvat/apps/dataset_manager/formats/mask.py +++ b/cvat/apps/dataset_manager/formats/mask.py @@ -13,7 +13,7 @@ from datumaro.components.project import Dataset -@exporter(name='MASK', version='1.1') +@exporter(name='MASK', ext='ZIP', version='1.1') def _export(dst_file, task_data, save_images=False): extractor = CvatTaskDataExtractor(task_data, include_images=save_images) envt = dm_env.transforms diff --git a/cvat/apps/dataset_manager/formats/mot.py b/cvat/apps/dataset_manager/formats/mot.py index f68bc1167046..cf611a2cc363 100644 --- a/cvat/apps/dataset_manager/formats/mot.py +++ b/cvat/apps/dataset_manager/formats/mot.py @@ -14,7 +14,7 @@ from datumaro.components.project import Dataset -@exporter(name='MOT', version='1.1') +@exporter(name='MOT', ext='ZIP', version='1.1') def _export(dst_file, task_data, save_images=False): extractor = CvatTaskDataExtractor(task_data, include_images=save_images) envt = dm_env.transforms diff --git a/cvat/apps/dataset_manager/formats/pascal_voc.py b/cvat/apps/dataset_manager/formats/pascal_voc.py index f876872c2e16..8aaca4911896 100644 --- a/cvat/apps/dataset_manager/formats/pascal_voc.py +++ b/cvat/apps/dataset_manager/formats/pascal_voc.py @@ -17,7 +17,7 @@ from datumaro.components.project import Dataset -@exporter(name='PASCAL VOC', version='1.1') +@exporter(name='PASCAL VOC', ext='ZIP', version='1.1') def _export(dst_file, task_data, save_images=False): extractor = CvatTaskDataExtractor(task_data, include_images=save_images) envt = dm_env.transforms diff --git a/cvat/apps/dataset_manager/formats/tfrecord.py b/cvat/apps/dataset_manager/formats/tfrecord.py index 62577ec7fc44..4bbaebfe12da 100644 --- a/cvat/apps/dataset_manager/formats/tfrecord.py +++ b/cvat/apps/dataset_manager/formats/tfrecord.py @@ -13,7 +13,7 @@ from datumaro.components.project import Dataset -@exporter(name='TFRecord', version='1.0') +@exporter(name='TFRecord', ext='ZIP', version='1.0') def _export(dst_file, task_data, save_images=False): extractor = CvatTaskDataExtractor(task_data, include_images=save_images) extractor = Dataset.from_extractors(extractor) # apply lazy transforms diff --git a/cvat/apps/dataset_manager/formats/yolo.py b/cvat/apps/dataset_manager/formats/yolo.py index 37b0c443d600..fb734a7eb39c 100644 --- a/cvat/apps/dataset_manager/formats/yolo.py +++ b/cvat/apps/dataset_manager/formats/yolo.py @@ -16,7 +16,7 @@ from datumaro.components.project import Dataset -@exporter(name='YOLO', version='1.1') +@exporter(name='YOLO', ext='ZIP', version='1.1') def _export(dst_file, task_data, save_images=False): extractor = CvatTaskDataExtractor(task_data, include_images=save_images) extractor = Dataset.from_extractors(extractor) # apply lazy transforms diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py index 
b753a8a59332..1954c8fb33c6 100644 --- a/cvat/apps/dataset_manager/task.py +++ b/cvat/apps/dataset_manager/task.py @@ -710,9 +710,9 @@ def delete_task_data(pk): def export_task(task_id, dst_file, format_name, server_url=None, save_images=False): # For big tasks dump function may run for a long time and - # we dont need to acquire lock after _AnnotationForTask instance - # has been initialized from DB. - # But there is the bug with corrupted dump file in case 2 or more dump request received at the same time. + # we don't need to acquire a lock after the task has been initialized from DB. + # But there is a bug with a corrupted dump file when two or + # more dump requests are received at the same time: # https://github.com/opencv/cvat/issues/217 with transaction.atomic(): task = TaskAnnotation(task_id) diff --git a/cvat/apps/dataset_manager/views.py b/cvat/apps/dataset_manager/views.py index 592a6f3c73aa..6434d92d9349 100644 --- a/cvat/apps/dataset_manager/views.py +++ b/cvat/apps/dataset_manager/views.py @@ -12,7 +12,9 @@ import cvat.apps.dataset_manager.task as task from cvat.apps.engine.log import slogger from cvat.apps.engine.models import Task +from datumaro.util import to_snake_case +from .formats import IMPORT_FORMATS, EXPORT_FORMATS from .util import current_function_name @@ -39,13 +41,16 @@ def export_task(task_id, dst_format, server_url=None, save_images=False): cache_dir = get_export_cache_dir(db_task) exporter = get_exporter(format_name) - output_path = osp.join(cache_dir, '%s.%s' % (dst_format, exporter.EXT)) + output_base = '%s_%s' % ('dataset' if save_images else 'task', + to_snake_case(dst_format)) + output_path = '%s.%s' % (output_base, exporter.EXT) + output_path = osp.join(cache_dir, output_path) task_time = timezone.localtime(db_task.updated_date).timestamp() if not (osp.exists(output_path) and \ task_time <= osp.getmtime(output_path)): os.makedirs(cache_dir, exist_ok=True) - task.export_task(task_id, output_path, dst_format, + task.export_task(task_id, dst_format, temp_dir, server_url=server_url, save_images=save_images) archive_ctime = osp.getctime(output_path) @@ -83,3 +88,18 @@ def clear_export_cache(task_id, file_path, file_ctime): except Exception: log_exception(slogger.task[task_id]) raise + + +def _serialize_format(f): + return { + 'name': f.DISPLAY_NAME, + 'tag': f.NAME.lower(), + 'ext': f.EXT, + 'version': f.VERSION, + } + +def get_export_formats(): + return [_serialize_format(f) for f in EXPORT_FORMATS] + +def get_import_formats(): + return [_serialize_format(f) for f in IMPORT_FORMATS] \ No newline at end of file diff --git a/datumaro/datumaro/components/cli_plugin.py b/datumaro/datumaro/components/cli_plugin.py index 08a7f3834cc9..9439d2b56a6f 100644 --- a/datumaro/datumaro/components/cli_plugin.py +++ b/datumaro/datumaro/components/cli_plugin.py @@ -5,7 +5,7 @@ import argparse -from datumaro.cli.util import MultilineFormatter +from datumaro.cli.util import MultilineFormatter, to_snake_case class CliPlugin: @@ -41,16 +41,3 @@ def remove_plugin_type(s): for t in {'transform', 'extractor', 'converter', 'launcher', 'importer'}: s = s.replace('_' + t, '') return s - -def to_snake_case(s): - if not s: - return '' - - name = [s[0].lower()] - for char in s[1:]: - if char.isalpha() and char.isupper(): - name.append('_') - name.append(char.lower()) - else: - name.append(char) - return ''.join(name) \ No newline at end of file
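A minimal sketch of what the relocated helper produces for the new cache file names (the helper body is copied from the to_snake_case() function moved by this patch; the format name, extension, and flag values are illustrative assumptions, not values taken from the code above):

    def to_snake_case(s):
        # Copied from the function moved into datumaro/util/__init__.py.
        if not s:
            return ''
        name = [s[0].lower()]
        for char in s[1:]:
            if char.isalpha() and char.isupper():
                name.append('_')
                name.append(char.lower())
            else:
                name.append(char)
        return ''.join(name)

    def cache_file_name(dst_format, ext, save_images):
        # Mirrors the updated export_task() naming in dataset_manager/views.py:
        # 'dataset_*' when images are included, 'task_*' for annotations only.
        base = '%s_%s' % ('dataset' if save_images else 'task',
            to_snake_case(dst_format))
        return '%s.%s' % (base, ext)

    assert to_snake_case('LabelMe') == 'label_me'
    print(cache_file_name('LabelMe', 'ZIP', save_images=True))   # dataset_label_me.ZIP
    print(cache_file_name('LabelMe', 'ZIP', save_images=False))  # task_label_me.ZIP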
diff --git a/datumaro/datumaro/util/__init__.py b/datumaro/datumaro/util/__init__.py index 7c36fe8efad0..03f4d59579ae 100644 --- a/datumaro/datumaro/util/__init__.py +++ b/datumaro/datumaro/util/__init__.py @@ -42,4 +42,17 @@ def cast(value, type_conv, default=None): try: return type_conv(value) except Exception: - return default \ No newline at end of file + return default + +def to_snake_case(s): + if not s: + return '' + + name = [s[0].lower()] + for char in s[1:]: + if char.isalpha() and char.isupper(): + name.append('_') + name.append(char.lower()) + else: + name.append(char) + return ''.join(name) \ No newline at end of file From 81404b18fcbba80b8475fadd39142454d14a25d7 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Tue, 7 Apr 2020 12:22:47 +0300 Subject: [PATCH 16/80] update rest api --- cvat/apps/engine/serializers.py | 3 + cvat/apps/engine/views.py | 167 ++++++++++++++++---------------- 2 files changed, 86 insertions(+), 84 deletions(-) diff --git a/cvat/apps/engine/serializers.py b/cvat/apps/engine/serializers.py index 5c8cd9c052b1..207915201492 100644 --- a/cvat/apps/engine/serializers.py +++ b/cvat/apps/engine/serializers.py @@ -467,3 +467,6 @@ class LogEventSerializer(serializers.Serializer): message = serializers.CharField(max_length=4096, required=False) payload = serializers.DictField(required=False) is_active = serializers.BooleanField() + +class AnnotationFileSerializer(serializers.Serializer): + annotation_file = serializers.FileField() \ No newline at end of file diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index b4d27f6a181b..9598206d6e04 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -33,19 +33,20 @@ from rest_framework.response import Response from sendfile import sendfile -import cvat.apps.dataset_manager as dataset_manager +import cvat.apps.dataset_manager as dm from cvat.apps.authentication import auth from cvat.apps.authentication.decorators import login_required from cvat.apps.engine.frame_provider import FrameProvider from cvat.apps.engine.models import Job, Plugin, StatusChoice, Task from cvat.apps.engine.serializers import ( - AboutSerializer, BasicUserSerializer, DataMetaSerializer, DataSerializer, - ExceptionSerializer, FileInfoSerializer, JobSerializer, - LabeledDataSerializer, LogEventSerializer, PluginSerializer, - ProjectSerializer, RqStatusSerializer, TaskSerializer, UserSerializer) + AboutSerializer, AnnotationFileSerializer, BasicUserSerializer, + DataMetaSerializer, DataSerializer, ExceptionSerializer, + FileInfoSerializer, JobSerializer, LabeledDataSerializer, + LogEventSerializer, PluginSerializer, ProjectSerializer, + RqStatusSerializer, TaskSerializer, UserSerializer) from cvat.settings.base import CSS_3RDPARTY, JS_3RDPARTY -from . import annotation, models, task +from . import models, task from .log import clogger, slogger @@ -204,7 +205,7 @@ def share(request): responses={'200': AnnotationFormatSerializer(many=True)}) @action(detail=False, methods=['GET'], url_path='annotation/formats') def annotation_formats(request): - data = dataset_manager.get_formats() + data = dm.views.get_formats() data = JSONRenderer().render(data) return Response(data) @@ -512,68 +513,70 @@ def annotations(self, request, pk): '201': openapi.Response(description='Annotations file is ready to download'), '200': openapi.Response(description='Download of file started')}) @action(detail=True, methods=['GET'], serializer_class=None, - url_path='annotations/(?P<filename>[^/]+)') + url_path='annotations') def dump(self, request, pk, filename): """ Dump of annotations in the common case is a long process which cannot be performed within one request. 
First request starts dumping process. When the file is ready (code 201) you can get it with query parameter action=download. """ - filename = re.sub(r'[\\/*?:"<>|]', '_', filename) - username = request.user.username - db_task = self.get_object() # call check_object_permissions as well - timestamp = datetime.now().strftime("%Y_%m_%d_%H_%M_%S") - action = request.query_params.get("action") - if action not in [None, "download"]: - raise serializers.ValidationError( - "Please specify a correct 'action' for the request") + db_task = self.get_object() - dump_format = request.query_params.get("format", "") - try: - db_dumper = AnnotationDumper.objects.get(display_name=dump_format) - except ObjectDoesNotExist: + action = request.query_params.get("action", "").lower() + if action not in {"", "download"}: raise serializers.ValidationError( - "Please specify a correct 'format' parameter for the request") + "Unexpected action specified for the request") - file_path = os.path.join(db_task.get_task_artifacts_dirname(), - "{}.{}.{}.{}".format(filename, username, timestamp, db_dumper.format.lower())) + dst_format = request.query_params.get("format", "").lower() + if dst_format not in [f['tag'] for f in dm.views.get_export_formats()]: + raise serializers.ValidationError( + "Unknown format specified for the request") + rq_id = "/api/v1/tasks/{}/annotations/{}".format(pk, dst_format) queue = django_rq.get_queue("default") - rq_id = "{}@/api/v1/tasks/{}/annotations/{}/{}".format(username, pk, dump_format, filename) - rq_job = queue.fetch_job(rq_id) + rq_job = queue.fetch_job(rq_id) if rq_job: - if rq_job.is_finished: - if not rq_job.meta.get("download"): - if action == "download": - rq_job.meta[action] = True - rq_job.save_meta() - return sendfile(request, rq_job.meta["file_path"], attachment=True, - attachment_filename="{}.{}".format(filename, db_dumper.format.lower())) - else: - return Response(status=status.HTTP_201_CREATED) - else: # Remove the old dump file - try: - os.remove(rq_job.meta["file_path"]) - except OSError: - pass - finally: - rq_job.delete() - elif rq_job.is_failed: - exc_info = str(rq_job.exc_info) + last_task_update_time = timezone.localtime(db_task.updated_date) + request_time = rq_job.meta.get('request_time', None) + if request_time is None or request_time < last_task_update_time: + rq_job.cancel() rq_job.delete() - return Response(data=exc_info, status=status.HTTP_500_INTERNAL_SERVER_ERROR) else: - return Response(status=status.HTTP_202_ACCEPTED) + if rq_job.is_finished: + file_path = rq_job.return_value + if action == "download" and osp.exists(file_path): + rq_job.delete() + + timestamp = datetime.strftime(last_task_update_time, + "%Y_%m_%d_%H_%M_%S") + filename = "task_{}_annotations-{}-{}.{}".format( + db_task.name, timestamp, + dst_format, osp.splitext(file_path)[1]) + return sendfile(request, file_path, attachment=True, + attachment_filename=filename.lower()) + else: + if osp.exists(file_path): + return Response(status=status.HTTP_201_CREATED) + elif rq_job.is_failed: + exc_info = str(rq_job.exc_info) + rq_job.delete() + return Response(exc_info, + status=status.HTTP_500_INTERNAL_SERVER_ERROR) + else: + return Response(status=status.HTTP_202_ACCEPTED) - rq_job = queue.enqueue_call( - func=annotation.dump_task_data, - args=(pk, request.user, file_path, db_dumper, - request.scheme, request.get_host()), - job_id=rq_id, - ) - rq_job.meta["file_path"] = file_path - rq_job.save_meta() + try: + if request.scheme: + server_address = request.scheme + '://' + server_address += 
request.get_host() + except Exception: + server_address = None + ttl = dm.views.CACHE_TTL.total_seconds() + queue.enqueue_call(func=dm.views.export_task_annotations, + args=(pk, dst_format, server_address), job_id=rq_id, + meta={ 'request_time': timezone.localtime() }, + result_ttl=ttl, failure_ttl=ttl) return Response(status=status.HTTP_202_ACCEPTED) @swagger_auto_schema(method='get', operation_summary='When task is being created the method returns information about a status of the creation process') @@ -642,19 +645,15 @@ def data_info(request, pk): def dataset_export(self, request, pk): db_task = self.get_object() - action = request.query_params.get("action", "") - action = action.lower() - if action not in ["", "download"]: + action = request.query_params.get("action", "").lower() + if action not in {"", "download"}: raise serializers.ValidationError( - "Unexpected parameter 'action' specified for the request") + "Unexpected action specified for the request") - dst_format = request.query_params.get("format", "") - if not dst_format: - dst_format = dataset_manager.DEFAULT_FORMAT - dst_format = dst_format.lower() - if dst_format not in [f['tag'] for f in dataset_manager.get_formats()]: + dst_format = request.query_params.get("format", "").lower() + if dst_format not in [f['tag'] for f in dm.views.get_export_formats()]: raise serializers.ValidationError( - "Unexpected parameter 'format' specified for the request") + "Unknown format specified for the request") rq_id = "/api/v1/tasks/{}/dataset/{}".format(pk, dst_format) queue = django_rq.get_queue("default") @@ -672,9 +671,11 @@ def dataset_export(self, request, pk): if action == "download" and osp.exists(file_path): rq_job.delete() - timestamp = datetime.now().strftime("%Y_%m_%d_%H_%M_%S") - filename = "task_{}-{}-{}.zip".format( - db_task.name, timestamp, dst_format) + timestamp = datetime.strftime(last_task_update_time, + "%Y_%m_%d_%H_%M_%S") + filename = "task_{}_dataset-{}-{}-{}-{}.{}".format( + db_task.name, timestamp, + dst_format, osp.splitext(file_path)[1]) return sendfile(request, file_path, attachment=True, attachment_filename=filename.lower()) else: @@ -689,13 +690,15 @@ def dataset_export(self, request, pk): return Response(status=status.HTTP_202_ACCEPTED) try: - server_address = request.get_host() + if request.scheme: + server_address = request.scheme + '://' + server_address += request.get_host() except Exception: server_address = None - ttl = dataset_manager.CACHE_TTL.total_seconds() - queue.enqueue_call(func=dataset_manager.export_task_as_dataset, - args=(pk, request.user, dst_format, server_address), job_id=rq_id, + ttl = dm.views.CACHE_TTL.total_seconds() + queue.enqueue_call(func=dm.views.export_task_as_dataset, + args=(pk, dst_format, server_address), job_id=rq_id, meta={ 'request_time': timezone.localtime() }, result_ttl=ttl, failure_ttl=ttl) return Response(status=status.HTTP_202_ACCEPTED) @@ -734,37 +737,36 @@ def get_permissions(self): def annotations(self, request, pk): self.get_object() # force to call check_object_permissions if request.method == 'GET': - data = annotation.get_job_data(pk, request.user) + data = dm.task.get_job_data(pk) return Response(data) elif request.method == 'PUT': if request.query_params.get("format", ""): return load_data_proxy( request=request, rq_id="{}@/api/v1/jobs/{}/annotations/upload".format(request.user, pk), - rq_func=annotation.load_job_data, + rq_func=dm.task.import_job_annotations, pk=pk, ) else: serializer = LabeledDataSerializer(data=request.data) if 
serializer.is_valid(raise_exception=True): try: - data = annotation.put_job_data(pk, request.user, serializer.data) + data = dm.task.put_job_data(pk, serializer.data) except (AttributeError, IntegrityError) as e: return Response(data=str(e), status=status.HTTP_400_BAD_REQUEST) return Response(data) elif request.method == 'DELETE': - annotation.delete_job_data(pk, request.user) + dm.task.delete_job_data(pk) return Response(status=status.HTTP_204_NO_CONTENT) elif request.method == 'PATCH': action = self.request.query_params.get("action", None) - if action not in annotation.PatchAction.values(): + if action not in dm.task.PatchAction.values(): raise serializers.ValidationError( "Please specify a correct 'action' for the request") serializer = LabeledDataSerializer(data=request.data) if serializer.is_valid(raise_exception=True): try: - data = annotation.patch_job_data(pk, request.user, - serializer.data, action) + data = dm.task.patch_job_data(pk, serializer.data, action) except (AttributeError, IntegrityError) as e: return Response(data=str(e), status=status.HTTP_400_BAD_REQUEST) return Response(data) @@ -863,16 +865,13 @@ def rq_handler(job, exc_type, exc_value, tb): def load_data_proxy(request, rq_id, rq_func, pk): queue = django_rq.get_queue("default") rq_job = queue.fetch_job(rq_id) - upload_format = request.query_params.get("format", "") + format_name = request.query_params.get("format", "").lower() if not rq_job: serializer = AnnotationFileSerializer(data=request.data) if serializer.is_valid(raise_exception=True): - try: - db_parser = AnnotationLoader.objects.get(pk=upload_format) - except ObjectDoesNotExist: - raise serializers.ValidationError( - "Please specify a correct 'format' parameter for the upload request") + if format_name not in [f['tag'] for f in dm.views.get_import_formats()]: + raise serializers.ValidationError("Unknown input format") anno_file = serializer.validated_data['annotation_file'] fd, filename = mkstemp(prefix='cvat_{}'.format(pk)) @@ -881,7 +880,7 @@ def load_data_proxy(request, rq_id, rq_func, pk): f.write(chunk) rq_job = queue.enqueue_call( func=rq_func, - args=(pk, request.user, filename, db_parser), + args=(pk, filename, format_name), job_id=rq_id ) rq_job.meta['tmp_file'] = filename From 92b1bbace8100b5bf7c2243e01012f5287d7c979 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Tue, 7 Apr 2020 14:46:19 +0300 Subject: [PATCH 17/80] use serializers, update views --- cvat/apps/dataset_manager/formats/__init__.py | 24 +++++++----- cvat/apps/dataset_manager/serializers.py | 12 ++++++ cvat/apps/dataset_manager/views.py | 15 ++------ cvat/apps/engine/views.py | 38 +++++++++++-------- 4 files changed, 52 insertions(+), 37 deletions(-) create mode 100644 cvat/apps/dataset_manager/serializers.py diff --git a/cvat/apps/dataset_manager/formats/__init__.py b/cvat/apps/dataset_manager/formats/__init__.py index 560d5046ca1b..02c384e42be1 100644 --- a/cvat/apps/dataset_manager/formats/__init__.py +++ b/cvat/apps/dataset_manager/formats/__init__.py @@ -4,6 +4,7 @@ # SPDX-License-Identifier: MIT from datumaro.components.project import Environment +from datumaro.util import to_snake_case dm_env = Environment() @@ -13,6 +14,7 @@ class _Format: EXT = '' VERSION = '' DISPLAY_NAME = '{name} {ext} {version}' + TAG = '' class Exporter(_Format): def __call__(self, dst_file, task_data, **options): @@ -22,7 +24,7 @@ class Importer(_Format): def __call__(self, src_file, task_data, **options): raise NotImplementedError() -def _wrap_format(f_or_cls, klass, name, version, ext, 
display_name): +def _wrap_format(f_or_cls, klass, name, version, ext, display_name, tag): import inspect if inspect.isclass(f): assert hasattr(f_or_cls, '__call__') @@ -44,34 +46,36 @@ def __call__(self, *args, **kwargs): target.EXT = ext or klass.EXT target.DISPLAY_NAME = (display_name or klass.DISPLAY_NAME).format( name=name, version=version, ext=ext) - assert all([target.NAME, target.VERSION, target.EXT, target.DISPLAY_NAME]) + target.TAG = tag or to_snake_case(target.NAME) + assert all([target.NAME, target.VERSION, target.EXT, target.DISPLAY_NAME, + target.TAG]) return target EXPORT_FORMATS = {} -def exporter(name, version, ext, display_name=None): +def exporter(name, version, ext, display_name=None, tag=None): assert name not in EXPORT_FORMATS, "Export format '%s' already registered" % name def wrap_with_params(f_or_cls): - t = _wrap_format(f_or_cls, Exporter, + t = _wrap_format(f_or_cls, Exporter, tag=tag, name=name, ext=ext, version=version, display_name=display_name) - EXPORT_FORMATS[name.lower()] = t + EXPORT_FORMATS[t.TAG] = t return t return wrap_with_params IMPORT_FORMATS = {} -def importer(name, version, ext, display_name=None): +def importer(name, version, ext, display_name=None, tag=None): assert name not in IMPORT_FORMATS, "Import format '%s' already registered" % name def wrap_with_params(f_or_cls): - t = _wrap_format(f_or_cls, Importer, + t = _wrap_format(f_or_cls, Importer, tag=tag, name=name, ext=ext, version=version, display_name=display_name) - IMPORT_FORMATS[name.lower()] = t + IMPORT_FORMATS[t.TAG] = t return t return wrap_with_params def make_importer(name): - return IMPORT_FORMATS[name.lower()]() + return IMPORT_FORMATS[name]() def make_exporter(name): - return EXPORT_FORMATS[name.lower()]() + return EXPORT_FORMATS[name]() import cvat.apps.dataset_manager.formats.coco diff --git a/cvat/apps/dataset_manager/serializers.py b/cvat/apps/dataset_manager/serializers.py new file mode 100644 index 000000000000..f9c5f05cfb59 --- /dev/null +++ b/cvat/apps/dataset_manager/serializers.py @@ -0,0 +1,12 @@ +# Copyright (C) 2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from rest_framework import serializers + + +class DatasetFormatSerializer(serializers.Serializer): + name = serializers.CharField(max_length=64, source='DISPLAY_NAME') + tag = serializers.CharField(max_length=64, source='TAG') + ext = serializers.CharField(max_length=64, source='EXT') + version = serializers.CharField(max_length=64, source='VERSION') \ No newline at end of file diff --git a/cvat/apps/dataset_manager/views.py b/cvat/apps/dataset_manager/views.py index 6434d92d9349..966cb085c39e 100644 --- a/cvat/apps/dataset_manager/views.py +++ b/cvat/apps/dataset_manager/views.py @@ -12,7 +12,6 @@ import cvat.apps.dataset_manager.task as task from cvat.apps.engine.log import slogger from cvat.apps.engine.models import Task -from datumaro.util import to_snake_case from .formats import IMPORT_FORMATS, EXPORT_FORMATS from .util import current_function_name @@ -50,7 +49,7 @@ def export_task(task_id, dst_format, server_url=None, save_images=False): if not (osp.exists(output_path) and \ task_time <= osp.getmtime(output_path)): os.makedirs(cache_dir, exist_ok=True) - task.export_task(task_id, dst_format, temp_dir, + task.export_task(task_id, dst_format, output_path, server_url=server_url, save_images=save_images) archive_ctime = osp.getctime(output_path) @@ -90,16 +89,8 @@ def clear_export_cache(task_id, file_path, file_ctime): raise -def _serialize_format(f): - return { - 'name': f.DISPLAY_NAME, - 'tag': 
f.NAME.lower(), - 'ext': f.EXT, - 'version': f.VERSION, - } - def get_export_formats(): - return [_serialize_format(f) for f in EXPORT_FORMATS] + return list(EXPORT_FORMATS.values()) def get_import_formats(): - return [_serialize_format(f) for f in IMPORT_FORMATS] \ No newline at end of file + return list(IMPORT_FORMATS.values()) \ No newline at end of file diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 9598206d6e04..7527276603b6 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -4,7 +4,6 @@ import os import os.path as osp -import re import shutil import traceback from datetime import datetime @@ -13,7 +12,6 @@ import django_rq from django.conf import settings from django.contrib.auth.models import User -from django.core.exceptions import ObjectDoesNotExist from django.db import IntegrityError from django.http import HttpResponse, HttpResponseNotFound from django.shortcuts import render @@ -36,6 +34,7 @@ import cvat.apps.dataset_manager as dm from cvat.apps.authentication import auth from cvat.apps.authentication.decorators import login_required +from cvat.apps.dataset_manager.serializers import DatasetFormatSerializer from cvat.apps.engine.frame_provider import FrameProvider from cvat.apps.engine.models import Job, Plugin, StatusChoice, Task from cvat.apps.engine.serializers import ( @@ -201,11 +200,20 @@ def share(request): status=status.HTTP_400_BAD_REQUEST) @staticmethod - @swagger_auto_schema(method='get', operation_summary='Method provides the list of available annotations formats supported by the server', - responses={'200': AnnotationFormatSerializer(many=True)}) - @action(detail=False, methods=['GET'], url_path='annotation/formats') - def annotation_formats(request): - data = dm.views.get_formats() + @swagger_auto_schema(method='get', operation_summary='Method provides the list of supported annotations formats', + responses={'200': DatasetFormatSerializer(many=True)}) + @action(detail=False, methods=['GET'], url_path='annotation/export_formats') + def annotation_export_formats(request): + data = dm.views.get_export_formats() + data = JSONRenderer().render(data) + return Response(data) + + @staticmethod + @swagger_auto_schema(method='get', operation_summary='Method provides the list of supported annotations formats', + responses={'200': DatasetFormatSerializer(many=True)}) + @action(detail=False, methods=['GET'], url_path='annotation/import_formats') + def annotation_import_formats(request): + data = dm.views.get_import_formats() data = JSONRenderer().render(data) return Response(data) @@ -469,7 +477,7 @@ def data(self, request, pk): def annotations(self, request, pk): self.get_object() # force to call check_object_permissions if request.method == 'GET': - data = annotation.get_task_data(pk, request.user) + data = dm.task.get_task_data(pk) serializer = LabeledDataSerializer(data=data) if serializer.is_valid(raise_exception=True): return Response(serializer.data) @@ -478,26 +486,26 @@ def annotations(self, request, pk): return load_data_proxy( request=request, rq_id="{}@/api/v1/tasks/{}/annotations/upload".format(request.user, pk), - rq_func=annotation.load_task_data, + rq_func=dm.task.import_task_annotations, pk=pk, ) else: serializer = LabeledDataSerializer(data=request.data) if serializer.is_valid(raise_exception=True): - data = annotation.put_task_data(pk, request.user, serializer.data) + data = dm.task.put_task_data(pk, serializer.data) return Response(data) elif request.method == 'DELETE': - annotation.delete_task_data(pk, 
request.user) + dm.task.delete_task_data(pk) return Response(status=status.HTTP_204_NO_CONTENT) elif request.method == 'PATCH': action = self.request.query_params.get("action", None) - if action not in annotation.PatchAction.values(): + if action not in dm.task.PatchAction.values(): raise serializers.ValidationError( "Please specify a correct 'action' for the request") serializer = LabeledDataSerializer(data=request.data) if serializer.is_valid(raise_exception=True): try: - data = annotation.patch_task_data(pk, request.user, serializer.data, action) + data = dm.task.patch_task_data(pk, serializer.data, action) except (AttributeError, IntegrityError) as e: return Response(data=str(e), status=status.HTTP_400_BAD_REQUEST) return Response(data) @@ -549,7 +557,7 @@ def dump(self, request, pk, filename): timestamp = datetime.strftime(last_task_update_time, "%Y_%m_%d_%H_%M_%S") - filename = "task_{}_annotations-{}-{}.{}".format( + filename = "task_{}-{}-{}_annotations.{}".format( db_task.name, timestamp, dst_format, osp.splitext(file_path)[1]) return sendfile(request, file_path, attachment=True, @@ -673,7 +681,7 @@ def dataset_export(self, request, pk): timestamp = datetime.strftime(last_task_update_time, "%Y_%m_%d_%H_%M_%S") - filename = "task_{}_dataset-{}-{}-{}-{}.{}".format( + filename = "task_{}-{}-{}_dataset.{}".format( db_task.name, timestamp, dst_format, osp.splitext(file_path)[1]) return sendfile(request, file_path, attachment=True, From ae7974e5c10a1298477696463b732e76ab8201d5 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Tue, 7 Apr 2020 15:07:25 +0300 Subject: [PATCH 18/80] merge develop --- cvat/apps/dataset_manager/_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cvat/apps/dataset_manager/_tests.py b/cvat/apps/dataset_manager/_tests.py index d87480d69b1e..1a5300756e1f 100644 --- a/cvat/apps/dataset_manager/_tests.py +++ b/cvat/apps/dataset_manager/_tests.py @@ -302,7 +302,7 @@ def test_labelme(self): self._test_export('cvat_label_me', save_images=True) def test_formats_query(self): - formats = dm.get_formats() + formats = dm.get_export_formats() expected = set(f['tag'] for f in dm.EXPORT_FORMATS) actual = set(f['tag'] for f in formats) From f8e26637c8754ec3f140c7cd4349cd633ae9e3a3 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Thu, 9 Apr 2020 12:07:43 +0300 Subject: [PATCH 19/80] Update format names --- cvat-core/tests/mocks/dummy-data.mock.js | 6 ++--- .../components/actions-menu/dump-submenu.tsx | 4 +-- cvat/apps/dataset_manager/_tests.py | 25 +++++++++++++------ cvat/apps/dataset_manager/formats/__init__.py | 24 ++++++++---------- cvat/apps/dataset_manager/formats/coco.py | 9 +------ cvat/apps/dataset_manager/formats/cvat.py | 7 +++--- cvat/apps/dataset_manager/formats/mask.py | 10 ++++---- cvat/apps/dataset_manager/serializers.py | 1 - cvat/apps/dataset_manager/views.py | 19 +++++++------- cvat/apps/engine/static/engine/js/base.js | 4 +-- cvat/apps/engine/tests/test_rest_api.py | 20 +++++++-------- cvat/apps/git/git.py | 6 +++-- datumaro/datumaro/util/__init__.py | 8 ++++-- utils/cli/core/definition.py | 4 +-- utils/cli/tests/test_cli.py | 4 +-- 15 files changed, 77 insertions(+), 74 deletions(-) diff --git a/cvat-core/tests/mocks/dummy-data.mock.js b/cvat-core/tests/mocks/dummy-data.mock.js index b4a6ea223ca9..d6f78e2dc616 100644 --- a/cvat-core/tests/mocks/dummy-data.mock.js +++ b/cvat-core/tests/mocks/dummy-data.mock.js @@ -10,13 +10,13 @@ const formatsDummyData = [{ "id": 1, "dumpers": [ { - "display_name": "CVAT XML 1.1 for videos", + 
"display_name": "CVAT for video 1.1", "format": "XML", "version": "1.1", "handler": "dump_as_cvat_interpolation" }, { - "display_name": "CVAT XML 1.1 for images", + "display_name": "CVAT for images 1.1", "format": "XML", "version": "1.1", "handler": "dump_as_cvat_annotation" @@ -24,7 +24,7 @@ const formatsDummyData = [{ ], "loaders": [ { - "display_name": "CVAT XML 1.1", + "display_name": "CVAT 1.1", "format": "XML", "version": "1.1", "handler": "load" diff --git a/cvat-ui/src/components/actions-menu/dump-submenu.tsx b/cvat-ui/src/components/actions-menu/dump-submenu.tsx index 6a13e3f4aaeb..3bf71cbe9475 100644 --- a/cvat-ui/src/components/actions-menu/dump-submenu.tsx +++ b/cvat-ui/src/components/actions-menu/dump-submenu.tsx @@ -12,8 +12,8 @@ import { import Text from 'antd/lib/typography/Text'; function isDefaultFormat(dumperName: string, taskMode: string): boolean { - return (dumperName === 'CVAT XML 1.1 for videos' && taskMode === 'interpolation') - || (dumperName === 'CVAT XML 1.1 for images' && taskMode === 'annotation'); + return (dumperName === 'CVAT for video 1.1' && taskMode === 'interpolation') + || (dumperName === 'CVAT for images 1.1' && taskMode === 'annotation'); } interface Props { diff --git a/cvat/apps/dataset_manager/_tests.py b/cvat/apps/dataset_manager/_tests.py index 1a5300756e1f..29eac9bbbfed 100644 --- a/cvat/apps/dataset_manager/_tests.py +++ b/cvat/apps/dataset_manager/_tests.py @@ -281,25 +281,34 @@ def _test_export(self, format_name, save_images=False): self.assertTrue(os.listdir(test_dir)) def test_datumaro(self): - self._test_export(dm.EXPORT_FORMAT_DATUMARO_PROJECT, save_images=False) + self._test_export('Datumaro 1.0', save_images=False) def test_coco(self): - self._test_export('cvat_coco', save_images=True) + self._test_export('COCO 1.0', save_images=True) def test_voc(self): - self._test_export('cvat_voc', save_images=True) + self._test_export('PASCAL VOC 1.1', save_images=True) - def test_tf_detection_api(self): - self._test_export('cvat_tfrecord', save_images=True) + def test_tf_record(self): + self._test_export('TFRecord 1.0', save_images=True) def test_yolo(self): - self._test_export('cvat_yolo', save_images=True) + self._test_export('YOLO 1.1', save_images=True) def test_mot(self): - self._test_export('cvat_mot', save_images=True) + self._test_export('MOT 1.1', save_images=True) def test_labelme(self): - self._test_export('cvat_label_me', save_images=True) + self._test_export('LabelMe 3.0', save_images=True) + + def test_mask(self): + self._test_export('Segmentation mask 1.1', save_images=True) + + def test_cvat_video(self): + self._test_export('CVAT for video 1.1', save_images=True) + + def test_cvat_images(self): + self._test_export('CVAT for images 1.1', save_images=True) def test_formats_query(self): formats = dm.get_export_formats() diff --git a/cvat/apps/dataset_manager/formats/__init__.py b/cvat/apps/dataset_manager/formats/__init__.py index 02c384e42be1..4b8b24ebc933 100644 --- a/cvat/apps/dataset_manager/formats/__init__.py +++ b/cvat/apps/dataset_manager/formats/__init__.py @@ -4,7 +4,6 @@ # SPDX-License-Identifier: MIT from datumaro.components.project import Environment -from datumaro.util import to_snake_case dm_env = Environment() @@ -13,8 +12,7 @@ class _Format: NAME = '' EXT = '' VERSION = '' - DISPLAY_NAME = '{name} {ext} {version}' - TAG = '' + DISPLAY_NAME = '{NAME} {VERSION}' class Exporter(_Format): def __call__(self, dst_file, task_data, **options): @@ -24,7 +22,7 @@ class Importer(_Format): def __call__(self, src_file, task_data, 
**options): raise NotImplementedError() -def _wrap_format(f_or_cls, klass, name, version, ext, display_name, tag): +def _wrap_format(f_or_cls, klass, name, version, ext, display_name): import inspect if inspect.isclass(f): assert hasattr(f_or_cls, '__call__') @@ -45,29 +43,27 @@ def __call__(self, *args, **kwargs): target.VERSION = version or klass.VERSION target.EXT = ext or klass.EXT target.DISPLAY_NAME = (display_name or klass.DISPLAY_NAME).format( - name=name, version=version, ext=ext) - target.TAG = tag or to_snake_case(target.NAME) - assert all([target.NAME, target.VERSION, target.EXT, target.DISPLAY_NAME, - target.TAG]) + NAME=name, VERSION=version, EXT=ext) + assert all([target.NAME, target.VERSION, target.EXT, target.DISPLAY_NAME]) return target EXPORT_FORMATS = {} -def exporter(name, version, ext, display_name=None, tag=None): +def exporter(name, version, ext, display_name=None): assert name not in EXPORT_FORMATS, "Export format '%s' already registered" % name def wrap_with_params(f_or_cls): - t = _wrap_format(f_or_cls, Exporter, tag=tag, + t = _wrap_format(f_or_cls, Exporter, name=name, ext=ext, version=version, display_name=display_name) - EXPORT_FORMATS[t.TAG] = t + EXPORT_FORMATS[t.DISPLAY_NAME] = t return t return wrap_with_params IMPORT_FORMATS = {} -def importer(name, version, ext, display_name=None, tag=None): +def importer(name, version, ext, display_name=None): assert name not in IMPORT_FORMATS, "Import format '%s' already registered" % name def wrap_with_params(f_or_cls): - t = _wrap_format(f_or_cls, Importer, tag=tag, + t = _wrap_format(f_or_cls, Importer, name=name, ext=ext, version=version, display_name=display_name) - IMPORT_FORMATS[t.TAG] = t + IMPORT_FORMATS[t.DISPLAY_NAME] = t return t return wrap_with_params diff --git a/cvat/apps/dataset_manager/formats/coco.py b/cvat/apps/dataset_manager/formats/coco.py index c67254715f28..87098c1fddd4 100644 --- a/cvat/apps/dataset_manager/formats/coco.py +++ b/cvat/apps/dataset_manager/formats/coco.py @@ -2,7 +2,6 @@ # # SPDX-License-Identifier: MIT -import shutil from tempfile import TemporaryDirectory from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor, \ @@ -20,13 +19,7 @@ def _export(dst_file, task_data, save_images=False): save_images=save_images) converter(extractor, save_dir=temp_dir) - if save_images: - make_zip_archive(temp_dir, dst_file) - else: - # Return only json file - dst_file.close() - shutil.move(osp.join(temp_dir, 'annotations', 'instances_default.json'), - dst_file.name) + make_zip_archive(temp_dir, dst_file) @importer(name='COCO', ext='JSON, ZIP', version='1.0') def _import(src_file, task_data): diff --git a/cvat/apps/dataset_manager/formats/cvat.py b/cvat/apps/dataset_manager/formats/cvat.py index 77717fb0dce2..b29c0fd070ae 100644 --- a/cvat/apps/dataset_manager/formats/cvat.py +++ b/cvat/apps/dataset_manager/formats/cvat.py @@ -536,18 +536,17 @@ def _export(dst_file, task_data, anno_callback, **options): make_zip_archive(temp_dir, dst_file) -@exporter(name='CVAT XML for videos', ext='ZIP', version='1.1') +@exporter(name='CVAT for video', ext='ZIP', version='1.1') def _export_video(dst_file, task_data, save_images=False): _export(dst_file, task_data, anno_callback=dump_as_cvat_interpolation, save_images=save_images) -@exporter(name='CVAT XML for images', ext='ZIP', version='1.1') +@exporter(name='CVAT for images', ext='ZIP', version='1.1') def _export_images(dst_file, task_data, save_images=False): _export(dst_file, task_data, anno_callback=dump_as_cvat_annotation, 
save_images=save_images) -@importer(name='CVAT XML', ext='XML, ZIP', version='1.1', - display_name='{name} {version}') +@importer(name='CVAT', ext='XML, ZIP', version='1.1') def _import(src_file, task_data): src_path = src_file.name diff --git a/cvat/apps/dataset_manager/formats/mask.py b/cvat/apps/dataset_manager/formats/mask.py index 9c7e266b725a..995e04bf0f15 100644 --- a/cvat/apps/dataset_manager/formats/mask.py +++ b/cvat/apps/dataset_manager/formats/mask.py @@ -13,7 +13,7 @@ from datumaro.components.project import Dataset -@exporter(name='MASK', ext='ZIP', version='1.1') +@exporter(name='Segmentation mask', ext='ZIP', version='1.1') def _export(dst_file, task_data, save_images=False): extractor = CvatTaskDataExtractor(task_data, include_images=save_images) envt = dm_env.transforms @@ -29,12 +29,12 @@ def _export(dst_file, task_data, save_images=False): make_zip_archive(temp_dir, dst_file) -@importer(name='MASK', ext='ZIP', version='1.1') +@importer(name='Segmentation mask', ext='ZIP', version='1.1') def _import(src_file, task_data): with TemporaryDirectory() as tmp_dir: Archive(src_file.name).extractall(tmp_dir) - dm_dataset = dm_env.make_importer('voc')(tmp_dir).make_dataset() + dataset = dm_env.make_importer('voc')(tmp_dir).make_dataset() masks_to_polygons = dm_env.transforms.get('masks_to_polygons') - dm_dataset = dm_dataset.transform(masks_to_polygons) - import_dm_annotations(dm_dataset, task_data) + dataset = dataset.transform(masks_to_polygons) + import_dm_annotations(dataset, task_data) diff --git a/cvat/apps/dataset_manager/serializers.py b/cvat/apps/dataset_manager/serializers.py index f9c5f05cfb59..bdc5dd1351ac 100644 --- a/cvat/apps/dataset_manager/serializers.py +++ b/cvat/apps/dataset_manager/serializers.py @@ -7,6 +7,5 @@ class DatasetFormatSerializer(serializers.Serializer): name = serializers.CharField(max_length=64, source='DISPLAY_NAME') - tag = serializers.CharField(max_length=64, source='TAG') ext = serializers.CharField(max_length=64, source='EXT') version = serializers.CharField(max_length=64, source='VERSION') \ No newline at end of file diff --git a/cvat/apps/dataset_manager/views.py b/cvat/apps/dataset_manager/views.py index 966cb085c39e..d888cd0e784b 100644 --- a/cvat/apps/dataset_manager/views.py +++ b/cvat/apps/dataset_manager/views.py @@ -12,8 +12,10 @@ import cvat.apps.dataset_manager.task as task from cvat.apps.engine.log import slogger from cvat.apps.engine.models import Task +from datumaro.cli.util import make_file_name +from datumaro.util import to_snake_case -from .formats import IMPORT_FORMATS, EXPORT_FORMATS +from .formats import EXPORT_FORMATS, IMPORT_FORMATS from .util import current_function_name @@ -41,7 +43,7 @@ def export_task(task_id, dst_format, server_url=None, save_images=False): exporter = get_exporter(format_name) output_base = '%s_%s' % ('dataset' if save_images else 'task', - to_snake_case(dst_format)) + make_file_name(to_snake_case(dst_format))) output_path = '%s.%s' % (output_base, exporter.EXT) output_path = osp.join(cache_dir, output_path) @@ -59,12 +61,11 @@ def export_task(task_id, dst_format, server_url=None, save_images=False): task_id=task_id, file_path=output_path, file_ctime=archive_ctime) slogger.task[task_id].info( - "The task '{}' is exported as '{}' " - "and available for downloading for next '{}'. 
" - "Export cache cleaning job is enqueued, " - "id '{}', start in '{}'".format( - db_task.name, dst_format, CACHE_TTL, - cleaning_job.id, CACHE_TTL)) + "The task '{}' is exported as '{}' at '{}' " + "and available for downloading for the next {}. " + "Export cache cleaning job is enqueued, id '{}'".format( + db_task.name, dst_format, output_path, CACHE_TTL, + cleaning_job.id)) return output_path except Exception: @@ -93,4 +94,4 @@ def get_export_formats(): return list(EXPORT_FORMATS.values()) def get_import_formats(): - return list(IMPORT_FORMATS.values()) \ No newline at end of file + return list(IMPORT_FORMATS.values()) diff --git a/cvat/apps/engine/static/engine/js/base.js b/cvat/apps/engine/static/engine/js/base.js index 434892aca54e..f3ebe5f1b9fc 100644 --- a/cvat/apps/engine/static/engine/js/base.js +++ b/cvat/apps/engine/static/engine/js/base.js @@ -224,6 +224,6 @@ $(document).ready(() => { }); function isDefaultFormat(dumperName, taskMode) { - return (dumperName === 'CVAT XML 1.1 for videos' && taskMode === 'interpolation') - || (dumperName === 'CVAT XML 1.1 for images' && taskMode === 'annotation'); + return (dumperName === 'CVAT for video 1.1' && taskMode === 'interpolation') + || (dumperName === 'CVAT for images 1.1' && taskMode === 'annotation'); } diff --git a/cvat/apps/engine/tests/test_rest_api.py b/cvat/apps/engine/tests/test_rest_api.py index a78af9a588db..1639c129cde1 100644 --- a/cvat/apps/engine/tests/test_rest_api.py +++ b/cvat/apps/engine/tests/test_rest_api.py @@ -3036,26 +3036,26 @@ def _get_initial_annotation(annotation_format): "shapes": [], "tracks": [], } - if annotation_format == "CVAT XML 1.1 for videos": + if annotation_format == "CVAT for video 1.1": annotations["tracks"] = rectangle_tracks_with_attrs + rectangle_tracks_wo_attrs - elif annotation_format == "CVAT XML 1.1 for images": + elif annotation_format == "CVAT for images 1.1": annotations["shapes"] = rectangle_shapes_with_attrs + rectangle_shapes_wo_attrs \ + polygon_shapes_wo_attrs + polygon_shapes_with_attrs annotations["tags"] = tags_with_attrs + tags_wo_attrs - elif annotation_format == "PASCAL VOC ZIP 1.1": + elif annotation_format == "PASCAL VOC 1.1": annotations["shapes"] = rectangle_shapes_wo_attrs annotations["tags"] = tags_wo_attrs elif annotation_format == "YOLO ZIP 1.1" or \ - annotation_format == "TFRecord ZIP 1.0": + annotation_format == "TFRecord 1.0": annotations["shapes"] = rectangle_shapes_wo_attrs - elif annotation_format == "COCO JSON 1.0": + elif annotation_format == "COCO 1.0": annotations["shapes"] = polygon_shapes_wo_attrs - elif annotation_format == "MASK ZIP 1.1": + elif annotation_format == "Segmentation mask 1.1": annotations["shapes"] = rectangle_shapes_wo_attrs + polygon_shapes_wo_attrs annotations["tracks"] = rectangle_tracks_wo_attrs @@ -3079,10 +3079,10 @@ def _get_initial_annotation(annotation_format): supported_formats = [{ "name": "CVAT", "dumpers": [{ - "display_name": "CVAT XML 1.1 for images" + "display_name": "CVAT for images 1.1" }], "loaders": [{ - "display_name": "CVAT XML 1.1" + "display_name": "CVAT 1.1" }] }] @@ -3132,7 +3132,7 @@ def _get_initial_annotation(annotation_format): } for loader in annotation_format["loaders"]: - if loader["display_name"] == "MASK ZIP 1.1": + if loader["display_name"] == "Segmentation mask 1.1": continue # can't really predict the result and check response = self._upload_api_v1_tasks_id_annotations(task["id"], annotator, uploaded_data, "format={}".format(loader["display_name"])) self.assertEqual(response.status_code, 
HTTP_202_ACCEPTED) @@ -3183,7 +3183,7 @@ def etree_to_dict(t): self.assertTrue(coco.getAnnIds()) elif annotation_format_name == "TFRecord": self.assertTrue(zipfile.is_zipfile(content)) - elif annotation_format_name == "MASK": + elif annotation_format_name == "Segmentation mask": self.assertTrue(zipfile.is_zipfile(content)) diff --git a/cvat/apps/git/git.py b/cvat/apps/git/git.py index 5189acbdfeeb..3cea9e69681b 100644 --- a/cvat/apps/git/git.py +++ b/cvat/apps/git/git.py @@ -266,8 +266,10 @@ def push(self, user, scheme, host, db_task, last_save): # Dump an annotation timestamp = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S") - display_name = "CVAT XML 1.1" - display_name += " for images" if self._task_mode == "annotation" else " for videos" + if self._task_mode == "annotation": + display_name = "CVAT for images 1.1" + else: + display_name = "CVAT for video 1.1" cvat_dumper = AnnotationDumper.objects.get(display_name=display_name) dump_name = os.path.join(db_task.get_task_dirname(), "git_annotation_{}.xml".format(timestamp)) diff --git a/datumaro/datumaro/util/__init__.py b/datumaro/datumaro/util/__init__.py index 03f4d59579ae..624284c208e8 100644 --- a/datumaro/datumaro/util/__init__.py +++ b/datumaro/datumaro/util/__init__.py @@ -49,9 +49,13 @@ def to_snake_case(s): return '' name = [s[0].lower()] - for char in s[1:]: + for idx, char in enumerate(s[1:]): + idx = idx + 1 if char.isalpha() and char.isupper(): - name.append('_') + prev_char = s[idx - 1] + if not (prev_char.isalpha() and prev_char.isupper()): + # avoid "HTML" -> "h_t_m_l" + name.append('_') name.append(char.lower()) else: name.append(char) diff --git a/utils/cli/core/definition.py b/utils/cli/core/definition.py index ed7719c3eb29..7f9eb2db9717 100644 --- a/utils/cli/core/definition.py +++ b/utils/cli/core/definition.py @@ -212,7 +212,7 @@ def argparse(s): '--format', dest='fileformat', type=str, - default='CVAT XML 1.1 for images', + default='CVAT for images 1.1', help='annotation format (default: %(default)s)' ) @@ -238,6 +238,6 @@ def argparse(s): '--format', dest='fileformat', type=str, - default='CVAT XML 1.1', + default='CVAT 1.1', help='annotation format (default: %(default)s)' ) diff --git a/utils/cli/tests/test_cli.py b/utils/cli/tests/test_cli.py index 97db8bfb8c14..47cd7942fe70 100644 --- a/utils/cli/tests/test_cli.py +++ b/utils/cli/tests/test_cli.py @@ -60,7 +60,7 @@ def test_tasks_delete(self): def test_tasks_dump(self): path = os.path.join(settings.SHARE_ROOT, 'test_cli.xml') - self.cli.tasks_dump(1, 'CVAT XML 1.1 for images', path) + self.cli.tasks_dump(1, 'CVAT for images 1.1', path) self.assertTrue(os.path.exists(path)) os.remove(path) @@ -131,6 +131,6 @@ def generate_coco_anno(): path = os.path.join(settings.SHARE_ROOT, 'test_cli.json') with open(path, "wb") as coco: coco.write(content) - self.cli.tasks_upload(1, 'COCO JSON 1.0', path) + self.cli.tasks_upload(1, 'COCO 1.0', path) self.assertRegex(self.mock_stdout.getvalue(), '.*{}.*'.format("annotation file")) os.remove(path) From 927ca560298c3870280d91626ac23e83f532c00d Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Thu, 9 Apr 2020 12:12:32 +0300 Subject: [PATCH 20/80] Update docs --- README.md | 17 +- cvat/apps/documentation/user_guide.md | 250 +++++++++++++------------- utils/cli/README.md | 2 +- 3 files changed, 133 insertions(+), 136 deletions(-) diff --git a/README.md b/README.md index 5942c9d7c182..e92980ffa19a 100644 --- a/README.md +++ b/README.md @@ -34,21 +34,22 @@ CVAT is free, online, interactive video and image annotation tool for 
computer v ## Supported annotation formats -Format selection is possible after clicking on the Upload annotation / Dump annotation button. +Format selection is possible after clicking on the Upload annotation and Dump annotation buttons. [Datumaro](datumaro/README.md) dataset framework allows additional dataset transformations -via its command line tool. +via its command line tool and Python library. -| Annotation format | Dumper | Loader | +| Annotation format | Import | Export | | ------------------------------------------------------------------------------------------ | ------ | ------ | -| [CVAT XML v1.1 for images](cvat/apps/documentation/xml_format.md#annotation) | X | X | -| [CVAT XML v1.1 for a video](cvat/apps/documentation/xml_format.md#interpolation) | X | X | -| [Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/) | X | X | +| [CVAT for images](cvat/apps/documentation/xml_format.md#annotation) | X | X | +| [CVAT for a video](cvat/apps/documentation/xml_format.md#interpolation) | X | X | +| [Datumaro](datumaro/README.md) | | X | +| [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/) | X | X | +| Segmentation masks from [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/) | X | X | | [YOLO](https://pjreddie.com/darknet/yolo/) | X | X | | [MS COCO Object Detection](http://cocodataset.org/#format-data) | X | X | -| PNG class mask + instance mask as in [Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/) | X | X | | [TFrecord](https://www.tensorflow.org/tutorials/load_data/tf_records) | X | X | | [MOT](https://motchallenge.net/) | X | X | -| [LabelMe](http://labelme.csail.mit.edu/Release3.0) | X | X | +| [LabelMe 3.0](http://labelme.csail.mit.edu/Release3.0) | X | X | ## Links - [Intel AI blog: New Computer Vision Tool Accelerates Annotation of Digital Images and Video](https://www.intel.ai/introducing-cvat) diff --git a/cvat/apps/documentation/user_guide.md b/cvat/apps/documentation/user_guide.md index 06721c15be82..100e028cc1e8 100644 --- a/cvat/apps/documentation/user_guide.md +++ b/cvat/apps/documentation/user_guide.md @@ -100,7 +100,7 @@ Go to the [Django administration panel](http://localhost:8080/admin). There you **Labels**. There are two ways of working with labels: - The ``Constructor`` is a simple way to add and adjust labels. To add a new label click the ``Add label`` button. ![](static/documentation/images/image123.jpg) - + You can set a name of the label in the ``Label name`` field. ![](static/documentation/images/image124.jpg) @@ -109,49 +109,49 @@ Go to the [Django administration panel](http://localhost:8080/admin). There you ![](static/documentation/images/image125.jpg) - The following actions are available here: - 1. Set the attribute’s name. - 1. Choose the way to display the attribute: - - Select — drop down list of value - - Radio — is used when it is necessary to choose just one option out of few suggested. - - Checkbox — is used when it is necessary to choose any number of options out of suggested. - - Text — is used when an attribute is entered as a text. - - Number — is used when an attribute is entered as a number. - 1. Set values for the attribute. The values could be separated by pressing ``Enter``. - The entered value is displayed as a separate element which could be deleted - by pressing ``Backspace`` or clicking the close button (x). - If the specified way of displaying the attribute is Text or Number, + The following actions are available here: + 1. Set the attribute’s name. + 1. 
Choose the way to display the attribute: - - Select — drop down list of value - - Radio — is used when it is necessary to choose just one option out of few suggested. - - Checkbox — is used when it is necessary to choose any number of options out of suggested. - - Text — is used when an attribute is entered as a text. - - Number — is used when an attribute is entered as a number. - 1. Set values for the attribute. The values could be separated by pressing ``Enter``. - The entered value is displayed as a separate element which could be deleted - by pressing ``Backspace`` or clicking the close button (x). - If the specified way of displaying the attribute is Text or Number, + The following actions are available here: + 1. Set the attribute’s name. + 1. Choose the way to display the attribute: + - Select — a drop-down list of values + - Radio — is used when it is necessary to choose just one option out of a few suggested. + - Checkbox — is used when it is necessary to choose any number of options out of the suggested ones. + - Text — is used when an attribute is entered as text. + - Number — is used when an attribute is entered as a number. + 1. Set values for the attribute. The values could be separated by pressing ``Enter``. + The entered value is displayed as a separate element which could be deleted + by pressing ``Backspace`` or clicking the close button (x). + If the specified way of displaying the attribute is Text or Number, the entered value will be displayed as text by default (e.g. you can specify the text format). - 1. Checkbox ``Mutable`` determines if an attribute would be changed frame to frame. + 1. The ``Mutable`` checkbox determines whether an attribute can be changed frame to frame. 1. You can delete the attribute by clicking the close button (x). - Click the ``Continue`` button to add more labels. - If you need to cancel adding a label - press the ``Cancel`` button. - After all the necessary labels are added click the ``Done`` button. - After clicking ``Done`` the added labels would be displayed as separate elements of different colour. - You can edit or delete labels by clicking ``Update attributes`` or ``Delete label``. + Click the ``Continue`` button to add more labels. + If you need to cancel adding a label, press the ``Cancel`` button. + After all the necessary labels are added, click the ``Done`` button. + After clicking ``Done`` the added labels will be displayed as separate elements of different colour. + You can edit or delete labels by clicking ``Update attributes`` or ``Delete label``. - - The ``Raw`` is a way of working with labels for an advanced user. - Raw presents label data in _json_ format with an option of editing and copying labels as a text. - The ``Done`` button applies the changes and the ``Reset`` button cancels the changes. + - The ``Raw`` mode is a way of working with labels for an advanced user. + Raw presents label data in _json_ format with an option of editing and copying labels as text. + The ``Done`` button applies the changes and the ``Reset`` button cancels the changes. ![](static/documentation/images/image126.jpg) In ``Raw`` and ``Constructor`` mode, you can press the ``Copy`` button to copy the list of labels. - **Select files**. Press tab ``My computer`` to choose some files for annotation from your PC. - If you select tab ``Connected file share`` you can choose files for annotation from your network. + **Select files**. Press the ``My computer`` tab to choose some files for annotation from your PC. + If you select the ``Connected file share`` tab, you can choose files for annotation from your network. If you select ``Remote source``, you'll see a field where you can enter a list of URLs (one URL per line). - + ![](static/documentation/images/image127.jpg) #### Advanced configuration - ![](static/documentation/images/image128.jpg) + ![](static/documentation/images/image128.jpg) **Z-Order**. Defines the order of drawn polygons. Check the box to enable layered displaying. **Use zip chunks**. Force to use zip chunks as compressed data. Applies to videos only. - + **Image Quality**. Use this option to specify the quality of uploaded images. The option helps to load high resolution datasets faster. Use the value from ``1`` (completely compressed images) to ``95`` (almost not compressed images). 
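For readers who script task creation instead of using the UI, the advanced options above can also be passed to the server directly. A minimal, hedged sketch with the requests library (the endpoint path and field names are assumed from this version's DataSerializer; the host, credentials, task id, and file URL are illustrative):

    import requests

    session = requests.Session()
    session.auth = ('user', 'password')  # assumes basic auth is enabled
    base = 'http://localhost:8080/api/v1'

    # These field names mirror the advanced configuration described above;
    # they are an assumption for this CVAT version and may differ in others.
    data = {
        'image_quality': 70,     # 1..95, compression of uploaded images
        'chunk_size': 36,        # frames per chunk; the server picks one if omitted
        'use_zip_chunks': True,  # force zip chunks as compressed data (videos)
        'remote_files[0]': 'https://example.com/video.mp4',  # illustrative URL
    }
    response = session.post(base + '/tasks/42/data', data=data)  # task 42 is hypothetical
    response.raise_for_status()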
@@ -190,8 +190,8 @@ Go to the [Django administration panel](http://localhost:8080/admin). There you For example, enter ``25`` to leave every twenty fifth frame in the video. Use this option on video files only. **Chunk size**. Defines the number of frames to be packed in a chunk when sent from client to server. - Server defines automatically if empty. - + Server defines automatically if empty. + Recommended values: - 1080p or less: 36 - 2k or less: 8 - 16 @@ -223,61 +223,57 @@ Go to the [Django administration panel](http://localhost:8080/admin). There you 1. The Dashboard contains elements and each of them relates to a separate task. They are sorted in creation order. Each element contains: task name, preview, progress bar, button ``Open``, and menu ``Actions``. Each button in the ``Actions`` menu is responsible for a specific function: - - ``Dump Annotation`` — download an annotation file from a task. Several formats are available: - - [CVAT XML 1.1 for video](/cvat/apps/documentation/xml_format.md#interpolation) - is highlighted if a task has the interpolation mode. - - [CVAT XML 1.1 for images](/cvat/apps/documentation/xml_format.md#annotation) - is highlighted if a task has the annotation mode. - - [PASCAL VOC ZIP 1.1](http://host.robots.ox.ac.uk/pascal/VOC/) - - [YOLO ZIP 1.1](https://pjreddie.com/darknet/yolo/) - - [COCO JSON 1.0](http://cocodataset.org/#format-data) - - ``MASK ZIP 1.0`` — archive contains a mask of each frame in the png format and a text file - with the value of each color. - - [TFRecord ZIP 1.0](https://www.tensorflow.org/tutorials/load_data/tf_records) - - [MOT CSV 1.0](https://motchallenge.net/) - - [LabelMe ZIP 3.0 for image](http://labelme.csail.mit.edu/Release3.0/) - - ``Upload annotation`` is possible in same format as ``Dump annotation``, with exception of ``MASK ZIP 1.0`` - format and without choosing whether [CVAT XML 1.1](/cvat/apps/documentation/xml_format.md) - and [LabelMe ZIP 3.0](http://labelme.csail.mit.edu/Release3.0/) - refers to an image or video. - - ``Export as a dataset`` — download a data set from a task. Several formats are available: - - [Datumaro](https://github.com/opencv/cvat/blob/develop/datumaro/docs/design.md) - - [Pascal VOC 2012](http://host.robots.ox.ac.uk/pascal/VOC/) - - [MS COCO](http://cocodataset.org/#format-data) + - ``Dump Annotation`` and ``Export as a dataset`` — download annotations alone, or + annotations together with images, in a specific format. The following formats are available: + - [CVAT for video](/cvat/apps/documentation/xml_format.md#interpolation) + is highlighted if a task has the interpolation mode. + - [CVAT for images](/cvat/apps/documentation/xml_format.md#annotation) + is highlighted if a task has the annotation mode. + - [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/) + - [(VOC) Segmentation mask](http://host.robots.ox.ac.uk/pascal/VOC/) — + archive contains class and instance masks for each frame in the png + format and a text file with the value of each color. - [YOLO](https://pjreddie.com/darknet/yolo/) + - [COCO](http://cocodataset.org/#format-data) + - [TFRecord](https://www.tensorflow.org/tutorials/load_data/tf_records) + - [MOT](https://motchallenge.net/) + - [LabelMe 3.0](http://labelme.csail.mit.edu/Release3.0/) + - [Datumaro](https://github.com/opencv/cvat/blob/develop/datumaro/) + - ``Upload annotation`` is available in the same formats as in ``Dump annotation``. + - [CVAT](/cvat/apps/documentation/xml_format.md) accepts both video and image sub-formats. 
- ``Automatic Annotation`` — automatic annotation with OpenVINO toolkit.
-     Presence depends on how you build CVAT instance.
+     Its presence depends on how you built your CVAT instance.
   - ``Open bug tracker`` — opens a link to the Issue tracker.
   - ``Delete`` — delete task.

   Push the ``Open`` button to go to task details.

-1. Task details is a task page which contains a preview, a progress bar
+1. Task details is a task page which contains a preview, a progress bar
   and the details of the task (specified when the task was created) and the jobs section.
-
+
   ![](static/documentation/images/image131.jpg)
-
   The next actions are available on this page:
   1. Change the task’s title.
-  1. Open ``Actions`` menu.
+  1. Open the ``Actions`` menu.
   1. Change issue tracker or open issue tracker if it is specified.
   1. Change labels.
-  You can add new labels or add attributes for the existing labels in the Raw mode or the Constructor mode.
+  You can add new labels or add attributes for the existing labels in the Raw mode or the Constructor mode.
   By clicking ``Copy`` you will copy the labels to the clipboard.
-  1. Assigned to — is used to assign a task to a person. Start typing an assignee’s name and/or
+  1. Assigned to — is used to assign a task to a person. Start typing an assignee’s name and/or
   choose the right person out of the dropdown list.
   - ``Jobs`` — a list of all jobs for a particular task. Here you can find the following data:
     - Job names with a hyperlink to each of them.
-    - Frames — the frame interval.
-    - A status of the job. The status is specified by the user in the menu inside the job.
-    There are three types of status: annotation, validation or completed.
+    - Frames — the frame interval.
+    - The status of the job. The status is specified by the user in the menu inside the job.
+    There are three types of status: annotation, validation or completed.
    The status of the job changes the progress bar of the task.
    - Started on — start date of this job.
    - Duration — the amount of time that has been spent on the job.
-   - Assignee is the user who is working on the job.
+   - Assignee is the user who is working on the job.
    You can start typing an assignee’s name and/or choose the right person out of the dropdown list.
-   - ``Copy``. By clicking Copy you will copy the job list to the clipboard.
-   The job list contains direct links to jobs.
+   - ``Copy``. By clicking Copy you will copy the job list to the clipboard.
+   The job list contains direct links to jobs.

1. Follow a link inside the ``Jobs`` section to start the annotation process.
   In some cases, you can have several links. It depends on the size of your
@@ -289,33 +285,33 @@ Go to the [Django administration panel](http://localhost:8080/admin). There you

### Models

-On the ``Models`` page allows you to manage your deep learning (DL) models uploaded for auto annotation.
-Using the functionality you can upload, update or delete a specific DL model.
-To open the model manager, click the ``Models`` button on the navigation bar.
+The ``Models`` page allows you to manage your deep learning (DL) models uploaded for auto annotation.
+Using this functionality you can upload, update or delete a specific DL model.
+To open the model manager, click the ``Models`` button on the navigation bar.
The ``Models`` page contains information about all the existing models. The list of models is divided into two sections:
-- Primary — contains default CVAT models.
Each model is a separate element.
+It contains the model’s name, the framework on which the model was based, and
``Supported labels`` (a dropdown list of all supported labels).
-- Uploaded by a user — Contains models uploaded by a user.
-The list of user models has additional columns with the following information:
-name of the user who uploaded the model and the upload date.
+- Uploaded by a user — contains models uploaded by a user.
+The list of user models has additional columns with the following information:
+the name of the user who uploaded the model and the upload date.
Here you can delete models in the ``Actions`` menu.

![](static/documentation/images/image099.jpg)

-In order to add your model, click `` Create new model``.
-Enter model name, and select model file using "Select files" button.
+In order to add your model, click ``Create new model``.
+Enter the model name and select the model file using the "Select files" button.
To annotate a task with a custom model you need to prepare 4 files:
-- ``Model config`` (*.xml) - a text file with network configuration.
-- ``Model weights`` (*.bin) - a binary file with trained weights.
-- ``Label map`` (*.json) - a simple json file with label_map dictionary like an object with
+- ``Model config`` (*.xml) - a text file with the network configuration.
+- ``Model weights`` (*.bin) - a binary file with trained weights.
+- ``Label map`` (*.json) - a simple json file with a label_map dictionary, i.e. an object with
string label values keyed by label numbers (a hypothetical example is shown after the automatic annotation section).
-- ``Interpretation script`` (*.py) - a file used to convert net output layer to a predefined structure
-which can be processed by CVAT.
+- ``Interpretation script`` (*.py) - a file used to convert the network's output layer to a predefined structure
+which can be processed by CVAT.
You can learn more about creating model files by pressing [(?)](/cvat/apps/auto_annotation).
-Check the box `` Load globally`` if you want everyone to be able to use the model.
-Click the ``Submit`` button to submit a model.
+Check the box ``Load globally`` if you want everyone to be able to use the model.
+Click the ``Submit`` button to submit a model.

![](static/documentation/images/image104.jpg)

@@ -352,7 +348,7 @@ The tool consists of:
 - ``Workspace`` — space where images are shown;
 - ``Controls sidebar`` — contains tools for navigating the image, zoom,
   creating shapes and editing tracks (merge, split, group)
-- ``Objects sidebar`` — contains label filter, two lists:
+- ``Objects sidebar`` — contains a label filter, two lists:
   objects (on the frame) and labels (of objects on the frame) and appearance settings.

![](static/documentation/images/image034_DETRAC.jpg)

@@ -515,7 +511,7 @@ Usage examples:
### Downloading annotations

1. To download the latest annotations, you have to save all changes first.
-  To do this, click ``Open Menu`` button.
+  To do this, click the ``Open Menu`` button.
1. After that, press the ``Save Work`` button. There is a ``Ctrl+S``
   shortcut to save annotations quickly.

![](static/documentation/images/image118.jpg)

1. Choose the format of the dumped annotation file.
Dumped annotations are available in several formats:
-     - [CVAT XML 1.1 for video](/cvat/apps/documentation/xml_format.md#interpolation)
+     - [CVAT for video 1.1](/cvat/apps/documentation/xml_format.md#interpolation)
       is highlighted if a task has the interpolation mode
-     - [CVAT XML 1.1 for images](/cvat/apps/documentation/xml_format.md#annotation)
+     - [CVAT for images 1.1](/cvat/apps/documentation/xml_format.md#annotation)
       is highlighted if a task has the annotation mode

-  ![](static/documentation/images/image029.jpg "Example XML format")
+  ![](static/documentation/images/image029.jpg "Example XML format")

-     - [PASCAL VOC ZIP 1.1](http://host.robots.ox.ac.uk/pascal/VOC/)
-     - [YOLO ZIP 1.1](https://pjreddie.com/darknet/yolo/)
-     - [COCO JSON 1.0](http://cocodataset.org/#format-data)
-     - ``MASK ZIP 1.0`` — archive contains a mask of each frame in the png format and a text file with
       the value of each color
-     - [TFRecord ZIP 1.0](https://www.tensorflow.org/tutorials/load_data/tf_records)
+     - [PASCAL VOC 1.1](http://host.robots.ox.ac.uk/pascal/VOC/)
+     - [YOLO 1.1](https://pjreddie.com/darknet/yolo/)
+     - [COCO 1.0](http://cocodataset.org/#format-data)
+     - ``Segmentation mask 1.1`` — archive contains a mask of each frame in the png format and a text file with
       the value of each color
+     - [TFRecord 1.0](https://www.tensorflow.org/tutorials/load_data/tf_records)

### Task synchronization with a repository
@@ -947,15 +943,15 @@ By default, objects in the mode are zoomed. Check
``Open Menu`` —> ``Settings`` —> ``AAM Zoom Margin`` to adjust that.

## Annotation with box by 4 points
-It is an efficient method of bounding box annotation, proposed
-[here](https://arxiv.org/pdf/1708.02750.pdf).
+It is an efficient method of bounding box annotation, proposed
+[here](https://arxiv.org/pdf/1708.02750.pdf).
Before starting, you need to be sure that ``Box by 4 points`` is selected.

![](static/documentation/images/image134.jpg)

-Press ``N`` for entering drawing mode. Click exactly four extreme points:
-the top, bottom, left- and right-most physical points on the object.
-Drawing is automatically completed right after clicking the fourth point.
+Press ``N`` to enter drawing mode. Click exactly four extreme points:
+the top, bottom, left- and right-most physical points on the object.
+Drawing is completed automatically right after clicking the fourth point.
Press ``Esc`` to cancel editing.

![](static/documentation/images/gif016.gif)

@@ -1030,9 +1026,9 @@ automatically. You can adjust the polyline after it has been drawn.

## Annotation with cuboids

-It is used to annotate 3 dimensional objects such as cars, boxes, etc...
+It is used to annotate 3-dimensional objects such as cars, boxes, etc.
Currently the feature supports one-point perspective and has the constraint
-where the vertical edges are exactly parallel to the sides.
+where the vertical edges are exactly parallel to the sides.

### Creating the cuboid

@@ -1043,9 +1039,9 @@ You may draw the cuboid by placing 4 points, after which the drawing completes a

The first 3 points will represent a plane of the cuboid while the last point represents the depth of that plane.
For the first 3 points, it is recommended to only draw the 2 closest side faces,
-as well as the top and bottom face.
+as well as the top and bottom face.
-A few examples:
+A few examples:

![](static/documentation/images/CuboidDrawing1.gif)
![](static/documentation/images/CuboidDrawing2.gif)

@@ -1054,12 +1050,12 @@

### Editing the cuboid

-The cuboid can be edited in multiple ways, by dragging points or by dragging certain faces.
-First notice that there is a face that is painted with pink lines only, let us call it the front face.
+The cuboid can be edited in multiple ways: by dragging points or by dragging certain faces.
+First, notice that there is a face that is painted with pink lines only; let us call it the front face.

-The cuboid can be moved by simply dragging the shape as normal.
-The cuboid can be extended by dragging on the point in the middle of the edges.
-The cuboid can also be extended up and down by dragging the point at the vertices.
+The cuboid can be moved by simply dragging the shape as normal.
+The cuboid can be extended by dragging the points in the middle of the edges.
+The cuboid can also be extended up and down by dragging the points at the vertices.

![](static/documentation/images/CuboidEditing1.gif)

@@ -1070,17 +1066,17 @@
The cuboid can then be edited as usual.

![](static/documentation/images/EditingPerspective.gif)

If you wish to reset perspective effects, you may right-click on the cuboid,
-and select ``Reset Perspective`` to return to a regular cuboid.
+and select ``Reset Perspective`` to return to a regular cuboid.

The location of the pink face can be swapped with the adjacent visible side face.
This is done by right-clicking on the cuboid and selecting ``Switch Perspective Orientation``.
-Note that this will also reset the perspective effects.
+Note that this will also reset the perspective effects.

![](static/documentation/images/ResetPerspective.gif)

Certain faces of the cuboid can also be edited,
-these faces are the left, right and dorsal faces, relative to the pink face.
-Simply drag the faces to move them independently from the rest of the cuboid.
+these are the left, right and dorsal faces, relative to the pink face.
+Simply drag the faces to move them independently from the rest of the cuboid.

![](static/documentation/images/CuboidEditing2.gif)

@@ -1112,14 +1108,14 @@ You can add/delete points after finishing.

You can use linear interpolation for points to annotate a moving object:

-1. Before starting, you have to be sure that ``Points`` is selected.
-1. Linear interpolation works only with one point, so you need to set ``Poly Shapes Size``: 1.
+1. Before starting, you have to be sure that ``Points`` is selected.
+1. Linear interpolation works only with one point, so you need to set ``Poly Shapes Size``: 1.
1. After that, select the interpolation mode.
-
+
   ![](static/documentation/images/image122.jpg)

1. Press ``N`` or click ``Create Shape`` to enter drawing mode.
-  Click LMB to create a point and shape will be automatically completed.
+  Click LMB to create a point and the shape will be completed automatically.

![](static/documentation/images/gif011_DETRAC.gif)

@@ -1153,25 +1149,25 @@
a shape is created and you can work with it as a polygon.

## Automatic annotation

-Automatic Annotation is used for creating preliminary annotations.
-To use Automatic Annotation you need a DL model. You can use primary models or models uploaded by a user.
+Automatic Annotation is used for creating preliminary annotations.
+To use Automatic Annotation you need a DL model. You can use primary models or models uploaded by a user.
You can find the list of available models in the ``Models`` section.

-1. To launch automatic annotation, you should open the dashboard and find a task which you want to annotate.
+1. To launch automatic annotation, you should open the dashboard and find a task which you want to annotate.
   Then click the ``Actions`` button and choose the ``Automatic Annotation`` option from the dropdown menu.

   ![](static/documentation/images/image119_DETRAC.jpg)

-1. In the dialog window select a model you need. DL models are created for specific labels, e.g.
-   the Crossroad model was taught using footage from cameras located above the highway and it is best to
-   use this model for the tasks with similar camera angles.
-   If it's necessary select the ``Clean old annotations`` checkbox.
-   Adjust the labels so that the task labels will correspond to the labels of the DL model.
-   For example, let’s consider a task where you have to annotate labels “car” and “person”.
-   You should connect the “person” label from the model to the “person” label in the task.
-   As for the “car” label, you should choose the most fitting label available in the model - the “vehicle” label.
-   The task requires to annotate cars only and choosing the “vehicle” label implies annotation of all vehicles,
-   in this case using auto annotation will help you complete the task faster.
+1. In the dialog window select the model you need. DL models are created for specific labels, e.g.
+   the Crossroad model was trained on footage from cameras located above the highway, and it is best to
+   use this model for tasks with similar camera angles.
+   If necessary, select the ``Clean old annotations`` checkbox.
+   Adjust the labels so that the task labels correspond to the labels of the DL model.
+   For example, let’s consider a task where you have to annotate the labels “car” and “person”.
+   You should connect the “person” label from the model to the “person” label in the task.
+   As for the “car” label, you should choose the most fitting label available in the model - the “vehicle” label.
+   The task requires annotating cars only, and choosing the “vehicle” label implies annotation of all vehicles;
+   in this case using auto annotation will help you complete the task faster.
   Click ``Submit`` to begin the automatic annotation process.

   ![](static/documentation/images/image120.jpg)

@@ -1185,9 +1181,9 @@ You can find the list of available models in the ``Models`` section.

   ![](static/documentation/images/gif014_DETRAC.gif)

1. Separated bounding boxes can be edited by removing false positives, adding unlabeled objects and
-   merging into tracks using ``ReID merge`` function. Click the ``ReID merge`` button in the menu.
-   You can use the default settings (for more information click [here](cvat/apps/reid/README.md)).
-   To launch the merging process click ``Merge``. Each frame of the track will be a key frame.
+   merging into tracks using the ``ReID merge`` function. Click the ``ReID merge`` button in the menu.
+   You can use the default settings (for more information click [here](cvat/apps/reid/README.md)).
+   To launch the merging process, click ``Merge``. Each frame of the track will be a key frame.
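A side note on the label matching above: for user-uploaded models, the model's labels come from the ``Label map`` file described in the Models section. A hypothetical ``label_map.json`` for such a detector, following the "string values for label numbers" structure that section describes (label names and indices are invented for illustration):

```json
{
  "label_map": {
    "0": "background",
    "1": "person",
    "2": "vehicle"
  }
}
```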
![](static/documentation/images/image133.jpg) diff --git a/utils/cli/README.md b/utils/cli/README.md index ee637aa6a07e..07b708d72125 100644 --- a/utils/cli/README.md +++ b/utils/cli/README.md @@ -41,4 +41,4 @@ optional arguments: - Delete some tasks `cli.py delete 100 101 102` - Dump annotations -`cli.py dump --format "CVAT XML 1.1 for images" 103 output.xml` +`cli.py dump --format "CVAT for images 1.1" 103 output.xml` From cc04df1e1dcc2f17ddce182d35c258cce6bf4660 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Thu, 9 Apr 2020 12:13:18 +0300 Subject: [PATCH 21/80] Update tests --- cvat/apps/dataset_manager/_tests.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/cvat/apps/dataset_manager/_tests.py b/cvat/apps/dataset_manager/_tests.py index 29eac9bbbfed..edcb798940c1 100644 --- a/cvat/apps/dataset_manager/_tests.py +++ b/cvat/apps/dataset_manager/_tests.py @@ -269,16 +269,12 @@ def _put_api_v1_task_id_annotations(self, tid, data): return response def _test_export(self, format_name, save_images=False): - self.assertTrue(format_name in [f['tag'] for f in dm.EXPORT_FORMATS]) - task, _ = self._generate_task() - project = dm.TaskProject.from_task( - Task.objects.get(pk=task["id"]), self.user.username) - with tempfile.TemporaryDirectory() as test_dir: - project.export(format_name, test_dir, save_images=save_images) + f = BytesIO() + dm.export_task(task["id"], format_name, f, save_images=save_images) - self.assertTrue(os.listdir(test_dir)) + self.assertTrue(len(f.getvalue()) != 0) def test_datumaro(self): self._test_export('Datumaro 1.0', save_images=False) @@ -310,9 +306,12 @@ def test_cvat_video(self): def test_cvat_images(self): self._test_export('CVAT for images 1.1', save_images=True) - def test_formats_query(self): + def test_export_formats_query(self): formats = dm.get_export_formats() - expected = set(f['tag'] for f in dm.EXPORT_FORMATS) - actual = set(f['tag'] for f in formats) - self.assertSetEqual(expected, actual) + self.assertEqual(len(formats), 10) + + def test_import_formats_query(self): + formats = dm.get_import_formats() + + self.assertEqual(len(formats), 8) From 2d2ca2de518a5f27644581d3996ad2cf659d4b06 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Thu, 9 Apr 2020 13:16:55 +0300 Subject: [PATCH 22/80] move test --- .../test_data_manager.py => dataset_manager/test_annotation.py} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename cvat/apps/{engine/tests/test_data_manager.py => dataset_manager/test_annotation.py} (94%) diff --git a/cvat/apps/engine/tests/test_data_manager.py b/cvat/apps/dataset_manager/test_annotation.py similarity index 94% rename from cvat/apps/engine/tests/test_data_manager.py rename to cvat/apps/dataset_manager/test_annotation.py index 968b57525f6b..2db3969906a5 100644 --- a/cvat/apps/engine/tests/test_data_manager.py +++ b/cvat/apps/dataset_manager/test_annotation.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: MIT -from cvat.apps.engine.data_manager import TrackManager +from cvat.apps.dataset_manager.annotation import TrackManager from unittest import TestCase From 0263a0e3ccdc2d9f810b3b97f4086d5785a81ab8 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Thu, 9 Apr 2020 13:17:06 +0300 Subject: [PATCH 23/80] fix import --- datumaro/datumaro/components/cli_plugin.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datumaro/datumaro/components/cli_plugin.py b/datumaro/datumaro/components/cli_plugin.py index 9439d2b56a6f..e85f5c4f3033 100644 --- 
a/datumaro/datumaro/components/cli_plugin.py +++ b/datumaro/datumaro/components/cli_plugin.py @@ -5,7 +5,8 @@ import argparse -from datumaro.cli.util import MultilineFormatter, to_snake_case +from datumaro.cli.util import MultilineFormatter +from datumaro.util import to_snake_case class CliPlugin: From ec52ecd8f27a256450008df677098d380337cd3d Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Thu, 9 Apr 2020 13:32:20 +0300 Subject: [PATCH 24/80] Extend format tests --- cvat/apps/dataset_manager/_tests.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/cvat/apps/dataset_manager/_tests.py b/cvat/apps/dataset_manager/_tests.py index edcb798940c1..3aa19957bc13 100644 --- a/cvat/apps/dataset_manager/_tests.py +++ b/cvat/apps/dataset_manager/_tests.py @@ -184,6 +184,24 @@ def _generate_task(self): "type": "polygon", "occluded": False }, + { + "frame": 1, + "label_id": task["labels"][0]["id"], + "group": 1, + "attributes": [], + "points": [100, 300.222, 400, 500, 1, 3], + "type": "points", + "occluded": False + }, + { + "frame": 1, + "label_id": task["labels"][0]["id"], + "group": 1, + "attributes": [], + "points": [2.0, 2.1, 400, 500, 1, 3], + "type": "polyline", + "occluded": False + }, ], "tracks": [ { From 76f496e0e9a2f8c1a87510022ffb6ab8334ba322 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Thu, 9 Apr 2020 14:49:56 +0300 Subject: [PATCH 25/80] django compatibility for directory access --- cvat/apps/dataset_manager/__init__.py | 7 ------- cvat/apps/dataset_manager/formats/coco.py | 5 +++-- cvat/apps/dataset_manager/formats/cvat.py | 3 ++- cvat/apps/dataset_manager/formats/datumaro/__init__.py | 2 +- cvat/apps/dataset_manager/formats/labelme.py | 7 ++++--- cvat/apps/dataset_manager/formats/mask.py | 7 ++++--- cvat/apps/dataset_manager/formats/mot.py | 3 ++- cvat/apps/dataset_manager/formats/pascal_voc.py | 3 ++- .../dataset_manager/formats/{__init__.py => registry.py} | 2 +- cvat/apps/dataset_manager/formats/tfrecord.py | 7 ++++--- cvat/apps/dataset_manager/formats/yolo.py | 3 ++- cvat/apps/dataset_manager/task.py | 2 +- 12 files changed, 26 insertions(+), 25 deletions(-) delete mode 100644 cvat/apps/dataset_manager/__init__.py rename cvat/apps/dataset_manager/formats/{__init__.py => registry.py} (100%) diff --git a/cvat/apps/dataset_manager/__init__.py b/cvat/apps/dataset_manager/__init__.py deleted file mode 100644 index aee421163331..000000000000 --- a/cvat/apps/dataset_manager/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ - -# Copyright (C) 2020 Intel Corporation -# -# SPDX-License-Identifier: MIT - -from .formats import get_formats -from .task import export_task_as_dataset \ No newline at end of file diff --git a/cvat/apps/dataset_manager/formats/coco.py b/cvat/apps/dataset_manager/formats/coco.py index 87098c1fddd4..042699debc41 100644 --- a/cvat/apps/dataset_manager/formats/coco.py +++ b/cvat/apps/dataset_manager/formats/coco.py @@ -6,9 +6,10 @@ from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor, \ import_dm_annotations -from cvat.apps.dataset_manager.formats import dm_env, exporter, importer from cvat.apps.dataset_manager.util import make_zip_archive +from .registry import dm_env, exporter, importer + @exporter(name='COCO', ext='ZIP', version='1.0') def _export(dst_file, task_data, save_images=False): @@ -33,4 +34,4 @@ def _import(src_file, task_data): Archive(src_path).extractall(tmp_dir) dataset = dm_env.make_importer('coco')(tmp_dir).make_dataset() - import_dm_annotations(dataset, task_data) \ No newline at end of file + 
import_dm_annotations(dataset, task_data) diff --git a/cvat/apps/dataset_manager/formats/cvat.py b/cvat/apps/dataset_manager/formats/cvat.py index b29c0fd070ae..5a33ee4c63ee 100644 --- a/cvat/apps/dataset_manager/formats/cvat.py +++ b/cvat/apps/dataset_manager/formats/cvat.py @@ -9,10 +9,11 @@ from pyunpack import Archive -from cvat.apps.dataset_manager.formats import exporter, importer from cvat.apps.dataset_manager.util import make_zip_archive from datumaro.util.image import save_image +from .registry import exporter, importer + def pairwise(iterable): a = iter(iterable) diff --git a/cvat/apps/dataset_manager/formats/datumaro/__init__.py b/cvat/apps/dataset_manager/formats/datumaro/__init__.py index 90a1632cc8b3..be8d755059b7 100644 --- a/cvat/apps/dataset_manager/formats/datumaro/__init__.py +++ b/cvat/apps/dataset_manager/formats/datumaro/__init__.py @@ -5,7 +5,7 @@ from tempfile import TemporaryDirectory from cvat.apps.dataset_manager.bindings import import_dm_annotations, CvatTaskDataExtractor -from cvat.apps.dataset_manager.formats import dm_env, exporter +from .registry import dm_env, exporter from cvat.apps.dataset_manager.util import make_zip_archive from cvat.settings.base import DATUMARO_PATH diff --git a/cvat/apps/dataset_manager/formats/labelme.py b/cvat/apps/dataset_manager/formats/labelme.py index 8f23a0483960..9ea1a76b800c 100644 --- a/cvat/apps/dataset_manager/formats/labelme.py +++ b/cvat/apps/dataset_manager/formats/labelme.py @@ -6,12 +6,13 @@ from pyunpack import Archive -from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor, \ - import_dm_annotations -from cvat.apps.dataset_manager.formats import dm_env, exporter, importer +from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor, + import_dm_annotations) from cvat.apps.dataset_manager.util import make_zip_archive from datumaro.components.project import Dataset +from .registry import dm_env, exporter, importer + @exporter(name='LabelMe', ext='ZIP', version='3.0') def _export(dst_file, task_data, save_images=False): diff --git a/cvat/apps/dataset_manager/formats/mask.py b/cvat/apps/dataset_manager/formats/mask.py index 995e04bf0f15..492fed38aaed 100644 --- a/cvat/apps/dataset_manager/formats/mask.py +++ b/cvat/apps/dataset_manager/formats/mask.py @@ -6,12 +6,13 @@ from pyunpack import Archive -from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor, \ - import_dm_annotations -from cvat.apps.dataset_manager.formats import dm_env, exporter, importer +from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor, + import_dm_annotations) from cvat.apps.dataset_manager.util import make_zip_archive from datumaro.components.project import Dataset +from .registry import dm_env, exporter, importer + @exporter(name='Segmentation mask', ext='ZIP', version='1.1') def _export(dst_file, task_data, save_images=False): diff --git a/cvat/apps/dataset_manager/formats/mot.py b/cvat/apps/dataset_manager/formats/mot.py index cf611a2cc363..6626b5cf34b6 100644 --- a/cvat/apps/dataset_manager/formats/mot.py +++ b/cvat/apps/dataset_manager/formats/mot.py @@ -9,10 +9,11 @@ import datumaro.components.extractor as datumaro from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor, match_frame) -from cvat.apps.dataset_manager.formats import dm_env, exporter, importer from cvat.apps.dataset_manager.util import make_zip_archive from datumaro.components.project import Dataset +from .registry import dm_env, exporter, importer + @exporter(name='MOT', ext='ZIP', version='1.1') def 
_export(dst_file, task_data, save_images=False): diff --git a/cvat/apps/dataset_manager/formats/pascal_voc.py b/cvat/apps/dataset_manager/formats/pascal_voc.py index 8aaca4911896..a0c54189a933 100644 --- a/cvat/apps/dataset_manager/formats/pascal_voc.py +++ b/cvat/apps/dataset_manager/formats/pascal_voc.py @@ -12,10 +12,11 @@ from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor, import_dm_annotations) -from cvat.apps.dataset_manager.formats import dm_env, exporter, importer from cvat.apps.dataset_manager.util import make_zip_archive from datumaro.components.project import Dataset +from .registry import dm_env, exporter, importer + @exporter(name='PASCAL VOC', ext='ZIP', version='1.1') def _export(dst_file, task_data, save_images=False): diff --git a/cvat/apps/dataset_manager/formats/__init__.py b/cvat/apps/dataset_manager/formats/registry.py similarity index 100% rename from cvat/apps/dataset_manager/formats/__init__.py rename to cvat/apps/dataset_manager/formats/registry.py index 4b8b24ebc933..59d608f713ae 100644 --- a/cvat/apps/dataset_manager/formats/__init__.py +++ b/cvat/apps/dataset_manager/formats/registry.py @@ -5,8 +5,8 @@ from datumaro.components.project import Environment -dm_env = Environment() +dm_env = Environment() class _Format: NAME = '' diff --git a/cvat/apps/dataset_manager/formats/tfrecord.py b/cvat/apps/dataset_manager/formats/tfrecord.py index 4bbaebfe12da..0e4962fa6c4a 100644 --- a/cvat/apps/dataset_manager/formats/tfrecord.py +++ b/cvat/apps/dataset_manager/formats/tfrecord.py @@ -6,12 +6,13 @@ from pyunpack import Archive -from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor, \ - import_dm_annotations -from cvat.apps.dataset_manager.formats import dm_env, exporter, importer +from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor, + import_dm_annotations) from cvat.apps.dataset_manager.util import make_zip_archive from datumaro.components.project import Dataset +from .registry import dm_env, exporter, importer + @exporter(name='TFRecord', ext='ZIP', version='1.0') def _export(dst_file, task_data, save_images=False): diff --git a/cvat/apps/dataset_manager/formats/yolo.py b/cvat/apps/dataset_manager/formats/yolo.py index fb734a7eb39c..fd35a6d0298a 100644 --- a/cvat/apps/dataset_manager/formats/yolo.py +++ b/cvat/apps/dataset_manager/formats/yolo.py @@ -10,11 +10,12 @@ from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor, import_dm_annotations, match_frame) -from cvat.apps.dataset_manager.formats import dm_env, exporter, importer from cvat.apps.dataset_manager.util import make_zip_archive from datumaro.components.extractor import DatasetItem from datumaro.components.project import Dataset +from .registry import dm_env, exporter, importer + @exporter(name='YOLO', ext='ZIP', version='1.1') def _export(dst_file, task_data, save_images=False): diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py index 1954c8fb33c6..81a403e37ee9 100644 --- a/cvat/apps/dataset_manager/task.py +++ b/cvat/apps/dataset_manager/task.py @@ -16,7 +16,7 @@ from .annotation import AnnotationIR, AnnotationManager from .bindings import TaskData -from .formats import make_exporter, make_importer +from .formats.registry import make_exporter, make_importer class dotdict(OrderedDict): From 1f4612ca42a55e3305d6fecef95ecb0bcf039b8b Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Thu, 9 Apr 2020 14:50:21 +0300 Subject: [PATCH 26/80] move tests --- cvat/apps/dataset_manager/{ => tests}/_tests.py | 0 
cvat/apps/dataset_manager/{ => tests}/test_annotation.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename cvat/apps/dataset_manager/{ => tests}/_tests.py (100%) rename cvat/apps/dataset_manager/{ => tests}/test_annotation.py (100%) diff --git a/cvat/apps/dataset_manager/_tests.py b/cvat/apps/dataset_manager/tests/_tests.py similarity index 100% rename from cvat/apps/dataset_manager/_tests.py rename to cvat/apps/dataset_manager/tests/_tests.py diff --git a/cvat/apps/dataset_manager/test_annotation.py b/cvat/apps/dataset_manager/tests/test_annotation.py similarity index 100% rename from cvat/apps/dataset_manager/test_annotation.py rename to cvat/apps/dataset_manager/tests/test_annotation.py From e139e9874d436fb21e18cf801325f816cbf48803 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Thu, 9 Apr 2020 14:50:38 +0300 Subject: [PATCH 27/80] update module links --- cvat/apps/engine/migrations/0017_db_redesign_20190221.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cvat/apps/engine/migrations/0017_db_redesign_20190221.py b/cvat/apps/engine/migrations/0017_db_redesign_20190221.py index 9c54bacd4930..60f6b32218d4 100644 --- a/cvat/apps/engine/migrations/0017_db_redesign_20190221.py +++ b/cvat/apps/engine/migrations/0017_db_redesign_20190221.py @@ -4,7 +4,7 @@ from django.db import migrations, models import django.db.models.deletion from django.conf import settings -from cvat.apps.engine.annotation import _merge_table_rows +from cvat.apps.dataset_manager.task import _merge_table_rows # some modified functions to transer annotation def _bulk_create(db_model, db_alias, objects, flt_param): From acf79ca9006c3018eddca30e4d41f2024a206cb4 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Thu, 9 Apr 2020 14:55:24 +0300 Subject: [PATCH 28/80] fixes --- .../dataset_manager/formats/datumaro/__init__.py | 7 +++++-- cvat/apps/dataset_manager/formats/registry.py | 16 +++++++++------- cvat/apps/dataset_manager/task.py | 1 - 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/cvat/apps/dataset_manager/formats/datumaro/__init__.py b/cvat/apps/dataset_manager/formats/datumaro/__init__.py index be8d755059b7..8471766364bc 100644 --- a/cvat/apps/dataset_manager/formats/datumaro/__init__.py +++ b/cvat/apps/dataset_manager/formats/datumaro/__init__.py @@ -2,13 +2,16 @@ # # SPDX-License-Identifier: MIT +import os.path as osp from tempfile import TemporaryDirectory -from cvat.apps.dataset_manager.bindings import import_dm_annotations, CvatTaskDataExtractor -from .registry import dm_env, exporter +from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor, + import_dm_annotations) from cvat.apps.dataset_manager.util import make_zip_archive from cvat.settings.base import DATUMARO_PATH +from ..registry import dm_env, exporter + @exporter(name="Datumaro", ext="ZIP", version="1.0") class DatumaroProjectExporter: diff --git a/cvat/apps/dataset_manager/formats/registry.py b/cvat/apps/dataset_manager/formats/registry.py index 59d608f713ae..20377dd67030 100644 --- a/cvat/apps/dataset_manager/formats/registry.py +++ b/cvat/apps/dataset_manager/formats/registry.py @@ -24,7 +24,8 @@ def __call__(self, src_file, task_data, **options): def _wrap_format(f_or_cls, klass, name, version, ext, display_name): import inspect - if inspect.isclass(f): + assert inspect.isclass(f_or_cls) or inspect.isfunction(f_or_cls) + if inspect.isclass(f_or_cls): assert hasattr(f_or_cls, '__call__') target = f_or_cls elif inspect.isfunction(f_or_cls): @@ -36,8 +37,6 @@ def __call__(self, *args, 
**kwargs): wrapper.__name__ = f_or_cls.__name__ wrapper.__module__ = f_or_cls.__module__ target = wrapper - else: - assert inspect.isclass(f_or_cls) or inspect.isfunction(f_or_cls) target.NAME = name or klass.NAME or f_or_cls.__name__ target.VERSION = version or klass.VERSION @@ -53,17 +52,20 @@ def exporter(name, version, ext, display_name=None): def wrap_with_params(f_or_cls): t = _wrap_format(f_or_cls, Exporter, name=name, ext=ext, version=version, display_name=display_name) - EXPORT_FORMATS[t.DISPLAY_NAME] = t + key = t.DISPLAY_NAME + assert key not in EXPORT_FORMATS, "Export format '%s' already registered" % name + EXPORT_FORMATS[key] = t return t return wrap_with_params IMPORT_FORMATS = {} def importer(name, version, ext, display_name=None): - assert name not in IMPORT_FORMATS, "Import format '%s' already registered" % name def wrap_with_params(f_or_cls): t = _wrap_format(f_or_cls, Importer, name=name, ext=ext, version=version, display_name=display_name) - IMPORT_FORMATS[t.DISPLAY_NAME] = t + key = t.DISPLAY_NAME + assert key not in IMPORT_FORMATS, "Import format '%s' already registered" % name + IMPORT_FORMATS[key] = t return t return wrap_with_params @@ -73,7 +75,7 @@ def make_importer(name): def make_exporter(name): return EXPORT_FORMATS[name]() - +# pylint: disable=unused-import import cvat.apps.dataset_manager.formats.coco import cvat.apps.dataset_manager.formats.cvat import cvat.apps.dataset_manager.formats.datumaro diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py index 81a403e37ee9..576bfa5e26fb 100644 --- a/cvat/apps/dataset_manager/task.py +++ b/cvat/apps/dataset_manager/task.py @@ -84,7 +84,6 @@ def _merge_table_rows(rows, keys_for_merge, field_id): class JobAnnotation: def __init__(self, pk): - self.user = user self.db_job = models.Job.objects.select_related('segment__task') \ .select_for_update().get(id=pk) From c3d8ed11184559bef7fb78e04d9015677a74d0f7 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Fri, 10 Apr 2020 11:56:05 +0300 Subject: [PATCH 29/80] fix git application --- cvat/apps/dataset_manager/annotation.py | 3 + cvat/apps/git/git.py | 85 +++++++++++++------------ cvat/settings/base.py | 3 +- 3 files changed, 49 insertions(+), 42 deletions(-) diff --git a/cvat/apps/dataset_manager/annotation.py b/cvat/apps/dataset_manager/annotation.py index 8699de621ecb..5a6a2f3ba51e 100644 --- a/cvat/apps/dataset_manager/annotation.py +++ b/cvat/apps/dataset_manager/annotation.py @@ -38,6 +38,9 @@ def data(self): 'tracks': self.tracks, } + def __getitem__(self, key): + return getattr(self, key) + @data.setter def data(self, data): self.version = data['version'] diff --git a/cvat/apps/git/git.py b/cvat/apps/git/git.py index 3cea9e69681b..602c414cedfc 100644 --- a/cvat/apps/git/git.py +++ b/cvat/apps/git/git.py @@ -2,27 +2,27 @@ # # SPDX-License-Identifier: MIT -from django.db import transaction -from django.utils import timezone - -from cvat.apps.engine.log import slogger -from cvat.apps.engine.models import Task, Job, User -from cvat.apps.engine.annotation import dump_task_data -from cvat.apps.engine.plugins import add_plugin -from cvat.apps.git.models import GitStatusChoice - -from cvat.apps.git.models import GitData -from collections import OrderedDict - -import subprocess -import django_rq import datetime -import shutil import json import math -import git import os import re +import shutil +import subprocess +from glob import glob +from tempfile import TemporaryDirectory + +import django_rq +import git +from django.db import transaction 
+from django.utils import timezone
+from pyunpack import Archive
+
+from cvat.apps.dataset_manager.task import export_task
+from cvat.apps.engine.log import slogger
+from cvat.apps.engine.models import Job, Task, User
+from cvat.apps.engine.plugins import add_plugin
+from cvat.apps.git.models import GitData, GitStatusChoice


 def _have_no_access_exception(ex):
@@ -267,26 +267,29 @@ def push(self, user, scheme, host, db_task, last_save):
         # Dump an annotation
         timestamp = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
         if self._task_mode == "annotation":
-            display_name = "CVAT for images 1.1"
+            format_name = "CVAT for images 1.1"
         else:
-            display_name = "CVAT for video 1.1"
-        cvat_dumper = AnnotationDumper.objects.get(display_name=display_name)
+            format_name = "CVAT for video 1.1"

         dump_name = os.path.join(db_task.get_task_dirname(),
-            "git_annotation_{}.xml".format(timestamp))
+            "git_annotation_{}.zip".format(timestamp))

-        dump_task_data(
-            pk=self._tid,
-            user=user,
-            filename=dump_name,
-            dumper=cvat_dumper,
-            scheme=scheme,
-            host=host,
+        export_task(
+            task_id=self._tid,
+            dst_file=dump_name,
+            format_name=format_name,
+            server_url=scheme + host,
+            save_images=False,
         )

         ext = os.path.splitext(self._path)[1]
         if ext == '.zip':
-            subprocess.run(args=['7z', 'a', self._annotation_file, dump_name])
+            shutil.move(dump_name, self._annotation_file)
         elif ext == '.xml':
-            shutil.copyfile(dump_name, self._annotation_file)
+            with TemporaryDirectory() as tmp_dir:
+                # TODO: remove extra packing-unpacking
+                Archive(dump_name).extractall(tmp_dir)
+                anno_paths = glob(os.path.join(tmp_dir, '**', '*.xml'),
+                    recursive=True)
+                shutil.move(anno_paths[0], self._annotation_file)
         else:
             raise Exception("Got unknown annotation file type")

@@ -456,7 +459,7 @@ def update_states():
             slogger.glob("Exception occured during a status updating for db_git with tid: {}".format(db_git.task_id))

 @transaction.atomic
-def _onsave(jid, user, data, action):
+def _onsave(jid, data, action):
     db_task = Job.objects.select_related('segment__task').get(pk = jid).segment.task
     try:
         db_git = GitData.objects.select_for_update().get(pk = db_task.id)
@@ -494,18 +497,18 @@ def _onsave(jid, data, action):
     except GitData.DoesNotExist:
         pass

-def _ondump(tid, user, data_format, scheme, host, plugin_meta_data):
-    db_task = Task.objects.get(pk = tid)
-    try:
-        db_git = GitData.objects.get(pk = db_task)
-        plugin_meta_data['git'] = OrderedDict({
-            "url": db_git.url,
-            "path": db_git.path,
-        })
-    except GitData.DoesNotExist:
-        pass

 add_plugin("patch_job_data", _onsave, "after", exc_ok = False)

 # TODO: Append git repository into dump file
+# def _ondump(task_id, dst_file, format_name,
+#         server_url=None, save_images=False, plugin_meta_data):
+#     db_task = Task.objects.get(pk = tid)
+#     try:
+#         db_git = GitData.objects.get(pk = db_task)
+#         plugin_meta_data['git'] = OrderedDict({
+#             "url": db_git.url,
+#             "path": db_git.path,
+#         })
+#     except GitData.DoesNotExist:
+#         pass
 # add_plugin("_dump", _ondump, "before", exc_ok = False)
diff --git a/cvat/settings/base.py b/cvat/settings/base.py
index 24a2861af17e..92683976c449 100644
--- a/cvat/settings/base.py
+++ b/cvat/settings/base.py
@@ -93,10 +93,11 @@ def generate_ssh_keys():
     'django.contrib.sessions',
     'django.contrib.messages',
     'django.contrib.staticfiles',
-    'cvat.apps.engine',
     'cvat.apps.authentication',
     'cvat.apps.documentation',
     'cvat.apps.dataset_manager',
+    'cvat.apps.engine',
+    'cvat.apps.git',
     'django_rq',
     'compressor',
     'cacheops',

From 5567e5469f3601d2daaba00024421f05adc5f400 Mon Sep 17 00:00:00
2001 From: Maxim Zhiltsov Date: Fri, 10 Apr 2020 21:03:33 +0300 Subject: [PATCH 30/80] fixes --- cvat/apps/dataset_manager/bindings.py | 5 +- cvat/apps/dataset_manager/formats/coco.py | 1 + cvat/apps/dataset_manager/formats/cvat.py | 17 +- .../formats/datumaro/__init__.py | 19 +- cvat/apps/dataset_manager/formats/mot.py | 3 +- cvat/apps/dataset_manager/task.py | 5 +- cvat/apps/dataset_manager/tests/_tests.py | 19 +- cvat/apps/dataset_manager/views.py | 6 +- cvat/apps/engine/tests/test_rest_api.py | 226 ++++++++++-------- cvat/apps/engine/views.py | 25 +- datumaro/datumaro/components/extractor.py | 4 +- .../plugins/datumaro_format/converter.py | 1 + 12 files changed, 182 insertions(+), 149 deletions(-) diff --git a/cvat/apps/dataset_manager/bindings.py b/cvat/apps/dataset_manager/bindings.py index 840d35ac05ed..5d1e1f08cb38 100644 --- a/cvat/apps/dataset_manager/bindings.py +++ b/cvat/apps/dataset_manager/bindings.py @@ -401,14 +401,15 @@ def match_frame(self, filename): raise Exception( "Cannot match filename or determine frame number for {} filename".format(filename)) -class CvatTaskDataExtractor(datumaro.Extractor): +class CvatTaskDataExtractor(datumaro.SourceExtractor): def __init__(self, task_data, include_images=False): + super().__init__() self._categories = self._load_categories(task_data) dm_items = [] if include_images: - frame_provider = FrameProvider(db_task.data) + frame_provider = FrameProvider(task_data.db_task.data) for frame_data in task_data.group_by_frame(include_empty=include_images): loader = None diff --git a/cvat/apps/dataset_manager/formats/coco.py b/cvat/apps/dataset_manager/formats/coco.py index 042699debc41..1ad87b4c5ce7 100644 --- a/cvat/apps/dataset_manager/formats/coco.py +++ b/cvat/apps/dataset_manager/formats/coco.py @@ -4,6 +4,7 @@ from tempfile import TemporaryDirectory +from datumaro.components.project import Dataset from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor, \ import_dm_annotations from cvat.apps.dataset_manager.util import make_zip_archive diff --git a/cvat/apps/dataset_manager/formats/cvat.py b/cvat/apps/dataset_manager/formats/cvat.py index 5a33ee4c63ee..c2458d472bda 100644 --- a/cvat/apps/dataset_manager/formats/cvat.py +++ b/cvat/apps/dataset_manager/formats/cvat.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: MIT import os.path as osp +import shutil from collections import OrderedDict from glob import glob from tempfile import TemporaryDirectory @@ -10,6 +11,7 @@ from pyunpack import Archive from cvat.apps.dataset_manager.util import make_zip_archive +from cvat.apps.engine.frame_provider import FrameProvider from datumaro.util.image import save_image from .registry import exporter, importer @@ -510,7 +512,7 @@ def load(file_object, annotations): tag = None el.clear() -def _export(dst_file, task_data, anno_callback, **options): +def _export(dst_file, task_data, anno_callback, save_images=False): dst_path = dst_file.name anno_callback(dst_file, task_data) @@ -525,17 +527,16 @@ def _export(dst_file, task_data, anno_callback, **options): frames = frame_provider.get_frames( frame_provider.Quality.ORIGINAL, frame_provider.Type.NUMPY_ARRAY) - for frame_id, frame_data in enumerate(frames): + for frame_id, (frame_data, _) in enumerate(frames): frame_filename = osp.basename(task_data.frame_info[frame_id]['path']) if '.' 
in frame_filename: - save_image(frame_data, - osp.join(temp_dir, 'images', frame_filename), - jpeg_quality=100) + save_image(osp.join(temp_dir, 'images', frame_filename), + frame_data, jpeg_quality=100) else: - save_image(frame_data, - osp.join(temp_dir, 'images', frame_filename + '.png')) + save_image(osp.join(temp_dir, 'images', frame_filename + '.png'), + frame_data) - make_zip_archive(temp_dir, dst_file) + make_zip_archive(temp_dir, dst_path) @exporter(name='CVAT for video', ext='ZIP', version='1.1') def _export_video(dst_file, task_data, save_images=False): diff --git a/cvat/apps/dataset_manager/formats/datumaro/__init__.py b/cvat/apps/dataset_manager/formats/datumaro/__init__.py index 8471766364bc..5763e8197f6a 100644 --- a/cvat/apps/dataset_manager/formats/datumaro/__init__.py +++ b/cvat/apps/dataset_manager/formats/datumaro/__init__.py @@ -2,13 +2,17 @@ # # SPDX-License-Identifier: MIT +import json +import os import os.path as osp +import shutil from tempfile import TemporaryDirectory from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor, import_dm_annotations) from cvat.apps.dataset_manager.util import make_zip_archive -from cvat.settings.base import DATUMARO_PATH +from cvat.settings.base import BASE_DIR, DATUMARO_PATH +from datumaro.components.project import Project from ..registry import dm_env, exporter @@ -18,11 +22,11 @@ class DatumaroProjectExporter: _REMOTE_IMAGES_EXTRACTOR = 'cvat_rest_api_task_images' _TEMPLATES_DIR = osp.join(osp.dirname(__file__), 'export_templates') - def _save_image_info(self, save_dir, task_data, server_url=None): + def _save_image_info(self, save_dir, task_data): os.makedirs(save_dir, exist_ok=True) config = { - 'server_url': server_url or 'localhost', + 'server_url': task_data._host or 'localhost', 'task_id': task_data.db_task.id, } @@ -43,12 +47,13 @@ def _save_image_info(self, save_dir, task_data, server_url=None): def _export(self, task_data, save_dir, save_images=False): dataset = CvatTaskDataExtractor(task_data, include_images=save_images) - converter = env.make_converter('datumaro_project', + converter = dm_env.make_converter('datumaro_project', save_images=save_images, config={ 'project_name': task_data.db_task.name, } ) converter(dataset, save_dir=save_dir) + project = Project.load(save_dir) target_dir = project.config.project_dir os.makedirs(target_dir, exist_ok=True) shutil.copyfile( @@ -63,7 +68,7 @@ def _export(self, task_data, save_dir, save_images=False): }) self._save_image_info( osp.join(save_dir, project.local_source_dir(source_name)), - task_data, server_url=server_url) + task_data) project.save() templates_dir = osp.join(self._TEMPLATES_DIR, 'plugins') @@ -84,10 +89,10 @@ def _export(self, task_data, save_dir, save_images=False): cvat_utils_dst_dir = osp.join(save_dir, 'cvat', 'utils') os.makedirs(cvat_utils_dst_dir) - shutil.copytree(osp.join(_CVAT_ROOT_DIR, 'utils', 'cli'), + shutil.copytree(osp.join(BASE_DIR, 'utils', 'cli'), osp.join(cvat_utils_dst_dir, 'cli')) def __call__(self, dst_file, task_data, save_images=False): with TemporaryDirectory() as temp_dir: self._export(task_data, save_dir=temp_dir, save_images=save_images) - make_zip_archive(temp_dir, file_object) + make_zip_archive(temp_dir, dst_file) diff --git a/cvat/apps/dataset_manager/formats/mot.py b/cvat/apps/dataset_manager/formats/mot.py index 6626b5cf34b6..e466faedf46c 100644 --- a/cvat/apps/dataset_manager/formats/mot.py +++ b/cvat/apps/dataset_manager/formats/mot.py @@ -22,7 +22,8 @@ def _export(dst_file, task_data, save_images=False): extractor 
= extractor.transform(envt.get('id_from_image_name')) extractor = Dataset.from_extractors(extractor) # apply lazy transforms with TemporaryDirectory() as temp_dir: - converter = dm_env.make_converter('mot_seq', save_images=save_images) + converter = dm_env.make_converter('mot_seq_gt', + save_images=save_images) converter(extractor, save_dir=temp_dir) make_zip_archive(temp_dir, dst_file) diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py index 576bfa5e26fb..f3ca7567f2fc 100644 --- a/cvat/apps/dataset_manager/task.py +++ b/cvat/apps/dataset_manager/task.py @@ -718,8 +718,9 @@ def export_task(task_id, dst_file, format_name, task.init_from_db() exporter = make_exporter(format_name) - with open(dst_file, 'rb') as f: - task.export(exporter, f, host=server_url, save_images=save_images) + with open(dst_file, 'wb') as f: + task.export(f, exporter, host=server_url, + save_images=save_images) @transaction.atomic def import_task_annotations(task_id, src_file, format_name): diff --git a/cvat/apps/dataset_manager/tests/_tests.py b/cvat/apps/dataset_manager/tests/_tests.py index 3aa19957bc13..2ec7d3499aca 100644 --- a/cvat/apps/dataset_manager/tests/_tests.py +++ b/cvat/apps/dataset_manager/tests/_tests.py @@ -47,10 +47,8 @@ def restore(cls): def _setUpModule(): _GitImportFix.apply() - import cvat.apps.dataset_manager.task as dm - from cvat.apps.engine.models import Task + import cvat.apps.dataset_manager as dm globals()['dm'] = dm - globals()['Task'] = Task import sys sys.path.insert(0, __file__[:__file__.rfind('/dataset_manager/')]) @@ -59,7 +57,7 @@ def tearDownModule(): _GitImportFix.restore() from io import BytesIO -import os +import os.path as osp import random import tempfile @@ -289,10 +287,13 @@ def _put_api_v1_task_id_annotations(self, tid, data): def _test_export(self, format_name, save_images=False): task, _ = self._generate_task() - f = BytesIO() - dm.export_task(task["id"], format_name, f, save_images=save_images) + with tempfile.TemporaryDirectory() as temp_dir: + file_path = osp.join(temp_dir, format_name) + dm.task.export_task(task["id"], file_path, + format_name, save_images=save_images) - self.assertTrue(len(f.getvalue()) != 0) + with open(file_path, 'rb') as f: + self.assertTrue(len(f.read()) != 0) def test_datumaro(self): self._test_export('Datumaro 1.0', save_images=False) @@ -325,11 +326,11 @@ def test_cvat_images(self): self._test_export('CVAT for images 1.1', save_images=True) def test_export_formats_query(self): - formats = dm.get_export_formats() + formats = dm.views.get_export_formats() self.assertEqual(len(formats), 10) def test_import_formats_query(self): - formats = dm.get_import_formats() + formats = dm.views.get_import_formats() self.assertEqual(len(formats), 8) diff --git a/cvat/apps/dataset_manager/views.py b/cvat/apps/dataset_manager/views.py index d888cd0e784b..7bfb5ae74a82 100644 --- a/cvat/apps/dataset_manager/views.py +++ b/cvat/apps/dataset_manager/views.py @@ -15,7 +15,7 @@ from datumaro.cli.util import make_file_name from datumaro.util import to_snake_case -from .formats import EXPORT_FORMATS, IMPORT_FORMATS +from .formats.registry import EXPORT_FORMATS, IMPORT_FORMATS from .util import current_function_name @@ -41,7 +41,7 @@ def export_task(task_id, dst_format, server_url=None, save_images=False): cache_dir = get_export_cache_dir(db_task) - exporter = get_exporter(format_name) + exporter = EXPORT_FORMATS[dst_format] output_base = '%s_%s' % ('dataset' if save_images else 'task', make_file_name(to_snake_case(dst_format))) 
output_path = '%s.%s' % (output_base, exporter.EXT)

@@ -51,7 +51,7 @@ def export_task(task_id, dst_format, server_url=None, save_images=False):
     if not (osp.exists(output_path) and \
             task_time <= osp.getmtime(output_path)):
         os.makedirs(cache_dir, exist_ok=True)
-        task.export_task(task_id, dst_format, output_path,
+        task.export_task(task_id, output_path, dst_format,
             server_url=server_url, save_images=save_images)

         archive_ctime = osp.getctime(output_path)
diff --git a/cvat/apps/engine/tests/test_rest_api.py b/cvat/apps/engine/tests/test_rest_api.py
index 1639c129cde1..9e85d701c5f4 100644
--- a/cvat/apps/engine/tests/test_rest_api.py
+++ b/cvat/apps/engine/tests/test_rest_api.py
@@ -2,27 +2,32 @@
 #
 # SPDX-License-Identifier: MIT

+import io
 import os
+import os.path as osp
-import shutil
-from PIL import Image
-from io import BytesIO
-from enum import Enum
 import random
-from rest_framework.test import APITestCase, APIClient
-from rest_framework import status
-from django.conf import settings
-from django.contrib.auth.models import User, Group
-from cvat.apps.engine.models import (Task, Segment, Job, StatusChoice,
-    AttributeType, Project, Data)
-from unittest import mock
-import io
+import shutil
+import tempfile
 import xml.etree.ElementTree as ET
-from collections import defaultdict
 import zipfile
-from pycocotools import coco as coco_loader
-import tempfile
+from collections import defaultdict
+from enum import Enum
+from glob import glob
+from io import BytesIO
+from unittest import mock
+
+import av
+import numpy as np
+from django.conf import settings
+from django.contrib.auth.models import Group, User
+from PIL import Image
+from pycocotools import coco as coco_loader
+from rest_framework import status
+from rest_framework.test import APIClient, APITestCase
+
+from cvat.apps.engine.models import (AttributeType, Data, Job, Project,
+    Segment, StatusChoice, Task)
+

 def create_db_users(cls):
     (group_admin, _) = Group.objects.get_or_create(name="admin")
@@ -2448,7 +2451,7 @@ def _delete_api_v1_tasks_id_annotations(self, pk, user):
     def _dump_api_v1_tasks_id_annotations(self, pk, user, query_params=""):
         with ForceLogin(user, self.client):
             response = self.client.get(
-                "/api/v1/tasks/{0}/annotations/my_task_{0}?{1}".format(pk, query_params))
+                "/api/v1/tasks/{0}/annotations{1}".format(pk, query_params))

         return response

@@ -2470,10 +2473,17 @@ def _upload_api_v1_tasks_id_annotations(self, pk, user, data, query_params=""):

         return response

-    def _get_annotation_formats(self, user):
+    def _get_import_formats(self, user):
+        with ForceLogin(user, self.client):
+            response = self.client.get(
+                path="/api/v1/server/annotation/import_formats"
+            )
+        return response
+
+    def _get_export_formats(self, user):
         with ForceLogin(user, self.client):
             response = self.client.get(
-                path="/api/v1/server/annotation/formats"
+                path="/api/v1/server/annotation/export_formats"
             )
         return response

@@ -3048,7 +3058,7 @@ def _get_initial_annotation(annotation_format):
             annotations["shapes"] = rectangle_shapes_wo_attrs
             annotations["tags"] = tags_wo_attrs

-        elif annotation_format == "YOLO ZIP 1.1" or \
+        elif annotation_format == "YOLO 1.1" or \
             annotation_format == "TFRecord 1.0":
             annotations["shapes"] = rectangle_shapes_wo_attrs

@@ -3059,10 +3069,10 @@
             annotations["shapes"] = rectangle_shapes_wo_attrs + polygon_shapes_wo_attrs
             annotations["tracks"] = rectangle_tracks_wo_attrs

-        elif annotation_format == "MOT ZIP 1.1":
+        elif annotation_format == "MOT 1.1":
             annotations["tracks"] = rectangle_tracks_wo_attrs

-        elif annotation_format
== "LabelMe ZIP 3.0": + elif annotation_format == "LabelMe 3.0": annotations["shapes"] = rectangle_shapes_with_attrs + \ rectangle_shapes_wo_attrs + \ polygon_shapes_wo_attrs + \ @@ -3070,31 +3080,40 @@ def _get_initial_annotation(annotation_format): return annotations - response = self._get_annotation_formats(annotator) + response = self._get_import_formats(annotator) self.assertEqual(response.status_code, HTTP_200_OK) + import_formats = response.data + self.assertTrue(isinstance(import_formats, list) and import_formats) + import_formats = { v['name'] for v in import_formats } - if annotator is not None: - supported_formats = response.data - else: - supported_formats = [{ - "name": "CVAT", - "dumpers": [{ - "display_name": "CVAT for images 1.1" - }], - "loaders": [{ - "display_name": "CVAT 1.1" - }] - }] - - self.assertTrue(isinstance(supported_formats, list) and supported_formats) - - for annotation_format in supported_formats: - for dumper in annotation_format["dumpers"]: + response = self._get_export_formats(annotator) + self.assertEqual(response.status_code, HTTP_200_OK) + export_formats = response.data + self.assertTrue(isinstance(export_formats, list) and export_formats) + export_formats = { v['name'] for v in export_formats } + + formats = { exp: exp if exp in import_formats else None + for exp in export_formats } + if 'CVAT 1.1' in import_formats: + if 'CVAT for video 1.1' in export_formats: + formats['CVAT for video 1.1'] = 'CVAT 1.1' + if 'CVAT for images 1.1' in export_formats: + formats['CVAT for images 1.1'] = 'CVAT 1.1' + if import_formats ^ export_formats: + # NOTE: this may not be an error, so we should not fail + print("The following import formats have no pair:", + import_formats - export_formats) + print("The following export formats have no pair:", + export_formats - import_formats) + + for export_format, import_format in formats.items(): + with self.subTest(export_format=export_format, + import_format=import_format): # 1. create task task, jobs = self._create_task(owner, assignee) # 2. add annotation - data = _get_initial_annotation(dumper["display_name"]) + data = _get_initial_annotation(export_format) response = self._put_api_v1_tasks_id_annotations(task["id"], annotator, data) data["version"] += 1 @@ -3103,49 +3122,54 @@ def _get_initial_annotation(annotation_format): # 3. download annotation response = self._dump_api_v1_tasks_id_annotations(task["id"], annotator, - "format={}".format(dumper["display_name"])) + "/{}".format(export_format)) self.assertEqual(response.status_code, HTTP_202_ACCEPTED) response = self._dump_api_v1_tasks_id_annotations(task["id"], annotator, - "format={}".format(dumper["display_name"])) + "/{}".format(export_format)) self.assertEqual(response.status_code, HTTP_201_CREATED) response = self._dump_api_v1_tasks_id_annotations(task["id"], annotator, - "action=download&format={}".format(dumper["display_name"])) + "/{}?action=download".format(export_format)) self.assertEqual(response.status_code, HTTP_200_OK) # 4. check downloaded data - if response.status_code == status.HTTP_200_OK: - self.assertTrue(response.streaming) - content = io.BytesIO(b"".join(response.streaming_content)) - self._check_dump_content(content, task, jobs, data, annotation_format["name"]) - content.seek(0) - - # 5. remove annotation form the task - response = self._delete_api_v1_tasks_id_annotations(task["id"], annotator) - data["version"] += 1 - self.assertEqual(response.status_code, HTTP_204_NO_CONTENT) - - # 6. 
upload annotation and check annotation - uploaded_data = { - "annotation_file": content, - } + self.assertTrue(response.streaming) + content = io.BytesIO(b"".join(response.streaming_content)) + self._check_dump_content(content, task, jobs, data, export_format) + content.seek(0) + + # 5. remove annotation form the task + response = self._delete_api_v1_tasks_id_annotations(task["id"], annotator) + data["version"] += 1 + self.assertEqual(response.status_code, HTTP_204_NO_CONTENT) - for loader in annotation_format["loaders"]: - if loader["display_name"] == "Segmentation mask 1.1": - continue # can't really predict the result and check - response = self._upload_api_v1_tasks_id_annotations(task["id"], annotator, uploaded_data, "format={}".format(loader["display_name"])) - self.assertEqual(response.status_code, HTTP_202_ACCEPTED) + # 6. upload annotation + if not import_format: + continue + + uploaded_data = { + "annotation_file": content, + } + response = self._upload_api_v1_tasks_id_annotations( + task["id"], annotator, uploaded_data, + "format={}".format(import_format)) + self.assertEqual(response.status_code, HTTP_202_ACCEPTED) - response = self._upload_api_v1_tasks_id_annotations(task["id"], annotator, {}, "format={}".format(loader["display_name"])) - self.assertEqual(response.status_code, HTTP_201_CREATED) + response = self._upload_api_v1_tasks_id_annotations( + task["id"], annotator, {}, + "format={}".format(import_format)) + self.assertEqual(response.status_code, HTTP_201_CREATED) - response = self._get_api_v1_tasks_id_annotations(task["id"], annotator) - self.assertEqual(response.status_code, HTTP_200_OK) - data["version"] += 2 # upload is delete + put - self._check_response(response, data) + # 7. check annotation + if import_format == "Segmentation mask 1.1": + continue # can't really predict the result to check + response = self._get_api_v1_tasks_id_annotations(task["id"], annotator) + self.assertEqual(response.status_code, HTTP_200_OK) + data["version"] += 2 # upload is delete + put + self._check_response(response, data) - def _check_dump_content(self, content, task, jobs, data, annotation_format_name): + def _check_dump_content(self, content, task, jobs, data, format_name): def etree_to_dict(t): d = {t.tag: {} if t.attrib else None} children = list(t) @@ -3164,26 +3188,33 @@ def etree_to_dict(t): d[t.tag] = text return d - if annotation_format_name == "CVAT": - xmldump = ET.fromstring(content.read()) - self.assertEqual(xmldump.tag, "annotations") - tags = xmldump.findall("./meta") - self.assertEqual(len(tags), 1) - meta = etree_to_dict(tags[0])["meta"] - self.assertEqual(meta["task"]["name"], task["name"]) - elif annotation_format_name == "PASCAL VOC": + if format_name in {"CVAT for video 1.1", "CVAT for images 1.1"}: + with tempfile.TemporaryDirectory() as tmp_dir: + zipfile.ZipFile(content).extractall(tmp_dir) + xmls = glob(osp.join(tmp_dir, '**', '*.xml'), recursive=True) + self.assertTrue(xmls) + for xml in xmls: + xmldump = ET.parse(xml) + self.assertEqual(xmldump.tag, "annotations") + tags = xmldump.findall("./meta") + self.assertEqual(len(tags), 1) + meta = etree_to_dict(tags[0])["meta"] + self.assertEqual(meta["task"]["name"], task["name"]) + elif format_name == "PASCAL VOC 1.1": self.assertTrue(zipfile.is_zipfile(content)) - elif annotation_format_name == "YOLO": + elif format_name == "YOLO 1.1": self.assertTrue(zipfile.is_zipfile(content)) - elif annotation_format_name == "COCO": - with tempfile.NamedTemporaryFile() as tmp_file: - tmp_file.write(content.read()) - 
tmp_file.flush() - coco = coco_loader.COCO(tmp_file.name) - self.assertTrue(coco.getAnnIds()) - elif annotation_format_name == "TFRecord": + elif format_name == "COCO 1.0": + with tempfile.TemporaryDirectory() as tmp_dir: + zipfile.ZipFile(content).extractall(tmp_dir) + jsons = glob(osp.join(tmp_dir, '**', '*.json'), recursive=True) + self.assertTrue(jsons) + for json in jsons: + coco = coco_loader.COCO(json) + self.assertTrue(coco.getAnnIds()) + elif format_name == "TFRecord 1.0": self.assertTrue(zipfile.is_zipfile(content)) - elif annotation_format_name == "Segmentation mask": + elif format_name == "Segmentation mask 1.1": self.assertTrue(zipfile.is_zipfile(content)) @@ -3234,31 +3265,22 @@ def generate_coco_anno(): ] }""" - response = self._get_annotation_formats(user) - self.assertEqual(response.status_code, status.HTTP_200_OK) - supported_formats = response.data - self.assertTrue(isinstance(supported_formats, list) and supported_formats) - - coco_format = None - for f in response.data: - if f["name"] == "COCO": - coco_format = f - break - self.assertTrue(coco_format) - loader = coco_format["loaders"][0] - task, _ = self._create_task(user, user) content = io.BytesIO(generate_coco_anno()) content.seek(0) + format_name = "COCO 1.0" uploaded_data = { "annotation_file": content, } - response = self._upload_api_v1_tasks_id_annotations(task["id"], user, uploaded_data, "format={}".format(loader["display_name"])) + response = self._upload_api_v1_tasks_id_annotations( + task["id"], user, uploaded_data, + "format={}".format(format_name)) self.assertEqual(response.status_code, status.HTTP_202_ACCEPTED) - response = self._upload_api_v1_tasks_id_annotations(task["id"], user, {}, "format={}".format(loader["display_name"])) + response = self._upload_api_v1_tasks_id_annotations( + task["id"], user, {}, "format={}".format(format_name)) self.assertEqual(response.status_code, status.HTTP_201_CREATED) response = self._get_api_v1_tasks_id_annotations(task["id"], user) diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 7527276603b6..03bbee9426b7 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -32,6 +32,7 @@ from sendfile import sendfile import cvat.apps.dataset_manager as dm +import cvat.apps.dataset_manager.views from cvat.apps.authentication import auth from cvat.apps.authentication.decorators import login_required from cvat.apps.dataset_manager.serializers import DatasetFormatSerializer @@ -205,8 +206,7 @@ def share(request): @action(detail=False, methods=['GET'], url_path='annotation/export_formats') def annotation_export_formats(request): data = dm.views.get_export_formats() - data = JSONRenderer().render(data) - return Response(data) + return Response(DatasetFormatSerializer(data, many=True).data) @staticmethod @swagger_auto_schema(method='get', operation_summary='Method provides the list of supported annotations formats', @@ -214,8 +214,7 @@ def annotation_export_formats(request): @action(detail=False, methods=['GET'], url_path='annotation/import_formats') def annotation_import_formats(request): data = dm.views.get_import_formats() - data = JSONRenderer().render(data) - return Response(data) + return Response(DatasetFormatSerializer(data, many=True).data) class ProjectFilter(filters.FilterSet): name = filters.CharFilter(field_name="name", lookup_expr="icontains") @@ -521,8 +520,8 @@ def annotations(self, request, pk): '201': openapi.Response(description='Annotations file is ready to download'), '200': openapi.Response(description='Download of file 
started')}) @action(detail=True, methods=['GET'], serializer_class=None, - url_path='annotations') - def dump(self, request, pk, filename): + url_path=r'annotations/(?P<dst_format>[^/]+)(\?(?P<filename>[^/&]+))?') + def dump(self, request, pk, dst_format, filename=None): """ Dump of annotations in common case is a long process which cannot be performed within one request. First request starts dumping process. When the file is ready (code 201) you can get it with query parameter action=download. @@ -534,8 +533,7 @@ def dump(self, request, pk, filename): raise serializers.ValidationError( "Unexpected action specified for the request") - dst_format = request.query_params.get("format", "").lower() - if dst_format not in [f['tag'] for f in dm.views.get_export_formats()]: + if dst_format not in [f.DISPLAY_NAME for f in dm.views.get_export_formats()]: raise serializers.ValidationError( "Unknown format specified for the request") @@ -557,7 +555,8 @@ def dump(self, request, pk, filename): timestamp = datetime.strftime(last_task_update_time, "%Y_%m_%d_%H_%M_%S") - filename = "task_{}-{}-{}_annotations.{}".format( + filename = filename or \ + "task_{}-{}-{}_annotations.{}".format( db_task.name, timestamp, dst_format, osp.splitext(file_path)[1]) return sendfile(request, file_path, attachment=True, @@ -659,7 +658,7 @@ def dataset_export(self, request, pk): "Unexpected action specified for the request") dst_format = request.query_params.get("format", "").lower() - if dst_format not in [f['tag'] for f in dm.views.get_export_formats()]: + if dst_format not in [f.DISPLAY_NAME for f in dm.views.get_export_formats()]: raise serializers.ValidationError( "Unknown format specified for the request") @@ -878,8 +877,10 @@ def load_data_proxy(request, rq_id, rq_func, pk): if not rq_job: serializer = AnnotationFileSerializer(data=request.data) if serializer.is_valid(raise_exception=True): - if format_name not in [f['tag'] for f in dm.views.get_import_formats()]: - raise serializers.ValidationError("Unknown input format") + if format_name not in \ + [f.DISPLAY_NAME for f in dm.views.get_import_formats()]: + raise serializers.ValidationError( + "Unknown input format '{}'".format(format_name)) anno_file = serializer.validated_data['annotation_file'] fd, filename = mkstemp(prefix='cvat_{}'.format(pk)) diff --git a/datumaro/datumaro/components/extractor.py b/datumaro/datumaro/components/extractor.py index d88d7c4551a7..37248d2a3c66 100644 --- a/datumaro/datumaro/components/extractor.py +++ b/datumaro/datumaro/components/extractor.py @@ -749,9 +749,7 @@ def __init__(self, length=None, subset=None): self._subset = subset def subsets(self): - if self._subset: - return [self._subset] - return None + return [self._subset] def get_subset(self, name): if name != self._subset: diff --git a/datumaro/datumaro/plugins/datumaro_format/converter.py b/datumaro/datumaro/plugins/datumaro_format/converter.py index cf317a0a277e..4ad786f19f82 100644 --- a/datumaro/datumaro/plugins/datumaro_format/converter.py +++ b/datumaro/datumaro/plugins/datumaro_format/converter.py @@ -303,6 +303,7 @@ def __init__(self, config=None, save_images=False): def __call__(self, extractor, save_dir): os.makedirs(save_dir, exist_ok=True) + from datumaro.components.project import Project project = Project.generate(save_dir, config=self._config) converter = project.env.make_converter('datumaro', From c46769fd0d592d01d3320402df7395cfc4a37ba5 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Fri, 10 Apr 2020 21:04:10 +0300 Subject: [PATCH 31/80] add extension recommendation --- 
CONTRIBUTING.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 01a2539cf0d2..8852448180bf 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -73,6 +73,7 @@ for development - [ESLint](https://marketplace.visualstudio.com/items?itemName=dbaeumer.vscode-eslint) - [vscode-remark-lint](https://marketplace.visualstudio.com/items?itemName=drewbourne.vscode-remark-lint) - [licenser](https://marketplace.visualstudio.com/items?itemName=ymotongpoo.licenser) + - [Trailing Spaces](https://marketplace.visualstudio.com/items?itemName=shardulm94.trailing-spaces) - Reload Visual Studio Code from virtual environment From 8ef19aad73ac06d35a67d45d1237b5a3356660be Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Mon, 13 Apr 2020 18:35:42 +0300 Subject: [PATCH 32/80] fixes --- cvat/apps/dataset_manager/formats/coco.py | 13 +++-- cvat/apps/dataset_manager/formats/cvat.py | 53 ++++++++----------- cvat/apps/dataset_manager/formats/mot.py | 2 +- .../dataset_manager/formats/pascal_voc.py | 2 +- cvat/apps/dataset_manager/formats/yolo.py | 4 +- cvat/apps/dataset_manager/task.py | 4 +- cvat/apps/dataset_manager/views.py | 4 +- cvat/apps/engine/tests/test_rest_api.py | 34 ++++++++---- 8 files changed, 60 insertions(+), 56 deletions(-) diff --git a/cvat/apps/dataset_manager/formats/coco.py b/cvat/apps/dataset_manager/formats/coco.py index 1ad87b4c5ce7..41d6343a3dca 100644 --- a/cvat/apps/dataset_manager/formats/coco.py +++ b/cvat/apps/dataset_manager/formats/coco.py @@ -2,6 +2,7 @@ # # SPDX-License-Identifier: MIT +import zipfile from tempfile import TemporaryDirectory from datumaro.components.project import Dataset @@ -25,14 +26,12 @@ def _export(dst_file, task_data, save_images=False): @importer(name='COCO', ext='JSON, ZIP', version='1.0') def _import(src_file, task_data): - src_path = src_file.name - - if src_path.lower.endswith('.json'): - dataset = dm_env.make_extractor('coco_instances', src_path) - import_dm_annotations(dataset, task_data) - else: + if zipfile.is_zipfile(src_file): with TemporaryDirectory() as tmp_dir: - Archive(src_path).extractall(tmp_dir) + zipfile.ZipFile(src_file).extractall(tmp_dir) dataset = dm_env.make_importer('coco')(tmp_dir).make_dataset() import_dm_annotations(dataset, task_data) + else: + dataset = dm_env.make_extractor('coco_instances', src_file.name) + import_dm_annotations(dataset, task_data) \ No newline at end of file diff --git a/cvat/apps/dataset_manager/formats/cvat.py b/cvat/apps/dataset_manager/formats/cvat.py index c2458d472bda..334d851a3739 100644 --- a/cvat/apps/dataset_manager/formats/cvat.py +++ b/cvat/apps/dataset_manager/formats/cvat.py @@ -3,13 +3,11 @@ # SPDX-License-Identifier: MIT import os.path as osp -import shutil +import zipfile from collections import OrderedDict from glob import glob from tempfile import TemporaryDirectory -from pyunpack import Archive - from cvat.apps.dataset_manager.util import make_zip_archive from cvat.apps.engine.frame_provider import FrameProvider from datumaro.util.image import save_image @@ -513,30 +511,25 @@ def load(file_object, annotations): el.clear() def _export(dst_file, task_data, anno_callback, save_images=False): - dst_path = dst_file.name - anno_callback(dst_file, task_data) - - if not save_images: - return - - dst_file.close() with TemporaryDirectory() as temp_dir: - shutil.move(dst_path, temp_dir) - - frame_provider = FrameProvider(task_data.db_task.data) - frames = frame_provider.get_frames( - frame_provider.Quality.ORIGINAL, - frame_provider.Type.NUMPY_ARRAY) - for 
frame_id, (frame_data, _) in enumerate(frames): - frame_filename = osp.basename(task_data.frame_info[frame_id]['path']) - if '.' in frame_filename: - save_image(osp.join(temp_dir, 'images', frame_filename), - frame_data, jpeg_quality=100) - else: - save_image(osp.join(temp_dir, 'images', frame_filename + '.png'), - frame_data) + with open(osp.join(temp_dir, 'annotations.xml'), 'wb') as f: + anno_callback(f, task_data) + + if save_images: + frame_provider = FrameProvider(task_data.db_task.data) + frames = frame_provider.get_frames( + frame_provider.Quality.ORIGINAL, + frame_provider.Type.NUMPY_ARRAY) + for frame_id, (frame_data, _) in enumerate(frames): + frame_name = osp.basename(task_data.frame_info[frame_id]['path']) + if '.' in frame_name: + save_image(osp.join(temp_dir, 'images', frame_name), + frame_data, jpeg_quality=100) + else: + save_image(osp.join(temp_dir, 'images', frame_name + '.png'), + frame_data) - make_zip_archive(temp_dir, dst_path) + make_zip_archive(temp_dir, dst_file) @exporter(name='CVAT for video', ext='ZIP', version='1.1') def _export_video(dst_file, task_data, save_images=False): @@ -550,14 +543,12 @@ def _export_images(dst_file, task_data, save_images=False): @importer(name='CVAT', ext='XML, ZIP', version='1.1') def _import(src_file, task_data): - src_path = src_file.name - - if src_path.lower().endswith('.xml'): - load(src_path, task_data) - elif src_file.lower().endswith('.zip'): + if zipfile.is_zipfile(src_file): with TemporaryDirectory() as tmp_dir: - Archive(src_path).extractall(tmp_dir) + zipfile.ZipFile(src_file).extractall(tmp_dir) anno_paths = glob(osp.join(tmp_dir, '**', '*.xml'), recursive=True) for p in anno_paths: load(p, task_data) + else: + load(src_file, task_data) \ No newline at end of file diff --git a/cvat/apps/dataset_manager/formats/mot.py b/cvat/apps/dataset_manager/formats/mot.py index e466faedf46c..5c3461adff91 100644 --- a/cvat/apps/dataset_manager/formats/mot.py +++ b/cvat/apps/dataset_manager/formats/mot.py @@ -41,7 +41,7 @@ def _import(src_file, task_data): for item in dataset: frame_id = match_frame(item, task_data) - for ann in item.task_data: + for ann in item.annotations: if ann.type != datumaro.AnnotationType.bbox: continue diff --git a/cvat/apps/dataset_manager/formats/pascal_voc.py b/cvat/apps/dataset_manager/formats/pascal_voc.py index a0c54189a933..429696baad53 100644 --- a/cvat/apps/dataset_manager/formats/pascal_voc.py +++ b/cvat/apps/dataset_manager/formats/pascal_voc.py @@ -59,6 +59,6 @@ def _import(src_file, task_data): shutil.move(f, anno_dir) dataset = dm_env.make_importer('voc')(tmp_dir).make_dataset() - masks_to_polygons = Environment().transforms.get('masks_to_polygons') + masks_to_polygons = dm_env.transforms.get('masks_to_polygons') dataset = dataset.transform(masks_to_polygons) import_dm_annotations(dataset, task_data) diff --git a/cvat/apps/dataset_manager/formats/yolo.py b/cvat/apps/dataset_manager/formats/yolo.py index fd35a6d0298a..688ff903482a 100644 --- a/cvat/apps/dataset_manager/formats/yolo.py +++ b/cvat/apps/dataset_manager/formats/yolo.py @@ -38,8 +38,8 @@ def _import(src_file, task_data): filename = osp.splitext(osp.basename(filename))[0] frame_info = None try: - frame_id = match_frame(DatasetItem(id=filename), annotations) - frame_info = annotations.frame_info[frame_id] + frame_id = match_frame(DatasetItem(id=filename), task_data) + frame_info = task_data.frame_info[frame_id] except Exception: pass if frame_info is not None: diff --git a/cvat/apps/dataset_manager/task.py 
b/cvat/apps/dataset_manager/task.py index f3ca7567f2fc..0d9985b724df 100644 --- a/cvat/apps/dataset_manager/task.py +++ b/cvat/apps/dataset_manager/task.py @@ -729,7 +729,7 @@ def import_task_annotations(task_id, src_file, format_name): importer = make_importer(format_name) with open(src_file, 'rb') as f: - task.import_annotations(importer, f) + task.import_annotations(f, importer) @transaction.atomic def import_job_annotations(job_id, src_file, format_name): @@ -738,4 +738,4 @@ def import_job_annotations(job_id, src_file, format_name): importer = make_importer(format_name) with open(src_file, 'rb') as f: - job.import_annotations(importer, f) + job.import_annotations(f, importer) diff --git a/cvat/apps/dataset_manager/views.py b/cvat/apps/dataset_manager/views.py index 7bfb5ae74a82..b768cc69cb5a 100644 --- a/cvat/apps/dataset_manager/views.py +++ b/cvat/apps/dataset_manager/views.py @@ -73,10 +73,10 @@ def export_task(task_id, dst_format, server_url=None, save_images=False): raise def export_task_as_dataset(task_id, dst_format=None, server_url=None): - export_task(task_id, dst_format, server_url=server_url, save_images=True) + return export_task(task_id, dst_format, server_url=server_url, save_images=True) def export_task_annotations(task_id, dst_format=None, server_url=None): - export_task(task_id, dst_format, server_url=server_url, save_images=False) + return export_task(task_id, dst_format, server_url=server_url, save_images=False) def clear_export_cache(task_id, file_path, file_ctime): try: diff --git a/cvat/apps/engine/tests/test_rest_api.py b/cvat/apps/engine/tests/test_rest_api.py index 9e85d701c5f4..efff53341b7c 100644 --- a/cvat/apps/engine/tests/test_rest_api.py +++ b/cvat/apps/engine/tests/test_rest_api.py @@ -4,6 +4,7 @@ import io import os +import os.path as osp import random import shutil import tempfile @@ -11,6 +12,7 @@ import zipfile from collections import defaultdict from enum import Enum +from glob import glob from io import BytesIO from unittest import mock @@ -24,7 +26,7 @@ from rest_framework.test import APIClient, APITestCase from cvat.apps.engine.models import (AttributeType, Data, Job, Project, - Segment, StatusChoice, Task) + Segment, StatusChoice, Task) def create_db_users(cls): @@ -3082,13 +3084,19 @@ def _get_initial_annotation(annotation_format): response = self._get_import_formats(annotator) self.assertEqual(response.status_code, HTTP_200_OK) - import_formats = response.data + if annotator is not None: + import_formats = response.data + else: + import_formats = response = self._get_import_formats(owner).data self.assertTrue(isinstance(import_formats, list) and import_formats) import_formats = { v['name'] for v in import_formats } response = self._get_export_formats(annotator) self.assertEqual(response.status_code, HTTP_200_OK) - export_formats = response.data + if annotator is not None: + export_formats = response.data + else: + export_formats = response = self._get_export_formats(owner).data self.assertTrue(isinstance(export_formats, list) and export_formats) export_formats = { v['name'] for v in export_formats } @@ -3134,10 +3142,13 @@ def _get_initial_annotation(annotation_format): self.assertEqual(response.status_code, HTTP_200_OK) # 4. 
check downloaded data - self.assertTrue(response.streaming) - content = io.BytesIO(b"".join(response.streaming_content)) - self._check_dump_content(content, task, jobs, data, export_format) - content.seek(0) + if annotator is not None: + self.assertTrue(response.streaming) + content = io.BytesIO(b"".join(response.streaming_content)) + self._check_dump_content(content, task, jobs, data, export_format) + content.seek(0) + else: + content = io.BytesIO() # 5. remove annotation form the task response = self._delete_api_v1_tasks_id_annotations(task["id"], annotator) @@ -3166,6 +3177,9 @@ def _get_initial_annotation(annotation_format): continue # can't really predict the result to check response = self._get_api_v1_tasks_id_annotations(task["id"], annotator) self.assertEqual(response.status_code, HTTP_200_OK) + + if annotator is None: + continue data["version"] += 2 # upload is delete + put self._check_response(response, data) @@ -3194,9 +3208,9 @@ def etree_to_dict(t): xmls = glob(osp.join(tmp_dir, '**', '*.xml'), recursive=True) self.assertTrue(xmls) for xml in xmls: - xmldump = ET.parse(xml) - self.assertEqual(xmldump.tag, "annotations") - tags = xmldump.findall("./meta") + xmlroot = ET.parse(xml).getroot() + self.assertEqual(xmlroot.tag, "annotations") + tags = xmlroot.findall("./meta") self.assertEqual(len(tags), 1) meta = etree_to_dict(tags[0])["meta"] self.assertEqual(meta["task"]["name"], task["name"]) From 1c42d41ee2e880e7234088c1e31ae86cec28aa56 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Mon, 13 Apr 2020 18:38:46 +0300 Subject: [PATCH 33/80] api --- cvat/apps/engine/views.py | 293 +++++++++++++++++++------------------- 1 file changed, 143 insertions(+), 150 deletions(-) diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 03bbee9426b7..8e5c564a3c38 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -481,18 +481,10 @@ def annotations(self, request, pk): if serializer.is_valid(raise_exception=True): return Response(serializer.data) elif request.method == 'PUT': - if request.query_params.get("format", ""): - return load_data_proxy( - request=request, - rq_id="{}@/api/v1/tasks/{}/annotations/upload".format(request.user, pk), - rq_func=dm.task.import_task_annotations, - pk=pk, - ) - else: - serializer = LabeledDataSerializer(data=request.data) - if serializer.is_valid(raise_exception=True): - data = dm.task.put_task_data(pk, serializer.data) - return Response(data) + serializer = LabeledDataSerializer(data=request.data) + if serializer.is_valid(raise_exception=True): + data = dm.task.put_task_data(pk, serializer.data) + return Response(data) elif request.method == 'DELETE': dm.task.delete_task_data(pk) return Response(status=status.HTTP_204_NO_CONTENT) @@ -509,82 +501,64 @@ def annotations(self, request, pk): return Response(data=str(e), status=status.HTTP_400_BAD_REQUEST) return Response(data) - @swagger_auto_schema(method='get', operation_summary='Method allows to download annotations as a file', - manual_parameters=[openapi.Parameter('filename', openapi.IN_PATH, description="A name of a file with annotations", + @swagger_auto_schema(method='put', operation_summary='Method allows to download annotations as a file', + manual_parameters=[ + openapi.Parameter('format', openapi.IN_PATH, + description="Desired output format name\nYou can get the list of supported formats at:\n/server/annotation/import_formats", type=openapi.TYPE_STRING, required=True), - openapi.Parameter('format', openapi.IN_QUERY, description="A name of a dumper\nYou can get 
annotation dumpers from this API:\n/server/annotation/formats", + ], + responses={ + '202': openapi.Response(description='Uploading has been started'), + '201': openapi.Response(description='Uploading has finished'), + } + ) + @action(detail=True, methods=['PUT'], serializer_class=None, + url_path=r'annotations/(?P[^/&]+)') + def upload(self, request, pk, src_format): + self.get_object() # force to call check_object_permissions + + return _import_annotations( + request=request, + rq_id="{}@/api/v1/tasks/{}/annotations/upload".format(request.user, pk), + rq_func=dm.task.import_task_annotations, + pk=pk, + format_name=src_format, + ) + + @swagger_auto_schema(method='get', operation_summary='Method allows to download annotations as a file', + manual_parameters=[ + openapi.Parameter('format', openapi.IN_PATH, + description="Desired output format name\nYou can get the list of supported formats at:\n/server/annotation/export_formats", type=openapi.TYPE_STRING, required=True), - openapi.Parameter('action', in_=openapi.IN_QUERY, description='Used to start downloading process after annotation file had been created', - required=False, enum=['download'], type=openapi.TYPE_STRING)], + openapi.Parameter('filename', openapi.IN_QUERY, + description="Desired output file name", + type=openapi.TYPE_STRING, required=False), + openapi.Parameter('action', in_=openapi.IN_QUERY, + description='Used to start downloading process after annotation file had been created', + type=openapi.TYPE_STRING, required=False, enum=['download']) + ], responses={'202': openapi.Response(description='Dump of annotations has been started'), '201': openapi.Response(description='Annotations file is ready to download'), - '200': openapi.Response(description='Download of file started')}) + '200': openapi.Response(description='Download of file started') + } + ) @action(detail=True, methods=['GET'], serializer_class=None, - url_path=r'annotations/(?P[^/]+)(\?(?P[^/&]+))?') - def dump(self, request, pk, dst_format, filename=None): + url_path=r'annotations/(?P[^/&]+)') + def dump(self, request, pk, dst_format): """ Dump of annotations in common case is a long process which cannot be performed within one request. First request starts dumping process. When the file is ready (code 201) you can get it with query parameter action=download. 
""" - db_task = self.get_object() - - action = request.query_params.get("action", "").lower() - if action not in {"", "download"}: - raise serializers.ValidationError( - "Unexpected action specified for the request") - - if dst_format not in [f.DISPLAY_NAME for f in dm.views.get_export_formats()]: - raise serializers.ValidationError( - "Unknown format specified for the request") - - rq_id = "/api/v1/tasks/{}/annotations/{}".format(pk, dst_format) - queue = django_rq.get_queue("default") - - rq_job = queue.fetch_job(rq_id) - if rq_job: - last_task_update_time = timezone.localtime(db_task.updated_date) - request_time = rq_job.meta.get('request_time', None) - if request_time is None or request_time < last_task_update_time: - rq_job.cancel() - rq_job.delete() - else: - if rq_job.is_finished: - file_path = rq_job.return_value - if action == "download" and osp.exists(file_path): - rq_job.delete() - - timestamp = datetime.strftime(last_task_update_time, - "%Y_%m_%d_%H_%M_%S") - filename = filename or \ - "task_{}-{}-{}_annotations.{}".format( - db_task.name, timestamp, - dst_format, osp.splitext(file_path)[1]) - return sendfile(request, file_path, attachment=True, - attachment_filename=filename.lower()) - else: - if osp.exists(file_path): - return Response(status=status.HTTP_201_CREATED) - elif rq_job.is_failed: - exc_info = str(rq_job.exc_info) - rq_job.delete() - return Response(exc_info, - status=status.HTTP_500_INTERNAL_SERVER_ERROR) - else: - return Response(status=status.HTTP_202_ACCEPTED) - - try: - if request.scheme: - server_address = request.scheme + '://' - server_address += request.get_host() - except Exception: - server_address = None - - ttl = dm.views.CACHE_TTL.total_seconds() - queue.enqueue_call(func=dm.views.export_task_annotations, - args=(pk, dst_format, server_address), job_id=rq_id, - meta={ 'request_time': timezone.localtime() }, - result_ttl=ttl, failure_ttl=ttl) - return Response(status=status.HTTP_202_ACCEPTED) + db_task = self.get_object() # force to call check_object_permissions + + return _export_annotations(db_task=db_task, + rq_id="/api/v1/tasks/{}/annotations/{}".format(pk, dst_format), + request=request, + action=request.query_params.get("action", "").lower(), + callback=dm.views.export_task_annotations, + dst_format=dst_format, + filename=request.query_params.get("filename", "").lower(), + ) @swagger_auto_schema(method='get', operation_summary='When task is being created the method returns information about a status of the creation process') @action(detail=True, methods=['GET'], serializer_class=RqStatusSerializer) @@ -641,74 +615,35 @@ def data_info(request, pk): return Response(serializer.data) @swagger_auto_schema(method='get', operation_summary='Export task as a dataset in a specific format', - manual_parameters=[openapi.Parameter('action', in_=openapi.IN_QUERY, - required=False, type=openapi.TYPE_STRING, enum=['download']), - openapi.Parameter('format', in_=openapi.IN_QUERY, required=False, type=openapi.TYPE_STRING)], - responses={'202': openapi.Response(description='Dump of annotations has been started'), - '201': openapi.Response(description='Annotations file is ready to download'), - '200': openapi.Response(description='Download of file started')}) + manual_parameters=[ + openapi.Parameter('format', openapi.IN_PATH, + description="Desired output format name\nYou can get the list of supported formats at:\n/server/annotation/export_formats", + type=openapi.TYPE_STRING, required=True), + openapi.Parameter('filename', openapi.IN_QUERY, + description="Desired 
output file name", + type=openapi.TYPE_STRING, required=False), + openapi.Parameter('action', in_=openapi.IN_QUERY, + description='Used to start downloading process after annotation file had been created', + type=openapi.TYPE_STRING, required=False, enum=['download']) + ], + responses={'202': openapi.Response(description='Exporting has been started'), + '201': openapi.Response(description='Output file is ready for downloading'), + '200': openapi.Response(description='Download of file started') + } + ) @action(detail=True, methods=['GET'], serializer_class=None, - url_path='dataset') - def dataset_export(self, request, pk): - db_task = self.get_object() - - action = request.query_params.get("action", "").lower() - if action not in {"", "download"}: - raise serializers.ValidationError( - "Unexpected action specified for the request") - - dst_format = request.query_params.get("format", "").lower() - if dst_format not in [f.DISPLAY_NAME for f in dm.views.get_export_formats()]: - raise serializers.ValidationError( - "Unknown format specified for the request") - - rq_id = "/api/v1/tasks/{}/dataset/{}".format(pk, dst_format) - queue = django_rq.get_queue("default") - - rq_job = queue.fetch_job(rq_id) - if rq_job: - last_task_update_time = timezone.localtime(db_task.updated_date) - request_time = rq_job.meta.get('request_time', None) - if request_time is None or request_time < last_task_update_time: - rq_job.cancel() - rq_job.delete() - else: - if rq_job.is_finished: - file_path = rq_job.return_value - if action == "download" and osp.exists(file_path): - rq_job.delete() - - timestamp = datetime.strftime(last_task_update_time, - "%Y_%m_%d_%H_%M_%S") - filename = "task_{}-{}-{}_dataset.{}".format( - db_task.name, timestamp, - dst_format, osp.splitext(file_path)[1]) - return sendfile(request, file_path, attachment=True, - attachment_filename=filename.lower()) - else: - if osp.exists(file_path): - return Response(status=status.HTTP_201_CREATED) - elif rq_job.is_failed: - exc_info = str(rq_job.exc_info) - rq_job.delete() - return Response(exc_info, - status=status.HTTP_500_INTERNAL_SERVER_ERROR) - else: - return Response(status=status.HTTP_202_ACCEPTED) - - try: - if request.scheme: - server_address = request.scheme + '://' - server_address += request.get_host() - except Exception: - server_address = None - - ttl = dm.views.CACHE_TTL.total_seconds() - queue.enqueue_call(func=dm.views.export_task_as_dataset, - args=(pk, dst_format, server_address), job_id=rq_id, - meta={ 'request_time': timezone.localtime() }, - result_ttl=ttl, failure_ttl=ttl) - return Response(status=status.HTTP_202_ACCEPTED) + url_path=r'dataset/(?P[^/&]+)') + def dataset_export(self, request, pk, dst_format): + db_task = self.get_object() # force to call check_object_permissions + + return _export_annotations(db_task=db_task, + rq_id="/api/v1/tasks/{}/dataset/{}".format(pk, dst_format), + request=request, + action=request.query_params.get("action", "").lower(), + callback=dm.views.export_task_as_dataset, + dst_format=dst_format, + filename=request.query_params.get("filename", "").lower(), + ) @method_decorator(name='retrieve', decorator=swagger_auto_schema(operation_summary='Method returns details of a job')) @method_decorator(name='update', decorator=swagger_auto_schema(operation_summary='Method updates a job by id')) @@ -747,12 +682,14 @@ def annotations(self, request, pk): data = dm.task.get_job_data(pk) return Response(data) elif request.method == 'PUT': - if request.query_params.get("format", ""): - return load_data_proxy( + 
format_name = request.query_params.get("format", ""): + if format_name: + return _import_annotations( request=request, rq_id="{}@/api/v1/jobs/{}/annotations/upload".format(request.user, pk), rq_func=dm.task.import_job_annotations, pk=pk, + format_name=format_name ) else: serializer = LabeledDataSerializer(data=request.data) @@ -869,10 +806,9 @@ def rq_handler(job, exc_type, exc_value, tb): # '201': openapi.Response(description='Annotations have been uploaded')}, # tags=['tasks']) # @api_view(['PUT']) -def load_data_proxy(request, rq_id, rq_func, pk): +def _import_annotations(request, rq_id, rq_func, pk, format_name): queue = django_rq.get_queue("default") rq_job = queue.fetch_job(rq_id) - format_name = request.query_params.get("format", "").lower() if not rq_job: serializer = AnnotationFileSerializer(data=request.data) @@ -909,3 +845,60 @@ def load_data_proxy(request, rq_id, rq_func, pk): return Response(data=exc_info, status=status.HTTP_500_INTERNAL_SERVER_ERROR) return Response(status=status.HTTP_202_ACCEPTED) + +def _export_annotations(db_task, rq_id, request, dst_format, action, callback, filename): + if action not in {"", "download"}: + raise serializers.ValidationError( + "Unexpected action specified for the request") + + if dst_format not in [f.DISPLAY_NAME for f in dm.views.get_export_formats()]: + raise serializers.ValidationError( + "Unknown format specified for the request") + + queue = django_rq.get_queue("default") + + rq_job = queue.fetch_job(rq_id) + if rq_job: + last_task_update_time = timezone.localtime(db_task.updated_date) + request_time = rq_job.meta.get('request_time', None) + if request_time is None or request_time < last_task_update_time: + rq_job.cancel() + rq_job.delete() + else: + if rq_job.is_finished: + file_path = rq_job.return_value + if action == "download" and osp.exists(file_path): + rq_job.delete() + + timestamp = datetime.strftime(last_task_update_time, + "%Y_%m_%d_%H_%M_%S") + filename = filename or \ + "task_{}-{}-{}.{}".format( + db_task.name, timestamp, + dst_format, osp.splitext(file_path)[1]) + return sendfile(request, file_path, attachment=True, + attachment_filename=filename.lower()) + else: + if osp.exists(file_path): + return Response(status=status.HTTP_201_CREATED) + elif rq_job.is_failed: + exc_info = str(rq_job.exc_info) + rq_job.delete() + return Response(exc_info, + status=status.HTTP_500_INTERNAL_SERVER_ERROR) + else: + return Response(status=status.HTTP_202_ACCEPTED) + + try: + if request.scheme: + server_address = request.scheme + '://' + server_address += request.get_host() + except Exception: + server_address = None + + ttl = dm.views.CACHE_TTL.total_seconds() + queue.enqueue_call(func=callback, + args=(db_task.id, dst_format, server_address), job_id=rq_id, + meta={ 'request_time': timezone.localtime() }, + result_ttl=ttl, failure_ttl=ttl) + return Response(status=status.HTTP_202_ACCEPTED) \ No newline at end of file From 780e1087d69f0e72f2560f6139774baa4d300594 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Mon, 13 Apr 2020 18:57:53 +0300 Subject: [PATCH 34/80] join api methods --- cvat/apps/engine/tests/test_rest_api.py | 13 ++- cvat/apps/engine/views.py | 147 +++++++++++------------- utils/cli/core/core.py | 7 +- utils/cli/tests/test_cli.py | 15 ++- 4 files changed, 91 insertions(+), 91 deletions(-) diff --git a/cvat/apps/engine/tests/test_rest_api.py b/cvat/apps/engine/tests/test_rest_api.py index efff53341b7c..2d78e5aa5495 100644 --- a/cvat/apps/engine/tests/test_rest_api.py +++ b/cvat/apps/engine/tests/test_rest_api.py @@ 
-2,6 +2,7 @@ # # SPDX-License-Identifier: MIT +from glob import glob import io import os import os.path as osp @@ -12,7 +13,6 @@ import zipfile from collections import defaultdict from enum import Enum -from glob import glob from io import BytesIO from unittest import mock @@ -26,7 +26,7 @@ from rest_framework.test import APIClient, APITestCase from cvat.apps.engine.models import (AttributeType, Data, Job, Project, - Segment, StatusChoice, Task) + Segment, StatusChoice, Task) def create_db_users(cls): @@ -3080,6 +3080,9 @@ def _get_initial_annotation(annotation_format): polygon_shapes_wo_attrs + \ polygon_shapes_with_attrs + else: + raise Exception("Unknown format {}".format(annotation_format)) + return annotations response = self._get_import_formats(annotator) @@ -3130,15 +3133,15 @@ def _get_initial_annotation(annotation_format): # 3. download annotation response = self._dump_api_v1_tasks_id_annotations(task["id"], annotator, - "/{}".format(export_format)) + "?format={}".format(export_format)) self.assertEqual(response.status_code, HTTP_202_ACCEPTED) response = self._dump_api_v1_tasks_id_annotations(task["id"], annotator, - "/{}".format(export_format)) + "?format={}".format(export_format)) self.assertEqual(response.status_code, HTTP_201_CREATED) response = self._dump_api_v1_tasks_id_annotations(task["id"], annotator, - "/{}?action=download".format(export_format)) + "?format={}&action=download".format(export_format)) self.assertEqual(response.status_code, HTTP_200_OK) # 4. check downloaded data diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index 8e5c564a3c38..b5172961c6a9 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -465,8 +465,35 @@ def data(self, request, pk): slogger.task[pk].error(msg, exc_info=True) return Response(data=msg + '\n' + str(e), status=status.HTTP_400_BAD_REQUEST) - @swagger_auto_schema(method='get', operation_summary='Method returns annotations for a specific task') - @swagger_auto_schema(method='put', operation_summary='Method performs an update of all annotations in a specific task') + @swagger_auto_schema(method='get', operation_summary='Method allows to download task annotations', + manual_parameters=[ + openapi.Parameter('format', openapi.IN_QUERY, + description="Desired output format name\nYou can get the list of supported formats at:\n/server/annotation/export_formats", + type=openapi.TYPE_STRING, required=False), + openapi.Parameter('filename', openapi.IN_QUERY, + description="Desired output file name", + type=openapi.TYPE_STRING, required=False), + openapi.Parameter('action', in_=openapi.IN_QUERY, + description='Used to start downloading process after annotation file had been created', + type=openapi.TYPE_STRING, required=False, enum=['download']) + ], + responses={ + '202': openapi.Response(description='Dump of annotations has been started'), + '201': openapi.Response(description='Annotations file is ready to download'), + '200': openapi.Response(description='Download of file started') + } + ) + @swagger_auto_schema(method='put', operation_summary='Method allows to upload task annotations', + manual_parameters=[ + openapi.Parameter('format', openapi.IN_QUERY, + description="Input format name\nYou can get the list of supported formats at:\n/server/annotation/import_formats", + type=openapi.TYPE_STRING, required=False), + ], + responses={ + '202': openapi.Response(description='Uploading has been started'), + '201': openapi.Response(description='Uploading has finished'), + } + ) @swagger_auto_schema(method='patch', 
operation_summary='Method performs a partial update of annotations in a specific task', manual_parameters=[openapi.Parameter('action', in_=openapi.IN_QUERY, required=True, type=openapi.TYPE_STRING, enum=['create', 'update', 'delete'])]) @@ -474,17 +501,38 @@ def data(self, request, pk): @action(detail=True, methods=['GET', 'DELETE', 'PUT', 'PATCH'], serializer_class=LabeledDataSerializer) def annotations(self, request, pk): - self.get_object() # force to call check_object_permissions + db_task = self.get_object() # force to call check_object_permissions if request.method == 'GET': - data = dm.task.get_task_data(pk) - serializer = LabeledDataSerializer(data=data) - if serializer.is_valid(raise_exception=True): - return Response(serializer.data) + format_name = request.query_params.get('format') + if format_name: + return _export_annotations(db_task=db_task, + rq_id="/api/v1/tasks/{}/annotations/{}".format(pk, format_name), + request=request, + action=request.query_params.get("action", "").lower(), + callback=dm.views.export_task_annotations, + format_name=format_name, + filename=request.query_params.get("filename", "").lower(), + ) + else: + data = dm.task.get_task_data(pk) + serializer = LabeledDataSerializer(data=data) + if serializer.is_valid(raise_exception=True): + return Response(serializer.data) elif request.method == 'PUT': - serializer = LabeledDataSerializer(data=request.data) - if serializer.is_valid(raise_exception=True): - data = dm.task.put_task_data(pk, serializer.data) - return Response(data) + format_name = request.query_params.get('format') + if format_name: + return _import_annotations( + request=request, + rq_id="{}@/api/v1/tasks/{}/annotations/upload".format(request.user, pk), + rq_func=dm.task.import_task_annotations, + pk=pk, + format_name=format_name, + ) + else: + serializer = LabeledDataSerializer(data=request.data) + if serializer.is_valid(raise_exception=True): + data = dm.task.put_task_data(pk, serializer.data) + return Response(data) elif request.method == 'DELETE': dm.task.delete_task_data(pk) return Response(status=status.HTTP_204_NO_CONTENT) @@ -501,65 +549,6 @@ def annotations(self, request, pk): return Response(data=str(e), status=status.HTTP_400_BAD_REQUEST) return Response(data) - @swagger_auto_schema(method='put', operation_summary='Method allows to download annotations as a file', - manual_parameters=[ - openapi.Parameter('format', openapi.IN_PATH, - description="Desired output format name\nYou can get the list of supported formats at:\n/server/annotation/import_formats", - type=openapi.TYPE_STRING, required=True), - ], - responses={ - '202': openapi.Response(description='Uploading has been started'), - '201': openapi.Response(description='Uploading has finished'), - } - ) - @action(detail=True, methods=['PUT'], serializer_class=None, - url_path=r'annotations/(?P[^/&]+)') - def upload(self, request, pk, src_format): - self.get_object() # force to call check_object_permissions - - return _import_annotations( - request=request, - rq_id="{}@/api/v1/tasks/{}/annotations/upload".format(request.user, pk), - rq_func=dm.task.import_task_annotations, - pk=pk, - format_name=src_format, - ) - - @swagger_auto_schema(method='get', operation_summary='Method allows to download annotations as a file', - manual_parameters=[ - openapi.Parameter('format', openapi.IN_PATH, - description="Desired output format name\nYou can get the list of supported formats at:\n/server/annotation/export_formats", - type=openapi.TYPE_STRING, required=True), - openapi.Parameter('filename', 
openapi.IN_QUERY, - description="Desired output file name", - type=openapi.TYPE_STRING, required=False), - openapi.Parameter('action', in_=openapi.IN_QUERY, - description='Used to start downloading process after annotation file had been created', - type=openapi.TYPE_STRING, required=False, enum=['download']) - ], - responses={'202': openapi.Response(description='Dump of annotations has been started'), - '201': openapi.Response(description='Annotations file is ready to download'), - '200': openapi.Response(description='Download of file started') - } - ) - @action(detail=True, methods=['GET'], serializer_class=None, - url_path=r'annotations/(?P[^/&]+)') - def dump(self, request, pk, dst_format): - """ - Dump of annotations in common case is a long process which cannot be performed within one request. - First request starts dumping process. When the file is ready (code 201) you can get it with query parameter action=download. - """ - db_task = self.get_object() # force to call check_object_permissions - - return _export_annotations(db_task=db_task, - rq_id="/api/v1/tasks/{}/annotations/{}".format(pk, dst_format), - request=request, - action=request.query_params.get("action", "").lower(), - callback=dm.views.export_task_annotations, - dst_format=dst_format, - filename=request.query_params.get("filename", "").lower(), - ) - @swagger_auto_schema(method='get', operation_summary='When task is being created the method returns information about a status of the creation process') @action(detail=True, methods=['GET'], serializer_class=RqStatusSerializer) def status(self, request, pk): @@ -632,16 +621,16 @@ def data_info(request, pk): } ) @action(detail=True, methods=['GET'], serializer_class=None, - url_path=r'dataset/(?P[^/&]+)') - def dataset_export(self, request, pk, dst_format): + url_path=r'dataset/(?P[^/&]+)') + def dataset_export(self, request, pk, format_name): db_task = self.get_object() # force to call check_object_permissions return _export_annotations(db_task=db_task, - rq_id="/api/v1/tasks/{}/dataset/{}".format(pk, dst_format), + rq_id="/api/v1/tasks/{}/dataset/{}".format(pk, format_name), request=request, action=request.query_params.get("action", "").lower(), callback=dm.views.export_task_as_dataset, - dst_format=dst_format, + format_name=format_name, filename=request.query_params.get("filename", "").lower(), ) @@ -682,7 +671,7 @@ def annotations(self, request, pk): data = dm.task.get_job_data(pk) return Response(data) elif request.method == 'PUT': - format_name = request.query_params.get("format", ""): + format_name = request.query_params.get("format", "") if format_name: return _import_annotations( request=request, @@ -846,12 +835,12 @@ def _import_annotations(request, rq_id, rq_func, pk, format_name): return Response(status=status.HTTP_202_ACCEPTED) -def _export_annotations(db_task, rq_id, request, dst_format, action, callback, filename): +def _export_annotations(db_task, rq_id, request, format_name, action, callback, filename): if action not in {"", "download"}: raise serializers.ValidationError( "Unexpected action specified for the request") - if dst_format not in [f.DISPLAY_NAME for f in dm.views.get_export_formats()]: + if format_name not in [f.DISPLAY_NAME for f in dm.views.get_export_formats()]: raise serializers.ValidationError( "Unknown format specified for the request") @@ -875,7 +864,7 @@ def _export_annotations(db_task, rq_id, request, dst_format, action, callback, f filename = filename or \ "task_{}-{}-{}.{}".format( db_task.name, timestamp, - dst_format, 
osp.splitext(file_path)[1]) + format_name, osp.splitext(file_path)[1]) return sendfile(request, file_path, attachment=True, attachment_filename=filename.lower()) else: @@ -898,7 +887,7 @@ def _export_annotations(db_task, rq_id, request, dst_format, action, callback, f ttl = dm.views.CACHE_TTL.total_seconds() queue.enqueue_call(func=callback, - args=(db_task.id, dst_format, server_address), job_id=rq_id, + args=(db_task.id, format_name, server_address), job_id=rq_id, meta={ 'request_time': timezone.localtime() }, result_ttl=ttl, failure_ttl=ttl) return Response(status=status.HTTP_202_ACCEPTED) \ No newline at end of file diff --git a/utils/cli/core/core.py b/utils/cli/core/core.py index 59f45af49ecf..b9ce5c0ff90c 100644 --- a/utils/cli/core/core.py +++ b/utils/cli/core/core.py @@ -1,4 +1,7 @@ +# Copyright (C) 2020 Intel Corporation +# # SPDX-License-Identifier: MIT + import json import logging import os @@ -169,5 +172,5 @@ def tasks_id_annotations_format(self, task_id, fileformat): .format(fileformat) def tasks_id_annotations_filename(self, task_id, name, fileformat): - return self.tasks_id(task_id) + '/annotations/{}?format={}' \ - .format(name, fileformat) + return self.tasks_id(task_id) + '/annotations?format={}&filename={}' \ + .format(fileformat, name) diff --git a/utils/cli/tests/test_cli.py b/utils/cli/tests/test_cli.py index 47cd7942fe70..dddb0bde27ad 100644 --- a/utils/cli/tests/test_cli.py +++ b/utils/cli/tests/test_cli.py @@ -1,16 +1,21 @@ +# Copyright (C) 2020 Intel Corporation +# # SPDX-License-Identifier: MIT -import logging + import io +import logging import os import sys import unittest + from django.conf import settings +from PIL import Image from requests.auth import HTTPBasicAuth -from utils.cli.core import CLI, CVAT_API_V1, ResourceType from rest_framework.test import APITestCase, RequestsClient -from cvat.apps.engine.tests.test_rest_api import create_db_users -from cvat.apps.engine.tests.test_rest_api import generate_image_file -from PIL import Image + +from cvat.apps.engine.tests.test_rest_api import (create_db_users, + generate_image_file) +from utils.cli.core import CLI, CVAT_API_V1, ResourceType class TestCLI(APITestCase): From 951bcb4faa86f538f47b73baee8d8b819aff4a24 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Tue, 14 Apr 2020 11:22:21 +0300 Subject: [PATCH 35/80] Add trim whitespace to workspace config --- .vscode/settings.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index d7f4a5984ff3..3b796a0be54c 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -33,5 +33,6 @@ "./datumaro", ], "licenser.license": "Custom", - "licenser.customHeader": "Copyright (C) @YEAR@ Intel Corporation\n\nSPDX-License-Identifier: MIT" + "licenser.customHeader": "Copyright (C) @YEAR@ Intel Corporation\n\nSPDX-License-Identifier: MIT", + "files.trimTrailingWhitespace": true } From 3c5caae79fd664116ac9d364a9b6a23a973439b4 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Tue, 14 Apr 2020 11:46:17 +0300 Subject: [PATCH 36/80] update tests --- cvat/apps/engine/tests/test_rest_api.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/cvat/apps/engine/tests/test_rest_api.py b/cvat/apps/engine/tests/test_rest_api.py index 2d78e5aa5495..5803df94ad89 100644 --- a/cvat/apps/engine/tests/test_rest_api.py +++ b/cvat/apps/engine/tests/test_rest_api.py @@ -1,8 +1,7 @@ -# Copyright (C) 2018 Intel Corporation +# Copyright (C) 2020 Intel Corporation # # SPDX-License-Identifier: MIT 
-from glob import glob import io import os import os.path as osp @@ -13,6 +12,7 @@ import zipfile from collections import defaultdict from enum import Enum +from glob import glob from io import BytesIO from unittest import mock @@ -3080,6 +3080,13 @@ def _get_initial_annotation(annotation_format): polygon_shapes_wo_attrs + \ polygon_shapes_with_attrs + elif annotation_format == "Datumaro 1.0": + annotations["shapes"] = rectangle_shapes_with_attrs + \ + rectangle_shapes_wo_attrs + \ + polygon_shapes_wo_attrs + \ + polygon_shapes_with_attrs + annotations["tags"] = tags_with_attrs + tags_wo_attrs + else: raise Exception("Unknown format {}".format(annotation_format)) From fce56ca832fb3dd7d96756eee523a81a7b10c7b8 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Tue, 14 Apr 2020 13:07:11 +0300 Subject: [PATCH 37/80] fixes --- .travis.yml | 2 +- cvat/apps/dataset_manager/formats/mot.py | 2 +- cvat/apps/engine/views.py | 7 ++++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 9875fc6e3150..49ddb8dd7780 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,6 +13,6 @@ before_script: script: - docker-compose -f docker-compose.yml -f docker-compose.ci.yml run cvat_ci /bin/bash -c 'python3 manage.py test cvat/apps utils/cli' - - docker-compose -f docker-compose.yml -f docker-compose.ci.yml run cvat_ci /bin/bash -c 'python3 manage.py test --pattern="_tests.py" cvat/apps/dataset_manager' + - docker-compose -f docker-compose.yml -f docker-compose.ci.yml run cvat_ci /bin/bash -c 'python3 manage.py test --pattern="_tests.py" cvat/apps/dataset_manager/tests' - docker-compose -f docker-compose.yml -f docker-compose.ci.yml run cvat_ci /bin/bash -c 'python3 manage.py test datumaro/' - docker-compose -f docker-compose.yml -f docker-compose.ci.yml run cvat_ci /bin/bash -c 'cd cvat-data && npm install && cd ../cvat-core && npm install && npm run test && npm run coveralls' diff --git a/cvat/apps/dataset_manager/formats/mot.py b/cvat/apps/dataset_manager/formats/mot.py index 5c3461adff91..e8b2ea82a13b 100644 --- a/cvat/apps/dataset_manager/formats/mot.py +++ b/cvat/apps/dataset_manager/formats/mot.py @@ -67,7 +67,7 @@ def _import(src_file, task_data): tracks[track_id].shapes.append(shape) for track in tracks.values(): - # MOT task_data do not require frames to be ordered + # MOT annotations do not require frames to be ordered track.shapes.sort(key=lambda t: t.frame) # Set outside=True for the last shape in a track to finish the track track.shapes[-1] = track.shapes[-1]._replace(outside=True) diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index b5172961c6a9..fe98a2cd9ae4 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -605,7 +605,7 @@ def data_info(request, pk): @swagger_auto_schema(method='get', operation_summary='Export task as a dataset in a specific format', manual_parameters=[ - openapi.Parameter('format', openapi.IN_PATH, + openapi.Parameter('format', openapi.IN_QUERY, description="Desired output format name\nYou can get the list of supported formats at:\n/server/annotation/export_formats", type=openapi.TYPE_STRING, required=True), openapi.Parameter('filename', openapi.IN_QUERY, @@ -621,10 +621,11 @@ def data_info(request, pk): } ) @action(detail=True, methods=['GET'], serializer_class=None, - url_path=r'dataset/(?P[^/&]+)') - def dataset_export(self, request, pk, format_name): + url_path='dataset') + def dataset_export(self, request, pk): db_task = self.get_object() # force to call check_object_permissions + 
format_name = request.query_params.get("format", "") return _export_annotations(db_task=db_task, rq_id="/api/v1/tasks/{}/dataset/{}".format(pk, format_name), request=request, From c87ece3236235afb38e463df88ea250ae92f9482 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Tue, 14 Apr 2020 17:35:35 +0300 Subject: [PATCH 38/80] Update format docs --- cvat/apps/dataset_manager/formats/README.md | 383 ++++++++++---------- 1 file changed, 184 insertions(+), 199 deletions(-) diff --git a/cvat/apps/dataset_manager/formats/README.md b/cvat/apps/dataset_manager/formats/README.md index 101a28ac2f26..99134562700a 100644 --- a/cvat/apps/dataset_manager/formats/README.md +++ b/cvat/apps/dataset_manager/formats/README.md @@ -1,65 +1,61 @@ - - -## Description - -The purpose of this application is to add support for multiple annotation formats for CVAT. -It allows to download and upload annotations in different formats and easily add support for new. - -## How to add a new annotation format support - -1. Write a python script that will be executed via exec() function. Following items must be defined inside at code: - - **format_spec** - a dictionary with the following structure: - ```python - format_spec = { - "name": "CVAT", - "dumpers": [ - { - "display_name": "{name} {format} {version} for videos", - "format": "XML", - "version": "1.1", - "handler": "dump_as_cvat_interpolation" - }, - { - "display_name": "{name} {format} {version} for images", - "format": "XML", - "version": "1.1", - "handler": "dump_as_cvat_annotation" - } - ], - "loaders": [ - { - "display_name": "{name} {format} {version}", - "format": "XML", - "version": "1.1", - "handler": "load", - } - ], - } - ``` - - **name** - unique name for each format - - **dumpers and loaders** - lists of objects that describes exposed dumpers and loaders and must - have following keys: - 1. display_name - **unique** string used as ID for dumpers and loaders. - Also this string is displayed in CVAT UI. - Possible to use a named placeholders like the python format function - (supports only name, format and version variables). - 1. format - a string, used as extension for a dumped annotation. - 1. version - just string with version. - 1. handler - function that will be called and should be defined at top scope. - - dumper/loader handler functions. Each function should have the following signature: - ```python - def dump_handler(file_object, annotations): - ``` +# Dataset and annotation formats + +## Contents + +- [How to add a format](#how-to-add) +- [Format descriptions](#formats) + - [CVAT](#cvat) + - [LabelMe](#labelme) + - [MOT](#mot) + - [COCO](#coco) + - [PASCAL VOC and mask](#voc) + - [YOLO](#yolo) + - [TF detection API](#tfrecord) + +## How to add a new annotation format support + +1. Add a python script to `dataset_manager/formats` +1. Add an import statement to [registry.py](./registry.py). +1. Each format is supported by an importer and exporter. + It can be a function or a class decorated with + `importer` or `exporter` from [registry.py](./registry.py). Examples: + ``` + @importer(name="MyFormat", version="1.0", ext="ZIP") + def my_importer(file_object, task_data, **options): + ... + + @importer(name="MyFormat", version="2.0", ext="XML") + class my_importer(file_object, task_data, **options): + ... + def __call__(self, file_object, task_data, **options): + ... + + @exporter(name="MyFormat", version="1.0", ext="ZIP"): + def my_exporter(file_object, task_data, **options): + ... 
+   ```
+   Each decorator defines format parameters such as:
+   - *name*
+   - *version*
+   - *file extension*. For the `importer` it can be a comma-separated list.
+
+   These parameters are combined to produce a visible name. It can be
+   set explicitly by the `display_name` argument.

-   Inside of the script environment 2 variables are available:
-   - **file_object** - python's standard file object returned by open() function and exposing a file-oriented API
-     (with methods such as read() or write()) to an underlying resource.
-   - **annotations** - instance of [Annotation](annotation.py#L106) class.
+   Importer arguments:
+   - *file_object* - a file with annotations or a dataset
+   - *task_data* - an instance of the `TaskData` class.

-   Annotation class expose API and some additional pre-defined types that allow to get/add shapes inside
-   a loader/dumper code.
+   Exporter arguments:
+   - *file_object* - a file for annotations or a dataset
+   - *task_data* - an instance of the `TaskData` class.
+   - *options* - format-specific options. The `save_images` option distinguishes
+     whether a full dataset or only annotations are requested.

-   Short description of the public methods:
+   [`TaskData`](../bindings.py) provides many task properties and interfaces
+   to add and read task annotations.
+
+   Public methods:
    - **Annotation.shapes** - property, returns a generator of Annotation.LabeledShape objects
    - **Annotation.tracks** - property, returns a generator of Annotation.Track objects
    - **Annotation.tags** - property, returns a generator of Annotation.Tag objects
@@ -67,31 +63,26 @@
      which groups annotation objects by frame. Note that TrackedShapes will be represented as Annotation.LabeledShape.
    - **Annotation.meta** - property, returns dictionary which represent a task meta information, for example
      video source name, number of frames, number of jobs, etc
-   - **Annotation.add_tag(tag)** - tag should be a instance of the Annotation.Tag class
-   - **Annotation.add_shape(shape)** - shape should be a instance of the Annotation.Shape class
-   - **Annotation.add_track(track)** - track should be a instance of the Annotation.Track class
+   - **Annotation.add_tag(tag)** - tag should be an instance of the Annotation.Tag class
+   - **Annotation.add_shape(shape)** - shape should be an instance of the Annotation.Shape class
+   - **Annotation.add_track(track)** - track should be an instance of the Annotation.Track class
    - **Annotation.Attribute** = namedtuple('Attribute', 'name, value')
-     - name - String, name of the attribute
-     - value - String, value of the attribute
    - **Annotation.LabeledShape** = namedtuple('LabeledShape', 'type, frame, label, points, occluded, attributes, group, z_order')
      LabeledShape.\__new\__.\__defaults\__ = (0, None)
    - **TrackedShape** = namedtuple('TrackedShape', 'type, points, occluded, frame, attributes, outside, keyframe, z_order')
      TrackedShape.\__new\__.\__defaults\__ = (None, )
    - **Track** = namedtuple('Track', 'label, group, shapes')
    - **Tag** = namedtuple('Tag', 'frame, label, attributes, group')
      Tag.\__new\__.\__defaults\__ = (0, )
    - **Frame** = namedtuple('Frame', 'frame, name, width, height, labeled_shapes, tags')

-   Pseudocode for a dumper script
+
+   Sample exporter code:
    ```python
    ...
    # dump meta info if necessary
    ...
    # iterate over all frames
-   for frame_annotation in annotations.group_by_frame():
+   for frame_annotation in task_data.group_by_frame():
        # get frame info
        image_name = frame_annotation.name
        image_width = frame_annotation.width
@@ -114,14 +105,15 @@ It allows to download and upload annotations in different formats and easily add
        file_object.write(...)
        ...
    ```
-   Pseudocode for a loader code
+
+   Sample importer code:
    ```python
    ...
    #read file_object
    ...

    for parsed_shape in parsed_shapes:
-       shape = annotations.LabeledShape(
+       shape = task_data.LabeledShape(
            type="rectangle",
            points=[0, 0, 100, 100],
            occluded=False,
@@ -131,53 +123,60 @@ It allows to download and upload annotations in different formats and easily add
            frame=99,
        )

-       annotations.add_shape(shape)
+       task_data.add_shape(shape)
    ```
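For readers who want to see what the `importer`/`exporter` decorators do mechanically, here is a self-contained sketch of the registration pattern. This is an illustration of the idea only; the real implementation lives in registry.py and may differ in names and details:

```python
# Illustrative sketch of a decorator-based format registry.
# Names (EXPORT_FORMATS, "callback") are assumptions, not CVAT's actual code.
EXPORT_FORMATS = {}

def exporter(name, version, ext, display_name=None):
    def wrap(f_or_cls):
        # Classes are instantiated once and used as callables.
        callback = f_or_cls() if isinstance(f_or_cls, type) else f_or_cls
        # The visible name is combined from the parameters,
        # unless display_name overrides it.
        key = display_name or "{} {}".format(name, version)
        EXPORT_FORMATS[key] = {
            "callback": callback,
            "name": name, "version": version, "ext": ext,
        }
        return f_or_cls
    return wrap

@exporter(name="MyFormat", version="1.0", ext="ZIP")
def my_exporter(file_object, task_data, **options):
    ...  # write the task annotations from task_data into file_object
```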
-   Full examples can be found in corresponding *.py files (cvat.py, coco.py, yolo.py, etc.).
-1. Add path to a new python script to the annotation app settings:
-   ```python
-   BUILTIN_FORMATS = (
-     os.path.join(path_prefix, 'cvat.py'),
-     os.path.join(path_prefix,'pascal_voc.py'),
-   )
-   ```
+## Format specifications

-## Ideas for improvements
+### CVAT

-- Annotation format manager like DL Model manager with which the user can add custom format support by
-  writing dumper/loader scripts.
-- Often a custom loader/dumper requires additional python packages and it would be useful if CVAT provided some API
-  that allows the user to install a python dependencies from their own code without changing the source code.
-  Possible solutions: install additional modules via pip call to a separate directory for each Annotation Format
-  to reduce version conflicts, etc. Thus, custom code can be run in an extended environment, and core CVAT modules
-  should not be affected. As well, this functionality can be useful for Auto Annotation module.
+This is the native CVAT annotation format. It supports all CVAT annotation
+features, so it can be used to make data backups.
+- supported annotations - Rectangles, Polygons, Polylines, Points, Cuboids, Tags, Tracks
+- attributes are supported
+- [Format specification](/cvat/apps/documentation/xml_format.md)

+#### CVAT for images dumper
+- downloaded file: a ZIP file of the following structure:
+  ```bash
+  taskname.zip/
+  ├── images/
+  |   ├── img1.png
+  |   └── img2.jpg
+  └── annotations.xml
+  ```
+
+- tracks are split by frames

-### CVAT
-This is native CVAT annotation format.
-
-[Detailed format description](cvat/apps/documentation/xml_format.md)
+#### CVAT for videos dumper
+- downloaded file: a ZIP file

-#### CVAT XML for images dumper
-- downloaded file: Single unpacked XML
-- supported shapes - Rectangles, Polygons, Polylines, Points
+- shapes are exported as single-frame tracks

-#### CVAT XML for videos dumper
-- downloaded file: Single unpacked XML
-- supported shapes - Rectangles, Polygons, Polylines, Points
+#### CVAT loader
+- uploaded file: an XML file or a ZIP file

-#### CVAT XML Loader
-- uploaded file: Single unpacked XML
-- supported shapes - Rectangles, Polygons, Polylines, Points

-### [Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/)
+
+### [Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/)
 - [Format specification](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/devkit_doc.pdf)

-#### Pascal dumper description
+- supported annotations:
+  - Rectangles (detection and layout tasks)
+  - Tags (action- and classification tasks)
+  - Polygons (segmentation task)
+- supported attributes:
+  - `occluded`
+  - `truncated` and `difficult` (should be defined for labels as `checkbox`-es)
+  - action attributes (import only, should be defined as `checkbox`-es)
+
+#### Pascal VOC export
 - downloaded file: a zip archive of the following structure:
   ```bash
   taskname.zip/
+  ├── JPEGImages/
+  │   ├── <image_name1>.jpg
+  │   ├── <image_name2>.jpg
+  │   └── <image_nameN>.jpg
   ├── Annotations/
   │   ├── <image_name1>.xml
   │   ├── <image_name2>.xml
@@ -186,44 +185,81 @@
   │   └── Main/
   │       └── default.txt
   └── labelmap.txt
-  ```
-- supported shapes: Rectangles
-- additional comments: If you plan to use `truncated` and `difficult` attributes please add the corresponding
-  items to the CVAT label attributes:
-  `~checkbox=difficult:false ~checkbox=truncated:false`
+
+  # labelmap.txt
+  # label : color_rgb : 'body' parts : actions
+  background:::
+  aeroplane:::
+  bicycle:::
+  bird:::
+  ```

-#### Pascal loader description
+#### Pascal VOC import
 - uploaded file: a zip archive of the structure declared above or the following:
   ```bash
   taskname.zip/
   ├── <image_name1>.xml
   ├── <image_name2>.xml
-  ├── <image_nameN>.xml
-  └── labelmap.txt # optional
+  └── <image_nameN>.xml
   ```

-  The `labelmap.txt` file contains dataset labels. It **must** be included
-  if dataset labels **differ** from VOC default labels. The file structure:
+  It must be possible for CVAT to match the frame name and file name from the annotation `.xml`
+  file (the `filename` tag, e.g. `2008_004457.jpg`). There are 2 options:
+  1. full match between frame name and file name from annotation `.xml`
+     (in cases when task was created from images or image archive).
+  1. match by frame number.
+     File name should be in the following format
+     `<number>.jpg` or `frame_000000.jpg`.
+     It should be used when task was created from video.
+
+#### Segmentation mask export
+- downloaded file: a zip archive with the following structure:
   ```bash
-  # label : color_rgb : 'body' parts : actions
-  background:::
-  aeroplane:::
-  bicycle:::
-  bird:::
+  taskname.zip/
+  ├── labelmap.txt # optional, required for non-VOC labels
+  ├── ImageSets/
+  │   └── Segmentation/
+  │       └── default.txt # list of image names without extension
+  ├── SegmentationClass/ # merged class masks
+  │   ├── image1.png
+  │   └── image2.png
+  └── SegmentationObject/ # merged instance masks
+      ├── image1.png
+      └── image2.png
   ```
+  Mask is a png image with several (RGB) channels where each pixel has its own color which corresponds to a label.
+  Color generation corresponds to the Pascal VOC color generation
+  [algorithm](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/devkit_doc.html#sec:voclabelcolormap).
+  (0, 0, 0) is used for background.
+  The `labelmap.txt` file contains the values of the used colors in RGB format. The file structure:
+  ```bash
+  # label:color_rgb:parts:actions
+  background:0,128,0::
+  aeroplane:10,10,128::
+  bicycle:10,128,0::
+  bird:0,108,128::
+  boat:108,0,100::
+  bottle:18,0,8::
+  bus:12,28,0::
+  ```
+- supported shapes - Rectangles, Polygons
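The color generation rule referenced above is deterministic, so it is easy to reproduce. Below is a sketch of the standard VOC colormap computation (bit manipulation per the devkit description; the function name is illustrative):

```python
def voc_colormap(n=256):
    """Generate the first n Pascal VOC label colors; index 0 is the background."""
    def bit(value, idx):
        # Extract a single bit of an integer.
        return (value >> idx) & 1

    colors = []
    for label in range(n):
        r = g = b = 0
        c = label
        for shift in range(7, -1, -1):
            # Each group of 3 label bits feeds one bit of R, G and B.
            r |= bit(c, 0) << shift
            g |= bit(c, 1) << shift
            b |= bit(c, 2) << shift
            c >>= 3
        colors.append((r, g, b))
    return colors

assert voc_colormap()[0] == (0, 0, 0)    # background
assert voc_colormap()[1] == (128, 0, 0)  # first foreground label
```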
+#### Segmentation mask import
+- uploaded file: a zip archive of the following structure:
+  ```bash
+  taskname.zip/
+  ├── labelmap.txt # optional, required for non-VOC labels
+  ├── ImageSets/
+  │   └── Segmentation/
+  │       └── <subset_name>.txt
+  ├── SegmentationClass/
+  │   ├── image1.png
+  │   └── image2.png
+  └── SegmentationObject/
+      ├── image1.png
+      └── image2.png
+  ```
+- supported shapes: Polygons

 #### How to create a task from Pascal VOC dataset
 1. Download the Pascal VOC dataset (can be downloaded from the
@@ -242,10 +278,12 @@
    and select the *.zip file with annotations from previous step.
 It may take some time.

-### [YOLO](https://pjreddie.com/darknet/yolo/)
-#### Yolo dumper description
+
+### [YOLO](https://pjreddie.com/darknet/yolo/)
+- [Format specification](https://github.com/AlexeyAB/darknet#how-to-train-to-detect-your-custom-objects)
+- supported annotations: Rectangles
+
+#### YOLO export
 - downloaded file: a zip archive with the following structure:
-  [Format specification](https://github.com/AlexeyAB/darknet#how-to-train-to-detect-your-custom-objects)
   ```bash
   archive.zip/
   ├── obj.data
   ├── obj.names
   ├── obj_<subset>_data/
   │   ├── image1.txt
   │   └── image2.txt
   └── train.txt # list of subset image paths
@@ -285,19 +323,15 @@
   The `*.txt` file structure: each line describes label and bounding box
   in the following format `label_id cx cy w h`.
   `obj.names` contains the ordered list of label names.
-- supported shapes - Rectangles
-
-#### Yolo loader description
-- uploaded file: a zip archive of the same structure as above
-  It must be possible to match the CVAT frame (image name) and annotation file name
-  There are 2 options:
-  1. full match between image name and name of annotation `*.txt` file
-     (in cases when a task was created from images or archive of images).
-  1. match by frame number (if CVAT cannot match by name). File name should be in the following format `<number>.jpg`.
-     It should be used when task was created from a video.
-- supported shapes: Rectangles
-- additional comments: the CVAT task should be created with the full label set that may be in the annotation files
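To make the `label_id cx cy w h` line format described above concrete, here is a small sketch of converting a rectangle given in pixel corner coordinates into a YOLO annotation line. The function name and sample values are illustrative, not taken from CVAT's code:

```python
def to_yolo_line(label_id, x1, y1, x2, y2, img_w, img_h):
    """Convert a box in pixel corner coordinates to a YOLO line.

    YOLO stores the box center and size, normalized to [0, 1]
    by the image width and height.
    """
    cx = (x1 + x2) / 2 / img_w
    cy = (y1 + y2) / 2 / img_h
    w = (x2 - x1) / img_w
    h = (y2 - y1) / img_h
    return "{} {:.6f} {:.6f} {:.6f} {:.6f}".format(label_id, cx, cy, w, h)

# A 100x100 box in the top-left corner of a 1000x500 image:
print(to_yolo_line(0, 0, 0, 100, 100, 1000, 500))
# -> "0 0.050000 0.100000 0.100000 0.200000"
```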
+#### YOLO import
+- uploaded file: a zip archive of the same structure as above.
+  It must be possible to match the CVAT frame (image name) and the annotation file name.
+  There are 2 options:
+  1. full match between image name and name of annotation `*.txt` file
+     (in cases when a task was created from images or archive of images).
+  1. match by frame number (if CVAT cannot match by name). File name should be in the following format `<number>.jpg`.
+     It should be used when task was created from a video.

 #### How to create a task from YOLO formatted dataset (from VOC for example)
 1. Follow the official [guide](https://pjreddie.com/darknet/yolo/) (see Training YOLO on VOC section)
@@ -344,7 +378,8 @@ It may take some time.
 1. Click `Upload annotation` button, choose `YOLO ZIP 1.1`
    and select the *.zip file with labels from previous step.
 It may take some time.

-### [MS COCO Object Detection](http://cocodataset.org/#format-data)
+### [MS COCO Object Detection](http://cocodataset.org/#format-data)
+
 #### COCO dumper description
 - downloaded file: single unpacked `json`. Detailed description of the MS COCO format can be found [here](http://cocodataset.org/#format-data)
 - supported shapes - Polygons, Rectangles (interpreted as polygons)
@@ -371,7 +406,7 @@ It may take some time.
 1. click `Upload annotation` button,
    choose `COCO JSON 1.0` and select `instances_val2017.json` annotation file. It may take some time.

-### [TFRecord](https://www.tensorflow.org/tutorials/load_data/tf_records)
+### [TFRecord](https://www.tensorflow.org/tutorials/load_data/tf_records)
+
 TFRecord is a very flexible format, but we try to match the format
 used in [TF object detection](https://github.com/tensorflow/models/tree/master/research/object_detection)
 with minimal modifications.
@@ -517,58 +552,7 @@ python create_pascal_tf_record.py --data_dir <path to VOCdevkit> --set train --y
 1. Click `Upload annotation` button,
    choose `TFRecord ZIP 1.0` and select the *.zip file
    with labels from previous step.
 It may take some time.

-### PNG mask
-#### Mask dumper description
-- downloaded file: a zip archive with the following structure:
-  ```bash
-  taskname.zip
-  ├── labelmap.txt # optional, required for non-VOC labels
-  ├── ImageSets/
-  │   └── Segmentation/
-  │       └── default.txt # list of image names without extension
-  ├── SegmentationClass/ # merged class masks
-  │   ├── image1.png
-  │   └── image2.png
-  └── SegmentationObject/ # merged instance masks
-      ├── image1.png
-      └── image2.png
-  ```
-  Mask is a png image with several (RGB) channels where each pixel has own color which corresponds to a label.
-  Color generation correspond to the Pascal VOC color generation
-  [algorithm](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/devkit_doc.html#sec:voclabelcolormap).
-  (0, 0, 0) is used for background.
-  `labelmap.txt` file contains the values of the used colors in RGB format.
The file structure: - ```bash - # label:color_rgb:parts:actions - background:0,128,0:: - aeroplane:10,10,128:: - bicycle:10,128,0:: - bird:0,108,128:: - boat:108,0,100:: - bottle:18,0,8:: - bus:12,28,0:: - ``` -- supported shapes - Rectangles, Polygons - -#### Mask loader description -- uploaded file: a zip archive of the following structure: - ```bash - name.zip - ├── labelmap.txt # optional, required for non-VOC labels - ├── ImageSets/ - │   └── Segmentation/ - │   └── .txt - ├── SegmentationClass/ - │   ├── image1.png - │   └── image2.png - └── SegmentationObject/ - ├── image1.png - └── image2.png - ``` -- supported shapes: Polygons -- additional comments: the CVAT task should be created with the full label set that may be in the annotation files - -### [MOT sequence](https://arxiv.org/pdf/1906.04567.pdf) +### [MOT sequence](https://arxiv.org/pdf/1906.04567.pdf) #### Dumper - downloaded file: a zip archive of the following structure: ```bash @@ -604,7 +588,8 @@ python create_pascal_tf_record.py --data_dir --set train --y ``` - supported annotations: Rectangle tracks -### [LabelMe](http://labelme.csail.mit.edu/Release3.0) +### [LabelMe](http://labelme.csail.mit.edu/Release3.0) + #### Dumper - downloaded file: a zip archive of the following structure: ```bash From 1e5f9920672a526729fd1aa4f0f3d439d9671df0 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Wed, 15 Apr 2020 15:15:36 +0300 Subject: [PATCH 39/80] join format queries --- cvat/apps/dataset_manager/serializers.py | 6 ++++- cvat/apps/engine/tests/test_rest_api.py | 28 +++++++----------------- cvat/apps/engine/views.py | 28 +++++++++--------------- 3 files changed, 23 insertions(+), 39 deletions(-) diff --git a/cvat/apps/dataset_manager/serializers.py b/cvat/apps/dataset_manager/serializers.py index bdc5dd1351ac..51cf71ca8da3 100644 --- a/cvat/apps/dataset_manager/serializers.py +++ b/cvat/apps/dataset_manager/serializers.py @@ -8,4 +8,8 @@ class DatasetFormatSerializer(serializers.Serializer): name = serializers.CharField(max_length=64, source='DISPLAY_NAME') ext = serializers.CharField(max_length=64, source='EXT') - version = serializers.CharField(max_length=64, source='VERSION') \ No newline at end of file + version = serializers.CharField(max_length=64, source='VERSION') + +class DatasetFormatsSerializer(serializers.Serializer): + importers = DatasetFormatSerializer(many=True) + exporters = DatasetFormatSerializer(many=True) \ No newline at end of file diff --git a/cvat/apps/engine/tests/test_rest_api.py b/cvat/apps/engine/tests/test_rest_api.py index 5803df94ad89..245ddb52798a 100644 --- a/cvat/apps/engine/tests/test_rest_api.py +++ b/cvat/apps/engine/tests/test_rest_api.py @@ -2475,17 +2475,10 @@ def _upload_api_v1_tasks_id_annotations(self, pk, user, data, query_params=""): return response - def _get_import_formats(self, user): + def _get_formats(self, user): with ForceLogin(user, self.client): response = self.client.get( - path="/api/v1/server/annotation/import_formats" - ) - return response - - def _get_export_formats(self, user): - with ForceLogin(user, self.client): - response = self.client.get( - path="/api/v1/server/annotation/export_formats" + path="/api/v1/server/annotation/formats" ) return response @@ -3092,22 +3085,17 @@ def _get_initial_annotation(annotation_format): return annotations - response = self._get_import_formats(annotator) + response = self._get_formats(annotator) self.assertEqual(response.status_code, HTTP_200_OK) if annotator is not None: - import_formats = response.data + data = response.data else: 
- import_formats = response = self._get_import_formats(owner).data + data = self._get_formats(owner).data + import_formats = data['importers'] + export_formats = data['exporters'] self.assertTrue(isinstance(import_formats, list) and import_formats) - import_formats = { v['name'] for v in import_formats } - - response = self._get_export_formats(annotator) - self.assertEqual(response.status_code, HTTP_200_OK) - if annotator is not None: - export_formats = response.data - else: - export_formats = response = self._get_export_formats(owner).data self.assertTrue(isinstance(export_formats, list) and export_formats) + import_formats = { v['name'] for v in import_formats } export_formats = { v['name'] for v in export_formats } formats = { exp: exp if exp in import_formats else None diff --git a/cvat/apps/engine/views.py b/cvat/apps/engine/views.py index fe98a2cd9ae4..a90a7661b4e7 100644 --- a/cvat/apps/engine/views.py +++ b/cvat/apps/engine/views.py @@ -32,10 +32,10 @@ from sendfile import sendfile import cvat.apps.dataset_manager as dm -import cvat.apps.dataset_manager.views +import cvat.apps.dataset_manager.views # pylint: disable=unused-import from cvat.apps.authentication import auth from cvat.apps.authentication.decorators import login_required -from cvat.apps.dataset_manager.serializers import DatasetFormatSerializer +from cvat.apps.dataset_manager.serializers import DatasetFormatsSerializer from cvat.apps.engine.frame_provider import FrameProvider from cvat.apps.engine.models import Job, Plugin, StatusChoice, Task from cvat.apps.engine.serializers import ( @@ -202,19 +202,11 @@ def share(request): @staticmethod @swagger_auto_schema(method='get', operation_summary='Method provides the list of supported annotations formats', - responses={'200': DatasetFormatSerializer(many=True)}) - @action(detail=False, methods=['GET'], url_path='annotation/export_formats') - def annotation_export_formats(request): - data = dm.views.get_export_formats() - return Response(DatasetFormatSerializer(data, many=True).data) - - @staticmethod - @swagger_auto_schema(method='get', operation_summary='Method provides the list of supported annotations formats', - responses={'200': DatasetFormatSerializer(many=True)}) - @action(detail=False, methods=['GET'], url_path='annotation/import_formats') - def annotation_import_formats(request): - data = dm.views.get_import_formats() - return Response(DatasetFormatSerializer(data, many=True).data) + responses={'200': DatasetFormatsSerializer()}) + @action(detail=False, methods=['GET'], url_path='annotation/formats') + def annotation_formats(request): + data = dm.views.get_all_formats() + return Response(DatasetFormatsSerializer(data).data) class ProjectFilter(filters.FilterSet): name = filters.CharFilter(field_name="name", lookup_expr="icontains") @@ -468,7 +460,7 @@ def data(self, request, pk): @swagger_auto_schema(method='get', operation_summary='Method allows to download task annotations', manual_parameters=[ openapi.Parameter('format', openapi.IN_QUERY, - description="Desired output format name\nYou can get the list of supported formats at:\n/server/annotation/export_formats", + description="Desired output format name\nYou can get the list of supported formats at:\n/server/annotation/formats", type=openapi.TYPE_STRING, required=False), openapi.Parameter('filename', openapi.IN_QUERY, description="Desired output file name", @@ -486,7 +478,7 @@ def data(self, request, pk): @swagger_auto_schema(method='put', operation_summary='Method allows to upload task annotations', 
manual_parameters=[ openapi.Parameter('format', openapi.IN_QUERY, - description="Input format name\nYou can get the list of supported formats at:\n/server/annotation/import_formats", + description="Input format name\nYou can get the list of supported formats at:\n/server/annotation/formats", type=openapi.TYPE_STRING, required=False), ], responses={ @@ -606,7 +598,7 @@ def data_info(request, pk): @swagger_auto_schema(method='get', operation_summary='Export task as a dataset in a specific format', manual_parameters=[ openapi.Parameter('format', openapi.IN_QUERY, - description="Desired output format name\nYou can get the list of supported formats at:\n/server/annotation/export_formats", + description="Desired output format name\nYou can get the list of supported formats at:\n/server/annotation/formats", type=openapi.TYPE_STRING, required=True), openapi.Parameter('filename', openapi.IN_QUERY, description="Desired output file name", From 73edf83b217bb807f646a7593028ae4d122fb542 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Wed, 15 Apr 2020 15:16:17 +0300 Subject: [PATCH 40/80] fixes --- cvat/apps/dataset_manager/bindings.py | 2 +- cvat/apps/dataset_manager/formats/cvat.py | 9 ++++++--- cvat/apps/dataset_manager/views.py | 14 ++++++++++++-- datumaro/datumaro/util/image.py | 6 +++--- 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/cvat/apps/dataset_manager/bindings.py b/cvat/apps/dataset_manager/bindings.py index 5d1e1f08cb38..f6e6905dd23a 100644 --- a/cvat/apps/dataset_manager/bindings.py +++ b/cvat/apps/dataset_manager/bindings.py @@ -416,7 +416,7 @@ def __init__(self, task_data, include_images=False): if include_images: loader = lambda p: frame_provider.get_frame(frame_data.frame, quality=frame_provider.Quality.ORIGINAL, - out_type=frame_provider.Type.NUMPY_ARRAY) + out_type=frame_provider.Type.NUMPY_ARRAY)[0] dm_image = Image(path=frame_data.name, loader=loader, size=(frame_data.height, frame_data.width) ) diff --git a/cvat/apps/dataset_manager/formats/cvat.py b/cvat/apps/dataset_manager/formats/cvat.py index 334d851a3739..47cfde478d6a 100644 --- a/cvat/apps/dataset_manager/formats/cvat.py +++ b/cvat/apps/dataset_manager/formats/cvat.py @@ -2,6 +2,7 @@ # # SPDX-License-Identifier: MIT +import os import os.path as osp import zipfile from collections import OrderedDict @@ -516,17 +517,19 @@ def _export(dst_file, task_data, anno_callback, save_images=False): anno_callback(f, task_data) if save_images: + img_dir = osp.join(temp_dir, 'images') + os.makedirs(img_dir) frame_provider = FrameProvider(task_data.db_task.data) frames = frame_provider.get_frames( frame_provider.Quality.ORIGINAL, frame_provider.Type.NUMPY_ARRAY) for frame_id, (frame_data, _) in enumerate(frames): - frame_name = osp.basename(task_data.frame_info[frame_id]['path']) + frame_name = task_data.frame_info[frame_id]['path'] if '.' 
in frame_name: - save_image(osp.join(temp_dir, 'images', frame_name), + save_image(osp.join(img_dir, frame_name), frame_data, jpeg_quality=100) else: - save_image(osp.join(temp_dir, 'images', frame_name + '.png'), + save_image(osp.join(img_dir, frame_name + '.png'), frame_data) make_zip_archive(temp_dir, dst_file) diff --git a/cvat/apps/dataset_manager/views.py b/cvat/apps/dataset_manager/views.py index b768cc69cb5a..6ef38b23ed0f 100644 --- a/cvat/apps/dataset_manager/views.py +++ b/cvat/apps/dataset_manager/views.py @@ -4,6 +4,7 @@ import os import os.path as osp +import tempfile from datetime import timedelta import django_rq @@ -51,8 +52,11 @@ def export_task(task_id, dst_format, server_url=None, save_images=False): if not (osp.exists(output_path) and \ task_time <= osp.getmtime(output_path)): os.makedirs(cache_dir, exist_ok=True) - task.export_task(task_id, output_path, dst_format, - server_url=server_url, save_images=save_images) + with tempfile.TemporaryDirectory() as temp_dir: + temp_file = osp.join(temp_dir, 'result') + task.export_task(task_id, temp_file, dst_format, + server_url=server_url, save_images=save_images) + os.replace(temp_file, output_path) archive_ctime = osp.getctime(output_path) scheduler = django_rq.get_scheduler() @@ -95,3 +99,9 @@ def get_export_formats(): def get_import_formats(): return list(IMPORT_FORMATS.values()) + +def get_all_formats(): + return { + 'importers': get_import_formats(), + 'exporters': get_export_formats(), + } \ No newline at end of file diff --git a/datumaro/datumaro/util/image.py b/datumaro/datumaro/util/image.py index 73da1e443528..3a7687a517e7 100644 --- a/datumaro/datumaro/util/image.py +++ b/datumaro/datumaro/util/image.py @@ -195,9 +195,9 @@ def __init__(self, data=None, path=None, loader=None, cache=None, path = '' self._path = path - assert data is not None or path, "Image can not be empty" - if data is None and path: - if osp.isfile(path): + assert data is not None or path or loader, "Image can not be empty" + if data is None and (path or loader): + if osp.isfile(path) or loader: data = lazy_image(path, loader=loader, cache=cache) self._data = data From 1915bb7a6351fabf72aac0e3190e0921e49af7db Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Wed, 15 Apr 2020 15:23:26 +0300 Subject: [PATCH 41/80] update new ui --- cvat-core/src/annotation-format.js | 117 +++--------------- cvat-core/src/api-implementation.js | 9 +- cvat-core/src/api.js | 16 +-- cvat-core/src/server-proxy.js | 17 --- cvat-core/tests/api/server.js | 33 ++--- cvat-ui/src/actions/formats-actions.ts | 7 +- cvat-ui/src/actions/tasks-actions.ts | 2 +- .../components/actions-menu/actions-menu.tsx | 4 +- .../top-bar/annotation-menu.tsx | 4 +- .../containers/actions-menu/actions-menu.tsx | 16 +-- .../top-bar/annotation-menu.tsx | 16 +-- cvat-ui/src/reducers/formats-reducer.ts | 4 +- cvat-ui/src/reducers/interfaces.ts | 3 +- 13 files changed, 44 insertions(+), 204 deletions(-) diff --git a/cvat-core/src/annotation-format.js b/cvat-core/src/annotation-format.js index 5505ecf1a321..641f7bc06161 100644 --- a/cvat-core/src/annotation-format.js +++ b/cvat-core/src/annotation-format.js @@ -12,9 +12,8 @@ class Loader { constructor(initialData) { const data = { - display_name: initialData.display_name, - format: initialData.format, - handler: initialData.handler, + name: initialData.name, + format: initialData.ext, version: initialData.version, }; @@ -27,7 +26,7 @@ * @readonly * @instance */ - get: () => data.display_name, + get: () => data.name, }, format: { /** @@ -39,16 +38,6 @@ 
*/ get: () => data.format, }, - handler: { - /** - * @name handler - * @type {string} - * @memberof module:API.cvat.classes.Loader - * @readonly - * @instance - */ - get: () => data.handler, - }, version: { /** * @name version @@ -71,9 +60,8 @@ class Dumper { constructor(initialData) { const data = { - display_name: initialData.display_name, - format: initialData.format, - handler: initialData.handler, + name: initialData.name, + format: initialData.ext, version: initialData.version, }; @@ -86,7 +74,7 @@ * @readonly * @instance */ - get: () => data.display_name, + get: () => data.name, }, format: { /** @@ -98,16 +86,6 @@ */ get: () => data.format, }, - handler: { - /** - * @name handler - * @type {string} - * @memberof module:API.cvat.classes.Dumper - * @readonly - * @instance - */ - get: () => data.handler, - }, version: { /** * @name version @@ -127,108 +105,41 @@ * @memberof module:API.cvat.classes * @hideconstructor */ - class AnnotationFormat { + class AnnotationFormats { constructor(initialData) { const data = { - created_date: initialData.created_date, - updated_date: initialData.updated_date, - id: initialData.id, - owner: initialData.owner, - name: initialData.name, - handler_file: initialData.handler_file, + exporters: initialData.exporters.map((el) => new Dumper(el)), + importers: initialData.importers.map((el) => new Loader(el)), }; - data.dumpers = initialData.dumpers.map((el) => new Dumper(el)); - data.loaders = initialData.loaders.map((el) => new Loader(el)); - // Now all fields are readonly Object.defineProperties(this, { - id: { - /** - * @name id - * @type {integer} - * @memberof module:API.cvat.classes.AnnotationFormat - * @readonly - * @instance - */ - get: () => data.id, - }, - owner: { - /** - * @name owner - * @type {integer} - * @memberof module:API.cvat.classes.AnnotationFormat - * @readonly - * @instance - */ - get: () => data.owner, - }, - name: { - /** - * @name name - * @type {string} - * @memberof module:API.cvat.classes.AnnotationFormat - * @readonly - * @instance - */ - get: () => data.name, - }, - createdDate: { - /** - * @name createdDate - * @type {string} - * @memberof module:API.cvat.classes.AnnotationFormat - * @readonly - * @instance - */ - get: () => data.created_date, - }, - updatedDate: { - /** - * @name updatedDate - * @type {string} - * @memberof module:API.cvat.classes.AnnotationFormat - * @readonly - * @instance - */ - get: () => data.updated_date, - }, - handlerFile: { - /** - * @name handlerFile - * @type {string} - * @memberof module:API.cvat.classes.AnnotationFormat - * @readonly - * @instance - */ - get: () => data.handler_file, - }, loaders: { /** * @name loaders * @type {module:API.cvat.classes.Loader[]} - * @memberof module:API.cvat.classes.AnnotationFormat + * @memberof module:API.cvat.classes.AnnotationFormats * @readonly * @instance */ - get: () => [...data.loaders], + get: () => [...data.importers], }, dumpers: { /** * @name dumpers * @type {module:API.cvat.classes.Dumper[]} - * @memberof module:API.cvat.classes.AnnotationFormat + * @memberof module:API.cvat.classes.AnnotationFormats * @readonly * @instance */ - get: () => [...data.dumpers], + get: () => [...data.exporters], }, }); } } module.exports = { - AnnotationFormat, + AnnotationFormats, Loader, Dumper, }; diff --git a/cvat-core/src/api-implementation.js b/cvat-core/src/api-implementation.js index 4b7e28727191..3714939da498 100644 --- a/cvat-core/src/api-implementation.js +++ b/cvat-core/src/api-implementation.js @@ -26,7 +26,7 @@ } = require('./enums'); const User = 
require('./user'); - const { AnnotationFormat } = require('./annotation-format.js'); + const { AnnotationFormats } = require('./annotation-format.js'); const { ArgumentError } = require('./exceptions'); const { Task } = require('./session'); @@ -66,12 +66,7 @@ cvat.server.formats.implementation = async () => { const result = await serverProxy.server.formats(); - return result.map((el) => new AnnotationFormat(el)); - }; - - cvat.server.datasetFormats.implementation = async () => { - const result = await serverProxy.server.datasetFormats(); - return result; + return new AnnotationFormats(result); }; cvat.server.register.implementation = async (username, firstName, lastName, diff --git a/cvat-core/src/api.js b/cvat-core/src/api.js index 4eb4e99af00b..777d3fc01878 100644 --- a/cvat-core/src/api.js +++ b/cvat-core/src/api.js @@ -109,7 +109,7 @@ function build() { * @method formats * @async * @memberof module:API.cvat.server - * @returns {module:API.cvat.classes.AnnotationFormat[]} + * @returns {module:API.cvat.classes.AnnotationFormats} * @throws {module:API.cvat.exceptions.PluginError} * @throws {module:API.cvat.exceptions.ServerError} */ @@ -118,20 +118,6 @@ function build() { .apiWrapper(cvat.server.formats); return result; }, - /** - * Method returns available dataset export formats - * @method exportFormats - * @async - * @memberof module:API.cvat.server - * @returns {module:String[]} - * @throws {module:API.cvat.exceptions.PluginError} - * @throws {module:API.cvat.exceptions.ServerError} - */ - async datasetFormats() { - const result = await PluginRegistry - .apiWrapper(cvat.server.datasetFormats); - return result; - }, /** * Method allows to register on a server * @method register diff --git a/cvat-core/src/server-proxy.js b/cvat-core/src/server-proxy.js index 8251328223db..6fb4fa186970 100644 --- a/cvat-core/src/server-proxy.js +++ b/cvat-core/src/server-proxy.js @@ -154,22 +154,6 @@ return response.data; } - async function datasetFormats() { - const { backendAPI } = config; - - let response = null; - try { - response = await Axios.get(`${backendAPI}/server/dataset/formats`, { - proxy: config.proxy, - }); - response = JSON.parse(response.data); - } catch (errorData) { - throw generateError(errorData); - } - - return response; - } - async function register(username, firstName, lastName, email, password1, password2) { let response = null; try { @@ -664,7 +648,6 @@ about, share, formats, - datasetFormats, exception, login, logout, diff --git a/cvat-core/tests/api/server.js b/cvat-core/tests/api/server.js index 6220e158dd5c..be8881ce22a0 100644 --- a/cvat-core/tests/api/server.js +++ b/cvat-core/tests/api/server.js @@ -18,7 +18,7 @@ jest.mock('../../src/server-proxy', () => { // Initialize api window.cvat = require('../../src/api'); const { - AnnotationFormat, + AnnotationFormats, Loader, Dumper, } = require('../../src/annotation-format'); @@ -58,24 +58,18 @@ describe('Feature: get share storage info', () => { describe('Feature: get annotation formats', () => { test('get annotation formats from a server', async () => { const result = await window.cvat.server.formats(); - expect(Array.isArray(result)).toBeTruthy(); - for (const format of result) { - expect(format).toBeInstanceOf(AnnotationFormat); - } + expect(result).toBeInstanceOf(AnnotationFormats); }); }); describe('Feature: get annotation loaders', () => { test('get annotation formats from a server', async () => { const result = await window.cvat.server.formats(); - expect(Array.isArray(result)).toBeTruthy(); - for (const format of 
result) { - expect(format).toBeInstanceOf(AnnotationFormat); - const { loaders } = format; - expect(Array.isArray(loaders)).toBeTruthy(); - for (const loader of loaders) { - expect(loader).toBeInstanceOf(Loader); - } + expect(result).toBeInstanceOf(AnnotationFormats); + const { loaders } = result; + expect(Array.isArray(loaders)).toBeTruthy(); + for (const loader of loaders) { + expect(loader).toBeInstanceOf(Loader); } }); }); @@ -83,14 +77,11 @@ describe('Feature: get annotation loaders', () => { describe('Feature: get annotation dumpers', () => { test('get annotation formats from a server', async () => { const result = await window.cvat.server.formats(); - expect(Array.isArray(result)).toBeTruthy(); - for (const format of result) { - expect(format).toBeInstanceOf(AnnotationFormat); - const { dumpers } = format; - expect(Array.isArray(dumpers)).toBeTruthy(); - for (const dumper of dumpers) { - expect(dumper).toBeInstanceOf(Dumper); - } + expect(result).toBeInstanceOf(AnnotationFormats); + const { dumpers } = result; + expect(Array.isArray(dumpers)).toBeTruthy(); + for (const dumper of dumpers) { + expect(dumper).toBeInstanceOf(Dumper); } }); }); \ No newline at end of file diff --git a/cvat-ui/src/actions/formats-actions.ts b/cvat-ui/src/actions/formats-actions.ts index 2ef06269d9ce..b340d3a174d1 100644 --- a/cvat-ui/src/actions/formats-actions.ts +++ b/cvat-ui/src/actions/formats-actions.ts @@ -15,10 +15,9 @@ export enum FormatsActionTypes { const formatsActions = { getFormats: () => createAction(FormatsActionTypes.GET_FORMATS), - getFormatsSuccess: (annotationFormats: any[], datasetFormats: any[]) => ( + getFormatsSuccess: (annotationFormats: any) => ( createAction(FormatsActionTypes.GET_FORMATS_SUCCESS, { annotationFormats, - datasetFormats, }) ), getFormatsFailed: (error: any) => ( @@ -32,14 +31,12 @@ export function getFormatsAsync(): ThunkAction { return async (dispatch): Promise => { dispatch(formatsActions.getFormats()); let annotationFormats = null; - let datasetFormats = null; try { annotationFormats = await cvat.server.formats(); - datasetFormats = await cvat.server.datasetFormats(); dispatch( - formatsActions.getFormatsSuccess(annotationFormats, datasetFormats), + formatsActions.getFormatsSuccess(annotationFormats), ); } catch (error) { dispatch(formatsActions.getFormatsFailed(error)); diff --git a/cvat-ui/src/actions/tasks-actions.ts b/cvat-ui/src/actions/tasks-actions.ts index ff1f3e566f29..4e114e217df6 100644 --- a/cvat-ui/src/actions/tasks-actions.ts +++ b/cvat-ui/src/actions/tasks-actions.ts @@ -280,7 +280,7 @@ ThunkAction, {}, {}, AnyAction> { dispatch(exportDataset(task, exporter)); try { - const url = await task.annotations.exportDataset(exporter.tag); + const url = await task.annotations.exportDataset(exporter.name); const downloadAnchor = (window.document.getElementById('downloadAnchor') as HTMLAnchorElement); downloadAnchor.href = url; downloadAnchor.click(); diff --git a/cvat-ui/src/components/actions-menu/actions-menu.tsx b/cvat-ui/src/components/actions-menu/actions-menu.tsx index d38a069f6651..7d3bdf0e21a7 100644 --- a/cvat-ui/src/components/actions-menu/actions-menu.tsx +++ b/cvat-ui/src/components/actions-menu/actions-menu.tsx @@ -23,7 +23,6 @@ interface Props { loaders: string[]; dumpers: string[]; - exporters: string[]; loadActivity: string | null; dumpActivities: string[] | null; exportActivities: string[] | null; @@ -58,7 +57,6 @@ export default function ActionsMenuComponent(props: Props): JSX.Element { dumpers, loaders, - exporters, onClickMenu, 
dumpActivities, exportActivities, @@ -138,7 +136,7 @@ export default function ActionsMenuComponent(props: Props): JSX.Element { } { ExportSubmenu({ - exporters, + exporters: dumpers, exportActivities, menuKey: Actions.EXPORT_TASK_DATASET, }) diff --git a/cvat-ui/src/components/annotation-page/top-bar/annotation-menu.tsx b/cvat-ui/src/components/annotation-page/top-bar/annotation-menu.tsx index 85d057a6f126..8ff9df52c2ad 100644 --- a/cvat-ui/src/components/annotation-page/top-bar/annotation-menu.tsx +++ b/cvat-ui/src/components/annotation-page/top-bar/annotation-menu.tsx @@ -14,7 +14,6 @@ interface Props { taskMode: string; loaders: string[]; dumpers: string[]; - exporters: string[]; loadActivity: string | null; dumpActivities: string[] | null; exportActivities: string[] | null; @@ -34,7 +33,6 @@ export default function AnnotationMenuComponent(props: Props): JSX.Element { taskMode, loaders, dumpers, - exporters, onClickMenu, loadActivity, dumpActivities, @@ -108,7 +106,7 @@ export default function AnnotationMenuComponent(props: Props): JSX.Element { } { ExportSubmenu({ - exporters, + exporters: dumpers, exportActivities, menuKey: Actions.EXPORT_TASK_DATASET, }) diff --git a/cvat-ui/src/containers/actions-menu/actions-menu.tsx b/cvat-ui/src/containers/actions-menu/actions-menu.tsx index 77b1c6cd52b5..65b450ace2a9 100644 --- a/cvat-ui/src/containers/actions-menu/actions-menu.tsx +++ b/cvat-ui/src/containers/actions-menu/actions-menu.tsx @@ -24,8 +24,7 @@ interface OwnProps { } interface StateToProps { - annotationFormats: any[]; - exporters: any[]; + annotationFormats: any; loadActivity: string | null; dumpActivities: string[] | null; exportActivities: string[] | null; @@ -53,7 +52,6 @@ function mapStateToProps(state: CombinedState, own: OwnProps): StateToProps { const { formats: { annotationFormats, - datasetFormats, }, plugins: { list: { @@ -79,7 +77,6 @@ function mapStateToProps(state: CombinedState, own: OwnProps): StateToProps { exportActivities: tid in activeExports ? activeExports[tid] : null, loadActivity: tid in loads ? 
loads[tid] : null, annotationFormats, - exporters: datasetFormats, inferenceIsActive: tid in state.models.inferences, }; } @@ -108,7 +105,6 @@ function ActionsMenuContainer(props: OwnProps & StateToProps & DispatchToProps): const { taskInstance, annotationFormats, - exporters, loadActivity, dumpActivities, exportActivities, @@ -125,11 +121,8 @@ function ActionsMenuContainer(props: OwnProps & StateToProps & DispatchToProps): } = props; - const loaders = annotationFormats - .map((format: any): any[] => format.loaders).flat(); - - const dumpers = annotationFormats - .map((format: any): any[] => format.dumpers).flat(); + const loaders = annotationFormats.loaders.flat(); + const dumpers = annotationFormats.dumpers.flat(); function onClickMenu(params: ClickParam, file?: File): void { if (params.keyPath.length > 1) { @@ -150,7 +143,7 @@ function ActionsMenuContainer(props: OwnProps & StateToProps & DispatchToProps): } } else if (action === Actions.EXPORT_TASK_DATASET) { const format = additionalKey; - const [exporter] = exporters + const [exporter] = dumpers .filter((_exporter: any): boolean => _exporter.name === format); if (exporter) { exportDataset(taskInstance, exporter); @@ -176,7 +169,6 @@ function ActionsMenuContainer(props: OwnProps & StateToProps & DispatchToProps): bugTracker={taskInstance.bugTracker} loaders={loaders.map((loader: any): string => `${loader.name}::${loader.format}`)} dumpers={dumpers.map((dumper: any): string => dumper.name)} - exporters={exporters.map((exporter: any): string => exporter.name)} loadActivity={loadActivity} dumpActivities={dumpActivities} exportActivities={exportActivities} diff --git a/cvat-ui/src/containers/annotation-page/top-bar/annotation-menu.tsx b/cvat-ui/src/containers/annotation-page/top-bar/annotation-menu.tsx index c4d0a35bc8fb..c5af17aeb943 100644 --- a/cvat-ui/src/containers/annotation-page/top-bar/annotation-menu.tsx +++ b/cvat-ui/src/containers/annotation-page/top-bar/annotation-menu.tsx @@ -21,8 +21,7 @@ import { } from 'actions/annotation-actions'; interface StateToProps { - annotationFormats: any[]; - exporters: any[]; + annotationFormats: any; jobInstance: any; loadActivity: string | null; dumpActivities: string[] | null; @@ -48,7 +47,6 @@ function mapStateToProps(state: CombinedState): StateToProps { }, formats: { annotationFormats, - datasetFormats: exporters, }, tasks: { activities: { @@ -69,7 +67,6 @@ function mapStateToProps(state: CombinedState): StateToProps { ? 
loads[taskID] || jobLoads[jobID] : null, jobInstance, annotationFormats, - exporters, }; } @@ -96,7 +93,6 @@ function AnnotationMenuContainer(props: Props): JSX.Element { const { jobInstance, annotationFormats, - exporters, loadAnnotations, dumpAnnotations, exportDataset, @@ -107,11 +103,8 @@ function AnnotationMenuContainer(props: Props): JSX.Element { exportActivities, } = props; - const loaders = annotationFormats - .map((format: any): any[] => format.loaders).flat(); - - const dumpers = annotationFormats - .map((format: any): any[] => format.dumpers).flat(); + const loaders = annotationFormats.loaders.flat(); + const dumpers = annotationFormats.dumpers.flat(); const onClickMenu = (params: ClickParam, file?: File): void => { if (params.keyPath.length > 1) { @@ -132,7 +125,7 @@ function AnnotationMenuContainer(props: Props): JSX.Element { } } else if (action === Actions.EXPORT_TASK_DATASET) { const format = additionalKey; - const [exporter] = exporters + const [exporter] = dumpers .filter((_exporter: any): boolean => _exporter.name === format); if (exporter) { exportDataset(jobInstance.task, exporter); @@ -153,7 +146,6 @@ function AnnotationMenuContainer(props: Props): JSX.Element { taskMode={jobInstance.task.mode} loaders={loaders.map((loader: any): string => loader.name)} dumpers={dumpers.map((dumper: any): string => dumper.name)} - exporters={exporters.map((exporter: any): string => exporter.name)} loadActivity={loadActivity} dumpActivities={dumpActivities} exportActivities={exportActivities} diff --git a/cvat-ui/src/reducers/formats-reducer.ts b/cvat-ui/src/reducers/formats-reducer.ts index f807b8099662..3429fc2fa758 100644 --- a/cvat-ui/src/reducers/formats-reducer.ts +++ b/cvat-ui/src/reducers/formats-reducer.ts @@ -9,8 +9,7 @@ import { AuthActionTypes, AuthActions } from 'actions/auth-actions'; import { FormatsState } from './interfaces'; const defaultState: FormatsState = { - annotationFormats: [], - datasetFormats: [], + annotationFormats: null, initialized: false, fetching: false, }; @@ -33,7 +32,6 @@ export default ( initialized: true, fetching: false, annotationFormats: action.payload.annotationFormats, - datasetFormats: action.payload.datasetFormats, }; case FormatsActionTypes.GET_FORMATS_FAILED: return { diff --git a/cvat-ui/src/reducers/interfaces.ts b/cvat-ui/src/reducers/interfaces.ts index d794ffd72387..04a8af7639a1 100644 --- a/cvat-ui/src/reducers/interfaces.ts +++ b/cvat-ui/src/reducers/interfaces.ts @@ -62,8 +62,7 @@ export interface TasksState { } export interface FormatsState { - annotationFormats: any[]; - datasetFormats: any[]; + annotationFormats: any; fetching: boolean; initialized: boolean; } From d7ff4debbf0d1023595944c0fbb4ca638b744039 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Wed, 15 Apr 2020 16:34:57 +0300 Subject: [PATCH 42/80] ui tests --- cvat-core/tests/mocks/dummy-data.mock.js | 87 +++++++----------------- 1 file changed, 25 insertions(+), 62 deletions(-) diff --git a/cvat-core/tests/mocks/dummy-data.mock.js b/cvat-core/tests/mocks/dummy-data.mock.js index d6f78e2dc616..c41c7d4ec450 100644 --- a/cvat-core/tests/mocks/dummy-data.mock.js +++ b/cvat-core/tests/mocks/dummy-data.mock.js @@ -6,84 +6,47 @@ const aboutDummyData = { "version": "0.5.dev20190516142240" } -const formatsDummyData = [{ - "id": 1, - "dumpers": [ +const formatsDummyData = { + "exporters": [ { - "display_name": "CVAT for video 1.1", - "format": "XML", + "name": "CVAT for video 1.1", + "ext": "XML", "version": "1.1", - "handler": "dump_as_cvat_interpolation" }, { - 
"display_name": "CVAT for images 1.1", - "format": "XML", + "name": "CVAT for images 1.1", + "ext": "XML", "version": "1.1", - "handler": "dump_as_cvat_annotation" - } - ], - "loaders": [ - { - "display_name": "CVAT 1.1", - "format": "XML", - "version": "1.1", - "handler": "load" - } - ], - "name": "CVAT", - "created_date": "2019-08-08T12:18:56.571488+03:00", - "updated_date": "2019-08-08T12:18:56.571533+03:00", - "handler_file": "cvat/apps/annotation/cvat.py", - "owner": null -}, -{ - "id": 2, - "dumpers": [ + }, { - "display_name": "PASCAL VOC ZIP 1.0", - "format": "ZIP", + "name": "PASCAL VOC 1.0", + "ext": "ZIP", "version": "1.0", - "handler": "dump" - } - ], - "loaders": [ + }, { - "display_name": "PASCAL VOC ZIP 1.0", - "format": "ZIP", + "name": "YOLO 1.0", + "ext": "ZIP", "version": "1.0", - "handler": "load" - } + }, ], - "name": "PASCAL VOC", - "created_date": "2019-08-08T12:18:56.625025+03:00", - "updated_date": "2019-08-08T12:18:56.625071+03:00", - "handler_file": "cvat/apps/annotation/pascal_voc.py", - "owner": null -}, -{ - "id": 3, - "dumpers": [ + "importers": [ + { + "name": "CVAT 1.1", + "ext": "XML, ZIP", + "version": "1.1", + }, { - "display_name": "YOLO ZIP 1.0", - "format": "ZIP", + "name": "PASCAL VOC 1.0", + "ext": "ZIP", "version": "1.0", - "handler": "dump" - } - ], - "loaders": [ + }, { - "display_name": "YOLO ZIP 1.0", - "format": "ZIP", + "name": "MYFORMAT 1.0", + "ext": "TXT", "version": "1.0", - "handler": "load" } ], - "name": "YOLO", - "created_date": "2019-08-08T12:18:56.667534+03:00", - "updated_date": "2019-08-08T12:18:56.667578+03:00", - "handler_file": "cvat/apps/annotation/yolo.py", - "owner": null -}]; +}; const usersDummyData = { "count": 2, From 01e176d3a67a2df4ed80b11d3ef4c5030be08774 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Wed, 15 Apr 2020 17:39:46 +0300 Subject: [PATCH 43/80] old ui --- cvat-core/src/server-proxy.js | 4 ++-- .../engine/static/engine/js/annotationUI.js | 22 +++++++++---------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/cvat-core/src/server-proxy.js b/cvat-core/src/server-proxy.js index 6fb4fa186970..ec70d79e95c5 100644 --- a/cvat-core/src/server-proxy.js +++ b/cvat-core/src/server-proxy.js @@ -602,8 +602,8 @@ async function dumpAnnotations(id, name, format) { const { backendAPI } = config; const filename = name.replace(/\//g, '_'); - const baseURL = `${backendAPI}/tasks/${id}/annotations/${encodeURIComponent(filename)}`; - let query = `format=${encodeURIComponent(format)}`; + const baseURL = `${backendAPI}/tasks/${id}/annotations`; + let query = `format=${encodeURIComponent(format)}&filename=${encodeURIComponent(filename)}`; let url = `${baseURL}?${query}`; return new Promise((resolve, reject) => { diff --git a/cvat/apps/engine/static/engine/js/annotationUI.js b/cvat/apps/engine/static/engine/js/annotationUI.js index bc763dfe9afb..baf2f771f5d3 100644 --- a/cvat/apps/engine/static/engine/js/annotationUI.js +++ b/cvat/apps/engine/static/engine/js/annotationUI.js @@ -403,21 +403,19 @@ function setupMenu(job, task, shapeCollectionModel, const loaders = {}; - for (const format of annotationFormats) { - for (const dumper of format.dumpers) { - const item = $(`