Skip to content

Commit 8923963

Browse files
zhiltsov-maxChris Lee-Messer
authored and
Chris Lee-Messer
committed
[Datumaro] CLI updates + better documentation (cvat-ai#1057)
1 parent 1a1c7bc commit 8923963

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

73 files changed

+2461
-1678
lines changed

.vscode/settings.json

+7-1
Original file line numberDiff line numberDiff line change
@@ -25,5 +25,11 @@
2525
}
2626
],
2727
"python.linting.pylintEnabled": true,
28-
"python.envFile": "${workspaceFolder}/.vscode/python.env"
28+
"python.envFile": "${workspaceFolder}/.vscode/python.env",
29+
"python.testing.unittestEnabled": true,
30+
"python.testing.unittestArgs": [
31+
"-v",
32+
"-s",
33+
"./datumaro",
34+
],
2935
}

README.md

+3
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ CVAT is free, online, interactive video and image annotation tool for computer v
1616
- [Installation guide](cvat/apps/documentation/installation.md)
1717
- [User's guide](cvat/apps/documentation/user_guide.md)
1818
- [Django REST API documentation](#rest-api)
19+
- [Datumaro dataset framework](datumaro/README.md)
1920
- [Command line interface](utils/cli/)
2021
- [XML annotation format](cvat/apps/documentation/xml_format.md)
2122
- [AWS Deployment Guide](cvat/apps/documentation/AWS-Deployment-Guide.md)
@@ -34,6 +35,8 @@ CVAT is free, online, interactive video and image annotation tool for computer v
3435
## Supported annotation formats
3536

3637
Format selection is possible after clicking on the Upload annotation / Dump annotation button.
38+
The [Datumaro](datumaro/README.md) dataset framework allows additional dataset transformations
39+
via its command line tool.
3740

3841
| Annotation format | Dumper | Loader |
3942
| ---------------------------------------------------------------------------------- | ------ | ------ |

cvat/apps/dataset_manager/bindings.py

+26-8
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
2+
# Copyright (C) 2019-2020 Intel Corporation
3+
#
4+
# SPDX-License-Identifier: MIT
5+
16
from collections import OrderedDict
27
import os
38
import os.path as osp
@@ -6,7 +11,7 @@
611

712
from cvat.apps.annotation.annotation import Annotation
813
from cvat.apps.engine.annotation import TaskAnnotation
9-
from cvat.apps.engine.models import Task, ShapeType
14+
from cvat.apps.engine.models import Task, ShapeType, AttributeType
1015

1116
import datumaro.components.extractor as datumaro
1217
from datumaro.util.image import lazy_image
@@ -128,18 +133,33 @@ def _read_cvat_anno(self, cvat_anno):
128133
attrs = {}
129134
db_attributes = db_label.attributespec_set.all()
130135
for db_attr in db_attributes:
131-
attrs[db_attr.name] = db_attr.default_value
136+
attrs[db_attr.name] = db_attr
132137
label_attrs[db_label.name] = attrs
133138
map_label = lambda label_db_name: label_map[label_db_name]
134139

140+
def convert_attrs(label, cvat_attrs):
141+
cvat_attrs = {a.name: a.value for a in cvat_attrs}
142+
dm_attr = dict()
143+
for attr_name, attr_spec in label_attrs[label].items():
144+
attr_value = cvat_attrs.get(attr_name, attr_spec.default_value)
145+
try:
146+
if attr_spec.input_type == AttributeType.NUMBER:
147+
attr_value = float(attr_value)
148+
elif attr_spec.input_type == AttributeType.CHECKBOX:
149+
attr_value = attr_value.lower() == 'true'
150+
dm_attr[attr_name] = attr_value
151+
except Exception as e:
152+
slogger.task[self._db_task.id].error(
153+
"Failed to convert attribute '%s'='%s': %s" % \
154+
(attr_name, attr_value, e))
155+
return dm_attr
156+
135157
for tag_obj in cvat_anno.tags:
136158
anno_group = tag_obj.group
137159
if isinstance(anno_group, int):
138160
anno_group = anno_group
139161
anno_label = map_label(tag_obj.label)
140-
anno_attr = dict(label_attrs[tag_obj.label])
141-
for attr in tag_obj.attributes:
142-
anno_attr[attr.name] = attr.value
162+
anno_attr = convert_attrs(tag_obj.label, tag_obj.attributes)
143163

144164
anno = datumaro.LabelObject(label=anno_label,
145165
attributes=anno_attr, group=anno_group)
@@ -150,9 +170,7 @@ def _read_cvat_anno(self, cvat_anno):
150170
if isinstance(anno_group, int):
151171
anno_group = anno_group
152172
anno_label = map_label(shape_obj.label)
153-
anno_attr = dict(label_attrs[shape_obj.label])
154-
for attr in shape_obj.attributes:
155-
anno_attr[attr.name] = attr.value
173+
anno_attr = convert_attrs(shape_obj.label, shape_obj.attributes)
156174

157175
anno_points = shape_obj.points
158176
if shape_obj.type == ShapeType.POINTS:

cvat/apps/dataset_manager/export_templates/README.md

+3-5
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,15 @@ python -m virtualenv .venv
66
. .venv/bin/activate
77

88
# install dependencies
9-
sed -r "s/^(.*)#.*$/\1/g" datumaro/requirements.txt | xargs -n 1 -L 1 pip install
9+
pip install -e datumaro/
1010
pip install -r cvat/utils/cli/requirements.txt
1111

1212
# set up environment
1313
PYTHONPATH=':'
1414
export PYTHONPATH
15-
ln -s $PWD/datumaro/datum.py ./datum
16-
chmod a+x datum
1715

1816
# use Datumaro
19-
./datum --help
17+
datum --help
2018
```
2119

22-
Check Datumaro [QUICKSTART.md](datumaro/docs/quickstart.md) for further info.
20+
Check Datumaro [docs](datumaro/README.md) for more info.

cvat/apps/dataset_manager/export_templates/extractors/cvat_rest_api_task_images.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
2+
# Copyright (C) 2019-2020 Intel Corporation
3+
#
4+
# SPDX-License-Identifier: MIT
5+
16
from collections import OrderedDict
27
import getpass
38
import json
@@ -27,7 +32,7 @@ class cvat_rest_api_task_images(datumaro.Extractor):
2732
def _image_local_path(self, item_id):
2833
task_id = self._config.task_id
2934
return osp.join(self._cache_dir,
30-
'task_{}_frame_{:06d}.jpg'.format(task_id, item_id))
35+
'task_{}_frame_{:06d}.jpg'.format(task_id, int(item_id)))
3136

3237
def _make_image_loader(self, item_id):
3338
return lazy_image(item_id,

cvat/apps/dataset_manager/task.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
2+
# Copyright (C) 2019-2020 Intel Corporation
3+
#
4+
# SPDX-License-Identifier: MIT
5+
16
from datetime import timedelta
27
import json
38
import os
@@ -217,8 +222,9 @@ def export(self, dst_format, save_dir, save_images=False, server_url=None):
217222
if dst_format == EXPORT_FORMAT_DATUMARO_PROJECT:
218223
self._remote_export(save_dir=save_dir, server_url=server_url)
219224
else:
220-
self._dataset.export_project(output_format=dst_format,
221-
save_dir=save_dir, save_images=save_images)
225+
converter = self._dataset.env.make_converter(dst_format,
226+
save_images=save_images)
227+
self._dataset.export_project(converter=converter, save_dir=save_dir)
222228

223229
def _remote_image_converter(self, save_dir, server_url=None):
224230
os.makedirs(save_dir, exist_ok=True)
@@ -246,7 +252,7 @@ def _remote_image_converter(self, save_dir, server_url=None):
246252
if db_video is not None:
247253
for i in range(self._db_task.size):
248254
frame_info = {
249-
'id': str(i),
255+
'id': i,
250256
'width': db_video.width,
251257
'height': db_video.height,
252258
}

cvat/apps/dataset_manager/util.py

+5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
2+
# Copyright (C) 2019-2020 Intel Corporation
3+
#
4+
# SPDX-License-Identifier: MIT
5+
16
import inspect
27
import os, os.path as osp
38
import zipfile

datumaro/CONTRIBUTING.md

+119
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
## Table of Contents
2+
3+
- [Installation](#installation)
4+
- [Usage](#usage)
5+
- [Testing](#testing)
6+
- [Design](#design-and-code-structure)
7+
8+
## Installation
9+
10+
### Prerequisites
11+
12+
- Python (3.5+)
13+
- OpenVINO (optional)
14+
15+
``` bash
16+
git clone https://github.com/opencv/cvat
17+
```
18+
19+
Optionally, create and activate a virtual environment:
20+
21+
``` bash
22+
python -m pip install virtualenv
23+
python -m virtualenv venv
24+
. venv/bin/activate
25+
```
26+
27+
Then install all dependencies:
28+
29+
``` bash
30+
while read -r p; do pip install $p; done < requirements.txt
31+
```
32+
33+
If you're working inside the CVAT environment:
34+
``` bash
35+
. .env/bin/activate
36+
while read -r p; do pip install $p; done < datumaro/requirements.txt
37+
```
38+
39+
## Usage
40+
41+
> The directory containing Datumaro should be in the `PYTHONPATH`
42+
> environment variable or `cvat/datumaro/` should be the current directory.
43+
44+
``` bash
45+
datum --help
46+
python -m datumaro --help
47+
python datumaro/ --help
48+
python datum.py --help
49+
```
50+
51+
``` python
52+
import datumaro
53+
```
54+
55+
## Testing
56+
57+
It is expected that all Datumaro functionality is covered and checked by
58+
unit tests. Tests are placed in the `tests/` directory.
59+
60+
To run tests use:
61+
62+
``` bash
63+
python -m unittest discover -s tests
64+
```
65+
66+
If you're working inside the CVAT environment, you can also use:
67+
68+
``` bash
69+
python manage.py test datumaro/
70+
```
71+
72+
## Design and code structure
73+
74+
- [Design document](docs/design.md)
75+
76+
### Command-line
77+
78+
The command-line interface design uses [Docker](https://www.docker.com/) as an example. Basically,
79+
the interface is divided into contexts and single commands.
80+
Contexts are semantically grouped commands,
81+
related to a single topic or target. Single commands are handy shorter
82+
alternatives for the most used commands and also special commands,
83+
which are hard to fit into any specific context.
84+
85+
![cli-design-image](docs/images/cli_design.png)
86+
87+
- The diagram above was created with [FreeMind](http://freemind.sourceforge.net/wiki/index.php/Main_Page)
88+
89+
Model-View-ViewModel (MVVM) UI pattern is used.
90+
91+
![mvvm-image](docs/images/mvvm.png)
92+
93+
### Datumaro project and environment structure
94+
95+
<!--lint disable fenced-code-flag-->
96+
```
97+
├── [datumaro module]
98+
└── [project folder]
99+
├── .datumaro/
100+
│   ├── config.yml
101+
│   ├── .git/
102+
│   ├── importers/
103+
│   │   ├── custom_format_importer1.py
104+
│   │   └── ...
105+
│   ├── statistics/
106+
│   │   ├── custom_statistic1.py
107+
│   │   └── ...
108+
│   ├── visualizers/
109+
│   │ ├── custom_visualizer1.py
110+
│   │ └── ...
111+
│   └── extractors/
112+
│   ├── custom_extractor1.py
113+
│   └── ...
114+
├── dataset/
115+
└── sources/
116+
├── source1
117+
└── ...
118+
```
119+
<!--lint enable fenced-code-flag-->

0 commit comments

Comments
 (0)