@@ -38,28 +38,27 @@ Datumaro has a number of dataset and annotation features:
38
38
- various annotation operations
39
39
40
40
``` python
41
- from datumaro.components.project import Environment, Dataset
41
+ from datumaro.components.dataset import Dataset
42
42
from datumaro.components.extractor import Bbox, Polygon, DatasetItem
43
43
44
- # Import and save a dataset
45
- env = Environment()
46
- dataset = env.make_importer(' voc' )(' src/dir' ).make_dataset()
47
- env.converters.get(' coco' ).convert(dataset, save_dir = ' dst/dir' )
44
+ # Import and export a dataset
45
+ dataset = Dataset.import_from(' src/dir' , ' voc' )
46
+ dataset.export(' dst/dir' , ' coco' )
48
47
49
48
# Create a dataset, convert polygons to masks, save in PASCAL VOC format
50
49
dataset = Dataset.from_iterable([
51
- DatasetItem(id = ' image1' , annotations = [
52
- Bbox(x = 1 , y = 2 , w = 3 , h = 4 , label = 1 ),
53
- Polygon([1 , 2 , 3 , 2 , 4 , 4 ], label = 2 , attributes = {' occluded' : True }),
54
- ]),
50
+ DatasetItem(id = ' image1' , annotations = [
51
+ Bbox(x = 1 , y = 2 , w = 3 , h = 4 , label = 1 ),
52
+ Polygon([1 , 2 , 3 , 2 , 4 , 4 ], label = 2 , attributes = {' occluded' : True }),
53
+ ]),
55
54
], categories = [' cat' , ' dog' , ' person' ])
56
- dataset = dataset.transform(env.transforms.get( ' polygons_to_masks' ) )
57
- env.converters.get( ' voc ' ).convert(dataset, save_dir = ' dst/dir ' )
55
+ dataset = dataset.transform(' polygons_to_masks' )
56
+ dataset.export( ' dst/dir ' , ' voc ' )
58
57
```
59
58
60
59
### The Dataset class
61
60
62
- The ` Dataset ` class from the ` datumaro.components.project ` module represents
61
+ The ` Dataset ` class from the ` datumaro.components.dataset ` module represents
63
62
a dataset, consisting of multiple ` DatasetItem ` s. Annotations are
64
63
represented by members of the ` datumaro.components.extractor ` module,
65
64
such as ` Label ` , ` Mask ` or ` Polygon ` . A dataset can contain items from one or
@@ -80,16 +79,19 @@ The main operation for a dataset is iteration over its elements.
80
79
An item corresponds to a single image, a video sequence, etc. There are also
81
80
a few other operations available, such as filtering (`dataset.select`) and
82
81
transformations (` dataset.transform ` ). A dataset can be created from extractors
83
- or other datasets with ` dataset .from_extractors` and directly from items with
84
- ` dataset .from_iterable` . A dataset is an extractor itself. If it is created from
85
- multiple extractors, their categories must match, and their contents will be
86
- merged.
82
+ or other datasets with `Dataset.from_extractors()` and directly from items with
83
+ `Dataset.from_iterable()`. A dataset is an extractor itself. If it is created
84
+ from multiple extractors, their categories must match, and their contents
85
+ will be merged.
87
86
88
87
A dataset item is an element of a dataset. Its `id` is the name of the
89
88
corresponding image. There can be some image ` attributes ` ,
90
89
an ` image ` and ` annotations ` .
91
90
92
91
``` python
92
+ from datumaro.components.dataset import Dataset
93
+ from datumaro.components.extractor import Bbox, Polygon, DatasetItem
94
+
93
95
# create a dataset from other datasets
94
96
dataset = Dataset.from_extractors(dataset1, dataset2)
95
97
@@ -105,7 +107,7 @@ dataset = Dataset.from_iterable([
105
107
dataset = dataset.select(lambda item : len (item.annotations) != 0 )
106
108
107
109
# change dataset labels
108
- dataset = dataset.transform(project.env.transforms.get( ' remap_labels' ) ,
110
+ dataset = dataset.transform(' remap_labels' ,
109
111
{' cat' : ' dog' , # rename cat to dog
110
112
' truck' : ' car' , # rename truck to car
111
113
' person' : ' ' , # remove this label
@@ -116,8 +118,7 @@ for item in dataset:
116
118
print (item.id, item.annotations)
117
119
118
120
# iterate over subsets
119
- for subset_name in dataset.subsets():
120
- subset = dataset.get_subset(subset_name) # a dataset, again
121
+ for subset_name, subset in dataset.subsets().items():
121
122
for item in subset:
122
123
print (item.id, item.annotations)
123
124
```
@@ -129,6 +130,7 @@ persistence, of extending, and CLI operation for Datasets. A project can
129
130
be converted to a Dataset with ` project.make_dataset ` . Project datasets
130
131
can have multiple data sources, which are merged on dataset creation. They
131
132
can have a hierarchy. Project configuration is available in ` project.config ` .
133
+ A dataset can be saved in ` datumaro_project ` format.
132
134
133
135
The ` Environment ` class is responsible for accessing built-in and
134
136
project-specific plugins. For a project, there is an instance of
@@ -204,11 +206,12 @@ YoloConverter.convert(dataset, save_dir=dst_dir)
204
206
205
207
### Writing a plugin
206
208
207
- A plugin is a Python module with any name, which exports some symbols.
208
- To export a symbol, inherit it from one of special classes:
209
+ A plugin is a Python module with any name, which exports some symbols. Symbols
210
+ starting with ` _ ` are not exported by default. To export a symbol,
211
+ inherit it from one of the special classes:
209
212
210
213
``` python
211
- from datumaro.components.extractor import Importer, SourceExtractor , Transform
214
+ from datumaro.components.extractor import Importer, Extractor , Transform
212
215
from datumaro.components.launcher import Launcher
213
216
from datumaro.components.converter import Converter
214
217
```
@@ -224,6 +227,19 @@ There is also an additional class to modify plugin appearance in command line:
224
227
225
228
``` python
226
229
from datumaro.components.cli_plugin import CliPlugin
230
+
231
+ class MyPlugin (Converter , CliPlugin ):
232
+ """
233
+ Optional documentation text, which will appear in command-line help
234
+ """
235
+
236
+ NAME = ' optional_custom_plugin_name'
237
+
238
+ def build_cmdline_parser (self , ** kwargs ):
239
+ parser = super ().build_cmdline_parser(** kwargs)
240
+ # set up argparse.ArgumentParser instance
241
+ # the parsed args are supposed to be used as invocation options
242
+ return parser
227
243
```
228
244
229
245
#### Plugin example
@@ -269,13 +285,14 @@ class MyTransform(Transform, CliPlugin):
269
285
` my_plugin2.py ` contents:
270
286
271
287
``` python
272
- from datumaro.components.extractor import SourceExtractor
288
+ from datumaro.components.extractor import Extractor
273
289
274
290
class MyFormat : ...
275
- class MyFormatExtractor (SourceExtractor ): ...
291
+ class _MyFormatConverter (Converter ): ...
292
+ class MyFormatExtractor (Extractor ): ...
276
293
277
294
exports = [MyFormat] # explicit exports declaration
278
- # MyFormatExtractor won't be exported
295
+ # MyFormatExtractor and _MyFormatConverter won't be exported
279
296
```
280
297
281
298
## Command-line
0 commit comments