Skip to content

Commit 2e62db8

Browse files
author
Maxim Zhiltsov
committed
Merge branch 'ay/mpii-format' of https://github.com/openvinotoolkit/datumaro into ay/mpii-format
2 parents dddb44a + 801e1e2 commit 2e62db8

File tree

16 files changed

+494
-157
lines changed

16 files changed

+494
-157
lines changed

.github/workflows/github_pages.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ jobs:
3434
run: |
3535
pip install gitpython packaging toml Sphinx==4.2.0 sphinx-rtd-theme==1.0.0
3636
pip install -r requirements.txt
37-
sphinx-build -a -n site/source site/static/api
37+
pip install git+https://github.com/pytorch-ignite/sphinxcontrib-versioning.git@a1a1a94ca80a0233f0df3eaf9876812484901e76
38+
sphinx-versioning -l site/source/conf.py build -r develop -w develop site/source site/static/api
3839
python site/build_docs.py
3940
4041
- name: Deploy

CHANGELOG.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1212
(<https://github.com/openvinotoolkit/datumaro/pull/582>)
1313
- Extension autodetection in `ByteImage`
1414
(<https://github.com/openvinotoolkit/datumaro/pull/595>)
15-
- Import for MPII Human Pose Dataset
15+
- MPII Human Pose Dataset (import-only) (.mat and .json)
1616
(<https://github.com/openvinotoolkit/datumaro/pull/584>)
17+
- MARS format (import-only)
18+
(<https://github.com/openvinotoolkit/datumaro/pull/585>)
1719

1820
### Changed
1921
- `smooth_line` from `datumaro.util.annotation_util` - the function

datumaro/plugins/market1501_format.py

Lines changed: 82 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -3,137 +3,149 @@
33
# SPDX-License-Identifier: MIT
44

55
from distutils.util import strtobool
6-
from itertools import chain
76
import os
87
import os.path as osp
98
import re
109

1110
from datumaro.components.converter import Converter
12-
from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor
11+
from datumaro.components.extractor import DatasetItem, Extractor, Importer
1312
from datumaro.util.image import find_images
1413

1514

1615
class Market1501Path:
    # Directory whose images are loaded as the 'query' subset.
    QUERY_DIR = 'query'
    # Prefix of per-subset image directories; the remainder of the
    # directory name is used as the subset name.
    BBOX_DIR = 'bounding_box_'
    # Default image extension used by the converter.
    IMAGE_EXT = '.jpg'
    # Image base name layout. Capture groups, in order:
    #   1: person id (may be negative), 2: camera id, 3: track id,
    #   4: frame id, 5: bbox id, 6: optional trailing custom name.
    PATTERN = re.compile(r'^(-?\d+)_c(\d+)s(\d+)_(\d+)_(\d+)(.*)')
    # Prefix of per-subset image list files; the remainder of the file name
    # (without extension) is used as the subset name.
    LIST_PREFIX = 'images_'
    # NOTE(review): not referenced by the extractor/importer code visible
    # here; presumably a placeholder for a missing id - confirm before use.
    UNKNOWN_ID = -1
    # Names of the integer-like attributes encoded in an image name.
    ATTRIBUTES = ['person_id', 'camera_id', 'track_id', 'frame_id', 'bbox_id']
2323

24-
class Market1501Extractor(Extractor):
    """Loads every subset found in a Market-1501 dataset directory.

    A subset is discovered from one of the following entries located
    directly in the dataset directory:

    - a ``bounding_box_<subset>`` directory with images,
    - an ``images_<subset>.txt`` file listing image paths relative to the
      dataset directory, one per line,
    - a directory whose name starts with ``query`` (subset ``query``).

    When both a directory and a list file describe the same subset, the
    list file is used.
    """

    def __init__(self, path):
        """
        Args:
            path: the dataset root directory.

        Raises:
            NotADirectoryError: if ``path`` is not an existing directory.
        """
        if not osp.isdir(path):
            raise NotADirectoryError(
                "Can't open folder with annotation files '%s'" % path)

        self._path = path
        super().__init__()

        # Directories and list files are collected separately so that the
        # result does not depend on the order returned by os.listdir().
        dir_subsets = {}
        list_subsets = {}
        for name in sorted(os.listdir(path)):
            full_path = osp.join(path, name)

            if name.startswith(Market1501Path.BBOX_DIR) and \
                    osp.isdir(full_path):
                # Strip only the leading prefix, not every occurrence.
                subset = name[len(Market1501Path.BBOX_DIR):]
                dir_subsets[subset] = full_path

            if name.startswith(Market1501Path.LIST_PREFIX) and \
                    osp.isfile(full_path):
                subset = name[len(Market1501Path.LIST_PREFIX):]
                subset = osp.splitext(subset)[0]
                list_subsets[subset] = full_path

            if name.startswith(Market1501Path.QUERY_DIR) and \
                    osp.isdir(full_path):
                dir_subsets[Market1501Path.QUERY_DIR] = full_path

        # A list file overrides a directory describing the same subset.
        subsets = {**dir_subsets, **list_subsets}

        self._items = []
        for subset, subset_path in subsets.items():
            self._items.extend(
                self._load_items(subset, subset_path).values())

    def __iter__(self):
        yield from self._items

    def _load_items(self, subset, subset_path):
        """Builds the items of one subset.

        Args:
            subset: the subset name assigned to the created items.
            subset_path: either a list file with image paths relative to
                the dataset root, or a directory searched recursively
                for images.

        Returns:
            A dict mapping item id to DatasetItem.
        """
        items = {}

        if osp.isfile(subset_path):
            with open(subset_path, encoding='utf-8') as f:
                # Blank lines would otherwise resolve to the root directory.
                paths = [osp.join(self._path, line.strip())
                    for line in f if line.strip()]
        else:
            paths = list(find_images(subset_path, recursive=True))

        for image_path in sorted(paths):
            item_id = osp.splitext(osp.normpath(image_path))[0]
            # Always make the id relative to the dataset root: the image
            # paths are relative themselves when the root is relative.
            item_id = osp.relpath(item_id, self._path)
            # Drop the leading subset directory if there is one; an image
            # located directly in the root keeps its name as the id.
            item_id = item_id.split(osp.sep, maxsplit=1)[-1]

            attributes = {}
            search = Market1501Path.PATTERN.search(osp.basename(item_id))
            if search:
                attribute_values = search.groups()[0:5]
                attributes = {
                    # The person id is kept as the original string.
                    'person_id': attribute_values[0],
                    # Camera ids are 1-based in file names, 0-based here.
                    'camera_id': int(attribute_values[1]) - 1,
                    'track_id': int(attribute_values[2]),
                    'frame_id': int(attribute_values[3]),
                    'bbox_id': int(attribute_values[4]),
                    'query': subset == Market1501Path.QUERY_DIR
                }

                custom_name = search.groups()[5]
                if custom_name:
                    item_id = osp.join(osp.dirname(item_id), custom_name)

            # The first image seen for an id wins; later ones are ignored.
            if item_id not in items:
                items[item_id] = DatasetItem(id=item_id, subset=subset,
                    image=image_path, attributes=attributes)

        return items
9799

98100
class Market1501Importer(Importer):
    """Detects Market-1501 datasets by the names of top-level entries."""

    @classmethod
    def find_sources(cls, path):
        """Returns a single source for ``path`` if it looks like a dataset.

        Args:
            path: candidate dataset root.

        Returns:
            A one-element list with the source description when ``path`` is
            a directory containing an entry whose name starts with one of
            the known prefixes, otherwise an empty list.
        """
        # os.listdir() raises on anything that is not a directory.
        if not osp.isdir(path):
            return []

        prefixes = (Market1501Path.BBOX_DIR, Market1501Path.QUERY_DIR,
            Market1501Path.LIST_PREFIX)
        if any(name.startswith(prefixes) for name in os.listdir(path)):
            return [{'url': path, 'format': Market1501Extractor.NAME}]

        # Always return a list so callers can iterate the result.
        return []
104107

105108
class Market1501Converter(Converter):
106109
DEFAULT_IMAGE_EXT = Market1501Path.IMAGE_EXT
107110

111+
def _make_dir_name(self, item):
112+
dirname = Market1501Path.BBOX_DIR + item.subset
113+
query = item.attributes.get('query')
114+
if query is not None and isinstance(query, str):
115+
query = strtobool(query)
116+
if query:
117+
dirname = Market1501Path.QUERY_DIR
118+
return dirname
119+
108120
def apply(self):
109121
for subset_name, subset in self._extractor.subsets().items():
110122
annotation = ''
123+
used_frames = {}
111124

112125
for item in subset:
126+
dirname = self._make_dir_name(item)
127+
113128
image_name = item.id
114-
if Market1501Path.PATTERN.search(image_name) is None:
115-
if 'person_id' in item.attributes and \
116-
'camera_id' in item.attributes:
117-
image_pattern = '{:04d}_c{}s1_000000_00{}'
118-
pid = int(item.attributes['person_id'])
119-
camid = int(item.attributes['camera_id']) + 1
120-
dirname, basename = osp.split(item.id)
121-
image_name = osp.join(dirname,
122-
image_pattern.format(pid, camid, basename))
123-
124-
dirname = Market1501Path.BBOX_DIR + subset_name
125-
if 'query' in item.attributes:
126-
query = item.attributes.get('query')
127-
if isinstance(query, str):
128-
query = strtobool(query)
129-
if query:
130-
dirname = Market1501Path.QUERY_DIR
129+
pid = item.attributes.get('person_id')
130+
match = Market1501Path.PATTERN.fullmatch(item.id)
131+
if not match and pid:
132+
cid = int(item.attributes.get('camera_id', 0)) + 1
133+
tid = int(item.attributes.get('track_id', 1))
134+
bbid = int(item.attributes.get('bbox_id', 0))
135+
fid = int(item.attributes.get('frame_id',
136+
max(used_frames.get((pid, cid, tid), [-1])) + 1))
137+
image_name = osp.join(osp.dirname(image_name),
138+
f'{pid}_c{cid}s{tid}_{fid:06d}_{bbid:02d}')
131139

132140
image_path = self._make_image_filename(item,
133141
name=image_name, subdir=dirname)
134142
if self._save_images and item.has_image:
135143
self._save_image(item, osp.join(self._save_dir, image_path))
136144

145+
attrs = Market1501Path.PATTERN.search(image_name)
146+
if attrs:
147+
attrs = attrs.groups()
148+
used_frames.setdefault(attrs[0:2], []).append(int(attrs[3]))
137149
annotation += '%s\n' % image_path
138150

139151
annotation_file = osp.join(self._save_dir,

datumaro/plugins/mars_format.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
# Copyright (C) 2020-2021 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: MIT
4+
import fnmatch
5+
import glob
6+
import logging as log
7+
import os
8+
import os.path as osp
9+
10+
from datumaro.components.annotation import (
11+
AnnotationType, Label, LabelCategories,
12+
)
13+
from datumaro.components.dataset import DatasetItem
14+
from datumaro.components.extractor import Extractor, Importer
15+
from datumaro.components.format_detection import FormatDetectionContext
16+
from datumaro.util.image import find_images
17+
18+
19+
class MarsPath:
    # fnmatch-style pattern of subset directories, e.g. 'bbox_train'.
    SUBSET_DIR_PATTERN = 'bbox_*'
    # fnmatch-style patterns of per-person image directories:
    # four digits, or the literal name '00-1'.
    IMAGE_DIR_PATTERNS = ['[0-9][0-9][0-9][0-9]', '00-1']
    # fnmatch-style tail of an image name that follows the directory name:
    # 'C' + 1 digit, 'T' + 4 digits, 'F' + 3 digits, then anything after '.'.
    IMAGE_NAME_POSTFIX = 'C[0-9]T[0-9][0-9][0-9][0-9]F[0-9][0-9][0-9].*'
24+
25+
class MarsExtractor(Extractor):
    """Loads a MARS dataset directory.

    Subsets are the ``bbox_*`` directories located directly in the dataset
    directory. Labels are the names of the image directories found inside
    the subset directories.
    """

    def __init__(self, path):
        """
        Args:
            path: the dataset root directory; must be an existing directory.
        """
        assert osp.isdir(path), path
        super().__init__()

        self._dataset_dir = path

        # Sorted so that subset and item order do not depend on the order
        # returned by os.listdir().
        self._subsets = {}
        for subset_dir in sorted(os.listdir(path)):
            subset_path = osp.join(path, subset_dir)
            if osp.isdir(subset_path) and \
                    fnmatch.fnmatch(subset_dir, MarsPath.SUBSET_DIR_PATTERN):
                subset = subset_dir.split('_', maxsplit=1)[1]
                self._subsets[subset] = subset_path

        self._categories = self._load_categories()
        self._items = []
        for subset, subset_path in self._subsets.items():
            self._items.extend(self._load_items(subset, subset_path))

    def __iter__(self):
        yield from self._items

    def categories(self):
        return self._categories

    def _load_categories(self):
        """Collects label names from image directories of all subsets.

        Returns:
            A dict with the label categories, sorted by name.
        """
        # A set is used because the same directory name may appear
        # in more than one subset.
        dirs = set()
        for subset_path in self._subsets.values():
            for dir_name in os.listdir(subset_path):
                # subset_path already includes the dataset directory.
                if not osp.isdir(osp.join(subset_path, dir_name)):
                    continue
                if any(fnmatch.fnmatch(dir_name, image_dir)
                        for image_dir in MarsPath.IMAGE_DIR_PATTERNS):
                    dirs.add(dir_name)

        return {
            AnnotationType.label: LabelCategories.from_iterable(sorted(dirs))
        }

    def _load_items(self, subset, path):
        """Builds the items of one subset.

        Args:
            subset: the subset name assigned to annotated items.
            path: the subset directory.

        Returns:
            A list of DatasetItem objects.
        """
        items = []
        label_categories = self._categories[AnnotationType.label]
        for label_cat in label_categories:
            label = label_cat.name
            label_id = label_categories.find(label)[0]

            label_dir = osp.join(path, label)
            # Labels are collected over all subsets, so this subset
            # may have no directory for the label.
            if not osp.isdir(label_dir):
                continue

            for image_path in find_images(label_dir):
                image_name = osp.basename(image_path)
                item_id = osp.splitext(image_name)[0]
                pedestrian_id = image_name[0:4]

                if not fnmatch.fnmatch(image_name,
                        label + MarsPath.IMAGE_NAME_POSTFIX):
                    # NOTE(review): images with a non-standard name are kept
                    # without a subset, label or attributes, as before -
                    # confirm that dropping the subset is intended.
                    items.append(DatasetItem(id=item_id, image=image_path))
                    continue

                if pedestrian_id != label:
                    log.warning("The image %s will be skipped because "
                        "the pedestrian id for it does not match "
                        "the directory name: %s", image_path, label)
                    continue

                # Fixed-width name layout: <label:4>C<d>T<dddd>F<ddd>.
                items.append(DatasetItem(id=item_id, image=image_path,
                    subset=subset, annotations=[Label(label=label_id)],
                    attributes={'person_id': pedestrian_id,
                        'camera_id': int(image_name[5]),
                        'track_id': int(image_name[7:11]),
                        'frame_id': int(image_name[12:15])
                    })
                )

        return items
89+
90+
class MarsImporter(Importer):
    """Detects MARS datasets by the layout
    ``bbox_*/<person dir>/<person dir>C?T????F???.*``.
    """

    @classmethod
    def detect(cls, context: FormatDetectionContext):
        # Any one of the image directory patterns is enough.
        with context.require_any():
            for image_dir in MarsPath.IMAGE_DIR_PATTERNS:
                with context.alternative():
                    context.require_file('/'.join([MarsPath.SUBSET_DIR_PATTERN,
                        image_dir, image_dir + MarsPath.IMAGE_NAME_POSTFIX]
                    ))

    @classmethod
    def find_sources(cls, path):
        """Returns a single source for ``path`` if it contains MARS images.

        Args:
            path: candidate dataset root.

        Returns:
            A one-element list with the source description when at least
            one image matching the expected layout is found, otherwise
            an empty list.
        """
        # os.listdir() raises on anything that is not a directory.
        if not osp.isdir(path):
            return []

        # The root and subset names are literal paths, not patterns.
        root = glob.escape(path)
        for subset_dir in os.listdir(path):
            if not (osp.isdir(osp.join(path, subset_dir)) and
                    fnmatch.fnmatch(subset_dir, MarsPath.SUBSET_DIR_PATTERN)):
                continue

            for image_dir in MarsPath.IMAGE_DIR_PATTERNS:
                pattern = '/'.join((root, glob.escape(subset_dir), image_dir,
                    image_dir + MarsPath.IMAGE_NAME_POSTFIX))
                # One matching file is enough; stop at the first hit.
                if next(glob.iglob(pattern), None) is not None:
                    return [{'url': path, 'format': 'mars'}]

        # Always return a list so callers can iterate the result.
        return []

0 commit comments

Comments
 (0)