|
3 | 3 | # SPDX-License-Identifier: MIT
|
4 | 4 |
|
5 | 5 | from distutils.util import strtobool
|
6 |
| -from itertools import chain |
7 | 6 | import os
|
8 | 7 | import os.path as osp
|
9 | 8 | import re
|
10 | 9 |
|
11 | 10 | from datumaro.components.converter import Converter
|
12 |
| -from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor |
| 11 | +from datumaro.components.extractor import DatasetItem, Extractor, Importer |
13 | 12 | from datumaro.util.image import find_images
|
14 | 13 |
|
15 | 14 |
|
class Market1501Path:
    """Naming conventions shared by the Market-1501 reader and writer."""

    # Directory that holds query images.
    QUERY_DIR = 'query'
    # Prefix of a per-subset image directory; the subset name follows it.
    BBOX_DIR = 'bounding_box_'
    # Prefix of a per-subset image list file; the subset name follows it.
    LIST_PREFIX = 'images_'
    IMAGE_EXT = '.jpg'

    # Placeholder id value (presumably for "no known id" -- confirm with users
    # of this constant outside this class).
    UNKNOWN_ID = -1

    # Attribute names, in the same order as the first five PATTERN groups.
    ATTRIBUTES = ['person_id', 'camera_id', 'track_id', 'frame_id', 'bbox_id']

    # Image base name: <person>_c<camera>s<track>_<frame>_<bbox><custom name>.
    # The sixth group captures whatever follows the bbox number.
    PATTERN = re.compile(r'^(-?\d+)_c(\d+)s(\d+)_(\d+)_(\d+)(.*)')
23 | 23 |
|
class Market1501Extractor(Extractor):
    """Loads every subset found in a Market-1501 dataset directory.

    A subset is discovered from any of these entries of the root directory:
    a ``bounding_box_<subset>`` directory, an ``images_<subset>.txt`` list
    file, or a directory whose name starts with ``query`` (subset ``query``).
    """

    def __init__(self, path):
        """Scan ``path`` for subsets and load their items.

        Raises:
            NotADirectoryError: if ``path`` is not an existing directory.
        """
        if not osp.isdir(path):
            raise NotADirectoryError(
                "Can't open folder with annotation files '%s'" % path)

        self._path = path
        super().__init__()

        subsets = {}
        # os.listdir() returns entries in arbitrary order. Sorting makes the
        # outcome reproducible: when a subset has both a 'bounding_box_*'
        # directory and an 'images_*' list file, the list file is visited
        # later and therefore takes precedence.
        for p in sorted(os.listdir(path)):
            pf = osp.join(path, p)

            if p.startswith(Market1501Path.BBOX_DIR) and osp.isdir(pf):
                subset = p.replace(Market1501Path.BBOX_DIR, '')
                subsets[subset] = pf

            if p.startswith(Market1501Path.LIST_PREFIX) and osp.isfile(pf):
                subset = p.replace(Market1501Path.LIST_PREFIX, '')
                subset = osp.splitext(subset)[0]
                subsets[subset] = pf

            if p.startswith(Market1501Path.QUERY_DIR) and osp.isdir(pf):
                subset = Market1501Path.QUERY_DIR
                subsets[subset] = pf

        self._items = []
        for subset, subset_path in subsets.items():
            self._items.extend(self._load_items(subset, subset_path).values())

    def __iter__(self):
        yield from self._items

    def _load_items(self, subset, subset_path):
        """Build the items of one subset.

        Args:
            subset: subset name assigned to every created item.
            subset_path: either a list file with one image path per line
                (joined onto the dataset root), or a directory that is
                searched recursively for images.

        Returns:
            A dict mapping item id to the created DatasetItem.
        """
        items = {}

        if osp.isfile(subset_path):
            paths = []
            with open(subset_path, encoding='utf-8') as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        # A blank line would otherwise turn the dataset
                        # directory itself into an "image" entry.
                        continue
                    paths.append(osp.join(self._path, line))
        else:
            paths = list(find_images(subset_path, recursive=True))

        for image_path in sorted(paths):
            item_id = osp.splitext(osp.normpath(image_path))[0]
            # Always express the id relative to the dataset root, so that a
            # relative root path does not leak its components into the id.
            item_id = osp.relpath(item_id, self._path)
            # Drop the leading subset directory; an image without a parent
            # directory keeps its bare name instead of raising IndexError.
            item_id = item_id.split(osp.sep, maxsplit=1)[-1]

            attributes = {}
            search = Market1501Path.PATTERN.search(osp.basename(item_id))
            if search:
                groups = search.groups()
                attributes = {
                    # Kept exactly as matched in the file name (a string).
                    'person_id': groups[0],
                    # File names store the camera id 1-based; make it 0-based.
                    'camera_id': int(groups[1]) - 1,
                    'track_id': int(groups[2]),
                    'frame_id': int(groups[3]),
                    'bbox_id': int(groups[4]),
                    'query': subset == Market1501Path.QUERY_DIR
                }

                # Text after the bbox number, if any, replaces the base name.
                custom_name = groups[5]
                if custom_name:
                    item_id = osp.join(osp.dirname(item_id), custom_name)

            # The first image seen for an id wins; later duplicates are ignored.
            if item_id not in items:
                items[item_id] = DatasetItem(id=item_id, subset=subset,
                    image=image_path, attributes=attributes)

        return items
|
97 | 99 |
|
class Market1501Importer(Importer):
    """Detects Market-1501 datasets by the names of directory entries."""

    @classmethod
    def find_sources(cls, path):
        """Return a source description for ``path`` if it looks like Market-1501.

        ``path`` qualifies when it is a directory with at least one entry
        whose name starts with the bounding-box directory prefix, the query
        directory name, or the image list file prefix.

        Returns:
            A one-element list with the source description, or an empty list
            when ``path`` is not a directory or has no matching entry.
        """
        # Without this guard os.listdir() raises for files and missing paths.
        if not osp.isdir(path):
            return []

        prefixes = (Market1501Path.BBOX_DIR, Market1501Path.QUERY_DIR,
            Market1501Path.LIST_PREFIX)
        if any(name.startswith(prefixes) for name in os.listdir(path)):
            return [{'url': path, 'format': Market1501Extractor.NAME}]

        # Always return a list so callers can iterate or take len() of it.
        return []
104 | 107 |
|
105 | 108 | class Market1501Converter(Converter):
|
106 | 109 | DEFAULT_IMAGE_EXT = Market1501Path.IMAGE_EXT
|
107 | 110 |
|
def _make_dir_name(self, item):
    """Choose the output directory name for ``item``.

    Items whose 'query' attribute is truthy go to the query directory;
    all others go to the bounding-box directory named after their subset.
    """
    # Built first, before the attribute is inspected.
    subset_dir = Market1501Path.BBOX_DIR + item.subset

    is_query = item.attributes.get('query')
    if isinstance(is_query, str):
        # String values are converted with strtobool.
        is_query = strtobool(is_query)

    if is_query:
        return Market1501Path.QUERY_DIR
    return subset_dir
| 119 | + |
108 | 120 | def apply(self):
|
109 | 121 | for subset_name, subset in self._extractor.subsets().items():
|
110 | 122 | annotation = ''
|
| 123 | + used_frames = {} |
111 | 124 |
|
112 | 125 | for item in subset:
|
| 126 | + dirname = self._make_dir_name(item) |
| 127 | + |
113 | 128 | image_name = item.id
|
114 |
| - if Market1501Path.PATTERN.search(image_name) is None: |
115 |
| - if 'person_id' in item.attributes and \ |
116 |
| - 'camera_id' in item.attributes: |
117 |
| - image_pattern = '{:04d}_c{}s1_000000_00{}' |
118 |
| - pid = int(item.attributes['person_id']) |
119 |
| - camid = int(item.attributes['camera_id']) + 1 |
120 |
| - dirname, basename = osp.split(item.id) |
121 |
| - image_name = osp.join(dirname, |
122 |
| - image_pattern.format(pid, camid, basename)) |
123 |
| - |
124 |
| - dirname = Market1501Path.BBOX_DIR + subset_name |
125 |
| - if 'query' in item.attributes: |
126 |
| - query = item.attributes.get('query') |
127 |
| - if isinstance(query, str): |
128 |
| - query = strtobool(query) |
129 |
| - if query: |
130 |
| - dirname = Market1501Path.QUERY_DIR |
| 129 | + pid = item.attributes.get('person_id') |
| 130 | + match = Market1501Path.PATTERN.fullmatch(item.id) |
| 131 | + if not match and pid: |
| 132 | + cid = int(item.attributes.get('camera_id', 0)) + 1 |
| 133 | + tid = int(item.attributes.get('track_id', 1)) |
| 134 | + bbid = int(item.attributes.get('bbox_id', 0)) |
| 135 | + fid = int(item.attributes.get('frame_id', |
| 136 | + max(used_frames.get((pid, cid, tid), [-1])) + 1)) |
| 137 | + image_name = osp.join(osp.dirname(image_name), |
| 138 | + f'{pid}_c{cid}s{tid}_{fid:06d}_{bbid:02d}') |
131 | 139 |
|
132 | 140 | image_path = self._make_image_filename(item,
|
133 | 141 | name=image_name, subdir=dirname)
|
134 | 142 | if self._save_images and item.has_image:
|
135 | 143 | self._save_image(item, osp.join(self._save_dir, image_path))
|
136 | 144 |
|
| 145 | + attrs = Market1501Path.PATTERN.search(image_name) |
| 146 | + if attrs: |
| 147 | + attrs = attrs.groups() |
| 148 | + used_frames.setdefault(attrs[0:2], []).append(int(attrs[3])) |
137 | 149 | annotation += '%s\n' % image_path
|
138 | 150 |
|
139 | 151 | annotation_file = osp.join(self._save_dir,
|
|
0 commit comments