5
5
6
6
import os .path as osp
7
7
from collections import OrderedDict , namedtuple
8
+ from pathlib import Path
8
9
9
10
from django .utils import timezone
10
11
@@ -125,8 +126,8 @@ def _init_frame_info(self):
125
126
} for db_image in self ._db_task .data .images .all ()}
126
127
127
128
self ._frame_mapping = {
128
- self ._get_filename (info ["path" ]): frame
129
- for frame , info in self ._frame_info .items ()
129
+ self ._get_filename (info ["path" ]): frame_number
130
+ for frame_number , info in self ._frame_info .items ()
130
131
}
131
132
132
133
def _init_meta (self ):
@@ -398,16 +399,27 @@ def db_task(self):
398
399
399
400
@staticmethod
400
401
def _get_filename (path ):
401
- return osp .splitext (osp .basename (path ))[0 ]
402
-
403
- def match_frame (self , filename ):
404
- # try to match by filename
405
- _filename = self ._get_filename (filename )
406
- if _filename in self ._frame_mapping :
407
- return self ._frame_mapping [_filename ]
408
-
409
- raise Exception (
410
- "Cannot match filename or determine frame number for {} filename" .format (filename ))
402
+ return osp .splitext (path )[0 ]
403
+
404
def match_frame(self, path, root_hint=None):
    """Return the frame number registered for ``path``, or ``None``.

    ``path`` is normalized with ``self._get_filename`` and looked up in
    ``self._frame_mapping``. If nothing is found and ``root_hint`` is
    given, the lookup is retried with ``root_hint`` prepended, unless the
    normalized path already starts with it.

    Args:
        path: Path (or item id) to resolve to a frame number.
        root_hint: Optional path prefix to try when the direct lookup fails.

    Returns:
        The mapped frame number, or ``None`` when neither lookup succeeds.
    """
    path = self._get_filename(path)
    match = self._frame_mapping.get(path)
    # Compare against None explicitly: a frame number of 0 is a valid
    # match but is falsy, and must not trigger the root_hint retry.
    if match is None and root_hint and not path.startswith(root_hint):
        match = self._frame_mapping.get(osp.join(root_hint, path))
    return match
411
+
412
def match_frame_fuzzy(self, path):
    """Return the first mapped frame whose path ends with ``path``.

    The comparison is made on whole path parts (an ``endswith()`` for
    paths), after normalizing ``path`` with ``self._get_filename``.

    Preconditions:
    - The input dataset is full, i.e. all items are present. Matching a
      partial dataset can't be correct for all input cases.
    - ``path`` is the longest path of the input dataset in terms of
      path parts.

    Returns:
        The frame mapped to the first matching entry of
        ``self._frame_mapping``, or ``None`` if no entry matches.
    """
    wanted = Path(self._get_filename(path)).parts
    tail = len(wanted)
    candidates = (
        frame
        for known, frame in self._frame_mapping.items()
        if Path(known).parts[-tail:] == wanted
    )
    return next(candidates, None)
411
423
412
424
class CvatTaskDataExtractor (datumaro .SourceExtractor ):
413
425
def __init__ (self , task_data , include_images = False ):
@@ -450,8 +462,7 @@ def categories(self):
450
462
def _load_categories (cvat_anno ):
451
463
categories = {}
452
464
453
- label_categories = datumaro .LabelCategories (
454
- attributes = ['occluded' , 'z_order' ])
465
+ label_categories = datumaro .LabelCategories (attributes = ['occluded' ])
455
466
456
467
for _ , label in cvat_anno .meta ['task' ]['labels' ]:
457
468
label_categories .add (label ['name' ])
@@ -537,20 +548,14 @@ def convert_attrs(label, cvat_attrs):
537
548
538
549
return item_anno
539
550
540
- def match_frame (item , task_data ):
551
+ def match_dm_item (item , task_data , root_hint = None ):
541
552
is_video = task_data .meta ['task' ]['mode' ] == 'interpolation'
542
553
543
554
frame_number = None
544
555
if frame_number is None and item .has_image :
545
- try :
546
- frame_number = task_data .match_frame (item .image .path )
547
- except Exception :
548
- pass
556
+ frame_number = task_data .match_frame (item .image .path , root_hint )
549
557
if frame_number is None :
550
- try :
551
- frame_number = task_data .match_frame (item .id )
552
- except Exception :
553
- pass
558
+ frame_number = task_data .match_frame (item .id , root_hint )
554
559
if frame_number is None :
555
560
frame_number = cast (item .attributes .get ('frame' , item .id ), int )
556
561
if frame_number is None and is_video :
@@ -561,6 +566,19 @@ def match_frame(item, task_data):
561
566
item .id )
562
567
return frame_number
563
568
569
def find_dataset_root(dm_dataset, task_data):
    """Guess the path prefix that task frame paths have over dataset item ids.

    The item whose id has the most path parts is matched against the task
    with ``task_data.match_frame_fuzzy``. The part of the matched frame's
    directory that precedes the item's own directory is the result.

    Args:
        dm_dataset: Iterable of items exposing an ``id`` attribute.
        task_data: Object providing ``match_frame_fuzzy(path)`` and a
            ``frame_info`` mapping of frame -> dict with a ``'path'`` key.

    Returns:
        The prefix without a trailing ``'/'`` (possibly an empty string),
        or ``None`` when the dataset is empty or no frame matches.
    """
    # default=None avoids the ValueError max() raises on an empty dataset.
    longest_item = max(
        dm_dataset, key=lambda item: len(Path(item.id).parts), default=None)
    if longest_item is None:
        return None

    longest_path = longest_item.id
    longest_match = task_data.match_frame_fuzzy(longest_path)
    if longest_match is None:
        return None

    matched_dir = osp.dirname(task_data.frame_info[longest_match]['path'])
    # `or None` keeps the whole directory when the item id has no directory
    # part; a slice end of -0 would otherwise produce an empty string.
    prefix = matched_dir[:-len(osp.dirname(longest_path)) or None]
    if prefix.endswith('/'):
        prefix = prefix[:-1]
    return prefix
580
+
581
+
564
582
def import_dm_annotations (dm_dataset , task_data ):
565
583
shapes = {
566
584
datumaro .AnnotationType .bbox : ShapeType .RECTANGLE ,
@@ -569,10 +587,16 @@ def import_dm_annotations(dm_dataset, task_data):
569
587
datumaro .AnnotationType .points : ShapeType .POINTS ,
570
588
}
571
589
590
+ if len (dm_dataset ) == 0 :
591
+ return
592
+
572
593
label_cat = dm_dataset .categories ()[datumaro .AnnotationType .label ]
573
594
595
+ root_hint = find_dataset_root (dm_dataset , task_data )
596
+
574
597
for item in dm_dataset :
575
- frame_number = task_data .abs_frame_id (match_frame (item , task_data ))
598
+ frame_number = task_data .abs_frame_id (
599
+ match_dm_item (item , task_data , root_hint = root_hint ))
576
600
577
601
# do not store one-item groups
578
602
group_map = {0 : 0 }
0 commit comments