
Commit 4f6c19a

added in yolo auto annotation script

1 parent a435b41

File tree

4 files changed: +267 -0 lines changed

CHANGELOG.md (+1)

@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
   https://github.com/opencv/cvat/issues/750).
 - Changed REST API: removed PUT and added DELETE methods for /api/v1/users/ID.
 - Added Mask-RCNN Auto Annotation Script
+- Added Yolo Auto Annotation Script

 ### Changed
 -

utils/open_model_zoo/yolov3/README.md (+22)

# Object Detection YOLO V3 Python Demo, Async API Performance Showcase

See [these instructions][1] for converting the YOLO weights to the OpenVINO format.

As of OpenVINO 2019 R3, only TensorFlow 1.13 and NetworkX 2.3 are supported.
These can be explicitly installed using the following command.

```bash
$ pip3 install tensorflow==1.13 networkx==2.3
```

Additionally, at the time of writing, the model optimizer requires an explicit input shape.

```bash
$ python3 mo_tf.py \
    --input_model /path/to/yolo_v3.pb \
    --tensorflow_use_custom_operations_config $MO_ROOT/extensions/front/tf/yolo_v3.json \
    --input_shape [1,416,416,3]
```

[1]: https://docs.openvinotoolkit.org/latest/_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_YOLO_From_Tensorflow.html
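After conversion, it can be worth sanity-checking that the IR loads and reports the shape that was passed to the model optimizer. A minimal sketch, assuming the OpenVINO 2019-era Python API and that the converted files are named `yolo_v3.xml` / `yolo_v3.bin` (the names are illustrative, taken from the input model name above):

```python
# Minimal sanity check for the converted IR (assumes the OpenVINO 2019
# Python API; the file names below are illustrative, not prescribed).
from openvino.inference_engine import IECore, IENetwork

ie = IECore()
net = IENetwork(model='yolo_v3.xml', weights='yolo_v3.bin')

# The input shape should match --input_shape [1,416,416,3], reported
# here in NCHW layout as [1, 3, 416, 416].
for name, info in net.inputs.items():
    print(name, info.shape)

# The three YOLO region output layers consumed by interp.py should be listed.
for name in net.outputs:
    print(name)
```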

utils/open_model_zoo/yolov3/interp.py (+160)

from math import exp


class Parser:
    IOU_THRESHOLD = 0.4
    PROB_THRESHOLD = 0.5

    def __init__(self):
        self.objects = []

    def scale_bbox(self, x, y, h, w, class_id, confidence, h_scale, w_scale):
        xmin = int((x - w / 2) * w_scale)
        ymin = int((y - h / 2) * h_scale)
        xmax = int(xmin + w * w_scale)
        ymax = int(ymin + h * h_scale)

        return dict(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax,
                    class_id=class_id, confidence=confidence)

    def entry_index(self, side, coord, classes, location, entry):
        # The flattened blob stores, for each cell of the side x side grid,
        # `num` boxes of (coord + classes + 1) channels; return the flat offset
        # of channel `entry` for the box addressed by `location`.
        side_power_2 = side ** 2
        n = location // side_power_2
        loc = location % side_power_2
        return int(side_power_2 * (n * (coord + classes + 1) + entry) + loc)

    def intersection_over_union(self, box_1, box_2):
        width_of_overlap_area = min(box_1['xmax'], box_2['xmax']) - max(box_1['xmin'], box_2['xmin'])
        height_of_overlap_area = min(box_1['ymax'], box_2['ymax']) - max(box_1['ymin'], box_2['ymin'])
        if width_of_overlap_area < 0 or height_of_overlap_area < 0:
            area_of_overlap = 0
        else:
            area_of_overlap = width_of_overlap_area * height_of_overlap_area
        box_1_area = (box_1['ymax'] - box_1['ymin']) * (box_1['xmax'] - box_1['xmin'])
        box_2_area = (box_2['ymax'] - box_2['ymin']) * (box_2['xmax'] - box_2['xmin'])
        area_of_union = box_1_area + box_2_area - area_of_overlap
        if area_of_union == 0:
            return 0
        return area_of_overlap / area_of_union

    def sort_objects(self):
        # Greedy non-maximum suppression: keep the highest-confidence box and
        # zero the confidence of any lower-ranked box that overlaps it too much.
        self.objects = sorted(self.objects, key=lambda obj: obj['confidence'], reverse=True)

        for i in range(len(self.objects)):
            if self.objects[i]['confidence'] == 0:
                continue
            for j in range(i + 1, len(self.objects)):
                if self.intersection_over_union(self.objects[i], self.objects[j]) > self.IOU_THRESHOLD:
                    self.objects[j]['confidence'] = 0

    def parse_yolo_region(self, blob: 'np.ndarray', original_shape: list, params: dict):
        # YOLO magic numbers
        # See: https://github.com/opencv/open_model_zoo/blob/acf297c73db8cb3f68791ae1fad4a7cc4a6039e5/demos/python_demos/object_detection_demo_yolov3_async/object_detection_demo_yolov3_async.py#L61
        num = 3
        coords = 4
        classes = 80
        # -----------------

        _, _, out_blob_h, out_blob_w = blob.shape
        assert out_blob_w == out_blob_h, "Invalid size of output blob. It should be in NCHW layout and height " \
            "should be equal to width. Current height = {}, current width = {}".format(out_blob_h, out_blob_w)

        # ------ Extracting layer parameters ------
        orig_im_h, orig_im_w = original_shape
        predictions = blob.flatten()
        side_square = params['side'] * params['side']

        # ------ Parsing YOLO Region output ------
        for i in range(side_square):
            row = i // params['side']
            col = i % params['side']
            for n in range(num):
                obj_index = self.entry_index(params['side'], coords, classes, n * side_square + i, coords)
                scale = predictions[obj_index]
                if scale < self.PROB_THRESHOLD:
                    continue
                box_index = self.entry_index(params['side'], coords, classes, n * side_square + i, 0)

                # The network produces location predictions in absolute coordinates
                # of the feature map. Scale them to coordinates relative to the frame.
                x = (col + predictions[box_index + 0 * side_square]) / params['side']
                y = (row + predictions[box_index + 1 * side_square]) / params['side']
                # The argument of exp can be a very large number in some cases,
                # so guard against overflow and skip the box.
                try:
                    w_exp = exp(predictions[box_index + 2 * side_square])
                    h_exp = exp(predictions[box_index + 3 * side_square])
                except OverflowError:
                    continue

                w = w_exp * params['anchors'][2 * n] / 416
                h = h_exp * params['anchors'][2 * n + 1] / 416
                for j in range(classes):
                    class_index = self.entry_index(params['side'], coords, classes, n * side_square + i,
                                                   coords + 1 + j)
                    confidence = scale * predictions[class_index]
                    if confidence < self.PROB_THRESHOLD:
                        continue

                    self.objects.append(self.scale_bbox(x=x,
                                                        y=y,
                                                        h=h,
                                                        w=w,
                                                        class_id=j,
                                                        confidence=confidence,
                                                        h_scale=orig_im_h,
                                                        w_scale=orig_im_w))


# `detections` and `results` are provided by the CVAT auto annotation runtime
# that executes this interpretation script.
for detection in detections:
    frame_number = detection['frame_id']
    height = detection['frame_height']
    width = detection['frame_width']
    detection = detection['detections']

    # parse_yolo_region unpacks (orig_im_h, orig_im_w), so pass (height, width).
    original_shape = (height, width)

    resized_width = width / 416
    resized_height = height / 416
    resized_shape = (resized_width, resized_height)  # currently unused

    # https://github.com/opencv/open_model_zoo/blob/master/demos/python_demos/object_detection_demo_yolov3_async/object_detection_demo_yolov3_async.py#L72
    anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326]
    conv_6 = {'side': 13, 'mask': [6, 7, 8]}
    conv_14 = {'side': 26, 'mask': [3, 4, 5]}
    conv_22 = {'side': 52, 'mask': [0, 1, 2]}

    yolo_params = {'detector/yolo-v3/Conv_6/BiasAdd/YoloRegion': conv_6,
                   'detector/yolo-v3/Conv_14/BiasAdd/YoloRegion': conv_14,
                   'detector/yolo-v3/Conv_22/BiasAdd/YoloRegion': conv_22}

    # Each output layer uses only its own subset of the anchor pairs,
    # e.g. mask [6, 7, 8] selects (116, 90), (156, 198), (373, 326).
    for conv_net in yolo_params.values():
        mask = conv_net['mask']
        masked_anchors = []
        for idx in mask:
            masked_anchors += [anchors[idx * 2], anchors[idx * 2 + 1]]

        conv_net['anchors'] = masked_anchors

    parser = Parser()

    for name, blob in detection.items():
        parser.parse_yolo_region(blob, original_shape, yolo_params[name])

    parser.sort_objects()

    for obj in parser.objects:
        if obj['confidence'] >= parser.PROB_THRESHOLD:
            label = obj['class_id']
            xmin = obj['xmin']
            xmax = obj['xmax']
            ymin = obj['ymin']
            ymax = obj['ymax']

            results.add_box(xmax, ymax, xmin, ymin, label, frame_number)
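Since interp.py normally runs inside CVAT's auto annotation executor, the Parser class is the part that can be exercised on its own. A minimal sketch of the greedy NMS behaviour implemented by sort_objects, using hand-made boxes rather than real network output:

```python
# Standalone exercise of Parser's IoU + greedy NMS from interp.py
# (hand-made boxes, not real YOLO output).
parser = Parser()
parser.objects = [
    dict(xmin=0, ymin=0, xmax=100, ymax=100, class_id=0, confidence=0.9),
    dict(xmin=10, ymin=10, xmax=110, ymax=110, class_id=0, confidence=0.8),  # IoU ~0.68 with the first
    dict(xmin=200, ymin=200, xmax=300, ymax=300, class_id=0, confidence=0.7),
]

parser.sort_objects()

# The second box overlaps the first beyond IOU_THRESHOLD (0.4), so its
# confidence is zeroed and it falls below the final PROB_THRESHOLD filter.
kept = [obj for obj in parser.objects if obj['confidence'] >= parser.PROB_THRESHOLD]
print(len(kept))  # -> 2
```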
+84

{
  "label_map": {
    "1": "person",
    "2": "bicycle",
    "3": "car",
    "4": "motorbike",
    "5": "aeroplane",
    "6": "bus",
    "7": "train",
    "8": "truck",
    "9": "boat",
    "10": "traffic light",
    "11": "fire hydrant",
    "12": "stop sign",
    "13": "parking meter",
    "14": "bench",
    "15": "bird",
    "16": "cat",
    "17": "dog",
    "18": "horse",
    "19": "sheep",
    "20": "cow",
    "21": "elephant",
    "22": "bear",
    "23": "zebra",
    "24": "giraffe",
    "25": "backpack",
    "26": "umbrella",
    "27": "handbag",
    "28": "tie",
    "29": "suitcase",
    "30": "frisbee",
    "31": "skis",
    "32": "snowboard",
    "33": "sports ball",
    "34": "kite",
    "35": "baseball bat",
    "36": "baseball glove",
    "37": "skateboard",
    "38": "surfboard",
    "39": "tennis racket",
    "40": "bottle",
    "41": "wine glass",
    "42": "cup",
    "43": "fork",
    "44": "knife",
    "45": "spoon",
    "46": "bowl",
    "47": "banana",
    "48": "apple",
    "49": "sandwich",
    "50": "orange",
    "51": "broccoli",
    "52": "carrot",
    "53": "hot dog",
    "54": "pizza",
    "55": "donut",
    "56": "cake",
    "57": "chair",
    "58": "sofa",
    "59": "pottedplant",
    "60": "bed",
    "61": "diningtable",
    "62": "toilet",
    "63": "tvmonitor",
    "64": "laptop",
    "65": "mouse",
    "66": "remote",
    "67": "keyboard",
    "68": "cell phone",
    "69": "microwave",
    "70": "oven",
    "71": "toaster",
    "72": "sink",
    "73": "refrigerator",
    "74": "book",
    "75": "clock",
    "76": "vase",
    "77": "scissors",
    "78": "teddy bear",
    "79": "hair drier",
    "80": "toothbrush"
  }
}
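Note that interp.py reports YOLO's zero-based class_id, while the label map above is keyed from "1" to "80" (YOLO's class 0 corresponds to "1": "person"), so a consumer of both presumably shifts the id by one. A small illustration, assuming the JSON above is saved locally under the hypothetical name label_map.json:

```python
# Hypothetical lookup from interp.py's zero-based class_id into the
# one-based label_map above ('label_map.json' is an illustrative name).
import json

with open('label_map.json') as f:
    label_map = json.load(f)['label_map']

class_id = 0  # what interp.py stores for YOLO's first class
print(label_map[str(class_id + 1)])  # -> 'person'
```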
