from math import exp


class Parser:
    IOU_THRESHOLD = 0.4
    PROB_THRESHOLD = 0.5

    def __init__(self):
        self.objects = []

    def scale_bbox(self, x, y, h, w, class_id, confidence, h_scale, w_scale):
        # Convert a relative box (centre x/y, width, height in [0, 1]) to
        # absolute pixel corner coordinates in the original frame.
        xmin = int((x - w / 2) * w_scale)
        ymin = int((y - h / 2) * h_scale)
        xmax = int(xmin + w * w_scale)
        ymax = int(ymin + h * h_scale)

        return dict(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax, class_id=class_id, confidence=confidence)

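    # The output blob is NCHW, flattened: each of the `num` anchors owns a
    # contiguous block of (coords + classes + 1) channels, and every channel is
    # a side*side feature map. entry_index() turns (anchor, cell, channel) into
    # a flat offset. For example, with side=13, coords=4, classes=80, the
    # objectness score (entry=4) of anchor 0 at cell 0 lives at 13*13*4 = 676.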
    def entry_index(self, side, coord, classes, location, entry):
        side_power_2 = side ** 2
        n = location // side_power_2
        loc = location % side_power_2
        return int(side_power_2 * (n * (coord + classes + 1) + entry) + loc)

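    # IoU = intersection area / union area, computed on the pixel-space boxes
    # produced by scale_bbox; sort_objects uses it to suppress duplicates.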
    def intersection_over_union(self, box_1, box_2):
        width_of_overlap_area = min(box_1['xmax'], box_2['xmax']) - max(box_1['xmin'], box_2['xmin'])
        height_of_overlap_area = min(box_1['ymax'], box_2['ymax']) - max(box_1['ymin'], box_2['ymin'])
        if width_of_overlap_area < 0 or height_of_overlap_area < 0:
            area_of_overlap = 0
        else:
            area_of_overlap = width_of_overlap_area * height_of_overlap_area
        box_1_area = (box_1['ymax'] - box_1['ymin']) * (box_1['xmax'] - box_1['xmin'])
        box_2_area = (box_2['ymax'] - box_2['ymin']) * (box_2['xmax'] - box_2['xmin'])
        area_of_union = box_1_area + box_2_area - area_of_overlap
        if area_of_union == 0:
            return 0
        return area_of_overlap / area_of_union

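    # Greedy non-maximum suppression: after sorting by confidence, any box that
    # overlaps a higher-confidence box by more than IOU_THRESHOLD has its
    # confidence zeroed, so the final filtering pass drops it.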
    def sort_objects(self):
        self.objects = sorted(self.objects, key=lambda obj: obj['confidence'], reverse=True)

        for i in range(len(self.objects)):
            if self.objects[i]['confidence'] == 0:
                continue
            for j in range(i + 1, len(self.objects)):
                if self.intersection_over_union(self.objects[i], self.objects[j]) > self.IOU_THRESHOLD:
                    self.objects[j]['confidence'] = 0

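    # Box decoding below follows the YOLO v3 region formulas used by the OMZ
    # demo linked inside the method:
    #   bx = (col + tx) / side          relative centre x
    #   by = (row + ty) / side          relative centre y
    #   bw = exp(tw) * anchor_w / 416   relative width (416 = network input size)
    #   bh = exp(th) * anchor_h / 416   relative height
    # (tx, ty, tw, th) are read straight from the blob; the YoloRegion layer is
    # assumed to have applied its activations already.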
    def parse_yolo_region(self, blob: 'np.ndarray', original_shape: list, params: dict) -> None:

        # YOLO magic numbers
        # See: https://github.com/opencv/open_model_zoo/blob/acf297c73db8cb3f68791ae1fad4a7cc4a6039e5/demos/python_demos/object_detection_demo_yolov3_async/object_detection_demo_yolov3_async.py#L61
        num = 3
        coords = 4
        classes = 80
        # -----------------

        _, _, out_blob_h, out_blob_w = blob.shape
        assert out_blob_w == out_blob_h, "Invalid size of output blob. It should be in NCHW layout and height should " \
                                         "be equal to width. Current height = {}, current width = {}" \
                                         "".format(out_blob_h, out_blob_w)

        # ------ Extracting layer parameters --
        orig_im_h, orig_im_w = original_shape
        predictions = blob.flatten()
        side_square = params['side'] * params['side']

        # ------ Parsing YOLO Region output --
        for i in range(side_square):
            row = i // params['side']
            col = i % params['side']
            for n in range(num):
                # Index of the objectness score for this anchor and cell
                obj_index = self.entry_index(params['side'], coords, classes, n * side_square + i, coords)
                scale = predictions[obj_index]
                if scale < self.PROB_THRESHOLD:
                    continue
                box_index = self.entry_index(params['side'], coords, classes, n * side_square + i, 0)

                # The network produces location predictions in absolute
                # feature-map coordinates; scale them to relative coordinates.
                x = (col + predictions[box_index + 0 * side_square]) / params['side']
                y = (row + predictions[box_index + 1 * side_square]) / params['side']
                # The argument to exp can be very large in some cases, so guard
                # against overflow and skip such degenerate predictions.
                try:
                    w_exp = exp(predictions[box_index + 2 * side_square])
                    h_exp = exp(predictions[box_index + 3 * side_square])
                except OverflowError:
                    continue

                # 416 is the network input resolution for this YOLO v3 model
                w = w_exp * params['anchors'][2 * n] / 416
                h = h_exp * params['anchors'][2 * n + 1] / 416
                for j in range(classes):
                    class_index = self.entry_index(params['side'], coords, classes, n * side_square + i,
                                                   coords + 1 + j)
                    confidence = scale * predictions[class_index]
                    if confidence < self.PROB_THRESHOLD:
                        continue

                    self.objects.append(self.scale_bbox(x=x,
                                                        y=y,
                                                        h=h,
                                                        w=w,
                                                        class_id=j,
                                                        confidence=confidence,
                                                        h_scale=orig_im_h,
                                                        w_scale=orig_im_w))


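# The loop below assumes `detections` (an iterable of per-frame dicts carrying
# the raw YoloRegion output blobs) and `results` (a collector exposing an
# add_box(xmax, ymax, xmin, ymin, label, frame_number) method) are defined
# earlier in the surrounding script; neither is part of this snippet.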
for detection in detections:
    frame_number = detection['frame_id']
    height = detection['frame_height']
    width = detection['frame_width']
    out_blobs = detection['detections']

    # parse_yolo_region unpacks (height, width), so keep that order here
    original_shape = (height, width)

    # Ratio of the original frame size to the 416x416 network input
    # (currently unused below).
    resized_width = width / 416
    resized_height = height / 416

    resized_shape = (resized_width, resized_height)

    # https://github.com/opencv/open_model_zoo/blob/master/demos/python_demos/object_detection_demo_yolov3_async/object_detection_demo_yolov3_async.py#L72
    anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326]
    conv_6 = {'side': 13, 'mask': [6, 7, 8]}
    conv_14 = {'side': 26, 'mask': [3, 4, 5]}
    conv_22 = {'side': 52, 'mask': [0, 1, 2]}

    yolo_params = {'detector/yolo-v3/Conv_6/BiasAdd/YoloRegion': conv_6,
                   'detector/yolo-v3/Conv_14/BiasAdd/YoloRegion': conv_14,
                   'detector/yolo-v3/Conv_22/BiasAdd/YoloRegion': conv_22}

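    # Each YoloRegion output scale uses 3 of the 9 anchor pairs above,
    # selected by its `mask`; collect them as a flat [w0, h0, w1, h1, w2, h2]
    # list for parse_yolo_region.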
    for conv_net in yolo_params.values():
        mask = conv_net['mask']
        masked_anchors = []
        for idx in mask:
            masked_anchors += [anchors[idx * 2], anchors[idx * 2 + 1]]

        conv_net['anchors'] = masked_anchors

    parser = Parser()

    for name, blob in out_blobs.items():
        parser.parse_yolo_region(blob, original_shape, yolo_params[name])

    parser.sort_objects()

    objects = []

    for obj in parser.objects:
        if obj['confidence'] >= parser.PROB_THRESHOLD:
            label = obj['class_id']
            xmin = obj['xmin']
            xmax = obj['xmax']
            ymin = obj['ymin']
            ymax = obj['ymax']

            results.add_box(xmax, ymax, xmin, ymin, label, frame_number)