TransT tracker integration (#5226)

yasakova-anastasia · dschoerk · dschoerk · web-flow · commit 82700f60da93 · 2022-11-04T15:26:09.000+02:00
* AI tracker was one frame late * TransT tracker integration * fixed linter issues * added transt tracker to readme * clone a fixed transt version * nvidia/cuda:11.1-devel-ubuntu20.04 not available anymore, replaced with nvidia/cuda:11.7.0-devel-ubuntu20.04 * Fix show empty tasks (#100) * Fix show empty tasks * v1.41.1 * Update changelog Co-authored-by: Boris Sekachev <sekachev.bs@gmail.com> * [Snyk] Upgrade dotenv-webpack from 7.1.1 to 8.0.0 (#98) feat: upgrade dotenv-webpack from 7.1.1 to 8.0.0 Snyk has created this PR to upgrade dotenv-webpack from 7.1.1 to 8.0.0. See this package in npm: https://www.npmjs.com/package/dotenv-webpack See this project in Snyk: https://app.snyk.io/org/cvat/project/6c66365f-c154-46f2-b5db-4a4cd35fea4d?utm_source=github&utm_medium=referral&page=upgrade-pr Co-authored-by: snyk-bot <snyk-bot@snyk.io> * Add repo disclaimer in README (#127) * Update README.md * Update README.md * Update tools-control.tsx * Add ModelHandler class * Small fixes Co-authored-by: dschoerk <dominik.schoerkhuber@tuwien.ac.at> Co-authored-by: Dominik Schörkhuber <dschoerk@gmx.at> Co-authored-by: Dominik Schörkhuber <e1027470@student.tuwien.ac.at> Co-authored-by: Nikita Manovich <nikita@cvat.ai> Co-authored-by: Maria Khrustaleva <maya17grd@gmail.com> Co-authored-by: Boris Sekachev <sekachev.bs@gmail.com> Co-authored-by: Andrey Zhavoronkov <andrey@cvat.ai> Co-authored-by: snyk-bot <snyk-bot@snyk.io> Co-authored-by: Maxim Zhiltsov <zhiltsov.max35@gmail.com>
diff --git a/README.md b/README.md
@@ -176,6 +176,7 @@ can be ran on:
 | [Text detection v4](/serverless/openvino/omz/intel/text-detection-0004/nuclio)                          | detector   | OpenVINO   |  ✔️  |     |
 | [YOLO v5](/serverless/pytorch/ultralytics/yolov5/nuclio)                                                | detector   | PyTorch    |  ✔️  |     |
 | [SiamMask](/serverless/pytorch/foolwood/siammask/nuclio)                                                | tracker    | PyTorch    |  ✔️  |  ✔️  |
+| [TransT](/serverless/pytorch/dschoerk/transt/nuclio)                                                    | tracker    | PyTorch    |  ✔️  |  ✔️  |
 | [f-BRS](/serverless/pytorch/saic-vul/fbrs/nuclio)                                                       | interactor | PyTorch    |  ✔️  |     |
 | [HRNet](/serverless/pytorch/saic-vul/hrnet/nuclio)                                                      | interactor | PyTorch    |     |  ✔️  |
 | [Inside-Outside Guidance](/serverless/pytorch/shiyinzhang/iog/nuclio)                                   | interactor | PyTorch    |  ✔️  |     |
diff --git a/serverless/pytorch/dschoerk/transt/nuclio/function-gpu.yaml b/serverless/pytorch/dschoerk/transt/nuclio/function-gpu.yaml
@@ -0,0 +1,79 @@
+metadata:
+  # Function name and namespace. function.yaml (CPU) in this directory uses
+  # the same name, so the two files describe alternative builds of one function.
+  name: pth-dschoerk-transt
+  namespace: cvat
+  annotations:
+    # Display name and model kind; the README lists this function as a tracker.
+    name: TransT
+    type: tracker
+    # No value is given for spec (it parses as null).
+    spec:
+    framework: pytorch
+
+spec:
+  # TransT as wired up here returns bounding boxes only; the former text
+  # ("Fast Online Object Tracking and Segmentation") was the SiamMask tagline.
+  description: Transformer Tracking (TransT) bounding-box object tracker
+  runtime: 'python:3.8'
+  handler: main:handler
+  eventTimeout: 30s
+  env:
+    # Lets the handler import modules from the repository cloned below
+    # (WORKDIR /opt/nuclio + clone target "trans-t").
+    - name: PYTHONPATH
+      value: /opt/nuclio/trans-t
+
+  build:
+    image: cvat/pth.dschoerk.transt
+    baseImage: nvidia/cuda:11.7.0-devel-ubuntu20.04
+
+    directives:
+      preCopy:
+        # Put the Miniconda binaries on PATH for the following build steps.
+        - kind: ENV
+          value: PATH="/root/miniconda3/bin:${PATH}"
+        - kind: ARG
+          value: PATH="/root/miniconda3/bin:${PATH}"
+        # Drop the NVIDIA apt sources shipped with the base image before running
+        # apt (presumably so `apt update` cannot fail on them -- confirm).
+        - kind: RUN
+          value: rm -f /etc/apt/sources.list.d/cuda.list /etc/apt/sources.list.d/nvidia-ml.list
+        - kind: RUN
+          value: apt update && apt install -y --no-install-recommends wget git ca-certificates libglib2.0-0 libgl1 && rm -rf /var/lib/apt/lists/* # libxrender1 libxext6
+        - kind: RUN
+          value: wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh &&
+            chmod +x Miniconda3-latest-Linux-x86_64.sh && ./Miniconda3-latest-Linux-x86_64.sh -b &&
+            rm -f Miniconda3-latest-Linux-x86_64.sh
+        - kind: WORKDIR
+          value: /opt/nuclio
+        - kind: RUN
+          value: conda create -y -n transt python=3.8
+        # From here on every RUN step executes inside the "transt" conda env.
+        - kind: SHELL
+          value: '["conda", "run", "-n", "transt", "/bin/bash", "-c"]'
+        # Pinned to the v1.0 tag (same as the CPU function.yaml) so that GPU
+        # and CPU images are built from identical, reproducible sources.
+        - kind: RUN
+          value: git clone --depth 1 --branch v1.0 https://github.com/dschoerk/TransT trans-t
+
+        # CUDA 11.0 wheels of PyTorch for the GPU build.
+        - kind: RUN
+          value: pip install torch==1.7.0+cu110 torchvision==0.8.1+cu110 -f https://download.pytorch.org/whl/torch_stable.html
+
+        - kind: RUN
+          value: pip install jsonpickle opencv-python
+
+        # Model weights; main.py's ModelHandler loads them from /transt.pth.
+        # NOTE(review): --no-check-certificate disables TLS verification for a
+        # file that is later loaded as a model checkpoint. ca-certificates is
+        # installed above, so the flag looks unnecessary -- consider removing it
+        # and/or verifying a checksum of the download.
+        - kind: RUN
+          value: wget --no-check-certificate 'https://drive.google.com/uc?id=1Pq0sK-9jmbLAVtgB9-dPDc2pipCxYdM5' -O /transt.pth
+
+        - kind: RUN
+          value: apt remove -y git wget
+        # The former trailing "RUN cd trans-t" step was dropped: a cd inside one
+        # RUN does not persist to later steps, and PYTHONPATH above already
+        # exposes the cloned sources.
+        - kind: ENTRYPOINT
+          value: '["conda", "run", "-n", "transt"]'
+
+  triggers:
+    myHttpTrigger:
+      maxWorkers: 1
+      kind: 'http'
+      workerAvailabilityTimeoutMilliseconds: 10000
+      attributes:
+        maxRequestBodySize: 33554432 # 32MB
+
+  resources:
+    limits:
+      nvidia.com/gpu: 1
+
+  platform:
+    attributes:
+      restartPolicy:
+        name: always
+        maximumRetryCount: 3
+      mountMode: volume
diff --git a/serverless/pytorch/dschoerk/transt/nuclio/function.yaml b/serverless/pytorch/dschoerk/transt/nuclio/function.yaml
@@ -0,0 +1,75 @@
+metadata:
+  # Function name and namespace. function-gpu.yaml in this directory uses the
+  # same name, so the two files describe alternative builds of one function.
+  name: pth-dschoerk-transt
+  namespace: cvat
+  annotations:
+    # Display name and model kind; the README lists this function as a tracker.
+    name: TransT
+    type: tracker
+    # No value is given for spec (it parses as null).
+    spec:
+    framework: pytorch
+
+spec:
+  # TransT as wired up here returns bounding boxes only; the former text
+  # ("Fast Online Object Tracking and Segmentation") was the SiamMask tagline.
+  description: Transformer Tracking (TransT) bounding-box object tracker
+  runtime: 'python:3.8'
+  handler: main:handler
+  eventTimeout: 30s
+  env:
+    # Lets the handler import modules from the repository cloned below
+    # (WORKDIR /opt/nuclio + clone target "trans-t").
+    - name: PYTHONPATH
+      value: /opt/nuclio/trans-t
+
+  build:
+    image: cvat/pth.dschoerk.transt
+    baseImage: ubuntu:20.04
+
+    directives:
+      preCopy:
+        # Put the Miniconda binaries on PATH for the following build steps.
+        - kind: ENV
+          value: PATH="/root/miniconda3/bin:${PATH}"
+        - kind: ARG
+          value: PATH="/root/miniconda3/bin:${PATH}"
+        # Mirrors the GPU config; `rm -f` succeeds even when the files are
+        # absent (presumably the case on the plain ubuntu base -- confirm).
+        - kind: RUN
+          value: rm -f /etc/apt/sources.list.d/cuda.list /etc/apt/sources.list.d/nvidia-ml.list
+        - kind: RUN
+          value: apt update && apt install -y --no-install-recommends wget git ca-certificates libglib2.0-0 libgl1 && rm -rf /var/lib/apt/lists/* # libxrender1 libxext6
+        - kind: RUN
+          value: wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh &&
+            chmod +x Miniconda3-latest-Linux-x86_64.sh && ./Miniconda3-latest-Linux-x86_64.sh -b &&
+            rm -f Miniconda3-latest-Linux-x86_64.sh
+        - kind: WORKDIR
+          value: /opt/nuclio
+        - kind: RUN
+          value: conda create -y -n transt python=3.8
+        # From here on every RUN step executes inside the "transt" conda env.
+        - kind: SHELL
+          value: '["conda", "run", "-n", "transt", "/bin/bash", "-c"]'
+        # Shallow clone of a fixed tag keeps the build reproducible.
+        - kind: RUN
+          value: git clone --depth 1 --branch v1.0 https://github.com/dschoerk/TransT trans-t
+
+        # CPU-only wheels of PyTorch.
+        - kind: RUN
+          value: pip install torch==1.7.1+cpu torchvision==0.8.2+cpu torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html
+
+        - kind: RUN
+          value: pip install jsonpickle opencv-python
+
+        # Model weights; main.py's ModelHandler loads them from /transt.pth.
+        # NOTE(review): --no-check-certificate disables TLS verification for a
+        # file that is later loaded as a model checkpoint. ca-certificates is
+        # installed above, so the flag looks unnecessary -- consider removing it
+        # and/or verifying a checksum of the download.
+        - kind: RUN
+          value: wget --no-check-certificate 'https://drive.google.com/uc?id=1Pq0sK-9jmbLAVtgB9-dPDc2pipCxYdM5' -O /transt.pth
+
+        - kind: RUN
+          value: apt remove -y git wget
+        # The former trailing "RUN cd trans-t" step was dropped: a cd inside one
+        # RUN does not persist to later steps, and PYTHONPATH above already
+        # exposes the cloned sources.
+        - kind: ENTRYPOINT
+          value: '["conda", "run", "-n", "transt"]'
+
+  triggers:
+    myHttpTrigger:
+      maxWorkers: 1
+      kind: 'http'
+      workerAvailabilityTimeoutMilliseconds: 10000
+      attributes:
+        maxRequestBodySize: 33554432 # 32MB
+
+  platform:
+    attributes:
+      restartPolicy:
+        name: always
+        maximumRetryCount: 3
+      mountMode: volume
diff --git a/serverless/pytorch/dschoerk/transt/nuclio/main.py b/serverless/pytorch/dschoerk/transt/nuclio/main.py
@@ -0,0 +1,36 @@
+import base64
+import io
+import json
+
+import numpy as np
+from model_handler import ModelHandler
+from PIL import Image
+
+
+def init_context(context):
+    context.logger.info("Init context...  0%")
+    model = ModelHandler()
+    context.user_data.model = model
+    context.logger.info("Init context...100%")
+
+def handler(context, event):
+    context.logger.info("Run TransT model")
+    data = event.body
+    buf = io.BytesIO(base64.b64decode(data["image"]))
+    shapes = data.get("shapes")
+    states = data.get("states")
+
+    image = Image.open(buf).convert('RGB')
+    image = np.array(image)[:, :, ::-1].copy()
+
+    results = {
+        'shapes': [],
+        'states': []
+    }
+    for i, shape in enumerate(shapes):
+        shape, state = context.user_data.model.infer(image, shape, states[i] if i < len(states) else None)
+        results['shapes'].append(shape)
+        results['states'].append(state)
+
+    return context.Response(body=json.dumps(results), headers={},
+        content_type='application/json', status_code=200)
diff --git a/serverless/pytorch/dschoerk/transt/nuclio/model_handler.py b/serverless/pytorch/dschoerk/transt/nuclio/model_handler.py
@@ -0,0 +1,78 @@
+# Copyright (C) 2022 CVAT.ai Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import jsonpickle
+import numpy as np
+import torch
+from pysot_toolkit.bbox import get_axis_aligned_bbox
+from pysot_toolkit.trackers.net_wrappers import NetWithBackbone
+from pysot_toolkit.trackers.tracker import Tracker
+
+
+class ModelHandler:
+    def __init__(self):
+        use_gpu = torch.cuda.is_available()
+        net_path = '/transt.pth' # Absolute path of the model
+        net = NetWithBackbone(net_path=net_path, use_gpu=use_gpu)
+        self.tracker = Tracker(name='transt', net=net, window_penalty=0.49, exemplar_size=128, instance_size=256)
+
+    def decode_state(self, state):
+        self.tracker.net.net.zf = jsonpickle.decode(state['model.net.net.zf'])
+        self.tracker.net.net.pos_template = jsonpickle.decode(state['model.net.net.pos_template'])
+
+        self.tracker.window = jsonpickle.decode(state['model.window'])
+        self.tracker.center_pos = jsonpickle.decode(state['model.center_pos'])
+        self.tracker.size = jsonpickle.decode(state['model.size'])
+        self.tracker.channel_average = jsonpickle.decode(state['model.channel_average'])
+        self.tracker.mean = jsonpickle.decode(state['model.mean'])
+        self.tracker.std = jsonpickle.decode(state['model.std'])
+        self.tracker.inplace = jsonpickle.decode(state['model.inplace'])
+
+        self.tracker.features_initialized = False
+        if 'model.features_initialized' in state:
+            self.tracker.features_initialized = jsonpickle.decode(state['model.features_initialized'])
+
+    def encode_state(self):
+        state = {}
+        state['model.net.net.zf'] = jsonpickle.encode(self.tracker.net.net.zf)
+        state['model.net.net.pos_template'] = jsonpickle.encode(self.tracker.net.net.pos_template)
+        state['model.window'] = jsonpickle.encode(self.tracker.window)
+        state['model.center_pos'] = jsonpickle.encode(self.tracker.center_pos)
+        state['model.size'] = jsonpickle.encode(self.tracker.size)
+        state['model.channel_average'] = jsonpickle.encode(self.tracker.channel_average)
+        state['model.mean'] = jsonpickle.encode(self.tracker.mean)
+        state['model.std'] = jsonpickle.encode(self.tracker.std)
+        state['model.inplace'] = jsonpickle.encode(self.tracker.inplace)
+        state['model.features_initialized'] = jsonpickle.encode(getattr(self.tracker, 'features_initialized', False))
+
+        return state
+
+    def init_tracker(self, img, bbox):
+        cx, cy, w, h = get_axis_aligned_bbox(np.array(bbox))
+        gt_bbox_ = [cx - w / 2, cy - h / 2, w, h]
+        init_info = {'init_bbox': gt_bbox_}
+        self.tracker.initialize(img, init_info)
+
+    def track(self, img):
+        outputs = self.tracker.track(img)
+        prediction_bbox = outputs['target_bbox']
+
+        left = prediction_bbox[0]
+        top = prediction_bbox[1]
+        right = prediction_bbox[0] + prediction_bbox[2]
+        bottom = prediction_bbox[1] + prediction_bbox[3]
+        return (left, top, right, bottom)
+
+    def infer(self, image, shape, state):
+        if state is None:
+            init_shape = (shape[0], shape[1], shape[2] - shape[0], shape[3] - shape[1])
+
+            self.init_tracker(image, init_shape)
+            state = self.encode_state()
+        else:
+            self.decode_state(state)
+            shape = self.track(image)
+            state = self.encode_state()
+
+        return shape, state