
Commit e31d30b ("initial commit")
1 parent: bf5273d

35 files changed: 2,587 additions and 1 deletion

.gitignore

Lines changed: 2 additions & 0 deletions

@@ -158,3 +158,5 @@ cython_debug/
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+
+.vscode*

DeDoDe/__init__.py

Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
from .model_zoo import dedode_detector_B, dedode_detector_L, dedode_descriptor_B
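The package root re-exports the three model-zoo constructors. A minimal usage sketch follows; it is hedged, since model_zoo itself is not shown in this commit and the constructor arguments below are assumptions, not confirmed API:

# Hypothetical usage; whether the constructors take a `weights` argument is an assumption.
import torch
from DeDoDe import dedode_detector_L, dedode_descriptor_B

detector = dedode_detector_L(weights=torch.load("dedode_detector_L.pth"))
descriptor = dedode_descriptor_B(weights=torch.load("dedode_descriptor_B.pth"))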

DeDoDe/benchmarks/__init__.py

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
from .num_inliers import NumInliersBenchmark
from .mega_pose_est import MegaDepthPoseEstimationBenchmark
from .mega_pose_est_mnn import MegaDepthPoseMNNBenchmark

DeDoDe/benchmarks/mega_pose_est.py

Lines changed: 114 additions & 0 deletions
@@ -0,0 +1,114 @@
import numpy as np
import torch
from DeDoDe.utils import *
from PIL import Image
from tqdm import tqdm
import torch.nn.functional as F


class MegaDepthPoseEstimationBenchmark:
    def __init__(self, data_root="data/megadepth", scene_names=None) -> None:
        if scene_names is None:
            self.scene_names = [
                "0015_0.1_0.3.npz",
                "0015_0.3_0.5.npz",
                "0022_0.1_0.3.npz",
                "0022_0.3_0.5.npz",
                "0022_0.5_0.7.npz",
            ]
        else:
            self.scene_names = scene_names
        self.scenes = [
            np.load(f"{data_root}/{scene}", allow_pickle=True)
            for scene in self.scene_names
        ]
        self.data_root = data_root

    def benchmark(self, keypoint_model, matching_model, model_name=None, resolution=None, scale_intrinsics=True, calibrated=True):
        H, W = matching_model.get_output_resolution()
        with torch.no_grad():
            data_root = self.data_root
            tot_e_t, tot_e_R, tot_e_pose = [], [], []
            thresholds = [5, 10, 20]
            for scene_ind in range(len(self.scenes)):
                import os
                scene_name = os.path.splitext(self.scene_names[scene_ind])[0]
                scene = self.scenes[scene_ind]
                pairs = scene["pair_infos"]
                intrinsics = scene["intrinsics"]
                poses = scene["poses"]
                im_paths = scene["image_paths"]
                pair_inds = range(len(pairs))
                for pairind in tqdm(pair_inds):
                    idx1, idx2 = pairs[pairind][0]
                    K1 = intrinsics[idx1].copy()
                    T1 = poses[idx1].copy()
                    R1, t1 = T1[:3, :3], T1[:3, 3]
                    K2 = intrinsics[idx2].copy()
                    T2 = poses[idx2].copy()
                    R2, t2 = T2[:3, :3], T2[:3, 3]
                    R, t = compute_relative_pose(R1, t1, R2, t2)
                    T1_to_2 = np.concatenate((R, t[:, None]), axis=-1)
                    im_A_path = f"{data_root}/{im_paths[idx1]}"
                    im_B_path = f"{data_root}/{im_paths[idx2]}"

                    keypoints_A = keypoint_model.detect_from_path(im_A_path, num_keypoints=20_000)["keypoints"][0]
                    keypoints_B = keypoint_model.detect_from_path(im_B_path, num_keypoints=20_000)["keypoints"][0]
                    warp, certainty = matching_model.match(im_A_path, im_B_path)
                    matches = matching_model.match_keypoints(keypoints_A, keypoints_B, warp, certainty, return_tuple=False)
                    im_A = Image.open(im_A_path)
                    w1, h1 = im_A.size
                    im_B = Image.open(im_B_path)
                    w2, h2 = im_B.size
                    if scale_intrinsics:
                        scale1 = 1200 / max(w1, h1)
                        scale2 = 1200 / max(w2, h2)
                        w1, h1 = scale1 * w1, scale1 * h1
                        w2, h2 = scale2 * w2, scale2 * h2
                        K1, K2 = K1.copy(), K2.copy()
                        K1[:2] = K1[:2] * scale1
                        K2[:2] = K2[:2] * scale2
                    kpts1, kpts2 = matching_model.to_pixel_coordinates(matches, h1, w1, h2, w2)
                    for _ in range(1):
                        shuffling = np.random.permutation(np.arange(len(kpts1)))
                        kpts1 = kpts1[shuffling]
                        kpts2 = kpts2[shuffling]
                        try:
                            threshold = 0.5
                            if calibrated:
                                norm_threshold = threshold / (np.mean(np.abs(K1[:2, :2])) + np.mean(np.abs(K2[:2, :2])))
                            R_est, t_est, mask = estimate_pose(
                                kpts1.cpu().numpy(),
                                kpts2.cpu().numpy(),
                                K1,
                                K2,
                                norm_threshold,
                                conf=0.99999,
                            )
                            T1_to_2_est = np.concatenate((R_est, t_est), axis=-1)
                            e_t, e_R = compute_pose_error(T1_to_2_est, R, t)
                            e_pose = max(e_t, e_R)
                        except Exception as e:
                            print(repr(e))
                            e_t, e_R = 90, 90
                            e_pose = max(e_t, e_R)
                        tot_e_t.append(e_t)
                        tot_e_R.append(e_R)
                        tot_e_pose.append(e_pose)
            tot_e_pose = np.array(tot_e_pose)
            auc = pose_auc(tot_e_pose, thresholds)
            acc_5 = (tot_e_pose < 5).mean()
            acc_10 = (tot_e_pose < 10).mean()
            acc_15 = (tot_e_pose < 15).mean()
            acc_20 = (tot_e_pose < 20).mean()
            map_5 = acc_5
            map_10 = np.mean([acc_5, acc_10])
            map_20 = np.mean([acc_5, acc_10, acc_15, acc_20])
            print(f"{model_name} auc: {auc}")
            return {
                "auc_5": auc[0],
                "auc_10": auc[1],
                "auc_20": auc[2],
                "map_5": map_5,
                "map_10": map_10,
                "map_20": map_20,
            }
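For context, a hedged invocation sketch: `detector` and `dense_matcher` below are placeholders for models implementing the interfaces this benchmark calls (`detect_from_path`, and `get_output_resolution`/`match`/`match_keypoints`/`to_pixel_coordinates` respectively); neither is defined in this commit.

# Hedged usage sketch; the two model objects are hypothetical stand-ins.
from DeDoDe.benchmarks import MegaDepthPoseEstimationBenchmark

benchmark = MegaDepthPoseEstimationBenchmark(data_root="data/megadepth")
results = benchmark.benchmark(
    keypoint_model=detector,       # placeholder: needs .detect_from_path(path, num_keypoints=...)
    matching_model=dense_matcher,  # placeholder: needs .get_output_resolution(), .match(), .match_keypoints(), .to_pixel_coordinates()
    model_name="dedode+dense_matcher",
)
print({k: results[k] for k in ("auc_5", "auc_10", "auc_20")})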
DeDoDe/benchmarks/mega_pose_est_mnn.py

Lines changed: 119 additions & 0 deletions
@@ -0,0 +1,119 @@
import numpy as np
import torch
from DeDoDe.utils import *
from PIL import Image
from tqdm import tqdm
import torch.nn.functional as F


class MegaDepthPoseMNNBenchmark:
    def __init__(self, data_root="data/megadepth", scene_names=None) -> None:
        if scene_names is None:
            self.scene_names = [
                "0015_0.1_0.3.npz",
                "0015_0.3_0.5.npz",
                "0022_0.1_0.3.npz",
                "0022_0.3_0.5.npz",
                "0022_0.5_0.7.npz",
            ]
        else:
            self.scene_names = scene_names
        self.scenes = [
            np.load(f"{data_root}/{scene}", allow_pickle=True)
            for scene in self.scene_names
        ]
        self.data_root = data_root

    def benchmark(self, detector_model, descriptor_model, matcher_model, model_name=None, resolution=None, scale_intrinsics=True, calibrated=True):
        with torch.no_grad():
            data_root = self.data_root
            tot_e_t, tot_e_R, tot_e_pose = [], [], []
            thresholds = [5, 10, 20]
            for scene_ind in range(len(self.scenes)):
                import os
                scene_name = os.path.splitext(self.scene_names[scene_ind])[0]
                scene = self.scenes[scene_ind]
                pairs = scene["pair_infos"]
                intrinsics = scene["intrinsics"]
                poses = scene["poses"]
                im_paths = scene["image_paths"]
                pair_inds = range(len(pairs))
                for pairind in tqdm(pair_inds):
                    idx1, idx2 = pairs[pairind][0]
                    K1 = intrinsics[idx1].copy()
                    T1 = poses[idx1].copy()
                    R1, t1 = T1[:3, :3], T1[:3, 3]
                    K2 = intrinsics[idx2].copy()
                    T2 = poses[idx2].copy()
                    R2, t2 = T2[:3, :3], T2[:3, 3]
                    R, t = compute_relative_pose(R1, t1, R2, t2)
                    T1_to_2 = np.concatenate((R, t[:, None]), axis=-1)
                    im_A_path = f"{data_root}/{im_paths[idx1]}"
                    im_B_path = f"{data_root}/{im_paths[idx2]}"
                    detections_A = detector_model.detect_from_path(im_A_path)
                    keypoints_A, P_A = detections_A["keypoints"], detections_A["confidence"]
                    detections_B = detector_model.detect_from_path(im_B_path)
                    keypoints_B, P_B = detections_B["keypoints"], detections_B["confidence"]
                    description_A = descriptor_model.describe_keypoints_from_path(im_A_path, keypoints_A)["descriptions"]
                    description_B = descriptor_model.describe_keypoints_from_path(im_B_path, keypoints_B)["descriptions"]
                    matches_A, matches_B, batch_ids = matcher_model.match(keypoints_A, description_A,
                                                                          keypoints_B, description_B,
                                                                          P_A=P_A, P_B=P_B,
                                                                          normalize=True, inv_temp=20, threshold=0.01)

                    im_A = Image.open(im_A_path)
                    w1, h1 = im_A.size
                    im_B = Image.open(im_B_path)
                    w2, h2 = im_B.size
                    if scale_intrinsics:
                        scale1 = 1200 / max(w1, h1)
                        scale2 = 1200 / max(w2, h2)
                        w1, h1 = scale1 * w1, scale1 * h1
                        w2, h2 = scale2 * w2, scale2 * h2
                        K1, K2 = K1.copy(), K2.copy()
                        K1[:2] = K1[:2] * scale1
                        K2[:2] = K2[:2] * scale2
                    kpts1, kpts2 = matcher_model.to_pixel_coords(matches_A, matches_B, h1, w1, h2, w2)
                    for _ in range(1):
                        shuffling = np.random.permutation(np.arange(len(kpts1)))
                        kpts1 = kpts1[shuffling]
                        kpts2 = kpts2[shuffling]
                        try:
                            threshold = 0.5
                            if calibrated:
                                norm_threshold = threshold / (np.mean(np.abs(K1[:2, :2])) + np.mean(np.abs(K2[:2, :2])))
                            R_est, t_est, mask = estimate_pose(
                                kpts1.cpu().numpy(),
                                kpts2.cpu().numpy(),
                                K1,
                                K2,
                                norm_threshold,
                                conf=0.99999,
                            )
                            T1_to_2_est = np.concatenate((R_est, t_est), axis=-1)
                            e_t, e_R = compute_pose_error(T1_to_2_est, R, t)
                            e_pose = max(e_t, e_R)
                        except Exception as e:
                            print(repr(e))
                            e_t, e_R = 90, 90
                            e_pose = max(e_t, e_R)
                        tot_e_t.append(e_t)
                        tot_e_R.append(e_R)
                        tot_e_pose.append(e_pose)
            tot_e_pose = np.array(tot_e_pose)
            auc = pose_auc(tot_e_pose, thresholds)
            acc_5 = (tot_e_pose < 5).mean()
            acc_10 = (tot_e_pose < 10).mean()
            acc_15 = (tot_e_pose < 15).mean()
            acc_20 = (tot_e_pose < 20).mean()
            map_5 = acc_5
            map_10 = np.mean([acc_5, acc_10])
            map_20 = np.mean([acc_5, acc_10, acc_15, acc_20])
            print(f"{model_name} auc: {auc}")
            return {
                "auc_5": auc[0],
                "auc_10": auc[1],
                "auc_20": auc[2],
                "map_5": map_5,
                "map_10": map_10,
                "map_20": map_20,
            }
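As above, a hedged sketch of how the detector/descriptor/matcher triple plugs in; the three model objects are placeholders for anything exposing the methods this `benchmark` calls (`detect_from_path`, `describe_keypoints_from_path`, `match`, `to_pixel_coords`) and are not defined in this commit.

# Hedged usage sketch; detector/descriptor/matcher are hypothetical stand-ins.
from DeDoDe.benchmarks import MegaDepthPoseMNNBenchmark

benchmark = MegaDepthPoseMNNBenchmark(data_root="data/megadepth")
results = benchmark.benchmark(
    detector_model=detector,      # placeholder: .detect_from_path(path) -> {"keypoints", "confidence"}
    descriptor_model=descriptor,  # placeholder: .describe_keypoints_from_path(path, kpts) -> {"descriptions"}
    matcher_model=matcher,        # placeholder: mutual-nearest-neighbour style matcher
    model_name="dedode_detector+descriptor",
)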

DeDoDe/benchmarks/num_inliers.py

Lines changed: 76 additions & 0 deletions
@@ -0,0 +1,76 @@
import torch
import torch.nn as nn
from DeDoDe.utils import *
import DeDoDe


class NumInliersBenchmark(nn.Module):

    def __init__(self, dataset, num_samples=1000, batch_size=8, num_keypoints=10_000, device="cuda") -> None:
        super().__init__()
        sampler = torch.utils.data.WeightedRandomSampler(
            torch.ones(len(dataset)), replacement=False, num_samples=num_samples
        )
        dataloader = torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, num_workers=batch_size, sampler=sampler
        )
        self.dataloader = dataloader
        self.tracked_metrics = {}
        self.batch_size = batch_size
        self.N = len(dataloader)
        self.num_keypoints = num_keypoints

    def compute_batch_metrics(self, outputs, batch, device="cuda"):
        kpts_A, kpts_B = outputs["keypoints_A"], outputs["keypoints_B"]
        B, K, H, W = batch["im_A"].shape
        gt_warp_A_to_B, valid_mask_A_to_B = get_gt_warp(
            batch["im_A_depth"],
            batch["im_B_depth"],
            batch["T_1to2"],
            batch["K1"],
            batch["K2"],
            H=H,
            W=W,
        )
        kpts_A_to_B = F.grid_sample(gt_warp_A_to_B[..., 2:].float().permute(0, 3, 1, 2), kpts_A[..., None, :],
                                    align_corners=False, mode='bilinear')[..., 0].mT
        legit_A_to_B = F.grid_sample(valid_mask_A_to_B.reshape(B, 1, H, W), kpts_A[..., None, :],
                                     align_corners=False, mode='bilinear')[..., 0, :, 0]
        dists = (torch.cdist(kpts_A_to_B, kpts_B).min(dim=-1).values[legit_A_to_B > 0.]).float()
        if legit_A_to_B.sum() == 0:
            return
        percent_inliers_at_1 = (dists < 0.02).float().mean()
        percent_inliers_at_05 = (dists < 0.01).float().mean()
        percent_inliers_at_025 = (dists < 0.005).float().mean()
        percent_inliers_at_01 = (dists < 0.002).float().mean()
        percent_inliers_at_005 = (dists < 0.001).float().mean()

        inlier_bins = torch.linspace(0, 0.002, steps=100, device=device)[None]
        inlier_counts = (dists[..., None] < inlier_bins).float().mean(dim=0)
        self.tracked_metrics["inlier_counts"] = self.tracked_metrics.get("inlier_counts", 0) + 1 / self.N * inlier_counts
        self.tracked_metrics["percent_inliers_at_1"] = self.tracked_metrics.get("percent_inliers_at_1", 0) + 1 / self.N * percent_inliers_at_1
        self.tracked_metrics["percent_inliers_at_05"] = self.tracked_metrics.get("percent_inliers_at_05", 0) + 1 / self.N * percent_inliers_at_05
        self.tracked_metrics["percent_inliers_at_025"] = self.tracked_metrics.get("percent_inliers_at_025", 0) + 1 / self.N * percent_inliers_at_025
        self.tracked_metrics["percent_inliers_at_01"] = self.tracked_metrics.get("percent_inliers_at_01", 0) + 1 / self.N * percent_inliers_at_01
        self.tracked_metrics["percent_inliers_at_005"] = self.tracked_metrics.get("percent_inliers_at_005", 0) + 1 / self.N * percent_inliers_at_005

    def benchmark(self, detector):
        self.tracked_metrics = {}
        from tqdm import tqdm
        print("Evaluating percent inliers...")
        for idx, batch in tqdm(enumerate(self.dataloader), mininterval=10.):
            batch = to_cuda(batch)
            outputs = detector.detect(batch, num_keypoints=self.num_keypoints)
            keypoints_A, keypoints_B = outputs["keypoints"][:self.batch_size], outputs["keypoints"][self.batch_size:]
            if isinstance(outputs["keypoints"], (tuple, list)):
                keypoints_A, keypoints_B = torch.stack(keypoints_A), torch.stack(keypoints_B)
            outputs = {"keypoints_A": keypoints_A, "keypoints_B": keypoints_B}
            self.compute_batch_metrics(outputs, batch)
        import matplotlib.pyplot as plt
        plt.plot(torch.linspace(0, 0.002, steps=100), self.tracked_metrics["inlier_counts"].cpu())
        import numpy as np
        x = np.linspace(0, 0.002, 100)
        sigma = 0.52 * 2 / 512
        F = 1 - np.exp(-x**2 / (2 * sigma**2))
        plt.plot(x, F)
        plt.savefig("vis/inlier_counts")
        [print(name, metric.item() * self.N / (idx + 1)) for name, metric in self.tracked_metrics.items() if "percent" in name]
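Note that the inlier thresholds are in normalized [-1, 1] image coordinates, so `dists < 0.02` means within 1% of the image extent (roughly 5 px on a 512-px side). A hedged sketch of running the benchmark, where `megadepth_dataset` and `detector` are placeholders for objects providing the batch keys used above (`im_A`, `im_A_depth`, `im_B_depth`, `T_1to2`, `K1`, `K2`) and the `.detect(...)` interface:

# Hedged usage sketch; dataset and detector are hypothetical stand-ins.
from DeDoDe.benchmarks import NumInliersBenchmark

inlier_benchmark = NumInliersBenchmark(megadepth_dataset, num_samples=1000, batch_size=8)
inlier_benchmark.benchmark(detector)  # prints the percent_inliers_at_* running means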

DeDoDe/checkpoint.py

Lines changed: 59 additions & 0 deletions
@@ -0,0 +1,59 @@
import os
import torch
from torch.nn.parallel.data_parallel import DataParallel
from torch.nn.parallel.distributed import DistributedDataParallel
import gc

import DeDoDe

class CheckPoint:
    def __init__(self, dir=None, name="tmp"):
        self.name = name
        self.dir = dir
        os.makedirs(self.dir, exist_ok=True)

    def save(
        self,
        model,
        optimizer,
        lr_scheduler,
        n,
    ):
        if DeDoDe.RANK == 0:
            assert model is not None
            if isinstance(model, (DataParallel, DistributedDataParallel)):
                model = model.module
            states = {
                "model": model.state_dict(),
                "n": n,
                "optimizer": optimizer.state_dict(),
                "lr_scheduler": lr_scheduler.state_dict(),
            }
            torch.save(states, self.dir + self.name + f"_latest.pth")
            print(f"Saved states {list(states.keys())}, at step {n}")

    def load(
        self,
        model,
        optimizer,
        lr_scheduler,
        n,
    ):
        if os.path.exists(self.dir + self.name + f"_latest.pth") and DeDoDe.RANK == 0:
            states = torch.load(self.dir + self.name + f"_latest.pth")
            if "model" in states:
                model.load_state_dict(states["model"])
            if "n" in states:
                n = states["n"] if states["n"] else n
            if "optimizer" in states:
                try:
                    optimizer.load_state_dict(states["optimizer"])
                except Exception as e:
                    print(f"Failed to load states for optimizer, with error {e}")
            if "lr_scheduler" in states:
                lr_scheduler.load_state_dict(states["lr_scheduler"])
            print(f"Loaded states {list(states.keys())}, at step {n}")
        del states
        gc.collect()
        torch.cuda.empty_cache()
        return model, optimizer, lr_scheduler, n
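CheckPoint builds its path by plain string concatenation (`self.dir + self.name + "_latest.pth"`), so `dir` should end with a path separator, and only rank 0 reads or writes. A hedged usage sketch with standard torch training objects:

# Hedged usage sketch; model/optimizer/lr_scheduler are ordinary torch objects.
from DeDoDe.checkpoint import CheckPoint

checkpointer = CheckPoint(dir="workspace/checkpoints/", name="dedode")  # trailing slash matters
# inside the training loop, at step n:
checkpointer.save(model, optimizer, lr_scheduler, n)
# to resume:
model, optimizer, lr_scheduler, n = checkpointer.load(model, optimizer, lr_scheduler, n)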

DeDoDe/datasets/__init__.py

Whitespace-only changes.
