Skip to content

Commit e2fece3

Browse files
iseessel and facebook-github-bot
authored and committed
Fix gem logic, reverse cropping/transformation order. (facebookresearch#379)
Summary: Pull Request resolved: facebookresearch#379 1. Fix the gem post-processing logic. Before this change, the code assumed that each non-preprocessed feature tensor has the same tensor shape: ``` if cfg.IMG_RETRIEVAL.FEATS_PROCESSING_TYPE == "gem": gem_out_fname = f"{out_dir}/{train_dataset_name}_GeM.npy" train_features = torch.tensor(np.concatenate(train_features)) ``` This is not the case, since ROxford/RParis images do not have a standard size, hence the resx layers have different heights and widths (but the same number of channels). GeM pooling will transform an image of any shape to a shape of `(num_channels)`. The change performs gem_pooling on each individual image, as opposed to all the images at once. This should be fine because both gem and l2 normalization are to be performed per-image. 2. Transform before cropping to the bounding box (as opposed to after cropping). The experiments show that this yields much better results. This is also what the deepcluster implementation uses: https://github.com/facebookresearch/deepcluster/blob/master/eval_retrieval.py#L44 ``` Oxford: 61.57 / 41.74 / 14.33 vs. 69.65 / 48.51 / 16.41 Paris: 83.7 / 66.87 / 44.81 vs. 87.9 / 70.57 / 47.39 ``` f288434289 f288438150 Differential Revision: D29993204 fbshipit-source-id: 6eb48e00011704c6f670f60417e2ed53a9ff0cb9
1 parent 5a8a2b1 commit e2fece3

File tree

2 files changed

+34
-59
lines changed

2 files changed

+34
-59
lines changed

tools/instance_retrieval_test.py

Lines changed: 24 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -80,20 +80,6 @@ def build_retrieval_model(cfg):
8080
return model
8181

8282

83-
def gem_pool_and_save_features(features, p, add_bias, gem_out_fname):
84-
if gem_out_fname and PathManager.exists(gem_out_fname):
85-
logging.info("Loading train GeM features...")
86-
features = load_file(gem_out_fname)
87-
else:
88-
logging.info(f"GeM pooling features: {features.shape}")
89-
features = l2n(gem(features, p=p, add_bias=True))
90-
91-
if gem_out_fname:
92-
save_file(features, gem_out_fname, verbose=False)
93-
logging.info(f"Saved GeM features to: {gem_out_fname}")
94-
return features
95-
96-
9783
def get_train_features(
9884
cfg,
9985
temp_dir,
@@ -120,7 +106,7 @@ def process_train_image(i, out_dir):
120106
else:
121107
fname_in = train_dataset.get_filename(i)
122108
if is_revisited_dataset(train_dataset_name):
123-
img = image_helper.load_and_prepare_revisited_image(fname_in)
109+
img = image_helper.load_and_prepare_revisited_image(fname_in, roi=None)
124110
elif is_whiten_dataset(train_dataset_name):
125111
img = image_helper.load_and_prepare_whitening_image(fname_in)
126112
else:
@@ -136,6 +122,14 @@ def process_train_image(i, out_dir):
136122
elif cfg.IMG_RETRIEVAL.FEATS_PROCESSING_TYPE == "l2_norm":
137123
# we simply L2 normalize the features otherwise
138124
descriptors = F.normalize(activation_map, p=2, dim=0)
125+
elif cfg.IMG_RETRIEVAL.FEATS_PROCESSING_TYPE == "gem":
126+
descriptors = l2n(
127+
gem(
128+
activation_map,
129+
p=cfg.IMG_RETRIEVAL.GEM_POOL_POWER,
130+
add_bias=False,
131+
)
132+
)
139133
else:
140134
descriptors = activation_map
141135

@@ -154,19 +148,6 @@ def process_train_image(i, out_dir):
154148
for i in range(num_images):
155149
process_train_image(i, out_dir)
156150

157-
if cfg.IMG_RETRIEVAL.FEATS_PROCESSING_TYPE == "gem":
158-
159-
gem_out_fname = None
160-
if out_dir:
161-
gem_out_fname = f"{out_dir}/{train_dataset_name}_GeM.npy"
162-
163-
train_features = torch.tensor(np.concatenate(train_features))
164-
train_features = gem_pool_and_save_features(
165-
train_features,
166-
p=cfg.IMG_RETRIEVAL.GEM_POOL_POWER,
167-
add_bias=True,
168-
gem_out_fname=gem_out_fname,
169-
)
170151
train_features = np.vstack([x.reshape(-1, x.shape[-1]) for x in train_features])
171152
logging.info(f"Train features size: {train_features.shape}")
172153
return train_features
@@ -201,6 +182,21 @@ def process_eval_image(
201182
elif cfg.IMG_RETRIEVAL.FEATS_PROCESSING_TYPE == "l2_norm":
202183
# we simply L2 normalize the features otherwise
203184
descriptors = F.normalize(activation_map, p=2, dim=0)
185+
# Optionally apply pca.
186+
if pca:
187+
descriptors = pca.apply(descriptors)
188+
189+
elif cfg.IMG_RETRIEVAL.FEATS_PROCESSING_TYPE == "gem":
190+
descriptors = l2n(
191+
gem(
192+
activation_map,
193+
p=cfg.IMG_RETRIEVAL.GEM_POOL_POWER,
194+
add_bias=True,
195+
)
196+
)
197+
# Optionally apply pca.
198+
if pca:
199+
descriptors = pca.apply(descriptors)
204200
else:
205201
descriptors = activation_map
206202

@@ -255,19 +251,6 @@ def get_dataset_features(
255251
)
256252
features_dataset.append(db_feature)
257253

258-
if cfg.IMG_RETRIEVAL.FEATS_PROCESSING_TYPE == "gem":
259-
# GeM pool the features and apply the PCA
260-
gem_out_fname = None
261-
if db_fname_out_dir:
262-
gem_out_fname = f"{db_fname_out_dir}/{eval_dataset_name}_GeM.npy"
263-
features_dataset = torch.tensor(np.concatenate(features_dataset))
264-
features_dataset = gem_pool_and_save_features(
265-
features_dataset,
266-
p=cfg.IMG_RETRIEVAL.GEM_POOL_POWER,
267-
add_bias=True,
268-
gem_out_fname=gem_out_fname,
269-
)
270-
features_dataset = pca.apply(features_dataset)
271254
features_dataset = np.vstack(features_dataset)
272255
logging.info(f"Dataset Features Size: {features_dataset.shape}")
273256
return features_dataset
@@ -325,19 +308,6 @@ def get_queries_features(
325308
)
326309
features_queries.append(query_feature)
327310

328-
if cfg.IMG_RETRIEVAL.FEATS_PROCESSING_TYPE == "gem":
329-
# GeM pool the features and apply the PCA
330-
gem_out_fname = None
331-
if q_fname_out_dir:
332-
gem_out_fname = f"{q_fname_out_dir}/{eval_dataset_name}_GeM.npy"
333-
features_queries = torch.tensor(np.concatenate(features_queries))
334-
features_queries = gem_pool_and_save_features(
335-
features_queries,
336-
p=cfg.IMG_RETRIEVAL.GEM_POOL_POWER,
337-
add_bias=True,
338-
gem_out_fname=gem_out_fname,
339-
)
340-
features_queries = pca.apply(features_queries)
341311
features_queries = np.vstack(features_queries)
342312
logging.info(f"Queries Features Size: {features_queries.shape}")
343313
return features_queries

vissl/utils/instance_retrieval_utils/data_util.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -485,12 +485,13 @@ def load_and_prepare_image(self, fname, roi=None):
485485
"""
486486
# Read image, get aspect ratio, and resize such as the largest side equals S
487487
with PathManager.open(fname, "rb") as f:
488-
im = Image.open(f).convert(mode="RGB")
489-
im_resized, ratio = self.apply_img_transform(im)
488+
img = Image.open(f).convert(mode="RGB")
489+
im_resized, ratio = self.apply_img_transform(img)
490490
# If there is a roi, adapt the roi to the new size and crop. Do not rescale
491491
# the image once again
492492
if roi is not None:
493493
# ROI format is (xmin,ymin,xmax,ymax)
494+
roi = np.array(roi)
494495
roi = np.round(roi * ratio).astype(np.int32)
495496
im_resized = im_resized[:, roi[1] : roi[3], roi[0] : roi[2]]
496497
return im_resized
@@ -507,10 +508,14 @@ def load_and_prepare_revisited_image(self, img_path, roi=None):
507508
with PathManager.open(img_path, "rb") as f:
508509
img = Image.open(f).convert("RGB")
509510

511+
im_resized, ratio = self.apply_img_transform(img)
512+
# If there is a roi, adapt the roi to the new size and crop. Do not rescale
513+
# the image once again
510514
if roi is not None:
511-
img = img.crop(roi)
512-
513-
im_resized, _ = self.apply_img_transform(img)
515+
# ROI format is (xmin,ymin,xmax,ymax)
516+
roi = np.array(roi)
517+
roi = np.round(roi * ratio).astype(np.int32)
518+
im_resized = im_resized[:, roi[1] : roi[3], roi[0] : roi[2]]
514519
return im_resized
515520

516521

0 commit comments

Comments
 (0)