6
6
import logging
7
7
import os
8
8
import sys
9
- import uuid
10
9
from argparse import Namespace
11
10
from typing import Any , List
12
11
13
12
import numpy as np
14
13
import torch
15
- import torch .nn .functional as F
16
14
import torchvision
17
15
from classy_vision .generic .util import copy_model_to_gpu , load_checkpoint
18
16
from fvcore .common .file_io import PathManager
@@ -92,7 +90,7 @@ def get_train_features(
92
90
):
93
91
train_features = []
94
92
95
- def process_train_image (i , out_dir ):
93
+ def process_train_image (i , out_dir , verbose = False ):
96
94
if i % LOG_FREQUENCY == 0 :
97
95
logging .info (f"Train Image: { i } " ),
98
96
@@ -115,24 +113,35 @@ def process_train_image(i, out_dir):
115
113
vc = v .cuda ()
116
114
# the model output is a list always.
117
115
activation_map = model (vc )[0 ].cpu ()
116
+
117
+ if verbose :
118
+ print (f"Train Image raw activation map shape: { activation_map .shape } " )
119
+
118
120
# once we have the features,
119
121
# we can perform: rmac | gem pooling | l2 norm
120
122
if cfg .IMG_RETRIEVAL .FEATS_PROCESSING_TYPE == "rmac" :
121
- descriptors = get_rmac_descriptors (activation_map , spatial_levels )
122
- elif cfg .IMG_RETRIEVAL .FEATS_PROCESSING_TYPE == "l2_norm" :
123
- # we simply L2 normalize the features otherwise
124
- descriptors = F .normalize (activation_map , p = 2 , dim = 0 )
123
+ descriptors = get_rmac_descriptors (
124
+ activation_map ,
125
+ spatial_levels ,
126
+ normalize = cfg .IMG_RETRIEVAL .NORMALIZE_FEATURES ,
127
+ )
125
128
elif cfg .IMG_RETRIEVAL .FEATS_PROCESSING_TYPE == "gem" :
126
- descriptors = l2n (
127
- gem (
128
- activation_map ,
129
- p = cfg .IMG_RETRIEVAL .GEM_POOL_POWER ,
130
- add_bias = False ,
131
- )
129
+ descriptors = gem (
130
+ activation_map ,
131
+ p = cfg .IMG_RETRIEVAL .GEM_POOL_POWER ,
132
+ add_bias = True ,
132
133
)
133
134
else :
134
135
descriptors = activation_map
135
136
137
+ # Optionally l2 normalize the features.
138
+ if (
139
+ cfg .IMG_RETRIEVAL .NORMALIZE_FEATURES
140
+ and cfg .IMG_RETRIEVAL .FEATS_PROCESSING_TYPE != "rmac"
141
+ ):
142
+ # RMAC performs normalization within the algorithm, hence we skip it here.
143
+ descriptors = l2n (descriptors , dim = 1 )
144
+
136
145
if fname_out :
137
146
save_file (descriptors .data .numpy (), fname_out , verbose = False )
138
147
train_features .append (descriptors .data .numpy ())
@@ -146,7 +155,7 @@ def process_train_image(i, out_dir):
146
155
147
156
logging .info (f"Getting features for train images: { num_images } " )
148
157
for i in range (num_images ):
149
- process_train_image (i , out_dir )
158
+ process_train_image (i , out_dir , verbose = ( i == 0 ) )
150
159
151
160
train_features = np .vstack ([x .reshape (- 1 , x .shape [- 1 ]) for x in train_features ])
152
161
logging .info (f"Train features size: { train_features .shape } " )
@@ -163,6 +172,7 @@ def process_eval_image(
163
172
model ,
164
173
pca ,
165
174
eval_dataset_name ,
175
+ verbose = False ,
166
176
):
167
177
if is_revisited_dataset (eval_dataset_name ):
168
178
img = image_helper .load_and_prepare_revisited_image (fname_in , roi = roi )
@@ -176,30 +186,39 @@ def process_eval_image(
176
186
# the model output is a list always.
177
187
activation_map = model (vc )[0 ].cpu ()
178
188
189
+ if verbose :
190
+ print (f"Eval image raw activation map shape: { activation_map .shape } " )
191
+
179
192
# process the features: rmac | l2 norm
180
193
if cfg .IMG_RETRIEVAL .FEATS_PROCESSING_TYPE == "rmac" :
181
- descriptors = get_rmac_descriptors (activation_map , spatial_levels , pca = pca )
182
- elif cfg .IMG_RETRIEVAL .FEATS_PROCESSING_TYPE == "l2_norm" :
183
- # we simply L2 normalize the features otherwise
184
- descriptors = F .normalize (activation_map , p = 2 , dim = 0 )
185
- # Optionally apply pca.
186
- if pca :
187
- descriptors = pca .apply (descriptors )
188
-
194
+ descriptors = get_rmac_descriptors (
195
+ activation_map ,
196
+ spatial_levels ,
197
+ pca = pca ,
198
+ normalize = cfg .IMG_RETRIEVAL .NORMALIZE_FEATURES ,
199
+ )
189
200
elif cfg .IMG_RETRIEVAL .FEATS_PROCESSING_TYPE == "gem" :
190
- descriptors = l2n (
191
- gem (
192
- activation_map ,
193
- p = cfg .IMG_RETRIEVAL .GEM_POOL_POWER ,
194
- add_bias = True ,
195
- )
201
+ descriptors = gem (
202
+ activation_map ,
203
+ p = cfg .IMG_RETRIEVAL .GEM_POOL_POWER ,
204
+ add_bias = True ,
196
205
)
197
- # Optionally apply pca.
198
- if pca :
199
- descriptors = pca .apply (descriptors )
200
206
else :
201
207
descriptors = activation_map
202
208
209
+ # Optionally l2 normalize the features.
210
+ if (
211
+ cfg .IMG_RETRIEVAL .NORMALIZE_FEATURES
212
+ and cfg .IMG_RETRIEVAL .FEATS_PROCESSING_TYPE != "rmac"
213
+ ):
214
+ # RMAC performs normalization within the algorithm, hence we skip it here.
215
+ descriptors = l2n (descriptors , dim = 1 )
216
+
217
+ # Optionally apply pca.
218
+ if pca and cfg .IMG_RETRIEVAL .FEATS_PROCESSING_TYPE != "rmac" :
219
+ # RMAC performs pca within the algorithm, hence we skip it here.
220
+ descriptors = pca .apply (descriptors )
221
+
203
222
if fname_out :
204
223
save_file (descriptors .data .numpy (), fname_out , verbose = False )
205
224
return descriptors .data .numpy ()
@@ -248,6 +267,7 @@ def get_dataset_features(
248
267
model ,
249
268
pca ,
250
269
eval_dataset_name ,
270
+ verbose = (idx == 0 ),
251
271
)
252
272
features_dataset .append (db_feature )
253
273
@@ -286,6 +306,7 @@ def get_queries_features(
286
306
if idx % LOG_FREQUENCY == 0 :
287
307
logging .info (f"Eval Query: { idx } " ),
288
308
q_fname_in = eval_dataset .get_query_filename (idx )
309
+ # Optionally crop the query by the region-of-interest (ROI).
289
310
roi = (
290
311
eval_dataset .get_query_roi (idx )
291
312
if cfg .IMG_RETRIEVAL .CROP_QUERY_ROI
@@ -309,6 +330,7 @@ def get_queries_features(
309
330
model ,
310
331
pca ,
311
332
eval_dataset_name ,
333
+ verbose = (idx == 0 ),
312
334
)
313
335
features_queries .append (query_feature )
314
336
@@ -345,7 +367,7 @@ def get_transforms(cfg, dataset_name):
345
367
def get_train_dataset (cfg , root_dataset_path , train_dataset_name , eval_binary_path ):
346
368
# We only create the train dataset if we need PCA or whitening training.
347
369
# Otherwise not.
348
- if cfg .IMG_RETRIEVAL .SHOULD_TRAIN_PCA_OR_WHITENING :
370
+ if cfg .IMG_RETRIEVAL .TRAIN_PCA_WHITENING :
349
371
train_data_path = f"{ root_dataset_path } /{ train_dataset_name } "
350
372
assert PathManager .exists (train_data_path ), f"Unknown path: { train_data_path } "
351
373
@@ -444,7 +466,7 @@ def instance_retrieval_test(args, cfg):
444
466
############################################################################
445
467
# Step 2: Extract the features for the train dataset, calculate PCA or
446
468
# whitening and save
447
- if cfg .IMG_RETRIEVAL .SHOULD_TRAIN_PCA_OR_WHITENING :
469
+ if cfg .IMG_RETRIEVAL .TRAIN_PCA_WHITENING :
448
470
logging .info ("Extracting training features..." )
449
471
# the features are already processed based on type: rmac | GeM | l2 norm
450
472
train_features = get_train_features (
@@ -551,7 +573,7 @@ def validate_and_infer_config(config: AttrDict):
551
573
), "Spatial levels must be greater than 0."
552
574
if config .IMG_RETRIEVAL .FEATS_PROCESSING_TYPE == "rmac" :
553
575
assert (
554
- config .IMG_RETRIEVAL .SHOULD_TRAIN_PCA_OR_WHITENING
576
+ config .IMG_RETRIEVAL .TRAIN_PCA_WHITENING
555
577
), "PCA Whitening is built-in to the RMAC algorithm and is required"
556
578
557
579
return config
0 commit comments