
Commit 8e79bef

update code
1 parent 46dd2f7 commit 8e79bef


73 files changed: +3675 additions, -1151 deletions

PromptHash_COCO.ps1

Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
+conda activate torch
+$lr = 0.001
+$gpu_rank = 0
+$valid_freq = 1
+$epochs = 100
+$res_name = "result/Result_PromptHash_COCO"
+$recon = 0.001
+$hyper_cls_inter = 20.0
+$hyper_quan = 1.0
+
+python main.py --is-train --dataset coco --query-num 5000 --train-num 10000 --lr $lr --rank $gpu_rank --valid-freq $valid_freq --epochs $epochs --result-name $res_name --hyper-recon $recon --hyper-cls-inter $hyper_cls_inter --hyper-quan $hyper_quan

PromptHash_COCO.sh

Lines changed: 4 additions & 2 deletions
@@ -2,10 +2,12 @@
 
 ################# bash -x ***.sh #################
 lr=0.001
-gpu_rank=0
+gpu_rank=3
 valid_freq=1
 epochs=100
 res_name="result/Result_PromptHash_COCO"
 recon=0.001
+hyper_cls_inter=20.0
+hyper_quan=1.0
 
-python main.py --is-train --dataset coco --query-num 5000 --train-num 10000 --lr "$lr" --rank "$gpu_rank" --valid-freq "$valid_freq" --epochs "$epochs" --result-name "$res_name" --hyper-recon "$recon"
+python main.py --is-train --dataset coco --query-num 5000 --train-num 10000 --lr "$lr" --rank "$gpu_rank" --valid-freq "$valid_freq" --epochs "$epochs" --result-name "$res_name" --hyper-recon "$recon" --hyper-cls-inter "$hyper_cls_inter" --hyper-quan "$hyper_quan"

PromptHash_Flickr.ps1

Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
+conda activate torch
+$lr = 0.001
+$gpu_rank = 0
+$valid_freq = 1
+$epochs = 100
+$res_name = "result/Result_PromptHash_Flickr"
+$recon = 0.001
+$hyper_cls_inter = 5.0
+$hyper_quan = 0.1
+
+python main.py --is-train --dataset flickr25k --query-num 2000 --train-num 10000 --lr $lr --rank $gpu_rank --valid-freq $valid_freq --epochs $epochs --result-name $res_name --hyper-recon $recon --hyper-cls-inter $hyper_cls_inter --hyper-quan $hyper_quan

PromptHash_Flickr.sh

Lines changed: 4 additions & 2 deletions
@@ -2,10 +2,12 @@
 
 ################# bash -x ***.sh #################
 lr=0.001
-gpu_rank=0
+gpu_rank=7
 valid_freq=1
 epochs=100
 res_name="result/Result_PromptHash_Flickr"
 recon=0.001
+hyper_cls_inter=5.0
+hyper_quan=0.1
 
-python main.py --is-train --dataset flickr25k --query-num 2000 --train-num 10000 --lr "$lr" --rank "$gpu_rank" --valid-freq "$valid_freq" --epochs "$epochs" --result-name "$res_name" --hyper-recon "$recon"
+python main.py --is-train --dataset flickr25k --query-num 2000 --train-num 10000 --lr "$lr" --rank "$gpu_rank" --valid-freq "$valid_freq" --epochs "$epochs" --result-name "$res_name" --hyper-recon "$recon" --hyper-cls-inter "$hyper_cls_inter" --hyper-quan "$hyper_quan"

PromptHash_NUSWIDE.ps1

Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
+conda activate torch
+$lr = 0.001
+$gpu_rank = 0
+$valid_freq = 1
+$epochs = 100
+$res_name = "result/Result_PromptHash_NUSWIDE"
+$recon = 0.001
+$hyper_cls_inter = 5.0
+$hyper_quan = 0.1
+
+python main.py --is-train --dataset nuswide --caption-file caption.txt --query-num 2100 --train-num 10500 --lr $lr --rank $gpu_rank --valid-freq $valid_freq --epochs $epochs --result-name $res_name --hyper-recon $recon --hyper-cls-inter $hyper_cls_inter --hyper-quan $hyper_quan

PromptHash_NUSWIDE.sh

Lines changed: 4 additions & 2 deletions
@@ -2,10 +2,12 @@
 
 ################# bash -x ***.sh #################
 lr=0.001
-gpu_rank=0
+gpu_rank=4
 valid_freq=1
 epochs=100
 res_name="result/Result_PromptHash_NUSWIDE"
 recon=0.001
+hyper_cls_inter=5.0
+hyper_quan=0.1
 
-python main.py --is-train --dataset nuswide --caption-file caption.txt --query-num 2100 --train-num 10500 --lr "$lr" --rank "$gpu_rank" --valid-freq "$valid_freq" --epochs "$epochs" --result-name "$res_name" --hyper-recon "$recon"
+python main.py --is-train --dataset nuswide --caption-file caption.txt --query-num 2100 --train-num 10500 --lr "$lr" --rank "$gpu_rank" --valid-freq "$valid_freq" --epochs "$epochs" --result-name "$res_name" --hyper-recon "$recon" --hyper-cls-inter "$hyper_cls_inter" --hyper-quan "$hyper_quan"
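All six launch scripts above pass two new flags, `--hyper-cls-inter` and `--hyper-quan`, through to `main.py`. The diff does not show how `main.py` consumes them; the sketch below only illustrates how such flags are typically declared with `argparse`. The flag names come from the commands above, while the defaults and surrounding structure are assumptions.

```python
# Illustrative only: how the new hyperparameter flags might be declared in main.py.
# Only the flag names come from the scripts above; defaults and structure are assumed.
import argparse

parser = argparse.ArgumentParser(description="PromptHash training (sketch)")
parser.add_argument("--is-train", action="store_true")
parser.add_argument("--dataset", type=str, default="coco")
parser.add_argument("--lr", type=float, default=1e-3)
parser.add_argument("--hyper-recon", type=float, default=0.001)
# New in this commit: hyperparameters supplied by the updated scripts.
parser.add_argument("--hyper-cls-inter", type=float, default=20.0)
parser.add_argument("--hyper-quan", type=float, default=1.0)

args = parser.parse_args()
# argparse maps dashes to underscores, so the values arrive as:
print(args.hyper_cls_inter, args.hyper_quan)
```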

README.md

Lines changed: 6 additions & 4 deletions
@@ -14,20 +14,22 @@ cd PromptHash
 
 2. Please install the following packages:
 ```bash
-conda create -n prompthash python=3.11 -y
+conda create -n prompthash python=3.13 -y
 conda activate prompthash
 ```
 
-3. Install PyTorch 2.3.1, mamba_ssm, and causal-conv1d:
+3. Install PyTorch 2.7.0, mamba_ssm, and causal-conv1d:
 ```bash
-# Install PyTorch 2.3.1
-conda install pytorch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 -c pytorch
+# Install PyTorch 2.7.0
+pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
 
 # Install mamba_ssm
 # Please refer to https://github.com/state-spaces/mamba for detailed installation instructions
+# If you are using a CUDA 12.8 environment, you can download the pre-built whl file from the release page
 
 # Install causal-conv1d
 # Please refer to https://github.com/Dao-AILab/causal-conv1d for detailed installation instructions
+# If you are using a CUDA 12.8 environment, you can download the pre-built whl file from the release page
 ```
 
 ## Data 🗂️
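A quick way to confirm that the environment described in the updated README is usable is to import the packages it names. This is a hedged sanity check only: `torch`, `mamba_ssm`, and `causal_conv1d` are the standard import names of the installed packages, and nothing here is specific to PromptHash.

```python
# Hedged sanity check for the installation steps above.
import torch

print(torch.__version__)          # the README targets a 2.7.x CUDA 12.8 build
print(torch.cuda.is_available())  # True if the cu128 wheels match your driver

try:
    import mamba_ssm       # noqa: F401
    import causal_conv1d   # noqa: F401
    print("mamba_ssm and causal-conv1d import cleanly")
except ImportError as err:
    print(f"CUDA extension not installed correctly: {err}")
```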

dataset/README.md

Lines changed: 68 additions & 0 deletions
@@ -0,0 +1,68 @@
+### The generation of each mat file
+
+You can use our pre-processed datasets, which makes it easier to get started.
+
+To generate the required `.mat` files:
+1. Download the cleaned datasets from `pan.baidu.com`:
+
+   link: https://pan.baidu.com/s/1jCYEBhm-bpikAh_Bti139g
+   password: 9idm
+
+2. Move the downloaded `all_imgs.txt`, `all_tags.txt`, and `all_labels.txt` to `./dataset/XXXDatasetName/` as follows:
+```
+dataset
+├── coco
+│   ├── all_imgs.txt
+│   ├── all_tags.txt
+│   └── all_labels.txt
+├── flickr25k
+│   ├── all_imgs.txt
+│   ├── all_tags.txt
+│   └── all_labels.txt
+└── nuswide
+    ├── all_imgs.txt
+    ├── all_tags.txt
+    └── all_labels.txt
+```
+3. Set the variable `img_root_path` in the `make_XXXDatasetName.py` scripts to the absolute path of the directory containing all source images (the images are also available at the `pan.baidu.com` link above).
+4. Run the `make_XXXDatasetName.py` scripts to generate the corresponding `.mat` files, then use these `.mat` files to run the experiments.
+
+
+### (Optional) The meaning and format of each mat file
+
+#### caption.mat
+For each dataset, `caption.mat` holds the text-modality data. It is a mat file with the key `caption`.
+Its shape is, e.g., `(20015,)` for MIRFlickr25K.
+Each element is a `string` that describes one image, e.g., "cigarette tattoos smoke red dress sunglasses" for `im1.jpg` in the MIRFlickr25K dataset.
+
+Note that 20,015 instances of MIRFlickr25K with 1,386 frequent textual tags and 190,421 instances of NUSWIDE with 1,000 frequent textual tags are used for the experiments.
+
+For MS COCO, we obtain 122,218 data points by removing the pairs without any label, following DCHMT, and one of the five sentences is randomly selected to form each image-text pair.
+
+#### index.mat
+
+`index.mat` is a mat file with the key `index`. Its shape is `(20015,)` for MIRFlickr25K.
+Each element is a `string` giving an image path, e.g., "/path/flickr25k/im1.jpg".
+
+#### label.mat
+
+`label.mat` is a mat file with the key `label`. Its shape is `(20015, 24)` for MIRFlickr25K.
+Each element is a `numpy.ndarray`, e.g., `[0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 1. 1. 0. 0. 0. 0.]`.
+
+For all datasets, the details are as follows:
+
+| Dataset | File name | Shape | One element |
+|:------------:|:------------------------:|:------------:|:--------------------------------------------:|
+| MIRFlickr25K | caption.mat | (20015,) | cigarette tattoos smoke red dress sunglasses |
+| MIRFlickr25K | index.mat | (20015,) | /path/im1.jpg |
+| MIRFlickr25K | label.mat | (20015, 24) | [0. 0. ... 0.] |
+| MS COCO | caption.mat | (122218,) | A woman cutting a large white sheet cake |
+| MS COCO | index.mat | (122218,) | /path/COCO_val2014_000000522418.jpg |
+| MS COCO | label.mat | (122218, 80) | [1. 0. ... 0.] |
+| NUSWIDE | caption.mat | (190421,) | portrait man flash sunglasses actor december |
+| NUSWIDE | index.mat | (190421,) | /path/0001_2124494179.jpg |
+| NUSWIDE | label.mat | (190421, 21) | [0. 0. ... 0.] |
+
+You should generate these mat files in the above format for the experiments.
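As a hedged companion to the table above, the following sketch loads the three `.mat` files with `scipy.io.loadmat` and checks their keys and shapes. The paths are placeholders, and depending on how the files were saved, the string arrays may load as `(1, N)` cell arrays rather than `(N,)`.

```python
# Sketch: inspect generated .mat files against the documented keys and shapes.
import scipy.io as scio

root = "dataset/flickr25k"  # placeholder path; point at your dataset directory
captions = scio.loadmat(f"{root}/caption.mat")["caption"]
index = scio.loadmat(f"{root}/index.mat")["index"]
labels = scio.loadmat(f"{root}/label.mat")["label"]

print(captions.shape)  # documented as (20015,) for MIRFlickr25K; may load as (1, 20015)
print(index.shape)     # one image path per entry
print(labels.shape)    # (20015, 24): one multi-hot label vector per image
```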

hash_model.py

Lines changed: 2 additions & 22 deletions
@@ -71,7 +71,7 @@ def replace_underscore(self, name_list):
         self._prompt_cache[cache_key] = prompt_ids
         return prompt_ids
 
-    @torch.cuda.amp.autocast()
+    @torch.amp.autocast("cuda")
     def forward(self, classnames):
         batch_size = len(classnames)
 
@@ -109,30 +109,9 @@ def __init__(self, num_layers=1, hidden_size=1024, nhead=4):
         self.inproj = nn.Linear(self.sigal_d, self.sigal_d)
         self.outproj = nn.Linear(self.sigal_d, self.sigal_d)
         self.mamba = MambaLayer(dim=self.sigal_d, d_state=16, d_conv=4, expand=2)
-        # self.grn1 = nn.LayerNorm(self.sigal_d)
-        # self.grn2 = nn.LayerNorm(self.d_model)
         self.grn1 = GRN(dim=self.sigal_d)
         self.grn2 = GRN(dim=self.d_model)
 
-    def weight_init(self):
-        self.inproj.apply(self.kaiming_init)
-        self.outproj.apply(self.kaiming_init)
-
-    def kaiming_init(self, m):
-        classname = m.__class__.__name__
-        if classname.find('Conv') != -1:
-            init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
-            if m.bias is not None:
-                init.constant_(m.bias, 0.0)
-        elif classname.find('Linear') != -1:
-            init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
-            if m.bias is not None:
-                init.constant_(m.bias, 0.0)
-        elif classname.find('Norm') != -1:
-            init.normal_(m.weight.data, 1.0, 0.02)
-            if m.bias is not None:
-                init.constant_(m.bias.data, 0.0)
-
     def forward(self, img_cls, txt_eos):
         short_img_cls = self.inproj(img_cls)
         short_txt_eos = self.inproj(txt_eos)
@@ -355,6 +334,7 @@ def __init__(self, class_name_list, layers_to_unfreeze, args=None):
 
         # Unfreeze specific layers
         for name, param in self.clip.named_parameters():
+            # print(name)
             if name in layers_to_unfreeze:
                 param.requires_grad = True
 
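The decorator swap in the first hunk follows PyTorch's move from the deprecated `torch.cuda.amp.autocast()` to the device-agnostic `torch.amp.autocast("cuda")`. A minimal, self-contained illustration of the new form is below; the module and its forward body are stand-ins, not the repository's code.

```python
import torch
import torch.nn as nn

class TinyHead(nn.Module):  # hypothetical stand-in module
    def __init__(self, dim: int = 512):
        super().__init__()
        self.proj = nn.Linear(dim, dim)

    @torch.amp.autocast("cuda")  # new-style decorator, as used in this commit
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # The body runs under CUDA mixed precision when a GPU is present.
        return self.proj(x)
```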

load_data.py

Lines changed: 2 additions & 2 deletions
@@ -3,7 +3,7 @@
 from __future__ import unicode_literals
 from __future__ import print_function
 
-from model.open_clip.simple_tokenizer import SimpleTokenizer
+from model.open_clip.tokenizer import SimpleTokenizer
 import os
 import numpy as np
 import scipy.io as scio
@@ -153,7 +153,7 @@ def generate_dataset(captionFile: str,
             raise RuntimeError("text file is not support, we only read the keys of [caption, tags, YAll].")
         captions = captions[0] if captions.shape[0] == 1 else captions
     elif captionFile.endswith("txt"):
-        with open(captionFile, "r") as f:
+        with open(captionFile, 'r', encoding="utf-8") as f:
            captions = f.readlines()
        captions = np.asarray([[item.strip()] for item in captions])
    else:
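The second hunk pins the caption-file read to UTF-8, which removes the dependence on the platform's default encoding (on Windows this would otherwise often be cp1252 and fail on non-ASCII tags). The same pattern in isolation, with a hypothetical helper name:

```python
import numpy as np

def read_caption_lines(path: str) -> np.ndarray:
    # Explicit UTF-8 mirrors the change above and behaves the same on every platform.
    with open(path, "r", encoding="utf-8") as f:
        lines = f.readlines()
    # One single-element list per line, matching the np.asarray layout used in load_data.py.
    return np.asarray([[line.strip()] for line in lines])
```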

model/modules.py

Lines changed: 1 addition & 4 deletions
@@ -2,6 +2,7 @@
 import torch.nn as nn
 import torch.nn.functional as F
 from mamba_ssm import Mamba
+
 class GRN(nn.Module):
     """ GRN (Global Response Normalization) layer
     """
@@ -36,12 +37,10 @@ def __init__(self, dim, d_state=16, d_conv=4, expand=2):
         self.dim = dim
         self.nin = nn.Linear(dim, dim)
         self.nin2 = nn.Linear(dim, dim)
-        # self.norm2 = nn.LayerNorm(dim)
         self.norm2 = GRN1(dim=dim)
         self.act2 = nn.SiLU()
         self.act3 = nn.SiLU()
 
-        # self.norm = nn.LayerNorm(dim)
         self.norm = GRN1(dim=dim)
         self.act = nn.SiLU()
         self.mamba = Mamba(
@@ -88,8 +87,6 @@ def forward(self, x):
 
 if __name__ == '__main__':
     mamba = MambaLayer(dim=512).cuda()
-
-
     input = torch.rand(32, 196, 512).cuda()
     output = mamba(input)
     print(input.size())
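The commented-out `nn.LayerNorm` lines removed here had already been superseded by `GRN1` modules; the GRN definition itself lies outside this hunk. For orientation, here is a hedged sketch of a Global Response Normalization layer in the ConvNeXt V2 style, applied to `(batch, tokens, dim)` inputs; the repository's `GRN`/`GRN1` classes may differ in detail.

```python
import torch
import torch.nn as nn

class GRNSketch(nn.Module):
    """Global Response Normalization over the channel dimension (ConvNeXt V2 style).

    Illustrative only; not the repository's GRN/GRN1 implementation.
    """
    def __init__(self, dim: int):
        super().__init__()
        self.gamma = nn.Parameter(torch.zeros(1, 1, dim))
        self.beta = nn.Parameter(torch.zeros(1, 1, dim))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: (batch, tokens, dim)
        gx = torch.norm(x, p=2, dim=1, keepdim=True)        # aggregate over tokens
        nx = gx / (gx.mean(dim=-1, keepdim=True) + 1e-6)    # divisive normalization across channels
        return self.gamma * (x * nx) + self.beta + x        # learnable calibration + residual
```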

model/open_clip/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -1,3 +1,5 @@
+from .version import __version__
+
 from .coca_model import CoCa
 from .constants import OPENAI_DATASET_MEAN, OPENAI_DATASET_STD
 from .factory import create_model, create_model_and_transforms, create_model_from_pretrained, get_tokenizer, create_loss
