
Commit c0166bf

Add files via upload
1 parent 08f2f46 commit c0166bf


69 files changed: +558950 −1 lines

README.md

+59 −1

# QUILL

QUILL: Quotation Generation Enhancement of Large Language Models

## Install Dependencies

```
```

## Note

Before proceeding, run the script `app.sh` to start the services that compute the PPL and extract the quotes. (Confirm that the model paths in the files are set correctly.)

```
cd QUILL/
CUDA_VISIBLE_DEVICES=0 python /code/app/ppl_compute.py
CUDA_VISIBLE_DEVICES=0 python /code/app/quote_extract.py
```

## Evaluation System for QG (Quotation Generation)

You can evaluate the QG task with any desired model via the script `naive.sh`:

```
cd QUILL/
model='llama2-70b-chat-hf'
num=1
memory=0.8
prompt='0_shot_quote'
CUDA_VISIBLE_DEVICES=0 python /code/naive_rewrite.py --model_name "$model" --file_name 'quote_author' --tensor_parallel_size "$num" --gpu_memory_utilization "$memory" --prompt "$prompt"
CUDA_VISIBLE_DEVICES=0 python /code/naive_compute.py --model_name "$model" --prompt "$prompt"
```

All model results are in the folder [data/eval](data/eval).

## Reranking Metrics

The metrics for our designed reranker and for the other rerankers can be calculated using the script `ablation.sh`:

```
cd QUILL/
#### QUILL's Reranker
CUDA_VISIBLE_DEVICES=0 python /code/ablation.py --file_name 'quote_author' --rerank_fun 'avg_novelty'

#### Other Rerankers
CUDA_VISIBLE_DEVICES=0 LINKER_TYPE="json" JSON_LINKER_PATH="JSON_LINKER.json" python /code/ablation.py --file_name 'quote_author' --rerank_fun 'bm25'
```

All the reranker models are in the folder [code/reranker](code/reranker).
All the reranking results are in the folder [data/eval/ablation](data/eval/ablation).

## Data

The collected data can be found in [data/rag](data/rag). All samples have been anonymized.

## Citation

```
```

ablation.sh

+9

#### QUILL's Reranker
CUDA_VISIBLE_DEVICES=0 python /code/ablation.py --file_name 'quote_author' --rerank_fun 'avg_novelty'

#### Other Rerankers
CUDA_VISIBLE_DEVICES=0 LINKER_TYPE="json" JSON_LINKER_PATH="JSON_LINKER.json" python /code/ablation.py --file_name 'quote_author' --rerank_fun 'bm25'

app.sh

+4

cd QUILL/
CUDA_VISIBLE_DEVICES=0 python /code/app/ppl_compute.py
CUDA_VISIBLE_DEVICES=0 python /code/app/quote_extract.py

code/ablation.py

+155

## Ablation 1: validate that QUILL's reranker is useful, i.e. ppl1, ppl2, avg, novelty.

## vanilla: no rerank, i.e. the Top-1 recalled candidate based on similarity.
## ppl1: compute the PPL of the quote given the preceding text.
## ppl2: compute the PPL of the quote given the preceding text plus the first k words of the quote.
## Other rerankers: supervised (BM25, monoT5), unsupervised (UPR, BGE), GPT (GPT-3.5-turbo, GPT-4o).

###########
import argparse
import os

import pandas as pd
from tqdm import tqdm

from rag.rag_module import MyVectorDBConnector
from rag.rag_function import retrieval
from eval.rerank_dcg import ndcg_at_k
from eval.rerank_score import mrr_score, hits_at_k
from reranker.chatgpt import gpt_rerank
from reranker.bge import model_bge, bge_rerank
from reranker.upr import model_upr, upr_rerank
from reranker.bm25 import model_bm25, bm25_rerank
from reranker.monoT5 import model_monoT5, monoT5_rerank
from reranker.cal_feature import *
from utils.utils import *
from app.app_compute import *

vector = MyVectorDBConnector(path='QUILL/code/rag/model/quill_final', collection_name='quill_final')

# Map the --rerank_fun CLI value to the corresponding scoring/reranking function.
RERANK_FUNCS = {
    'vanilla': None,
    'ppl1': cal_feature_ppl1,
    'ppl2': cal_feature_ppl2,
    'avg': cal_feature_avg,
    'ppl1_novelty': cal_feature_ppl1_novelty,
    'ppl2_novelty': cal_feature_ppl2_novelty,
    'avg_novelty': cal_feature_avg_novelty,
    'chatgpt': gpt_rerank,
    'bge': bge_rerank,
    'upr': upr_rerank,
    'bm25': bm25_rerank,
    'monoT5': monoT5_rerank,
}


def rerank_fn(reranker, old_context, topk_list, ppl_fun=None):
    try:
        if ppl_fun is None:
            # vanilla: keep the retrieval order.
            return topk_list
        # The dedicated rerankers take the whole candidate list at once.
        if ppl_fun == gpt_rerank:
            return gpt_rerank(topk_list, old_context)
        if ppl_fun == bge_rerank:
            return bge_rerank(reranker, topk_list, old_context)
        if ppl_fun == upr_rerank:
            return upr_rerank(reranker, old_context, topk_list)
        if ppl_fun == bm25_rerank:
            return bm25_rerank(reranker, old_context, topk_list)
        if ppl_fun == monoT5_rerank:
            return monoT5_rerank(reranker, old_context, topk_list)
    except Exception as e:
        print('error', e)
        return ['error'] * 5  # sentinel list so downstream metrics still run
    try:
        # QUILL's own scorers: sort the candidates by ascending feature score
        # (lower conditional PPL is better).
        if isinstance(topk_list[0], str):
            topk_list[0] = eval(topk_list[0])
        rerank_list = sorted(topk_list[0], key=lambda x: ppl_fun(context=old_context, string=x), reverse=False)
        print('rerank', str(rerank_list))
        return rerank_list
    except Exception as e:
        print('error', e)
        return ['error'] * 5


def ablation(reranker, data_info, ppl_fun, index):
    query = data_info['挖空语料-插入点']  # cloze context with the insertion point
    golden_author = data_info['作者']     # gold author
    golden_quote = data_info['引言']      # gold quotation
    print("Query: " + query)
    topk_list = retrieval(vector, query, 5, golden_author)
    print('The retrieval Top K:', str(topk_list))
    ppl_fun = RERANK_FUNCS[ppl_fun]
    rerank_list = rerank_fn(reranker, query, topk_list, ppl_fun)
    if ppl_fun is None:
        rerank_list = rerank_list[0]
    quote = rerank_list[0]
    mrr = mrr_score(golden_quote, rerank_list)
    hit1 = hits_at_k(golden_quote, rerank_list, 1)
    hit3 = hits_at_k(golden_quote, rerank_list, 3)
    # Search_quote_rel is the precomputed relevance dict (built once by
    # eval/rerank_dcg4rel.py) and comes in via the star imports above.
    ndcg_1 = ndcg_at_k(rerank_list, Search_quote_rel, index, k=1)
    ndcg_3 = ndcg_at_k(rerank_list, Search_quote_rel, index, k=3)
    return quote, mrr, hit1, hit3, ndcg_1, ndcg_3, rerank_list


def main(args):
    file_name = args.file_name
    ppl_fun = args.rerank_fun
    # Only the supervised/unsupervised rerankers need a loaded model.
    if ppl_fun == 'bge':
        reranker = model_bge()
    elif ppl_fun == 'upr':
        reranker = model_upr()
    elif ppl_fun == 'bm25':
        reranker = model_bm25()
    elif ppl_fun == 'monoT5':
        reranker = model_monoT5()
    else:
        reranker = None
    file_path = f'QUILL/data/dev/{file_name}.xlsx'
    df = pd.read_excel(file_path)
    rerank_quote = []
    mrr_list = []
    hit1_list = []
    hit3_list = []
    ndcg1_list = []
    ndcg3_list = []
    rerank_all_list = []
    for index, row in tqdm(df.iterrows(), total=df.shape[0]):
        quote, mrr, hit1, hit3, ndcg_1, ndcg_3, rerank_list = ablation(reranker, row, ppl_fun, index)
        rerank_quote.append(quote)
        mrr_list.append(mrr)
        hit1_list.append(hit1)
        hit3_list.append(hit3)
        ndcg1_list.append(ndcg_1)
        ndcg3_list.append(ndcg_3)
        rerank_all_list.append(rerank_list)
    df['rerank_all'] = rerank_all_list
    df['rerank_quote'] = rerank_quote
    df['mrr'] = mrr_list
    df['hit1'] = hit1_list
    df['hit3'] = hit3_list
    df['dcg1'] = ndcg1_list
    df['dcg3'] = ndcg3_list
    file_path = f'/QUILL/data/eval/ablation/res_{file_name}_{ppl_fun}.xlsx'
    directory = os.path.dirname(file_path)
    os.makedirs(directory, exist_ok=True)
    df.to_excel(file_path, index=False)
    print("The new Excel file has been saved.")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Ablation Pipeline")
    parser.add_argument('--rerank_fun', type=str, required=True, help="which rerank function to ablate")
    parser.add_argument('--file_name', type=str, required=True, help="dev file name")
    args = parser.parse_args()
    main(args)
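The `cal_feature_*` scorers imported above live in `code/reranker/cal_feature.py`, which is not part of this commit. A minimal sketch of what the header comments describe might look like the following; the exact formulas, the value of k, and the handling of the novelty term are assumptions.

```
# Hypothetical sketch of reranker/cal_feature.py (not in this commit);
# compute_ppl is the retry client from app/app_compute.py.
from app.app_compute import compute_ppl

def cal_feature_ppl1(context, string):
    # PPL of the candidate quote given the preceding text.
    return compute_ppl(context, string)

def cal_feature_ppl2(context, string, k=5):
    # PPL of the rest of the quote given the preceding text plus the
    # quote's first k words (k is an assumed default).
    head = ' '.join(string.split()[:k])
    return compute_ppl(context + head, string[len(head):])

def cal_feature_avg(context, string):
    # Average of the two PPL features; the *_novelty variants would
    # additionally fold in a novelty score, omitted here.
    return (cal_feature_ppl1(context, string) + cal_feature_ppl2(context, string)) / 2
```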

code/app/app_compute.py

+31

import time

import requests


def compute_ppl(left, right):
    """Query the PPL service (ppl_compute.py) with simple retry logic."""
    data_to_send = {"left": left, "right": right}
    attempt = 0
    max_retries = 10
    backoff_factor = 1
    while attempt < max_retries:
        response = requests.post("http://10.176.40.139:8080/generate", json=data_to_send)
        if response.status_code == 200:
            return response.json()[0]
        attempt += 1
        print(f"Attempt {attempt} failed with status code: {response.status_code}. Retrying...")
        # Exponential backoff
        time.sleep(backoff_factor * (2 ** (attempt - 1)))
    raise Exception(f"Request failed after {max_retries} attempts")


def extract_quote(quote):
    """Query the quote-extraction service (quote_extract.py) with simple retry logic."""
    data_to_send = {'quote': quote}
    attempt = 0
    max_retries = 10
    backoff_factor = 1
    while attempt < max_retries:
        response = requests.post("http://10.176.40.139:6060/extract", json=data_to_send)
        if response.status_code == 200:
            return response.json()[0]
        attempt += 1
        print(f"Attempt {attempt} failed with status code: {response.status_code}. Retrying...")
        # Exponential backoff
        time.sleep(backoff_factor * (2 ** (attempt - 1)))
    raise Exception(f"Request failed after {max_retries} attempts")
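A minimal usage sketch of the two clients above. The example strings are invented; the service URLs are the ones hard-coded in this file, so it assumes the two Flask services from `app.sh` are running and reachable.

```
# Hypothetical usage of the retry clients above.
avg_ppl = compute_ppl("He opened his speech with ", "a well-chosen quotation.")
print(avg_ppl)  # mean PPL of the right-hand text given the left-hand text

quote = extract_quote("As Wilde said, 'Be yourself; everyone else is already taken.'")
print(quote)    # the quotation extracted by the LLM service
```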

code/app/ppl_compute.py

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
from flask import Flask, request, jsonify
2+
from transformers import AutoTokenizer
3+
import os
4+
import torch
5+
from transformers import AutoModelForCausalLM, AutoTokenizer
6+
7+
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
8+
app = Flask(__name__)
9+
10+
model_path="/Qwen/Qwen2-7B-Instruct"
11+
tokenizer1 = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
12+
model1 = AutoModelForCausalLM.from_pretrained(
13+
model_path,
14+
device_map="auto",
15+
torch_dtype='auto'
16+
).eval()
17+
18+
model_path="/meta-llama/Meta-Llama-3-8B"
19+
tokenizer2 = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
20+
model2 = AutoModelForCausalLM.from_pretrained(
21+
model_path,
22+
device_map="auto",
23+
torch_dtype='auto'
24+
).eval()
25+
26+
27+
def compute_ppl(left_context,right_context, tokenizer, model, device='cuda'):
28+
context_ids = tokenizer.encode(left_context, return_tensors='pt').to(device)
29+
input_ids = tokenizer.encode(left_context+right_context, return_tensors='pt').to(device)
30+
target_ids = input_ids.clone()
31+
target_ids[:, :context_ids.shape[1]] = -100
32+
with torch.no_grad():
33+
outputs = model(input_ids, labels=target_ids)
34+
neg_log_likelihood = outputs.loss
35+
ppl = torch.exp(neg_log_likelihood)
36+
return ppl.item()
37+
38+
@app.route('/generate', methods=['POST'])
39+
def generate():
40+
data = request.json
41+
if 'left' not in data or 'right' not in data:
42+
return jsonify({'error': 'Both "left" and "right" keys are required.'}), 400
43+
left_context = data['left']
44+
right_context = data['right']
45+
46+
47+
ppl1=compute_ppl(left_context,right_context,tokenizer1,model1)
48+
ppl2=compute_ppl(left_context,right_context,tokenizer2,model2)
49+
ppl = (ppl1+ppl2)/2
50+
51+
return [ppl]
52+
53+
if __name__ == '__main__':
54+
app.run(host='0.0.0.0', port=8080)
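A quick sanity check of the conditional PPL, assuming the service above is running; the texts are made up, and the host should be swapped for wherever `ppl_compute.py` is actually deployed. A natural continuation should come back with a lower PPL than an implausible one.

```
import requests

# Hypothetical smoke test against the /generate endpoint.
url = "http://127.0.0.1:8080/generate"
natural = requests.post(url, json={"left": "The quick brown fox jumps over the ", "right": "lazy dog."}).json()[0]
odd = requests.post(url, json={"left": "The quick brown fox jumps over the ", "right": "quarterly earnings report."}).json()[0]
print(natural, odd)  # expect natural < odd
```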

code/app/quote_extract.py

+51
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
from modelscope import AutoModelForCausalLM, AutoTokenizer
2+
import os
3+
from flask import Flask, request
4+
5+
6+
with open(f'/QUILL/code/prompt/prompt_ch_extract_quote.md', 'r') as file:
7+
prompt = file.read()
8+
9+
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
10+
app = Flask(__name__)
11+
12+
model_name = "/model/Qwen2.5-32B-Instruct"
13+
model = AutoModelForCausalLM.from_pretrained(
14+
model_name,
15+
torch_dtype="auto",
16+
device_map="auto"
17+
)
18+
tokenizer = AutoTokenizer.from_pretrained(model_name)
19+
20+
21+
def quote_extract(quote):
22+
prompt_quote = prompt.replace('{quote}',quote)
23+
messages = [
24+
{"role": "system", "content": "You are a helpful assistant."},
25+
{"role": "user", "content": prompt_quote}
26+
]
27+
text = tokenizer.apply_chat_template(
28+
messages,
29+
tokenize=False,
30+
add_generation_prompt=True
31+
)
32+
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
33+
generated_ids = model.generate(
34+
**model_inputs,
35+
max_new_tokens=512
36+
)
37+
generated_ids = [
38+
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
39+
]
40+
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
41+
return response
42+
43+
@app.route('/extract', methods=['POST'])
44+
def extract():
45+
data = request.json
46+
quote = data['quote']
47+
response = quote_extract(quote)
48+
return [response]
49+
50+
if __name__ == '__main__':
51+
app.run(host='0.0.0.0', port=6060)
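A hypothetical raw client call against the endpoint above; it assumes the service is reachable on localhost (the hard-coded client in `code/app/app_compute.py` uses a different host), and the quotation text is invented.

```
import requests

# The 'quote' key and the single-element list response match the
# Flask handler above.
resp = requests.post(
    "http://127.0.0.1:6060/extract",
    json={"quote": "As Oscar Wilde said, 'Be yourself; everyone else is already taken.'"},
)
print(resp.json()[0])  # the extracted quotation
```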

code/eval/rerank_dcg.py

+24

## Search_quote_rel only needs to be computed once; use QUILL/eval/rerank_dcg4rel.py to build the dict.

import numpy as np


def get_relevances(rerank_list, Search_quote_rel, index):
    # rerank_list may arrive as a live list or as a stringified list
    # (as stored in the Excel result files).
    if isinstance(rerank_list, str):
        rerank_list = eval(rerank_list)
    relevances = []
    for quote in rerank_list:
        rel_here = Search_quote_rel[index]
        if quote in rel_here:
            relevances.append(rel_here[quote])
        else:
            relevances.append(0)
    return relevances


def dcg(relevances):
    relevances = np.asarray(relevances, dtype=float)
    return np.sum(relevances / np.log2(np.arange(1, len(relevances) + 1) + 1))


def ndcg_at_k(rerank_list, Search_quote_rel, index, k):
    relevances = get_relevances(rerank_list, Search_quote_rel, index)
    relevances_k = relevances[:k]
    dcg_value = dcg(relevances_k)
    # Ideal DCG: the same relevances sorted best-first, truncated at k.
    idcg_value = dcg(sorted(relevances, reverse=True)[:k])
    return dcg_value / idcg_value if idcg_value > 0 else 0
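A toy example of the nDCG computation; the relevance dict and quotes are made up.

```
# Hypothetical data: per-query graded relevance for each candidate quote.
Search_quote_rel = {0: {"quote A": 3, "quote B": 1}}
ranking = ["quote B", "quote A", "quote C"]  # "quote C" is unjudged -> relevance 0
print(ndcg_at_k(ranking, Search_quote_rel, index=0, k=3))  # ~0.80: the best quote sits at rank 2
```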
