Upload assets and examples
- .gitattributes +2 -0
- assets/latency.png +3 -0
- assets/overview.png +3 -0
- examples/ERank_Transformer.py +146 -0
- examples/ERank_vLLM.py +97 -0
- examples/instructions.json +10 -0
- examples/utils.py +44 -0
.gitattributes
CHANGED
@@ -34,3 +34,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 tokenizer.json filter=lfs diff=lfs merge=lfs -text
+assets/latency.png filter=lfs diff=lfs merge=lfs -text
+assets/overview.png filter=lfs diff=lfs merge=lfs -text
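For context: the two added lines follow the standard Git LFS attribute pattern, and are the kind of entries that `git lfs track "assets/latency.png"` and `git lfs track "assets/overview.png"` would append to .gitattributes, assuming Git LFS is already initialized for the repository.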
assets/latency.png
ADDED
(binary image tracked with Git LFS; no preview)

assets/overview.png
ADDED
(binary image tracked with Git LFS; no preview)
examples/ERank_Transformer.py
ADDED
@@ -0,0 +1,146 @@
from torch.nn import functional as F
from transformers import AutoModelForCausalLM, AutoTokenizer
from utils import prompt_template, truncate, hybrid_scores


class ERank_Transformer:

    def __init__(self, model_name_or_path: str):
        """
        Initializes the ERank_Transformer reranker.

        Args:
            model_name_or_path (str): The name or path of the model to be loaded.
                This can be a Hugging Face model ID or a local path.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
        self.reranker = AutoModelForCausalLM.from_pretrained(model_name_or_path).eval()
        self.reranker.to("cuda")

    def rerank(self, query: str, docs: list, instruction: str, truncate_length: int = None) -> list:
        """
        Reranks a list of documents based on a query and a specific instruction.

        Args:
            query (str): The search query provided by the user.
            docs (list): A list of dictionaries, where each dictionary represents a document
                and must contain a "content" key.
            instruction (str): The instruction for the model, guiding it on how to evaluate the documents.
            truncate_length (int, optional): The maximum length to truncate the query and document content to. Defaults to None.

        Returns:
            list: A new list of document dictionaries, sorted by their "rank_score" in descending order.
        """

        # prepare messages
        messages = [
            [{
                "role": "user",
                "content": prompt_template.format(
                    query=truncate(self.tokenizer, query, length=truncate_length) if truncate_length else query,
                    doc=truncate(self.tokenizer, doc["content"], length=truncate_length) if truncate_length else doc["content"],
                    instruction=instruction
                )
            }] for doc in docs
        ]

        # encode tokens
        texts = [
            self.tokenizer.apply_chat_template(
                each,
                tokenize=False,
                add_generation_prompt=True,
            ) for each in messages
        ]
        inputs = self.tokenizer(texts, padding=True, return_tensors="pt").to(self.reranker.device)

        # LLM completion
        outputs = self.reranker.generate(
            **inputs,
            max_new_tokens=8192,
            output_scores=True,
            return_dict_in_generate=True
        )

        # extract and organize results
        results = []
        scores = outputs.scores
        generated_ids = outputs.sequences
        answer_token_ids = self.tokenizer.encode("<answer>", add_special_tokens=False)
        for idx in range(len(texts)):

            # find <answer> in the generated sequence
            output_ids = generated_ids[idx].tolist()
            start_index = -1
            for i in range(len(output_ids) - len(answer_token_ids) - 1, -1, -1):
                if output_ids[i:i + len(answer_token_ids)] == answer_token_ids:
                    start_index = i + len(answer_token_ids)
                    break

            # start from the index after <answer>
            answer = ""
            prob = 1.0
            if start_index != -1:
                for t in range(start_index - inputs.input_ids.size(1), len(scores)):
                    generated_token_id = generated_ids[idx][inputs.input_ids.size(1) + t]
                    token = self.tokenizer.decode(generated_token_id)
                    if token.isdigit():
                        logits = scores[t][idx]
                        probs = F.softmax(logits, dim=-1)
                        prob *= probs[generated_token_id].item()
                        answer += token
                    else:
                        break

            # in case the answer is not a digit or exceeds 10
            try:
                answer = int(answer)
                assert answer <= 10
            except:
                answer = -1

            # append to the final results
            results.append({
                **docs[idx],
                "rank_score": answer * prob
            })

        # sort the reranking results for the query
        results.sort(key=lambda x: x["rank_score"], reverse=True)
        return results


if __name__ == "__main__":

    # select a model
    model_name_or_path = "Ucreate/ERank-4B"
    # model_name_or_path = "Ucreate/ERank-14B"
    # model_name_or_path = "Ucreate/ERank-32B"
    reranker = ERank_Transformer(model_name_or_path)

    # input data
    instruction = "Retrieve relevant documents for the query."
    query = "I am happy"
    docs = [
        {"content": "excited", "first_stage_score": 46.7},
        {"content": "sad", "first_stage_score": 1.5},
        {"content": "peaceful", "first_stage_score": 2.3},
    ]

    # rerank
    results = reranker.rerank(query, docs, instruction, truncate_length=2048)
    print(results)
    # [
    #   {'content': 'excited', 'first_stage_score': 46.7, 'rank_score': 4.84},
    #   {'content': 'peaceful', 'first_stage_score': 2.3, 'rank_score': 2.98},
    #   {'content': 'sad', 'first_stage_score': 1.5, 'rank_score': 0.0},
    # ]

    # Optional: hybrid with first-stage scores
    alpha = 0.2
    hybrid_results = hybrid_scores(results, alpha)
    print(hybrid_results)
    # [
    #   {'content': 'excited', 'first_stage_score': 46.7, 'rank_score': 4.84, 'hybrid_score': 1.18},
    #   {'content': 'peaceful', 'first_stage_score': 2.3, 'rank_score': 2.98, 'hybrid_score': 0.01},
    #   {'content': 'sad', 'first_stage_score': 1.5, 'rank_score': 0.0, 'hybrid_score': -1.19},
    # ]
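The rank_score returned above is the integer parsed from <answer>...</answer> multiplied by the product of the softmax probabilities of its digit tokens, so a confident "8" can outscore an unconfident "9". A minimal sketch of that arithmetic; the concrete values are hypothetical, not taken from an actual run:

# Illustrative arithmetic only (hypothetical values, not from a real run)
answer = 8               # integer parsed from "<answer>8</answer>"
digit_probs = [0.605]    # softmax probability of each generated digit token

prob = 1.0
for p in digit_probs:
    prob *= p

rank_score = answer * prob
print(rank_score)        # 4.84, on the same scale as the example output above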
examples/ERank_vLLM.py
ADDED
@@ -0,0 +1,97 @@
import torch
import math
from vllm import LLM, SamplingParams
from utils import prompt_template, truncate


class ERank_vLLM:

    def __init__(self, model_name_or_path: str):
        """
        Initializes the ERank_vLLM reranker.

        Args:
            model_name_or_path (str): The name or path of the model to be loaded.
                This can be a Hugging Face model ID or a local path.
        """
        num_gpu = torch.cuda.device_count()
        self.ranker = LLM(
            model=model_name_or_path,
            tensor_parallel_size=num_gpu,
            gpu_memory_utilization=0.95,
            enable_prefix_caching=True
        )
        self.tokenizer = self.ranker.get_tokenizer()
        self.sampling_params = SamplingParams(
            temperature=0,
            max_tokens=4096,
            logprobs=20
        )

    def rerank(self, query: str, docs: list, instruction: str, truncate_length: int = None) -> list:
        """
        Reranks a list of documents based on a query and a specific instruction.

        Args:
            query (str): The search query provided by the user.
            docs (list): A list of dictionaries, where each dictionary represents a document
                and must contain a "content" key.
            instruction (str): The instruction for the model, guiding it on how to evaluate the documents.
            truncate_length (int, optional): The maximum length to truncate the query and document content to. Defaults to None.

        Returns:
            list: A new list of document dictionaries, sorted by their "rank_score" in descending order.
        """

        # prepare messages
        messages = [
            [{
                "role": "user",
                "content": prompt_template.format(
                    query=truncate(self.tokenizer, query, length=truncate_length) if truncate_length else query,
                    doc=truncate(self.tokenizer, doc["content"], length=truncate_length) if truncate_length else doc["content"],
                    instruction=instruction
                )
            }] for doc in docs
        ]

        # LLM generate
        outputs = self.ranker.chat(messages, self.sampling_params)

        # extract and organize results
        results = []
        for doc, output in zip(docs, outputs):

            # extract the answer and its probability
            cur = ""
            answer = ""
            is_ans = False
            prob = 1.0
            for each in output.outputs[0].logprobs[-10:]:
                _, detail = next(iter(each.items()))
                token = detail.decoded_token
                logprob = detail.logprob
                if is_ans and token.isdigit():
                    answer += token
                    prob *= math.exp(logprob)
                else:
                    cur += token
                    if cur.endswith("<answer>"):
                        is_ans = True

            # in case the answer is not a digit or exceeds 10
            try:
                answer = int(answer)
                assert answer <= 10
            except:
                answer = -1

            # append to the final results
            results.append({
                **doc,
                "rank_score": answer * prob
            })

        # sort the reranking results for the query
        results.sort(key=lambda x: x["rank_score"], reverse=True)
        return results
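Unlike examples/ERank_Transformer.py, this file ships without a __main__ demo. A minimal usage sketch, reusing the toy inputs and model choice from the Transformer example above:

from ERank_vLLM import ERank_vLLM

reranker = ERank_vLLM("Ucreate/ERank-4B")

instruction = "Retrieve relevant documents for the query."
query = "I am happy"
docs = [
    {"content": "excited", "first_stage_score": 46.7},
    {"content": "sad", "first_stage_score": 1.5},
    {"content": "peaceful", "first_stage_score": 2.3},
]

results = reranker.rerank(query, docs, instruction, truncate_length=2048)
print(results)  # document dicts sorted by "rank_score", highest first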
examples/instructions.json
ADDED
@@ -0,0 +1,10 @@
{
    "BRIGHT (AoPS)": "We want to find different but similar math problems to the query. A document is relevant if it uses the same class of functions and shares any overlapping techniques.",
    "BRIGHT (LeetCode)": "I am looking to find different problems that share similar data structures (of any kind) or algorithms (e.g. DFS, DP, sorting, traversals, etc.). I am looking for problems that share one or both of these similarities to the query. Does the passage below share any similarities? e.g. if there was a textbook on leetcode problems, this would be in the same book even though it could be in a different chapter.",
    "BRIGHT (Pony)": "I will use the programming language pony. But to solve the problem above, I need to know things about pony. A passage is relevant if it contains docs that match any part (even basic parts) of the code I will have to write for the above program.",
    "BRIGHT (TheoremQA-Q)": "We want to find a document which uses the same mathematical process as the query. A document is relevant if it uses the same mathematical process as the query.",
    "BRIGHT (TheoremQA-T)": "We want to find a document which uses the same mathematical process as the query. A document is relevant if it uses the same mathematical process as the query.",
    "BRIGHT (others)": "A document is relevant if it contains information that helps answer or address the query. A document is not relevant if it doesn't contain information that helps answer the query, even if it mentions similar topics.",
    "BEIR / TREC DL": "Given a query, retrieval relevant passage.",
    "FollowIR": "Retrieval the relevant passage for the given query. Be careful about the extra requirements about relevance in the query."
}
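Each value is intended to be passed as the instruction argument of rerank. A minimal sketch of loading the file (assumes the script runs from the examples/ directory; the query and document below are made-up placeholders):

import json
from ERank_Transformer import ERank_Transformer

with open("instructions.json") as f:
    instructions = json.load(f)

# pick the instruction that matches the benchmark being evaluated
instruction = instructions["BEIR / TREC DL"]

reranker = ERank_Transformer("Ucreate/ERank-4B")
results = reranker.rerank(
    query="placeholder query",
    docs=[{"content": "placeholder document"}],
    instruction=instruction,
)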
examples/utils.py
ADDED
@@ -0,0 +1,44 @@
import numpy as np

prompt_template = """Given a query and a document, please give a relevance score of 0~10.
The goal or relevance definition is: {instruction}

Here is the query:
{query}

Here is the document:
{doc}

After thinking, directly choose a relevance score from [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10].
- 0 represents completely not related
- 10 means perfectly related.

Desired output format:
<think>put your thinking here</think><answer>Only allows an integer here</answer>

Your output:"""


def truncate(tokenizer, text, length):
    if length is None or text is None:
        return text
    return tokenizer.convert_tokens_to_string(tokenizer.tokenize(text)[:length])


def hybrid_scores(results, alpha):
    first_stage_scores = [each["first_stage_score"] for each in results]
    rank_scores = [each["rank_score"] for each in results]
    first_stage_mean, first_stage_std = np.mean(first_stage_scores), np.std(first_stage_scores)
    rank_mean, rank_std = np.mean(rank_scores), np.std(rank_scores)

    hybrid_results = []
    for result in results:
        normalized_first_stage_score = (result["first_stage_score"] - first_stage_mean) / first_stage_std
        normalized_rank_score = (result["rank_score"] - rank_mean) / rank_std
        hybrid_results.append({
            **result,
            "hybrid_score": float(alpha * normalized_first_stage_score + (1 - alpha) * normalized_rank_score)
        })
    hybrid_results.sort(key=lambda x: x["hybrid_score"], reverse=True)

    return hybrid_results
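hybrid_scores z-normalizes the first-stage and rank scores separately (population mean/std via numpy) and blends them as alpha * first_stage + (1 - alpha) * rank. A quick check against the toy numbers from examples/ERank_Transformer.py:

import numpy as np

first_stage = np.array([46.7, 1.5, 2.3])   # first_stage_score of excited, sad, peaceful
rank = np.array([4.84, 0.0, 2.98])         # rank_score from the example output above

z_fs = (first_stage - first_stage.mean()) / first_stage.std()
z_rank = (rank - rank.mean()) / rank.std()

alpha = 0.2
hybrid = alpha * z_fs + (1 - alpha) * z_rank
print(hybrid.round(2))  # [ 1.18 -1.19  0.01], matching the hybrid_score comments above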