Safetensors · qwen3

Commit 6b2e913 (verified) · 1 parent: a3289a8
Ucreate committed: Upload assets and examples
.gitattributes CHANGED
@@ -34,3 +34,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 tokenizer.json filter=lfs diff=lfs merge=lfs -text
+assets/latency.png filter=lfs diff=lfs merge=lfs -text
+assets/overview.png filter=lfs diff=lfs merge=lfs -text
assets/latency.png ADDED

Git LFS Details

  • SHA256: 1856b8a8431ba274392c617c023eb740c43fa9dc3efe989bf785299468497813
  • Pointer size: 131 Bytes
  • Size of remote file: 417 kB
assets/overview.png ADDED

Git LFS Details

  • SHA256: ceb3b7cd2434480a45ea8a3c8184e47be6c67de74ab2f22edc71103ac4a5b570
  • Pointer size: 131 Bytes
  • Size of remote file: 754 kB
examples/ERank_Transformer.py ADDED
@@ -0,0 +1,146 @@
from torch.nn import functional as F
from transformers import AutoModelForCausalLM, AutoTokenizer
from utils import prompt_template, truncate, hybrid_scores


class ERank_Transformer:

    def __init__(self, model_name_or_path: str):
        """
        Initializes the ERank_Transformer reranker.

        Args:
            model_name_or_path (str): The name or path of the model to be loaded.
                This can be a Hugging Face model ID or a local path.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
        self.reranker = AutoModelForCausalLM.from_pretrained(model_name_or_path).eval()
        self.reranker.to("cuda")

    def rerank(self, query: str, docs: list, instruction: str, truncate_length: int = None) -> list:
        """
        Reranks a list of documents based on a query and a specific instruction.

        Args:
            query (str): The search query provided by the user.
            docs (list): A list of dictionaries, where each dictionary represents a document
                and must contain a "content" key.
            instruction (str): The instruction for the model, guiding it on how to evaluate the documents.
            truncate_length (int, optional): The maximum length to truncate the query and document content to. Defaults to None.

        Returns:
            list: A new list of document dictionaries, sorted by their "rank_score" in descending order.
        """

        # prepare messages
        messages = [
            [{
                "role": "user",
                "content": prompt_template.format(
                    query=truncate(self.tokenizer, query, length=truncate_length) if truncate_length else query,
                    doc=truncate(self.tokenizer, doc["content"], length=truncate_length) if truncate_length else doc["content"],
                    instruction=instruction
                )
            }] for doc in docs
        ]

        # encode tokens
        texts = [
            self.tokenizer.apply_chat_template(
                each,
                tokenize=False,
                add_generation_prompt=True,
            ) for each in messages
        ]
        inputs = self.tokenizer(texts, padding=True, return_tensors="pt").to(self.reranker.device)

        # LLM completion
        outputs = self.reranker.generate(
            **inputs,
            max_new_tokens=8192,
            output_scores=True,
            return_dict_in_generate=True
        )

        # extract and organize results
        results = []
        scores = outputs.scores
        generated_ids = outputs.sequences
        answer_token_ids = self.tokenizer.encode("<answer>", add_special_tokens=False)
        for idx in range(len(texts)):

            # find <answer> in the generated sequence
            output_ids = generated_ids[idx].tolist()
            start_index = -1
            for i in range(len(output_ids) - len(answer_token_ids) - 1, -1, -1):
                if output_ids[i:i + len(answer_token_ids)] == answer_token_ids:
                    start_index = i + len(answer_token_ids)
                    break

            # start from the index after <answer>
            answer = ""
            prob = 1.0
            if start_index != -1:
                for t in range(start_index - inputs.input_ids.size(1), len(scores)):
                    generated_token_id = generated_ids[idx][inputs.input_ids.size(1) + t]
                    token = self.tokenizer.decode(generated_token_id)
                    if token.isdigit():
                        logits = scores[t][idx]
                        probs = F.softmax(logits, dim=-1)
                        prob *= probs[generated_token_id].item()
                        answer += token
                    else:
                        break

            # in case the answer is not a digit or exceeds 10
            try:
                answer = int(answer)
                assert answer <= 10
            except:
                answer = -1

            # append to the final results
            results.append({
                **docs[idx],
                "rank_score": answer * prob
            })

        # sort the reranking results for the query
        results.sort(key=lambda x: x["rank_score"], reverse=True)
        return results


if __name__ == "__main__":

    # select a model
    model_name_or_path = "Ucreate/ERank-4B"
    # model_name_or_path = "Ucreate/ERank-14B"
    # model_name_or_path = "Ucreate/ERank-32B"
    reranker = ERank_Transformer(model_name_or_path)

    # input data
    instruction = "Retrieve relevant documents for the query."
    query = "I am happy"
    docs = [
        {"content": "excited", "first_stage_score": 46.7},
        {"content": "sad", "first_stage_score": 1.5},
        {"content": "peaceful", "first_stage_score": 2.3},
    ]

    # rerank
    results = reranker.rerank(query, docs, instruction, truncate_length=2048)
    print(results)
    # [
    #     {'content': 'excited', 'first_stage_score': 46.7, 'rank_score': 4.84},
    #     {'content': 'peaceful', 'first_stage_score': 2.3, 'rank_score': 2.98},
    #     {'content': 'sad', 'first_stage_score': 1.5, 'rank_score': 0.0},
    # ]

    # Optional: hybrid with first-stage scores
    alpha = 0.2
    hybrid_results = hybrid_scores(results, alpha)
    print(hybrid_results)
    # [
    #     {'content': 'excited', 'first_stage_score': 46.7, 'rank_score': 4.84, 'hybrid_score': 1.18},
    #     {'content': 'peaceful', 'first_stage_score': 2.3, 'rank_score': 2.98, 'hybrid_score': 0.01},
    #     {'content': 'sad', 'first_stage_score': 1.5, 'rank_score': 0.0, 'hybrid_score': -1.19}
    # ]
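
Note on the scores: the "rank_score" sorted on above is the integer parsed from <answer>…</answer>, weighted by the model's confidence in the digit tokens it generated. A toy illustration of that arithmetic (hypothetical probabilities, not taken from a real run):

    # hypothetical: the model emits "<answer>10</answer>" and assigns the digit
    # tokens "1" and "0" probabilities of 0.8 and 0.9 at their generation steps
    answer = 10
    prob = 0.8 * 0.9            # per-digit probabilities are multiplied
    rank_score = answer * prob  # 7.2, the value the documents are sorted by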
examples/ERank_vLLM.py ADDED
@@ -0,0 +1,97 @@
import torch
import math
from vllm import LLM, SamplingParams
from utils import prompt_template, truncate


class ERank_vLLM:

    def __init__(self, model_name_or_path: str):
        """
        Initializes the ERank_vLLM reranker.

        Args:
            model_name_or_path (str): The name or path of the model to be loaded.
                This can be a Hugging Face model ID or a local path.
        """
        num_gpu = torch.cuda.device_count()
        self.ranker = LLM(
            model=model_name_or_path,
            tensor_parallel_size=num_gpu,
            gpu_memory_utilization=0.95,
            enable_prefix_caching=True
        )
        self.tokenizer = self.ranker.get_tokenizer()
        self.sampling_params = SamplingParams(
            temperature=0,
            max_tokens=4096,
            logprobs=20
        )

    def rerank(self, query: str, docs: list, instruction: str, truncate_length: int = None) -> list:
        """
        Reranks a list of documents based on a query and a specific instruction.

        Args:
            query (str): The search query provided by the user.
            docs (list): A list of dictionaries, where each dictionary represents a document
                and must contain a "content" key.
            instruction (str): The instruction for the model, guiding it on how to evaluate the documents.
            truncate_length (int, optional): The maximum length to truncate the query and document content to. Defaults to None.

        Returns:
            list: A new list of document dictionaries, sorted by their "rank_score" in descending order.
        """

        # prepare messages
        messages = [
            [{
                "role": "user",
                "content": prompt_template.format(
                    query=truncate(self.tokenizer, query, length=truncate_length) if truncate_length else query,
                    doc=truncate(self.tokenizer, doc["content"], length=truncate_length) if truncate_length else doc["content"],
                    instruction=instruction
                )
            }] for doc in docs
        ]

        # LLM generate
        outputs = self.ranker.chat(messages, self.sampling_params)

        # extract and organize results
        results = []
        for doc, output in zip(docs, outputs):

            # extract the answer and its probability
            cur = ""
            answer = ""
            is_ans = False
            prob = 1.0
            for each in output.outputs[0].logprobs[-10:]:
                _, detail = next(iter(each.items()))
                token = detail.decoded_token
                logprob = detail.logprob
                if is_ans and token.isdigit():
                    answer += token
                    prob *= math.exp(logprob)
                else:
                    cur += token
                    if cur.endswith("<answer>"):
                        is_ans = True

            # in case the answer is not a digit or exceeds 10
            try:
                answer = int(answer)
                assert answer <= 10
            except:
                answer = -1

            # append to the final results
            results.append({
                **doc,
                "rank_score": answer * prob
            })

        # sort the reranking results for the query
        results.sort(key=lambda x: x["rank_score"], reverse=True)
        return results
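
Unlike ERank_Transformer.py, this file ships without a __main__ demo. A minimal usage sketch, assuming the same checkpoint IDs and toy inputs as the Transformers example above, might look like:

    if __name__ == "__main__":

        # one of the checkpoints listed in ERank_Transformer.py
        reranker = ERank_vLLM("Ucreate/ERank-4B")

        # same toy inputs as the Transformers example
        instruction = "Retrieve relevant documents for the query."
        query = "I am happy"
        docs = [
            {"content": "excited", "first_stage_score": 46.7},
            {"content": "sad", "first_stage_score": 1.5},
            {"content": "peaceful", "first_stage_score": 2.3},
        ]

        # rerank; exact rank_score values may differ slightly from the Transformers backend
        results = reranker.rerank(query, docs, instruction, truncate_length=2048)
        print(results)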
examples/instructions.json ADDED
@@ -0,0 +1,10 @@
{
    "BRIGHT (AoPS)": "We want to find different but similar math problems to the query. A document is relevant if it uses the same class of functions and shares any overlapping techniques.",
    "BRIGHT (LeetCode)": "I am looking to find different problems that share similar data structures (of any kind) or algorithms (e.g. DFS, DP, sorting, traversals, etc.). I am looking for problems that share one or both of these similarities to the query. Does the passage below share any similarities? e.g. if there was a textbook on leetcode problems, this would be in the same book even though it could be in a different chapter.",
    "BRIGHT (Pony)": "I will use the programming language pony. But to solve the problem above, I need to know things about pony. A passage is relevant if it contains docs that match any part (even basic parts) of the code I will have to write for the above program.",
    "BRIGHT (TheoremQA-Q)": "We want to find a document which uses the same mathematical process as the query. A document is relevant if it uses the same mathematical process as the query.",
    "BRIGHT (TheoremQA-T)": "We want to find a document which uses the same mathematical process as the query. A document is relevant if it uses the same mathematical process as the query.",
    "BRIGHT (others)": "A document is relevant if it contains information that helps answer or address the query. A document is not relevant if it doesn't contain information that helps answer the query, even if it mentions similar topics.",
    "BEIR / TREC DL": "Given a query, retrieval relevant passage.",
    "FollowIR": "Retrieval the relevant passage for the given query. Be careful about the extra requirements about relevance in the query."
}
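
These entries are plain strings meant to be passed as the instruction argument of rerank(). A minimal sketch of wiring one in (the file path and key choice here are illustrative, not prescribed by this commit):

    import json
    from ERank_Transformer import ERank_Transformer

    # load the per-benchmark instructions added in this commit
    with open("examples/instructions.json") as f:
        instructions = json.load(f)

    # pick the key that matches the benchmark being evaluated (illustrative choice)
    instruction = instructions["BRIGHT (others)"]

    # reuse the Transformers-backed reranker and a toy document from ERank_Transformer.py
    reranker = ERank_Transformer("Ucreate/ERank-4B")
    docs = [{"content": "excited", "first_stage_score": 46.7}]
    results = reranker.rerank("I am happy", docs, instruction, truncate_length=2048)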
examples/utils.py ADDED
@@ -0,0 +1,44 @@
import numpy as np

prompt_template = """Given a query and a document, please give a relevance score of 0~10.
The goal or relevance definition is: {instruction}

Here is the query:
{query}

Here is the document:
{doc}

After thinking, directly choose a relevance score from [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10].
- 0 represents completely not related
- 10 means perfectly related.

Desired output format:
<think>put your thinking here</think><answer>Only allows an integer here</answer>

Your output:"""


def truncate(tokenizer, text, length):
    if length is None or text is None:
        return text
    return tokenizer.convert_tokens_to_string(tokenizer.tokenize(text)[:length])


def hybrid_scores(results, alpha):
    first_stage_scores = [each["first_stage_score"] for each in results]
    rank_scores = [each["rank_score"] for each in results]
    first_stage_mean, first_stage_std = np.mean(first_stage_scores), np.std(first_stage_scores)
    rank_mean, rank_std = np.mean(rank_scores), np.std(rank_scores)

    hybrid_results = []
    for result in results:
        normalized_first_stage_score = (result["first_stage_score"] - first_stage_mean) / first_stage_std
        normalized_rank_score = (result["rank_score"] - rank_mean) / rank_std
        hybrid_results.append({
            **result,
            "hybrid_score": float(alpha * normalized_first_stage_score + (1 - alpha) * normalized_rank_score)
        })
    hybrid_results.sort(key=lambda x: x['hybrid_score'], reverse=True)

    return hybrid_results
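
For reference, hybrid_scores() z-normalizes both score lists and mixes them as alpha * z(first_stage_score) + (1 - alpha) * z(rank_score). A small self-contained check, reusing the numbers from the example output in ERank_Transformer.py:

    from utils import hybrid_scores

    results = [
        {"content": "excited", "first_stage_score": 46.7, "rank_score": 4.84},
        {"content": "peaceful", "first_stage_score": 2.3, "rank_score": 2.98},
        {"content": "sad", "first_stage_score": 1.5, "rank_score": 0.0},
    ]
    print(hybrid_scores(results, alpha=0.2))
    # expected ordering: excited > peaceful > sad, with hybrid_score values of
    # roughly 1.18, 0.01, and -1.19, matching the example output above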