Upload assets and examples
- .gitattributes +2 -0
- assets/latency.png +3 -0
- assets/overview.png +3 -0
- examples/ERank_Transformer.py +146 -0
- examples/ERank_vLLM.py +97 -0
- examples/instructions.json +10 -0
- examples/utils.py +44 -0
.gitattributes
CHANGED
@@ -34,3 +34,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 tokenizer.json filter=lfs diff=lfs merge=lfs -text
+assets/latency.png filter=lfs diff=lfs merge=lfs -text
+assets/overview.png filter=lfs diff=lfs merge=lfs -text
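For context: the two added lines follow the standard Git LFS attribute pattern, and are the kind of entries that `git lfs track "assets/latency.png"` and `git lfs track "assets/overview.png"` would append to .gitattributes, assuming Git LFS is already initialized for the repository.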
assets/latency.png
ADDED
(binary image tracked with Git LFS; no preview)

assets/overview.png
ADDED
(binary image tracked with Git LFS; no preview)
examples/ERank_Transformer.py
ADDED
@@ -0,0 +1,146 @@
from torch.nn import functional as F
from transformers import AutoModelForCausalLM, AutoTokenizer
from utils import prompt_template, truncate, hybrid_scores


class ERank_Transformer:

    def __init__(self, model_name_or_path: str):
        """
        Initializes the ERank_Transformer reranker.

        Args:
            model_name_or_path (str): The name or path of the model to be loaded.
                This can be a Hugging Face model ID or a local path.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
        self.reranker = AutoModelForCausalLM.from_pretrained(model_name_or_path).eval()
        self.reranker.to("cuda")

    def rerank(self, query: str, docs: list, instruction: str, truncate_length: int = None) -> list:
        """
        Reranks a list of documents based on a query and a specific instruction.

        Args:
            query (str): The search query provided by the user.
            docs (list): A list of dictionaries, where each dictionary represents a document
                and must contain a "content" key.
            instruction (str): The instruction for the model, guiding it on how to evaluate the documents.
            truncate_length (int, optional): The maximum length to truncate the query and document content to. Defaults to None.

        Returns:
            list: A new list of document dictionaries, sorted by their "rank_score" in descending order.
        """

        # prepare messages
        messages = [
            [{
                "role": "user",
                "content": prompt_template.format(
                    query=truncate(self.tokenizer, query, length=truncate_length) if truncate_length else query,
                    doc=truncate(self.tokenizer, doc["content"], length=truncate_length) if truncate_length else doc["content"],
                    instruction=instruction
                )
            }] for doc in docs
        ]

        # encode tokens
        texts = [
            self.tokenizer.apply_chat_template(
                each,
                tokenize=False,
                add_generation_prompt=True,
            ) for each in messages
        ]
        inputs = self.tokenizer(texts, padding=True, return_tensors="pt").to(self.reranker.device)

        # LLM completion
        outputs = self.reranker.generate(
            **inputs,
            max_new_tokens=8192,
            output_scores=True,
            return_dict_in_generate=True
        )

        # extract and organize results
        results = []
        scores = outputs.scores
        generated_ids = outputs.sequences
        answer_token_ids = self.tokenizer.encode("<answer>", add_special_tokens=False)
        for idx in range(len(texts)):

            # find <answer> in the generated sequence
            output_ids = generated_ids[idx].tolist()
            start_index = -1
            for i in range(len(output_ids) - len(answer_token_ids) - 1, -1, -1):
                if output_ids[i:i + len(answer_token_ids)] == answer_token_ids:
                    start_index = i + len(answer_token_ids)
                    break

            # start from the index after <answer>
            answer = ""
            prob = 1.0
            if start_index != -1:
                for t in range(start_index - inputs.input_ids.size(1), len(scores)):
                    generated_token_id = generated_ids[idx][inputs.input_ids.size(1) + t]
                    token = self.tokenizer.decode(generated_token_id)
                    if token.isdigit():
                        logits = scores[t][idx]
                        probs = F.softmax(logits, dim=-1)
                        prob *= probs[generated_token_id].item()
                        answer += token
                    else:
                        break

            # in case the answer is not a digit or exceeds 10
            try:
                answer = int(answer)
                assert answer <= 10
            except:
                answer = -1

            # append to the final results
            results.append({
                **docs[idx],
                "rank_score": answer * prob
            })

        # sort the reranking results for the query
        results.sort(key=lambda x: x["rank_score"], reverse=True)
        return results


if __name__ == "__main__":

    # select a model
    model_name_or_path = "Ucreate/ERank-4B"
    # model_name_or_path = "Ucreate/ERank-14B"
    # model_name_or_path = "Ucreate/ERank-32B"
    reranker = ERank_Transformer(model_name_or_path)

    # input data
    instruction = "Retrieve relevant documents for the query."
    query = "I am happy"
    docs = [
        {"content": "excited", "first_stage_score": 46.7},
        {"content": "sad", "first_stage_score": 1.5},
        {"content": "peaceful", "first_stage_score": 2.3},
    ]

    # rerank
    results = reranker.rerank(query, docs, instruction, truncate_length=2048)
    print(results)
    # [
    #   {'content': 'excited', 'first_stage_score': 46.7, 'rank_score': 4.84},
    #   {'content': 'peaceful', 'first_stage_score': 2.3, 'rank_score': 2.98},
    #   {'content': 'sad', 'first_stage_score': 1.5, 'rank_score': 0.0},
    # ]

    # Optional: hybrid with first-stage scores
    alpha = 0.2
    hybrid_results = hybrid_scores(results, alpha)
    print(hybrid_results)
    # [
    #   {'content': 'excited', 'first_stage_score': 46.7, 'rank_score': 4.84, 'hybrid_score': 1.18},
    #   {'content': 'peaceful', 'first_stage_score': 2.3, 'rank_score': 2.98, 'hybrid_score': 0.01},
    #   {'content': 'sad', 'first_stage_score': 1.5, 'rank_score': 0.0, 'hybrid_score': -1.19},
    # ]
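The rank_score returned above is the integer parsed from <answer>...</answer> multiplied by the product of the softmax probabilities of its digit tokens, so a confident "8" can outscore an unconfident "9". A minimal sketch of that arithmetic; the concrete values are hypothetical, not taken from an actual run:

# Illustrative arithmetic only (hypothetical values, not from a real run)
answer = 8               # integer parsed from "<answer>8</answer>"
digit_probs = [0.605]    # softmax probability of each generated digit token

prob = 1.0
for p in digit_probs:
    prob *= p

rank_score = answer * prob
print(rank_score)        # 4.84, on the same scale as the example output above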
examples/ERank_vLLM.py
ADDED
@@ -0,0 +1,97 @@
import torch
import math
from vllm import LLM, SamplingParams
from utils import prompt_template, truncate


class ERank_vLLM:

    def __init__(self, model_name_or_path: str):
        """
        Initializes the ERank_vLLM reranker.

        Args:
            model_name_or_path (str): The name or path of the model to be loaded.
                This can be a Hugging Face model ID or a local path.
        """
        num_gpu = torch.cuda.device_count()
        self.ranker = LLM(
            model=model_name_or_path,
            tensor_parallel_size=num_gpu,
            gpu_memory_utilization=0.95,
            enable_prefix_caching=True
        )
        self.tokenizer = self.ranker.get_tokenizer()
        self.sampling_params = SamplingParams(
            temperature=0,
            max_tokens=4096,
            logprobs=20
        )

    def rerank(self, query: str, docs: list, instruction: str, truncate_length: int = None) -> list:
        """
        Reranks a list of documents based on a query and a specific instruction.

        Args:
            query (str): The search query provided by the user.
            docs (list): A list of dictionaries, where each dictionary represents a document
                and must contain a "content" key.
            instruction (str): The instruction for the model, guiding it on how to evaluate the documents.
            truncate_length (int, optional): The maximum length to truncate the query and document content to. Defaults to None.

        Returns:
            list: A new list of document dictionaries, sorted by their "rank_score" in descending order.
        """

        # prepare messages
        messages = [
            [{
                "role": "user",
                "content": prompt_template.format(
                    query=truncate(self.tokenizer, query, length=truncate_length) if truncate_length else query,
                    doc=truncate(self.tokenizer, doc["content"], length=truncate_length) if truncate_length else doc["content"],
                    instruction=instruction
                )
            }] for doc in docs
        ]

        # LLM generate
        outputs = self.ranker.chat(messages, self.sampling_params)

        # extract and organize results
        results = []
        for doc, output in zip(docs, outputs):

            # extract the answer and its probability
            cur = ""
            answer = ""
            is_ans = False
            prob = 1.0
            for each in output.outputs[0].logprobs[-10:]:
                _, detail = next(iter(each.items()))
                token = detail.decoded_token
                logprob = detail.logprob
                if is_ans and token.isdigit():
                    answer += token
                    prob *= math.exp(logprob)
                else:
                    cur += token
                    if cur.endswith("<answer>"):
                        is_ans = True

            # in case the answer is not a digit or exceeds 10
            try:
                answer = int(answer)
                assert answer <= 10
            except:
                answer = -1

            # append to the final results
            results.append({
                **doc,
                "rank_score": answer * prob
            })

        # sort the reranking results for the query
        results.sort(key=lambda x: x["rank_score"], reverse=True)
        return results
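Unlike examples/ERank_Transformer.py, this file ships without a __main__ demo. A minimal usage sketch, reusing the toy inputs and model choice from the Transformer example above:

from ERank_vLLM import ERank_vLLM

reranker = ERank_vLLM("Ucreate/ERank-4B")

instruction = "Retrieve relevant documents for the query."
query = "I am happy"
docs = [
    {"content": "excited", "first_stage_score": 46.7},
    {"content": "sad", "first_stage_score": 1.5},
    {"content": "peaceful", "first_stage_score": 2.3},
]

results = reranker.rerank(query, docs, instruction, truncate_length=2048)
print(results)  # document dicts sorted by "rank_score", highest first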
examples/instructions.json
ADDED
@@ -0,0 +1,10 @@
{
    "BRIGHT (AoPS)": "We want to find different but similar math problems to the query. A document is relevant if it uses the same class of functions and shares any overlapping techniques.",
    "BRIGHT (LeetCode)": "I am looking to find different problems that share similar data structures (of any kind) or algorithms (e.g. DFS, DP, sorting, traversals, etc.). I am looking for problems that share one or both of these similarities to the query. Does the passage below share any similarities? e.g. if there was a textbook on leetcode problems, this would be in the same book even though it could be in a different chapter.",
    "BRIGHT (Pony)": "I will use the programming language pony. But to solve the problem above, I need to know things about pony. A passage is relevant if it contains docs that match any part (even basic parts) of the code I will have to write for the above program.",
    "BRIGHT (TheoremQA-Q)": "We want to find a document which uses the same mathematical process as the query. A document is relevant if it uses the same mathematical process as the query.",
    "BRIGHT (TheoremQA-T)": "We want to find a document which uses the same mathematical process as the query. A document is relevant if it uses the same mathematical process as the query.",
    "BRIGHT (others)": "A document is relevant if it contains information that helps answer or address the query. A document is not relevant if it doesn't contain information that helps answer the query, even if it mentions similar topics.",
    "BEIR / TREC DL": "Given a query, retrieval relevant passage.",
    "FollowIR": "Retrieval the relevant passage for the given query. Be careful about the extra requirements about relevance in the query."
}
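Each value is intended to be passed as the instruction argument of rerank. A minimal sketch of loading the file (assumes the script runs from the examples/ directory; the query and document below are made-up placeholders):

import json
from ERank_Transformer import ERank_Transformer

with open("instructions.json") as f:
    instructions = json.load(f)

# pick the instruction that matches the benchmark being evaluated
instruction = instructions["BEIR / TREC DL"]

reranker = ERank_Transformer("Ucreate/ERank-4B")
results = reranker.rerank(
    query="placeholder query",
    docs=[{"content": "placeholder document"}],
    instruction=instruction,
)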
examples/utils.py
ADDED
@@ -0,0 +1,44 @@
import numpy as np

prompt_template = """Given a query and a document, please give a relevance score of 0~10.
The goal or relevance definition is: {instruction}

Here is the query:
{query}

Here is the document:
{doc}

After thinking, directly choose a relevance score from [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10].
- 0 represents completely not related
- 10 means perfectly related.

Desired output format:
<think>put your thinking here</think><answer>Only allows an integer here</answer>

Your output:"""


def truncate(tokenizer, text, length):
    if length is None or text is None:
        return text
    return tokenizer.convert_tokens_to_string(tokenizer.tokenize(text)[:length])


def hybrid_scores(results, alpha):
    first_stage_scores = [each["first_stage_score"] for each in results]
    rank_scores = [each["rank_score"] for each in results]
    first_stage_mean, first_stage_std = np.mean(first_stage_scores), np.std(first_stage_scores)
    rank_mean, rank_std = np.mean(rank_scores), np.std(rank_scores)

    hybrid_results = []
    for result in results:
        normalized_first_stage_score = (result["first_stage_score"] - first_stage_mean) / first_stage_std
        normalized_rank_score = (result["rank_score"] - rank_mean) / rank_std
        hybrid_results.append({
            **result,
            "hybrid_score": float(alpha * normalized_first_stage_score + (1 - alpha) * normalized_rank_score)
        })
    hybrid_results.sort(key=lambda x: x["hybrid_score"], reverse=True)

    return hybrid_results
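hybrid_scores z-normalizes the first-stage and rank scores separately (population mean/std via numpy) and blends them as alpha * first_stage + (1 - alpha) * rank. A quick check against the toy numbers from examples/ERank_Transformer.py:

import numpy as np

first_stage = np.array([46.7, 1.5, 2.3])   # first_stage_score of excited, sad, peaceful
rank = np.array([4.84, 0.0, 2.98])         # rank_score from the example output above

z_fs = (first_stage - first_stage.mean()) / first_stage.std()
z_rank = (rank - rank.mean()) / rank.std()

alpha = 0.2
hybrid = alpha * z_fs + (1 - alpha) * z_rank
print(hybrid.round(2))  # [ 1.18 -1.19  0.01], matching the hybrid_score comments above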