|
import numpy as np |
|
|
|
# Prompt for an LLM-as-reranker: asks the model to judge query-document
# relevance on an integer 0-10 scale, emitting its reasoning in <think> tags
# and the final integer in <answer> tags (parsed downstream — do not change
# the tag names or the output-format wording casually).
# Format placeholders: {instruction} (relevance definition), {query}, {doc}.
prompt_template = """Given a query and a document, please give a relevance score of 0~10.

The goal or relevance definition is: {instruction}



Here is the query:

{query}



Here is the document:

{doc}



After thinking, directly choose a relevance score from [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10].

- 0 represents completely not related

- 10 means perfectly related.



Desired output format:

<think>put your thinking here</think><answer>Only allows an integer here</answer>



Your output:"""
|
|
|
|
|
def truncate(tokenizer, text, length):
    """Truncate ``text`` to at most ``length`` tokens of ``tokenizer``.

    Args:
        tokenizer: HuggingFace-style tokenizer exposing ``tokenize`` and
            ``convert_tokens_to_string``.
        text: Input string; may be ``None``.
        length: Maximum number of tokens to keep; ``None`` disables truncation.

    Returns:
        The detokenized, truncated text; the input is returned unchanged
        (and the tokenizer is never touched) when ``text`` or ``length``
        is ``None``.
    """
    # Identity check (`is None`) instead of `== None`: PEP 8 idiom, and it
    # cannot be subverted by a custom __eq__.
    if length is None or text is None:
        return text
    return tokenizer.convert_tokens_to_string(tokenizer.tokenize(text)[:length])
|
|
|
|
|
def hybrid_scores(results, alpha):
    """Fuse first-stage retrieval scores and rerank scores via z-score blending.

    Both score lists are standardized (zero mean, unit variance) and combined
    as ``alpha * first_stage_z + (1 - alpha) * rank_z``.

    Args:
        results: List of dicts, each with numeric "first_stage_score" and
            "rank_score" entries. The input dicts are not mutated.
        alpha: Interpolation weight; 1.0 keeps only the first-stage score,
            0.0 only the rank score.

    Returns:
        A new list of copied dicts with an added float "hybrid_score",
        sorted by it in descending order. Empty input yields [].
    """
    # Empty input: np.mean([]) would emit a NaN + RuntimeWarning; short-circuit.
    if not results:
        return []

    first_stage_scores = [each["first_stage_score"] for each in results]
    rank_scores = [each["rank_score"] for each in results]
    first_stage_mean, first_stage_std = np.mean(first_stage_scores), np.std(first_stage_scores)
    rank_mean, rank_std = np.mean(rank_scores), np.std(rank_scores)

    # Zero variance (all scores identical) would make the division below
    # produce NaN/inf and corrupt the sort. Using std=1 maps every such
    # normalized score to 0, which preserves the (non-existent) ordering.
    if first_stage_std == 0:
        first_stage_std = 1.0
    if rank_std == 0:
        rank_std = 1.0

    hybrid_results = []
    for result in results:
        normalized_first_stage_score = (result["first_stage_score"] - first_stage_mean) / first_stage_std
        normalized_rank_score = (result["rank_score"] - rank_mean) / rank_std
        hybrid_results.append({
            **result,
            "hybrid_score": float(alpha * normalized_first_stage_score + (1 - alpha) * normalized_rank_score)
        })
    hybrid_results.sort(key=lambda x: x["hybrid_score"], reverse=True)

    return hybrid_results
|
|