---
license: cc-by-nc-4.0
language:
- en
base_model:
- Qwen/Qwen2.5-3B-Instruct
pipeline_tag: text-generation
tags:
- finance
---

This is a toy model built on Qwen/Qwen2.5-3B-Instruct and trained with chain-of-thought supervised fine-tuning (CoT SFT) followed by GRPO (Group Relative Policy Optimization).
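
For reference, the snippet below is a minimal sketch of what a GRPO stage can look like with `trl`'s `GRPOTrainer`, adapted from that library's quickstart. The dataset, reward function, and hyperparameters are illustrative placeholders, not the recipe used to train this model.

```python
# Minimal GRPO sketch (illustrative only; not this model's actual recipe).
from datasets import load_dataset
from trl import GRPOConfig, GRPOTrainer

# Placeholder dataset with a "prompt" column; swap in your own CoT/finance data.
dataset = load_dataset("trl-lib/tldr", split="train")

def reward_len(completions, **kwargs):
    # Toy reward: prefer completions close to 200 characters.
    return [-abs(200 - len(completion)) for completion in completions]

trainer = GRPOTrainer(
    model="Qwen/Qwen2.5-3B-Instruct",
    reward_funcs=reward_len,
    args=GRPOConfig(output_dir="Qwen2.5-3B-GRPO"),
    train_dataset=dataset,
)
trainer.train()
```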

## Usage

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("yixuantt/Qwen2.5-3B-R1-Finance")
model = AutoModelForCausalLM.from_pretrained(
    "yixuantt/Qwen2.5-3B-R1-Finance",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
model.eval()

SYSTEM_PROMPT = (
    "You are a helpful AI Assistant that provides well-reasoned and detailed "
    "responses. You first think about the reasoning process as an internal "
    "monologue and then provide the user with the answer."
)

def generate(text):
    conv = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": text},
    ]
    # Render the conversation with the model's chat template, leaving the
    # assistant turn open so the model continues from there.
    prompt = tokenizer.apply_chat_template(
        conversation=conv, tokenize=False, add_generation_prompt=True
    )
    # Move inputs to the model's device (works with device_map="auto").
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        generation_output = model.generate(
            input_ids=encoded.input_ids,
            attention_mask=encoded.attention_mask,
            max_new_tokens=1024,
            do_sample=True,
            top_k=20,
        )
    # Drop the prompt tokens and decode only the newly generated answer.
    generation_output = generation_output[:, encoded.input_ids.shape[1]:]
    return tokenizer.decode(generation_output[0], skip_special_tokens=True)
```
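
A quick smoke test (the question is illustrative):

```python
answer = generate("What are the key differences between stocks and bonds?")
print(answer)
```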