---
library_name: transformers
tags: []
---

### Model Description

NYUAD_Llama4_Inheritance_Solver2 is a Llama 4 model fine-tuned with LoRA to solve Arabic Islamic inheritance (mawarith) problems posed as multiple-choice questions, answering with a single option letter (A-F). It accompanies the paper "Benchmarking the Legal Reasoning of LLMs in Arabic Islamic Inheritance Cases" (arXiv:2508.15796); see the citation at the end of this card. The script below loads the model and runs inference over a CSV of inheritance questions.

```python
import torch
import pandas as pd
from huggingface_hub import login
from transformers import AutoTokenizer, Llama4ForConditionalGeneration

# Requires transformers 4.51.3 or later (adds Llama4ForConditionalGeneration).

hf_token = "xxxxxxxxxxxxxxxxxxxxxxxxxxxe"  # your Hugging Face access token
login(hf_token)

model_id = "NYUAD-ComNets/NYUAD_Llama4_Inheritance_Solver2"

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = Llama4ForConditionalGeneration.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)

# Inference prompt template. The Arabic instructions translate roughly to:
# "You are an expert in Islamic inheritance law. Reason step by step to
# determine the heirs' shares. Always begin by naming the heirs and their type
# (e.g., husband, son, brother), then check for an inheriting descendant or
# ascendant. Apply the prescribed fixed shares (fara'id), then the residuary
# rules (ta'sib) if the estate has a surplus. Follow these steps: list the
# heirs; determine each heir's prescribed share; check for blocking (hajb) and
# precedence; distribute any remainder by ta'sib; verify that the shares sum
# to the entire estate."
inference_prompt_template = """
أنت خبير في علم المواريث في الشريعة الإسلامية. استخدم التفكير خطوة بخطوة لتحديد أنصبة الورثة. ابدأ دائماً بذكر الورثة، وتحديد نوعهم (مثل: زوج، ابن، أخ)، ثم تحقق من وجود فرع وارث أو أصل وارث. بعد ذلك، طبّق الفرائض المقدّرة ثم قواعد التعصيب إذا وُجد فائض في التركة.
اتبع الخطوات التالية:
اذكر الورثة.
حدد الفروض المقدّرة لكل وارث.
افحص وجود الحجب والتقديم.
وزّع الباقي إن وجد بالتعصيب.
تحقق من أن مجموع الأنصبة يساوي كامل التركة.
Then output your final answer using a single word only from this list A, B, C, D, E, F.
### Context:
{}

### Response:
{}"""


def generate_answer(context):
    prompt = inference_prompt_template.format(context, "")
    inputs = tokenizer(prompt + tokenizer.eos_token, return_tensors="pt").to("cuda")

    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=10,
            eos_token_id=tokenizer.eos_token_id,
            use_cache=True,
            do_sample=True,  # temperature/top_p are ignored under greedy decoding
            temperature=0.1,
            top_p=1,
        )
    response = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    print(response)

    # The model is prompted to answer with a single option letter (A-F);
    # take the last character of the text generated after "### Response:".
    return response[0].split("### Response:")[1].strip()[-1]


df = pd.read_csv('/path_to/islamic_inheritance_problem.csv.csv')
for k, o1, o2, o3, o4, o5, o6 in zip(
    df.question.values,
    df.option1.values, df.option2.values,
    df.option3.values, df.option4.values,
    df.option5.values, df.option6.values,
):
    # Concatenate the question with its six answer options into one context.
    example = k + ' ' + o1 + ' ' + o2 + ' ' + o3 + ' ' + o4 + ' ' + o5 + ' ' + o6

    predicted_label = generate_answer(example)
    print("Predicted:", predicted_label)
```

### Training hyperparameters

- Low-Rank Adaptation (LoRA)
- epochs: 7
- learning rate: 0.0002
- per-device batch size: 1
- gradient accumulation steps: 1
- optimizer: paged_adamw_32bit
- warmup steps: 10
- 4-bit quantization in the NormalFloat4 (NF4) format
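
These hyperparameters correspond roughly to the configuration sketched below using `peft` and `bitsandbytes`. The training script itself is not part of this card, so the LoRA rank/alpha/dropout, target modules, base checkpoint, and output path are illustrative assumptions, not the authors' exact values.

```python
import torch
from peft import LoraConfig, get_peft_model
from transformers import (
    BitsAndBytesConfig,
    Llama4ForConditionalGeneration,
    TrainingArguments,
)

# 4-bit NF4 quantization, as listed above.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Assumed base checkpoint; the card does not name the exact Llama 4 variant.
base_model = Llama4ForConditionalGeneration.from_pretrained(
    "meta-llama/Llama-4-Scout-17B-16E-Instruct",
    quantization_config=bnb_config,
    device_map="auto",
)

# LoRA adapter; r, alpha, dropout, and target modules are illustrative.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)
model = get_peft_model(base_model, lora_config)
model.print_trainable_parameters()

# Hyperparameters exactly as listed in this card.
training_args = TrainingArguments(
    output_dir="./inheritance_solver",  # assumed
    num_train_epochs=7,
    learning_rate=2e-4,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    warmup_steps=10,
)

# model and training_args can then be handed to trl's SFTTrainer together
# with the training dataset to run the fine-tune.
```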

## Model Card Contact

[email protected]

# BibTeX entry and citation info

```bibtex
@article{aldahoul2025benchmarking,
  title={Benchmarking the Legal Reasoning of LLMs in Arabic Islamic Inheritance Cases},
  author={AlDahoul, Nouar and Zaki, Yasir},
  journal={arXiv preprint arXiv:2508.15796},
  year={2025}
}
```
|