---
library_name: transformers
tags: []
---

### Model Description

A Llama 4 model fine-tuned to solve Arabic Islamic inheritance problems posed as multiple-choice questions. The snippet below loads the model and runs inference over a CSV of questions.

```python
import torch
import pandas as pd
from huggingface_hub import login
from transformers import AutoTokenizer, Llama4ForConditionalGeneration

# Requires transformers 4.51.3
hf_token = "xxxxxxxxxxxxxxxxxxxxxxxxxxxe"
login(hf_token)

model_id = "NYUAD-ComNets/NYUAD_Llama4_Inheritance_Solver"

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

model = Llama4ForConditionalGeneration.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)

# Template for the inference prompt
inference_prompt_template = """Answer the following question using a single word only from this list A, B, C, D, E, F. Do not add details.

### Context:
{}

### Response:
{}"""


def generate_answer(context):
    prompt = inference_prompt_template.format(context, "")
    inputs = tokenizer(prompt + tokenizer.eos_token, return_tensors="pt").to("cuda")

    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=10,
            eos_token_id=tokenizer.eos_token_id,
            use_cache=True,
            do_sample=True,  # needed for temperature/top_p to take effect
            temperature=0.1,
            top_p=1,
        )

    response = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    print(response)
    # Keep only the answer letter generated after the response marker
    return response[0].split("### Response:")[1].strip()[-1]


df = pd.read_csv('/path_to/islamic_inheritance_problem.csv')

for k, o1, o2, o3, o4, o5, o6 in zip(df.question.values,
                                     df.option1.values, df.option2.values,
                                     df.option3.values, df.option4.values,
                                     df.option5.values, df.option6.values):
    example = k + ' ' + o1 + ' ' + o2 + ' ' + o3 + ' ' + o4 + ' ' + o5 + ' ' + o6
    predicted_label = generate_answer(example)
    print("Predicted:", predicted_label)
```

### Training hyperparameters

- Low-Rank Adaptation (LoRA)
- training epochs: 7
- learning rate: 0.0002
- batch size: 1 per device
- gradient accumulation steps: 1
- optimizer: paged_adamw_32bit
- warmup steps: 10
- 4-bit quantization with the NormalFloat 4 (NF4) format

A hedged configuration sketch based on these values is given at the end of this card.

## Model Card Contact

nouar.aldahoul@nyu.edu

# BibTeX entry and citation info

```
@article{aldahoul2025benchmarking,
  title={Benchmarking the Legal Reasoning of LLMs in Arabic Islamic Inheritance Cases},
  author={AlDahoul, Nouar and Zaki, Yasir},
  journal={arXiv preprint arXiv:2508.15796},
  year={2025}
}
```
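### Training configuration sketch

The hyperparameters above correspond to a QLoRA-style supervised fine-tuning setup. This is a minimal sketch, not the authors' exact script: the base checkpoint, the LoRA rank, alpha, dropout, and target modules, the output path, and the placeholder dataset are illustrative assumptions; only the values listed under "Training hyperparameters" come from this card, and exact `SFTTrainer` keyword arguments vary across `trl` versions.

```python
import torch
from datasets import Dataset
from peft import LoraConfig
from trl import SFTTrainer
from transformers import (
    Llama4ForConditionalGeneration,
    BitsAndBytesConfig,
    TrainingArguments,
)

# Assumption: base checkpoint used for fine-tuning (not stated in the card)
base_model_id = "meta-llama/Llama-4-Scout-17B-16E-Instruct"

# 4-bit quantization with the NormalFloat 4 (NF4) format, as stated in the card
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = Llama4ForConditionalGeneration.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,
    device_map="auto",
)

# LoRA adapter; rank, alpha, dropout, and target modules are assumptions
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],
    task_type="CAUSAL_LM",
)

# Hyperparameters listed in the card
training_args = TrainingArguments(
    output_dir="./llama4-inheritance-sft",  # illustrative path
    num_train_epochs=7,
    learning_rate=2e-4,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    warmup_steps=10,
)

# Placeholder dataset of formatted training prompts
train_dataset = Dataset.from_dict({"text": ["...formatted training prompt..."]})

trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    peft_config=peft_config,
)
trainer.train()
```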