---
base_model: unsloth/phi-3-mini-4k-instruct-bnb-4bit
library_name: peft
license: mit
datasets:
- openai/gsm8k
language:
- en
pipeline_tag: text-generation
tags:
- Maths
- reasoning
---

A LoRA adapter for `unsloth/phi-3-mini-4k-instruct-bnb-4bit`, fine-tuned on the GSM8K dataset for step-by-step math reasoning.

How to use:

```python
# Install dependencies (notebook syntax; drop the leading "!" in a shell)
!pip install peft accelerate bitsandbytes transformers

from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer


def generate_and_solve_problems(model, tokenizer, num_problems=5):
    """
    Generate and solve math and reasoning problems using the fine-tuned model.

    Parameters:
        model: Fine-tuned language model
        tokenizer: Corresponding tokenizer
        num_problems: Number of problems to solve
    """
    # Prompt template matching the fine-tuning format
    test_prompt = """Below is a math problem. Solve the problem step by step and provide a detailed explanation.

### Problem:
{}

### Solution:"""

    # Sample test problems
    test_problems = [
        "A car travels at 40 mph for 2 hours, then at 60 mph for another 3 hours. How far does it travel in total?",
        "If the sum of three consecutive integers is 72, what are the integers?",
        "A train leaves Station A at 10:00 AM traveling at 50 mph. Another train leaves Station A at 12:00 PM traveling at 70 mph on the same track. At what time will the second train catch up to the first?",
        "A rectangle has a length of 12 units and a width of 8 units. If the length is increased by 50% and the width is reduced by 25%, what is the new area of the rectangle?",
        "If a person invests $1000 in a savings account that earns 5% annual interest compounded yearly, how much money will be in the account after 10 years?",
    ]

    # Use only the specified number of problems
    for problem in test_problems[:num_problems]:
        # Create the prompt
        prompt = test_prompt.format(problem)

        # Tokenize and move the inputs to the same device as the model
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True).to(model.device)

        # Generate a step-by-step solution
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=512,  # generation budget, independent of prompt length
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
        )
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Print the problem and the solution
        print(response)
        print("\n" + "=" * 50 + "\n")


# Example usage with model and tokenizer
base_model_name = "unsloth/phi-3-mini-4k-instruct-bnb-4bit"
lora_model_name = "Vijayendra/Phi3-LoRA-GSM8k"

# Load base model and tokenizer
base_model = AutoModelForCausalLM.from_pretrained(base_model_name, device_map="auto", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# Some causal LM tokenizers ship without a pad token; fall back to EOS so padding works
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load the fine-tuned LoRA adapter on top of the base model
model = PeftModel.from_pretrained(base_model, lora_model_name)
model.eval()

# Call the function to solve problems
generate_and_solve_problems(model, tokenizer)
```
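
For interactive use, you can stream the solution token by token instead of waiting for the full generation. A minimal sketch using `TextStreamer` from transformers, assuming the `model` and `tokenizer` loaded above (the sample problem is illustrative):

```python
from transformers import TextStreamer

# Print decoded tokens to stdout as they are generated, skipping the prompt
streamer = TextStreamer(tokenizer, skip_prompt=True)

prompt = """Below is a math problem. Solve the problem step by step and provide a detailed explanation.

### Problem:
If 3x + 5 = 20, what is x?

### Solution:"""
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Tokens appear as they arrive; the call still returns the full output ids
model.generate(**inputs, max_new_tokens=256, temperature=0.7, top_p=0.9, do_sample=True, streamer=streamer)
```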
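
For deployment you can also merge the adapter into the base weights with PEFT's `merge_and_unload`, so inference no longer needs the PEFT wrapper. This is a sketch, not a verified recipe: merging into a bnb-4bit base involves re-quantization and can introduce small rounding differences, and the output directory name is hypothetical.

```python
# Merge the LoRA weights into the base model and drop the adapter wrapper.
# With a 4-bit quantized base, spot-check generations after merging.
merged_model = model.merge_and_unload()

# Save the merged model for later use ("phi3-gsm8k-merged" is a hypothetical path)
merged_model.save_pretrained("phi3-gsm8k-merged")
tokenizer.save_pretrained("phi3-gsm8k-merged")
```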