{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "883dad38-fa21-4372-9946-b11dec49e88c",
     "showTitle": false,
     "tableResultSettingsMap": {},
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "# Install PyTorch & other libraries\n",
    "# NOTE(review): tensorboard is not version-pinned here, unlike torch.\n",
    "%pip install torch==2.6.0 tensorboard\n",
    "\n",
    "# Install Hugging Face libraries\n",
    "# NOTE(review): evaluate and protobuf are not version-pinned; pin them for reproducible installs.\n",
    "%pip install transformers==4.55.0 datasets==4.0.0 accelerate==1.10.0 evaluate trl==0.21.0 protobuf sentencepiece==0.2.0\n",
    "\n",
    "# flash-attn is installed unconditionally by the next line. Comment it out when the GPU does not\n",
    "# support the BF16 data type and flash attention (supported examples: NVIDIA L4, NVIDIA A100).\n",
    "%pip install flash-attn --no-build-isolation\n",
    "%pip install mlflow \n",
    "# Restart the Python process, presumably so the packages installed above are the ones imported below.\n",
    "dbutils.library.restartPython()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "a25cb48a-6ce3-43d6-85ae-1c6b8c8e107c",
     "showTitle": false,
     "tableResultSettingsMap": {},
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "# https://aws.amazon.com/ec2/instance-types/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "9ba2c509-2e7f-4d54-8d24-a7f85ad25607",
     "showTitle": false,
     "tableResultSettingsMap": {},
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "import torch \n",
    "import transformers\n",
    "import accelerate\n",
    "import trl\n",
    "import sentencepiece\n",
    "import datasets \n",
    "\n",
    "# Print the installed version of each core library, one line per package.\n",
    "for version_label, library in (\n",
    "    (\"Torch version: \", torch),\n",
    "    (\"Transformers version: \", transformers),\n",
    "    (\"Accelerate version: \", accelerate),\n",
    "    (\"TRL version: \", trl),\n",
    "    (\"Sentencepiece version: \", sentencepiece),\n",
    "    (\"Datasets version: \", datasets),\n",
    "):\n",
    "    print(version_label, library.__version__)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "ffb21bb7-53e5-4512-bc2c-84c24786645a",
     "showTitle": false,
     "tableResultSettingsMap": {},
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "from huggingface_hub import login\n",
    "from datasets import load_dataset, Dataset\n",
    "from transformers import AutoTokenizer, AutoModelForCausalLM\n",
    "from transformers import pipeline\n",
    "from random import randint\n",
    "import re\n",
    "from pathlib import Path\n",
    "import json\n",
    "from pyspark.sql import functions as F\n",
    "from sklearn.metrics import classification_report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "96997d06-34c7-4c09-8c52-8497e0b78925",
     "showTitle": false,
     "tableResultSettingsMap": {},
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "import sys\n",
    "print(\"Python version:\", sys.version)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "563a43ec-ca3e-4af9-9056-95419ab9b15c",
     "showTitle": false,
     "tableResultSettingsMap": {},
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "# Authenticate against the Hugging Face Hub (`login` is imported from huggingface_hub above).\n",
    "# NOTE(review): called without arguments, so this presumably prompts for a token interactively;\n",
    "# confirm that it behaves as intended in non-interactive runs.\n",
    "login()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "3a821058-d1ed-4e2f-a691-05b8af1821ab",
     "showTitle": false,
     "tableResultSettingsMap": {},
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "def parse_json_blob(text: str) -> dict | None:\n",
    "    \"\"\"Extract the first JSON object contained in a model response.\n",
    "\n",
    "    Fenced markdown blocks are tried first, in order of appearance: first the\n",
    "    multi-line form (fence, newline, payload, newline, fence), then a looser form\n",
    "    that also accepts single-line fences and an upper-case JSON tag. If no fenced\n",
    "    block holds a JSON object, the whole text is parsed as JSON.\n",
    "\n",
    "    Args:\n",
    "        text: Raw response text. None or an empty string yields None.\n",
    "\n",
    "    Returns:\n",
    "        The first candidate that decodes to a dict, or None when no candidate\n",
    "        decodes to a JSON object (invalid JSON, or a JSON list/string/number).\n",
    "    \"\"\"\n",
    "    if not text:\n",
    "        return None\n",
    "\n",
    "    strict_fence = r\"```(?:json)?\\s*\\n(.*?)\\n```\"\n",
    "    # Looser variant: whitespace around the payload is optional and the tag is case-insensitive.\n",
    "    loose_fence = r\"```(?:json)?\\s*(.*?)\\s*```\"\n",
    "\n",
    "    # Strict matches come first so responses that parsed before keep resolving to the same block.\n",
    "    candidates = re.findall(strict_fence, text, re.DOTALL)\n",
    "    candidates += re.findall(loose_fence, text, re.DOTALL | re.IGNORECASE)\n",
    "    # Last resort: the response may be bare JSON without any fence.\n",
    "    candidates.append(text)\n",
    "\n",
    "    for candidate in candidates:\n",
    "        try:\n",
    "            parsed = json.loads(candidate.strip())\n",
    "        except json.JSONDecodeError:\n",
    "            continue\n",
    "        # Callers index the result by key, so anything other than a JSON object is skipped.\n",
    "        if isinstance(parsed, dict):\n",
    "            return parsed\n",
    "\n",
    "    return None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "58936d51-3e88-46e3-846e-899c92a3b962",
     "showTitle": false,
     "tableResultSettingsMap": {},
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "# Base checkpoint evaluated in this notebook.\n",
    "# Author's note: this does not work with Python 3.11.11; it works with Python 3.12.3.\n",
    "base_model = \"google/gemma-3-270m-it\"\n",
    "\n",
    "\n",
    "# The model is currently loaded with attn_implementation='sdpa'.\n",
    "# TODO: try attn_implementation='flash_attention_2' instead.\n",
    "model = AutoModelForCausalLM.from_pretrained(base_model ,torch_dtype='auto', device_map='auto', attn_implementation='sdpa')\n",
    "# The tokenizer is loaded from the same checkpoint name as the model.\n",
    "tokenizer = AutoTokenizer.from_pretrained(base_model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "bb409321-f8cf-492b-86f8-b60413bfe04d",
     "showTitle": false,
     "tableResultSettingsMap": {},
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "# Report the device and dtype attributes of the loaded model.\n",
    "print(f\"Device: {model.device}\")\n",
    "print(f\"DType: {model.dtype}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "554c9407-ec33-4f80-8833-77d038f68b63",
     "showTitle": true,
     "tableResultSettingsMap": {},
     "title": "Pipeline inference"
    }
   },
   "outputs": [],
   "source": [
    "# load the model and tokenizer into the pipeline\n",
    "pipe = pipeline(\"text-generation\", model=model, tokenizer=tokenizer)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "ce44dd3b-53df-4b9b-b557-25501592e717",
     "showTitle": true,
     "tableResultSettingsMap": {},
     "title": "Evaluation Before Fine-Tuning"
    }
   },
   "outputs": [],
   "source": [
    "# Evaluate the system prompt on one hand-picked listing before running the benchmark.\n",
    "# NOTE(review): absolute, user-specific workspace path; the cell fails for anyone without access to it.\n",
    "full_path = \"/Workspace/Users/hyemam@expediagroup.com/Trust_and_Safety/Shared_Spaces/LLMExperiments/LabelCollection/prompt_4.txt\"\n",
    "system_prompt = Path(full_path).read_text()\n",
    "\n",
    "# Listing description used as the single user message.\n",
    "example_prompt = \"\"\"Welcome to the heart of the bay area. You'll find yourself conveniently located to downtown San Mateo and all the transportation. This is a shared walkway,  1 bed guest suite. The unit has 1 queen sized bed with ample pillows, , mini kitchen, fast wifi, 2 x 43\\\" TV, Netflix, coffee, fast wifi.  Self-check in. Mini Kitchen includes: Refrigerator, cook-top, microwave, Keurig / Keurig pods, utensils, cookware. Bathroom includes, towels, blow dryer, iron / ironing board, hand soap, shampoo, conditioner, body wash. Living area has pull out queen sofa bed with mattress topper, blanket, pillows, and sheets.\"\"\"\n",
    "\n",
    "# Chat-format conversation: system instructions followed by the listing text.\n",
    "example =  [{\"role\": \"system\", \"content\": system_prompt}\n",
    "            , {\"role\": \"user\", \"content\": example_prompt}]\n",
    "\n",
    "# Rendered chat-template string. It is not used again in this cell: the pipeline call below\n",
    "# receives the message list `example`, not `prompt`.\n",
    "prompt = pipe.tokenizer.apply_chat_template(example, tokenize=False, add_generation_prompt=True)\n",
    "\n",
    "# NOTE(review): batch_size=10 is passed although only one conversation is submitted here.\n",
    "output = pipe(example, max_new_tokens=500, disable_compile=True\n",
    "                  , truncation=True\n",
    "                  , batch_size=10, return_full_text=False)\n",
    "\n",
    "\n",
    "# Parse the last returned generation into a dict (None when it is not valid JSON).\n",
    "parse_json_blob(output[-1]['generated_text'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "a1dcd4b7-a2c7-44ea-bac4-a0e02f054830",
     "showTitle": true,
     "tableResultSettingsMap": {},
     "title": "Benchmarking"
    }
   },
   "outputs": [],
   "source": [
    "# Columns kept from every labelled batch (after its notes column is renamed).\n",
    "SELECTED_COLUMNS = ['vrbo_property_id',\"listing_rental_description\", \"ground_truth\", \"reviewer_notes\"]\n",
    "\n",
    "# S3 prefix holding one parquet directory (batch_<n>/) per labelled batch.\n",
    "BATCH_ROOT = \"s3://apiary-analytics-927134741764-us-east-1-mxt-ml/hyemam/shared_spaces/discovery/operations\"\n",
    "\n",
    "# Evaluation sample settings.\n",
    "NUM_SAMPLES = 100  # upper bound on the number of sampled rows\n",
    "SAMPLE_FRACTION = 0.5\n",
    "# The earlier call `sample(0.5, False)` was read by PySpark as fraction=0.5 with False taken as the\n",
    "# seed (i.e. 0), not as withReplacement=False. Seed 0 is kept so the effective arguments are unchanged.\n",
    "SAMPLE_SEED = 0\n",
    "\n",
    "\n",
    "def _load_batch(batch_num, notes_column):\n",
    "    \"\"\"Read one labelled batch and normalise it to SELECTED_COLUMNS plus `batch_num`.\n",
    "\n",
    "    Args:\n",
    "        batch_num: Integer suffix of the batch_<n>/ directory under BATCH_ROOT; it is also\n",
    "            stored in the `batch_num` column.\n",
    "        notes_column: Name of the source column that is renamed to `reviewer_notes`.\n",
    "\n",
    "    Returns:\n",
    "        A Spark DataFrame with SELECTED_COLUMNS followed by `batch_num`.\n",
    "    \"\"\"\n",
    "    return (\n",
    "        spark.read.parquet(f\"{BATCH_ROOT}/batch_{batch_num}/\")\n",
    "        .withColumnRenamed(notes_column, \"reviewer_notes\")\n",
    "        .select(*SELECTED_COLUMNS)\n",
    "        .withColumn(\"batch_num\", F.lit(batch_num))\n",
    "    )\n",
    "\n",
    "\n",
    "# load datasets\n",
    "batch_1 = _load_batch(1, \"Notes\")\n",
    "# NOTE(review): batch 2 maps \"Reviewer's Name\" (not a notes column) onto reviewer_notes - confirm this is intended.\n",
    "batch_2 = _load_batch(2, \"Reviewer's Name\")\n",
    "batch_3 = _load_batch(3, \"Analyst Notes\")\n",
    "\n",
    "# preprocess data for evaluation\n",
    "# union() is positional; this is safe because every batch selects the same columns in the same order.\n",
    "batch = (batch_1.union(batch_2).union(batch_3)\n",
    "              .withColumn(\"ground_truth\", F.trim(F.upper(\"ground_truth\")))\n",
    "              .withColumn('ground_truth', F.when(F.col('ground_truth') == \"UNSURE\", 'CLARIFICATION').otherwise(F.col('ground_truth')))\n",
    "              .withColumn(\"label\", F.when(F.col(\"ground_truth\") == 'YES', 1).otherwise(0))\n",
    "              .withColumnRenamed(\"listing_rental_description\", \"text\")\n",
    "        )\n",
    "\n",
    "# NOTE(review): the SQL predicate below also drops rows whose reviewer_notes is NULL - confirm this is intended.\n",
    "batch_sample = (\n",
    "    batch.sample(withReplacement=False, fraction=SAMPLE_FRACTION, seed=SAMPLE_SEED)\n",
    "    .filter(\"reviewer_notes != 'Leon'\")\n",
    "    .limit(NUM_SAMPLES)\n",
    ")\n",
    "\n",
    "# Collect once, then summarise the collected rows. Counting on the Spark side would evaluate the\n",
    "# un-ordered limit() a second time, so the counts could describe different rows than the sample.\n",
    "batch_df = batch_sample.toPandas()\n",
    "\n",
    "display(batch_df.groupby([\"label\", \"ground_truth\"], dropna=False).size().reset_index(name=\"count\"))\n",
    "\n",
    "display(batch_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "0368ef27-1661-41d2-b516-231a637f3b68",
     "showTitle": false,
     "tableResultSettingsMap": {},
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "def generate(sample):\n",
    "    \"\"\"Run the pipeline on one dataset row and return its normalised decision.\n",
    "\n",
    "    Args:\n",
    "        sample: Mapping whose 'prompt' entry holds the chat messages passed to `pipe`.\n",
    "\n",
    "    Returns:\n",
    "        {'response': <upper-cased decision>} when the generation parses to a JSON object with a\n",
    "        string 'decision' field, otherwise {'response': None}.\n",
    "    \"\"\"\n",
    "    output = pipe(sample['prompt'], max_new_tokens=500\n",
    "                  , disable_compile=True\n",
    "                  , truncation=True\n",
    "                  , batch_size=10, return_full_text=False)\n",
    "    parsed = parse_json_blob(output[0]['generated_text'])\n",
    "    # The model does not always follow the schema: tolerate non-object JSON and a missing or\n",
    "    # non-string 'decision' instead of raising in the middle of a Dataset.map run.\n",
    "    decision = parsed.get('decision') if isinstance(parsed, dict) else None\n",
    "    return {'response': decision.strip().upper() if isinstance(decision, str) else None}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "346c0cc8-cab9-4f43-b30d-013daff6d41b",
     "showTitle": true,
     "tableResultSettingsMap": {},
     "title": "Single inference"
    }
   },
   "outputs": [],
   "source": [
    "def _row_to_messages(row):\n",
    "    \"\"\"Build the system + user chat messages for one listing row.\"\"\"\n",
    "    system_turn = {\"role\": \"system\", \"content\": system_prompt}\n",
    "    user_turn = {\"role\": \"user\", \"content\": row['text']}\n",
    "    return [system_turn, user_turn]\n",
    "\n",
    "\n",
    "# One chat prompt per listing, stored next to the listing itself.\n",
    "batch_df['prompt'] = batch_df.apply(_row_to_messages, axis=1)\n",
    "\n",
    "# Wrap only the prompt column in a Hugging Face Dataset.\n",
    "prompt_only = batch_df[['prompt']]\n",
    "batch_dataset = Dataset.from_pandas(prompt_only)\n",
    "\n",
    "# Row-by-row generation (author's note: took about 7 minutes).\n",
    "batch_response = batch_dataset.map(generate)\n",
    "\n",
    "\n",
    "batch_response\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "ce88521a-f040-4822-8e11-b4b6f627d89b",
     "showTitle": false,
     "tableResultSettingsMap": {},
     "title": ""
    }
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "c81d71a2-51d7-46aa-837d-6352210fa44b",
     "showTitle": true,
     "tableResultSettingsMap": {},
     "title": "Batch inference [Json output - Few Shot]"
    }
   },
   "outputs": [],
   "source": [
    "# Build one system + user chat prompt per listing.\n",
    "prompts = batch_df['text'].apply(lambda x: [{\"role\": \"system\", \"content\": system_prompt}, {\"role\": \"user\", \"content\": x}]).tolist()\n",
    "\n",
    "# batch outputs\n",
    "outputs = pipe(\n",
    "    prompts,\n",
    "    max_new_tokens=500,\n",
    "    truncation=True,\n",
    "    batch_size=10,  # now it matters\n",
    "    return_full_text=False, # if true, it returns full prompt plus model output\n",
    ")\n",
    "\n",
    "def parse_output(out):\n",
    "    \"\"\"Return the upper-cased decision of one pipeline result.\n",
    "\n",
    "    Args:\n",
    "        out: One element of the pipeline output, either a dict with 'generated_text' or a list\n",
    "            whose first element is such a dict.\n",
    "\n",
    "    Returns:\n",
    "        The stripped, upper-cased 'decision' string, or None when the generation does not parse\n",
    "        to a JSON object with a string 'decision' field.\n",
    "    \"\"\"\n",
    "    text = out[0][\"generated_text\"] if isinstance(out, list) else out[\"generated_text\"]\n",
    "    parsed = parse_json_blob(text)\n",
    "    # Guard against non-object JSON and a missing or non-string 'decision' so that one malformed\n",
    "    # generation does not abort the whole batch.\n",
    "    decision = parsed.get('decision') if isinstance(parsed, dict) else None\n",
    "    return decision.strip().upper() if isinstance(decision, str) else None\n",
    "\n",
    "# batched response\n",
    "batch_response = [parse_output(out) for out in outputs]\n",
    "\n",
    "\n",
    "\n",
    "batch_df['gemma_270M_it_response'] = batch_response\n",
    "\n",
    "display(batch_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "aaa0135e-0201-4464-a764-673910575ed0",
     "showTitle": false,
     "tableResultSettingsMap": {},
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "# every few shot example was YES so the model is returning Yes. \n",
    "# Drop only rows that lack a label or a parsed prediction; a bare dropna() would also discard\n",
    "# rows that have a null in any unrelated column.\n",
    "few_shot_scored = batch_df.dropna(subset=['ground_truth', 'gemma_270M_it_response'])\n",
    "print(f\"Scored {len(few_shot_scored)} of {len(batch_df)} rows (rows without a label or a parsed response are excluded)\")\n",
    "print(classification_report(few_shot_scored['ground_truth'], few_shot_scored['gemma_270M_it_response']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "5d98ce5d-634c-439b-b517-fbb3e8954e29",
     "showTitle": false,
     "tableResultSettingsMap": {},
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "zero_shot_system_prompt = \"\"\"TASK\n",
    "\n",
    "You are a policy classification assistant trained to detect Shared Space policy violations in text from Vrbo listings description, post-stay traveler reviews, or traveler-host exchanges.\n",
    "Your job: Decide if the listing violates Vrbo's shared space policy based only on the text provided.\n",
    "\n",
    "POLICY DEFINITIONS\n",
    "\n",
    "Core Rule:\n",
    "Vrbo does not support short-term rentals where guests share internal living areas with the host or other travelers outside their party.\n",
    "\n",
    "Internal living areas include (but are not limited to):\n",
    "\t•\tBedrooms\n",
    "\t•\tBathrooms\n",
    "\t•\tKitchens / kitchenettes\n",
    "\t•\tLiving rooms\n",
    "\t•\tInterior hallways within a host-occupied unit\n",
    "\t•\tAny interior space that is part of the host's own living quarters\n",
    "\n",
    "A listing must have:\n",
    "\t•\tA secured, private entrance that guests can lock and control themselves (deadbolt or smart lock)\n",
    "\t•\tA private bathroom\n",
    "\t•\tNo shared internal living areas with the host or other unrelated guests\n",
    "\n",
    "\n",
    "CLEAR VIOLATIONS (“yes”)\n",
    "\n",
    "Mark yes when the text explicitly states or strongly implies that:\n",
    "\t1.\tGuests share an internal living area (bathroom, kitchen, living room, or hallway inside host's unit) with the host or other guests outside their travel party.\n",
    "\t2.\tGuests must enter their space through the host's internal living area.\n",
    "\t3.\tImplicit suggestion of sharing with the host eg. the property is a room in the host's home without clear separation of internal spaces.\n",
    "\n",
    "PERMITTED EXCEPTIONS (not violations if requirements are met)\n",
    "\t•\tBed & Breakfasts with fewer than 20 rooms (must still have private entry and no internal sharing unless otherwise allowed).\n",
    "\t•\tMicro-studios with shared kitchen or bathroom if this is typical for that unit type.\n",
    "\t•\tShared external spaces (driveway, parking area, yard, garden, patio, fire pit, pool, hot tub, external laundry).\n",
    "\n",
    "\n",
    "UNCLEAR CASES (“clarification”)\n",
    "\n",
    "Use clarification only if:\n",
    "\t•\tThe text suggests possible internal sharing\n",
    "\t•\tContradictory information appears (e.g., “private kitchen” in one place, “shared kitchen” in another).\n",
    "\n",
    "\n",
    "Do NOT use “clarification” just because:\n",
    "\t•\tHost lives on property (no violation unless internal sharing is mentioned).\n",
    "\t•\tThere is another rental unit on the property.\n",
    "\n",
    "NO VIOLATION (“no”)\n",
    "\n",
    "Default to no when:\n",
    "\t•\tListing clearly offers a private sleeping area, private bathroom, and no language implying shared internal spaces.\n",
    "\t•\tExternal spaces are shared but internal spaces are private.\n",
    "\t•\tMissing details about entry layout, but no suggestion of shared internal living areas.\n",
    "\n",
    "EVIDENCE PRIORITY RULES\n",
    "\n",
    "If the listing contains both private and shared claims:\n",
    "\t1.\tShared internal space claim overrides any “private” marketing statement.\n",
    "\t2.\tIf “private” is stated and sharing is only implied vaguely → choose \"no\", not “yes.”\n",
    "\n",
    "OUTPUT FORMAT\n",
    "\n",
    "Return your answer as JSON:\n",
    "\n",
    "```json\n",
    "{\n",
    "  \"decision\": \"yes\" | \"no\" | \"clarification\",\n",
    "  \"reasoning\": \"Briefly explain why, citing policy rules and the specific clues found\",\n",
    "  \"taxonomy\": \"One of: 'Shared Bathroom', 'Shared Kitchen', 'Shared Living Room', 'Entry Through Host Space', 'No Private Bathroom', 'No Private Kitchen', 'B&B Over 20 Rooms', 'Micro-Studio Exception', 'Unclear Internal Sharing'\",\n",
    "  \"label_excerpt\": \"Direct quote from listing text that supports your decision\"\n",
    "}\n",
    "```\n",
    "\n",
    "\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "8df05ee1-e27e-4800-a39b-62abe1305367",
     "showTitle": true,
     "tableResultSettingsMap": {},
     "title": "Batch inference [Json output - Zero Shot]"
    }
   },
   "outputs": [],
   "source": [
    "# Zero-shot run: the system message is zero_shot_system_prompt.\n",
    "# Author's observation: in the zero-shot setting the model has a hard time following the output\n",
    "# format, so more responses come back null.\n",
    "\n",
    "# extract prompt\n",
    "prompts = batch_df['text'].apply(lambda x: [{\"role\": \"system\", \"content\": zero_shot_system_prompt}, {\"role\": \"user\", \"content\": x}]).tolist()\n",
    "\n",
    "# batch outputs\n",
    "outputs = pipe(\n",
    "    prompts,\n",
    "    max_new_tokens=500,\n",
    "    truncation=True,\n",
    "    batch_size=10,  # now it matters\n",
    "    return_full_text=False, # if true, it returns full prompt plus model output\n",
    ")\n",
    "\n",
    "# NOTE(review): the generations are only inspected raw here. Nothing in this cell writes them to\n",
    "# batch_df, so batch_df['gemma_270M_it_response'] keeps whatever an earlier cell stored.\n",
    "outputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "aea06f01-2fb0-4a10-ac59-4560ee19c062",
     "showTitle": false,
     "tableResultSettingsMap": {},
     "title": ""
    }
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "e8e2ab9b-a6ea-42d5-af76-d323710eafbf",
     "showTitle": false,
     "tableResultSettingsMap": {},
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "# Spot-check how one generation parses. The sample can hold fewer than 91 rows because\n",
    "# limit(NUM_SAMPLES) is only an upper bound, so clamp the index instead of hard-coding 90.\n",
    "spot_check_index = min(90, len(outputs) - 1)\n",
    "parse_json_blob(outputs[spot_check_index][0]['generated_text'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "52524e93-83e2-479c-96da-1cc42cf6a282",
     "showTitle": false,
     "tableResultSettingsMap": {},
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "# Score the zero-shot generations currently held in `outputs`. The zero-shot cell does not write\n",
    "# its results to batch_df, so reading batch_df['gemma_270M_it_response'] here would re-score the\n",
    "# column stored by the earlier few-shot run.\n",
    "def _zero_shot_decision(out):\n",
    "    \"\"\"Return the upper-cased 'decision' of one pipeline result, or None if it cannot be parsed.\"\"\"\n",
    "    text = out[0][\"generated_text\"] if isinstance(out, list) else out[\"generated_text\"]\n",
    "    parsed = parse_json_blob(text)\n",
    "    decision = parsed.get('decision') if isinstance(parsed, dict) else None\n",
    "    return decision.strip().upper() if isinstance(decision, str) else None\n",
    "\n",
    "\n",
    "# assign() raises if `outputs` and batch_df differ in length, which surfaces stale kernel state.\n",
    "zero_shot_scored = (\n",
    "    batch_df.assign(gemma_270M_it_zero_shot_response=[_zero_shot_decision(out) for out in outputs])\n",
    "    .dropna(subset=['ground_truth', 'gemma_270M_it_zero_shot_response'])\n",
    ")\n",
    "print(f\"Scored {len(zero_shot_scored)} of {len(batch_df)} rows (rows without a label or a parsed response are excluded)\")\n",
    "if zero_shot_scored.empty:\n",
    "    print(\"No zero-shot response could be parsed; nothing to score.\")\n",
    "else:\n",
    "    print(classification_report(zero_shot_scored['ground_truth'], zero_shot_scored['gemma_270M_it_zero_shot_response']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "a838eb41-b0a7-4007-b90f-189db8060128",
     "showTitle": false,
     "tableResultSettingsMap": {},
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "text_output_system_prompt = \"\"\"TASK\n",
    "\n",
    "You are a policy classification assistant trained to detect Shared Space policy violations in text from Vrbo listings description, post-stay traveler reviews, or traveler-host exchanges.\n",
    "Your job: Decide if the listing violates Vrbo's shared space policy based only on the text provided.\n",
    "\n",
    "POLICY DEFINITIONS\n",
    "\n",
    "Core Rule:\n",
    "Vrbo does not support short-term rentals where guests share internal living areas with the host or other travelers outside their party.\n",
    "\n",
    "Internal living areas include (but are not limited to):\n",
    "\t•\tBedrooms\n",
    "\t•\tBathrooms\n",
    "\t•\tKitchens / kitchenettes\n",
    "\t•\tLiving rooms\n",
    "\t•\tInterior hallways within a host-occupied unit\n",
    "\t•\tAny interior space that is part of the host's own living quarters\n",
    "\n",
    "A listing must have:\n",
    "\t•\tA secured, private entrance that guests can lock and control themselves (deadbolt or smart lock)\n",
    "\t•\tA private bathroom\n",
    "\t•\tNo shared internal living areas with the host or other unrelated guests\n",
    "\n",
    "\n",
    "CLEAR VIOLATIONS (“yes”)\n",
    "\n",
    "Mark yes when the text explicitly states or strongly implies that:\n",
    "\t1.\tGuests share an internal living area (bathroom, kitchen, living room, or hallway inside host's unit) with the host or other guests outside their travel party.\n",
    "\t2.\tGuests must enter their space through the host's internal living area.\n",
    "\t3.\tImplicit suggestion of sharing with the host eg. the property is a room in the host's home without clear separation of internal spaces.\n",
    "\n",
    "PERMITTED EXCEPTIONS (not violations if requirements are met)\n",
    "\t•\tBed & Breakfasts with fewer than 20 rooms (must still have private entry and no internal sharing unless otherwise allowed).\n",
    "\t•\tMicro-studios with shared kitchen or bathroom if this is typical for that unit type.\n",
    "\t•\tShared external spaces (driveway, parking area, yard, garden, patio, fire pit, pool, hot tub, external laundry).\n",
    "\n",
    "\n",
    "UNCLEAR CASES (“clarification”)\n",
    "\n",
    "Use clarification only if:\n",
    "\t•\tThe text suggests possible internal sharing\n",
    "\t•\tContradictory information appears (e.g., “private kitchen” in one place, “shared kitchen” in another).\n",
    "\n",
    "\n",
    "Do NOT use “clarification” just because:\n",
    "\t•\tHost lives on property (no violation unless internal sharing is mentioned).\n",
    "\t•\tThere is another rental unit on the property.\n",
    "\n",
    "NO VIOLATION (“no”)\n",
    "\n",
    "Default to no when:\n",
    "\t•\tListing clearly offers a private sleeping area, private bathroom, and no language implying shared internal spaces.\n",
    "\t•\tExternal spaces are shared but internal spaces are private.\n",
    "\t•\tMissing details about entry layout, but no suggestion of shared internal living areas.\n",
    "\n",
    "EVIDENCE PRIORITY RULES\n",
    "\n",
    "If the listing contains both private and shared claims:\n",
    "\t1.\tShared internal space claim overrides any “private” marketing statement.\n",
    "\t2.\tIf “private” is stated and sharing is only implied vaguely → choose \"no\", not “yes.”\n",
    "\n",
    "OUTPUT FORMAT\n",
    "\n",
    "Return only the decision as a single word:\n",
    "\n",
    "yes or no or clarification\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "d7d51c2e-a873-4727-a470-985c99018971",
     "showTitle": false,
     "tableResultSettingsMap": {},
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "# Text-only variant: the system message is text_output_system_prompt, and the raw generation\n",
    "# is stored without any JSON parsing.\n",
    "\n",
    "def _as_text_output_messages(listing_text):\n",
    "    \"\"\"Build the system + user chat messages for one listing description.\"\"\"\n",
    "    return [{\"role\": \"system\", \"content\": text_output_system_prompt}, {\"role\": \"user\", \"content\": listing_text}]\n",
    "\n",
    "\n",
    "# One chat prompt per listing.\n",
    "prompts = batch_df['text'].apply(_as_text_output_messages).tolist()\n",
    "\n",
    "# Generation settings shared by every prompt in the batch.\n",
    "generation_options = dict(\n",
    "    max_new_tokens=500,\n",
    "    truncation=True,\n",
    "    batch_size=10,  # prompts are submitted as a list, so batching applies\n",
    "    return_full_text=False,  # if true, the prompt is returned together with the model output\n",
    ")\n",
    "outputs = pipe(prompts, **generation_options)\n",
    "\n",
    "def parse_output(out):\n",
    "    \"\"\"Return the raw generated text of one pipeline result (a dict, or a list holding one).\"\"\"\n",
    "    first_generation = out[0] if isinstance(out, list) else out\n",
    "    return first_generation[\"generated_text\"]\n",
    "\n",
    "# Raw text of every generation, in prompt order.\n",
    "batch_response = list(map(parse_output, outputs))\n",
    "\n",
    "\n",
    "\n",
    "batch_df['gemma_270M_it_response'] = batch_response\n",
    "\n",
    "display(batch_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "add756f1-1691-4de5-91ee-2306d9eeeec3",
     "showTitle": false,
     "tableResultSettingsMap": {},
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "# This shows that the model lacks the ability to follow the output format. This is where fine-tuning comes in.\n",
    "# TODO: finish this note - the original sentence \"it also outputs\" was left incomplete."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "4655f81e-59ec-4b88-bc93-cd30c09b3e08",
     "showTitle": true,
     "tableResultSettingsMap": {},
     "title": "FewShot-with each decision example"
    }
   },
   "outputs": [],
   "source": [
    "few_shot_examples = \"\"\"TASK\n",
    "\n",
    "You are a policy classification assistant trained to detect Shared Space policy violations in text from Vrbo listings description, post-stay traveler reviews, or traveler-host exchanges.\n",
    "Your job: Decide if the listing violates Vrbo’s shared space policy based only on the text provided.\n",
    "\n",
    "POLICY DEFINITIONS\n",
    "\n",
    "Core Rule:\n",
    "Vrbo does not support short-term rentals where guests share internal living areas with the host or other travelers outside their party.\n",
    "\n",
    "Internal living areas include (but are not limited to):\n",
    "\t•\tBedrooms\n",
    "\t•\tBathrooms\n",
    "\t•\tKitchens / kitchenettes\n",
    "\t•\tLiving rooms\n",
    "\t•\tInterior hallways within a host-occupied unit\n",
    "\t•\tAny interior space that is part of the host’s own living quarters\n",
    "\n",
    "A listing must have:\n",
    "\t•\tA secured, private entrance that guests can lock and control themselves (deadbolt or smart lock)\n",
    "\t•\tA private bathroom\n",
    "\t•\tNo shared internal living areas with the host or other unrelated guests\n",
    "\n",
    "CLEAR VIOLATIONS (“yes”)\n",
    "\n",
    "Mark yes when the text explicitly states or strongly implies that:\n",
    "\t1.\tGuests share an internal living area (bathroom, kitchen, living room, or hallway inside host’s unit) with the host or other guests outside their travel party.\n",
    "\t2.\tGuests must enter their space through the host’s internal living area.\n",
    "\t3.\tImplicit suggestion of sharing with the host e.g. the property is a room in the host’s home without clear separation of internal spaces.\n",
    "\n",
    "PERMITTED EXCEPTIONS (not violations if requirements are met)\n",
    "\t•\tBed & Breakfasts with fewer than 20 rooms (must still have private entry and no internal sharing unless otherwise allowed).\n",
    "\t•\tMicro-studios with shared kitchen or bathroom if this is typical for that unit type.\n",
    "\t•\tShared external spaces (driveway, parking area, yard, garden, patio, fire pit, pool, hot tub, external laundry).\n",
    "\n",
    "UNCLEAR CASES (“clarification”)\n",
    "\n",
    "Use clarification only if:\n",
    "\t•\tThe text suggests possible internal sharing\n",
    "\t•\tContradictory information appears (e.g., “private kitchen” in one place, “shared kitchen” in another).\n",
    "\n",
    "Do NOT use “clarification” just because:\n",
    "\t•\tHost lives on property (no violation unless internal sharing is mentioned).\n",
    "\t•\tThere is another rental unit on the property.\n",
    "\n",
    "NO VIOLATION (“no”)\n",
    "\n",
    "Default to no when:\n",
    "\t•\tListing clearly offers a private sleeping area, private bathroom, and no language implying shared internal spaces.\n",
    "\t•\tExternal spaces are shared but internal spaces are private.\n",
    "\t•\tMissing details about entry layout, but no suggestion of shared internal living areas.\n",
    "\n",
    "EVIDENCE PRIORITY RULES\n",
    "\n",
    "If the listing contains both private and shared claims:\n",
    "\t1.\tShared internal space claim overrides any “private” marketing statement.\n",
    "\t2.\tIf “private” is stated and sharing is only implied vaguely → choose “no”, not “yes.”\n",
    "\n",
    "OUTPUT FORMAT\n",
    "\n",
    "Return your answer as JSON:\n",
    "\n",
    "{\n",
    "“decision”: “yes” | “no” | “clarification”,\n",
    "“reasoning”: “Briefly explain why, citing policy rules and the specific clues found”,\n",
    "“taxonomy”: “One of: ‘Shared Bathroom’, ‘Shared Kitchen’, ‘Shared Living Room’, ‘Entry Through Host Space’, ‘No Private Bathroom’, ‘No Private Kitchen’, ‘B&B Over 20 Rooms’, ‘Micro-Studio Exception’, ‘Unclear Internal Sharing’”,\n",
    "“label_excerpt”: “Direct quote from listing text that supports your decision”\n",
    "}\n",
    "\n",
    "⸻\n",
    "\n",
    "EXAMPLES\n",
    "\n",
    "Example 1 – Clear Violation (yes)\n",
    "\n",
    "Input:\n",
    "Charming room in my home. Guests will share a bathroom with the host and have access to the main kitchen and living room.\n",
    "\n",
    "Output:\n",
    "{\n",
    "“decision”: “yes”,\n",
    "“reasoning”: “The listing explicitly says guests share the bathroom and kitchen with the host, which are internal living areas. This is a direct policy violation.”,\n",
    "“taxonomy”: “Shared Bathroom”,\n",
    "“label_excerpt”: “Guests will share a bathroom with the host.”\n",
    "}\n",
    "\n",
    "⸻\n",
    "\n",
    "Example 2 – No Violation (no)\n",
    "\n",
    "Input:\n",
    "Private guesthouse with separate entrance, bedroom, and bathroom. Guests may use the backyard and pool, which are shared with the host’s family.\n",
    "\n",
    "Output:\n",
    "{\n",
    "“decision”: “no”,\n",
    "“reasoning”: “The listing offers a private guesthouse with its own entrance and bathroom. The only shared areas are external (yard, pool), which are permitted.”,\n",
    "“taxonomy”: “No Violation”,\n",
    "“label_excerpt”: “Private guesthouse with separate entrance, bedroom, and bathroom.”\n",
    "}\n",
    "\n",
    "⸻\n",
    "\n",
    "Example 3 – Clarification (clarification)\n",
    "\n",
    "Input:\n",
    "Cozy private bedroom in host’s home. Guests have access to a kitchen for cooking.\n",
    "\n",
    "Output:\n",
    "{\n",
    "“decision”: “clarification”,\n",
    "“reasoning”: “The listing says ‘private bedroom’ but it is unclear if the kitchen is shared with the host or dedicated to guests. This creates ambiguity about internal living area sharing.”,\n",
    "“taxonomy”: “Unclear Internal Sharing”,\n",
    "“label_excerpt”: “Guests have access to a kitchen for cooking.”\n",
    "}\"\"\"\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "6a49ef23-b35e-4fdd-b1c0-d4cbdafc34cd",
     "showTitle": false,
     "tableResultSettingsMap": {},
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "# when it is zero shot, the model has a hard time following the format so we are getting more null\n",
    "\n",
    "# extract prompt\n",
    "prompts = batch_df['text'].apply(lambda x: [{\"role\": \"system\", \"content\": few_shot_examples}, {\"role\": \"user\", \"content\": x}]).tolist()\n",
    "\n",
    "# batch outputs\n",
    "outputs = pipe(\n",
    "    prompts,\n",
    "    max_new_tokens=500,\n",
    "    truncation=True,\n",
    "    batch_size=10,  # now it matters\n",
    "    return_full_text=False, # if true, it returns full prompt plus model output\n",
    ")\n",
    "\n",
    "def parse_output(out):\n",
    "    text = out[0][\"generated_text\"] if isinstance(out, list) else out[\"generated_text\"]\n",
    "    return text\n",
    "\n",
    "# batched response\n",
    "batch_response = [parse_output(out) for out in outputs]\n",
    "\n",
    "\n",
    "\n",
    "batch_df['gemma_270M_it_response'] = batch_response\n",
    "\n",
    "display(batch_df)"
   ]
  }
 ],
 "metadata": {
  "application/vnd.databricks.v1+notebook": {
   "computePreferences": null,
   "dashboards": [],
   "environmentMetadata": {
    "base_environment": "",
    "environment_version": "2"
   },
   "inputWidgetPreferences": null,
   "language": "python",
   "notebookMetadata": {
    "pythonIndentUnit": 4
   },
   "notebookName": "evaluate_instruction_following",
   "widgets": {}
  },
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}