In [0]:
# Install PyTorch (pinned) and TensorBoard (unpinned).
%pip install torch==2.6.0 tensorboard

# Install Hugging Face libraries; `evaluate` and `protobuf` are left unpinned.
%pip install transformers==4.55.0 datasets==4.0.0 accelerate==1.10.0 evaluate trl==0.21.0 protobuf sentencepiece==0.2.0

# flash-attn is only useful on GPUs that support the BF16 data type and flash attention,
# such as NVIDIA L4 or NVIDIA A100.
# NOTE(review): the original note said "COMMENT IN", but the line below is already active,
# and the model is later loaded with attn_implementation='sdpa' — confirm this install is still needed.
%pip install flash-attn --no-build-isolation
# NOTE(review): mlflow is installed unpinned and is not imported in any cell visible here.
%pip install mlflow 
# Restart the Python process after the installs (Databricks utility).
dbutils.library.restartPython()

In [0]:
# Reference: AWS EC2 instance types — https://aws.amazon.com/ec2/instance-types/

In [0]:
import torch 
import transformers
import accelerate
import trl
import sentencepiece
import datasets 

# Print the installed version of each core library, one line per library.
for label, module in (
    ("Torch version: ", torch),
    ("Transformers version: ", transformers),
    ("Accelerate version: ", accelerate),
    ("TRL version: ", trl),
    ("Sentencepiece version: ", sentencepiece),
    ("Datasets version: ", datasets),
):
    print(label, module.__version__)

In [0]:
import json
import re
from pathlib import Path
from random import randint
from typing import Optional

from datasets import load_dataset, Dataset
from huggingface_hub import login
from pyspark.sql import functions as F
from sklearn.metrics import classification_report
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline

In [0]:
import sys
# Print the interpreter version. The model-loading cell further down notes that
# loading failed on Python 3.11.11 and worked on Python 3.12.3.
print("Python version:", sys.version)

In [0]:
# Authenticate with the Hugging Face Hub. Called without arguments, so no token
# is hard-coded in the notebook.
# NOTE(review): presumably required to download the Gemma checkpoint — confirm.
login()

In [0]:
def parse_json_blob(text: str) -> Optional[dict]:
    """Extract the first JSON object found in a model response.

    Candidates are tried in this order:
      1. the contents of each markdown code fence (```json ... ``` or ``` ... ```),
         with or without newlines around the payload;
      2. the whole text;
      3. the first decodable ``{...}`` object embedded in surrounding prose.

    Args:
        text: Raw generated text.

    Returns:
        The parsed JSON object as a dict, or None when no JSON object can be
        decoded. Empty or non-string input, and JSON that decodes to something
        other than an object (list, number, string), also yield None so that
        callers can safely index the result by key.
    """
    if not isinstance(text, str) or not text.strip():
        return None

    # Fenced payloads first; the whole text is the last "plain" candidate.
    fenced = re.findall(r"```(?:json)?\s*(.*?)\s*```", text, re.DOTALL | re.IGNORECASE)
    for candidate in [*fenced, text]:
        try:
            parsed = json.loads(candidate.strip())
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            return parsed

    # Fallback: small models often wrap the JSON in prose without fences.
    # Try to decode an object starting at each '{' until one succeeds.
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            parsed, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            parsed = None
        if isinstance(parsed, dict):
            return parsed
        start = text.find("{", start + 1)

    return None

In [0]:
# NOTE: loading does not work with Python 3.11.11; it works with Python 3.12.3.
base_model = "google/gemma-3-270m-it"


# The attention backend used here is 'sdpa'; 'flash_attention_2' is the alternative still to be tried.
# torch_dtype='auto' and device_map='auto' leave dtype and device placement to the library.
model = AutoModelForCausalLM.from_pretrained(base_model ,torch_dtype='auto', device_map='auto', attn_implementation='sdpa')
tokenizer = AutoTokenizer.from_pretrained(base_model)

In [0]:
# Show which device the model landed on and which dtype was selected.
print(f"Device: {model.device}")
print(f"DType: {model.dtype}")

In [0]:
# Wrap the already-loaded model and tokenizer in a text-generation pipeline.
# `pipe` is the object every inference cell below calls.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)


In [0]:
# Load the system prompt to evaluate from a text file.
# NOTE(review): this is an absolute, user-specific workspace path; the notebook only
# runs where this exact file is readable.
full_path = "/Workspace/Users/hyemam@expediagroup.com/Trust_and_Safety/Shared_Spaces/LLMExperiments/LabelCollection/prompt_4.txt"
system_prompt = Path(full_path).read_text()

# A single listing description used as the user message for a quick smoke test.
example_prompt = """Welcome to the heart of the bay area. You'll find yourself conveniently located to downtown San Mateo and all the transportation. This is a shared walkway, 1 bed guest suite. The unit has 1 queen sized bed with ample pillows, , mini kitchen, fast wifi, 2 x 43\" TV, Netflix, coffee, fast wifi. Self-check in. Mini Kitchen includes: Refrigerator, cook-top, microwave, Keurig / Keurig pods, utensils, cookware. Bathroom includes, towels, blow dryer, iron / ironing board, hand soap, shampoo, conditioner, body wash. Living area has pull out queen sofa bed with mattress topper, blanket, pillows, and sheets."""

# Chat-format conversation: system instructions followed by the listing text.
example = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": example_prompt},
]

# Rendered chat template as a plain string (kept for inspection).
prompt = pipe.tokenizer.apply_chat_template(
    example,
    tokenize=False,
    add_generation_prompt=True,
)

# Generate a completion for the single conversation; only the new text is returned.
output = pipe(
    example,
    max_new_tokens=500,
    disable_compile=True,
    truncation=True,
    batch_size=10,
    return_full_text=False,
)

# Parse the JSON decision out of the last generated sequence.
parse_json_blob(output[-1]['generated_text'])

In [0]:
# Columns kept from every labelled batch.
SELECTED_COLUMNS = ['vrbo_property_id', "listing_rental_description", "ground_truth", "reviewer_notes"]

# Root folder that holds the labelled batches (batch_1/, batch_2/, batch_3/).
BATCH_ROOT = "s3://apiary-analytics-927134741764-us-east-1-mxt-ml/hyemam/shared_spaces/discovery/operations"

# Evaluation-sample settings.
NUM_SAMPLES = 100       # total number of samples
SAMPLE_FRACTION = 0.5
# The original call was `sample(0.5, False)`. PySpark reads a leading float as the
# fraction and the next positional argument as the seed, so `False` became seed=0.
# Seed 0 is kept so the same rows are drawn, but it is now explicit.
SAMPLE_SEED = 0


def _load_batch(batch_num, notes_column):
    """Read one labelled batch and normalise its notes column to `reviewer_notes`."""
    return (
        spark.read.parquet(f"{BATCH_ROOT}/batch_{batch_num}/")
        .withColumnRenamed(notes_column, "reviewer_notes")
        .select(*SELECTED_COLUMNS)
        .withColumn("batch_num", F.lit(batch_num))
    )


# load datasets
batch_1 = _load_batch(1, "Notes")
# NOTE(review): batch 2 maps "Reviewer's Name" (not a notes column) onto
# `reviewer_notes`; the 'Leon' filter below relies on it — confirm this is intended.
batch_2 = _load_batch(2, "Reviewer's Name")
batch_3 = _load_batch(3, "Analyst Notes")

# preprocess data for evaluation
batch = (
    batch_1.union(batch_2).union(batch_3)
    # normalise labels: upper-case, trimmed, UNSURE folded into CLARIFICATION
    .withColumn("ground_truth", F.trim(F.upper("ground_truth")))
    .withColumn('ground_truth', F.when(F.col('ground_truth') == "UNSURE", 'CLARIFICATION').otherwise(F.col('ground_truth')))
    # binary label: 1 only for YES
    .withColumn("label", F.when(F.col("ground_truth") == 'YES', 1).otherwise(0))
    .withColumnRenamed("listing_rental_description", "text")
)

# NOTE(review): in Spark SQL the comparison below is NULL for rows whose
# reviewer_notes is NULL, so those rows are filtered out as well — confirm intended.
batch_df = (
    batch.sample(withReplacement=False, fraction=SAMPLE_FRACTION, seed=SAMPLE_SEED)
    .filter("reviewer_notes != 'Leon'")
    .limit(NUM_SAMPLES)
)

# Class balance of the evaluation sample.
display(batch_df.groupBy("label", "ground_truth").count())

batch_df = batch_df.toPandas()

display(batch_df)

In [0]:
def generate(sample):
    """Run the pipeline on one dataset row and return its parsed decision.

    Args:
        sample: Mapping with a 'prompt' entry holding the chat messages.

    Returns:
        {'response': <decision, trimmed and upper-cased>}, or {'response': None}
        when the generated text contains no parseable JSON object or the object
        has no string 'decision' field. Previously a missing or non-string
        'decision' raised KeyError/AttributeError and aborted the whole map.
    """
    output = pipe(
        sample['prompt'],
        max_new_tokens=500,
        disable_compile=True,
        truncation=True,
        batch_size=10,
        return_full_text=False,
    )
    parsed = parse_json_blob(output[0]['generated_text'])
    decision = parsed.get('decision') if isinstance(parsed, dict) else None
    if not isinstance(decision, str):
        return {'response': None}
    return {'response': decision.strip().upper()}

In [0]:
# Build the chat messages (system prompt + listing text) for every row.
# NOTE: this adds a 'prompt' column to batch_df in place.
batch_df['prompt'] = batch_df.apply(lambda x: [{"role": "system", "content": system_prompt}, {"role": "user", "content": x['text']}], axis=1)


# Wrap only the prompt column in a Hugging Face Dataset.
batch_dataset = Dataset.from_pandas(batch_df[['prompt']])

# Generate a response per row via Dataset.map (took about 7 minutes).
# generate() receives one sample per call here.
batch_response = batch_dataset.map(generate)


batch_response


In [0]:
# One chat conversation (system prompt + listing text) per row of batch_df.
prompts = [
    [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": listing_text},
    ]
    for listing_text in batch_df['text']
]

# Send all conversations through the pipeline at once so batch_size takes effect;
# return_full_text=False keeps only the model's continuation, not the prompt.
outputs = pipe(prompts, max_new_tokens=500, truncation=True, batch_size=10, return_full_text=False)

def parse_output(out):
    """Turn one pipeline result into an upper-cased decision label.

    Args:
        out: Either a list whose first element is a dict with 'generated_text',
            or such a dict directly.

    Returns:
        The 'decision' value trimmed and upper-cased, or None when the text has
        no parseable JSON object or the object has no string 'decision' field
        (the earlier version raised KeyError/AttributeError in that case).
    """
    first = out[0] if isinstance(out, list) else out
    parsed = parse_json_blob(first["generated_text"])
    decision = parsed.get('decision') if isinstance(parsed, dict) else None
    return decision.strip().upper() if isinstance(decision, str) else None

# Convert every pipeline result into a decision label.
batch_response = list(map(parse_output, outputs))

# Keep the predictions next to the ground truth for scoring.
batch_df['gemma_270M_it_response'] = batch_response

display(batch_df)

In [0]:
# Observation: every few-shot example in the prompt was YES, so the model keeps returning YES.
# Drop only rows missing the label or the prediction. A bare dropna() would also
# discard rows that merely have a null in an unrelated column.
eval_df = batch_df.dropna(subset=['ground_truth', 'gemma_270M_it_response'])
print(classification_report(eval_df['ground_truth'], eval_df['gemma_270M_it_response']))

In [0]:
# Zero-shot system prompt: policy definitions plus a fenced JSON output schema, with no worked examples.
# NOTE(review): the "taxonomy" options listed in the schema contain no value for a "no" decision.
zero_shot_system_prompt = """TASK

You are a policy classification assistant trained to detect Shared Space policy violations in text from Vrbo listings description, post-stay traveler reviews, or traveler-host exchanges.
Your job: Decide if the listing violates Vrbo's shared space policy based only on the text provided.

POLICY DEFINITIONS

Core Rule:
Vrbo does not support short-term rentals where guests share internal living areas with the host or other travelers outside their party.

Internal living areas include (but are not limited to):
	•	Bedrooms
	•	Bathrooms
	•	Kitchens / kitchenettes
	•	Living rooms
	•	Interior hallways within a host-occupied unit
	•	Any interior space that is part of the host's own living quarters

A listing must have:
	•	A secured, private entrance that guests can lock and control themselves (deadbolt or smart lock)
	•	A private bathroom
	•	No shared internal living areas with the host or other unrelated guests


CLEAR VIOLATIONS (“yes”)

Mark yes when the text explicitly states or strongly implies that:
	1.	Guests share an internal living area (bathroom, kitchen, living room, or hallway inside host's unit) with the host or other guests outside their travel party.
	2.	Guests must enter their space through the host's internal living area.
	3.	Implicit suggestion of sharing with the host eg. the property is a room in the host's home without clear separation of internal spaces.

PERMITTED EXCEPTIONS (not violations if requirements are met)
	•	Bed & Breakfasts with fewer than 20 rooms (must still have private entry and no internal sharing unless otherwise allowed).
	•	Micro-studios with shared kitchen or bathroom if this is typical for that unit type.
	•	Shared external spaces (driveway, parking area, yard, garden, patio, fire pit, pool, hot tub, external laundry).


UNCLEAR CASES (“clarification”)

Use clarification only if:
	•	The text suggests possible internal sharing
	•	Contradictory information appears (e.g., “private kitchen” in one place, “shared kitchen” in another).


Do NOT use “clarification” just because:
	•	Host lives on property (no violation unless internal sharing is mentioned).
	•	There is another rental unit on the property.

NO VIOLATION (“no”)

Default to no when:
	•	Listing clearly offers a private sleeping area, private bathroom, and no language implying shared internal spaces.
	•	External spaces are shared but internal spaces are private.
	•	Missing details about entry layout, but no suggestion of shared internal living areas.

EVIDENCE PRIORITY RULES

If the listing contains both private and shared claims:
	1.	Shared internal space claim overrides any “private” marketing statement.
	2.	If “private” is stated and sharing is only implied vaguely → choose "no", not “yes.”

OUTPUT FORMAT

Return your answer as JSON:

```json
{
 "decision": "yes" | "no" | "clarification",
 "reasoning": "Briefly explain why, citing policy rules and the specific clues found",
 "taxonomy": "One of: 'Shared Bathroom', 'Shared Kitchen', 'Shared Living Room', 'Entry Through Host Space', 'No Private Bathroom', 'No Private Kitchen', 'B&B Over 20 Rooms', 'Micro-Studio Exception', 'Unclear Internal Sharing'",
 "label_excerpt": "Direct quote from listing text that supports your decision"
}
```


"""

In [0]:
# Zero-shot run: without examples the model has a hard time following the
# output format, so more responses fail to parse (more nulls).

# One chat conversation (zero-shot system prompt + listing text) per row.
prompts = [
    [
        {"role": "system", "content": zero_shot_system_prompt},
        {"role": "user", "content": listing_text},
    ]
    for listing_text in batch_df['text']
]

# Batched generation; return_full_text=False keeps only the model's continuation.
outputs = pipe(prompts, max_new_tokens=500, truncation=True, batch_size=10, return_full_text=False)

# The raw results are shown as-is. Parsing them and writing them into
# batch_df['gemma_270M_it_response'] is deliberately not done in this cell.
outputs

In [0]:
# Spot-check JSON parsing on a single raw result.
# NOTE(review): index 90 is hard-coded and requires `outputs` to hold at least 91 entries.
parse_json_blob(outputs[90][0]['generated_text'])

In [0]:
# The zero-shot inference cell only displays `outputs`; it never writes predictions
# into batch_df. Scoring batch_df['gemma_270M_it_response'] here would therefore
# re-score the previous prompt's predictions. Parse the current `outputs` instead.
zero_shot_decisions = []
for out in outputs:
    parsed = parse_json_blob(out[0]['generated_text'])
    decision = parsed.get('decision') if isinstance(parsed, dict) else None
    zero_shot_decisions.append(decision.strip().upper() if isinstance(decision, str) else None)

# Score without mutating batch_df; drop only rows missing the label or the prediction.
zero_shot_eval = (
    batch_df.assign(zero_shot_response=zero_shot_decisions)
    .dropna(subset=['ground_truth', 'zero_shot_response'])
)

if zero_shot_eval.empty:
    print("No zero-shot response could be parsed into a decision; nothing to score.")
else:
    print(classification_report(zero_shot_eval['ground_truth'], zero_shot_eval['zero_shot_response']))

In [0]:
# Variant of the zero-shot prompt that asks for a single-word answer
# (yes / no / clarification) instead of a JSON object.
text_output_system_prompt = """TASK

You are a policy classification assistant trained to detect Shared Space policy violations in text from Vrbo listings description, post-stay traveler reviews, or traveler-host exchanges.
Your job: Decide if the listing violates Vrbo's shared space policy based only on the text provided.

POLICY DEFINITIONS

Core Rule:
Vrbo does not support short-term rentals where guests share internal living areas with the host or other travelers outside their party.

Internal living areas include (but are not limited to):
	•	Bedrooms
	•	Bathrooms
	•	Kitchens / kitchenettes
	•	Living rooms
	•	Interior hallways within a host-occupied unit
	•	Any interior space that is part of the host's own living quarters

A listing must have:
	•	A secured, private entrance that guests can lock and control themselves (deadbolt or smart lock)
	•	A private bathroom
	•	No shared internal living areas with the host or other unrelated guests


CLEAR VIOLATIONS (“yes”)

Mark yes when the text explicitly states or strongly implies that:
	1.	Guests share an internal living area (bathroom, kitchen, living room, or hallway inside host's unit) with the host or other guests outside their travel party.
	2.	Guests must enter their space through the host's internal living area.
	3.	Implicit suggestion of sharing with the host eg. the property is a room in the host's home without clear separation of internal spaces.

PERMITTED EXCEPTIONS (not violations if requirements are met)
	•	Bed & Breakfasts with fewer than 20 rooms (must still have private entry and no internal sharing unless otherwise allowed).
	•	Micro-studios with shared kitchen or bathroom if this is typical for that unit type.
	•	Shared external spaces (driveway, parking area, yard, garden, patio, fire pit, pool, hot tub, external laundry).


UNCLEAR CASES (“clarification”)

Use clarification only if:
	•	The text suggests possible internal sharing
	•	Contradictory information appears (e.g., “private kitchen” in one place, “shared kitchen” in another).


Do NOT use “clarification” just because:
	•	Host lives on property (no violation unless internal sharing is mentioned).
	•	There is another rental unit on the property.

NO VIOLATION (“no”)

Default to no when:
	•	Listing clearly offers a private sleeping area, private bathroom, and no language implying shared internal spaces.
	•	External spaces are shared but internal spaces are private.
	•	Missing details about entry layout, but no suggestion of shared internal living areas.

EVIDENCE PRIORITY RULES

If the listing contains both private and shared claims:
	1.	Shared internal space claim overrides any “private” marketing statement.
	2.	If “private” is stated and sharing is only implied vaguely → choose "no", not “yes.”

OUTPUT FORMAT

Return only the decision as a single word:

yes or no or clarification
"""

In [0]:
# Single-word variant: the prompt asks for yes / no / clarification as plain text
# instead of JSON, still without any examples.

# One chat conversation (text-output system prompt + listing text) per row.
prompts = [
    [
        {"role": "system", "content": text_output_system_prompt},
        {"role": "user", "content": listing_text},
    ]
    for listing_text in batch_df['text']
]

# Batched generation; return_full_text=False keeps only the model's continuation.
outputs = pipe(prompts, max_new_tokens=500, truncation=True, batch_size=10, return_full_text=False)

def parse_output(out):
    """Return the raw generated text of one pipeline result, unparsed.

    `out` is either a list whose first element is a dict with 'generated_text',
    or such a dict itself. This definition replaces any earlier `parse_output`.
    """
    first = out[0] if isinstance(out, list) else out
    return first["generated_text"]

# Collect the raw text of every pipeline result.
batch_response = list(map(parse_output, outputs))

# Overwrites the prediction column with this run's raw responses.
batch_df['gemma_270M_it_response'] = batch_response

display(batch_df)

In [0]:
# This shows that the model lacks the ability to follow the requested output format. This is where fine-tuning comes in.
# It also outputs ... (TODO: this note was left unfinished — describe what else the model outputs.)

In [0]:
# Few-shot system prompt: policy definitions, JSON output schema, and three worked examples.
# The schema and example outputs use straight double quotes so that they are valid JSON.
# With typographic quotes the demonstrations themselves could not be parsed by json.loads,
# and a model imitating them would produce unparseable output.
# 'No Violation' is listed as a taxonomy option because Example 2 uses it.
few_shot_examples = """TASK

You are a policy classification assistant trained to detect Shared Space policy violations in text from Vrbo listings description, post-stay traveler reviews, or traveler-host exchanges.
Your job: Decide if the listing violates Vrbo’s shared space policy based only on the text provided.

POLICY DEFINITIONS

Core Rule:
Vrbo does not support short-term rentals where guests share internal living areas with the host or other travelers outside their party.

Internal living areas include (but are not limited to):
	•	Bedrooms
	•	Bathrooms
	•	Kitchens / kitchenettes
	•	Living rooms
	•	Interior hallways within a host-occupied unit
	•	Any interior space that is part of the host’s own living quarters

A listing must have:
	•	A secured, private entrance that guests can lock and control themselves (deadbolt or smart lock)
	•	A private bathroom
	•	No shared internal living areas with the host or other unrelated guests

CLEAR VIOLATIONS (“yes”)

Mark yes when the text explicitly states or strongly implies that:
	1.	Guests share an internal living area (bathroom, kitchen, living room, or hallway inside host’s unit) with the host or other guests outside their travel party.
	2.	Guests must enter their space through the host’s internal living area.
	3.	Implicit suggestion of sharing with the host e.g. the property is a room in the host’s home without clear separation of internal spaces.

PERMITTED EXCEPTIONS (not violations if requirements are met)
	•	Bed & Breakfasts with fewer than 20 rooms (must still have private entry and no internal sharing unless otherwise allowed).
	•	Micro-studios with shared kitchen or bathroom if this is typical for that unit type.
	•	Shared external spaces (driveway, parking area, yard, garden, patio, fire pit, pool, hot tub, external laundry).

UNCLEAR CASES (“clarification”)

Use clarification only if:
	•	The text suggests possible internal sharing
	•	Contradictory information appears (e.g., “private kitchen” in one place, “shared kitchen” in another).

Do NOT use “clarification” just because:
	•	Host lives on property (no violation unless internal sharing is mentioned).
	•	There is another rental unit on the property.

NO VIOLATION (“no”)

Default to no when:
	•	Listing clearly offers a private sleeping area, private bathroom, and no language implying shared internal spaces.
	•	External spaces are shared but internal spaces are private.
	•	Missing details about entry layout, but no suggestion of shared internal living areas.

EVIDENCE PRIORITY RULES

If the listing contains both private and shared claims:
	1.	Shared internal space claim overrides any “private” marketing statement.
	2.	If “private” is stated and sharing is only implied vaguely → choose “no”, not “yes.”

OUTPUT FORMAT

Return your answer as JSON:

{
"decision": "yes" | "no" | "clarification",
"reasoning": "Briefly explain why, citing policy rules and the specific clues found",
"taxonomy": "One of: ‘Shared Bathroom’, ‘Shared Kitchen’, ‘Shared Living Room’, ‘Entry Through Host Space’, ‘No Private Bathroom’, ‘No Private Kitchen’, ‘B&B Over 20 Rooms’, ‘Micro-Studio Exception’, ‘Unclear Internal Sharing’, ‘No Violation’",
"label_excerpt": "Direct quote from listing text that supports your decision"
}

⸻

EXAMPLES

Example 1 – Clear Violation (yes)

Input:
Charming room in my home. Guests will share a bathroom with the host and have access to the main kitchen and living room.

Output:
{
"decision": "yes",
"reasoning": "The listing explicitly says guests share the bathroom and kitchen with the host, which are internal living areas. This is a direct policy violation.",
"taxonomy": "Shared Bathroom",
"label_excerpt": "Guests will share a bathroom with the host."
}

⸻

Example 2 – No Violation (no)

Input:
Private guesthouse with separate entrance, bedroom, and bathroom. Guests may use the backyard and pool, which are shared with the host’s family.

Output:
{
"decision": "no",
"reasoning": "The listing offers a private guesthouse with its own entrance and bathroom. The only shared areas are external (yard, pool), which are permitted.",
"taxonomy": "No Violation",
"label_excerpt": "Private guesthouse with separate entrance, bedroom, and bathroom."
}

⸻

Example 3 – Clarification (clarification)

Input:
Cozy private bedroom in host’s home. Guests have access to a kitchen for cooking.

Output:
{
"decision": "clarification",
"reasoning": "The listing says ‘private bedroom’ but it is unclear if the kitchen is shared with the host or dedicated to guests. This creates ambiguity about internal living area sharing.",
"taxonomy": "Unclear Internal Sharing",
"label_excerpt": "Guests have access to a kitchen for cooking."
}"""



In [0]:
# Few-shot run: the system prompt now carries three worked examples
# (yes / no / clarification) in addition to the policy text.

# One chat conversation (few-shot system prompt + listing text) per row.
prompts = [
    [
        {"role": "system", "content": few_shot_examples},
        {"role": "user", "content": listing_text},
    ]
    for listing_text in batch_df['text']
]

# Batched generation; return_full_text=False keeps only the model's continuation.
outputs = pipe(prompts, max_new_tokens=500, truncation=True, batch_size=10, return_full_text=False)

def parse_output(out):
    """Pull the unparsed 'generated_text' out of a single pipeline result.

    Accepts either a list (its first element is used) or the result dict itself.
    """
    if isinstance(out, list):
        out = out[0]
    return out["generated_text"]

# Collect the raw text of every few-shot result.
batch_response = list(map(parse_output, outputs))

# Overwrites the prediction column with this run's raw responses.
batch_df['gemma_270M_it_response'] = batch_response

display(batch_df)