In [1]:
import json
from transformers import AutoTokenizer
from transformers.utils import logging
import torch
model_name = "/mnt/data/zifeng.cao/reasoning/arc-agi/LLaMA-Factory/saves/Qwen2.5-Coder-7B-Instruct/pt_output_plus_step_output/checkpoint-274"
tokenizer = AutoTokenizer.from_pretrained(model_name)

system_prompt = '''You are a helpful assistant that can solve reasoning tasks by using a limited set of DSL functions that are implemented in Python. 
*** Task description ***
- Each task consists of around a handful of training examples, where an training example consists of an input grid and an output grid. 
- For each training example, the output grid is the result of applying the same task-specific transformation to the input grid. 
- The goal is to infer the transformation from the few training examples.
- The transformation is a task-specific grid transformation, which can be decomposed into a sequence of the DSL functions.
*** DSL description ***
- Types and Constants
 - **Types**: Define various data types like `Grid`, `Object`, `Indices`, and more to facilitate grid operations.
 - **Constants**: Include color constants (e.g., `ZERO`, `ONE`), boolean constants (`T`, `F`), and directional vectors (e.g., `UP`, `DOWN`).
- Primitives
 - **Math Operations**: Functions like `add`, `subtract`, `multiply`, and `divide` perform basic arithmetic on integers or tuples.
 - **Logical Operations**: Functions such as `even`, `flip`, and `both` handle logical evaluations.
 - **Data Operations**: Functions like `identity`, `order`, `merge`, `difference`, and `dedupe` manage data containers.
- Grid and Object Manipulation
 - **Grid Creation**: `canvas` creates grids with specified dimensions and values.
 - **Grid Transformation**: Functions like `rot90`, `hmirror`, `upscale`, and `downscale` transform grids in various ways.
 - **Subgrid Operations**: `crop`, `hsplit`, `vsplit`, and `trim` extract or modify parts of grids.
 - **Object and Patch Handling**: Functions like `objects`, `normalize`, `shift`, `toindices`, and `recolor` handle grid patches and objects.
- Analysis and Filtering
 - **Color Analysis**: Functions such as `mostcolor`, `leastcolor`, `colorcount`, and `palette` analyze color distributions.
 - **Object Filtering**: `colorfilter` and `sizefilter` filter objects by color or size.
 - **Spatial Analysis**: Functions like `center`, `position`, `manhattan`, and `adjacent` analyze spatial relationships.
- Connectivity and Bounding
 - **Connectivity**: `connect`, `neighbors`, `dneighbors`, and `ineighbors` determine connections between grid indices.
 - **Bounding**: Functions like `box`, `inbox`, `outbox`, and `corners` manage bounding areas of patches.
- Utils
 - **Random Integer Generation**: `unifint` generates random integers within specified bounds and difficulty levels.
 - **Grid Validation**: `is_grid` checks if an input is a valid grid.
 - **Grid Formatting**: `format_grid` casts lists to the grid type.
*** Format of the generated code ***
- The only allowed operations are storing the result of a function call in a variable, where all arguments must either be the input grid, some constants such as integers or common vectors indicating directions, or a variable previously computed within the same solver, and each function that is being called must either be a DSL function or a variable previously constructed within the same solver. 
- This also means that each line of code is enforced to be a single function call.
So, you are given a task and a set of examples, you need to generate a code that can solve the task.
'''

token_length = tokenizer.encode(system_prompt, return_tensors="pt").shape[1]
print(f"Token length: {token_length}")



def check_token_length(query):
 # Suppress warnings
 logging.set_verbosity_error()

 # Load the tokenizer

 # Apply chat template and tokenize
 formatted_prompt = tokenizer.apply_chat_template(query, tokenize=False)
 tokens = tokenizer.encode(formatted_prompt, return_tensors="pt")

 # Get token length
 token_length = tokens.shape[1]

 #print(f"Token length: {token_length}")

 # Check if it exceeds the model's context length (assuming 4096 for Llama-2)

 return token_length
def list_of_lists_to_string_with_commas_and_newlines(list_of_lists):
 return '\n'.join(','.join(str(item) for item in sublist) for sublist in list_of_lists)

def transform_query(query):
 result_str = ""
 previous_result = ""
 for i, example in enumerate(query):
 try:
 r_i, c_i = len(example["input"]), len(example["input"][0])
 r_o, c_o = len(example["output"]), len(example["output"][0])
 except:
 print(example)
 return None
 # input_str = "\n".join(["|".join(map(str, row)) for row in example["input"]])
 # output_str = "\n".join(["|".join(map(str, row)) for row in example["output"]])
 input_str = list_of_lists_to_string_with_commas_and_newlines(example["input"])
 output_str = list_of_lists_to_string_with_commas_and_newlines(example["output"])
 result_str = previous_result + f"** Example {i+1} ** \n input: ({r_i} by {c_i}) Matrix \n{input_str}\n output: ({r_o} by {c_o}) Matrix \n{output_str}\n\n"
 previous_result = result_str
 if len(result_str) > 14000: #6000
 token_length = tokenizer.encode(previous_result, return_tensors="pt").shape[1]
 #print(f"previous Token length: {token_length}")
 if token_length > 14000: #6000
 return None
 return previous_result
 token_length = tokenizer.encode(result_str, return_tensors="pt").shape[1]
 #print(i, len(result_str))
 #print(f"Token length: {token_length}")
 
 return result_str

import re
f = open("multi_step_verifiers_training.txt", "r")
data_output = []
token_len_list = []
skip = 0
total = 0
for line in f:
 total += 1
 tmp = json.loads(line)
 new_dict = {"messages":[]}
 new_dict["messages"].append({"role":"system", "content":system_prompt})
 tran = transform_query(tmp["example"]) 
 if tran == None:
 skip += 1
 continue

 new_dict["messages"].append({"role":"user", "content":tran})
 tmp["verifier"][0] =re.sub(r'veri.*?\(', 'solver(', tmp["verifier"][0])
 #tmp["verifier"][0] = tmp["verifier"][0].replace("veri.*\(", "solver(")
 new_dict["messages"].append({"role":"assistant", "content":"\n".join(tmp["verifier"])})
 token_len = check_token_length(new_dict["messages"])
 token_len_list.append(token_len)
 data_output.append(new_dict)
print(data_output[0]["messages"][1]["content"])
print("skip:", skip)
print("total:", total)
print("Token length max:", max(token_len_list))
print("Token length min:", min(token_len_list))
f.close()
with open("multi_step_verifiers_training.json", "w") as f:
 json.dump(data_output, f, indent=4)


Token length: 726


KeyboardInterrupt: 

In [2]:
import json
from transformers import AutoTokenizer
from transformers.utils import logging
import torch
import multiprocessing as mp
from tqdm import tqdm

model_name = "/mnt/data/zifeng.cao/reasoning/arc-agi/LLaMA-Factory/saves/Qwen2.5-Coder-7B-Instruct/pt_output_plus_step_output/checkpoint-274"
tokenizer = AutoTokenizer.from_pretrained(model_name)

system_prompt = '''You are a helpful assistant that can solve reasoning tasks by using a limited set of DSL functions that are implemented in Python. 
*** Task description ***
- Each task consists of around a handful of training examples, where an training example consists of an input grid and an output grid. 
- For each training example, the output grid is the result of applying the same task-specific transformation to the input grid. 
- The goal is to infer the transformation from the few training examples.
- The transformation is a task-specific grid transformation, which can be decomposed into a sequence of the DSL functions.
*** DSL description ***
- Types and Constants
 - **Types**: Define various data types like `Grid`, `Object`, `Indices`, and more to facilitate grid operations.
 - **Constants**: Include color constants (e.g., `ZERO`, `ONE`), boolean constants (`T`, `F`), and directional vectors (e.g., `UP`, `DOWN`).
- Primitives
 - **Math Operations**: Functions like `add`, `subtract`, `multiply`, and `divide` perform basic arithmetic on integers or tuples.
 - **Logical Operations**: Functions such as `even`, `flip`, and `both` handle logical evaluations.
 - **Data Operations**: Functions like `identity`, `order`, `merge`, `difference`, and `dedupe` manage data containers.
- Grid and Object Manipulation
 - **Grid Creation**: `canvas` creates grids with specified dimensions and values.
 - **Grid Transformation**: Functions like `rot90`, `hmirror`, `upscale`, and `downscale` transform grids in various ways.
 - **Subgrid Operations**: `crop`, `hsplit`, `vsplit`, and `trim` extract or modify parts of grids.
 - **Object and Patch Handling**: Functions like `objects`, `normalize`, `shift`, `toindices`, and `recolor` handle grid patches and objects.
- Analysis and Filtering
 - **Color Analysis**: Functions such as `mostcolor`, `leastcolor`, `colorcount`, and `palette` analyze color distributions.
 - **Object Filtering**: `colorfilter` and `sizefilter` filter objects by color or size.
 - **Spatial Analysis**: Functions like `center`, `position`, `manhattan`, and `adjacent` analyze spatial relationships.
- Connectivity and Bounding
 - **Connectivity**: `connect`, `neighbors`, `dneighbors`, and `ineighbors` determine connections between grid indices.
 - **Bounding**: Functions like `box`, `inbox`, `outbox`, and `corners` manage bounding areas of patches.
- Utils
 - **Random Integer Generation**: `unifint` generates random integers within specified bounds and difficulty levels.
 - **Grid Validation**: `is_grid` checks if an input is a valid grid.
 - **Grid Formatting**: `format_grid` casts lists to the grid type.
*** Format of the generated code ***
- The only allowed operations are storing the result of a function call in a variable, where all arguments must either be the input grid, some constants such as integers or common vectors indicating directions, or a variable previously computed within the same solver, and each function that is being called must either be a DSL function or a variable previously constructed within the same solver. 
- This also means that each line of code is enforced to be a single function call.
So, you are given a task and a set of examples, you need to generate a code that can solve the task.
'''

token_length = tokenizer.encode(system_prompt, return_tensors="pt").shape[1]
print(f"Token length: {token_length}")

def check_token_length(query):
 # Suppress warnings
 logging.set_verbosity_error()

 # Apply chat template and tokenize
 formatted_prompt = tokenizer.apply_chat_template(query, tokenize=False)
 tokens = tokenizer.encode(formatted_prompt, return_tensors="pt")

 # Get token length
 token_length = tokens.shape[1]

 return token_length

def list_of_lists_to_string_with_commas_and_newlines(list_of_lists):
 return '\n'.join(','.join(str(item) for item in sublist) for sublist in list_of_lists)

def transform_query(query):
 result_str = ""
 previous_result = ""
 for i, example in enumerate(query):
 try:
 r_i, c_i = len(example["input"]), len(example["input"][0])
 r_o, c_o = len(example["output"]), len(example["output"][0])
 except:
 print(example)
 return None
 input_str = list_of_lists_to_string_with_commas_and_newlines(example["input"])
 output_str = list_of_lists_to_string_with_commas_and_newlines(example["output"])
 result_str = previous_result + f"** Example {i+1} ** \n input: ({r_i} by {c_i}) Matrix \n{input_str}\n output: ({r_o} by {c_o}) Matrix \n{output_str}\n\n"
 previous_result = result_str
 if len(result_str) > 14000:
 token_length = tokenizer.encode(previous_result, return_tensors="pt").shape[1]
 if token_length > 14000:
 return None
 return previous_result
 token_length = tokenizer.encode(result_str, return_tensors="pt").shape[1]
 
 return result_str

def process_line(line):
 tmp = json.loads(line)
 new_dict = {"messages":[]}
 new_dict["messages"].append({"role":"system", "content":system_prompt})
 tran = transform_query(tmp["example"]) 
 if tran == None:
 return None

 new_dict["messages"].append({"role":"user", "content":tran})
 tmp["verifier"][0] = re.sub(r'veri.*?\(', 'solver(', tmp["verifier"][0])
 new_dict["messages"].append({"role":"assistant", "content":"\n".join(tmp["verifier"])})
 token_len = check_token_length(new_dict["messages"])
 return new_dict, token_len

def main():
 with open("multi_step_verifiers_training.txt", "r") as f:
 lines = f.readlines()
 
 total = len(lines)
 data_output = []
 token_len_list = []
 skip = 0
 
 # Create a pool of workers
 with mp.Pool(processes=mp.cpu_count()) as pool:
 # Process lines in parallel with progress bar
 results = list(tqdm(pool.imap(process_line, lines), total=total))
 
 # Collect results
 for result in results:
 if result is None:
 skip += 1
 continue
 new_dict, token_len = result
 data_output.append(new_dict)
 token_len_list.append(token_len)
 
 print(data_output[0]["messages"][1]["content"])
 print("skip:", skip)
 print("total:", total)
 print("Token length max:", max(token_len_list))
 print("Token length min:", min(token_len_list))
 
 with open("multi_step_verifiers_training.json", "w") as f:
 json.dump(data_output, f, indent=4)

if __name__ == '__main__':
 main()


Token length: 726


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

KeyboardInterrupt: 

In [20]:
import re

def replace_verifier_with_solver(input_string):
 return re.sub(r'veri.*?\(', 'solver(', input_string)

original_string = 'def verifier_9f6b5f41(I: Grid) -> Grid:'
modified_string = replace_verifier_with_solver(original_string)
print(modified_string)

'def verifier_9f6b5f41(I: Grid) -> Grid:'

In [2]:
import json
# merge two datasets
data1 = json.load(open("multi_step_verifiers_training.json", "r"))
data2 = json.load(open("re_arc_v4.json", "r"))
data1.extend(data2)
with open("multi_step_merged_arc_v4.json", "w") as f:
 json.dump(data1, f, indent=4)