NYUAD-ComNets commited on
Commit
666398d
·
verified ·
1 Parent(s): 4970f90

Upload tokenizer

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
chat_template.jinja ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{- bos_token }}
2
+ {%- if custom_tools is defined and custom_tools%}
3
+ {%- set tools = custom_tools %}
4
+ {%- endif %}
5
+ {%- if tools is defined and tools %}
6
+ {%- set tool_definition = tool_definition ~ (tools | tojson(indent=4)) %}
7
+ {%- else %}
8
+ {%- set tools = none %}
9
+ {%- endif %}
10
+
11
+
12
+ {#- This block extracts the system message, so we can slot it into the right place. #}
13
+ {%- if messages[0]['role'] == 'system' %}
14
+ {%- set user_provided_system_message = true %}
15
+ {%- if messages[0]['content'] is string %}
16
+ {%- set system_message = messages[0]['content']|trim %}
17
+ {%- else %}
18
+ {%- set system_message = messages[0]['content'][0]['text']|trim %}
19
+ {%- endif %}
20
+ {%- set messages = messages[1:] %}
21
+ {%- else %}
22
+ {%- if tools is not none %}
23
+ {#- Since not system_message was provided by user, if tool is provided, system_message is now default tool system message #}
24
+ {#- This system message is from llama website:https://www.llama.com/docs/model-cards-and-prompt-formats/llama4/ #}
25
+ {%- set system_message = "You are a helpful assistant and an expert in function composition. You can answer general questions using your internal knowledge OR invoke functions when necessary. Follow these strict guidelines:\n\n1. FUNCTION CALLS:\n- ONLY use functions that are EXPLICITLY listed in the function list below\n- If NO functions are listed (empty function list []), respond ONLY with internal knowledge or \"I don't have access to [Unavailable service] information\"\n- If a function is not in the list, respond ONLY with internal knowledge or \"I don't have access to [Unavailable service] information\"\n- If ALL required parameters are present AND the query EXACTLY matches a listed function's purpose: output ONLY the function call(s)\n- Use exact format: [func_name1(param1=value1, param2=value2), func_name2(...)]\nExamples:\nCORRECT: [get_weather(location=\"Vancouver\"), calculate_route(start=\"Boston\", end=\"New York\")] <- Only if get_weather and calculate_route are in function list\nINCORRECT: get_weather(location=\"New York\")\nINCORRECT: Let me check the weather: [get_weather(location=\"New York\")]\nINCORRECT: [get_events(location=\"Singapore\")] <- If function not in list\n\n2. RESPONSE RULES:\n- For pure function requests matching a listed function: ONLY output the function call(s)\n- For knowledge questions: ONLY output text\n- For missing parameters: ONLY request the specific missing parameters\n- For unavailable services (not in function list): output ONLY with internal knowledge or \"I don't have access to [Unavailable service] information\". Do NOT execute a function call.\n- If the query asks for information beyond what a listed function provides: output ONLY with internal knowledge about your limitations\n- NEVER combine text and function calls in the same response\n- NEVER suggest alternative functions when the requested service is unavailable\n- NEVER create or invent new functions not listed below\n\n3. STRICT BOUNDARIES:\n- ONLY use functions from the list below - no exceptions\n- NEVER use a function as an alternative to unavailable information\n- NEVER call functions not present in the function list\n- NEVER add explanatory text to function calls\n- NEVER respond with empty brackets\n- Use proper Python/JSON syntax for function calls\n- Check the function list carefully before responding\n\n4. TOOL RESPONSE HANDLING:\n- When receiving tool responses: provide concise, natural language responses\n- Don't repeat tool response verbatim\n- Don't add supplementary information\n\nHere is a list of functions in JSON format that you can invoke:\n" %}
26
+ {%- else %}
27
+ {%- set system_message = "" %}
28
+ {%- endif %}
29
+ {%- endif %}
30
+ {#- Now writing the system message: use the user provided system message if user_provided_system_message, else default tool system message if tools presented #}
31
+ {%- if system_message %}
32
+ {#- always use user provided system message to override default tool system message #}
33
+ {{- "<|header_start|>system<|header_end|>\n\n" }}
34
+ {{- system_message }}
35
+ {%- if user_provided_system_message and tools %}
36
+ {{- "\nHere is a list of functions in JSON format that you can invoke. Use exact format: [func_name1(param1=value1, param2=value2), func_name2(...)]\n" }}
37
+ {{- tool_definition -}}
38
+ {%- elif tool_definition %}
39
+ {{- tool_definition -}}
40
+ {%- endif %}
41
+ {{- "<|eot|>" }}
42
+ {%- endif %}
43
+
44
+ {#- Now deal with all other messages #}
45
+ {%- for message in messages %}
46
+ {#- Base case: messages that are not from tool role and has empty tool_call list #}
47
+ {%- if not (message.role == 'ipython' or message.role == 'tool' or ('tool_calls' in message and message.tool_calls|length != 0 )) %}
48
+ {{- '<|header_start|>' + message['role'] + '<|header_end|>\n\n' }}
49
+ {%- if message['content'] is string %}
50
+ {{- message['content'] }}
51
+ {%- else %}
52
+ {%- for content in message['content'] %}
53
+ {%- if content['type'] == 'image' %}
54
+ {{- '<|image|>' }}
55
+ {%- elif content['type'] == 'text' %}
56
+ {{- content['text'] | trim }}
57
+ {%- endif %}
58
+ {%- endfor %}
59
+ {%- endif %}
60
+ {{- "<|eot|>" }}
61
+ {#- Tool case: messages has non-empty tool_call list, must from assistant #}
62
+ {%- elif 'tool_calls' in message %}
63
+ {#- assume tool_calls are always coming from assistant #}
64
+ {%- if message.role == 'assistant' %}
65
+ {{- '<|header_start|>assistant<|header_end|>\n\n' -}}
66
+ {%- if message['content'] is string %}
67
+ {{- message['content'] }}
68
+ {%- else %}
69
+ {%- for content in message['content'] %}
70
+ {%- if content['type'] == 'image' %}
71
+ {{- '<|image|>' }}
72
+ {%- elif content['type'] == 'text' %}
73
+ {{- content['text'] }}
74
+ {%- endif %}
75
+ {%- endfor %}
76
+ {%- endif %}
77
+ {{- "[" }}
78
+ {%- for tool_call in message.tool_calls %}
79
+ {%- if tool_call.function is defined %}
80
+ {%- set tool_call = tool_call.function %}
81
+ {%- endif %}
82
+ {{- tool_call.name + '(' -}}
83
+ {%- for param in tool_call.arguments %}
84
+ {{- param + '="' -}}
85
+ {{- "%s" | format(tool_call.arguments[param]) -}}
86
+ {{- '"' -}}
87
+ {% if not loop.last %}, {% endif %}
88
+ {%- endfor %}
89
+ {{- ')' -}}
90
+ {% if not loop.last %}, {% endif %}
91
+ {%- endfor %}
92
+ {{- "]<|eot|>" }}
93
+ {%- endif %}
94
+ {#- Tool_response case: messages are from tool_response #}
95
+ {%- elif message.role == "tool" or message.role == "ipython" %}
96
+ {{- "<|header_start|>ipython<|header_end|>\n\n" }}
97
+ {%- if message.content is string %}
98
+ {{- message.content | tojson }}
99
+ {%- else %}
100
+ {%- for content in message['content'] %}
101
+ {%- if content['type'] == 'text' %}
102
+ {{- content['text'] | tojson }}
103
+ {%- endif %}
104
+ {%- endfor %}
105
+ {%- endif %}
106
+ {{- "<|eot|>" }}
107
+ {%- endif %}
108
+ {%- endfor %}
109
+ {%- if add_generation_prompt %}
110
+ {{- '<|header_start|>assistant<|header_end|>\n\n' }}
111
+ {%- endif %}
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|begin_of_text|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|eot|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|finetune_right_pad|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:172c9eb4beafc72601690da3ccfcede5c2e6806a8d5ec1fca33e22acea8023a4
3
+ size 27948578
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff