Prince-1 commited on Aug 4

Commit

86d3ae8

verified ·

1 Parent(s): 4d5433a

Add files using upload-large-folder tool

Browse files

Files changed (17) hide show

.gitattributes +2 -0
BIAS.md +4 -0
EXPLAINABILITY.md +13 -0
PRIVACY.md +10 -0
README.md +288 -0
SAFETY.md +7 -0
added_tokens.json +24 -0
chat_template.jinja +20 -0
genai_config.json +50 -0
genselect_hf.py +204 -0
merges.txt +0 -0
model.onnx +3 -0
model.onnx.data +3 -0
special_tokens_map.json +31 -0
tokenizer.json +3 -0
tokenizer_config.json +207 -0
vocab.json +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
+model.onnx.data filter=lfs diff=lfs merge=lfs -text

BIAS.md ADDED Viewed

	@@ -0,0 +1,4 @@

+Field                                                                                               |  Response
+:---------------------------------------------------------------------------------------------------|:---------------
+Participation considerations from adversely impacted groups [protected classes](https://www.senate.ca.gov/content/protected-classes) in model design and testing:  |  None
+Measures taken to mitigate against unwanted bias:                                                   |  None

EXPLAINABILITY.md ADDED Viewed

	@@ -0,0 +1,13 @@

+Field                                                                                                  |  Response
+:------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------
+Intended Task/Domain:                                                                   |  Reasoning for Math, Code, and Science Solution Generation
+Model Type:                                                                                            |  Transformer
+Intended Users:                                                                                        | Developers and researchers solving competitive programming questions and evaluating models for benchmark comparison.
+Output:                                                                                                |  Text
+Describe how the model works:                                                                          |  The model generates a reasoning trace and responds with a final solution in response to a user prompting a programming question.
+Name the adversely impacted groups this has been tested to deliver comparable outcomes regardless of:  |  Not Applicable
+Technical Limitations & Mitigation:                                                                    |  This model is not applicable for Software Engineering tasks. It primarily should be used for competitive coding challenges that require optimized code solutions that can operate in appropriate space and time complexity.
+Verified to have met prescribed NVIDIA quality standards:  |  Yes
+Performance Metrics:                                                                                   |  Pass@1 score
+Potential Known Risks:                                                                                 | The model may provide incorrect code solutions that fail to solve the problem. The model may enter a feedback loop and constantly generate reasoning tokens without generating the final solution.
+Licensing:                                                                                             |  [CC-BY-4.0](https://creativecommons.org/licenses/by/4.0/deed.en/)

PRIVACY.md ADDED Viewed

	@@ -0,0 +1,10 @@

+Field                                                                                                                              |  Response
+:----------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------
+Generatable or reverse engineerable personal data?                                                     |  No
+Personal data used to create this model?                                                                                       |  No
+How often is the dataset reviewed?                                                                                                     |  Before Release
+Is there provenance for all datasets used in training?                                                                                |  Yes
+Does data labeling (annotation, metadata) comply with privacy laws?                                                                |  Yes
+Is data compliant with data subject requests for data correction or removal, if such a request was made?                           |  No, not possible with externally-sourced data.
+Applicable Privacy Policy        | https://www.nvidia.com/en-us/about-nvidia/privacy-policy/

README.md ADDED Viewed

	@@ -0,0 +1,288 @@

+---
+license: cc-by-4.0
+language:
+- en
+base_model:
+- nvidia/OpenReasoning-Nemotron-1.5B
+pipeline_tag: text-generation
+library_name: onnxruntime-genai
+tags:
+- nvidia
+- unsloth
+- code
+- onnxruntime
+- onnxruntime-genai
+---
+# OpenReasoning-Nemotron-1.5B Overview
+## Description: <br>
+OpenReasoning-Nemotron-1.5B is a large language model (LLM) which is a derivative of Qwen2.5-1.5B-Instruct (AKA the reference model). It is a reasoning model that is post-trained for reasoning about math, code and science solution generation. We evaluated this model with up to 64K output tokens. The OpenReasoning model is available in the following sizes: 1.5B, 7B, 14B and 32B. <br>
+This model is ready for commercial/non-commercial research use. <br>
+### License/Terms of Use: <br>
+GOVERNING TERMS: Use of the models listed above is governed by the [Creative Commons Attribution 4.0 International License (CC-BY-4.0)](https://creativecommons.org/licenses/by/4.0/legalcode.en). ADDITIONAL INFORMATION: [Apache 2.0 License](https://huggingface.co/Qwen/Qwen2.5-32B-Instruct/blob/main/LICENSE)
+## Scores on Reasoning Benchmarks
+![Evaluation Results with pass@1](https://raw.githubusercontent.com/NVIDIA/NeMo-Skills/main/docs/releases/openreasoning/pass-1.png)
+Our models demonstrate exceptional performance across a suite of challenging reasoning benchmarks. The 7B, 14B, and 32B models consistently set new state-of-the-art records for their size classes.
+| **Model** | **ArtificialAnalysisIndex*** | **GPQA** | **MMLU-PRO** | **HLE** | **LiveCodeBench*** | **SciCode** | **AIME24** | **AIME25** | **HMMT FEB 25**  |
+| :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- |
+| **1.5B**| 31.0 | 31.6 | 47.5 | 5.5 | 28.6 | 2.2 | 55.5 | 45.6 | 31.5 |
+| **7B** | 54.7 | 61.1 | 71.9 | 8.3 | 63.3 | 16.2 | 84.7 | 78.2 | 63.5 |
+| **14B** | 60.9 | 71.6 | 77.5 | 10.1 | 67.8 | 23.5 | 87.8 | 82.0 | 71.2 |
+| **32B** | 64.3 | 73.1 | 80.0 | 11.9 | 70.2 | 28.5 | 89.2 | 84.0 | 73.8 |
+\* This is our estimation of the Artificial Analysis Intelligence Index, not an official score.
+\* LiveCodeBench version 6, date range 2408-2505.
+## Combining the work of multiple agents
+OpenReasoning-Nemotron models can be used in a "heavy" mode by starting multiple parallel generations and combining them together via [generative solution selection (GenSelect)](https://arxiv.org/abs/2504.16891). To add this "skill" we follow the original GenSelect training pipeline except we do not train on the selection summary but use the full reasoning trace of DeepSeek R1 0528 671B instead. We only train models to select the best solution for math problems but surprisingly find that this capability directly generalizes to code and science questions! With this "heavy" GenSelect inference mode, OpenReasoning-Nemotron-32B model surpasses O3 (High) on math and coding benchmarks.
+![Evaluation Results with GenSelect](https://raw.githubusercontent.com/NVIDIA/NeMo-Skills/main/docs/releases/openreasoning/genselect.png)
+| **Model** | **Pass@1 (Avg@64)** | **Majority@64** | **GenSelect** |
+| :--- | :--- | :--- | :--- |
+| **1.5B** | | | |
+| **AIME24** | 55.5 | 76.7 | 76.7 |
+| **AIME25** | 45.6 | 70.0 | 70.0 |
+| **HMMT Feb 25** | 31.5 | 46.7 | 53.3 |
+| **7B** | | | |
+| **AIME24** | 84.7 | 93.3 | 93.3 |
+| **AIME25** | 78.2 | 86.7 | 93.3 |
+| **HMMT Feb 25** | 63.5 | 83.3 | 90.0 |
+| **LCB v6 2408-2505** | 63.4 | n/a | 67.7 |
+| **14B** | | | |
+| **AIME24** | 87.8 | 93.3 | 93.3 |
+| **AIME25** | 82.0 | 90.0 | 90.0 |
+| **HMMT Feb 25** | 71.2 | 86.7 | 93.3 |
+| **LCB v6 2408-2505** | 67.9 | n/a | 69.1 |
+| **32B** | | | |
+| **AIME24** | 89.2 | 93.3 | 93.3 |
+| **AIME25** | 84.0 | 90.0 | 93.3 |
+| **HMMT Feb 25** | 73.8 | 86.7 | 96.7 |
+| **LCB v6 2408-2505** | 70.2 | n/a | 75.3 |
+| **HLE** | 11.8 | 13.4 | 15.5 |
+## How to use the models?
+To run inference on coding problems:
+````python
+import transformers
+import torch
+model_id = "nvidia/OpenReasoning-Nemotron-1.5B"
+pipeline = transformers.pipeline(
+    "text-generation",
+    model=model_id,
+    model_kwargs={"torch_dtype": torch.bfloat16},
+    device_map="auto",
+)
+# Code generation prompt
+prompt = """You are a helpful and harmless assistant. You should think step-by-step before responding to the instruction below.
+Please use python programming language only.
+You must use ```python for just the final solution code block with the following format:
+```python
+# Your code here
+```
+{user}
+"""
+# Math generation prompt
+# prompt = """Solve the following math problem. Make sure to put the answer (and only answer) inside \\boxed{}.
+#
+# {user}
+# """
+# Science generation prompt
+# You can refer to prompts here -
+# https://github.com/NVIDIA/NeMo-Skills/blob/main/nemo_skills/prompt/config/generic/hle.yaml (HLE)
+# https://github.com/NVIDIA/NeMo-Skills/blob/main/nemo_skills/prompt/config/eval/aai/mcq-4choices-boxed.yaml (for GPQA)
+# https://github.com/NVIDIA/NeMo-Skills/blob/main/nemo_skills/prompt/config/eval/aai/mcq-10choices-boxed.yaml (MMLU-Pro)
+messages = [
+    {
+        "role": "user",
+        "content": prompt.format(user="Write a program to calculate the sum of the first $N$ fibonacci numbers")},
+]
+outputs = pipeline(
+    messages,
+    max_new_tokens=64000,
+)
+print(outputs[0]["generated_text"][-1]['content'])
+````
+We have added [a simple transformers-based script](https://huggingface.co/nvidia/OpenReasoning-Nemotron-1.5B/blob/main/genselect_hf.py) in this repo to illustrate GenSelect.
+To learn how to use the models in GenSelect mode with NeMo-Skills, see our [documentation](https://nvidia.github.io/NeMo-Skills/releases/openreasoning/evaluation/).
+To use the model with GenSelect inference, we recommend following our
+[reference implementation in NeMo-Skills](https://github.com/NVIDIA/NeMo-Skills/blob/main/nemo_skills/pipeline/genselect.py). Alternatively, you can manually extract the summary from all solutions and use this
+[prompt](https://github.com/NVIDIA/NeMo-Skills/blob/main/nemo_skills/prompt/config/openmath/genselect.yaml) for the math problems. We will add the prompt we used for the coding problems and a reference implementation soon!
+You can learn more about GenSelect in these papers:
+* [AIMO-2 Winning Solution: Building State-of-the-Art Mathematical Reasoning Models with OpenMathReasoning dataset](https://arxiv.org/abs/2504.16891)
+* [GenSelect: A Generative Approach to Best-of-N](https://openreview.net/forum?id=8LhnmNmUDb)
+## Accessing training data
+Training data has been released! Math and code are available as part of
+[Nemotron-Post-Training-Dataset-v1](https://huggingface.co/datasets/nvidia/Nemotron-Post-Training-Dataset-v1) and science is available in
+[OpenScienceReasoning-2](https://huggingface.co/datasets/nvidia/OpenScienceReasoning-2).
+See our [documentation](https://nvidia.github.io/NeMo-Skills/releases/openreasoning/training) for more details.
+## Citation
+If you find the data useful, please cite:
+```
+@article{ahmad2025opencodereasoning,
+      title={{OpenCodeReasoning: Advancing Data Distillation for Competitive Coding}},
+      author={Wasi Uddin Ahmad and Sean Narenthiran and Somshubra Majumdar and Aleksander Ficek and Siddhartha Jain and Jocelyn Huang and Vahid Noroozi and Boris Ginsburg},
+      year={2025},
+      eprint={2504.01943},
+      archivePrefix={arXiv},
+      primaryClass={cs.CL},
+      url={https://arxiv.org/abs/2504.01943},
+}
+```
+```
+@misc{ahmad2025opencodereasoningiisimpletesttime,
+      title={{OpenCodeReasoning-II: A Simple Test Time Scaling Approach via Self-Critique}},
+      author={Wasi Uddin Ahmad and Somshubra Majumdar and Aleksander Ficek and Sean Narenthiran and Mehrzad Samadi and Jocelyn Huang and Siddhartha Jain and Vahid Noroozi and Boris Ginsburg},
+      year={2025},
+      eprint={2507.09075},
+      archivePrefix={arXiv},
+      primaryClass={cs.CL},
+      url={https://arxiv.org/abs/2507.09075},
+}
+```
+```
+@misc{moshkov2025aimo2winningsolutionbuilding,
+      title={{AIMO-2 Winning Solution: Building State-of-the-Art Mathematical Reasoning Models with OpenMathReasoning dataset}},
+      author={Ivan Moshkov and Darragh Hanley and Ivan Sorokin and Shubham Toshniwal and Christof Henkel and Benedikt Schifferer and Wei Du and Igor Gitman},
+      year={2025},
+      eprint={2504.16891},
+      archivePrefix={arXiv},
+      primaryClass={cs.AI},
+      url={https://arxiv.org/abs/2504.16891},
+}
+```
+```
+@inproceedings{toshniwal2025genselect,
+      title={{GenSelect: A Generative Approach to Best-of-N}},
+      author={Shubham Toshniwal and Ivan Sorokin and Aleksander Ficek and Ivan Moshkov and Igor Gitman},
+      booktitle={2nd AI for Math Workshop @ ICML 2025},
+      year={2025},
+      url={https://openreview.net/forum?id=8LhnmNmUDb}
+}
+```
+## Additional Information:
+### Deployment Geography:
+Global<br>
+### Use Case: <br>
+This model is intended for developers and researchers who work on competitive math, code and science problems. It has been trained via only supervised fine-tuning to achieve strong scores on benchmarks. <br>
+### Release Date:  <br>
+Huggingface [07/16/2025] via https://huggingface.co/nvidia/OpenReasoning-Nemotron-1.5B/ <br>
+## Reference(s):
+* [2504.01943] OpenCodeReasoning: Advancing Data Distillation for Competitive Coding
+* [2507.09075] OpenCodeReasoning-II: A Simple Test Time Scaling Approach via Self-Critique
+* [2504.16891] AIMO-2 Winning Solution: Building State-of-the-Art Mathematical Reasoning Models with OpenMathReasoning dataset
+<br>
+## Model Architecture: <br>
+Architecture Type: Dense decoder-only Transformer model <br>
+Network Architecture: Qwen2.5-1.5B-Instruct
+<br>
+**This model was developed based on Qwen2.5-1.5B-Instruct and has 1.5B model parameters. <br>**
+**OpenReasoning-Nemotron-1.5B was developed based on Qwen2.5-1.5B-Instruct and has 1.5B model parameters. <br>**
+**OpenReasoning-Nemotron-7B was developed based on Qwen2.5-7B-Instruct and has 7B model parameters. <br>**
+**OpenReasoning-Nemotron-14B was developed based on Qwen2.5-14B-Instruct and has 14B model parameters. <br>**
+**OpenReasoning-Nemotron-32B was developed based on Qwen2.5-32B-Instruct and has 32B model parameters. <br>**
+## Input: <br>
+**Input Type(s):** Text <br>
+**Input Format(s):** String <br>
+**Input Parameters:** One-Dimensional (1D) <br>
+**Other Properties Related to Input:** Trained for up to 64,000 output tokens <br>
+## Output: <br>
+**Output Type(s):** Text <br>
+**Output Format:** String <br>
+**Output Parameters:** One-Dimensional (1D) <br>
+**Other Properties Related to Output:** Trained for up to 64,000 output tokens <br>
+Our AI models are designed and/or optimized to run on NVIDIA GPU-accelerated systems. By leveraging NVIDIA’s hardware (e.g. GPU cores) and software frameworks (e.g., CUDA libraries), the model achieves faster training and inference times compared to CPU-only solutions. <br>
+## Software Integration : <br>
+* Runtime Engine: NeMo 2.3.0 <br>
+* Recommended Hardware Microarchitecture Compatibility: <br>
+NVIDIA Ampere <br>
+NVIDIA Hopper <br>
+* Preferred/Supported Operating System(s): Linux <br>
+## Model Version(s):
+1.0 (7/16/2025)  <br>
+OpenReasoning-Nemotron-32B<br>
+OpenReasoning-Nemotron-14B<br>
+OpenReasoning-Nemotron-7B<br>
+OpenReasoning-Nemotron-1.5B<br>
+# Training and Evaluation Datasets: <br>
+## Training Dataset:
+The training corpus for OpenReasoning-Nemotron-1.5B is comprised of questions from [OpenCodeReasoning](https://huggingface.co/datasets/nvidia/OpenCodeReasoning) dataset, [OpenCodeReasoning-II](https://arxiv.org/abs/2507.09075), [OpenMathReasoning](https://huggingface.co/datasets/nvidia/OpenMathReasoning), and the Synthetic Science questions from the [Llama-Nemotron-Post-Training-Dataset](https://huggingface.co/datasets/nvidia/Llama-Nemotron-Post-Training-Dataset). All responses are generated using DeepSeek-R1-0528. We also include the instruction following and tool calling data from Llama-Nemotron-Post-Training-Dataset without modification.
+Data Collection Method: Hybrid: Automated, Human, Synthetic <br>
+Labeling Method: Hybrid: Automated, Human, Synthetic <br>
+Properties: 5M DeepSeek-R1-0528 generated responses from [OpenCodeReasoning](https://huggingface.co/datasets/nvidia/OpenCodeReasoning) questions, [OpenMathReasoning](https://huggingface.co/datasets/nvidia/OpenMathReasoning), and the Synthetic Science questions from the [Llama-Nemotron-Post-Training-Dataset](https://huggingface.co/datasets/nvidia/Llama-Nemotron-Post-Training-Dataset). We also include the instruction following and tool calling data from Llama-Nemotron-Post-Training-Dataset without modification.
+## Evaluation Dataset:
+We used the following benchmarks to evaluate the model holistically.
+### Math
+- AIME 2024/2025 <br>
+- HMMT Feb 2025 <br>
+### Code
+- LiveCodeBench <br>
+- SciCode <br>
+### Science
+- GPQA <br>
+- MMLU-PRO <br>
+- HLE <br>
+Data Collection Method: Hybrid: Automated, Human, Synthetic <br>
+Labeling Method: Hybrid: Automated, Human, Synthetic <br>
+## Inference:
+**Acceleration Engine:** vLLM, Tensor(RT)-LLM <br>
+**Test Hardware:** NVIDIA H100-80GB <br>
+## Ethical Considerations:
+NVIDIA believes Trustworthy AI is a shared responsibility and we have established policies and practices to enable development for a wide array of AI applications.  When downloaded or used in accordance with our terms of service, developers should work with their internal model team to ensure this model meets requirements for the relevant industry and use case and addresses unforeseen product misuse.
+For more detailed information on ethical considerations for this model, please see the Model Card++ Explainability, Bias, Safety & Security, and Privacy Subcards.
+Please report model quality, risk, security vulnerabilities or NVIDIA AI Concerns [here](https://www.nvidia.com/en-us/support/submit-security-vulnerability/).

SAFETY.md ADDED Viewed

	@@ -0,0 +1,7 @@

+Field                                               |  Response
+:---------------------------------------------------|:----------------------------------
+Model Application Field(s):                               |  Reasoning for Code Generation<br>
+Describe the life critical impact (if present).   |  Not Applicable <br>
+Use Case Restrictions:                              |  Abide by CC BY 4.0 <br>
+Model and dataset restrictions:            |  The Principle of Least Privilege (PoLP) is applied, limiting access for dataset generation and model development.  Restrictions enforce dataset access during training, and dataset license constraints are adhered to.

added_tokens.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "</tool_call>": 151658,
+  "<tool_call>": 151657,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,20 @@

+{#- ChatML-style prompt: always emit a leading system block; its body is messages[0] when that message is a system message, otherwise it is empty. -#}
+{%- if messages[0]['role'] == 'system' %}
+    {{- '<|im_start|>system
+' + messages[0]['content'] + '<|im_end|>
+' }}
+{%- else %}
+    {{- '<|im_start|>system
+<|im_end|>
+' }}
+{%- endif %}
+{#- Emit user, assistant and non-leading system turns; a leading system message was already written above, and any other role is dropped. -#}
+{%- for message in messages %}
+    {%- if (message.role == 'user') or (message.role == 'system' and not loop.first) or (message.role == 'assistant') %}
+        {{- '<|im_start|>' + message.role + '
+' + message.content + '<|im_end|>' + '
+' }}
+    {%- endif %}
+{%- endfor %}
+{#- Open an assistant turn only when the caller sets add_generation_prompt. -#}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant
+' }}
+{%- endif %}

genai_config.json ADDED Viewed

	@@ -0,0 +1,50 @@

+{
+    "model": {
+        "bos_token_id": 151643,
+        "context_length": 131072,
+        "decoder": {
+            "session_options": {
+                "log_id": "onnxruntime-genai",
+                "provider_options": []
+            },
+            "filename": "model.onnx",
+            "head_size": 128,
+            "hidden_size": 1536,
+            "inputs": {
+                "input_ids": "input_ids",
+                "attention_mask": "attention_mask",
+                "position_ids": "position_ids",
+                "past_key_names": "past_key_values.%d.key",
+                "past_value_names": "past_key_values.%d.value"
+            },
+            "outputs": {
+                "logits": "logits",
+                "present_key_names": "present.%d.key",
+                "present_value_names": "present.%d.value"
+            },
+            "num_attention_heads": 12,
+            "num_hidden_layers": 28,
+            "num_key_value_heads": 2
+        },
+        "eos_token_id": 151645,
+        "pad_token_id": 151645,
+        "type": "qwen2",
+        "vocab_size": 151936
+    },
+    "search": {
+        "diversity_penalty": 0.0,
+        "do_sample": false,
+        "early_stopping": true,
+        "length_penalty": 1.0,
+        "max_length": 131072,
+        "min_length": 0,
+        "no_repeat_ngram_size": 0,
+        "num_beams": 1,
+        "num_return_sequences": 1,
+        "past_present_share_buffer": false,
+        "repetition_penalty": 1.0,
+        "temperature": 1.0,
+        "top_k": 1,
+        "top_p": 1.0
+    }
+}

genselect_hf.py ADDED Viewed

	@@ -0,0 +1,204 @@

+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import random
+import re
+import torch
+PROMPT = """You will be given a challenging math problem followed by {num_solutions} solutions. Your task is to systematically analyze these solutions to identify the most mathematically sound approach.
+Input Format:
+Problem: A complex mathematical word problem at advanced high school or college level
+Solutions: Detailed solutions indexed 0-{max_idx}, each concluding with an answer in \\boxed{{}} notation
+YOUR TASK
+Problem: {problem}
+Solutions:
+{solutions}
+Evaluation Process:
+1. Initial Screening
+- Group solutions by their final answers
+- Identify and explain mathematical contradictions between different answers
+- Eliminate solutions with clear mathematical errors
+2. Detailed Analysis
+For remaining solutions, evaluate:
+- Mathematical precision and accuracy
+- Logical progression of steps
+- Completeness of mathematical reasoning
+- Proper use of mathematical notation, including \\boxed{{}}
+- Handling of edge cases or special conditions
+- For solutions containing and addressing errors, evaluate the error identification and correction methodology.
+3. Solution Comparison
+Compare viable solutions based on:
+- Efficiency of approach
+- Clarity of mathematical reasoning
+- Sophistication of method
+- Robustness of solution (works for all cases)
+Your response should include:
+1. Brief analysis of conflicting answers
+2. Detailed evaluation of mathematically sound solutions
+3. Justification for eliminating incorrect solutions
+4. Clear explanation for selecting the best approach
+End your evaluation with exactly:
+Judgment: [IDX]
+where IDX is the index 0-{max_idx} of the best solution.
+"""
+PROBLEM = """In an $11 \\times 11$ grid of cells, each pair of edge-adjacent cells is connected by a door. Karthik wants to walk a path in this grid. He can start in any cell, but he must end in the same cell he
+started in, and he cannot go through any door more than once (not even in opposite directions). Compute the maximum number of doors he can go through in such a path."""
+SOLUTION_LIST = [
+    """The grid is \\(11 \\times 11\\), so there are \\(11 \\times 11 = 121\\) cells. The total number of doors (edges) is calculated as follows:  \n- Horizontal doors: 11 rows with 10 doors each, so \\(1
+1 \\times 10 = 110\\).  \n- Vertical doors: 11 columns with 10 doors each, so \\(11 \\times 10 = 110\\).  \nTotal doors: \\(110 + 110 = 220\\).  \n\nKarthik must walk a closed path (cycle) starting and ending at th
+e same cell, without reusing any door. The goal is to maximize the number of doors used, which corresponds to finding a subgraph that is connected, has all even degrees (to admit an Eulerian circuit), and maximizes
+ the number of edges.  \n\nThe grid has 36 vertices of odd degree:  \n- Each side has \\(11 - 2 = 9\\) edge cells (not corners) with degree 3.  \n- With 4 sides, total odd-degree vertices: \\(4 \\times 9 = 36\\).
+\n\nTo make all degrees even, edges must be removed. The minimum number of edges to remove to achieve an Eulerian circuit in a connected graph is given by a minimum T-join, where T is the set of odd-degree vertices
+. For an \\(n \\times n\\) grid with \\(n\\) odd, the minimum number of edges to remove is \\(4n - 8\\). For \\(n = 11\\):  \n\\[4 \\times 11 - 8 = 44 - 8 = 36.\\]  \n\nThus, 36 edges must be removed. The remaining
+ number of edges is:  \n\\[220 - 36 = 184.\\]  \n\nThis is achievable with the following construction:  \n1. On each boundary (top, bottom, left, right), remove 4 horizontal or vertical edges to pair adjacent odd-d
+egree cells, covering 8 cells per boundary and leaving one unpaired per boundary. This removes \\(4 \\times 4 = 16\\) edges.  \n   - The unpaired cells are \\((1,10)\\), \\((11,10)\\), \\((10,1)\\), and \\((10,11)\
+\).  \n2. Pair \\((11,10)\\) and \\((10,11)\\) by removing a path of length 2 (e.g., edges \\((11,10)-(10,10)\\) and \\((10,10)-(10,11)\\)).  \n3. Pair \\((1,10)\\) and \\((10,1)\\) by removing a path of length 18
+(e.g., vertically down column 10 from \\((1,10)\\) to \\((10,10)\\), then horizontally left in row 10 from \\((10,10)\\) to \\((10,1)\\)).  \n   - This removes \\(2 + 18 = 20\\) edges.  \nTotal edges removed: \\(16
+ + 20 = 36\\).  \n\nThe remaining graph has 184 edges, is connected, and has all even degrees, so an Eulerian circuit exists with 184 edges. This is the maximum possible, as fewer removals would leave odd-degree ve
+rtices or disconnect the graph.  \n\n\\boxed{184}""",
+    """The problem involves finding the maximum number of doors Karthik can traverse in an \\(11 \\times 11\\) grid, starting and ending at the same cell, without repeating any door (edge adjacency).\n\nT
+he grid has \\(11 \\times 11 = 121\\) cells. The total number of doors (edges) is calculated as follows:\n- Horizontal doors: \\(11\\) rows, each with \\(10\\) doors, so \\(11 \\times 10 = 110\\).\n- Vertical doors
+: \\(11\\) columns, each with \\(10\\) doors, so \\(11 \\times 10 = 110\\).\n- Total doors: \\(110 + 110 = 220\\).\n\nThe grid graph is connected and bipartite, with a chessboard coloring. The number of black cells
+ is \\(61\\) and white cells is \\(60\\), since both dimensions are odd. The degrees of the vertices vary:\n- Corner cells have degree \\(2\\) (even).\n- Edge cells (not corners) have degree \\(3\\) (odd).\n- Inter
+nal cells have degree \\(4\\) (even).\n\nThe number of odd-degree vertices (denoted \\(S\\)) is:\n- Top row: \\(11 - 2 = 9\\) (excluding corners).\n- Bottom row: \\(9\\).\n- Left column: \\(9\\) (excluding corners)
+.\n- Right column: \\(9\\).\n- Total: \\(9 + 9 + 9 + 9 = 36\\).\n\nKarthik's path is a closed trail (start and end at the same vertex, no repeated edges, but vertices may be revisited). The maximum number of edges
+in such a trail corresponds to the size of the largest connected Eulerian subgraph (all degrees even and connected).\n\nFor an \\(m \\times n\\) grid with both dimensions odd, the minimum number of edges to remove
+to make the graph Eulerian (all degrees even) is \\(m + n - 2\\). Here, \\(m = n = 11\\), so:\n\\[\nm + n - 2 = 11 + 11 - 2 = 20.\n\\]\nAfter removing these 20 edges, the remaining graph has:\n\\[\n220 - 20 = 200\n
+\\]\nedges. This subgraph is connected and Eulerian, so it admits an Eulerian circuit (a closed trail) using all 200 edges.\n\nIt is not possible to have a closed trail with more than 200 edges, as removing fewer t
+han 20 edges cannot make all degrees even (since at least 20 edges must be removed to address the 36 odd-degree vertices, and the minimum is 20 for this grid).\n\nThus, the maximum number of doors Karthik can trave
+rse is 200.\n\n\\boxed{200}""",
+    """The grid is an $11 \\times 11$ graph with cells as vertices and doors between edge-adjacent cells as edges. The graph is bipartite, with partitions of sizes 61 and 60 due to the odd grid size. A cy
+cle must alternate between the two partitions, so the maximum number of vertices in any cycle is $2 \\times \\min(61, 60) = 120$.\n\nThis maximum is achievable by removing one vertex from the partition of size 61, resulting in a graph with 60 vertices in each partition. The remaining graph is Hamiltonian, meaning it contains a cycle that visits each vertex exactly once. For an $11 \\times 11$ grid with one vertex removed, su
+ch a cycle exists, as the grid remains connected and satisfies the conditions for Hamiltonicity when a single cell is removed (given the grid dimensions are at least $2 \\times 2$).\n\nA cycle with 120 vertices has
+ 120 edges, as the number of edges equals the number of vertices in a cycle. Since each edge corresponds to a door, the maximum number of doors Karthik can traverse in a closed path without reusing any door is 120.
+\n\n\\boxed{120}""",
+    """The grid is an $11 \\times 11$ graph, which is bipartite due to its chessboard coloring. The total number of cells is $11 \\times 11 = 121$. In this coloring, one color class has 61 cells and the o
+ther has 60 cells, as determined by the grid's odd size.\n\nA cycle in a bipartite graph must have an equal number of vertices from each color class. Therefore, the maximum number of vertices in any cycle is $2 \\t
+imes \\min(61, 60) = 2 \\times 60 = 120$. This implies that the maximum cycle length in terms of vertices is 120.\n\nSince the number of edges in a cycle equals the number of vertices, the maximum number of edges (
+doors) in a cycle is also 120. This maximum is achievable by removing one black vertex (from the color class with 61 vertices), specifically the center cell at $(6,6)$, and finding a Hamiltonian cycle on the remaining 120-vertex graph. The grid remains connected after removing the center, and such a cycle exists for an $n \\times n$ grid with $n$ odd by removing the center cell.\n\nThus, the maximum number of doors Karthik can traverse in a closed path without repeating any door is 120.\n\n\\boxed{120}""",
+]
+# Model name that main() hands to load_model(), which forwards it to from_pretrained().
+MODEL_NAME = "nvidia/OpenReasoning-Nemotron-1.5B"
+# Passed as max_new_tokens to model.generate() in generate_response().
+MAX_NEW_TOKENS = 32000
+def load_model(model_name):
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        device_map="auto",
+        torch_dtype=torch.bfloat16,
+        attn_implementation="flash_attention_2",
+    )
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    return model, tokenizer
+def format_prompt(problem, solutions, num_solutions):
+    prompt = PROMPT.format(problem=problem, solutions=solutions, num_solutions=num_solutions, max_idx=num_solutions - 1)
+    return prompt
+def format_solutions(solution_list):
+    solutions = "\n".join([f"Solution {i}: {solution}" for i, solution in enumerate(solution_list)])
+    return solutions
+def get_prompt():
+    problem = PROBLEM
+    solutions = format_solutions(SOLUTION_LIST)
+    num_solutions = len(SOLUTION_LIST)
+    prompt = format_prompt(problem, solutions, num_solutions)
+    return prompt, num_solutions
+def generate_response(model, tokenizer, prompt):
+    chat = [{"role": "user", "content": prompt}]
+    input_ids = tokenizer.apply_chat_template(
+        chat,
+        return_tensors="pt"
+    )
+    outputs = model.generate(
+        input_ids.to(model.device),
+        max_new_tokens=MAX_NEW_TOKENS,
+        temperature=0.6,
+        top_p=0.95,
+        do_sample=True,
+        use_cache=True,
+        eos_token_id=tokenizer.eos_token_id,
+        pad_token_id=tokenizer.eos_token_id
+    )
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+def extract_judgment(generation, max_idx=None):
+    """Extract the judgment from the generation."""
+    judgment = None
+    try:
+        matches = re.findall(r"Judg[e]?ment: (\d+)", generation)
+        if matches:
+            number = matches[-1]
+            judgment = int(number)
+            if max_idx is not None and judgment > max_idx:
+                judgment = None
+        else:
+            judgment = None
+    except:
+        judgment = None
+    if judgment is not None and max_idx is not None:
+        if judgment > max_idx:
+            judgment = None
+    return judgment
+def main():
+    # Load model
+    model, tokenizer = load_model(MODEL_NAME)
+    # Construct prompt
+    prompt, num_solutions = get_prompt()
+    # Get response
+    response = generate_response(model, tokenizer, prompt)
+    # Extract judgment
+    judgment = extract_judgment(response, max_idx=num_solutions - 1)
+    # Print judgment
+    print("Selected solution index:", judgment)
+    if judgment is not None:
+        print("Chosen solution:", SOLUTION_LIST[judgment])
+    else:
+        print("No solution selected")
+# Run main() only when this file is executed as a script, not when it is imported.
+if __name__ == "__main__":
+    main()

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:20d5dc2ac72bf20dba53aa75d1583925dfedfb7b5f3eb848456af2400bb5c2a2
+size 687453

model.onnx.data ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cf9eb9ca257010b10e91133e11ed2c0a3f4bf2614bb7c16f7fca2dbd56170b44
+size 3587730432

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
+size 11421896

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,207 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151657": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151658": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151659": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151660": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151661": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151662": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151663": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151664": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": null,
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff