eval
Browse files
README.md
CHANGED
|
@@ -37,7 +37,7 @@ model-index:
|
|
| 37 |
num_few_shot: 0
|
| 38 |
metrics:
|
| 39 |
- type: inst_level_strict_acc and prompt_level_strict_acc
|
| 40 |
-
value:
|
| 41 |
name: strict accuracy
|
| 42 |
source:
|
| 43 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-Cobalt
|
|
@@ -52,7 +52,7 @@ model-index:
|
|
| 52 |
num_few_shot: 3
|
| 53 |
metrics:
|
| 54 |
- type: acc_norm
|
| 55 |
-
value: 27.
|
| 56 |
name: normalized accuracy
|
| 57 |
source:
|
| 58 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-Cobalt
|
|
@@ -67,7 +67,7 @@ model-index:
|
|
| 67 |
num_few_shot: 4
|
| 68 |
metrics:
|
| 69 |
- type: exact_match
|
| 70 |
-
value: 15.
|
| 71 |
name: exact match
|
| 72 |
source:
|
| 73 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-Cobalt
|
|
@@ -82,7 +82,7 @@ model-index:
|
|
| 82 |
num_few_shot: 0
|
| 83 |
metrics:
|
| 84 |
- type: acc_norm
|
| 85 |
-
value: 4.
|
| 86 |
name: acc_norm
|
| 87 |
source:
|
| 88 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-Cobalt
|
|
@@ -97,7 +97,7 @@ model-index:
|
|
| 97 |
num_few_shot: 0
|
| 98 |
metrics:
|
| 99 |
- type: acc_norm
|
| 100 |
-
value:
|
| 101 |
name: acc_norm
|
| 102 |
source:
|
| 103 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-Cobalt
|
|
@@ -114,7 +114,7 @@ model-index:
|
|
| 114 |
num_few_shot: 5
|
| 115 |
metrics:
|
| 116 |
- type: acc
|
| 117 |
-
value: 29.
|
| 118 |
name: accuracy
|
| 119 |
source:
|
| 120 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-Cobalt
|
|
|
|
| 37 |
num_few_shot: 0
|
| 38 |
metrics:
|
| 39 |
- type: inst_level_strict_acc and prompt_level_strict_acc
|
| 40 |
+
value: 71.68
|
| 41 |
name: strict accuracy
|
| 42 |
source:
|
| 43 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-Cobalt
|
|
|
|
| 52 |
num_few_shot: 3
|
| 53 |
metrics:
|
| 54 |
- type: acc_norm
|
| 55 |
+
value: 27.24
|
| 56 |
name: normalized accuracy
|
| 57 |
source:
|
| 58 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-Cobalt
|
|
|
|
| 67 |
num_few_shot: 4
|
| 68 |
metrics:
|
| 69 |
- type: exact_match
|
| 70 |
+
value: 15.33
|
| 71 |
name: exact match
|
| 72 |
source:
|
| 73 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-Cobalt
|
|
|
|
| 82 |
num_few_shot: 0
|
| 83 |
metrics:
|
| 84 |
- type: acc_norm
|
| 85 |
+
value: 4.81
|
| 86 |
name: acc_norm
|
| 87 |
source:
|
| 88 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-Cobalt
|
|
|
|
| 97 |
num_few_shot: 0
|
| 98 |
metrics:
|
| 99 |
- type: acc_norm
|
| 100 |
+
value: 4.7
|
| 101 |
name: acc_norm
|
| 102 |
source:
|
| 103 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-Cobalt
|
|
|
|
| 114 |
num_few_shot: 5
|
| 115 |
metrics:
|
| 116 |
- type: acc
|
| 117 |
+
value: 29.59
|
| 118 |
name: accuracy
|
| 119 |
source:
|
| 120 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=ValiantLabs/Llama3.1-8B-Cobalt
|