Spaces:
Running
Running
Sunset Judge Arena — added static final leaderboard
Browse files- data/leaderboard_static.json +35 -0
data/leaderboard_static.json
CHANGED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"last_updated": "January 15, 2026",
|
| 3 |
+
"total_votes": 4689,
|
| 4 |
+
"total_models": 28,
|
| 5 |
+
"leaderboard": [
|
| 6 |
+
{"Model": "Meta Llama 3.3 70B Instruct Turbo", "ELO Score": 1335, "95% CI": "±71", "# Votes": 118, "Organization": "Meta", "License": "Open Source"},
|
| 7 |
+
{"Model": "Flow-Judge-v0.1", "ELO Score": 1335, "95% CI": "±102", "# Votes": 58, "Organization": "Flow AI", "License": "Open Source"},
|
| 8 |
+
{"Model": "Claude 3 Opus", "ELO Score": 1312, "95% CI": "±37", "# Votes": 445, "Organization": "Anthropic", "License": "Proprietary"},
|
| 9 |
+
{"Model": "GPT-4o", "ELO Score": 1308, "95% CI": "±37", "# Votes": 436, "Organization": "OpenAI", "License": "Proprietary"},
|
| 10 |
+
{"Model": "Atla-8B-preview", "ELO Score": 1306, "95% CI": "±48", "# Votes": 256, "Organization": "Atla", "License": "Open Source"},
|
| 11 |
+
{"Model": "GPT-4 Turbo", "ELO Score": 1304, "95% CI": "±36", "# Votes": 459, "Organization": "OpenAI", "License": "Proprietary"},
|
| 12 |
+
{"Model": "Claude 3 Haiku", "ELO Score": 1286, "95% CI": "±37", "# Votes": 438, "Organization": "Anthropic", "License": "Proprietary"},
|
| 13 |
+
{"Model": "Claude 3.5 Haiku", "ELO Score": 1282, "95% CI": "±37", "# Votes": 431, "Organization": "Anthropic", "License": "Proprietary"},
|
| 14 |
+
{"Model": "Qwen 2.5 7B Instruct Turbo", "ELO Score": 1280, "95% CI": "±36", "# Votes": 458, "Organization": "Alibaba", "License": "Open Source"},
|
| 15 |
+
{"Model": "GPT-3.5 Turbo", "ELO Score": 1271, "95% CI": "±36", "# Votes": 471, "Organization": "OpenAI", "License": "Proprietary"},
|
| 16 |
+
{"Model": "Qwen 2.5 72B Instruct Turbo", "ELO Score": 1269, "95% CI": "±37", "# Votes": 444, "Organization": "Alibaba", "License": "Open Source"},
|
| 17 |
+
{"Model": "Meta Llama 3.1 405B Instruct Turbo", "ELO Score": 1263, "95% CI": "±35", "# Votes": 497, "Organization": "Meta", "License": "Open Source"},
|
| 18 |
+
{"Model": "Meta Llama 3.1 8B Instruct Turbo", "ELO Score": 1233, "95% CI": "±36", "# Votes": 450, "Organization": "Meta", "License": "Open Source"},
|
| 19 |
+
{"Model": "Atla Selene 1", "ELO Score": 1216, "95% CI": "±132", "# Votes": 34, "Organization": "Atla", "License": "Proprietary"},
|
| 20 |
+
{"Model": "Mistral (7B) Instruct v0.3", "ELO Score": 1215, "95% CI": "±37", "# Votes": 426, "Organization": "Mistral AI", "License": "Open Source"},
|
| 21 |
+
{"Model": "Claude 3.5 Sonnet", "ELO Score": 1211, "95% CI": "±38", "# Votes": 422, "Organization": "Anthropic", "License": "Proprietary"},
|
| 22 |
+
{"Model": "Atla Selene 1 Mini", "ELO Score": 1204, "95% CI": "±217", "# Votes": 12, "Organization": "Atla", "License": "Open Source"},
|
| 23 |
+
{"Model": "QwQ 32B Preview", "ELO Score": 1172, "95% CI": "±70", "# Votes": 121, "Organization": "Alibaba", "License": "Open Source"},
|
| 24 |
+
{"Model": "Mistral (7B) Instruct v0.1", "ELO Score": 1157, "95% CI": "±37", "# Votes": 437, "Organization": "Mistral AI", "License": "Open Source"},
|
| 25 |
+
{"Model": "Prometheus-7b v2", "ELO Score": 1145, "95% CI": "±84", "# Votes": 86, "Organization": "Prometheus", "License": "Open Source"},
|
| 26 |
+
{"Model": "Qwen 2 Instruct (72B)", "ELO Score": 1123, "95% CI": "±37", "# Votes": 445, "Organization": "Alibaba", "License": "Open Source"},
|
| 27 |
+
{"Model": "Command-R Plus", "ELO Score": 1119, "95% CI": "±53", "# Votes": 213, "Organization": "Cohere", "License": "Proprietary"},
|
| 28 |
+
{"Model": "Meta Llama 3.1 70B Instruct Turbo", "ELO Score": 1111, "95% CI": "±39", "# Votes": 393, "Organization": "Meta", "License": "Open Source"},
|
| 29 |
+
{"Model": "Claude 3 Sonnet", "ELO Score": 1049, "95% CI": "±36", "# Votes": 459, "Organization": "Anthropic", "License": "Proprietary"},
|
| 30 |
+
{"Model": "Command-R", "ELO Score": 1040, "95% CI": "±56", "# Votes": 195, "Organization": "Cohere", "License": "Proprietary"},
|
| 31 |
+
{"Model": "Gemma 2 9B", "ELO Score": 1035, "95% CI": "±36", "# Votes": 462, "Organization": "Google", "License": "Open Source"},
|
| 32 |
+
{"Model": "Gemma 2 27B", "ELO Score": 1013, "95% CI": "±37", "# Votes": 437, "Organization": "Google", "License": "Open Source"},
|
| 33 |
+
{"Model": "SFR-LLaMA-3.1-70B-Judge", "ELO Score": 991, "95% CI": "±57", "# Votes": 185, "Organization": "Salesforce", "License": "Proprietary"}
|
| 34 |
+
]
|
| 35 |
+
}
|