kaikaidai committed on
Commit
c4b4610
·
1 Parent(s): 37040ff

Sunset Judge Arena — added static final leaderboard

Browse files
Files changed (1) hide show
  1. data/leaderboard_static.json +35 -0
data/leaderboard_static.json CHANGED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "last_updated": "January 15, 2026",
3
+ "total_votes": 4689,
4
+ "total_models": 28,
5
+ "leaderboard": [
6
+ {"Model": "Meta Llama 3.3 70B Instruct Turbo", "ELO Score": 1335, "95% CI": "±71", "# Votes": 118, "Organization": "Meta", "License": "Open Source"},
7
+ {"Model": "Flow-Judge-v0.1", "ELO Score": 1335, "95% CI": "±102", "# Votes": 58, "Organization": "Flow AI", "License": "Open Source"},
8
+ {"Model": "Claude 3 Opus", "ELO Score": 1312, "95% CI": "±37", "# Votes": 445, "Organization": "Anthropic", "License": "Proprietary"},
9
+ {"Model": "GPT-4o", "ELO Score": 1308, "95% CI": "±37", "# Votes": 436, "Organization": "OpenAI", "License": "Proprietary"},
10
+ {"Model": "Atla-8B-preview", "ELO Score": 1306, "95% CI": "±48", "# Votes": 256, "Organization": "Atla", "License": "Open Source"},
11
+ {"Model": "GPT-4 Turbo", "ELO Score": 1304, "95% CI": "±36", "# Votes": 459, "Organization": "OpenAI", "License": "Proprietary"},
12
+ {"Model": "Claude 3 Haiku", "ELO Score": 1286, "95% CI": "±37", "# Votes": 438, "Organization": "Anthropic", "License": "Proprietary"},
13
+ {"Model": "Claude 3.5 Haiku", "ELO Score": 1282, "95% CI": "±37", "# Votes": 431, "Organization": "Anthropic", "License": "Proprietary"},
14
+ {"Model": "Qwen 2.5 7B Instruct Turbo", "ELO Score": 1280, "95% CI": "±36", "# Votes": 458, "Organization": "Alibaba", "License": "Open Source"},
15
+ {"Model": "GPT-3.5 Turbo", "ELO Score": 1271, "95% CI": "±36", "# Votes": 471, "Organization": "OpenAI", "License": "Proprietary"},
16
+ {"Model": "Qwen 2.5 72B Instruct Turbo", "ELO Score": 1269, "95% CI": "±37", "# Votes": 444, "Organization": "Alibaba", "License": "Open Source"},
17
+ {"Model": "Meta Llama 3.1 405B Instruct Turbo", "ELO Score": 1263, "95% CI": "±35", "# Votes": 497, "Organization": "Meta", "License": "Open Source"},
18
+ {"Model": "Meta Llama 3.1 8B Instruct Turbo", "ELO Score": 1233, "95% CI": "±36", "# Votes": 450, "Organization": "Meta", "License": "Open Source"},
19
+ {"Model": "Atla Selene 1", "ELO Score": 1216, "95% CI": "±132", "# Votes": 34, "Organization": "Atla", "License": "Proprietary"},
20
+ {"Model": "Mistral (7B) Instruct v0.3", "ELO Score": 1215, "95% CI": "±37", "# Votes": 426, "Organization": "Mistral AI", "License": "Open Source"},
21
+ {"Model": "Claude 3.5 Sonnet", "ELO Score": 1211, "95% CI": "±38", "# Votes": 422, "Organization": "Anthropic", "License": "Proprietary"},
22
+ {"Model": "Atla Selene 1 Mini", "ELO Score": 1204, "95% CI": "±217", "# Votes": 12, "Organization": "Atla", "License": "Open Source"},
23
+ {"Model": "QwQ 32B Preview", "ELO Score": 1172, "95% CI": "±70", "# Votes": 121, "Organization": "Qwen", "License": "Open Source"},
24
+ {"Model": "Mistral (7B) Instruct v0.1", "ELO Score": 1157, "95% CI": "±37", "# Votes": 437, "Organization": "Mistral AI", "License": "Open Source"},
25
+ {"Model": "Prometheus-7b v2", "ELO Score": 1145, "95% CI": "±84", "# Votes": 86, "Organization": "Prometheus", "License": "Open Source"},
26
+ {"Model": "Qwen 2 Instruct (72B)", "ELO Score": 1123, "95% CI": "±37", "# Votes": 445, "Organization": "Alibaba", "License": "Open Source"},
27
+ {"Model": "Command-R Plus", "ELO Score": 1119, "95% CI": "±53", "# Votes": 213, "Organization": "Cohere", "License": "Proprietary"},
28
+ {"Model": "Meta Llama 3.1 70B Instruct Turbo", "ELO Score": 1111, "95% CI": "±39", "# Votes": 393, "Organization": "Meta", "License": "Open Source"},
29
+ {"Model": "Claude 3 Sonnet", "ELO Score": 1049, "95% CI": "±36", "# Votes": 459, "Organization": "Anthropic", "License": "Proprietary"},
30
+ {"Model": "Command-R", "ELO Score": 1040, "95% CI": "±56", "# Votes": 195, "Organization": "Cohere", "License": "Proprietary"},
31
+ {"Model": "Gemma 2 9B", "ELO Score": 1035, "95% CI": "±36", "# Votes": 462, "Organization": "Google", "License": "Open Source"},
32
+ {"Model": "Gemma 2 27B", "ELO Score": 1013, "95% CI": "±37", "# Votes": 437, "Organization": "Google", "License": "Open Source"},
33
+ {"Model": "SFR-LLaMA-3.1-70B-Judge", "ELO Score": 991, "95% CI": "±57", "# Votes": 185, "Organization": "Salesforce", "License": "Proprietary"}
34
+ ]
35
+ }