From 098c2faae9039edcfb0abbeabea28431bb631ac0 Mon Sep 17 00:00:00 2001 From: ayana Date: Mon, 23 Jun 2025 12:50:38 -0700 Subject: [PATCH] initial implementation of leaderboard ratings chart --- package-lock.json | 19 ++++++ package.json | 1 + .../admin/Evaluations/Leaderboard.svelte | 25 ++++++-- .../admin/Evaluations/LeaderboardModal.svelte | 62 ++++++++++++++++++- 4 files changed, 99 insertions(+), 8 deletions(-) diff --git a/package-lock.json b/package-lock.json index d17e571808..8f1a71d0d8 100644 --- a/package-lock.json +++ b/package-lock.json @@ -32,6 +32,7 @@ "@xyflow/svelte": "^0.1.19", "async": "^3.2.5", "bits-ui": "^0.21.15", + "chart.js": "^4.5.0", "codemirror": "^6.0.1", "codemirror-lang-elixir": "^4.0.0", "codemirror-lang-hcl": "^0.1.0", @@ -1870,6 +1871,12 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, + "node_modules/@kurkle/color": { + "version": "0.3.4", + "resolved": "https://registry.npmjs.org/@kurkle/color/-/color-0.3.4.tgz", + "integrity": "sha512-M5UknZPHRu3DEDWoipU6sE8PdkZ6Z/S+v4dD+Ke8IaNlpdSQah50lz1KtcFBa2vsdOnwbbnxJwVM4wty6udA5w==", + "license": "MIT" + }, "node_modules/@lezer/common": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/@lezer/common/-/common-1.2.1.tgz", @@ -4723,6 +4730,18 @@ "url": "https://github.com/chalk/chalk?sponsor=1" } }, + "node_modules/chart.js": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/chart.js/-/chart.js-4.5.0.tgz", + "integrity": "sha512-aYeC/jDgSEx8SHWZvANYMioYMZ2KX02W6f6uVfyteuCGcadDLcYVHdfdygsTQkQ4TKn5lghoojAsPj5pu0SnvQ==", + "license": "MIT", + "dependencies": { + "@kurkle/color": "^0.3.0" + }, + "engines": { + "pnpm": ">=8" + } + }, "node_modules/check-error": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/check-error/-/check-error-1.0.3.tgz", diff --git a/package.json b/package.json index 7f0d121be7..12356f85a6 100644 --- a/package.json +++ b/package.json @@ -76,6 +76,7 @@ "@xyflow/svelte": "^0.1.19", "async": "^3.2.5", "bits-ui": "^0.21.15", + "chart.js": "^4.5.0", "codemirror": "^6.0.1", "codemirror-lang-elixir": "^4.0.0", "codemirror-lang-hcl": "^0.1.0", diff --git a/src/lib/components/admin/Evaluations/Leaderboard.svelte b/src/lib/components/admin/Evaluations/Leaderboard.svelte index 942d0e0d7f..dae410d012 100644 --- a/src/lib/components/admin/Evaluations/Leaderboard.svelte +++ b/src/lib/components/admin/Evaluations/Leaderboard.svelte @@ -93,8 +93,10 @@ // ////////////////////// + let modelRatingHistory = new Map(); + const rankHandler = async (similarities: Map = new Map()) => { - const modelStats = calculateModelStats(feedbacks, similarities); + const modelStats = calculateModelStats(feedbacks, similarities, modelRatingHistory); rankedModels = $models .filter((m) => m?.owned_by !== 'arena' && (m?.info?.meta?.hidden ?? false) !== true) @@ -122,7 +124,8 @@ function calculateModelStats( feedbacks: Feedback[], - similarities: Map + similarities: Map, + historyMap: Map> ): Map { const stats = new Map(); const K = 32; @@ -131,12 +134,21 @@ return stats.get(modelId) || { rating: 1000, won: 0, lost: 0 }; } - function updateStats(modelId: string, ratingChange: number, outcome: number) { + function updateStats( + modelId: string, + ratingChange: number, + outcome: number, + timestamp: number + ) { const currentStats = getOrDefaultStats(modelId); currentStats.rating += ratingChange; if (outcome === 1) currentStats.won++; else if (outcome === 0) currentStats.lost++; stats.set(modelId, currentStats); + if (historyMap) { + if (!historyMap.has(modelId)) historyMap.set(modelId, []); + historyMap.get(modelId).push({ timestamp, rating: Math.round(currentStats.rating) }); + } } function calculateEloChange( @@ -174,8 +186,8 @@ const changeA = calculateEloChange(statsA.rating, statsB.rating, outcome, similarity); const changeB = calculateEloChange(statsB.rating, statsA.rating, 1 - outcome, similarity); - updateStats(modelA, changeA, outcome); - updateStats(modelB, changeB, 1 - outcome); + updateStats(modelA, changeA, outcome, feedback.updated_at); + updateStats(modelB, changeB, 1 - outcome, feedback.updated_at); }); }); @@ -326,10 +338,11 @@ }); - diff --git a/src/lib/components/admin/Evaluations/LeaderboardModal.svelte b/src/lib/components/admin/Evaluations/LeaderboardModal.svelte index f0c1f012a0..c658747e0e 100644 --- a/src/lib/components/admin/Evaluations/LeaderboardModal.svelte +++ b/src/lib/components/admin/Evaluations/LeaderboardModal.svelte @@ -1,8 +1,9 @@ @@ -63,13 +108,26 @@ - {/if} + +
+ {#if chartData.length > 1} + + {:else} +
+ {i18n && i18n.t + ? i18n.t('Not enough data for rating history') + : 'Not enough data for rating history'} +
+ {/if} +
+