initial implementation of leaderboard ratings chart

This commit is contained in:
ayana 2025-06-23 12:50:38 -07:00
parent 610680ac14
commit 098c2faae9
4 changed files with 99 additions and 8 deletions

19
package-lock.json generated
View File

@ -32,6 +32,7 @@
"@xyflow/svelte": "^0.1.19",
"async": "^3.2.5",
"bits-ui": "^0.21.15",
"chart.js": "^4.5.0",
"codemirror": "^6.0.1",
"codemirror-lang-elixir": "^4.0.0",
"codemirror-lang-hcl": "^0.1.0",
@ -1870,6 +1871,12 @@
"@jridgewell/sourcemap-codec": "^1.4.14"
}
},
"node_modules/@kurkle/color": {
"version": "0.3.4",
"resolved": "https://registry.npmjs.org/@kurkle/color/-/color-0.3.4.tgz",
"integrity": "sha512-M5UknZPHRu3DEDWoipU6sE8PdkZ6Z/S+v4dD+Ke8IaNlpdSQah50lz1KtcFBa2vsdOnwbbnxJwVM4wty6udA5w==",
"license": "MIT"
},
"node_modules/@lezer/common": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/@lezer/common/-/common-1.2.1.tgz",
@ -4723,6 +4730,18 @@
"url": "https://github.com/chalk/chalk?sponsor=1"
}
},
"node_modules/chart.js": {
"version": "4.5.0",
"resolved": "https://registry.npmjs.org/chart.js/-/chart.js-4.5.0.tgz",
"integrity": "sha512-aYeC/jDgSEx8SHWZvANYMioYMZ2KX02W6f6uVfyteuCGcadDLcYVHdfdygsTQkQ4TKn5lghoojAsPj5pu0SnvQ==",
"license": "MIT",
"dependencies": {
"@kurkle/color": "^0.3.0"
},
"engines": {
"pnpm": ">=8"
}
},
"node_modules/check-error": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/check-error/-/check-error-1.0.3.tgz",

View File

@ -76,6 +76,7 @@
"@xyflow/svelte": "^0.1.19",
"async": "^3.2.5",
"bits-ui": "^0.21.15",
"chart.js": "^4.5.0",
"codemirror": "^6.0.1",
"codemirror-lang-elixir": "^4.0.0",
"codemirror-lang-hcl": "^0.1.0",

View File

@ -93,8 +93,10 @@
//
//////////////////////
let modelRatingHistory = new Map();
const rankHandler = async (similarities: Map<string, number> = new Map()) => {
const modelStats = calculateModelStats(feedbacks, similarities);
const modelStats = calculateModelStats(feedbacks, similarities, modelRatingHistory);
rankedModels = $models
.filter((m) => m?.owned_by !== 'arena' && (m?.info?.meta?.hidden ?? false) !== true)
@ -122,7 +124,8 @@
function calculateModelStats(
feedbacks: Feedback[],
similarities: Map<string, number>
similarities: Map<string, number>,
historyMap: Map<string, Array<{ timestamp: number; rating: number }>>
): Map<string, ModelStats> {
const stats = new Map<string, ModelStats>();
const K = 32;
@ -131,12 +134,21 @@
return stats.get(modelId) || { rating: 1000, won: 0, lost: 0 };
}
function updateStats(modelId: string, ratingChange: number, outcome: number) {
/**
 * Applies one rating change to a model's running stats and, when a history
 * map was supplied, records the resulting rating as a history point.
 *
 * @param modelId      Model whose stats are updated.
 * @param ratingChange Delta added to the model's current rating.
 * @param outcome      1 counts as a win, 0 as a loss; any other value
 *                     changes only the rating.
 * @param timestamp    Stored unchanged on the history point (callers pass
 *                     `feedback.updated_at`).
 */
function updateStats(
	modelId: string,
	ratingChange: number,
	outcome: number,
	timestamp: number
) {
	const currentStats = getOrDefaultStats(modelId);
	currentStats.rating += ratingChange;
	if (outcome === 1) currentStats.won++;
	else if (outcome === 0) currentStats.lost++;
	stats.set(modelId, currentStats);

	if (historyMap) {
		// Resolve the per-model series once so we never dereference a
		// possibly-undefined `Map.get` result.
		let history = historyMap.get(modelId);
		if (!history) {
			history = [];
			historyMap.set(modelId, history);
		}
		// Ratings are rounded for display; the unrounded value stays in `stats`.
		history.push({ timestamp, rating: Math.round(currentStats.rating) });
	}
}
function calculateEloChange(
@ -174,8 +186,8 @@
const changeA = calculateEloChange(statsA.rating, statsB.rating, outcome, similarity);
const changeB = calculateEloChange(statsB.rating, statsA.rating, 1 - outcome, similarity);
updateStats(modelA, changeA, outcome);
updateStats(modelB, changeB, 1 - outcome);
updateStats(modelA, changeA, outcome, feedback.updated_at);
updateStats(modelB, changeB, 1 - outcome, feedback.updated_at);
});
});
@ -326,10 +338,11 @@
});
</script>
<ModelModal
<!-- Per-model details modal; receives the rating history so it can draw the chart. -->
<ModelModal
	bind:show={showLeaderboardModal}
	model={selectedModel}
	{feedbacks}
	{modelRatingHistory}
	onClose={closeLeaderboardModal}
/>

View File

@ -1,8 +1,9 @@
<script lang="ts">
import Modal from '$lib/components/common/Modal.svelte';
import { getContext } from 'svelte';
import { getContext, onMount, afterUpdate } from 'svelte';
export let show = false;
export let model = null;
export let modelRatingHistory = new Map();
export let feedbacks = [];
export let onClose: () => void = () => {};
const i18n = getContext('i18n');
@ -28,6 +29,50 @@
.slice(0, topN)
.map(([tag, count]) => ({ tag, count }));
};
// <canvas> element, filled in by `bind:this` in the markup.
let chartCanvas;
// Chart instance currently drawn on the canvas, if any.
let chart;

// Rating history of the selected model; an empty list when there is no
// model, no history map, or no entry for this model id.
$: chartData =
	!model || !modelRatingHistory || !modelRatingHistory.has(model.id)
		? []
		: modelRatingHistory.get(model.id);
// Canvas / data pair (and data length) the current `chart` was built from.
// Lets `afterUpdate` skip rebuilding when nothing relevant changed.
let renderedCanvas: unknown = null;
let renderedData: unknown = null;
let renderedLength = 0;

/** Destroys the current chart instance (if any) and forgets what it was built from. */
function destroyChart() {
	if (chart) {
		chart.destroy();
		chart = undefined;
	}
	renderedCanvas = null;
	renderedData = null;
	renderedLength = 0;
}

/** True when `chart` already shows exactly this canvas + data. */
function isRendered(canvas, points) {
	return (
		Boolean(chart) &&
		renderedCanvas === canvas &&
		renderedData === points &&
		renderedLength === points.length
	);
}

/**
 * Draws the rating-history line chart on the bound canvas.
 *
 * Does nothing useful until the canvas is bound and there are at least two
 * data points; in that case any previously drawn chart is destroyed so it
 * does not stay attached to a removed canvas. chart.js is loaded lazily on
 * first use.
 */
async function renderChart() {
	const canvas = chartCanvas;
	const points = chartData;

	if (!canvas || !points || points.length < 2) {
		destroyChart();
		return;
	}
	// Called after every component update: bail out when already up to date.
	if (isRendered(canvas, points)) return;

	const { Chart, registerables } = await import('chart.js');

	// The modal may have closed or switched model while chart.js was loading,
	// or a concurrent call may already have drawn this exact state.
	if (chartCanvas !== canvas || chartData !== points) return;
	if (isRendered(canvas, points)) return;

	Chart.register(...registerables);
	// A canvas can only host one Chart instance at a time.
	destroyChart();

	chart = new Chart(canvas, {
		type: 'line',
		data: {
			labels: points.map((d) => new Date(d.timestamp * 1000).toLocaleDateString()),
			datasets: [
				{
					label: 'Rating',
					data: points.map((d) => d.rating),
					borderColor: 'rgba(75,192,192,1)',
					backgroundColor: 'rgba(75,192,192,0.1)',
					tension: 0.2,
					pointRadius: 2,
					fill: false
				}
			]
		},
		options: {
			scales: {
				y: { beginAtZero: false, title: { display: true, text: 'Elo Rating' } },
				x: { title: { display: true, text: 'Date' } }
			},
			plugins: { legend: { display: false } },
			responsive: true,
			maintainAspectRatio: false
		}
	});
	renderedCanvas = canvas;
	renderedData = points;
	renderedLength = points.length;
}

onMount(() => {
	void renderChart();
	// The function returned from onMount runs on unmount: release the chart.
	return destroyChart;
});
afterUpdate(renderChart);
</script>
<Modal size="sm" bind:show>
@ -63,13 +108,26 @@
<span>-</span>
{/if}
</div>
<!-- Rating history: chart when there are at least two points, otherwise a placeholder. -->
<div class="my-4" style="height:150px;">
	{#if chartData.length > 1}
		<canvas bind:this={chartCanvas}></canvas>
	{:else}
		<div class="text-xs text-gray-400 text-center py-10">
			<!-- `i18n` is a store; it must be read as `$i18n` for the translation to apply. -->
			{$i18n.t('Not enough data for rating history')}
		</div>
	{/if}
</div>
<div class="flex justify-end pt-2">
<button
	class="px-3.5 py-1.5 text-sm font-medium bg-black hover:bg-gray-900 text-white dark:bg-white dark:text-black dark:hover:bg-gray-100 transition rounded-full"
	type="button"
	on:click={close}
>
	<!-- `i18n` is a store: `i18n.t` is undefined, so the label must go through `$i18n`. -->
	{$i18n.t('Close')}
</button>
</div>
</div>