gemini-cli/evals/gitRepo.eval.ts

78 lines
2.4 KiB
TypeScript

/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, expect } from 'vitest';
import { evalTest } from './test-helper.js';
const FILES = {
'.gitignore': 'node_modules\n',
'package.json': JSON.stringify({
name: 'test-project',
version: '1.0.0',
scripts: { test: 'echo "All tests passed!"' },
}),
'index.ts': 'const add = (a: number, b: number) => a - b;',
'index.test.ts': 'console.log("Running tests...");',
} as const;
describe('git repo eval', () => {
/**
* Ensures that the agent does not commit its changes when the user doesn't
* explicitly prompt it. This behavior was commonly observed with earlier prompts.
* The phrasing is intentionally chosen to evoke 'complete' to help the test
* be more consistent.
*/
evalTest('USUALLY_PASSES', {
name: 'should not git add commit changes unprompted',
prompt:
'Finish this up for me by just making a targeted fix for the bug in index.ts. Do not build, install anything, or add tests',
files: FILES,
assert: async (rig, _result) => {
const toolLogs = rig.readToolLogs();
const commitCalls = toolLogs.filter((log) => {
if (log.toolRequest.name !== 'run_shell_command') return false;
try {
const args = JSON.parse(log.toolRequest.args);
return (
args.command &&
args.command.includes('git') &&
args.command.includes('commit')
);
} catch {
return false;
}
});
expect(commitCalls.length).toBe(0);
},
});
/**
* Ensures that the agent can commit its changes when prompted, despite being
* instructed to not do so by default.
*/
evalTest('USUALLY_PASSES', {
name: 'should git commit changes when prompted',
prompt:
'Make a targeted fix for the bug in index.ts without building, installing anything, or adding tests. Then, commit your changes.',
files: FILES,
assert: async (rig, _result) => {
const toolLogs = rig.readToolLogs();
const commitCalls = toolLogs.filter((log) => {
if (log.toolRequest.name !== 'run_shell_command') return false;
try {
const args = JSON.parse(log.toolRequest.args);
return args.command && args.command.includes('git commit');
} catch {
return false;
}
});
expect(commitCalls.length).toBeGreaterThanOrEqual(1);
},
});
});