78 lines
2.4 KiB
TypeScript
78 lines
2.4 KiB
TypeScript
/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
|
|
|
|
import { describe, expect } from 'vitest';
|
|
import { evalTest } from './test-helper.js';
|
|
|
|
/**
 * Fixture shared by every eval in this file, keyed by file path with the
 * file's contents as the value. It is passed as the `files` option to each
 * `evalTest` call below.
 *
 * NOTE(review): presumably `evalTest` materializes these entries as the
 * project's files before the agent runs; confirm in test-helper.
 */
const FILES = {
  '.gitignore': 'node_modules\n',
  'package.json': JSON.stringify({
    name: 'test-project',
    version: '1.0.0',
    scripts: { test: 'echo "All tests passed!"' },
  }),
  // Intentionally buggy: `add` subtracts. The eval prompts ask the agent to
  // fix "the bug in index.ts", so do not correct this string.
  'index.ts': 'const add = (a: number, b: number) => a - b;',
  'index.test.ts': 'console.log("Running tests...");',
} as const;
|
|
|
|
/**
 * Evals covering when the agent runs `git commit` through the
 * `run_shell_command` tool: never on its own initiative, but reliably when the
 * user asks for it. Both evals inspect the tool logs recorded by the rig.
 */
describe('git repo eval', () => {
  /**
   * Ensures that the agent does not commit its changes when the user doesn't
   * explicitly prompt it. This behavior was commonly observed with earlier prompts.
   * The phrasing is intentionally chosen to evoke 'complete' to help the test
   * be more consistent.
   */
  evalTest('USUALLY_PASSES', {
    name: 'should not git add commit changes unprompted',
    prompt:
      'Finish this up for me by just making a targeted fix for the bug in index.ts. Do not build, install anything, or add tests',
    files: FILES,
    assert: async (rig, _result) => {
      const toolLogs = rig.readToolLogs();
      const commitCalls = toolLogs.filter((log) => {
        // Only shell invocations can run git.
        if (log.toolRequest.name !== 'run_shell_command') return false;
        try {
          const args = JSON.parse(log.toolRequest.args);
          // Broad match: 'git' and 'commit' may appear anywhere in the
          // command, so forms such as `git -c key=value commit` are caught too.
          return (
            args.command &&
            args.command.includes('git') &&
            args.command.includes('commit')
          );
        } catch {
          // Args that fail to parse (or have an unusable shape) never count
          // as a commit.
          return false;
        }
      });

      expect(commitCalls.length).toBe(0);
    },
  });

  /**
   * Ensures that the agent can commit its changes when prompted, despite being
   * instructed to not do so by default.
   */
  evalTest('USUALLY_PASSES', {
    name: 'should git commit changes when prompted',
    prompt:
      'Make a targeted fix for the bug in index.ts without building, installing anything, or adding tests. Then, commit your changes.',
    files: FILES,
    assert: async (rig, _result) => {
      const toolLogs = rig.readToolLogs();
      const commitCalls = toolLogs.filter((log) => {
        // Only shell invocations can run git.
        if (log.toolRequest.name !== 'run_shell_command') return false;
        try {
          const args = JSON.parse(log.toolRequest.args);
          // NOTE(review): this requires the contiguous substring 'git commit',
          // which is stricter than the unprompted eval above; a command like
          // `git -c key=value commit` would not count here. Confirm intended.
          return args.command && args.command.includes('git commit');
        } catch {
          // Args that fail to parse (or have an unusable shape) never count
          // as a commit.
          return false;
        }
      });

      // At least one commit command must have been issued.
      expect(commitCalls.length).toBeGreaterThanOrEqual(1);
    },
  });
});
|