vm/tests/performance_regression.rs

462 lines
14 KiB
Rust
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! 性能回归测试
//!
//! 检测性能退化,确保新代码不会显著降低性能
use vm_cross_arch::UnifiedExecutor;
use vm_core::{GuestArch, MMU};
use std::time::Instant;
use std::collections::HashMap;
/// Performance threshold configuration.
///
/// Holds the limits that `PerformanceBaseline::check_regression` compares a
/// `PerformanceSnapshot` against.
#[derive(Debug, Clone)]
pub struct PerformanceThresholds {
/// Upper limit for the average execution time, in microseconds.
pub avg_execution_time_us: u64,
/// Upper limit for the maximum execution time, in microseconds.
pub max_execution_time_us: u64,
/// Upper limit for the JIT compile time, in microseconds.
pub jit_compile_time_us: u64,
/// Upper limit for the GC pause time, in microseconds.
pub gc_pause_time_us: u64,
/// Lower limit for the TLB hit rate, as a percentage.
pub tlb_hit_rate_percent: f64,
}
impl Default for PerformanceThresholds {
    /// Returns the stock limits: 1 ms average execution, 5 ms maximum
    /// execution, 10 ms JIT compile, 1 ms GC pause and a 90% TLB hit rate.
    fn default() -> Self {
        // All time limits are stored in microseconds.
        const MICROS_PER_MILLI: u64 = 1_000;

        Self {
            avg_execution_time_us: MICROS_PER_MILLI,
            max_execution_time_us: 5 * MICROS_PER_MILLI,
            jit_compile_time_us: 10 * MICROS_PER_MILLI,
            gc_pause_time_us: MICROS_PER_MILLI,
            tlb_hit_rate_percent: 90.0,
        }
    }
}
/// A named performance baseline: a set of thresholds plus the snapshots
/// recorded against it.
#[derive(Debug, Clone)]
pub struct PerformanceBaseline {
/// Name of the baseline.
pub name: String,
/// Threshold configuration that snapshots are checked against.
pub thresholds: PerformanceThresholds,
/// Historical performance data, appended to by `record_snapshot`.
pub historical_data: Vec<PerformanceSnapshot>,
}
/// A single performance measurement.
#[derive(Debug, Clone)]
pub struct PerformanceSnapshot {
/// Timestamp of the measurement; the tests in this file fill it with
/// seconds since the Unix epoch.
pub timestamp: u64,
/// Average execution time, in microseconds.
pub avg_execution_time_us: u64,
/// Maximum execution time, in microseconds.
pub max_execution_time_us: u64,
/// JIT compile time, in microseconds.
pub jit_compile_time_us: u64,
/// GC pause time, in microseconds.
pub gc_pause_time_us: u64,
/// TLB hit rate, as a percentage.
pub tlb_hit_rate_percent: f64,
}
impl PerformanceBaseline {
    /// Maximum number of snapshots retained in `historical_data`.
    const MAX_SNAPSHOTS: usize = 1000;

    /// Creates a new baseline with the given name and thresholds and an empty
    /// history.
    pub fn new(name: String, thresholds: PerformanceThresholds) -> Self {
        Self {
            name,
            thresholds,
            historical_data: Vec::new(),
        }
    }

    /// Appends `snapshot` to the history, keeping only the most recent
    /// `MAX_SNAPSHOTS` (1000) entries.
    ///
    /// `historical_data` is a public field, so it may already hold more than
    /// the cap when this is called; every excess entry is dropped from the
    /// front in a single pass rather than just one.
    pub fn record_snapshot(&mut self, snapshot: PerformanceSnapshot) {
        self.historical_data.push(snapshot);
        let excess = self
            .historical_data
            .len()
            .saturating_sub(Self::MAX_SNAPSHOTS);
        if excess > 0 {
            // Oldest entries live at the front of the vector.
            self.historical_data.drain(..excess);
        }
    }

    /// Compares `snapshot` against the configured thresholds.
    ///
    /// Returns one human-readable message per violated threshold, in field
    /// order; an empty vector means no regression was detected. Time metrics
    /// regress when they exceed their threshold, the TLB hit rate regresses
    /// when it falls below its threshold.
    pub fn check_regression(&self, snapshot: &PerformanceSnapshot) -> Vec<String> {
        let mut regressions = Vec::new();
        if snapshot.avg_execution_time_us > self.thresholds.avg_execution_time_us {
            regressions.push(format!(
                "平均执行时间 {}us 超过阈值 {}us",
                snapshot.avg_execution_time_us,
                self.thresholds.avg_execution_time_us
            ));
        }
        if snapshot.max_execution_time_us > self.thresholds.max_execution_time_us {
            regressions.push(format!(
                "最大执行时间 {}us 超过阈值 {}us",
                snapshot.max_execution_time_us,
                self.thresholds.max_execution_time_us
            ));
        }
        if snapshot.jit_compile_time_us > self.thresholds.jit_compile_time_us {
            regressions.push(format!(
                "JIT编译时间 {}us 超过阈值 {}us",
                snapshot.jit_compile_time_us,
                self.thresholds.jit_compile_time_us
            ));
        }
        if snapshot.gc_pause_time_us > self.thresholds.gc_pause_time_us {
            regressions.push(format!(
                "GC暂停时间 {}us 超过阈值 {}us",
                snapshot.gc_pause_time_us,
                self.thresholds.gc_pause_time_us
            ));
        }
        // Hit rate is the only "higher is better" metric, hence `<`.
        if snapshot.tlb_hit_rate_percent < self.thresholds.tlb_hit_rate_percent {
            regressions.push(format!(
                "TLB命中率 {:.2}% 低于阈值 {:.2}%",
                snapshot.tlb_hit_rate_percent,
                self.thresholds.tlb_hit_rate_percent
            ));
        }
        regressions
    }
}
/// Regression test for raw execution speed.
///
/// Writes a small x86-64 snippet into guest memory at `0x1000`, warms the
/// executor up with 100 runs, then times 1000 further runs and checks the
/// average and maximum run time against the default thresholds.
#[test]
fn test_execution_performance() {
    const CODE_BASE: u64 = 0x1000;
    const WARMUP_RUNS: usize = 100;
    const MEASURED_RUNS: usize = 1000;

    let mut baseline =
        PerformanceBaseline::new(String::from("execution"), PerformanceThresholds::default());
    let mut executor = UnifiedExecutor::auto_create(GuestArch::X86_64, 128 * 1024 * 1024)
        .expect("创建执行器失败");

    // Copy the program into guest memory one byte at a time.
    let program = create_performance_test_code();
    for (offset, &byte) in (0u64..).zip(program.iter()) {
        executor
            .mmu_mut()
            .write(CODE_BASE + offset, u64::from(byte), 1)
            .expect("写入内存失败");
    }

    // Warm-up runs are executed but not timed.
    (0..WARMUP_RUNS).for_each(|_| {
        executor.execute(CODE_BASE).expect("执行失败");
    });

    // Timed runs: one duration sample (in microseconds) per execution.
    let samples_us: Vec<u64> = (0..MEASURED_RUNS)
        .map(|_| {
            let started = Instant::now();
            executor.execute(CODE_BASE).expect("执行失败");
            started.elapsed().as_micros() as u64
        })
        .collect();

    let mean_us = samples_us.iter().sum::<u64>() / MEASURED_RUNS as u64;
    let worst_us = samples_us.iter().copied().max().unwrap();

    let now_secs = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap()
        .as_secs();
    let snapshot = PerformanceSnapshot {
        timestamp: now_secs,
        avg_execution_time_us: mean_us,
        max_execution_time_us: worst_us,
        // Not measured by this test; the values below always pass the defaults.
        jit_compile_time_us: 0,
        gc_pause_time_us: 0,
        tlb_hit_rate_percent: 95.0,
    };

    let regressions = baseline.check_regression(&snapshot);
    baseline.record_snapshot(snapshot);
    assert!(regressions.is_empty(), "性能回归: {:?}", regressions);
}
/// Regression test for JIT compile time.
///
/// Runs a five-operation IR block 150 times through the JIT and checks the
/// total wall-clock time of those runs against the JIT compile threshold.
#[test]
fn test_jit_compile_performance() {
    use vm_engine_jit::Jit;
    use vm_ir::{IRBlock, IROp, Terminator};
    use vm_mem::SoftMmu;

    // The original author notes that 150 runs exceed the JIT's
    // HOT_THRESHOLD (100); that constant is not visible from this file.
    const RUNS: usize = 150;

    let mut baseline =
        PerformanceBaseline::new(String::from("jit_compile"), PerformanceThresholds::default());
    let mut jit = Jit::new();
    let mut mmu = SoftMmu::new(1024 * 1024, false);

    // IR block under test: two immediates followed by add, sub and mul.
    let ops = vec![
        IROp::MovImm { dst: 1, imm: 10 },
        IROp::MovImm { dst: 2, imm: 20 },
        IROp::Add { dst: 3, src1: 1, src2: 2 },
        IROp::Sub { dst: 4, src1: 3, src2: 1 },
        IROp::Mul { dst: 5, src1: 2, src2: 2 },
    ];
    let block = IRBlock {
        start_pc: 0x1000,
        ops,
        term: Terminator::Ret,
    };

    // Time all runs together; individual run results are deliberately ignored.
    let started = Instant::now();
    (0..RUNS).for_each(|_| {
        let _ = jit.run(&mut mmu, &block);
    });
    let compile_time_us = started.elapsed().as_micros() as u64;

    let now_secs = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap()
        .as_secs();
    let snapshot = PerformanceSnapshot {
        timestamp: now_secs,
        avg_execution_time_us: 0,
        max_execution_time_us: 0,
        jit_compile_time_us: compile_time_us,
        gc_pause_time_us: 0,
        tlb_hit_rate_percent: 95.0,
    };

    let regressions = baseline.check_regression(&snapshot);
    baseline.record_snapshot(snapshot);
    assert!(regressions.is_empty(), "性能回归: {:?}", regressions);
}
/// Regression test for GC pause time.
///
/// Runs ten full GC cycles (start, incremental mark until complete, terminate
/// marking, incremental sweep until complete, finish) over 1000 simulated
/// roots, collects the last pause reported by the GC stats after each cycle,
/// and checks the average of those pauses against the GC pause threshold.
#[test]
fn test_gc_performance() {
    use vm_engine_jit::{UnifiedGC, UnifiedGcConfig};

    const GC_CYCLES: usize = 10;

    let mut baseline =
        PerformanceBaseline::new("gc".to_string(), PerformanceThresholds::default());
    let config = UnifiedGcConfig {
        heap_size_limit: 10 * 1024 * 1024, // 10 MiB
        mark_quota_us: 1000,
        sweep_quota_us: 500,
        adaptive_quota: true,
        ..Default::default()
    };
    let gc = UnifiedGC::new(config);

    // Simulated object allocation: 1000 root addresses spaced 1024 apart.
    let roots: Vec<u64> = (0..1000).map(|i| i as u64 * 1024).collect();

    // Run the GC cycles and record the pause reported after each one.
    let mut pause_times = Vec::with_capacity(GC_CYCLES);
    for _ in 0..GC_CYCLES {
        let cycle_start = gc.start_gc(&roots);

        // Incremental marking until the GC reports completion.
        loop {
            let (complete, _) = gc.incremental_mark();
            if complete {
                break;
            }
        }
        gc.terminate_marking();

        // Incremental sweeping until the GC reports completion.
        loop {
            let (complete, _) = gc.incremental_sweep();
            if complete {
                break;
            }
        }
        gc.finish_gc(cycle_start);

        let stats = gc.stats();
        pause_times.push(stats.get_last_pause_us());
    }

    // `pause_times` holds exactly GC_CYCLES entries, so the division is safe.
    let avg_pause = pause_times.iter().sum::<u64>() / pause_times.len() as u64;

    let snapshot = PerformanceSnapshot {
        timestamp: std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_secs(),
        avg_execution_time_us: 0,
        max_execution_time_us: 0,
        jit_compile_time_us: 0,
        gc_pause_time_us: avg_pause,
        tlb_hit_rate_percent: 95.0,
    };

    let regressions = baseline.check_regression(&snapshot);
    baseline.record_snapshot(snapshot);
    assert!(regressions.is_empty(), "性能回归: {:?}", regressions);
}
/// Performance test for the register allocators.
///
/// Builds an IR block of 50 `MovImm` operations interleaved with 49 `Add`
/// operations, then times lifetime analysis plus register allocation for the
/// linear-scan and graph-coloring allocators. Each timing includes the
/// allocator's construction.
#[test]
fn test_register_allocator_performance() {
    use vm_engine_jit::register_allocator::{
        GraphColoringAllocator, LinearScanAllocator, RegisterAllocatorTrait,
    };
    use vm_ir::{IRBuilder, IROp, Terminator};

    // Build the IR block under test.
    let mut builder = IRBuilder::new(0x1000);
    for i in 0..50 {
        builder.push(IROp::MovImm {
            dst: i as u32,
            imm: i as u64,
        });
        // From the second iteration on, add the previous and current registers
        // into a destination offset by 50.
        if i > 0 {
            builder.push(IROp::Add {
                dst: (i + 50) as u32,
                src1: (i - 1) as u32,
                src2: i as u32,
            });
        }
    }
    builder.set_term(Terminator::Ret);
    let block = builder.build();

    // Linear-scan allocator.
    let start = Instant::now();
    let mut linear_allocator = LinearScanAllocator::new();
    linear_allocator.analyze_lifetimes(&block.ops);
    let _allocations = linear_allocator.allocate_registers(&block.ops);
    let linear_time = start.elapsed().as_micros() as u64;

    // Graph-coloring allocator.
    let start = Instant::now();
    let mut graph_allocator = GraphColoringAllocator::new();
    graph_allocator.analyze_lifetimes(&block.ops);
    let _allocations = graph_allocator.allocate_registers(&block.ops);
    let graph_time = start.elapsed().as_micros() as u64;

    println!(" 线性扫描: {}us, 图着色: {}us", linear_time, graph_time);

    // Fixed limits: 10 ms for linear scan, 50 ms for graph coloring.
    assert!(linear_time < 10000, "线性扫描应该 < 10ms");
    assert!(graph_time < 50000, "图着色应该 < 50ms");
}
/// Performance test for the IR analysis helpers.
///
/// Builds 1000 IR operations (alternating `Add` and `Load`, register numbers
/// wrapped modulo 32) and times four `IrAnalyzer` queries per operation. The
/// whole pass must finish in under 10 ms.
#[test]
fn test_ir_utils_performance() {
    use std::hint::black_box;
    use vm_engine_jit::ir_utils::IrAnalyzer;
    use vm_ir::IROp;

    // Build a large batch of IR operations.
    let ops: Vec<IROp> = (0..1000)
        .map(|i| {
            if i % 2 == 0 {
                IROp::Add {
                    dst: (i % 32) as u32,
                    src1: ((i + 1) % 32) as u32,
                    src2: ((i + 2) % 32) as u32,
                }
            } else {
                IROp::Load {
                    dst: (i % 32) as u32,
                    base: ((i + 1) % 32) as u32,
                    offset: i as i64,
                    size: 8,
                    flags: Default::default(),
                }
            }
        })
        .collect();

    let start = Instant::now();
    for op in &ops {
        // Route inputs and results through `black_box` so an optimized build
        // cannot discard the work being timed.
        let _ = black_box(IrAnalyzer::collect_read_regs(black_box(op)));
        let _ = black_box(IrAnalyzer::collect_written_regs(black_box(op)));
        let _ = black_box(IrAnalyzer::is_memory_access(black_box(op)));
        let _ = black_box(IrAnalyzer::is_branch(black_box(op)));
    }
    let elapsed = start.elapsed().as_micros() as u64;

    println!(" IR工具处理1000个操作耗时: {}us", elapsed);
    assert!(elapsed < 10000, "IR工具处理应该 < 10ms");
}
/// Performance test for the standalone GC marker and sweeper.
///
/// Prepares a `GcMarker` with 1000 root addresses and times one
/// `incremental_mark` call, then prepares a `GcSweeper` over the same
/// addresses (with an empty marked set) and times one `incremental_sweep`
/// call. Each call is given 10000 as its quota argument and must return in
/// under 10 ms.
#[test]
fn test_gc_module_performance() {
    use std::collections::HashSet;
    use std::sync::atomic::AtomicU64;
    use std::sync::{Arc, Mutex, RwLock};
    use vm_engine_jit::gc_marker::GcMarker;
    use vm_engine_jit::gc_sweeper::GcSweeper;
    use vm_engine_jit::unified_gc::{GCPhase, LockFreeMarkStack, UnifiedGcStats};

    let mark_stack = Arc::new(LockFreeMarkStack::new(1000));
    let marked_set = Arc::new(RwLock::new(HashSet::new()));
    let phase = Arc::new(AtomicU64::new(GCPhase::Marking as u64));
    let stats = Arc::new(UnifiedGcStats::default());
    let marker = GcMarker::new(
        mark_stack.clone(),
        marked_set.clone(),
        phase.clone(),
        stats.clone(),
    );

    // Root objects: 1000 addresses spaced 1024 apart, shared by both phases.
    let roots: Vec<u64> = (0..1000).map(|i| i as u64 * 1024).collect();
    marker.prepare_marking(&roots);

    // Time a single incremental marking step.
    let start = Instant::now();
    let (complete, marked_count) = marker.incremental_mark(10000); // 10 ms quota
    let elapsed = start.elapsed().as_micros() as u64;
    println!(" GC标记: 完成={}, 标记={}, 耗时={}us", complete, marked_count, elapsed);
    assert!(elapsed < 10000, "GC标记应该 < 10ms");

    // Time a single incremental sweeping step over the same addresses.
    let sweep_list = Arc::new(Mutex::new(roots.iter().copied().collect()));
    let sweeper = GcSweeper::new(sweep_list.clone(), phase.clone(), stats.clone(), 100);
    sweeper.prepare_sweeping(&roots, &HashSet::new());
    let start = Instant::now();
    let (complete, freed_count) = sweeper.incremental_sweep(10000); // 10 ms quota
    let elapsed = start.elapsed().as_micros() as u64;
    println!(" GC清扫: 完成={}, 释放={}, 耗时={}us", complete, freed_count, elapsed);
    assert!(elapsed < 10000, "GC清扫应该 < 10ms");
}
/// Returns the x86 machine code used by the execution benchmark: a short
/// arithmetic sequence on `eax`/`ebx` terminated by `ret`.
fn create_performance_test_code() -> Vec<u8> {
    // One entry per instruction; concatenated in order to form the program.
    const INSTRUCTIONS: [&[u8]; 6] = [
        &[0xB8, 0x0A, 0x00, 0x00, 0x00], // mov eax, 10
        &[0xBB, 0x14, 0x00, 0x00, 0x00], // mov ebx, 20
        &[0x01, 0xD8],                   // add eax, ebx
        &[0x83, 0xC0, 0x05],             // add eax, 5
        &[0x29, 0xD8],                   // sub eax, ebx
        &[0xC3],                         // ret
    ];
    INSTRUCTIONS.concat()
}