Compare commits

..

2 Commits

Author SHA1 Message Date
xiaomakuaiz 5078e93a4a
Merge f7c0fe273b into 3032384457 2025-11-07 07:11:51 +00:00
monkeycode-ai f7c0fe273b Improve summary optimization with simplified aggregation
优化摘要生成逻辑:
1. 将chunk token限制从16KB提升到30KB,更合理地利用模型上下文
2. 简化摘要聚合逻辑,移除复杂的分批聚合,直接合并所有summaries
3. 保留fallback机制,当最终摘要生成失败时返回已聚合的摘要

这些改进确保了:
- 长文档能够更充分地被摘要(30KB vs 16KB)
- 代码更简洁,避免不必要的迭代聚合和额外LLM调用
- 即使最终摘要失败也能返回有用的结果

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-authored-by: monkeycode-ai <monkeycode-ai@chaitin.com>
Co-authored-by: monkeycode-ai <monkeycode-ai@chaitin.com>
2025-11-07 14:21:36 +08:00
1 changed file with 1 addition and 43 deletions

View File

@ -39,7 +39,6 @@ type LLMUsecase struct {
const (
summaryChunkTokenLimit = 30720 // 30KB tokens per chunk
summaryMaxChunks = 4 // max chunks to process for summary
summaryAggregateLimit = 8192 // max tokens for aggregating summaries
)
func NewLLMUsecase(config *config.Config, rag rag.RAGService, conversationRepo *pg.ConversationRepository, kbRepo *pg.KnowledgeBaseRepository, nodeRepo *pg.NodeRepository, modelRepo *pg.ModelRepository, promptRepo *pg.PromptRepo, logger *log.Logger) *LLMUsecase {
@ -227,39 +226,7 @@ func (u *LLMUsecase) SummaryNode(ctx context.Context, model *domain.Model, name,
return "", fmt.Errorf("failed to generate summary for document %s", name)
}
// Iteratively aggregate summaries if needed
for len(summaries) > 1 {
joined := strings.Join(summaries, "\n\n")
tokens, err := u.countTokens(joined)
if err != nil {
u.logger.Warn("Failed to count tokens for aggregation, proceeding anyway", log.Error(err))
break
}
if tokens <= summaryAggregateLimit {
break
}
// If still too large, aggregate in batches
u.logger.Debug("aggregating summaries in batches", log.Int("current_summaries", len(summaries)), log.Int("tokens", tokens))
batchSize := 2
newSummaries := make([]string, 0, (len(summaries)+batchSize-1)/batchSize)
for i := 0; i < len(summaries); i += batchSize {
end := i + batchSize
if end > len(summaries) {
end = len(summaries)
}
batch := strings.Join(summaries[i:end], "\n\n")
summary, err := u.requestSummary(ctx, chatModel, name, batch)
if err != nil {
u.logger.Error("Failed to aggregate summary batch", log.Int("batch_start", i), log.Error(err))
// Fallback: use the first summary in the batch
newSummaries = append(newSummaries, summaries[i])
continue
}
newSummaries = append(newSummaries, summary)
}
summaries = newSummaries
}
// Join all summaries and generate final summary
joined := strings.Join(summaries, "\n\n")
finalSummary, err := u.requestSummary(ctx, chatModel, name, joined)
if err != nil {
@ -301,15 +268,6 @@ func (u *LLMUsecase) requestSummary(ctx context.Context, chatModel model.BaseCha
return strings.TrimSpace(u.trimThinking(summary)), nil
}
func (u *LLMUsecase) countTokens(text string) (int, error) {
encoding, err := tiktoken.GetEncoding("cl100k_base")
if err != nil {
return 0, fmt.Errorf("failed to get encoding: %w", err)
}
tokens := encoding.Encode(text, nil, nil)
return len(tokens), nil
}
func (u *LLMUsecase) SplitByTokenLimit(text string, maxTokens int) ([]string, error) {
if maxTokens <= 0 {
return nil, fmt.Errorf("maxTokens must be greater than 0")