2024-11-21 13:53:25 +08:00
package resource
import (
2025-01-13 23:05:04 +08:00
"cmp"
2024-11-21 13:53:25 +08:00
"context"
"fmt"
"log/slog"
2024-12-05 05:02:40 +08:00
"slices"
2024-12-11 02:37:37 +08:00
"strings"
2024-11-21 13:53:25 +08:00
"sync"
"time"
"github.com/hashicorp/golang-lru/v2/expirable"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
"golang.org/x/sync/errgroup"
2025-07-10 19:54:10 +08:00
"golang.org/x/sync/singleflight"
2025-06-27 20:00:39 +08:00
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
2024-11-21 13:53:25 +08:00
"k8s.io/apimachinery/pkg/runtime/schema"
2024-11-22 21:44:06 +08:00
2025-01-21 17:06:55 +08:00
"github.com/grafana/authlib/types"
2025-05-16 03:36:52 +08:00
2025-04-24 01:54:35 +08:00
dashboardv1 "github.com/grafana/grafana/apps/dashboard/pkg/apis/dashboard/v1beta1"
2025-04-15 04:20:10 +08:00
folders "github.com/grafana/grafana/apps/folder/pkg/apis/folder/v1beta1"
2025-08-28 22:02:47 +08:00
"github.com/grafana/grafana/pkg/infra/tracing"
2025-05-16 03:36:52 +08:00
"github.com/grafana/grafana/pkg/storage/unified/resourcepb"
2024-11-21 13:53:25 +08:00
)
2025-05-09 21:36:21 +08:00
// maxBatchSize is the number of bulk index items accumulated in memory before
// the batch is flushed to the index while building or updating it.
const maxBatchSize = 1000
2024-11-21 13:53:25 +08:00
// NamespacedResource identifies one resource kind (group + resource) inside a
// single namespace.
type NamespacedResource struct {
	Namespace string
	Group     string
	Resource  string
}

// Valid reports whether namespace, group and resource are all non-empty.
func (s *NamespacedResource) Valid() bool {
	if s.Namespace == "" || s.Group == "" || s.Resource == "" {
		return false
	}
	return true
}

// String renders the value as "namespace/group/resource".
func (s *NamespacedResource) String() string {
	ns, group, resource := s.Namespace, s.Group, s.Resource
	return fmt.Sprintf("%s/%s/%s", ns, group, resource)
}
2025-05-09 21:36:21 +08:00
// IndexAction selects the operation applied to a single BulkIndexItem.
type IndexAction int
2024-11-22 21:44:06 +08:00
2025-05-09 21:36:21 +08:00
// The supported bulk index actions.
const (
// ActionIndex marks an item whose document should be written to the index.
ActionIndex IndexAction = iota
// ActionDelete marks an item whose key should be removed from the index.
ActionDelete
)
2024-11-22 21:44:06 +08:00
2025-05-09 21:36:21 +08:00
type BulkIndexItem struct {
Action IndexAction
2025-05-16 03:36:52 +08:00
Key * resourcepb . ResourceKey // Only used for delete actions
Doc * IndexableDocument // Only used for index actions
2025-05-09 21:36:21 +08:00
}
// BulkIndexRequest groups several index/delete operations so they can be
// submitted to an index in a single BulkIndex call.
type BulkIndexRequest struct {
// Items are the operations to perform.
Items [ ] * BulkIndexItem
// NOTE(review): ResourceVersion is not set by the callers visible in this
// file; presumably the resource version the batch corresponds to — confirm.
ResourceVersion int64
}
type ResourceIndex interface {
// BulkIndex allows for multiple index actions to be performed in a single call.
// The order of the items is guaranteed to be the same as the input
BulkIndex ( req * BulkIndexRequest ) error
2024-11-22 21:44:06 +08:00
// Search within a namespaced resource
// When working with federated queries, the additional indexes will be passed in explicitly
2025-05-16 03:36:52 +08:00
Search ( ctx context . Context , access types . AccessClient , req * resourcepb . ResourceSearchRequest , federate [ ] ResourceIndex ) ( * resourcepb . ResourceSearchResponse , error )
2024-11-22 21:44:06 +08:00
2025-01-11 02:27:10 +08:00
// List within an response
2025-05-16 03:36:52 +08:00
ListManagedObjects ( ctx context . Context , req * resourcepb . ListManagedObjectsRequest ) ( * resourcepb . ListManagedObjectsResponse , error )
2025-01-11 02:27:10 +08:00
// Counts the values in a repo
2025-05-16 03:36:52 +08:00
CountManagedObjects ( ctx context . Context ) ( [ ] * resourcepb . CountManagedObjectsResponse_ResourceCount , error )
2024-12-05 05:02:40 +08:00
// Get the number of documents in the index
2024-12-11 02:37:37 +08:00
DocCount ( ctx context . Context , folder string ) ( int64 , error )
2025-08-25 16:13:07 +08:00
// UpdateIndex updates the index with the latest data (using update function provided when index was built) to guarantee strong consistency during the search.
// Returns RV to which index was updated.
UpdateIndex ( ctx context . Context , reason string ) ( int64 , error )
2024-11-22 21:44:06 +08:00
}
2025-08-25 16:13:07 +08:00
// BuildFn writes all documents into a newly created index. The builder defined
// in this file returns the resource version reported by the storage list
// iterator, together with any error.
type BuildFn func ( index ResourceIndex ) ( int64 , error )
// UpdateFn is responsible for updating index with changes since given RV. It should return new RV (to be used as next sinceRV), number of updated documents and error, if any.
type UpdateFn func ( context context . Context , index ResourceIndex , sinceRV int64 ) ( newRV int64 , updatedDocs int , _ error )
2024-11-22 21:44:06 +08:00
// SearchBackend contains the technology specific logic to support search
2024-11-21 13:53:25 +08:00
type SearchBackend interface {
2025-07-10 19:54:10 +08:00
// GetIndex returns existing index, or nil.
2024-11-22 21:44:06 +08:00
GetIndex ( ctx context . Context , key NamespacedResource ) ( ResourceIndex , error )
2025-07-10 19:54:10 +08:00
// BuildIndex builds an index from scratch.
// Depending on the size, the backend may choose different options (eg: memory vs disk).
// The last known resource version can be used to detect that nothing has changed, and existing on-disk index can be reused.
// The builder will write all documents before returning.
2025-08-25 16:13:07 +08:00
// Updater function is used to update the index before performing the search.
BuildIndex (
ctx context . Context ,
key NamespacedResource ,
size int64 ,
nonStandardFields SearchableDocumentFields ,
indexBuildReason string ,
builder BuildFn ,
updater UpdateFn ,
2025-08-27 23:10:54 +08:00
rebuild bool ,
2025-08-25 16:13:07 +08:00
) ( ResourceIndex , error )
2024-11-22 21:44:06 +08:00
2025-07-10 19:54:10 +08:00
// TotalDocs returns the total number of documents across all indexes.
2024-12-05 05:02:40 +08:00
TotalDocs ( ) int64
2024-11-21 13:53:25 +08:00
}
// tracingPrexfixSearch is prepended to the name of every tracing span started
// by the search support code in this file.
const tracingPrexfixSearch = "unified_search."
// This supports indexing+search regardless of implementation
type searchSupport struct {
2025-09-11 16:23:03 +08:00
tracer trace . Tracer
log * slog . Logger
storage StorageBackend
search SearchBackend
indexMetrics * BleveIndexMetrics
access types . AccessClient
builders * builderCache
initWorkers int
initMinSize int
2025-05-09 21:36:21 +08:00
2025-09-24 22:54:35 +08:00
ownsIndexFn func ( key NamespacedResource ) ( bool , error )
2025-07-24 03:59:24 +08:00
2025-07-10 19:54:10 +08:00
buildIndex singleflight . Group
2025-06-13 03:34:48 +08:00
// periodic rebuilding of the indexes to keep usage insights up to date
rebuildInterval time . Duration
2024-11-21 13:53:25 +08:00
}
2024-11-27 13:57:53 +08:00
var (
2025-05-16 03:36:52 +08:00
_ resourcepb . ResourceIndexServer = ( * searchSupport ) ( nil )
_ resourcepb . ManagedObjectIndexServer = ( * searchSupport ) ( nil )
2024-11-27 13:57:53 +08:00
)
2025-09-24 22:54:35 +08:00
func newSearchSupport ( opts SearchOptions , storage StorageBackend , access types . AccessClient , blob BlobSupport , tracer trace . Tracer , indexMetrics * BleveIndexMetrics , ownsIndexFn func ( key NamespacedResource ) ( bool , error ) ) ( support * searchSupport , err error ) {
2024-11-21 13:53:25 +08:00
// No backend search support
if opts . Backend == nil {
return nil , nil
}
2025-02-12 01:57:46 +08:00
if tracer == nil {
return nil , fmt . Errorf ( "missing tracer" )
}
2024-11-21 13:53:25 +08:00
if opts . WorkerThreads < 1 {
opts . WorkerThreads = 1
}
2025-09-24 22:54:35 +08:00
if ownsIndexFn == nil {
ownsIndexFn = func ( key NamespacedResource ) ( bool , error ) {
return true , nil
}
}
2024-11-21 13:53:25 +08:00
support = & searchSupport {
2025-09-11 16:23:03 +08:00
access : access ,
tracer : tracer ,
storage : storage ,
search : opts . Backend ,
log : slog . Default ( ) . With ( "logger" , "resource-search" ) ,
initWorkers : opts . WorkerThreads ,
initMinSize : opts . InitMinCount ,
indexMetrics : indexMetrics ,
rebuildInterval : opts . RebuildInterval ,
2025-09-24 22:54:35 +08:00
ownsIndexFn : ownsIndexFn ,
2024-11-21 13:53:25 +08:00
}
info , err := opts . Resources . GetDocumentBuilders ( )
if err != nil {
return nil , err
}
support . builders , err = newBuilderCache ( info , 100 , time . Minute * 2 ) // TODO? opts
if support . builders != nil {
support . builders . blob = blob
}
return support , err
}
2025-05-16 03:36:52 +08:00
func ( s * searchSupport ) ListManagedObjects ( ctx context . Context , req * resourcepb . ListManagedObjectsRequest ) ( * resourcepb . ListManagedObjectsResponse , error ) {
2025-01-13 23:05:04 +08:00
if req . NextPageToken != "" {
2025-05-16 03:36:52 +08:00
return & resourcepb . ListManagedObjectsResponse {
2025-01-13 23:05:04 +08:00
Error : NewBadRequestError ( "multiple pages not yet supported" ) ,
2025-01-11 02:27:10 +08:00
} , nil
}
2025-01-13 23:05:04 +08:00
2025-05-16 03:36:52 +08:00
rsp := & resourcepb . ListManagedObjectsResponse { }
2025-01-13 23:05:04 +08:00
stats , err := s . storage . GetResourceStats ( ctx , req . Namespace , 0 )
if err != nil {
rsp . Error = AsErrorResult ( err )
return rsp , nil
}
for _ , info := range stats {
idx , err := s . getOrCreateIndex ( ctx , NamespacedResource {
Namespace : req . Namespace ,
Group : info . Group ,
Resource : info . Resource ,
2025-07-30 22:34:15 +08:00
} , "listManagedObjects" )
2025-01-13 23:05:04 +08:00
if err != nil {
rsp . Error = AsErrorResult ( err )
return rsp , nil
}
2025-03-11 00:48:53 +08:00
kind , err := idx . ListManagedObjects ( ctx , req )
2025-01-13 23:05:04 +08:00
if err != nil {
rsp . Error = AsErrorResult ( err )
return rsp , nil
}
if kind . NextPageToken != "" {
2025-05-16 03:36:52 +08:00
rsp . Error = & resourcepb . ErrorResult {
2025-01-13 23:05:04 +08:00
Message : "Multiple pages are not yet supported" ,
}
return rsp , nil
}
rsp . Items = append ( rsp . Items , kind . Items ... )
}
// Sort based on path
2025-05-16 03:36:52 +08:00
slices . SortFunc ( rsp . Items , func ( a , b * resourcepb . ListManagedObjectsResponse_Item ) int {
2025-01-13 23:05:04 +08:00
return cmp . Compare ( a . Path , b . Path )
} )
return rsp , nil
2024-11-27 13:57:53 +08:00
}
2025-05-16 03:36:52 +08:00
func ( s * searchSupport ) CountManagedObjects ( ctx context . Context , req * resourcepb . CountManagedObjectsRequest ) ( * resourcepb . CountManagedObjectsResponse , error ) {
rsp := & resourcepb . CountManagedObjectsResponse { }
2025-01-13 23:05:04 +08:00
stats , err := s . storage . GetResourceStats ( ctx , req . Namespace , 0 )
if err != nil {
rsp . Error = AsErrorResult ( err )
return rsp , nil
}
for _ , info := range stats {
idx , err := s . getOrCreateIndex ( ctx , NamespacedResource {
Namespace : req . Namespace ,
Group : info . Group ,
Resource : info . Resource ,
2025-07-30 22:34:15 +08:00
} , "countManagedObjects" )
2025-01-13 23:05:04 +08:00
if err != nil {
rsp . Error = AsErrorResult ( err )
return rsp , nil
}
2025-03-11 00:48:53 +08:00
counts , err := idx . CountManagedObjects ( ctx )
2025-01-13 23:05:04 +08:00
if err != nil {
rsp . Error = AsErrorResult ( err )
return rsp , nil
}
2025-03-11 00:48:53 +08:00
if req . Id == "" {
2025-01-13 23:05:04 +08:00
rsp . Items = append ( rsp . Items , counts ... )
} else {
for _ , k := range counts {
2025-03-11 00:48:53 +08:00
if k . Id == req . Id {
2025-01-13 23:05:04 +08:00
rsp . Items = append ( rsp . Items , k )
}
}
}
}
2025-03-11 00:48:53 +08:00
// Sort based on manager/group/resource
2025-05-16 03:36:52 +08:00
slices . SortFunc ( rsp . Items , func ( a , b * resourcepb . CountManagedObjectsResponse_ResourceCount ) int {
2025-01-13 23:05:04 +08:00
return cmp . Or (
2025-03-11 00:48:53 +08:00
cmp . Compare ( a . Kind , b . Kind ) ,
cmp . Compare ( a . Id , b . Id ) ,
2025-01-13 23:05:04 +08:00
cmp . Compare ( a . Group , b . Group ) ,
cmp . Compare ( a . Resource , b . Resource ) ,
)
} )
return rsp , nil
2024-11-27 13:57:53 +08:00
}
// Search implements ResourceIndexServer.
2025-05-16 03:36:52 +08:00
func ( s * searchSupport ) Search ( ctx context . Context , req * resourcepb . ResourceSearchRequest ) ( * resourcepb . ResourceSearchResponse , error ) {
2025-01-29 03:27:01 +08:00
ctx , span := s . tracer . Start ( ctx , tracingPrexfixSearch + "Search" )
defer span . End ( )
2025-09-09 20:15:45 +08:00
if req . Options . Key . Namespace == "" || req . Options . Key . Group == "" || req . Options . Key . Resource == "" {
return & resourcepb . ResourceSearchResponse {
Error : NewBadRequestError ( "missing namespace, group or resource" ) ,
} , nil
}
2024-11-27 13:57:53 +08:00
nsr := NamespacedResource {
Group : req . Options . Key . Group ,
Namespace : req . Options . Key . Namespace ,
Resource : req . Options . Key . Resource ,
}
2025-07-30 22:34:15 +08:00
idx , err := s . getOrCreateIndex ( ctx , nsr , "search" )
2024-11-27 13:57:53 +08:00
if err != nil {
2025-05-16 03:36:52 +08:00
return & resourcepb . ResourceSearchResponse {
2024-11-27 13:57:53 +08:00
Error : AsErrorResult ( err ) ,
} , nil
}
// Get the federated indexes
federate := make ( [ ] ResourceIndex , len ( req . Federated ) )
for i , f := range req . Federated {
nsr . Group = f . Group
nsr . Resource = f . Resource
2025-07-30 22:34:15 +08:00
federate [ i ] , err = s . getOrCreateIndex ( ctx , nsr , "federatedSearch" )
2024-11-27 13:57:53 +08:00
if err != nil {
2025-05-16 03:36:52 +08:00
return & resourcepb . ResourceSearchResponse {
2024-11-27 13:57:53 +08:00
Error : AsErrorResult ( err ) ,
} , nil
}
}
return idx . Search ( ctx , s . access , req , federate )
}
2024-12-11 02:37:37 +08:00
// GetStats implements ResourceServer.
2025-05-16 03:36:52 +08:00
func ( s * searchSupport ) GetStats ( ctx context . Context , req * resourcepb . ResourceStatsRequest ) ( * resourcepb . ResourceStatsResponse , error ) {
2024-12-11 02:37:37 +08:00
if req . Namespace == "" {
2025-05-16 03:36:52 +08:00
return & resourcepb . ResourceStatsResponse {
2024-12-11 02:37:37 +08:00
Error : NewBadRequestError ( "missing namespace" ) ,
} , nil
}
2025-05-16 03:36:52 +08:00
rsp := & resourcepb . ResourceStatsResponse { }
2024-12-11 02:37:37 +08:00
// Explicit list of kinds
if len ( req . Kinds ) > 0 {
2025-05-16 03:36:52 +08:00
rsp . Stats = make ( [ ] * resourcepb . ResourceStatsResponse_Stats , len ( req . Kinds ) )
2024-12-11 02:37:37 +08:00
for i , k := range req . Kinds {
parts := strings . SplitN ( k , "/" , 2 )
index , err := s . getOrCreateIndex ( ctx , NamespacedResource {
Namespace : req . Namespace ,
Group : parts [ 0 ] ,
Resource : parts [ 1 ] ,
2025-07-30 22:34:15 +08:00
} , "getStats" )
2024-12-11 02:37:37 +08:00
if err != nil {
rsp . Error = AsErrorResult ( err )
return rsp , nil
}
count , err := index . DocCount ( ctx , req . Folder )
if err != nil {
rsp . Error = AsErrorResult ( err )
return rsp , nil
}
2025-05-16 03:36:52 +08:00
rsp . Stats [ i ] = & resourcepb . ResourceStatsResponse_Stats {
2024-12-11 02:37:37 +08:00
Group : parts [ 0 ] ,
Resource : parts [ 1 ] ,
Count : count ,
}
}
return rsp , nil
}
stats , err := s . storage . GetResourceStats ( ctx , req . Namespace , 0 )
if err != nil {
2025-05-16 03:36:52 +08:00
return & resourcepb . ResourceStatsResponse {
2024-12-11 02:37:37 +08:00
Error : AsErrorResult ( err ) ,
} , nil
}
2025-05-16 03:36:52 +08:00
rsp . Stats = make ( [ ] * resourcepb . ResourceStatsResponse_Stats , len ( stats ) )
2024-12-11 02:37:37 +08:00
// When not filtered by folder or repository, we can use the results directly
if req . Folder == "" {
for i , stat := range stats {
2025-05-16 03:36:52 +08:00
rsp . Stats [ i ] = & resourcepb . ResourceStatsResponse_Stats {
2024-12-11 02:37:37 +08:00
Group : stat . Group ,
Resource : stat . Resource ,
Count : stat . Count ,
}
}
return rsp , nil
}
for i , stat := range stats {
index , err := s . getOrCreateIndex ( ctx , NamespacedResource {
Namespace : req . Namespace ,
Group : stat . Group ,
Resource : stat . Resource ,
2025-07-30 22:34:15 +08:00
} , "getStats" )
2024-12-11 02:37:37 +08:00
if err != nil {
rsp . Error = AsErrorResult ( err )
return rsp , nil
}
count , err := index . DocCount ( ctx , req . Folder )
if err != nil {
rsp . Error = AsErrorResult ( err )
return rsp , nil
}
2025-05-16 03:36:52 +08:00
rsp . Stats [ i ] = & resourcepb . ResourceStatsResponse_Stats {
2024-12-11 02:37:37 +08:00
Group : stat . Group ,
Resource : stat . Resource ,
Count : count ,
}
}
return rsp , nil
}
2025-06-13 03:34:48 +08:00
func ( s * searchSupport ) buildIndexes ( ctx context . Context , rebuild bool ) ( int , error ) {
2024-11-21 13:53:25 +08:00
totalBatchesIndexed := 0
group := errgroup . Group { }
group . SetLimit ( s . initWorkers )
2024-12-05 18:58:13 +08:00
stats , err := s . storage . GetResourceStats ( ctx , "" , s . initMinSize )
2024-12-04 01:20:27 +08:00
if err != nil {
2025-06-13 03:34:48 +08:00
return 0 , err
2024-12-04 01:20:27 +08:00
}
for _ , info := range stats {
2025-06-13 03:34:48 +08:00
// only periodically rebuild the dashboard index, specifically to update the usage insights data
if rebuild && info . Resource != dashboardv1 . DASHBOARD_RESOURCE {
continue
}
2025-09-24 22:54:35 +08:00
own , err := s . ownsIndexFn ( info . NamespacedResource )
if err != nil {
s . log . Warn ( "failed to check index ownership, building index" , "namespace" , info . Namespace , "group" , info . Group , "resource" , info . Resource , "error" , err )
} else if ! own {
2025-07-24 03:59:24 +08:00
s . log . Debug ( "skip building index" , "namespace" , info . Namespace , "group" , info . Group , "resource" , info . Resource )
continue
}
2024-12-04 01:20:27 +08:00
group . Go ( func ( ) error {
2025-06-13 03:34:48 +08:00
if rebuild {
// we need to clear the cache to make sure we get the latest usage insights data
s . builders . clearNamespacedCache ( info . NamespacedResource )
}
2024-12-04 01:20:27 +08:00
totalBatchesIndexed ++
2025-06-27 20:00:39 +08:00
2025-07-30 22:34:15 +08:00
s . log . Debug ( "building index" , "namespace" , info . Namespace , "group" , info . Group , "resource" , info . Resource , "rebuild" , rebuild )
reason := "init"
if rebuild {
reason = "rebuild"
}
2025-09-11 16:23:03 +08:00
_ , err := s . build ( ctx , info . NamespacedResource , info . Count , reason , rebuild )
2024-12-04 01:20:27 +08:00
return err
} )
2024-11-21 13:53:25 +08:00
}
err = group . Wait ( )
2025-06-13 03:34:48 +08:00
if err != nil {
return totalBatchesIndexed , err
}
return totalBatchesIndexed , nil
}
func ( s * searchSupport ) init ( ctx context . Context ) error {
2025-09-11 16:23:03 +08:00
origCtx := ctx
2025-06-13 03:34:48 +08:00
ctx , span := s . tracer . Start ( ctx , tracingPrexfixSearch + "Init" )
defer span . End ( )
start := time . Now ( ) . Unix ( )
totalBatchesIndexed , err := s . buildIndexes ( ctx , false )
2024-11-21 13:53:25 +08:00
if err != nil {
return err
}
2025-06-13 03:34:48 +08:00
2024-11-21 13:53:25 +08:00
span . AddEvent ( "namespaces indexed" , trace . WithAttributes ( attribute . Int ( "namespaced_indexed" , totalBatchesIndexed ) ) )
2025-06-13 03:34:48 +08:00
// since usage insights is not in unified storage, we need to periodically rebuild the index
// to make sure these data points are up to date.
if s . rebuildInterval > 0 {
2025-09-11 16:23:03 +08:00
go s . startPeriodicRebuild ( origCtx )
2025-06-13 03:34:48 +08:00
}
2024-12-05 05:02:40 +08:00
end := time . Now ( ) . Unix ( )
2024-12-10 12:32:19 +08:00
s . log . Info ( "search index initialized" , "duration_secs" , end - start , "total_docs" , s . search . TotalDocs ( ) )
2024-12-05 05:02:40 +08:00
2024-11-21 13:53:25 +08:00
return nil
}
2025-06-13 03:34:48 +08:00
// startPeriodicRebuild rebuilds the dashboard indexes once per rebuildInterval
// until ctx is cancelled. It blocks, so callers run it in its own goroutine.
// Rebuild failures are logged and do not stop the loop.
func (s *searchSupport) startPeriodicRebuild(ctx context.Context) {
	tick := time.NewTicker(s.rebuildInterval)
	defer tick.Stop()

	s.log.Info("starting periodic index rebuild", "interval", s.rebuildInterval)

	for {
		select {
		case <-tick.C:
			s.log.Info("starting periodic index rebuild")
			err := s.rebuildDashboardIndexes(ctx)
			if err != nil {
				s.log.Error("error during periodic index rebuild", "error", err)
				continue
			}
			s.log.Info("periodic index rebuild completed successfully")
		case <-ctx.Done():
			s.log.Info("stopping periodic index rebuild due to context cancellation")
			return
		}
	}
}
// rebuildDashboardIndexes runs buildIndexes in rebuild mode inside its own
// tracing span and logs how long it took, how many indexes were rebuilt and the
// backend's total document count. A build failure is returned wrapped.
func (s *searchSupport) rebuildDashboardIndexes(ctx context.Context) error {
	ctx, span := s.tracer.Start(ctx, tracingPrexfixSearch+"RebuildDashboardIndexes")
	defer span.End()

	begin := time.Now()
	s.log.Info("rebuilding all search indexes")

	rebuilt, err := s.buildIndexes(ctx, true)
	if err != nil {
		return fmt.Errorf("failed to rebuild dashboard indexes: %w", err)
	}

	took := time.Since(begin)
	s.log.Info("completed rebuilding all dashboard search indexes",
		"duration", took,
		"rebuilt_indexes", rebuilt,
		"total_docs", s.search.TotalDocs())
	return nil
}
2025-07-30 22:34:15 +08:00
func ( s * searchSupport ) getOrCreateIndex ( ctx context . Context , key NamespacedResource , reason string ) ( ResourceIndex , error ) {
2025-01-13 23:05:04 +08:00
if s == nil || s . search == nil {
return nil , fmt . Errorf ( "search is not configured properly (missing unifiedStorageSearch feature toggle?)" )
}
2025-01-29 03:27:01 +08:00
ctx , span := s . tracer . Start ( ctx , tracingPrexfixSearch + "GetOrCreateIndex" )
defer span . End ( )
2025-08-28 22:02:47 +08:00
span . SetAttributes (
attribute . String ( "namespace" , key . Namespace ) ,
attribute . String ( "group" , key . Group ) ,
attribute . String ( "resource" , key . Resource ) ,
attribute . String ( "namespace" , key . Namespace ) ,
)
2025-01-29 03:27:01 +08:00
2024-11-27 13:57:53 +08:00
idx , err := s . search . GetIndex ( ctx , key )
if err != nil {
2025-08-28 22:02:47 +08:00
return nil , tracing . Error ( span , err )
2024-11-27 13:57:53 +08:00
}
2025-08-25 16:13:07 +08:00
if idx == nil {
2025-08-28 22:02:47 +08:00
span . AddEvent ( "Building index" )
2025-08-25 16:13:07 +08:00
ch := s . buildIndex . DoChan ( key . String ( ) , func ( ) ( interface { } , error ) {
// We want to finish building of the index even if original context is canceled.
// We reuse original context without cancel to keep the tracing spans correct.
ctx := context . WithoutCancel ( ctx )
2025-07-10 19:54:10 +08:00
2025-08-25 16:13:07 +08:00
// Recheck if some other goroutine managed to build an index in the meantime.
// (That is, it finished running this function and stored the index into the cache)
idx , err := s . search . GetIndex ( ctx , key )
if err == nil && idx != nil {
return idx , nil
}
2025-07-31 19:45:55 +08:00
2025-08-25 16:13:07 +08:00
// Get correct value of size + RV for building the index. This is important for our Bleve
// backend to decide whether to build index in-memory or as file-based.
stats , err := s . storage . GetResourceStats ( ctx , key . Namespace , 0 )
if err != nil {
return nil , fmt . Errorf ( "failed to get resource stats: %w" , err )
}
2025-07-10 19:54:10 +08:00
2025-08-25 16:13:07 +08:00
size := int64 ( 0 )
for _ , stat := range stats {
if stat . Namespace == key . Namespace && stat . Group == key . Group && stat . Resource == key . Resource {
size = stat . Count
break
}
}
2025-09-11 16:23:03 +08:00
idx , err = s . build ( ctx , key , size , reason , false )
2025-08-25 16:13:07 +08:00
if err != nil {
return nil , fmt . Errorf ( "error building search index, %w" , err )
}
if idx == nil {
return nil , fmt . Errorf ( "nil index after build" )
}
return idx , nil
} )
2025-07-10 19:54:10 +08:00
2025-08-25 16:13:07 +08:00
select {
case res := <- ch :
if res . Err != nil {
2025-08-28 22:02:47 +08:00
return nil , tracing . Error ( span , res . Err )
2025-07-10 19:54:10 +08:00
}
2025-08-25 16:13:07 +08:00
idx = res . Val . ( ResourceIndex )
case <- ctx . Done ( ) :
2025-08-28 22:02:47 +08:00
return nil , tracing . Error ( span , fmt . Errorf ( "failed to get index: %w" , ctx . Err ( ) ) )
2025-07-10 19:54:10 +08:00
}
2025-08-25 16:13:07 +08:00
}
2025-07-10 19:54:10 +08:00
2025-09-11 16:23:03 +08:00
span . AddEvent ( "Updating index" )
start := time . Now ( )
rv , err := idx . UpdateIndex ( ctx , reason )
if err != nil {
return nil , tracing . Error ( span , fmt . Errorf ( "failed to update index to guarantee strong consistency: %w" , err ) )
}
elapsed := time . Since ( start )
if s . indexMetrics != nil {
s . indexMetrics . SearchUpdateWaitTime . WithLabelValues ( reason ) . Observe ( elapsed . Seconds ( ) )
2024-11-27 13:57:53 +08:00
}
2025-09-11 16:23:03 +08:00
s . log . Debug ( "Index updated before search" , "namespace" , key . Namespace , "group" , key . Group , "resource" , key . Resource , "reason" , reason , "duration" , elapsed , "rv" , rv )
span . AddEvent ( "Index updated" )
2025-08-25 16:13:07 +08:00
return idx , nil
2024-11-27 13:57:53 +08:00
}
2025-09-11 16:23:03 +08:00
func ( s * searchSupport ) build ( ctx context . Context , nsr NamespacedResource , size int64 , indexBuildReason string , rebuild bool ) ( ResourceIndex , error ) {
2024-12-10 12:32:19 +08:00
ctx , span := s . tracer . Start ( ctx , tracingPrexfixSearch + "Build" )
2024-11-21 13:53:25 +08:00
defer span . End ( )
2025-07-30 22:34:15 +08:00
span . SetAttributes (
attribute . String ( "namespace" , nsr . Namespace ) ,
attribute . String ( "group" , nsr . Group ) ,
attribute . String ( "resource" , nsr . Resource ) ,
attribute . Int64 ( "size" , size ) ,
)
2025-05-27 03:33:51 +08:00
logger := s . log . With ( "namespace" , nsr . Namespace , "group" , nsr . Group , "resource" , nsr . Resource )
2024-11-21 13:53:25 +08:00
builder , err := s . builders . get ( ctx , nsr )
if err != nil {
2025-09-11 16:23:03 +08:00
return nil , err
2024-11-21 13:53:25 +08:00
}
2024-11-22 21:44:06 +08:00
fields := s . builders . GetFields ( nsr )
2024-11-21 13:53:25 +08:00
2025-08-25 16:13:07 +08:00
builderFn := func ( index ResourceIndex ) ( int64 , error ) {
2025-07-30 22:34:15 +08:00
span := trace . SpanFromContext ( ctx )
2025-09-11 16:23:03 +08:00
span . AddEvent ( "building index" , trace . WithAttributes ( attribute . Int64 ( "size" , size ) , attribute . String ( "reason" , indexBuildReason ) ) )
2025-07-30 22:34:15 +08:00
2025-08-19 21:41:35 +08:00
listRV , err := s . storage . ListIterator ( ctx , & resourcepb . ListRequest {
2024-11-22 21:44:06 +08:00
Limit : 1000000000000 , // big number
2025-05-16 03:36:52 +08:00
Options : & resourcepb . ListOptions {
Key : & resourcepb . ResourceKey {
2025-05-12 13:56:25 +08:00
Group : nsr . Group ,
Resource : nsr . Resource ,
Namespace : nsr . Namespace ,
} ,
2024-11-22 21:44:06 +08:00
} ,
} , func ( iter ListIterator ) error {
2025-05-27 15:48:39 +08:00
// Process documents in batches to avoid memory issues
// When dealing with large collections (e.g., 100k+ documents),
// loading all documents into memory at once can cause OOM errors.
items := make ( [ ] * BulkIndexItem , 0 , maxBatchSize )
2025-05-09 21:36:21 +08:00
2024-11-22 21:44:06 +08:00
for iter . Next ( ) {
if err = iter . Error ( ) ; err != nil {
return err
}
// Update the key name
2025-05-16 03:36:52 +08:00
key := & resourcepb . ResourceKey {
2025-05-12 13:56:25 +08:00
Group : nsr . Group ,
Resource : nsr . Resource ,
Namespace : nsr . Namespace ,
Name : iter . Name ( ) ,
}
2024-11-22 21:44:06 +08:00
2025-07-30 22:34:15 +08:00
span . AddEvent ( "building document" , trace . WithAttributes ( attribute . String ( "name" , iter . Name ( ) ) ) )
2024-11-22 21:44:06 +08:00
// Convert it to an indexable document
doc , err := builder . BuildDocument ( ctx , key , iter . ResourceVersion ( ) , iter . Value ( ) )
if err != nil {
2025-07-30 22:34:15 +08:00
span . RecordError ( err )
2025-05-27 03:33:51 +08:00
logger . Error ( "error building search document" , "key" , SearchID ( key ) , "err" , err )
2025-02-12 01:57:46 +08:00
continue
2024-11-22 21:44:06 +08:00
}
2025-05-09 21:36:21 +08:00
// Add to bulk items
items = append ( items , & BulkIndexItem {
Action : ActionIndex ,
Doc : doc ,
} )
2025-05-27 15:48:39 +08:00
// When we reach the batch size, perform bulk index and reset the batch.
if len ( items ) >= maxBatchSize {
2025-07-30 22:34:15 +08:00
span . AddEvent ( "bulk indexing" , trace . WithAttributes ( attribute . Int ( "count" , len ( items ) ) ) )
2025-08-25 16:13:07 +08:00
if err = index . BulkIndex ( & BulkIndexRequest { Items : items } ) ; err != nil {
2025-05-27 15:48:39 +08:00
return err
}
items = items [ : 0 ]
}
2025-05-09 21:36:21 +08:00
}
2025-05-27 15:48:39 +08:00
// Index any remaining items in the final batch.
2025-05-09 21:36:21 +08:00
if len ( items ) > 0 {
2025-07-30 22:34:15 +08:00
span . AddEvent ( "bulk indexing" , trace . WithAttributes ( attribute . Int ( "count" , len ( items ) ) ) )
2025-08-25 16:13:07 +08:00
if err = index . BulkIndex ( & BulkIndexRequest { Items : items } ) ; err != nil {
2024-11-22 21:44:06 +08:00
return err
}
}
2025-04-11 22:25:40 +08:00
return iter . Error ( )
2024-11-22 21:44:06 +08:00
} )
2025-08-19 21:41:35 +08:00
return listRV , err
2025-08-25 16:13:07 +08:00
}
updaterFn := func ( ctx context . Context , index ResourceIndex , sinceRV int64 ) ( int64 , int , error ) {
span := trace . SpanFromContext ( ctx )
2025-09-11 16:23:03 +08:00
span . AddEvent ( "updating index" , trace . WithAttributes ( attribute . Int64 ( "sinceRV" , sinceRV ) ) )
2025-08-25 16:13:07 +08:00
rv , it := s . storage . ListModifiedSince ( ctx , NamespacedResource {
Group : nsr . Group ,
Resource : nsr . Resource ,
Namespace : nsr . Namespace ,
} , sinceRV )
// Process documents in batches to avoid memory issues
// When dealing with large collections (e.g., 100k+ documents),
// loading all documents into memory at once can cause OOM errors.
items := make ( [ ] * BulkIndexItem , 0 , maxBatchSize )
docs := 0
for res , err := range it {
// Finish quickly if context is done.
if ctx . Err ( ) != nil {
return 0 , 0 , ctx . Err ( )
}
docs ++
if err != nil {
span . RecordError ( err )
return 0 , 0 , err
}
key := & res . Key
switch res . Action {
case resourcepb . WatchEvent_ADDED , resourcepb . WatchEvent_MODIFIED :
span . AddEvent ( "building document" , trace . WithAttributes ( attribute . String ( "name" , res . Key . Name ) ) )
// Convert it to an indexable document
doc , err := builder . BuildDocument ( ctx , key , res . ResourceVersion , res . Value )
if err != nil {
span . RecordError ( err )
logger . Error ( "error building search document" , "key" , SearchID ( key ) , "err" , err )
continue
}
items = append ( items , & BulkIndexItem {
Action : ActionIndex ,
Doc : doc ,
} )
case resourcepb . WatchEvent_DELETED :
span . AddEvent ( "deleting document" , trace . WithAttributes ( attribute . String ( "name" , res . Key . Name ) ) )
items = append ( items , & BulkIndexItem {
Action : ActionDelete ,
Key : & res . Key ,
} )
default :
logger . Error ( "can't update index with item, unknown action" , "action" , res . Action , "key" , key )
continue
}
// When we reach the batch size, perform bulk index and reset the batch.
if len ( items ) >= maxBatchSize {
span . AddEvent ( "bulk indexing" , trace . WithAttributes ( attribute . Int ( "count" , len ( items ) ) ) )
if err = index . BulkIndex ( & BulkIndexRequest { Items : items } ) ; err != nil {
return 0 , 0 , err
}
items = items [ : 0 ]
}
}
// Index any remaining items in the final batch.
if len ( items ) > 0 {
span . AddEvent ( "bulk indexing" , trace . WithAttributes ( attribute . Int ( "count" , len ( items ) ) ) )
if err = index . BulkIndex ( & BulkIndexRequest { Items : items } ) ; err != nil {
return 0 , 0 , err
}
}
return rv , docs , nil
}
2025-09-11 16:23:03 +08:00
index , err := s . search . BuildIndex ( ctx , nsr , size , fields , indexBuildReason , builderFn , updaterFn , rebuild )
2024-11-22 21:44:06 +08:00
2024-12-06 03:14:04 +08:00
if err != nil {
2025-09-11 16:23:03 +08:00
return nil , err
2024-12-06 03:14:04 +08:00
}
2024-12-05 05:02:40 +08:00
// Record the number of objects indexed for the kind/resource
2024-12-11 02:37:37 +08:00
docCount , err := index . DocCount ( ctx , "" )
2024-12-05 05:02:40 +08:00
if err != nil {
2025-05-27 03:33:51 +08:00
logger . Warn ( "error getting doc count" , "error" , err )
2024-12-05 05:02:40 +08:00
}
2025-03-13 22:09:38 +08:00
if s . indexMetrics != nil {
2025-05-12 13:56:25 +08:00
s . indexMetrics . IndexedKinds . WithLabelValues ( nsr . Resource ) . Add ( float64 ( docCount ) )
2024-12-05 05:02:40 +08:00
}
2025-09-11 16:23:03 +08:00
return index , err
2025-06-27 20:00:39 +08:00
}
2024-11-21 13:53:25 +08:00
type builderCache struct {
// The default builder
defaultBuilder DocumentBuilder
// Possible blob support
blob BlobSupport
2024-11-22 21:44:06 +08:00
// searchable fields initialized once on startup
fields map [ schema . GroupResource ] SearchableDocumentFields
2024-11-21 13:53:25 +08:00
// lookup by group, then resource (namespace)
// This is only modified at startup, so we do not need mutex for access
lookup map [ string ] map [ string ] DocumentBuilderInfo
// For namespaced based resources that require a cache
ns * expirable . LRU [ NamespacedResource , DocumentBuilder ]
mu sync . Mutex // only locked for a cache miss
}
func newBuilderCache ( cfg [ ] DocumentBuilderInfo , nsCacheSize int , ttl time . Duration ) ( * builderCache , error ) {
cache := & builderCache {
2024-11-22 21:44:06 +08:00
fields : make ( map [ schema . GroupResource ] SearchableDocumentFields ) ,
2024-11-21 13:53:25 +08:00
lookup : make ( map [ string ] map [ string ] DocumentBuilderInfo ) ,
ns : expirable . NewLRU [ NamespacedResource , DocumentBuilder ] ( nsCacheSize , nil , ttl ) ,
}
if len ( cfg ) == 0 {
return cache , fmt . Errorf ( "no builders configured" )
}
for _ , b := range cfg {
// the default
if b . GroupResource . Group == "" && b . GroupResource . Resource == "" {
if b . Builder == nil {
return cache , fmt . Errorf ( "default document builder is missing" )
}
cache . defaultBuilder = b . Builder
continue
}
g , ok := cache . lookup [ b . GroupResource . Group ]
if ! ok {
g = make ( map [ string ] DocumentBuilderInfo )
cache . lookup [ b . GroupResource . Group ] = g
}
g [ b . GroupResource . Resource ] = b
2024-11-22 21:44:06 +08:00
// Any custom fields
cache . fields [ b . GroupResource ] = b . Fields
2024-11-21 13:53:25 +08:00
}
return cache , nil
}
2024-11-22 21:44:06 +08:00
// GetFields returns the searchable fields registered for the key's group and
// resource. The namespace is not part of the lookup; an unregistered pair
// yields the zero value of SearchableDocumentFields.
func (s *builderCache) GetFields(key NamespacedResource) SearchableDocumentFields {
	gr := schema.GroupResource{
		Group:    key.Group,
		Resource: key.Resource,
	}
	return s.fields[gr]
}
2024-11-21 13:53:25 +08:00
// get returns the document builder to use for the given namespaced resource.
//
// Resolution order:
//  1. the builder configured directly for the group/resource, when one is set;
//  2. a namespace-specific builder, taken from the expiring LRU cache or
//     created with the configured Namespaced factory on a cache miss;
//  3. the default builder, when the group/resource has no configuration.
//
// ctx is forwarded to the Namespaced factory and is typically a background
// context. An error is returned only when that factory fails; in that case
// nothing is cached and the factory's results are returned unchanged.
func (s *builderCache) get(ctx context.Context, key NamespacedResource) (DocumentBuilder, error) {
	group, ok := s.lookup[key.Group]
	if !ok {
		return s.defaultBuilder, nil
	}
	info, ok := group[key.Resource]
	if !ok {
		return s.defaultBuilder, nil
	}
	if info.Builder != nil {
		return info.Builder, nil
	}

	// The builder needs namespace context: try the cache before taking the lock.
	if builder, ok := s.ns.Get(key); ok {
		return builder, nil
	}

	s.mu.Lock()
	defer s.mu.Unlock()

	// Another goroutine may have built and cached this builder while we were
	// waiting for the lock; re-check so the factory is not run again.
	if builder, ok := s.ns.Get(key); ok {
		return builder, nil
	}

	builder, err := info.Namespaced(ctx, key.Namespace, s.blob)
	if err != nil {
		return builder, err
	}
	_ = s.ns.Add(key, builder) // the returned eviction flag is not needed here
	return builder, nil
}
2025-02-07 00:30:47 +08:00
// AsResourceKey converts the given namespace and type to a search key
2025-05-16 03:36:52 +08:00
func AsResourceKey ( ns string , t string ) ( * resourcepb . ResourceKey , error ) {
2025-02-07 00:30:47 +08:00
if ns == "" {
return nil , fmt . Errorf ( "missing namespace" )
}
switch t {
case "folders" , "folder" :
2025-05-16 03:36:52 +08:00
return & resourcepb . ResourceKey {
2025-02-07 00:30:47 +08:00
Namespace : ns ,
2025-04-11 20:09:52 +08:00
Group : folders . GROUP ,
Resource : folders . RESOURCE ,
2025-02-07 00:30:47 +08:00
} , nil
case "dashboards" , "dashboard" :
2025-05-16 03:36:52 +08:00
return & resourcepb . ResourceKey {
2025-02-07 00:30:47 +08:00
Namespace : ns ,
2025-04-11 20:09:52 +08:00
Group : dashboardv1 . GROUP ,
Resource : dashboardv1 . DASHBOARD_RESOURCE ,
2025-02-07 00:30:47 +08:00
} , nil
// NOT really supported in the dashboard search UI, but useful for manual testing
case "playlist" , "playlists" :
2025-05-16 03:36:52 +08:00
return & resourcepb . ResourceKey {
2025-02-07 00:30:47 +08:00
Namespace : ns ,
Group : "playlist.grafana.app" ,
Resource : "playlists" ,
} , nil
}
return nil , fmt . Errorf ( "unknown resource type" )
}
2025-05-09 21:36:21 +08:00
2025-06-13 03:34:48 +08:00
// clearNamespacedCache drops the cached namespace-specific builder for key,
// if there is one. The removal is done while holding the cache mutex.
func (s *builderCache) clearNamespacedCache(key NamespacedResource) {
	s.mu.Lock()
	defer s.mu.Unlock()

	// Whether an entry was actually present does not matter to callers.
	_ = s.ns.Remove(key)
}
2025-06-27 20:00:39 +08:00
// Test utilities for document building

// testDocumentBuilder implements DocumentBuilder for testing
type testDocumentBuilder struct{}

// BuildDocument decodes value as an unstructured JSON object and turns it
// into an IndexableDocument.
//
// The document key copies namespace, group and resource from key and takes
// its name from the decoded object. Title, Tags and the "value" field are
// read from spec.title, spec.tags and spec.value. Entries that are missing or
// have an unexpected type are ignored rather than causing a panic: title and
// value fall back to "", and non-string tags are skipped.
//
// ctx and rv are not used. An error is returned when key is nil or when value
// cannot be unmarshalled.
func (b *testDocumentBuilder) BuildDocument(ctx context.Context, key *resourcepb.ResourceKey, rv int64, value []byte) (*IndexableDocument, error) {
	if key == nil {
		return nil, fmt.Errorf("missing resource key")
	}

	// convert value to unstructured.Unstructured
	var u unstructured.Unstructured
	if err := u.UnmarshalJSON(value); err != nil {
		return nil, fmt.Errorf("failed to unmarshal value: %w", err)
	}

	title := ""
	tags := []string{}
	val := ""

	// The NestedMap error is deliberately ignored: an object without a usable
	// spec is indexed with empty title, tags and value.
	if spec, found, _ := unstructured.NestedMap(u.Object, "spec"); found {
		if s, ok := spec["title"].(string); ok {
			title = s
		}
		if rawTags, ok := spec["tags"].([]any); ok {
			tags = make([]string, 0, len(rawTags))
			for _, rawTag := range rawTags {
				if s, ok := rawTag.(string); ok {
					tags = append(tags, s)
				}
			}
		}
		if s, ok := spec["value"].(string); ok {
			val = s
		}
	}

	return &IndexableDocument{
		Key: &resourcepb.ResourceKey{
			Namespace: key.Namespace,
			Group:     key.Group,
			Resource:  key.Resource,
			Name:      u.GetName(),
		},
		Title: title,
		Tags:  tags,
		Fields: map[string]any{
			"value": val,
		},
	}, nil
}
// TestDocumentBuilderSupplier implements DocumentBuilderSupplier for testing
type TestDocumentBuilderSupplier struct {
	// GroupsResources maps an API group to the resource served in that group
	GroupsResources map[string]string
}

// GetDocumentBuilders returns one DocumentBuilderInfo per entry of
// GroupsResources, each backed by a testDocumentBuilder. The order of the
// result follows map iteration and is therefore unspecified. The returned
// error is always nil.
func (s *TestDocumentBuilderSupplier) GetDocumentBuilders() ([]DocumentBuilderInfo, error) {
	infos := make([]DocumentBuilderInfo, len(s.GroupsResources))

	// Fill one slot per configured group/resource pair
	next := 0
	for grp, res := range s.GroupsResources {
		infos[next] = DocumentBuilderInfo{
			GroupResource: schema.GroupResource{Group: grp, Resource: res},
			Builder:       &testDocumentBuilder{},
		}
		next++
	}
	return infos, nil
}