Search: Update bluge version (#50377)

This commit is contained in:
Alexander Emelin 2022-06-22 15:49:26 +03:00 committed by GitHub
parent 454f65cd88
commit cef81a6478
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 86 additions and 22 deletions

13
go.mod
View File

@ -243,7 +243,7 @@ require (
github.com/Azure/azure-sdk-for-go/sdk/keyvault/azkeys v0.4.0
github.com/Azure/go-autorest/autorest/adal v0.9.17
github.com/armon/go-radix v1.0.0
github.com/blugelabs/bluge v0.1.9
github.com/blugelabs/bluge v0.2.1
github.com/getkin/kin-openapi v0.94.0
github.com/golang-migrate/migrate/v4 v4.7.0
github.com/grafana/dskit v0.0.0-20211011144203-3a88ec0b675f
@ -273,16 +273,17 @@ require (
github.com/Azure/azure-sdk-for-go/sdk/keyvault/internal v0.2.1 // indirect
github.com/AzureAD/microsoft-authentication-library-for-go v0.4.0 // indirect
github.com/Microsoft/go-winio v0.5.2 // indirect
github.com/RoaringBitmap/roaring v0.9.1 // indirect
github.com/RoaringBitmap/roaring v0.9.4 // indirect
github.com/axiomhq/hyperloglog v0.0.0-20191112132149-a4c4c47bc57f // indirect
github.com/bits-and-blooms/bitset v1.2.0 // indirect
github.com/blevesearch/go-porterstemmer v1.0.3 // indirect
github.com/blevesearch/mmap-go v1.0.2 // indirect
github.com/blevesearch/mmap-go v1.0.4 // indirect
github.com/blevesearch/segment v0.9.0 // indirect
github.com/blevesearch/snowballstem v0.9.0 // indirect
github.com/blevesearch/vellum v1.0.5 // indirect
github.com/blevesearch/vellum v1.0.7 // indirect
github.com/blugelabs/bluge_segment_api v0.2.0 // indirect
github.com/blugelabs/ice v0.2.0 // indirect
github.com/blugelabs/ice v1.0.0 // indirect
github.com/blugelabs/ice/v2 v2.0.1 // indirect
github.com/caio/go-tdigest v3.1.0+incompatible // indirect
github.com/chromedp/cdproto v0.0.0-20220208224320-6efb837e6bc2 // indirect
github.com/containerd/containerd v1.6.2 // indirect
@ -296,7 +297,7 @@ require (
github.com/golang-jwt/jwt v3.2.2+incompatible // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 // indirect
github.com/imdario/mergo v0.3.12 // indirect
github.com/klauspost/compress v1.15.1 // indirect
github.com/klauspost/compress v1.15.2 // indirect
github.com/kylelemons/godebug v1.1.0 // indirect
github.com/labstack/echo/v4 v4.7.2 // indirect
github.com/labstack/gommon v0.3.1 // indirect

23
go.sum
View File

@ -296,8 +296,9 @@ github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/RoaringBitmap/gocroaring v0.4.0/go.mod h1:NieMwz7ZqwU2DD73/vvYwv7r4eWBKuPVSXZIpsaMwCI=
github.com/RoaringBitmap/real-roaring-datasets v0.0.0-20190726190000-eb7c87156f76/go.mod h1:oM0MHmQ3nDsq609SS36p+oYbRi16+oVvU2Bw4Ipv0SE=
github.com/RoaringBitmap/roaring v0.9.1 h1:5PRizBmoN/PfV17nPNQou4dHQ7NcJi8FO/bihdYyCEM=
github.com/RoaringBitmap/roaring v0.9.1/go.mod h1:h1B7iIUOmnAeb5ytYMvnHJwxMc6LUrwBnzXWRuqTQUc=
github.com/RoaringBitmap/roaring v0.9.4 h1:ckvZSX5gwCRaJYBNe7syNawCU5oruY9gQmjXlp4riwo=
github.com/RoaringBitmap/roaring v0.9.4/go.mod h1:icnadbWcNyfEHlYdr+tDlOTih1Bf/h+rzPpv4sbomAA=
github.com/SAP/go-hdb v0.14.1/go.mod h1:7fdQLVC2lER3urZLjZCm0AuMQfApof92n3aylBPEkMo=
github.com/Shopify/goreferrer v0.0.0-20181106222321-ec9c9a553398/go.mod h1:a1uqRtAwp2Xwc6WNPJEufxJ7fx3npB4UV/JOLmbu5I0=
github.com/Shopify/logrus-bugsnag v0.0.0-20171204204709-577dee27f20d/go.mod h1:HI8ITrYtUY+O+ZhtlqUnD8+KwNPOyugEhfP9fdUIaEQ=
@ -479,20 +480,25 @@ github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnweb
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo=
github.com/blevesearch/go-porterstemmer v1.0.3/go.mod h1:angGc5Ht+k2xhJdZi511LtmxuEf0OVpvUUNrwmM1P7M=
github.com/blevesearch/mmap-go v1.0.2 h1:JtMHb+FgQCTTYIhtMvimw15dJwu1Y5lrZDMOFXVWPk0=
github.com/blevesearch/mmap-go v1.0.2/go.mod h1:ol2qBqYaOUsGdm7aRMRrYGgPvnwLe6Y+7LMvAB5IbSA=
github.com/blevesearch/mmap-go v1.0.3/go.mod h1:pYvKl/grLQrBxuaRYgoTssa4rVujYYeenDp++2E+yvs=
github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc=
github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs=
github.com/blevesearch/segment v0.9.0 h1:5lG7yBCx98or7gK2cHMKPukPZ/31Kag7nONpoBt22Ac=
github.com/blevesearch/segment v0.9.0/go.mod h1:9PfHYUdQCgHktBgvtUOF4x+pc4/l8rdH0u5spnW85UQ=
github.com/blevesearch/snowballstem v0.9.0 h1:lMQ189YspGP6sXvZQ4WZ+MLawfV8wOmPoD/iWeNXm8s=
github.com/blevesearch/snowballstem v0.9.0/go.mod h1:PivSj3JMc8WuaFkTSRDW2SlrulNWPl4ABg1tC/hlgLs=
github.com/blevesearch/vellum v1.0.5 h1:L5dJ7hKauRVbuH7I8uqLeSK92CPPY6FfrbAmLhAug8A=
github.com/blevesearch/vellum v1.0.5/go.mod h1:atE0EH3fvk43zzS7t1YNdNC7DbmcC3uz+eMD5xZ2OyQ=
github.com/blugelabs/bluge v0.1.9 h1:bPgXlcsWugrXNjzeoLdOnvfJpHsyODKpYaAndayl/SM=
github.com/blugelabs/bluge v0.1.9/go.mod h1:5d7LktUkQgvbh5Bmi6tPWtvo4+6uRTm6gAwP+5z6FqQ=
github.com/blevesearch/vellum v1.0.7 h1:+vn8rfyCRHxKVRgDLeR0FAXej2+6mEb5Q15aQE/XESQ=
github.com/blevesearch/vellum v1.0.7/go.mod h1:doBZpmRhwTsASB4QdUZANlJvqVAUdUyX0ZK7QJCTeBE=
github.com/blugelabs/bluge v0.2.1 h1:lgd6IhAAIDoq6my9HQOwk3p6xqJBmR49+lpFmpeFHgU=
github.com/blugelabs/bluge v0.2.1/go.mod h1:am1LU9jS8dZgWkRzkGLQN3757EgMs3upWrU2fdN9foE=
github.com/blugelabs/bluge_segment_api v0.2.0 h1:cCX1Y2y8v0LZ7+EEJ6gH7dW6TtVTW4RhG0vp3R+N2Lo=
github.com/blugelabs/bluge_segment_api v0.2.0/go.mod h1:95XA+ZXfRj/IXADm7gZ+iTcWOJPg5jQTY1EReIzl3LA=
github.com/blugelabs/ice v0.2.0 h1:9N/TRBqAr43emheD1ptk9mohuT6xAVq83gesgE60Qqk=
github.com/blugelabs/ice v0.2.0/go.mod h1:7foiDf4V83FIYYnGh2LOoRWsbNoCqAAMNgKn879Iyu0=
github.com/blugelabs/ice v1.0.0 h1:um7wf9e6jbkTVCrOyQq3tKK43fBMOvLUYxbj3Qtc4eo=
github.com/blugelabs/ice v1.0.0/go.mod h1:gNfFPk5zM+yxJROhthxhVQYjpBO9amuxWXJQ2Lo+IbQ=
github.com/blugelabs/ice/v2 v2.0.1 h1:mzHbntLjk2v7eDRgoXCgzOsPKN1Tenu9Svo6l9cTLS4=
github.com/blugelabs/ice/v2 v2.0.1/go.mod h1:QxAWSPNwZwsIqS25c3lbIPFQrVvT1sphf5x5DfMLH5M=
github.com/bmatcuk/doublestar v1.2.2/go.mod h1:wiQtGV+rzVYxB7WIlirSN++5HPtPlXEo9MEoZQC/PmE=
github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4=
github.com/bmizerany/pat v0.0.0-20170815010413-6226ea591a40/go.mod h1:8rLXio+WjiTceGBHIoTvn60HIbs7Hm7bcHjyrSqYB9c=
@ -1843,8 +1849,9 @@ github.com/klauspost/compress v1.11.12/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdY
github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/compress v1.12.2/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
github.com/klauspost/compress v1.13.1/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
github.com/klauspost/compress v1.15.1 h1:y9FcTHGyrebwfP0ZZqFiaxTaiDnUrGkJkI+f583BL1A=
github.com/klauspost/compress v1.15.1/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
github.com/klauspost/compress v1.15.2 h1:3WH+AG7s2+T8o3nrM/8u2rdqUEcQhmga7smjrT41nAw=
github.com/klauspost/compress v1.15.2/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU=
github.com/klauspost/cpuid v0.0.0-20170728055534-ae7887de9fa5/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/klauspost/cpuid v1.2.1/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
github.com/klauspost/cpuid v1.2.3/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=

View File

@ -50,7 +50,7 @@ func initIndex(dashboards []dashboard, logger log.Logger, extendDoc ExtendDashbo
// In order to reduce memory usage while initial indexing we are limiting
// the size of batch here.
docsInBatch := 0
maxBatchSize := 100
maxBatchSize := 300
flushIfRequired := func(force bool) error {
docsInBatch++

View File

@ -3,9 +3,11 @@ package searchV2
import (
"bytes"
"context"
"errors"
"fmt"
"io/ioutil"
"os"
"os/exec"
"runtime"
"strconv"
"strings"
@ -195,19 +197,19 @@ func (i *dashboardIndex) buildInitialIndexes(ctx context.Context, orgIDs []int64
}
func (i *dashboardIndex) buildInitialIndex(ctx context.Context, orgID int64) error {
memCtx, memCancel := context.WithCancel(ctx)
debugCtx, debugCtxCancel := context.WithCancel(ctx)
if os.Getenv("GF_SEARCH_DEBUG") != "" {
go i.debugMemStats(memCtx, 200*time.Millisecond)
go i.debugResourceUsage(debugCtx, 200*time.Millisecond)
}
started := time.Now()
numDashboards, err := i.buildOrgIndex(ctx, orgID)
if err != nil {
memCancel()
return fmt.Errorf("can't build dashboard search index for org ID %d: %w", orgID, err)
debugCtxCancel()
return fmt.Errorf("can't build dashboard search index for org ID 1: %w", err)
}
i.logger.Info("Indexing for org finished", "orgIndexElapsed", time.Since(started), "orgId", orgID, "numDashboards", numDashboards)
memCancel()
debugCtxCancel()
if os.Getenv("GF_SEARCH_DEBUG") != "" {
// May help to estimate size of index when introducing changes. Though it's not a direct
@ -219,7 +221,47 @@ func (i *dashboardIndex) buildInitialIndex(ctx context.Context, orgID int64) err
return nil
}
func (i *dashboardIndex) debugMemStats(ctx context.Context, frequency time.Duration) {
// This is a naive implementation of process CPU getting (credits to
// https://stackoverflow.com/a/11357813/1288429). Should work on both Linux and Darwin.
// Since we only use this during development seems simple and cheap solution to get
// process CPU usage in cross-platform way.
func getProcessCPU(currentPid int) (float64, error) {
cmd := exec.Command("ps", "aux")
var out bytes.Buffer
cmd.Stdout = &out
err := cmd.Run()
if err != nil {
return 0, err
}
for {
line, err := out.ReadString('\n')
if err != nil {
break
}
tokens := strings.Split(line, " ")
ft := make([]string, 0)
for _, t := range tokens {
if t != "" && t != "\t" {
ft = append(ft, t)
}
}
pid, err := strconv.Atoi(ft[1])
if err != nil {
continue
}
if pid != currentPid {
continue
}
cpu, err := strconv.ParseFloat(ft[2], 64)
if err != nil {
return 0, err
}
return cpu, nil
}
return 0, errors.New("process not found")
}
func (i *dashboardIndex) debugResourceUsage(ctx context.Context, frequency time.Duration) {
var maxHeapInuse uint64
var maxSys uint64
@ -234,15 +276,29 @@ func (i *dashboardIndex) debugMemStats(ctx context.Context, frequency time.Durat
}
}
var cpuUtilization []float64
captureCPUStats := func() {
cpu, err := getProcessCPU(os.Getpid())
if err != nil {
i.logger.Error("CPU stats error", "error", err)
return
}
// Just collect CPU utilization to a slice and show in the of index build.
cpuUtilization = append(cpuUtilization, cpu)
}
captureMemStats()
captureCPUStats()
for {
select {
case <-ctx.Done():
i.logger.Warn("Memory stats during indexing", "maxHeapInUse", formatBytes(maxHeapInuse), "maxSys", formatBytes(maxSys))
i.logger.Warn("Resource usage during indexing", "maxHeapInUse", formatBytes(maxHeapInuse), "maxSys", formatBytes(maxSys), "cpuPercent", cpuUtilization)
return
case <-time.After(frequency):
captureMemStats()
captureCPUStats()
}
}
}